tahoe-lafs-1.10.0/COPYING.GPL

This work also comes with the added permission that you may combine it with a work licensed under the OpenSSL license (any version) and distribute the resulting combined work, as long as you follow the requirements of the licences of this work in regard to all of the resulting combined work aside from the work licensed under the OpenSSL license.

This work also comes with the added permission that you may combine it with a work licensed under the Eclipse Public Licence (any version) and distribute the resulting combined work, as long as you follow the requirements of the licences of this work in regard to all of the resulting combined work aside from the work licensed under the Eclipse Public Licence.

This work also comes with the added permission that you may combine it with a work licensed under the Q Public Licence (any version) and distribute the resulting combined work, as long as you follow the requirements of the licences of this work in regard to all of the resulting combined work aside from the work licensed under the Q Public Licence.

This work also comes with the added permission that you may combine it with a work licensed under the Apache Licence (any version) and distribute the resulting combined work, as long as you follow the requirements of the licences of this work in regard to all of the resulting combined work aside from the work licensed under the Apache Licence.

This work also comes with the added permission that you may combine it with a work licensed under the GNU Lesser General Public License (any version) and distribute the resulting combined work, as long as you follow the requirements of the licences of this work in regard to all of the resulting combined work aside from the work licensed under the GNU Lesser General Public License.

This work also comes with the added permission that you may combine it with a work licensed under the Zope Public License (any version) and distribute the resulting combined work, as long as you follow the requirements of the licences of this work in regard to all of the resulting combined work aside from the work licensed under the Zope Public License.

This work also comes with the added permission that you may combine it with a work licensed under the Python Software Foundation License (any version) and distribute the resulting combined work, as long as you follow the requirements of the licences of this work in regard to all of the resulting combined work aside from the work licensed under the Python Software Foundation License.

This work also comes with the added permission that you may combine it with a work licensed under the Academic Free License (any version) and distribute the resulting combined work, as long as you follow the requirements of the licences of this work in regard to all of the resulting combined work aside from the work licensed under the Academic Free License.
This work also comes with the added permission that you may combine it with a work licensed under the Apple Public Source License (any version) and distribute the resulting combined work, as long as you follow the requirements of the licences of this work in regard to all of the resulting combined work aside from the work licensed under the Apple Public Source License.

This work also comes with the added permission that you may combine it with a work licensed under the BitTorrent Open Source License (any version) and distribute the resulting combined work, as long as you follow the requirements of the licences of this work in regard to all of the resulting combined work aside from the work licensed under the BitTorrent Open Source License.

This work also comes with the added permission that you may combine it with a work licensed under the Lucent Public License (any version) and distribute the resulting combined work, as long as you follow the requirements of the licences of this work in regard to all of the resulting combined work aside from the work licensed under the Lucent Public License.

This work also comes with the added permission that you may combine it with a work licensed under the Jabber Open Source License (any version) and distribute the resulting combined work, as long as you follow the requirements of the licences of this work in regard to all of the resulting combined work aside from the work licensed under the Jabber Open Source License.

This work also comes with the added permission that you may combine it with a work licensed under the Common Development and Distribution License (any version) and distribute the resulting combined work, as long as you follow the requirements of the licences of this work in regard to all of the resulting combined work aside from the work licensed under the Common Development and Distribution License.

This work also comes with the added permission that you may combine it with a work licensed under the Microsoft Public License (any version) and distribute the resulting combined work, as long as you follow the requirements of the licences of this work in regard to all of the resulting combined work aside from the work licensed under the Microsoft Public License.

This work also comes with the added permission that you may combine it with a work licensed under the Microsoft Reciprocal License (any version) and distribute the resulting combined work, as long as you follow the requirements of the licences of this work in regard to all of the resulting combined work aside from the work licensed under the Microsoft Reciprocal License.

This work also comes with the added permission that you may combine it with a work licensed under the Sun Industry Standards Source License (any version) and distribute the resulting combined work, as long as you follow the requirements of the licences of this work in regard to all of the resulting combined work aside from the work licensed under the Sun Industry Standards Source License.

This work also comes with the added permission that you may combine it with a work licensed under the Open Software License (any version) and distribute the resulting combined work, as long as you follow the requirements of the licences of this work in regard to all of the resulting combined work aside from the work licensed under the Open Software License.
GNU GENERAL PUBLIC LICENSE Version 2, June 1991 Copyright (C) 1989, 1991 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Lesser General Public License instead.) You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. The precise terms and conditions for copying, distribution and modification follow. GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". 
Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. 
You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. 4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. 6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. 7. 
If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 9. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. 10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. Also add information on how to contact you by electronic and paper mail. If the program is interactive, make it output a short notice like this when it starts in an interactive mode: Gnomovision version 69, Copyright (C) year name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (which makes passes at compilers) written by James Hacker. , 1 April 1989 Ty Coon, President of Vice This General Public License does not permit incorporating your program into proprietary programs. 
If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License.

tahoe-lafs-1.10.0/COPYING.TGPPL.rst

This work also comes with the added permission that you may combine it with a work licensed under the OpenSSL license (any version) and distribute the resulting combined work, as long as you follow the requirements of the licences of this work in regard to all of the resulting combined work aside from the work licensed under the OpenSSL license.

This work also comes with the added permission that you may combine it with a work licensed under the Eclipse Public Licence (any version) and distribute the resulting combined work, as long as you follow the requirements of the licences of this work in regard to all of the resulting combined work aside from the work licensed under the Eclipse Public Licence.

This work also comes with the added permission that you may combine it with a work licensed under the Q Public Licence (any version) and distribute the resulting combined work, as long as you follow the requirements of the licences of this work in regard to all of the resulting combined work aside from the work licensed under the Q Public Licence.

This work also comes with the added permission that you may combine it with a work licensed under the Apache Licence (any version) and distribute the resulting combined work, as long as you follow the requirements of the licences of this work in regard to all of the resulting combined work aside from the work licensed under the Apache Licence.

This work also comes with the added permission that you may combine it with a work licensed under the GNU Lesser General Public License (any version) and distribute the resulting combined work, as long as you follow the requirements of the licences of this work in regard to all of the resulting combined work aside from the work licensed under the GNU Lesser General Public License.

This work also comes with the added permission that you may combine it with a work licensed under the Zope Public License (any version) and distribute the resulting combined work, as long as you follow the requirements of the licences of this work in regard to all of the resulting combined work aside from the work licensed under the Zope Public License.

This work also comes with the added permission that you may combine it with a work licensed under the Python Software Foundation License (any version) and distribute the resulting combined work, as long as you follow the requirements of the licences of this work in regard to all of the resulting combined work aside from the work licensed under the Python Software Foundation License.

This work also comes with the added permission that you may combine it with a work licensed under the Academic Free License (any version) and distribute the resulting combined work, as long as you follow the requirements of the licences of this work in regard to all of the resulting combined work aside from the work licensed under the Academic Free License.
This work also comes with the added permission that you may combine it with a work licensed under the Apple Public Source License (any version) and distribute the resulting combined work, as long as you follow the requirements of the licences of this work in regard to all of the resulting combined work aside from the work licensed under the Apple Public Source License.

This work also comes with the added permission that you may combine it with a work licensed under the BitTorrent Open Source License (any version) and distribute the resulting combined work, as long as you follow the requirements of the licences of this work in regard to all of the resulting combined work aside from the work licensed under the BitTorrent Open Source License.

This work also comes with the added permission that you may combine it with a work licensed under the Lucent Public License (any version) and distribute the resulting combined work, as long as you follow the requirements of the licences of this work in regard to all of the resulting combined work aside from the work licensed under the Lucent Public License.

This work also comes with the added permission that you may combine it with a work licensed under the Jabber Open Source License (any version) and distribute the resulting combined work, as long as you follow the requirements of the licences of this work in regard to all of the resulting combined work aside from the work licensed under the Jabber Open Source License.

This work also comes with the added permission that you may combine it with a work licensed under the Common Development and Distribution License (any version) and distribute the resulting combined work, as long as you follow the requirements of the licences of this work in regard to all of the resulting combined work aside from the work licensed under the Common Development and Distribution License.

This work also comes with the added permission that you may combine it with a work licensed under the Microsoft Public License (any version) and distribute the resulting combined work, as long as you follow the requirements of the licences of this work in regard to all of the resulting combined work aside from the work licensed under the Microsoft Public License.

This work also comes with the added permission that you may combine it with a work licensed under the Microsoft Reciprocal License (any version) and distribute the resulting combined work, as long as you follow the requirements of the licences of this work in regard to all of the resulting combined work aside from the work licensed under the Microsoft Reciprocal License.

This work also comes with the added permission that you may combine it with a work licensed under the Sun Industry Standards Source License (any version) and distribute the resulting combined work, as long as you follow the requirements of the licences of this work in regard to all of the resulting combined work aside from the work licensed under the Sun Industry Standards Source License.

This work also comes with the added permission that you may combine it with a work licensed under the Open Software License (any version) and distribute the resulting combined work, as long as you follow the requirements of the licences of this work in regard to all of the resulting combined work aside from the work licensed under the Open Software License.

=======================================================
Transitive Grace Period Public Licence ("TGPPL") v.
1.0 ======================================================= This Transitive Grace Period Public Licence (the "License") applies to any original work of authorship (the "Original Work") whose owner (the "Licensor") has placed the following licensing notice adjacent to the copyright notice for the Original Work: *Licensed under the Transitive Grace Period Public Licence version 1.0* 1. **Grant of Copyright License.** Licensor grants You a worldwide, royalty-free, non-exclusive, sublicensable license, for the duration of the copyright, to do the following: a. to reproduce the Original Work in copies, either alone or as part of a collective work; b. to translate, adapt, alter, transform, modify, or arrange the Original Work, thereby creating derivative works ("Derivative Works") based upon the Original Work; c. to distribute or communicate copies of the Original Work and Derivative Works to the public, with the proviso that copies of Original Work or Derivative Works that You distribute or communicate shall be licensed under this Transitive Grace Period Public Licence no later than 12 months after You distributed or communicated said copies; d. to perform the Original Work publicly; and e. to display the Original Work publicly. 2. **Grant of Patent License.** Licensor grants You a worldwide, royalty-free, non-exclusive, sublicensable license, under patent claims owned or controlled by the Licensor that are embodied in the Original Work as furnished by the Licensor, for the duration of the patents, to make, use, sell, offer for sale, have made, and import the Original Work and Derivative Works. 3. **Grant of Source Code License.** The term "Source Code" means the preferred form of the Original Work for making modifications to it and all available documentation describing how to modify the Original Work. Licensor agrees to provide a machine-readable copy of the Source Code of the Original Work along with each copy of the Original Work that Licensor distributes. Licensor reserves the right to satisfy this obligation by placing a machine-readable copy of the Source Code in an information repository reasonably calculated to permit inexpensive and convenient access by You for as long as Licensor continues to distribute the Original Work. 4. **Exclusions From License Grant.** Neither the names of Licensor, nor the names of any contributors to the Original Work, nor any of their trademarks or service marks, may be used to endorse or promote products derived from this Original Work without express prior permission of the Licensor. Except as expressly stated herein, nothing in this License grants any license to Licensor's trademarks, copyrights, patents, trade secrets or any other intellectual property. No patent license is granted to make, use, sell, offer for sale, have made, or import embodiments of any patent claims other than the licensed claims defined in Section 2. No license is granted to the trademarks of Licensor even if such marks are included in the Original Work. Nothing in this License shall be interpreted to prohibit Licensor from licensing under terms different from this License any Original Work that Licensor otherwise would have a right to license. 5. 
**External Deployment.** The term "External Deployment" means the use, distribution, or communication of the Original Work or Derivative Works in any way such that the Original Work or Derivative Works may be used by anyone other than You, whether those works are distributed or communicated to those persons or made available as an application intended for use over a network. As an express condition for the grants of license hereunder, You must treat any External Deployment by You of the Original Work or a Derivative Work as a distribution under section 1(c). 6. **Attribution Rights.** You must retain, in the Source Code of any Derivative Works that You create, all copyright, patent, or trademark notices from the Source Code of the Original Work, as well as any notices of licensing and any descriptive text identified therein as an "Attribution Notice." You must cause the Source Code for any Derivative Works that You create to carry a prominent Attribution Notice reasonably calculated to inform recipients that You have modified the Original Work. 7. **Warranty of Provenance and Disclaimer of Warranty.** Licensor warrants that the copyright in and to the Original Work and the patent rights granted herein by Licensor are owned by the Licensor or are sublicensed to You under the terms of this License with the permission of the contributor(s) of those copyrights and patent rights. Except as expressly stated in the immediately preceding sentence, the Original Work is provided under this License on an "AS IS" BASIS and WITHOUT WARRANTY, either express or implied, including, without limitation, the warranties of non-infringement, merchantability or fitness for a particular purpose. THE ENTIRE RISK AS TO THE QUALITY OF THE ORIGINAL WORK IS WITH YOU. This DISCLAIMER OF WARRANTY constitutes an essential part of this License. No license to the Original Work is granted by this License except under this disclaimer. 8. **Limitation of Liability.** Under no circumstances and under no legal theory, whether in tort (including negligence), contract, or otherwise, shall the Licensor be liable to anyone for any indirect, special, incidental, or consequential damages of any character arising as a result of this License or the use of the Original Work including, without limitation, damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses. This limitation of liability shall not apply to the extent applicable law prohibits such limitation. 9. **Acceptance and Termination.** If, at any time, You expressly assented to this License, that assent indicates your clear and irrevocable acceptance of this License and all of its terms and conditions. If You distribute or communicate copies of the Original Work or a Derivative Work, You must make a reasonable effort under the circumstances to obtain the express assent of recipients to the terms of this License. This License conditions your rights to undertake the activities listed in Section 1, including your right to create Derivative Works based upon the Original Work, and doing so without honoring these terms and conditions is prohibited by copyright law and international treaty. Nothing in this License is intended to affect copyright exceptions and limitations (including 'fair use' or 'fair dealing'). This License shall terminate immediately and You may no longer exercise any of the rights granted to You by this License upon your failure to honor the conditions in Section 1(c). 10. 
**Termination for Patent Action.** This License shall terminate automatically and You may no longer exercise any of the rights granted to You by this License as of the date You commence an action, including a cross-claim or counterclaim, against Licensor or any licensee alleging that the Original Work infringes a patent. This termination provision shall not apply for an action alleging patent infringement by combinations of the Original Work with other software or hardware. 11. **Jurisdiction, Venue and Governing Law.** Any action or suit relating to this License may be brought only in the courts of a jurisdiction wherein the Licensor resides or in which Licensor conducts its primary business, and under the laws of that jurisdiction excluding its conflict-of-law provisions. The application of the United Nations Convention on Contracts for the International Sale of Goods is expressly excluded. Any use of the Original Work outside the scope of this License or after its termination shall be subject to the requirements and penalties of copyright or patent law in the appropriate jurisdiction. This section shall survive the termination of this License. 12. **Attorneys' Fees.** In any action to enforce the terms of this License or seeking damages relating thereto, the prevailing party shall be entitled to recover its costs and expenses, including, without limitation, reasonable attorneys' fees and costs incurred in connection with such action, including any appeal of such action. This section shall survive the termination of this License. 13. **Miscellaneous.** If any provision of this License is held to be unenforceable, such provision shall be reformed only to the extent necessary to make it enforceable. 14. **Definition of "You" in This License.** "You" throughout this License, whether in upper or lower case, means an individual or a legal entity exercising rights under, and complying with all of the terms of, this License. For legal entities, "You" includes any entity that controls, is controlled by, or is under common control with you. For purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. 15. **Right to Use.** You may use the Original Work in all ways not otherwise restricted or conditioned by this License or by law, and Licensor promises not to interfere with or be responsible for such uses by You. 16. **Modification of This License.** This License is Copyright © 2007 Zooko Wilcox-O'Hearn. Permission is granted to copy, distribute, or communicate this License without modification. Nothing in this License permits You to modify this License as applied to the Original Work or to Derivative Works. However, You may modify the text of this License and copy, distribute or communicate your modified version (the "Modified License") and apply it to other original works of authorship subject to the following conditions: (i) You may not indicate in any way that your Modified License is the "Transitive Grace Period Public Licence" or "TGPPL" and you may not use those names in the name of your Modified License; and (ii) You must replace the notice specified in the first paragraph above with the notice "Licensed under " or with a notice of your own that is not confusingly similar to the notice in this License. 
tahoe-lafs-1.10.0/CREDITS

This is at least a partial credits-file of people that have contributed to the Tahoe-LAFS project. It is formatted to allow easy grepping and beautification by scripts. The fields are: name (N), email (E), web-address (W), PGP key ID and fingerprint (P), description (D), and snail-mail address (S). Thanks.
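Because this layout is meant to be script-friendly, a few lines of code are enough to turn the file into structured records. The following is a small illustrative sketch (not part of the original file), assuming entries are separated by blank lines as below; the field map comes straight from the description above:

    # Minimal sketch: turn CREDITS-style "X: value" entries into dictionaries.
    # Assumes one contributor per blank-line-separated block, per the notes above.
    FIELDS = {"N": "name", "E": "email", "W": "web", "P": "pgp",
              "D": "description", "S": "address"}

    def parse_credits(text):
        people = []
        for block in text.split("\n\n"):
            person = {}
            for line in block.splitlines():
                # Keep only lines of the form "X: value" where X is a known field.
                if len(line) > 2 and line[0] in FIELDS and line[1] == ":":
                    person.setdefault(FIELDS[line[0]], []).append(line[2:].strip())
            if "name" in person:
                people.append(person)
        return people

    if __name__ == "__main__":
        with open("CREDITS") as f:
            for person in parse_credits(f.read()):
                print("%s -- %s" % (person["name"][0],
                                    "; ".join(person.get("description", []))))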
----------

N: Brian Warner
E: warner-tahoe@lothar.com
D: main developer

N: Zooko
E: zooko@zooko.com
D: main developer

N: Daira Hopwood (formerly David-Sarah Hopwood)
E: daira@jacaranda.org
E: david-sarah@jacaranda.org
P: 3D6A 08E9 1262 3E9A 00B2 1BDC 067F 4920 98CF 2762 (preferred)
P: 12F8 A95C C90B B68E 369C 003D 5947 3C63 3CB3 A807
D: Tahoe-LAFS Hacker

N: Faried Nawaz
E: self@node.pk
W: http://www.hungry.com/~fn/
P: 0x09ECEC06, 19 41 1B 3E 25 98 F5 0A 0D 50 F9 37 1B 98 1A FF 09 EC EC 06
D: added private publish, added display of file size

N: Arno Waschk
E: hamamatsu@gmx.de
W: www.arnowaschk.de
D: improve logging, documentation, testing/porting/packaging (cygwin)

N: Arc O Median
D: bug reports

N: RobK
D: code

N: Nathan Wilcox
E: nejucomo@gmail.com
D: unit tests, attack example, documentation, Linux FUSE interface

N: Mike Booker
D: documentation (README.win32)

N: David Reid
E: dreid@dreid.org
D: make the provisioning page work in py2exe and py2app packages

N: Paul Gerhardt
D: don't emit error to stdout when testing for error in make check-deps

N: Armin Rigo
D: Linux FUSE interface "b"

N: Justin Boreta
D: user testing and docs

N: Chris Galvan
D: packaging, buildbot
E: cgalvan@enthought.com

N: François Deppierraz
D: encodings, duplicity, debugging, FUSE, docs, FreeBSD, WUI, ARM, NEWS
E: francois@ctrlaltdel.ch

N: Larry Hosken
E: tahoe@lahosken.san-francisco.ca.us
D: make cp -r ignore dangling symlinks

N: Toby Murray
E: toby.murray@comlab.ox.ac.uk
D: add web.ambient_upload_authority option, remove it

N: Shawn Willden
E: shawn-tahoe@willden.org
D: mathematical analysis, code review, Win32 documentation

N: Nils Durner
E: ndurner@googlemail.com
D: security bug report, darcsver fix, Windows build docs, ftpd docs, bzr patch

N: Kevin Reid
E: kpreid@mac.com
D: security bug report, W3 standards, new improved WUI style

N: Alberto Berti
E: alberto@metapensiero.it
W: http://www.metapensiero.it
S: via Filatoi, 1 38068 Rovereto (TN), Italy
D: improvements to the CLI: exclude patterns, usage text

N: DarKnesS_WOlF
D: patch Makefile to build .debs for Ubuntu Intrepid

N: Kevan Carstensen
E: kevan@isnotajoke.com
D: Tahoe-LAFS Hacker; MDMF, security, other improvements, code review, docs

N: Marc Tooley
W: http://rune.ca
P: 0xD5A7EE69911DF5CF
D: port to NetBSD, help debugging Crypto++ bug

N: Sam Mason
D: edited docs/running.rst

N: Jacob Appelbaum
E: jacob@appelbaum.com
W: http://www.appelbaum.net/
P: 12E4 04FF D3C9 31F9 3405 2D06 B884 1A91 9D0F ACE4
D: Debian packaging including init scripts

N: Jeremy Visser
D: Ubuntu packaging, usability testing

N: Jeremy Fitzhardinge
D: better support for HTTP range queries

N: Frédéric Marti
E: freestorm77@gmail.com
P: 0xD703AE08, F1 82 35 BB FF D8 96 0B 68 E2 91 2F C4 B8 6A 42 D7 03 AE 08
S: Lausanne - Switzerland
D: fix layout issue and server version numbers in WUI

N: Jacob Lyles
E: jacob.lyles@gmail.com
D: fixed bug in WUI with Python 2.5 and a system clock set far in the past

N: Ravi Pinjala
E: ravi@p-static.net
D: converted docs from .txt to .rst

N: Josh Wilcox
D: docs, return None from get_stats() when there aren't enough samples

N: Vince_II
D: fix incorrect name of other doc file in docs/configuration.rst

N: Markus Reichelt
E: mr@mareichelt.com
W: http://mareichelt.com/
P: DCB3 281F 38B0 711A 41C0 DC20 EE8D 363D 1687 9738
D: packaging for Slackware on SlackBuilds.org, bug reports

N: Peter Le Bek
E: peter@hyperplex.org
P: 0x9BAC3E97, 79CA 34B3 7272 A3CF 82AC 5655 F55A 5B63 9BAC 3E97
D: mtime in ftpd

N: Andrew Miller
E: amiller@dappervision.com
W: http://soc1024.com
P: 0xE3787A7250538F3F DEB3 132A 7FBA 37A5 03AC A462 E378 7A72 5053 8F3F
D: bugfixes, patches

tahoe-lafs-1.10.0/MANIFEST.in

include COPYING.GPL COPYING.TGPPL.rst CREDITS Makefile NEWS.rst Tahoe.home
include relnotes.txt
include bin/tahoe-script.template
recursive-include src *.xhtml *.js *.png *.css
recursive-include twisted *.py
graft docs
graft misc
graft static
graft setuptools-0.6c16dev4.egg
global-exclude *~ *.pyc

tahoe-lafs-1.10.0/Makefile

# NOTE: this Makefile requires GNU make

default: build

PYTHON=python
export PYTHON

# setup.py will extend sys.path to include our support/lib/... directory
# itself. It will also create it in the beginning of the 'develop' command.

TAHOE=$(PYTHON) bin/tahoe
SOURCES=src/allmydata src/buildtest static misc bin/tahoe-script.template twisted setup.py

.PHONY: make-version build

# This is necessary only if you want to automatically produce a new
# _version.py file from the current git/darcs history.
make-version:
	$(PYTHON) ./setup.py update_version

.built:
	$(MAKE) build

src/allmydata/_version.py:
	$(MAKE) make-version

# It is unnecessary to have this depend on build or src/allmydata/_version.py,
# since 'setup.py build' always updates the version using 'darcsver --count-all-patches'.
build:
	$(PYTHON) setup.py build
	touch .built

# 'make install' will do the following:
#   build+install tahoe (probably to /usr/lib/pythonN.N/site-packages)
# 'make install PREFIX=/usr/local/stow/tahoe-N.N' will do the same, but to
# a different location
install:
ifdef PREFIX
	mkdir -p $(PREFIX)
	$(PYTHON) ./setup.py install --single-version-externally-managed \
	  --prefix=$(PREFIX) --record=./tahoe.files
else
	$(PYTHON) ./setup.py install --single-version-externally-managed
endif

# TESTING

.PHONY: signal-error-deps test check test-coverage quicktest quicktest-coverage
.PHONY: coverage-output get-old-coverage-coverage coverage-delta-output

# you can use 'make test TEST=allmydata.test.test_introducer' to run just
# test_introducer. TEST=allmydata.test.test_client.Basic.test_permute works
# too.
TEST=allmydata

# use 'make test TRIALARGS=--reporter=bwverbose' from buildbot, to
# suppress the ansi color sequences

# It is unnecessary to have this depend on build or src/allmydata/_version.py,
# since 'setup.py test' always updates the version and builds before testing.
test:
	$(PYTHON) setup.py test $(TRIALARGS) -s $(TEST)
	touch .built

check: test

test-coverage: build
	rm -f .coverage
	$(TAHOE) debug trial --reporter=bwverbose-coverage $(TEST)

quicktest:
	$(TAHOE) debug trial $(TRIALARGS) $(TEST)

# "make tmpfstest" may be a faster way of running tests on Linux. It works best when you have
# at least 330 MiB of free physical memory (to run the whole test suite). Since it uses sudo
# to mount/unmount the tmpfs filesystem, it might prompt for your password.
tmpfstest:
	time make _tmpfstest 'TMPDIR=$(shell mktemp -d --tmpdir=.)'

_tmpfstest:
	sudo mount -t tmpfs -o size=400m tmpfs '$(TMPDIR)'
	-$(TAHOE) debug trial --rterrors '--temp-directory=$(TMPDIR)/_trial_temp' $(TRIALARGS) $(TEST)
	sudo umount '$(TMPDIR)'
	rmdir '$(TMPDIR)'

# code-coverage: install the "coverage" package from PyPI, do "make
# quicktest-coverage" to do a unit test run with coverage-gathering enabled,
# then use "make coverage-output-text" for a brief report, or "make
# coverage-output" for a pretty HTML report. Also see "make .coverage.el" and
# misc/coding_tools/coverage.el for emacs integration.

quicktest-coverage:
	rm -f .coverage
	PYTHONPATH=. $(TAHOE) debug trial --reporter=bwverbose-coverage $(TEST)
# on my laptop, "quicktest" takes 239s, "quicktest-coverage" takes 304s

# --include appeared in coverage-3.4
COVERAGE_OMIT=--include '$(CURDIR)/src/allmydata/*' --omit '$(CURDIR)/src/allmydata/test/*'

coverage-output:
	rm -rf coverage-html
	coverage html -i -d coverage-html $(COVERAGE_OMIT)
	cp .coverage coverage-html/coverage.data
	@echo "now point your browser at coverage-html/index.html"

.PHONY: upload-coverage .coverage.el pyflakes count-lines
.PHONY: check-memory check-memory-once check-speed check-grid
.PHONY: repl test-darcs-boringfile test-clean clean find-trailing-spaces

.coverage.el: .coverage
	$(PYTHON) misc/coding_tools/coverage2el.py

# 'upload-coverage' is meant to be run with an UPLOAD_TARGET=host:/dir setting
ifdef UPLOAD_TARGET

ifndef UPLOAD_HOST
$(error UPLOAD_HOST must be set when using UPLOAD_TARGET)
endif
ifndef COVERAGEDIR
$(error COVERAGEDIR must be set when using UPLOAD_TARGET)
endif

upload-coverage:
	rsync -a coverage-html/ $(UPLOAD_TARGET)
	ssh $(UPLOAD_HOST) make update-tahoe-coverage COVERAGEDIR=$(COVERAGEDIR)
else
upload-coverage:
	echo "this target is meant to be run with UPLOAD_TARGET=host:/path/"
	false
endif

code-checks: build version-and-path check-interfaces check-miscaptures -find-trailing-spaces -check-umids pyflakes

version-and-path:
	$(TAHOE) --version-and-path

check-interfaces:
	$(TAHOE) @misc/coding_tools/check-interfaces.py 2>&1 |tee violations.txt
	@echo

check-miscaptures:
	$(PYTHON) misc/coding_tools/check-miscaptures.py $(SOURCES) 2>&1 |tee miscaptures.txt
	@echo

pyflakes:
	@$(PYTHON) -OOu `which pyflakes` $(SOURCES) |sort |uniq
	@echo

check-umids:
	$(PYTHON) misc/coding_tools/check-umids.py `find $(SOURCES) -name '*.py' -not -name 'old.py'`
	@echo

-check-umids:
	-$(PYTHON) misc/coding_tools/check-umids.py `find $(SOURCES) -name '*.py' -not -name 'old.py'`
	@echo

doc-checks: check-rst

check-rst:
	@for x in `find *.rst docs -name "*.rst"`; do rst2html -v $${x} >/dev/null; done 2>&1 |grep -v 'Duplicate implicit target name:'
	@echo

count-lines:
	@echo -n "files: "
	@find src -name '*.py' |grep -v /build/ |wc -l
	@echo -n "lines: "
	@cat `find src -name '*.py' |grep -v /build/` |wc -l
	@echo -n "TODO: "
	@grep TODO `find src -name '*.py' |grep -v /build/` | wc -l
	@echo -n "XXX: "
	@grep XXX `find src -name '*.py' |grep -v /build/` | wc -l

check-memory: .built
	rm -rf _test_memory
	$(TAHOE) @src/allmydata/test/check_memory.py upload
	$(TAHOE) @src/allmydata/test/check_memory.py upload-self
	$(TAHOE) @src/allmydata/test/check_memory.py upload-POST
	$(TAHOE) @src/allmydata/test/check_memory.py download
	$(TAHOE) @src/allmydata/test/check_memory.py download-GET
	$(TAHOE) @src/allmydata/test/check_memory.py download-GET-slow
	$(TAHOE) @src/allmydata/test/check_memory.py receive

check-memory-once: .built
	rm -rf _test_memory
	$(TAHOE) @src/allmydata/test/check_memory.py $(MODE)
# The check-speed target uses a pre-established client node to run a canned
# set of performance tests against a test network that is also
# pre-established (probably on a remote machine). Provide it with the path to
# a local directory where this client node has been created (and populated
# with the necessary FURLs of the test network). This target will start that
# client with the current code and then run the tests. Afterwards it will
# stop the client.
#
# The 'sleep 5' is in there to give the new client a chance to connect to its
# storageservers, since check_speed.py has no good way of doing that itself.
check-speed: .built
	if [ -z '$(TESTCLIENTDIR)' ]; then exit 1; fi
	@echo "stopping any leftover client code"
	-$(TAHOE) stop $(TESTCLIENTDIR)
	$(TAHOE) start $(TESTCLIENTDIR)
	sleep 5
	$(TAHOE) @src/allmydata/test/check_speed.py $(TESTCLIENTDIR)
	$(TAHOE) stop $(TESTCLIENTDIR)

# The check-grid target also uses a pre-established client node, along with a
# long-term directory that contains some well-known files. See the docstring
# in src/allmydata/test/check_grid.py to see how to set this up.
check-grid: .built
	if [ -z '$(TESTCLIENTDIR)' ]; then exit 1; fi
	$(TAHOE) @src/allmydata/test/check_grid.py $(TESTCLIENTDIR) bin/tahoe

bench-dirnode: .built
	$(TAHOE) @src/allmydata/test/bench_dirnode.py

# the provisioning tool runs as a stand-alone webapp server
run-provisioning-tool: .built
	$(TAHOE) @misc/operations_helpers/provisioning/run.py

# 'make repl' is a simple-to-type command to get a Python interpreter loop
# from which you can type 'import allmydata'
repl:
	$(TAHOE) debug repl

test-darcs-boringfile:
	$(MAKE)
	$(PYTHON) misc/build_helpers/test-darcs-boringfile.py

test-git-ignore:
	$(MAKE)
	$(PYTHON) misc/build_helpers/test-git-ignore.py

test-clean:
	find . |grep -vEe "_darcs|allfiles.tmp|src/allmydata/_(version|appname).py" |sort >allfiles.tmp.old
	$(MAKE)
	$(MAKE) clean
	find . |grep -vEe "_darcs|allfiles.tmp|src/allmydata/_(version|appname).py" |sort >allfiles.tmp.new
	diff allfiles.tmp.old allfiles.tmp.new

# It would be nice if 'make clean' deleted any automatically-generated
# _version.py too, so that 'make clean; make all' could be useable as a
# "what the heck is going on, get me back to a clean state', but we need
# 'make clean' to work on non-darcs trees without destroying useful information.
clean:
	rm -rf build _trial_temp _test_memory .built
	rm -f `find src *.egg -name '*.so' -or -name '*.pyc'`
	rm -rf src/allmydata_tahoe.egg-info
	rm -rf support dist
	rm -rf `ls -d *.egg | grep -vEe"setuptools-|setuptools_darcs-|darcsver-"`
	rm -rf *.pyc
	rm -rf misc/dependencies/build misc/dependencies/temp
	rm -rf misc/dependencies/tahoe_deps.egg-info
	rm -f bin/tahoe bin/tahoe.pyscript

find-trailing-spaces:
	$(PYTHON) misc/coding_tools/find-trailing-spaces.py -r $(SOURCES)
	@echo

-find-trailing-spaces:
	-$(PYTHON) misc/coding_tools/find-trailing-spaces.py -r $(SOURCES)
	@echo

# The test-desert-island target grabs the tahoe-deps tarball, unpacks it,
# does a build, then asserts that the build did not try to download anything
# as it ran. Invoke this on a new tree, or after a 'clean', to make sure the
# support/lib/ directory is gone.
fetch-and-unpack-deps:
	test -f tahoe-deps.tar.gz || wget https://tahoe-lafs.org/source/tahoe-lafs/deps/tahoe-lafs-deps.tar.gz
	rm -rf tahoe-deps
	tar xzf tahoe-lafs-deps.tar.gz

test-desert-island:
	$(MAKE) fetch-and-unpack-deps
	$(MAKE) 2>&1 | tee make.out
	$(PYTHON) misc/build_helpers/check-build.py make.out no-downloads

# TARBALL GENERATION
.PHONY: tarballs upload-tarballs

tarballs:
	$(MAKE) make-version
	$(PYTHON) setup.py sdist --formats=bztar,gztar,zip
	$(PYTHON) setup.py sdist --sumo --formats=bztar,gztar,zip

upload-tarballs:
	@if [ "X${BB_BRANCH}" = "Xmaster" ] || [ "X${BB_BRANCH}" = "X" ]; then for f in dist/allmydata-tahoe-*; do flappclient --furlfile ~/.tahoe-tarball-upload.furl upload-file $$f; done ; else echo not uploading tarballs because this is not trunk but is branch \"${BB_BRANCH}\" ; fi

tahoe-lafs-1.10.0/NEWS.rst

==================================
User-Visible Changes in Tahoe-LAFS
==================================

Release 1.10.0 (2013-05-01)
'''''''''''''''''''''''''''

New Features
------------

- The Welcome page has been redesigned. This is a preview of the design
  style that is likely to be used in other parts of the WUI in future
  Tahoe-LAFS versions. (`#1713`_, `#1457`_, `#1735`_)
- A new extensible Introducer protocol has been added, as the basis for
  future improvements such as accounting. Compatibility with older nodes is
  not affected. When server, introducer, and client are all upgraded, the
  welcome page will show node IDs that start with "v0-" instead of the old
  tubid. See ``__ for details. (`#466`_)
- The web-API has a new ``relink`` operation that supports directly moving
  files between directories. (`#1579`_)
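As an illustration of the new operation, the following sketch drives it over HTTP from Python 2 (matching the dependency list below). The gateway address, the directory caps, and the parameter names (``t=relink``, ``from_name``, ``to_dir``, ``to_name``) are assumptions recalled from docs/frontends/webapi.rst rather than anything defined in this file; consult that document for the authoritative request format::

    # Hypothetical sketch (Python 2): move "report.txt" between two directories
    # via the web-API relink operation. URL layout and parameter names are
    # assumptions -- verify against docs/frontends/webapi.rst.
    import urllib, urllib2

    GATEWAY = "http://127.0.0.1:3456"   # assumed default local gateway
    FROM_DIRCAP = "URI:DIR2:..."        # placeholder: writecap of the source directory
    TO_DIRCAP = "URI:DIR2:..."          # placeholder: writecap of the destination directory

    query = urllib.urlencode({"t": "relink",
                              "from_name": "report.txt",
                              "to_dir": TO_DIRCAP,
                              "to_name": "report.txt"})
    url = "%s/uri/%s/?%s" % (GATEWAY, FROM_DIRCAP, query)
    response = urllib2.urlopen(url, data="")   # an empty body makes this a POST
    print(response.read())                     # print whatever the gateway returns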
Command-line Syntax Changes
---------------------------

- Global options to ``tahoe``, such as ``-d``/``--node-directory``, must now
  come before rather than after the command name (for example,
  ``tahoe -d BASEDIR cp -r foo: bar:``). (`#166`_)

Notable Bugfixes
----------------

- In earlier versions, if a connection problem caused a download failure for
  an immutable file, subsequent attempts to download the same file could
  also fail. This is now fixed. (`#1679`_)

- Filenames in WUI directory pages are now displayed correctly when they
  contain characters that require HTML escaping. (`#1143`_)

- Non-ASCII node nicknames no longer cause WUI errors. (`#1298`_)

- Checking a LIT file using ``tahoe check`` no longer results in an
  exception. (`#1758`_)

- The SFTP frontend now works with recent versions of Twisted, rather than
  giving errors or warnings about use of ``IFinishableConsumer``. (`#1926`_,
  `#1564`_, `#1525`_)

- ``tahoe cp --verbose`` now counts the files being processed correctly.
  (`#1805`_, `#1783`_)

- Exceptions no longer trigger an unhelpful crash reporter on Ubuntu 12.04
  ("Precise") or later. (`#1746`_)

- The error message displayed when a CLI tool cannot connect to a gateway
  has been improved. (`#974`_)

- Other minor fixes: `#1781`_, `#1812`_, `#1915`_, `#1484`_, `#1525`_

Compatibility and Dependencies
------------------------------

- Python >= 2.6, except Python 3 (`#1658`_)

- Twisted >= 11.0.0 (`#1771`_)

- mock >= 0.8 (for unit tests)

- pycryptopp >= 0.6.0 (for Ed25519 signatures)

- zope.interface >= 3.6.0 (except 3.6.3 or 3.6.4)

Other Changes
-------------

- The ``flogtool`` utility, used to read detailed event logs, can now be
  accessed as ``tahoe debug flogtool`` even when Foolscap is not installed
  system-wide. (`#1693`_)

- The provisioning/reliability pages were removed from the main client's web
  interface, and moved into a standalone web-based tool. Use the ``run.py``
  script in ``misc/operations_helpers/provisioning/`` to access them.

- Web clients can now cache (ETag) immutable directory pages. (`#443`_)

- ``__ was added to document the administration of convergence secrets.
  (`#1761`_)

Precautions when Upgrading
--------------------------

- When upgrading a grid from a recent revision of trunk, follow the
  precautions from this `message to the tahoe-dev mailing list`_, to ensure
  that announcements to the Introducer are recognized after the upgrade.
  This is not necessary when upgrading from a previous release like 1.9.2.

.. _`#166`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/166
.. _`#443`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/443
.. _`#466`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/466
.. _`#860`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/860
.. _`#974`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/974
.. _`#1143`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1143
.. _`#1298`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1298
.. _`#1457`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1457
.. _`#1484`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1484
.. _`#1525`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1525
.. _`#1564`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1564
.. _`#1579`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1579
.. _`#1658`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1658
.. _`#1679`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1679
.. _`#1693`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1693
.. _`#1713`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1713
.. _`#1735`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1735
.. _`#1746`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1746
.. _`#1758`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1758
.. _`#1761`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1761
.. _`#1771`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1771
.. _`#1781`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1781
.. _`#1783`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1783
.. _`#1802`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1802
..
_`#1805`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1805 .. _`#1812`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1812 .. _`#1915`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1915 .. _`#1926`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1926 .. _`message to the tahoe-dev mailing list`: https://tahoe-lafs.org/pipermail/tahoe-dev/2013-March/008096.html Release 1.9.2 (2012-07-03) '''''''''''''''''''''''''' Notable Bugfixes ---------------- - Several regressions in support for reading (`#1636`_), writing/modifying (`#1670`_, `#1749`_), verifying (`#1628`_) and repairing (`#1655`_, `#1669`_, `#1676`_, `#1689`_) mutable files have been fixed. - FTP can now list directories containing mutable files, although it still does not support reading or writing mutable files. (`#680`_) - The FTP frontend would previously show Jan 1 1970 for all timestamps; now it shows the correct modification time of the directory entry. (`#1688`_) - If a node is configured to report incidents to a log gatherer, but the gatherer is offline when some incidents occur, it would previously not "catch up" with those incidents as intended. (`#1725`_) - OpenBSD 5 is now supported. (`#1584`_) - The ``count-good-share-hosts`` field of file check results is now computed correctly. (`#1115`_) Configuration/Behavior Changes ------------------------------ - The capability of the upload directory for the drop-upload frontend is now specified in the file ``private/drop_upload_dircap`` under the gateway's node directory, rather than in its ``tahoe.cfg``. (`#1593`_) Packaging Changes ----------------- - Tahoe-LAFS can be built correctly from a git repository as well as from darcs. Compatibility and Dependencies ------------------------------ - foolscap >= 0.6.3 is required, in order to make Tahoe-LAFS compatible with Twisted >= 11.1.0. (`#1788`_) - Versions 2.0.1 and 2.4 of PyCrypto are excluded. (`#1631`_, `#1574`_) .. _`#680`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/680 .. _`#1115`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1115 .. _`#1574`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1574 .. _`#1584`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1584 .. _`#1593`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1593 .. _`#1628`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1628 .. _`#1631`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1631 .. _`#1636`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1636 .. _`#1655`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1655 .. _`#1669`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1669 .. _`#1670`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1670 .. _`#1676`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1676 .. _`#1688`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1688 .. _`#1689`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1689 .. _`#1725`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1725 .. _`#1749`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1749 .. _`#1788`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1788 Release 1.9.1 (2012-01-12) '''''''''''''''''''''''''' Security-related Bugfix ----------------------- - Fix flaw that would allow servers to cause undetected corruption when retrieving the contents of mutable files (both SDMF and MDMF). (`#1654`_) .. _`#1654`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1654 Release 1.9.0 (2011-10-30) '''''''''''''''''''''''''' New Features ------------ - The most significant new feature in this release is MDMF: "Medium-size Distributed Mutable Files". 
Unlike standard SDMF files, these provide efficient partial-access (reading and modifying small portions of the file instead of the whole thing). MDMF is opt-in (it is not yet the default format for mutable files), both to ensure compatibility with previous versions, and because the algorithm does not yet meet memory-usage goals. Enable it with ``--format=MDMF`` in the CLI (``tahoe put`` and ``tahoe mkdir``), or the "format" radioboxes in the web interface. See ``__ for more details (`#393`_, `#1507`_) - A "blacklist" feature allows blocking access to specific files through a particular gateway. See the "Access Blacklist" section of ``__ for more details. (`#1425`_) - A "drop-upload" feature has been added, which allows you to upload files to a Tahoe-LAFS directory just by writing them to a local directory. This feature is experimental and should not be relied on to store the only copy of valuable data. It is currently available only on Linux. See ``__ for documentation. (`#1429`_) - The timeline of immutable downloads can be viewed using a zoomable and pannable JavaScript-based visualization. This is accessed using the 'timeline' link on the File Download Status page for the download, which can be reached from the Recent Uploads and Downloads page. Configuration/Behavior Changes ------------------------------ - Prior to Tahoe-LAFS v1.3, the configuration of some node options could be specified using individual config files rather than via ``tahoe.cfg``. These files now cause an error if present. (`#1385`_) - Storage servers now calculate their remaining space based on the filesystem containing the ``storage/shares/`` directory. Previously they looked at the filesystem containing the ``storage/`` directory. This allows ``storage/shares/``, rather than ``storage/``, to be a mount point or a symlink pointing to another filesystem. (`#1384`_) - ``tahoe cp xyz MUTABLE`` will modify the existing mutable file instead of creating a new one. (`#1304`_) - The button for unlinking a file from its directory on a WUI directory listing is now labelled "unlink" rather than "del". (`#1104`_) Notable Bugfixes ---------------- - The security bugfix for the vulnerability allowing deletion of shares, detailed in the news for v1.8.3 below, is also included in this release. (`#1528`_) - Some cases of immutable upload, for example using the ``tahoe put`` and ``tahoe cp`` commands or SFTP, did not appear in the history of Recent Uploads and Downloads. (`#1079`_) - The memory footprint of the verifier has been reduced by serializing block fetches. (`#1395`_) - Large immutable downloads are now a little faster than in v1.8.3 (about 5% on a fast network). (`#1268`_) Packaging Changes ----------------- - The files related to Debian packaging have been removed from the Tahoe source tree, since they are now maintained as part of the official Debian packages. (`#1454`_) - The unmaintained FUSE plugins were removed from the source tree. See ``docs/frontends/FTP-and-SFTP.rst`` for how to mount a Tahoe filesystem on Unix via sshfs. 
(`#1409`_) - The Tahoe licenses now give explicit permission to combine Tahoe-LAFS with code distributed under the following additional open-source licenses (any version of each): * Academic Free License * Apple Public Source License * BitTorrent Open Source License * Lucent Public License * Jabber Open Source License * Common Development and Distribution License * Microsoft Public License * Microsoft Reciprocal License * Sun Industry Standards Source License * Open Software License Compatibility and Dependencies ------------------------------ - To resolve an incompatibility between Nevow and zope.interface (versions 3.6.3 and 3.6.4), Tahoe-LAFS now requires an earlier or later version of zope.interface. (`#1435`_) - The Twisted dependency has been raised to version 10.1 to ensure we no longer require pywin32 on Windows, the new drop-upload feature has the required support from Twisted on Linux, and that it is never necessary to patch Twisted in order to use the FTP frontend. (`#1274`_, `#1429`_, `#1438`_) - An explicit dependency on pyOpenSSL has been added, replacing the indirect dependency via the "secure_connections" option of foolscap. (`#1383`_) Minor Changes ------------- - A ``man`` page has been added (`#1420`_). All other docs are in ReST format. - The ``tahoe_files`` munin plugin reported an incorrect count of the number of share files. (`#1391`_) - Minor documentation updates: #627, #1104, #1225, #1297, #1342, #1404 - Other minor changes: #636, #1355, #1363, #1366, #1388, #1392, #1412, #1344, #1347, #1359, #1389, #1441, #1442, #1446, #1474, #1503 .. _`#393`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/393 .. _`#1079`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1079 .. _`#1104`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1104 .. _`#1268`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1268 .. _`#1274`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1274 .. _`#1304`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1304 .. _`#1383`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1383 .. _`#1384`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1384 .. _`#1385`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1385 .. _`#1391`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1391 .. _`#1395`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1395 .. _`#1409`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1409 .. _`#1420`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1420 .. _`#1425`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1425 .. _`#1429`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1429 .. _`#1435`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1435 .. _`#1438`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1438 .. _`#1454`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1454 .. _`#1507`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1507 Release 1.8.3 (2011-09-13) '''''''''''''''''''''''''' Security-related Bugfix ----------------------- - Fix flaw that would allow a person who knows a storage index of a file to delete shares of that file. (`#1528`_) - Remove corner cases in mutable file bounds management which could expose extra lease info or old share data (from prior versions of the mutable file) if someone with write authority to that mutable file exercised these corner cases in a way that no actual Tahoe-LAFS client does. (Probably not exploitable.) (`#1528`_) .. 
_`#1528`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1528 Release 1.8.2 (2011-01-30) '''''''''''''''''''''''''' Compatibility and Dependencies ------------------------------ - Tahoe is now compatible with Twisted-10.2 (released last month), as well as with earlier versions. The previous Tahoe-1.8.1 release failed to run against Twisted-10.2, raising an AttributeError on StreamServerEndpointService (`#1286`_) - Tahoe now depends upon the "mock" testing library, and the foolscap dependency was raised to 0.6.1 . It no longer requires pywin32 (which was used only on windows). Future developers should note that reactor.spawnProcess and derivatives may no longer be used inside Tahoe code. Other Changes ------------- - the default reserved_space value for new storage nodes is 1 GB (`#1208`_) - documentation is now in reStructuredText (.rst) format - "tahoe cp" should now handle non-ASCII filenames - the unmaintained Mac/Windows GUI applications have been removed (`#1282`_) - tahoe processes should appear in top and ps as "tahoe", not "python", on some unix platforms. (`#174`_) - "tahoe debug trial" can be used to run the test suite (`#1296`_) - the SFTP frontend now reports unknown sizes as "0" instead of "?", to improve compatibility with clients like FileZilla (`#1337`_) - "tahoe --version" should now report correct values in situations where 1.8.1 might have been wrong (`#1287`_) .. _`#1208`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1208 .. _`#1282`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1282 .. _`#1286`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1286 .. _`#1287`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1287 .. _`#1296`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1296 .. _`#1337`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1337 Release 1.8.1 (2010-10-28) '''''''''''''''''''''''''' Bugfixes and Improvements ------------------------- - Allow the repairer to improve the health of a file by uploading some shares, even if it cannot achieve the configured happiness threshold. This fixes a regression introduced between v1.7.1 and v1.8.0. (`#1212`_) - Fix a memory leak in the ResponseCache which is used during mutable file/directory operations. (`#1045`_) - Fix a regression and add a performance improvement in the downloader. This issue caused repair to fail in some special cases. (`#1223`_) - Fix a bug that caused 'tahoe cp' to fail for a grid-to-grid copy involving a non-ASCII filename. (`#1224`_) - Fix a rarely-encountered bug involving printing large strings to the console on Windows. (`#1232`_) - Perform ~ expansion in the --exclude-from filename argument to 'tahoe backup'. (`#1241`_) - The CLI's 'tahoe mv' and 'tahoe ln' commands previously would try to use an HTTP proxy if the HTTP_PROXY environment variable was set. These now always connect directly to the WAPI, thus avoiding giving caps to the HTTP proxy (and also avoiding failures in the case that the proxy is failing or requires authentication). (`#1253`_) - The CLI now correctly reports failure in the case that 'tahoe mv' fails to unlink the file from its old location. (`#1255`_) - 'tahoe start' now gives a more positive indication that the node has started. (`#71`_) - The arguments seen by 'ps' or other tools for node processes are now more useful (in particular, they include the path of the 'tahoe' script, rather than an obscure tool named 'twistd'). 
(`#174`_) Removed Features ---------------- - The tahoe start/stop/restart and node creation commands no longer accept the -m or --multiple option, for consistency between platforms. (`#1262`_) Packaging --------- - We now host binary packages so that users on certain operating systems can install without having a compiler. - Use a newer version of a dependency if needed, even if an older version is installed. This would previously cause a VersionConflict error. (`#1190`_) - Use a precompiled binary of a dependency if one with a sufficiently high version number is available, instead of attempting to compile the dependency from source, even if the source version has a higher version number. (`#1233`_) Documentation ------------- - All current documentation in .txt format has been converted to .rst format. (`#1225`_) - Added docs/backdoors.rst declaring that we won't add backdoors to Tahoe-LAFS, or add anything to facilitate government access to data. (`#1216`_) .. _`#71`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/71 .. _`#174`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/174 .. _`#1212`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1212 .. _`#1045`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1045 .. _`#1190`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1190 .. _`#1216`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1216 .. _`#1223`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1223 .. _`#1224`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1224 .. _`#1225`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1225 .. _`#1232`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1232 .. _`#1233`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1233 .. _`#1241`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1241 .. _`#1253`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1253 .. _`#1255`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1255 .. _`#1262`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1262 Release 1.8.0 (2010-09-23) '''''''''''''''''''''''''' New Features ------------ - A completely new downloader which improves performance and robustness of immutable-file downloads. It uses the fastest K servers to download the data in K-way parallel. It automatically fails over to alternate servers if servers fail in mid-download. It allows seeking to arbitrary locations in the file (the previous downloader which would only read the entire file sequentially from beginning to end). It minimizes unnecessary round trips and unnecessary bytes transferred to improve performance. It sends requests to fewer servers to reduce the load on servers (the previous one would send a small request to every server for every download) (`#287`_, `#288`_, `#448`_, `#798`_, `#800`_, `#990`_, `#1170`_, `#1191`_) - Non-ASCII command-line arguments and non-ASCII outputs now work on Windows. In addition, the command-line tool now works on 64-bit Windows. (`#1074`_) Bugfixes and Improvements ------------------------- - Document and clean up the command-line options for specifying the node's base directory. (`#188`_, `#706`_, `#715`_, `#772`_, `#1108`_) - The default node directory for Windows is ".tahoe" in the user's home directory, the same as on other platforms. (`#890`_) - Fix a case in which full cap URIs could be logged. (`#685`_, `#1155`_) - Fix bug in WUI in Python 2.5 when the system clock is set back to 1969. Now you can use Tahoe-LAFS with Python 2.5 and set your system clock to 1969 and still use the WUI. (`#1055`_) - Many improvements in code organization, tests, logging, documentation, and packaging. 
(`#983`_, `#1074`_, `#1108`_, `#1127`_, `#1129`_, `#1131`_, `#1166`_, `#1175`_) Dependency Updates ------------------ - on x86 and x86-64 platforms, pycryptopp >= 0.5.20 - pycrypto 2.2 is excluded due to a bug .. _`#188`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/188 .. _`#288`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/288 .. _`#448`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/448 .. _`#685`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/685 .. _`#706`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/706 .. _`#715`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/715 .. _`#772`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/772 .. _`#798`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/798 .. _`#800`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/800 .. _`#890`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/890 .. _`#983`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/983 .. _`#990`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/990 .. _`#1055`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1055 .. _`#1074`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1074 .. _`#1108`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1108 .. _`#1155`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1155 .. _`#1170`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1170 .. _`#1191`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1191 .. _`#1127`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1127 .. _`#1129`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1129 .. _`#1131`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1131 .. _`#1166`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1166 .. _`#1175`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1175 Release 1.7.1 (2010-07-18) '''''''''''''''''''''''''' Bugfixes and Improvements ------------------------- - Fix bug in which uploader could fail with AssertionFailure or report that it had achieved servers-of-happiness when it hadn't. (`#1118`_) - Fix bug in which servers could get into a state where they would refuse to accept shares of a certain file (`#1117`_) - Add init scripts for managing the gateway server on Debian/Ubuntu (`#961`_) - Fix bug where server version number was always 0 on the welcome page (`#1067`_) - Add new command-line command "tahoe unlink" as a synonym for "tahoe rm" (`#776`_) - The FTP frontend now encrypts its temporary files, protecting their contents from an attacker who is able to read the disk. (`#1083`_) - Fix IP address detection on FreeBSD 7, 8, and 9 (`#1098`_) - Fix minor layout issue in the Web User Interface with Internet Explorer (`#1097`_) - Fix rarely-encountered incompatibility between Twisted logging utility and the new unicode support added in v1.7.0 (`#1099`_) - Forward-compatibility improvements for non-ASCII caps (`#1051`_) Code improvements ----------------- - Simplify and tidy-up directories, unicode support, test code (`#923`_, `#967`_, `#1072`_) .. _`#776`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/776 .. _`#923`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/923 .. _`#961`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/961 .. _`#967`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/967 .. _`#1051`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1051 .. _`#1067`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1067 .. _`#1072`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1072 .. _`#1083`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1083 .. _`#1097`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1097 .. _`#1098`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1098 .. 
_`#1099`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1099 .. _`#1117`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1117 .. _`#1118`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1118 Release 1.7.0 (2010-06-18) '''''''''''''''''''''''''' New Features ------------ - SFTP support (`#1037`_) Your Tahoe-LAFS gateway now acts like a full-fledged SFTP server. It has been tested with sshfs to provide a virtual filesystem in Linux. Many users have asked for this feature. We hope that it serves them well! See the `FTP-and-SFTP.rst`_ document to get started. - support for non-ASCII character encodings (`#534`_) Tahoe-LAFS now correctly handles filenames containing non-ASCII characters on all supported platforms: - when reading files in from the local filesystem (such as when you run "tahoe backup" to back up your local files to a Tahoe-LAFS grid); - when writing files out to the local filesystem (such as when you run "tahoe cp -r" to recursively copy files out of a Tahoe-LAFS grid); - when displaying filenames to the terminal (such as when you run "tahoe ls"), subject to limitations of the terminal and locale; - when parsing command-line arguments, except on Windows. - Servers of Happiness (`#778`_) Tahoe-LAFS now measures during immutable file upload to see how well distributed it is across multiple servers. It aborts the upload if the pieces of the file are not sufficiently well-distributed. This behavior is controlled by a configuration parameter called "servers of happiness". With the default settings for its erasure coding, Tahoe-LAFS generates 10 shares for each file, such that any 3 of those shares are sufficient to recover the file. The default value of "servers of happiness" is 7, which means that Tahoe-LAFS will guarantee that there are at least 7 servers holding some of the shares, such that any 3 of those servers can completely recover your file. The new upload code also distributes the shares better than the previous version in some cases and takes better advantage of pre-existing shares (when a file has already been previously uploaded). See the `architecture.rst`_ document [3] for details. Bugfixes and Improvements ------------------------- - Premature abort of upload if some shares were already present and some servers fail. (`#608`_) - python ./setup.py install -- can't create or remove files in install directory. (`#803`_) - Network failure => internal TypeError. (`#902`_) - Install of Tahoe on CentOS 5.4. (`#933`_) - CLI option --node-url now supports https url. (`#1028`_) - HTML/CSS template files were not correctly installed under Windows. (`#1033`_) - MetadataSetter does not enforce restriction on setting "tahoe" subkeys. (`#1034`_) - ImportError: No module named setuptools_darcs.setuptools_darcs. (`#1054`_) - Renamed Title in xhtml files. (`#1062`_) - Increase Python version dependency to 2.4.4, to avoid a critical CPython security bug. (`#1066`_) - Typo correction for the munin plugin tahoe_storagespace. (`#968`_) - Fix warnings found by pylint. (`#973`_) - Changing format of some documentation files. (`#1027`_) - the misc/ directory was tied up. (`#1068`_) - The 'ctime' and 'mtime' metadata fields are no longer written except by "tahoe backup". (`#924`_) - Unicode filenames in Tahoe-LAFS directories are normalized so that names that differ only in how accents are encoded are treated as the same. (`#1076`_) - Various small improvements to documentation. 
(`#937`_, `#911`_, `#1024`_, `#1082`_) Removals -------- - The 'tahoe debug consolidate' subcommand (for converting old allmydata Windows client backups to a newer format) has been removed. Dependency Updates ------------------ - the Python version dependency is raised to 2.4.4 in some cases (2.4.3 for Redhat-based Linux distributions, 2.4.2 for UCS-2 builds) (`#1066`_) - pycrypto >= 2.0.1 - pyasn1 >= 0.0.8a - mock (only required by unit tests) .. _`#534`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/534 .. _`#608`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/608 .. _`#778`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/778 .. _`#803`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/803 .. _`#902`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/902 .. _`#911`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/911 .. _`#924`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/924 .. _`#937`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/937 .. _`#933`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/933 .. _`#968`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/968 .. _`#973`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/973 .. _`#1024`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1024 .. _`#1027`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1027 .. _`#1028`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1028 .. _`#1033`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1033 .. _`#1034`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1034 .. _`#1037`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1037 .. _`#1054`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1054 .. _`#1062`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1062 .. _`#1066`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1066 .. _`#1068`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1068 .. _`#1076`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1076 .. _`#1082`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1082 .. _architecture.rst: docs/architecture.rst .. _FTP-and-SFTP.rst: docs/frontends/FTP-and-SFTP.rst Release 1.6.1 (2010-02-27) '''''''''''''''''''''''''' Bugfixes -------- - Correct handling of Small Immutable Directories Immutable directories can now be deep-checked and listed in the web UI in all cases. (In v1.6.0, some operations, such as deep-check, on a directory graph that included very small immutable directories, would result in an exception causing the whole operation to abort.) (`#948`_) Usability Improvements ---------------------- - Improved user interface messages and error reporting. (`#681`_, `#837`_, `#939`_) - The timeouts for operation handles have been greatly increased, so that you can view the results of an operation up to 4 days after it has completed. After viewing them for the first time, the results are retained for a further day. (`#577`_) Release 1.6.0 (2010-02-01) '''''''''''''''''''''''''' New Features ------------ - Immutable Directories Tahoe-LAFS can now create and handle immutable directories. (`#607`_, `#833`_, `#931`_) These are read just like normal directories, but are "deep-immutable", meaning that all their children (and everything reachable from those children) must be immutable objects (i.e. immutable or literal files, and other immutable directories). These directories must be created in a single webapi call that provides all of the children at once. (Since they cannot be changed after creation, the usual create/add/add sequence cannot be used.) 
They have URIs that start with "URI:DIR2-CHK:" or "URI:DIR2-LIT:", and are described on the human-facing web interface (aka the "WUI") with a "DIR-IMM" abbreviation (as opposed to "DIR" for the usual read-write directories and "DIR-RO" for read-only directories). Tahoe-LAFS releases before 1.6.0 cannot read the contents of an immutable directory. 1.5.0 will tolerate their presence in a directory listing (and display it as "unknown"). 1.4.1 and earlier cannot tolerate them: a DIR-IMM child in any directory will prevent the listing of that directory. Immutable directories are repairable, just like normal immutable files. The webapi "POST t=mkdir-immutable" call is used to create immutable directories. See `webapi.rst`_ for details. - "tahoe backup" now creates immutable directories, backupdb has dircache The "tahoe backup" command has been enhanced to create immutable directories (in previous releases, it created read-only mutable directories) (`#828`_). This is significantly faster, since it does not need to create an RSA keypair for each new directory. Also "DIR-IMM" immutable directories are repairable, unlike "DIR-RO" read-only mutable directories at present. (A future Tahoe-LAFS release should also be able to repair DIR-RO.) In addition, the backupdb (used by "tahoe backup" to remember what it has already copied) has been enhanced to store information about existing immutable directories. This allows it to re-use directories that have moved but still contain identical contents, or that have been deleted and later replaced. (The 1.5.0 "tahoe backup" command could only re-use directories that were in the same place as they were in the immediately previous backup.) With this change, the backup process no longer needs to read the previous snapshot out of the Tahoe-LAFS grid, reducing the network load considerably. (`#606`_) A "null backup" (in which nothing has changed since the previous backup) will require only two Tahoe-side operations: one to add an Archives/$TIMESTAMP entry, and a second to update the Latest/ link. On the local disk side, it will readdir() all your local directories and stat() all your local files. If you've been using "tahoe backup" for a while, you will notice that your first use of it after upgrading to 1.6.0 may take a long time: it must create proper immutable versions of all the old read-only mutable directories. This process won't take as long as the initial backup (where all the file contents had to be uploaded too): it will require time proportional to the number and size of your directories. After this initial pass, all subsequent passes should take a tiny fraction of the time. As noted above, Tahoe-LAFS versions earlier than 1.5.0 cannot list a directory containing an immutable subdirectory. Tahoe-LAFS versions earlier than 1.6.0 cannot read the contents of an immutable directory. The "tahoe backup" command has been improved to skip over unreadable objects (like device files, named pipes, and files with permissions that prevent the command from reading their contents), instead of throwing an exception and terminating the backup process. It also skips over symlinks, because these cannot be represented faithfully in the Tahoe-side filesystem. A warning message will be emitted each time something is skipped. 
(`#729`_, `#850`_, `#641`_) - "create-node" command added, "create-client" now implies --no-storage The basic idea behind Tahoe-LAFS's client+server and client-only processes is that you are creating a general-purpose Tahoe-LAFS "node" process, which has several components that can be activated. Storage service is one of these optional components, as is the Helper, FTP server, and SFTP server. Web gateway functionality is nominally on this list, but it is always active; a future release will make it optional. There are three special purpose servers that can't currently be run as a component in a node: introducer, key-generator, and stats-gatherer. So now "tahoe create-node" will create a Tahoe-LAFS node process, and after creation you can edit its tahoe.cfg to enable or disable the desired services. It is a more general-purpose replacement for "tahoe create-client". The default configuration has storage service enabled. For convenience, the "--no-storage" argument makes a tahoe.cfg file that disables storage service. (`#760`_) "tahoe create-client" has been changed to create a Tahoe-LAFS node without a storage service. It is equivalent to "tahoe create-node --no-storage". This helps to reduce the confusion surrounding the use of a command with "client" in its name to create a storage *server*. Use "tahoe create-client" to create a purely client-side node. If you want to offer storage to the grid, use "tahoe create-node" instead. In the future, other services will be added to the node, and they will be controlled through options in tahoe.cfg . The most important of these services may get additional --enable-XYZ or --disable-XYZ arguments to "tahoe create-node". - Performance Improvements Download of immutable files begins as soon as the downloader has located the K necessary shares (`#928`_, `#287`_). In both the previous and current releases, a downloader will first issue queries to all storage servers on the grid to locate shares before it begins downloading the shares. In previous releases of Tahoe-LAFS, download would not begin until all storage servers on the grid had replied to the query, at which point K shares would be chosen for download from among the shares that were located. In this release, download begins as soon as any K shares are located. This means that downloads start sooner, which is particularly important if there is a server on the grid that is extremely slow or even hung in such a way that it will never respond. In previous releases such a server would have a negative impact on all downloads from that grid. In this release, such a server will have no impact on downloads, as long as K shares can be found on other, quicker, servers. This also means that downloads now use the "best-alacrity" servers that they talk to, as measured by how quickly the servers reply to the initial query. This might cause downloads to go faster, especially on grids with heterogeneous servers or geographical dispersion. Minor Changes ------------- - The webapi acquired a new "t=mkdir-with-children" command, to create and populate a directory in a single call. This is significantly faster than using separate "t=mkdir" and "t=set-children" operations (it uses one gateway-to-grid roundtrip, instead of three or four). (`#533`_) - The t=set-children (note the hyphen) operation is now documented in webapi.rst, and is the new preferred spelling of the old t=set_children (with an underscore). The underscore version remains for backwards compatibility. 
(`#381`_, `#927`_) - The tracebacks produced by errors in CLI tools should now be in plain text, instead of HTML (which is unreadable outside of a browser). (`#646`_) - The [storage]reserved_space configuration knob (which causes the storage server to refuse shares when available disk space drops below a threshold) should work on Windows now, not just UNIX. (`#637`_) - "tahoe cp" should now exit with status "1" if it cannot figure out a suitable target filename, such as when you copy from a bare filecap. (`#761`_) - "tahoe get" no longer creates a zero-length file upon error. (`#121`_) - "tahoe ls" can now list single files. (`#457`_) - "tahoe deep-check --repair" should tolerate repair failures now, instead of halting traversal. (`#874`_, `#786`_) - "tahoe create-alias" no longer corrupts the aliases file if it had previously been edited to have no trailing newline. (`#741`_) - Many small packaging improvements were made to facilitate the "tahoe-lafs" package being included in Ubuntu. Several mac/win32 binary libraries were removed, some figleaf code-coverage files were removed, a bundled copy of darcsver-1.2.1 was removed, and additional licensing text was added. - Several DeprecationWarnings for python2.6 were silenced. (`#859`_) - The checker --add-lease option would sometimes fail for shares stored on old (Tahoe v1.2.0) servers. (`#875`_) - The documentation for installing on Windows (docs/quickstart.rst) has been improved. (`#773`_) For other changes not mentioned here, see . To include the tickets mentioned above, go to . .. _`#121`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/121 .. _`#287`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/287 .. _`#381`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/381 .. _`#457`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/457 .. _`#533`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/533 .. _`#577`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/577 .. _`#606`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/606 .. _`#607`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/607 .. _`#637`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/637 .. _`#641`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/641 .. _`#646`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/646 .. _`#681`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/681 .. _`#729`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/729 .. _`#741`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/741 .. _`#760`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/760 .. _`#761`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/761 .. _`#773`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/773 .. _`#786`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/786 .. _`#828`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/828 .. _`#833`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/833 .. _`#859`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/859 .. _`#874`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/874 .. _`#875`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/875 .. _`#931`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/931 .. _`#837`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/837 .. _`#850`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/850 .. _`#927`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/927 .. _`#928`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/928 .. _`#939`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/939 .. _`#948`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/948 .. 
_webapi.rst: docs/frontends/webapi.rst Release 1.5.0 (2009-08-01) '''''''''''''''''''''''''' Improvements ------------ - Uploads of immutable files now use pipelined writes, improving upload speed slightly (10%) over high-latency connections. (`#392`_) - Processing large directories has been sped up, by removing a O(N^2) algorithm from the dirnode decoding path and retaining unmodified encrypted entries. (`#750`_, `#752`_) - The human-facing web interface (aka the "WUI") received a significant CSS makeover by Kevin Reid, making it much prettier and easier to read. The WUI "check" and "deep-check" forms now include a "Renew Lease" checkbox, mirroring the CLI --add-lease option, so leases can be added or renewed from the web interface. - The CLI "tahoe mv" command now refuses to overwrite directories. (`#705`_) - The CLI "tahoe webopen" command, when run without arguments, will now bring up the "Welcome Page" (node status and mkdir/upload forms). - The 3.5MB limit on mutable files was removed, so it should be possible to upload arbitrarily-sized mutable files. Note, however, that the data format and algorithm remains the same, so using mutable files still requires bandwidth, computation, and RAM in proportion to the size of the mutable file. (`#694`_) - This version of Tahoe-LAFS will tolerate directory entries that contain filecap formats which it does not recognize: files and directories from the future. This should improve the user experience (for 1.5.0 users) when we add new cap formats in the future. Previous versions would fail badly, preventing the user from seeing or editing anything else in those directories. These unrecognized objects can be renamed and deleted, but obviously not read or written. Also they cannot generally be copied. (`#683`_) Bugfixes -------- - deep-check-and-repair now tolerates read-only directories, such as the ones produced by the "tahoe backup" CLI command. Read-only directories and mutable files are checked, but not repaired. Previous versions threw an exception when attempting the repair and failed to process the remaining contents. We cannot yet repair these read-only objects, but at least this version allows the rest of the check+repair to proceed. (`#625`_) - A bug in 1.4.1 which caused a server to be listed multiple times (and frequently broke all connections to that server) was fixed. (`#653`_) - The plaintext-hashing code was removed from the Helper interface, removing the Helper's ability to mount a partial-information-guessing attack. (`#722`_) Platform/packaging changes -------------------------- - Tahoe-LAFS now runs on NetBSD, OpenBSD, ArchLinux, and NixOS, and on an embedded system based on an ARM CPU running at 266 MHz. - Unit test timeouts have been raised to allow the tests to complete on extremely slow platforms like embedded ARM-based NAS boxes, which may take several hours to run the test suite. An ARM-specific data-corrupting bug in an older version of Crypto++ (5.5.2) was identified: ARM-users are encouraged to use recent Crypto++/pycryptopp which avoids this problem. - Tahoe-LAFS now requires a SQLite library, either the sqlite3 that comes built-in with python2.5/2.6, or the add-on pysqlite2 if you're using python2.4. In the previous release, this was only needed for the "tahoe backup" command: now it is mandatory. - Several minor documentation updates were made. 
- To help get Tahoe-LAFS into Linux distributions like Fedora and Debian, packaging improvements are being made in both Tahoe-LAFS and related libraries like pycryptopp and zfec. - The Crypto++ library included in the pycryptopp package has been upgraded to version 5.6.0 of Crypto++, which includes a more efficient implementation of SHA-256 in assembly for x86 or amd64 architectures. dependency updates ------------------ - foolscap-0.4.1 - no python-2.4.0 or 2.4.1 (2.4.2 is good) (they contained a bug in base64.b32decode) - avoid python-2.6 on windows with mingw: compiler issues - python2.4 requires pysqlite2 (2.5,2.6 does not) - no python-3.x - pycryptopp-0.5.15 .. _#392: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/392 .. _#625: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/625 .. _#653: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/653 .. _#683: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/683 .. _#694: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/694 .. _#705: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/705 .. _#722: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/722 .. _#750: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/750 .. _#752: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/752 Release 1.4.1 (2009-04-13) '''''''''''''''''''''''''' Garbage Collection ------------------ - The big feature for this release is the implementation of garbage collection, allowing Tahoe storage servers to delete shares for old deleted files. When enabled, this uses a "mark and sweep" process: clients are responsible for updating the leases on their shares (generally by running "tahoe deep-check --add-lease"), and servers are allowed to delete any share which does not have an up-to-date lease. The process is described in detail in `garbage-collection.rst`_. The server must be configured to enable garbage-collection, by adding directives to the [storage] section that define an age limit for shares. The default configuration will not delete any shares. Both servers and clients should be upgraded to this release to make the garbage-collection as pleasant as possible. 1.2.0 servers have code to perform the update-lease operation but it suffers from a fatal bug, while 1.3.0 servers have update-lease but will return an exception for unknown storage indices, causing clients to emit an Incident for each exception, slowing the add-lease process down to a crawl. 1.1.0 servers did not have the add-lease operation at all. Security/Usability Problems Fixed --------------------------------- - A super-linear algorithm in the Merkle Tree code was fixed, which previously caused e.g. download of a 10GB file to take several hours before the first byte of plaintext could be produced. The new "alacrity" is about 2 minutes. A future release should reduce this to a few seconds by fixing ticket `#442`_. - The previous version permitted a small timing attack (due to our use of strcmp) against the write-enabler and lease-renewal/cancel secrets. An attacker who could measure response-time variations of approximatly 3ns against a very noisy background time of about 15ms might be able to guess these secrets. We do not believe this attack was actually feasible. This release closes the attack by first hashing the two strings to be compared with a random secret. webapi changes -------------- - In most cases, HTML tracebacks will only be sent if an "Accept: text/html" header was provided with the HTTP request. 
This will generally cause browsers to get an HTMLized traceback but send regular text/plain tracebacks to non-browsers (like the CLI clients). More errors have been mapped to useful HTTP error codes. - The streaming webapi operations (deep-check and manifest) now have a way to indicate errors (an output line that starts with "ERROR" instead of being legal JSON). See `webapi.rst`_ for details. - The storage server now has its own status page (at /storage), linked from the Welcome page. This page shows progress and results of the two new share-crawlers: one which merely counts shares (to give an estimate of how many files/directories are being stored in the grid), the other examines leases and reports how much space would be freed if GC were enabled. The page also shows how much disk space is present, used, reserved, and available for the Tahoe server, and whether the server is currently running in "read-write" mode or "read-only" mode. - When a directory node cannot be read (perhaps because of insufficent shares), a minimal webapi page is created so that the "more-info" links (including a Check/Repair operation) will still be accessible. - A new "reliability" page was added, with the beginnings of work on a statistical loss model. You can tell this page how many servers you are using and their independent failure probabilities, and it will tell you the likelihood that an arbitrary file will survive each repair period. The "numpy" package must be installed to access this page. A partial paper, written by Shawn Willden, has been added to docs/proposed/lossmodel.lyx . CLI changes ----------- - "tahoe check" and "tahoe deep-check" now accept an "--add-lease" argument, to update a lease on all shares. This is the "mark" side of garbage collection. - In many cases, CLI error messages have been improved: the ugly HTMLized traceback has been replaced by a normal python traceback. - "tahoe deep-check" and "tahoe manifest" now have better error reporting. "tahoe cp" is now non-verbose by default. - "tahoe backup" now accepts several "--exclude" arguments, to ignore certain files (like editor temporary files and version-control metadata) during backup. - On windows, the CLI now accepts local paths like "c:\dir\file.txt", which previously was interpreted as a Tahoe path using a "c:" alias. - The "tahoe restart" command now uses "--force" by default (meaning it will start a node even if it didn't look like there was one already running). - The "tahoe debug consolidate" command was added. This takes a series of independent timestamped snapshot directories (such as those created by the allmydata.com windows backup program, or a series of "tahoe cp -r" commands) and creates new snapshots that used shared read-only directories whenever possible (like the output of "tahoe backup"). In the most common case (when the snapshots are fairly similar), the result will use significantly fewer directories than the original, allowing "deep-check" and similar tools to run much faster. In some cases, the speedup can be an order of magnitude or more. This tool is still somewhat experimental, and only needs to be run on large backups produced by something other than "tahoe backup", so it was placed under the "debug" category. - "tahoe cp -r --caps-only tahoe:dir localdir" is a diagnostic tool which, instead of copying the full contents of files into the local directory, merely copies their filecaps. This can be used to verify the results of a "consolidation" operation. 
other fixes ----------- - The codebase no longer rauses RuntimeError as a kind of assert(). Specific exception classes were created for each previous instance of RuntimeError. - Many unit tests were changed to use a non-network test harness, speeding them up considerably. - Deep-traversal operations (manifest and deep-check) now walk individual directories in alphabetical order. Occasional turn breaks are inserted to prevent a stack overflow when traversing directories with hundreds of entries. - The experimental SFTP server had its path-handling logic changed slightly, to accomodate more SFTP clients, although there are still issues (`#645`_). .. _#442: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/442 .. _#645: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/645 .. _garbage-collection.rst: docs/garbage-collection.rst Release 1.3.0 (2009-02-13) '''''''''''''''''''''''''' Checker/Verifier/Repairer ------------------------- - The primary focus of this release has been writing a checker / verifier / repairer for files and directories. "Checking" is the act of asking storage servers whether they have a share for the given file or directory: if there are not enough shares available, the file or directory will be unrecoverable. "Verifying" is the act of downloading and cryptographically asserting that the server's share is undamaged: it requires more work (bandwidth and CPU) than checking, but can catch problems that simple checking cannot. "Repair" is the act of replacing missing or damaged shares with new ones. - This release includes a full checker, a partial verifier, and a partial repairer. The repairer is able to handle missing shares: new shares are generated and uploaded to make up for the missing ones. This is currently the best application of the repairer: to replace shares that were lost because of server departure or permanent drive failure. - The repairer in this release is somewhat able to handle corrupted shares. The limitations are: - Immutable verifier is incomplete: not all shares are used, and not all fields of those shares are verified. Therefore the immutable verifier has only a moderate chance of detecting corrupted shares. - The mutable verifier is mostly complete: all shares are examined, and most fields of the shares are validated. - The storage server protocol offers no way for the repairer to replace or delete immutable shares. If corruption is detected, the repairer will upload replacement shares to other servers, but the corrupted shares will be left in place. - read-only directories and read-only mutable files must be repaired by someone who holds the write-cap: the read-cap is insufficient. Moreover, the deep-check-and-repair operation will halt with an error if it attempts to repair one of these read-only objects. - Some forms of corruption can cause both download and repair operations to fail. A future release will fix this, since download should be tolerant of any corruption as long as there are at least 'k' valid shares, and repair should be able to fix any file that is downloadable. - If the downloader, verifier, or repairer detects share corruption, the servers which provided the bad shares will be notified (via a file placed in the BASEDIR/storage/corruption-advisories directory) so their operators can manually delete the corrupted shares and investigate the problem. In addition, the "incident gatherer" mechanism will automatically report share corruption to an incident gatherer service, if one is configured. 
Note that corrupted shares indicate hardware failures, serious software bugs, or malice on the part of the storage server operator, so a corrupted share should be considered highly unusual. - By periodically checking/repairing all files and directories, objects in the Tahoe filesystem remain resistant to recoverability failures due to missing and/or broken servers. - This release includes a wapi mechanism to initiate checks on individual files and directories (with or without verification, and with or without automatic repair). A related mechanism is used to initiate a "deep-check" on a directory: recursively traversing the directory and its children, checking (and/or verifying/repairing) everything underneath. Both mechanisms can be run with an "output=JSON" argument, to obtain machine-readable check/repair status results. These results include a copy of the filesystem statistics from the "deep-stats" operation (including total number of files, size histogram, etc). If repair is possible, a "Repair" button will appear on the results page. - The client web interface now features some extra buttons to initiate check and deep-check operations. When these operations finish, they display a results page that summarizes any problems that were encountered. All long-running deep-traversal operations, including deep-check, use a start-and-poll mechanism, to avoid depending upon a single long-lived HTTP connection. `webapi.rst`_ has details. Efficient Backup ---------------- - The "tahoe backup" command is new in this release, which creates efficient versioned backups of a local directory. Given a local pathname and a target Tahoe directory, this will create a read-only snapshot of the local directory in $target/Archives/$timestamp. It will also create $target/Latest, which is a reference to the latest such snapshot. Each time you run "tahoe backup" with the same source and target, a new $timestamp snapshot will be added. These snapshots will share directories that have not changed since the last backup, to speed up the process and minimize storage requirements. In addition, a small database is used to keep track of which local files have been uploaded already, to avoid uploading them a second time. This drastically reduces the work needed to do a "null backup" (when nothing has changed locally), making "tahoe backup' suitable to run from a daily cronjob. Note that the "tahoe backup" CLI command must be used in conjunction with a 1.3.0-or-newer Tahoe client node; there was a bug in the 1.2.0 webapi implementation that would prevent the last step (create $target/Latest) from working. Large Files ----------- - The 12GiB (approximate) immutable-file-size limitation is lifted. This release knows how to handle so-called "v2 immutable shares", which permit immutable files of up to about 18 EiB (about 3*10^14). These v2 shares are created if the file to be uploaded is too large to fit into v1 shares. v1 shares are created if the file is small enough to fit into them, so that files created with tahoe-1.3.0 can still be read by earlier versions if they are not too large. Note that storage servers also had to be changed to support larger files, and this release is the first release in which they are able to do that. Clients will detect which servers are capable of supporting large files on upload and will not attempt to upload shares of a large file to a server which doesn't support it. FTP/SFTP Server --------------- - Tahoe now includes experimental FTP and SFTP servers. 
  When configured with a suitable method to translate username+password into
  a root directory cap, it provides simple access to the virtual filesystem.
  Remember that FTP is completely unencrypted: passwords, filenames, and file
  contents are all sent over the wire in cleartext, so FTP should only be
  used on a local (127.0.0.1) connection. This feature is still in
  development: there are no unit tests yet, and behavior with respect to
  Unicode filenames is uncertain. Please see `FTP-and-SFTP.rst`_ for
  configuration details. (`#512`_, `#531`_)

CLI Changes
-----------

- This release adds the 'tahoe create-alias' command, which is a combination
  of 'tahoe mkdir' and 'tahoe add-alias'. This also allows you to start using
  a new tahoe directory without exposing its URI in the argv list, which is
  publicly visible (through the process table) on most unix systems. Thanks
  to Kevin Reid for bringing this issue to our attention.

- The single-argument form of "tahoe put" was changed to create an unlinked
  file. I.e. "tahoe put bar.txt" will take the contents of a local "bar.txt"
  file, upload them to the grid, and print the resulting read-cap; the file
  will not be attached to any directories. This seemed a bit more useful than
  the previous behavior (copy stdin, upload to the grid, attach the resulting
  file into your default tahoe: alias in a child named 'bar.txt').

- "tahoe put" was also fixed to handle mutable files correctly: "tahoe put
  bar.txt URI:SSK:..." will read the contents of the local bar.txt and use
  them to replace the contents of the given mutable file.

- The "tahoe webopen" command was modified to accept aliases. This means
  "tahoe webopen tahoe:" will cause your web browser to open to a "wui" page
  that gives access to the directory associated with the default "tahoe:"
  alias. It should also accept leading slashes, like "tahoe webopen
  tahoe:/stuff".

- Many esoteric debugging commands were moved down into a "debug"
  subcommand:

  - tahoe debug dump-cap
  - tahoe debug dump-share
  - tahoe debug find-shares
  - tahoe debug catalog-shares
  - tahoe debug corrupt-share

  The last command ("tahoe debug corrupt-share") flips a random bit of the
  given local sharefile. This is used to test the file verifying/repairing
  code, and obviously should not be used on user data.

  The CLI might not correctly handle arguments which contain non-ASCII
  characters in Tahoe v1.3 (although depending on your platform it might,
  especially if your platform can be configured to pass such characters on
  the command-line in utf-8 encoding). See
  https://tahoe-lafs.org/trac/tahoe-lafs/ticket/565 for details.

Web changes
-----------

- The "default webapi port", used when creating a new client node (and in
  the getting-started documentation), was changed from 8123 to 3456, to
  reduce confusion when Tahoe is accessed through a Firefox browser on which
  the "Torbutton" extension has been installed. Port 8123 is occasionally
  used as a Tor control port, so Torbutton adds 8123 to Firefox's list of
  "banned ports" to avoid CSRF attacks against Tor. Once 8123 is banned, it
  is difficult to diagnose why you can no longer reach a Tahoe node, so the
  Tahoe default was changed. Note that 3456 is reserved by IANA for the "vat"
  protocol, but there are arguably more Torbutton+Tahoe users than vat users
  these days. Note that this will only affect newly-created client nodes.
  Pre-existing client nodes, created by earlier versions of tahoe, may still
  be listening on 8123.
- All deep-traversal operations (start-manifest, start-deep-size, start-deep-stats, start-deep-check) now use a start-and-poll approach, instead of using a single (fragile) long-running synchronous HTTP connection. All these "start-" operations use POST instead of GET. The old "GET manifest", "GET deep-size", and "POST deep-check" operations have been removed. - The new "POST start-manifest" operation, when it finally completes, results in a table of (path,cap), instead of the list of verifycaps produced by the old "GET manifest". The table is available in several formats: use output=html, output=text, or output=json to choose one. The JSON output also includes stats, and a list of verifycaps and storage-index strings. The "return_to=" and "when_done=" arguments have been removed from the t=check and deep-check operations. - The top-level status page (/status) now has a machine-readable form, via "/status/?t=json". This includes information about the currently-active uploads and downloads, which may be useful for frontends that wish to display progress information. There is no easy way to correlate the activities displayed here with recent wapi requests, however. - Any files in BASEDIR/public_html/ (configurable) will be served in response to requests in the /static/ portion of the URL space. This will simplify the deployment of javascript-based frontends that can still access wapi calls by conforming to the (regrettable) "same-origin policy". - The welcome page now has a "Report Incident" button, which is tied into the "Incident Gatherer" machinery. If the node is attached to an incident gatherer (via log_gatherer.furl), then pushing this button will cause an Incident to be signalled: this means recent log events are aggregated and sent in a bundle to the gatherer. The user can push this button after something strange takes place (and they can provide a short message to go along with it), and the relevant data will be delivered to a centralized incident-gatherer for later processing by operations staff. - The "HEAD" method should now work correctly, in addition to the usual "GET", "PUT", and "POST" methods. "HEAD" is supposed to return exactly the same headers as "GET" would, but without any of the actual response body data. For mutable files, this now does a brief mapupdate (to figure out the size of the file that would be returned), without actually retrieving the file's contents. - The "GET" operation on files can now support the HTTP "Range:" header, allowing requests for partial content. This allows certain media players to correctly stream audio and movies out of a Tahoe grid. The current implementation uses a disk-based cache in BASEDIR/private/cache/download , which holds the plaintext of the files being downloaded. Future implementations might not use this cache. GET for immutable files now returns an ETag header. - Each file and directory now has a "Show More Info" web page, which contains much of the information that was crammed into the directory page before. This includes readonly URIs, storage index strings, object type, buttons to control checking/verifying/repairing, and deep-check/deep-stats buttons (for directories). For mutable files, the "replace contents" upload form has been moved here too. As a result, the directory page is now much simpler and cleaner, and several potentially-misleading links (like t=uri) are now gone. - Slashes are discouraged in Tahoe file/directory names, since they cause problems when accessing the filesystem through the wapi. 
However, there are a couple of accidental ways to generate such names. This release tries to make it easier to correct such mistakes by escaping slashes in several places, allowing slashes in the t=info and t=delete commands, and in the source (but not the target) of a t=rename command. Packaging --------- - Tahoe's dependencies have been extended to require the "[secure_connections]" feature from Foolscap, which will cause pyOpenSSL to be required and/or installed. If OpenSSL and its development headers are already installed on your system, this can occur automatically. Tahoe now uses pollreactor (instead of the default selectreactor) to work around a bug between pyOpenSSL and the most recent release of Twisted (8.1.0). This bug only affects unit tests (hang during shutdown), and should not impact regular use. - The Tahoe source code tarballs now come in two different forms: regular and "sumo". The regular tarball contains just Tahoe, nothing else. When building from the regular tarball, the build process will download any unmet dependencies from the internet (starting with the index at PyPI) so it can build and install them. The "sumo" tarball contains copies of all the libraries that Tahoe requires (foolscap, twisted, zfec, etc), so using the "sumo" tarball should not require any internet access during the build process. This can be useful if you want to build Tahoe while on an airplane, a desert island, or other bandwidth-limited environments. - Similarly, tahoe-lafs.org now hosts a "tahoe-deps" tarball which contains the latest versions of all these dependencies. This tarball, located at https://tahoe-lafs.org/source/tahoe/deps/tahoe-deps.tar.gz, can be unpacked in the tahoe source tree (or in its parent directory), and the build process should satisfy its downloading needs from it instead of reaching out to PyPI. This can be useful if you want to build Tahoe from a darcs checkout while on that airplane or desert island. - Because of the previous two changes ("sumo" tarballs and the "tahoe-deps" bundle), most of the files have been removed from misc/dependencies/ . This brings the regular Tahoe tarball down to 2MB (compressed), and the darcs checkout (without history) to about 7.6MB. A full darcs checkout will still be fairly large (because of the historical patches which included the dependent libraries), but a 'lazy' one should now be small. - The default "make" target is now an alias for "setup.py build", which itself is an alias for "setup.py develop --prefix support", with some extra work before and after (see setup.cfg). Most of the complicated platform-dependent code in the Makefile was rewritten in Python and moved into setup.py, simplifying things considerably. - Likewise, the "make test" target now delegates most of its work to "setup.py test", which takes care of getting PYTHONPATH configured to access the tahoe code (and dependencies) that gets put in support/lib/ by the build_tahoe step. This should allow unit tests to be run even when trial (which is part of Twisted) wasn't already installed (in this case, trial gets installed to support/bin because Twisted is a dependency of Tahoe). - Tahoe is now compatible with the recently-released Python 2.6 , although it is recommended to use Tahoe on Python 2.5, on which it has received more thorough testing and deployment. - Tahoe is now compatible with simplejson-2.0.x . The previous release assumed that simplejson.loads always returned unicode strings, which is no longer the case in 2.0.x . 
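The simplejson point above is easy to trip over in client code that parses
Tahoe's JSON output. The following is a minimal, illustrative sketch (not
Tahoe's actual code; the ``loads_unicode`` helper name is made up) of a
defensive wrapper that returns unicode regardless of whether
``simplejson.loads`` hands back ``str`` or ``unicode`` objects::

  import simplejson

  def _to_unicode(x):
      # simplejson 2.0.x may return plain 'str' objects where 1.x always
      # returned 'unicode'; normalize recursively so callers can rely on
      # getting unicode back.
      if isinstance(x, str):
          return x.decode("utf-8")
      if isinstance(x, list):
          return [_to_unicode(i) for i in x]
      if isinstance(x, dict):
          return dict((_to_unicode(k), _to_unicode(v))
                      for (k, v) in x.items())
      return x

  def loads_unicode(s):
      return _to_unicode(simplejson.loads(s))

  print loads_unicode('{"nickname": "example"}')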
Grid Management Tools --------------------- - Several tools have been added or updated in the misc/ directory, mostly munin plugins that can be used to monitor a storage grid. - The misc/spacetime/ directory contains a "disk watcher" daemon (startable with 'tahoe start'), which can be configured with a set of HTTP URLs (pointing at the wapi '/statistics' page of a bunch of storage servers), and will periodically fetch disk-used/disk-available information from all the servers. It keeps this information in an Axiom database (a sqlite-based library available from divmod.org). The daemon computes time-averaged rates of disk usage, as well as a prediction of how much time is left before the grid is completely full. - The misc/munin/ directory contains a new set of munin plugins (tahoe_diskleft, tahoe_diskusage, tahoe_doomsday) which talk to the disk-watcher and provide graphs of its calculations. - To support the disk-watcher, the Tahoe statistics component (visible through the wapi at the /statistics/ URL) now includes disk-used and disk-available information. Both are derived through an equivalent of the unix 'df' command (i.e. they ask the kernel for the number of free blocks on the partition that encloses the BASEDIR/storage directory). In the future, the disk-available number will be further influenced by the local storage policy: if that policy says that the server should refuse new shares when less than 5GB is left on the partition, then "disk-available" will report zero even though the kernel sees 5GB remaining. - The 'tahoe_overhead' munin plugin interacts with an allmydata.com-specific server which reports the total of the 'deep-size' reports for all active user accounts, compares this with the disk-watcher data, to report on overhead percentages. This provides information on how much space could be recovered once Tahoe implements some form of garbage collection. Configuration Changes: single INI-format tahoe.cfg file ------------------------------------------------------- - The Tahoe node is now configured with a single INI-format file, named "tahoe.cfg", in the node's base directory. Most of the previous multiple-separate-files are still read for backwards compatibility (the embedded SSH debug server and the advertised_ip_addresses files are the exceptions), but new directives will only be added to tahoe.cfg . The "tahoe create-client" command will create a tahoe.cfg for you, with sample values commented out. (ticket `#518`_) - tahoe.cfg now has controls for the foolscap "keepalive" and "disconnect" timeouts (`#521`_). - tahoe.cfg now has controls for the encoding parameters: "shares.needed" and "shares.total" in the "[client]" section. The default parameters are still 3-of-10. - The inefficient storage 'sizelimit' control (which established an upper bound on the amount of space that a storage server is allowed to consume) has been replaced by a lightweight 'reserved_space' control (which establishes a lower bound on the amount of remaining space). The storage server will reject all writes that would cause the remaining disk space (as measured by a '/bin/df' equivalent) to drop below this value. The "[storage]reserved_space=" tahoe.cfg parameter controls this setting. (note that this only affects immutable shares: it is an outstanding bug that reserved_space does not prevent the allocation of new mutable shares, nor does it prevent the growth of existing mutable shares). Other Changes ------------- - Clients now declare which versions of the protocols they support. 
  This is part of a new backwards-compatibility system:
  https://tahoe-lafs.org/trac/tahoe-lafs/wiki/Versioning .

- The version strings for human inspection (as displayed on the Welcome web
  page, and included in logs) now include a platform identifier (frequently
  including a Linux distribution name, processor architecture, etc).

- Several bugs have been fixed, including one that would cause an exception
  (in the logs) if a wapi download operation was cancelled (by closing the
  TCP connection, or pushing the "stop" button in a web browser).

- Tahoe now uses Foolscap "Incidents", writing an "incident report" file to
  logs/incidents/ each time something weird occurs. These reports are
  available to an "incident gatherer" through the flogtool command. For more
  details, please see the Foolscap logging documentation. An
  incident-classifying plugin function is provided in
  misc/incident-gatherer/classify_tahoe.py .

- If clients detect corruption in shares, they now automatically report it
  to the server holding that share, if it is new enough to accept the
  report. These reports are written to files in
  BASEDIR/storage/corruption-advisories .

- The 'nickname' setting is now defined to be a UTF-8 -encoded string,
  allowing non-ASCII nicknames.

- The 'tahoe start' command will now accept a --syslog argument and pass it
  through to twistd, making it easier to launch non-Tahoe nodes (like the
  cpu-watcher) and have them log to syslogd instead of a local file. This is
  useful when running a Tahoe node out of a USB flash drive.

- The Mac GUI in src/allmydata/gui/ has been improved.

.. _#512: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/512
.. _#518: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/518
.. _#521: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/521
.. _#531: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/531


Release 1.2.0 (2008-07-21)
''''''''''''''''''''''''''

Security
--------

- This release makes the immutable-file "ciphertext hash tree" mandatory.
  Previous releases allowed the uploader to decide whether their file would
  have an integrity check on the ciphertext or not. A malicious uploader
  could use this to create a readcap that would download as one file or a
  different one, depending upon which shares the client fetched first, with
  no errors raised. There are other integrity checks on the shares
  themselves, preventing a storage server or other party from violating the
  integrity properties of the read-cap: this failure was only exploitable by
  the uploader who gives you a carefully constructed read-cap. If you
  download the file with Tahoe 1.2.0 or later, you will not be vulnerable to
  this problem. `#491`_

  This change does not introduce a compatibility issue, because all existing
  versions of Tahoe will emit the ciphertext hash tree in their shares.

Dependencies
------------

- Tahoe now requires Foolscap-0.2.9 . It also requires pycryptopp 0.5 or
  newer, since earlier versions had a bug that interacted with specific
  compiler versions that could sometimes result in incorrect encryption
  behavior. Both packages are included in the Tahoe source tarball in
  misc/dependencies/ , and should be built automatically when necessary.

Web API
-------

- Web API directory pages should now contain properly-slash-terminated links
  to other directories. They have also stopped using absolute links in forms
  and pages (which interfered with the use of a front-end load-balancing
  proxy).

- The behavior of the "Check This File" button changed, in conjunction with
  larger internal changes to file checking/verification.
The button triggers an immediate check as before, but the outcome is shown on its own page, and does not get stored anywhere. As a result, the web directory page no longer shows historical checker results. - A new "Deep-Check" button has been added, which allows a user to initiate a recursive check of the given directory and all files and directories reachable from it. This can cause quite a bit of work, and has no intermediate progress information or feedback about the process. In addition, the results of the deep-check are extremely limited. A later release will improve this behavior. - The web server's behavior with respect to non-ASCII (unicode) filenames in the "GET save=true" operation has been improved. To achieve maximum compatibility with variously buggy web browsers, the server does not try to figure out the character set of the inbound filename. It just echoes the same bytes back to the browser in the Content-Disposition header. This seems to make both IE7 and Firefox work correctly. Checker/Verifier/Repairer ------------------------- - Tahoe is slowly acquiring convenient tools to check up on file health, examine existing shares for errors, and repair files that are not fully healthy. This release adds a mutable checker/verifier/repairer, although testing is very limited, and there are no web interfaces to trigger repair yet. The "Check" button next to each file or directory on the wapi page will perform a file check, and the "deep check" button on each directory will recursively check all files and directories reachable from there (which may take a very long time). Future releases will improve access to this functionality. Operations/Packaging -------------------- - A "check-grid" script has been added, along with a Makefile target. This is intended (with the help of a pre-configured node directory) to check upon the health of a Tahoe grid, uploading and downloading a few files. This can be used as a monitoring tool for a deployed grid, to be run periodically and to signal an error if it ever fails. It also helps with compatibility testing, to verify that the latest Tahoe code is still able to handle files created by an older version. - The munin plugins from misc/munin/ are now copied into any generated debian packages, and are made executable (and uncompressed) so they can be symlinked directly from /etc/munin/plugins/ . - Ubuntu "Hardy" was added as a supported debian platform, with a Makefile target to produce hardy .deb packages. Some notes have been added to `debian.rst`_ about building Tahoe on a debian/ubuntu system. - Storage servers now measure operation rates and latency-per-operation, and provides results through the /statistics web page as well as the stats gatherer. Munin plugins have been added to match. Other ----- - Tahoe nodes now use Foolscap "incident logging" to record unusual events to their NODEDIR/logs/incidents/ directory. These incident files can be examined by Foolscap logging tools, or delivered to an external log-gatherer for further analysis. Note that Tahoe now requires Foolscap-0.2.9, since 0.2.8 had a bug that complained about "OSError: File exists" when trying to create the incidents/ directory for a second time. - If no servers are available when retrieving a mutable file (like a directory), the node now reports an error instead of hanging forever. 
Earlier releases would not only hang (causing the wapi directory listing to get stuck half-way through), but the internal dirnode serialization would cause all subsequent attempts to retrieve or modify the same directory to hang as well. `#463`_ - A minor internal exception (reported in logs/twistd.log, in the "stopProducing" method) was fixed, which complained about "self._paused_at not defined" whenever a file download was stopped from the web browser end. .. _#463: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/463 .. _#491: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/491 .. _debian.rst: docs/debian.rst Release 1.1.0 (2008-06-11) '''''''''''''''''''''''''' CLI: new "alias" model ---------------------- - The new CLI code uses an scp/rsync -like interface, in which directories in the Tahoe storage grid are referenced by a colon-suffixed alias. The new commands look like: - tahoe cp local.txt tahoe:virtual.txt - tahoe ls work:subdir - More functionality is available through the CLI: creating unlinked files and directories, recursive copy in or out of the storage grid, hardlinks, and retrieving the raw read- or write- caps through the 'ls' command. Please read `CLI.rst`_ for complete details. wapi: new pages, new commands ----------------------------- - Several new pages were added to the web API: - /helper_status : to describe what a Helper is doing - /statistics : reports node uptime, CPU usage, other stats - /file : for easy file-download URLs, see `#221`_ - /cap == /uri : future compatibility - The localdir=/localfile= and t=download operations were removed. These required special configuration to enable anyways, but this feature was a security problem, and was mostly obviated by the new "cp -r" command. - Several new options to the GET command were added: - t=deep-size : add up the size of all immutable files reachable from the directory - t=deep-stats : return a JSON-encoded description of number of files, size distribution, total size, etc - POST is now preferred over PUT for most operations which cause side-effects. - Most wapi calls now accept overwrite=, and default to overwrite=true - "POST /uri/DIRCAP/parent/child?t=mkdir" is now the preferred API to create multiple directories at once, rather than ...?t=mkdir-p . - PUT to a mutable file ("PUT /uri/MUTABLEFILECAP", "PUT /uri/DIRCAP/child") will modify the file in-place. - more munin graphs in misc/munin/ - tahoe-introstats - tahoe-rootdir-space - tahoe_estimate_files - mutable files published/retrieved - tahoe_cpu_watcher - tahoe_spacetime New Dependencies ---------------- - zfec 1.1.0 - foolscap 0.2.8 - pycryptopp 0.5 - setuptools (now required at runtime) New Mutable-File Code --------------------- - The mutable-file handling code (mostly used for directories) has been completely rewritten. The new scheme has a better API (with a modify() method) and is less likely to lose data when several uncoordinated writers change a file at the same time. - In addition, a single Tahoe process will coordinate its own writes. If you make two concurrent directory-modifying wapi calls to a single tahoe node, it will internally make one of them wait for the other to complete. This prevents auto-collision (`#391`_). - The new mutable-file code also detects errors during publish better. Earlier releases might believe that a mutable file was published when in fact it failed. other features -------------- - The node now monitors its own CPU usage, as a percentage, measured every 60 seconds. 
1/5/15 minute moving averages are available on the /statistics web page and via the stats-gathering interface. - Clients now accelerate reconnection to all servers after being offline (`#374`_). When a client is offline for a long time, it scales back reconnection attempts to approximately once per hour, so it may take a while to make the first attempt, but once any attempt succeeds, the other server connections will be retried immediately. - A new "offloaded KeyGenerator" facility can be configured, to move RSA key generation out from, say, a wapi node, into a separate process. RSA keys can take several seconds to create, and so a wapi node which is being used for directory creation will be unavailable for anything else during this time. The Key Generator process will pre-compute a small pool of keys, to speed things up further. This also takes better advantage of multi-core CPUs, or SMP hosts. - The node will only use a potentially-slow "du -s" command at startup (to measure how much space has been used) if the "sizelimit" parameter has been configured (to limit how much space is used). Large storage servers should turn off sizelimit until a later release improves the space-management code, since "du -s" on a terabyte filesystem can take hours. - The Introducer now allows new announcements to replace old ones, to avoid buildups of obsolete announcements. - Immutable files are limited to about 12GiB (when using the default 3-of-10 encoding), because larger files would be corrupted by the four-byte share-size field on the storage servers (`#439`_). A later release will remove this limit. Earlier releases would allow >12GiB uploads, but the resulting file would be unretrievable. - The docs/ directory has been rearranged, with old docs put in docs/historical/ and not-yet-implemented ones in docs/proposed/ . - The Mac OS-X FUSE plugin has a significant bug fix: earlier versions would corrupt writes that used seek() instead of writing the file in linear order. The rsync tool is known to perform writes in this order. This has been fixed. .. _#221: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/221 .. _#374: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/374 .. _#391: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/391 .. _#439: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/439 .. _CLI.rst: docs/CLI.rst tahoe-lafs-1.10.0/PKG-INFO000066400000000000000000000067241221140116300147110ustar00rootroot00000000000000Metadata-Version: 1.0 Name: allmydata-tahoe Version: 1.10.0 Summary: secure, decentralized, fault-tolerant filesystem Home-page: https://tahoe-lafs.org/ Author: the Tahoe-LAFS project Author-email: tahoe-dev@tahoe-lafs.org License: GNU GPL Description: ========== Tahoe-LAFS ========== Tahoe-LAFS is a Free Software/Open Source decentralized data store. It distributes your filesystem across multiple servers, and even if some of the servers fail or are taken over by an attacker, the entire filesystem continues to work correctly and to preserve your privacy and security. To get started please see `quickstart.rst`_ in the docs directory. LICENCE ======= Copyright 2006-2013 The Tahoe-LAFS Software Foundation You may use this package under the GNU General Public License, version 2 or, at your option, any later version. You may use this package under the Transitive Grace Period Public Licence, version 1.0, or at your option, any later version. (You may choose to use this package under the terms of either licence, at your option.) 
See the file `COPYING.GPL`_ for the terms of the GNU General Public License, version 2. See the file `COPYING.TGPPL.rst`_ for the terms of the Transitive Grace Period Public Licence, version 1.0. See `TGPPL.PDF`_ for why the TGPPL exists, graphically illustrated on three slides. .. _quickstart.rst: https://github.com/tahoe-lafs/tahoe-lafs/blob/master/docs/quickstart.rst .. _COPYING.GPL: https://github.com/tahoe-lafs/tahoe-lafs/blob/master/COPYING.GPL .. _COPYING.TGPPL.rst: https://github.com/tahoe-lafs/tahoe-lafs/blob/master/COPYING.TGPPL.rst .. _TGPPL.PDF: https://tahoe-lafs.org/~zooko/tgppl.pdf Platform: UNKNOWN Classifier: Development Status :: 5 - Production/Stable Classifier: Environment :: Console Classifier: Environment :: Web Environment Classifier: License :: OSI Approved :: GNU General Public License (GPL) Classifier: License :: DFSG approved Classifier: License :: Other/Proprietary License Classifier: Intended Audience :: Developers Classifier: Intended Audience :: End Users/Desktop Classifier: Intended Audience :: System Administrators Classifier: Operating System :: Microsoft Classifier: Operating System :: Microsoft :: Windows Classifier: Operating System :: Microsoft :: Windows :: Windows NT/2000 Classifier: Operating System :: Unix Classifier: Operating System :: POSIX :: Linux Classifier: Operating System :: POSIX Classifier: Operating System :: MacOS :: MacOS X Classifier: Operating System :: OS Independent Classifier: Natural Language :: English Classifier: Programming Language :: C Classifier: Programming Language :: Python Classifier: Programming Language :: Python :: 2 Classifier: Programming Language :: Python :: 2.4 Classifier: Programming Language :: Python :: 2.5 Classifier: Programming Language :: Python :: 2.6 Classifier: Programming Language :: Python :: 2.7 Classifier: Topic :: Utilities Classifier: Topic :: System :: Systems Administration Classifier: Topic :: System :: Filesystems Classifier: Topic :: System :: Distributed Computing Classifier: Topic :: Software Development :: Libraries Classifier: Topic :: Communications :: Usenet News Classifier: Topic :: System :: Archiving :: Backup Classifier: Topic :: System :: Archiving :: Mirroring Classifier: Topic :: System :: Archiving tahoe-lafs-1.10.0/README.txt000066400000000000000000000025671221140116300153130ustar00rootroot00000000000000========== Tahoe-LAFS ========== Tahoe-LAFS is a Free Software/Open Source decentralized data store. It distributes your filesystem across multiple servers, and even if some of the servers fail or are taken over by an attacker, the entire filesystem continues to work correctly and to preserve your privacy and security. To get started please see `quickstart.rst`_ in the docs directory. LICENCE ======= Copyright 2006-2013 The Tahoe-LAFS Software Foundation You may use this package under the GNU General Public License, version 2 or, at your option, any later version. You may use this package under the Transitive Grace Period Public Licence, version 1.0, or at your option, any later version. (You may choose to use this package under the terms of either licence, at your option.) See the file `COPYING.GPL`_ for the terms of the GNU General Public License, version 2. See the file `COPYING.TGPPL.rst`_ for the terms of the Transitive Grace Period Public Licence, version 1.0. See `TGPPL.PDF`_ for why the TGPPL exists, graphically illustrated on three slides. .. _quickstart.rst: https://github.com/tahoe-lafs/tahoe-lafs/blob/master/docs/quickstart.rst .. 
_COPYING.GPL: https://github.com/tahoe-lafs/tahoe-lafs/blob/master/COPYING.GPL .. _COPYING.TGPPL.rst: https://github.com/tahoe-lafs/tahoe-lafs/blob/master/COPYING.TGPPL.rst .. _TGPPL.PDF: https://tahoe-lafs.org/~zooko/tgppl.pdf tahoe-lafs-1.10.0/Tahoe.home000066400000000000000000000001101221140116300155050ustar00rootroot00000000000000This file exists so the preamble in bin/tahoe can find its source tree. tahoe-lafs-1.10.0/bin/000077500000000000000000000000001221140116300143535ustar00rootroot00000000000000tahoe-lafs-1.10.0/bin/tahoe-script.template000066400000000000000000000112241221140116300205120ustar00rootroot00000000000000#!/bin/false # You must specify a python interpreter. import sys; assert sys.version_info < (3,), ur"Tahoe-LAFS does not run under Python 3. Please use a version of Python between 2.6 and 2.7.x inclusive." import os, subprocess where = os.path.realpath(sys.argv[0]) base = os.path.dirname(os.path.dirname(where)) if sys.platform == "win32": perhaps_installed_tahoe = os.path.join(os.path.dirname(sys.executable), 'Scripts', 'tahoe.pyscript') else: perhaps_installed_tahoe = "/usr/bin/tahoe" whoami = '''\ I am a "bin%stahoe" executable for the convenience of running Tahoe-LAFS from its source distribution -- I work only when invoked as the "tahoe" script that lives in the "bin" subdirectory of a Tahoe source code distribution, and only if you have already run "python setup.py build". ''' % (os.path.sep,) # look for Tahoe.home . homemarker = os.path.join(base, "Tahoe.home") if not os.path.exists(homemarker): print(whoami) print('''\ I just tried to run and found that I am not living in such a directory, so I am stopping now. To run Tahoe after it has been is installed, please execute my brother, who gets installed into the appropriate place for executables when you run "make install" (perhaps as "%s"). ''' % (perhaps_installed_tahoe,)) sys.exit(1) # we've found our home. Put the tahoe support/lib etc. in our PYTHONPATH. if sys.platform == "win32": supportdir = os.path.join(base, "support", "Lib", "site-packages") else: supportdir = os.path.join(base, "support", "lib", "python%d.%d" % sys.version_info[:2], "site-packages") # update PYTHONPATH so that child processes (like twistd) will use this too pp = os.environ.get("PYTHONPATH") if pp: pp = os.pathsep.join([supportdir] + pp.split(os.pathsep)) else: pp = supportdir os.environ["PYTHONPATH"] = pp # find commandline args and the location of the tahoe executable. if sys.platform == "win32": import re from ctypes import WINFUNCTYPE, POINTER, byref, c_wchar_p, c_int, windll GetCommandLineW = WINFUNCTYPE(c_wchar_p)(("GetCommandLineW", windll.kernel32)) CommandLineToArgvW = WINFUNCTYPE(POINTER(c_wchar_p), c_wchar_p, POINTER(c_int)) \ (("CommandLineToArgvW", windll.shell32)) argc = c_int(0) argv_unicode = CommandLineToArgvW(GetCommandLineW(), byref(argc)) # See src/allmydata/scripts/runner.py for the corresponding unmangler. # Note that this doesn't escape \x7F. If it did, test_unicode_arguments_and_output # in test_runner.py wouldn't work. def mangle(s): return str(re.sub(u'[^\\x20-\\x7F]', lambda m: u'\x7F%x;' % (ord(m.group(0)),), s)) argv = [mangle(argv_unicode[i]) for i in xrange(0, argc.value)] # Take only the suffix with the same number of arguments as sys.argv. # This accounts for anything that can cause initial arguments to be stripped, # for example, the Python interpreter or any options passed to it, or runner # scripts such as 'coverage run'. 
It works even if there are no such arguments, # as in the case of a frozen executable created by bb-freeze or similar. argv = argv[-len(sys.argv):] # On Windows, the script is not directly executable and must be run via python. prefix = [sys.executable] script = os.path.join(base, "support", "Scripts", "tahoe.pyscript") args = argv[1:] else: # On non-Windows, invoke the script directly, so that 'top' for example shows 'tahoe'. prefix = [] script = os.path.join(base, "support", "bin", "tahoe") args = sys.argv[1:] # Support indirection via another "runner" script (e.g. coverage). # For example: bin/tahoe @RUNNER RUNNER_ARGS @tahoe TAHOE_ARGS if len(args) >= 1 and args[0].startswith('@'): runner = args[0][1:] if runner.endswith('.py') or runner.endswith('.pyscript'): prefix = [sys.executable] else: prefix = [] def _subst(a): if a == '@tahoe': return script return a command = prefix + [runner] + map(_subst, args[1:]) else: runner = script command = prefix + [script] + args if not os.path.exists(script): print(whoami) print('''\ I could not find the support script "%s". To run an installed version of Tahoe-LAFS, please execute the "tahoe" script that is installed into the appropriate place for executables when you run "python setup.py install" (perhaps as "%s"). ''' % (script, perhaps_installed_tahoe)) sys.exit(1) try: res = subprocess.call(command, env=os.environ) except Exception as le: print(whoami) print('''\ I just tried to invoke "%s" and got an exception. ''' % (runner,)) raise else: sys.exit(res) tahoe-lafs-1.10.0/docs/000077500000000000000000000000001221140116300145335ustar00rootroot00000000000000tahoe-lafs-1.10.0/docs/Makefile000066400000000000000000000006011221140116300161700ustar00rootroot00000000000000 SOURCES = subtree1.svg lease-tradeoffs.svg PNGS = $(patsubst %.svg,%.png,$(SOURCES)) EPSS = $(patsubst %.svg,%.eps,$(SOURCES)) .PHONY: images-png images-eps all: $(PNGS) $(EPSS) images-png: $(PNGS) images-eps: $(EPSS) %.png: %.svg inkscape -b white -d 90 -D --export-png $@ $< %.eps: %.svg inkscape --export-eps $@ $< %.html: %.rst rst2html.py $< $@ clean: rm -f *.png *.eps tahoe-lafs-1.10.0/docs/about.rst000066400000000000000000000134251221140116300164040ustar00rootroot00000000000000 .. -*- coding: utf-8 -*- Welcome to Tahoe-LAFS! ====================== Welcome to Tahoe-LAFS_, the first decentralized storage system with *provider-independent security*. .. _Tahoe-LAFS: https://tahoe-lafs.org What is "provider-independent security"? ======================================== Every seller of cloud storage services will tell you that their service is "secure". But what they mean by that is something fundamentally different from what we mean. What they mean by "secure" is that after you've given them the power to read and modify your data, they try really hard not to let this power be abused. This turns out to be difficult! Bugs, misconfigurations, or operator error can accidentally expose your data to another customer or to the public, or can corrupt your data. Criminals routinely gain illicit access to corporate servers. Even more insidious is the fact that the employees themselves sometimes violate customer privacy out of carelessness, avarice, or mere curiousity. The most conscientious of these service providers spend considerable effort and expense trying to mitigate these risks. What we mean by "security" is something different. 
*The service provider never has the ability to read or modify your data in the first place: never.* If you use Tahoe-LAFS, then all of the threats described above are non-issues to you. Not only is it easy and inexpensive for the service provider to maintain the security of your data, but in fact they couldn't violate its security if they tried. This is what we call *provider-independent security*. This guarantee is integrated naturally into the Tahoe-LAFS storage system and doesn't require you to perform a manual pre-encryption step or cumbersome key management. (After all, having to do cumbersome manual operations when storing or accessing your data would nullify one of the primary benefits of using cloud storage in the first place: convenience.) Here's how it works: .. image:: network-and-reliance-topology.svg A "storage grid" is made up of a number of storage servers. A storage server has direct attached storage (typically one or more hard disks). A "gateway" uses the storage servers and provides access to the filesystem over HTTP(S) or (S)FTP. Users do not rely on storage servers to provide *confidentiality* nor *integrity* for their data -- instead all of the data is encrypted and integrity-checked by the gateway, so that the servers can neither read nor modify the contents of the files. Users do rely on storage servers for *availability*. The ciphertext is erasure-coded into ``N`` shares distributed across at least ``H`` distinct storage servers (the default value for ``N`` is 10 and for ``H`` is 7) so that it can be recovered from any ``K`` of these servers (the default value of ``K`` is 3). Therefore only the failure of ``H-K+1`` (with the defaults, 5) servers can make the data unavailable. In the typical deployment mode each user runs her own gateway on her own machine. This way she relies on her own machine for the confidentiality and integrity of the data. An alternate deployment mode is that the gateway runs on a remote machine and the user connects to it over HTTPS or SFTP. This means that the operator of the gateway can view and modify the user's data (the user *relies on* the gateway for confidentiality and integrity), but the advantage is that the user can access the filesystem with a client that doesn't have the gateway software installed, such as an Internet kiosk or cell phone. Access Control ============== There are two kinds of files: immutable and mutable. When you upload a file to the storage grid you can choose which kind of file it will be in the grid. Immutable files can't be modified once they have been uploaded. A mutable file can be modified by someone with read-write access to it. A user can have read-write access to a mutable file or read-only access to it, or no access to it at all. A user who has read-write access to a mutable file or directory can give another user read-write access to that file or directory, or they can give read-only access to that file or directory. A user who has read-only access to a file or directory can give another user read-only access to it. When linking a file or directory into a parent directory, you can use a read-write link or a read-only link. If you use a read-write link, then anyone who has read-write access to the parent directory can gain read-write access to the child, and anyone who has read-only access to the parent directory can gain read-only access to the child. If you use a read-only link, then anyone who has either read-write or read-only access to the parent directory can gain read-only access to the child. 
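To make the read-only-link behavior concrete, here is a minimal, hypothetical
sketch using the gateway's web API (described in docs/webapi.rst). It assumes
a gateway listening on 127.0.0.1:3456, and the capability strings are
placeholders to be replaced with real ones. Because the child is attached by
its read-only cap, even someone holding the parent directory's read-write cap
gains only read-only access to that child::

  import httplib

  parent_rw_cap = "URI:DIR2:..."    # placeholder: read-write cap of the parent
  child_ro_cap = "URI:DIR2-RO:..."  # placeholder: read-only cap of the child

  conn = httplib.HTTPConnection("127.0.0.1", 3456)
  # "?t=uri" attaches an existing object (given by the cap in the request
  # body) under the chosen name inside the parent directory.
  conn.request("PUT", "/uri/%s/shared-with-friend?t=uri" % parent_rw_cap,
               child_ro_cap)
  print conn.getresponse().read()   # echoes the cap that was just linked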
For more technical detail, please see the `the doc page`_ on the Wiki. .. _the doc page: https://tahoe-lafs.org/trac/tahoe-lafs/wiki/Doc Get Started =========== To use Tahoe-LAFS, please see quickstart.rst_. .. _quickstart.rst: quickstart.rst License ======= Copyright 2006-2012 The Tahoe-LAFS Software Foundation You may use this package under the GNU General Public License, version 2 or, at your option, any later version. See the file COPYING.GPL_ for the terms of the GNU General Public License, version 2. You may use this package under the Transitive Grace Period Public Licence, version 1 or, at your option, any later version. The Transitive Grace Period Public Licence has requirements similar to the GPL except that it allows you to wait for up to twelve months after you redistribute a derived work before releasing the source code of your derived work. See the file COPYING.TGPPL.rst_ for the terms of the Transitive Grace Period Public Licence, version 1. (You may choose to use this package under the terms of either licence, at your option.) .. _COPYING.GPL: ../COPYING.GPL .. _COPYING.TGPPL.rst: ../COPYING.TGPPL.rst tahoe-lafs-1.10.0/docs/architecture.rst000066400000000000000000000735441221140116300177640ustar00rootroot00000000000000======================= Tahoe-LAFS Architecture ======================= 1. `Overview`_ 2. `The Key-Value Store`_ 3. `File Encoding`_ 4. `Capabilities`_ 5. `Server Selection`_ 6. `Swarming Download, Trickling Upload`_ 7. `The Filesystem Layer`_ 8. `Leases, Refreshing, Garbage Collection`_ 9. `File Repairer`_ 10. `Security`_ 11. `Reliability`_ Overview ======== (See the `docs/specifications directory `_ for more details.) There are three layers: the key-value store, the filesystem, and the application. The lowest layer is the key-value store. The keys are "capabilities" -- short ASCII strings -- and the values are sequences of data bytes. This data is encrypted and distributed across a number of nodes, such that it will survive the loss of most of the nodes. There are no hard limits on the size of the values, but there may be performance issues with extremely large values (just due to the limitation of network bandwidth). In practice, values as small as a few bytes and as large as tens of gigabytes are in common use. The middle layer is the decentralized filesystem: a directed graph in which the intermediate nodes are directories and the leaf nodes are files. The leaf nodes contain only the data -- they contain no metadata other than the length in bytes. The edges leading to leaf nodes have metadata attached to them about the file they point to. Therefore, the same file may be associated with different metadata if it is referred to through different edges. The top layer consists of the applications using the filesystem. Allmydata.com used it for a backup service: the application periodically copies files from the local disk onto the decentralized filesystem. We later provide read-only access to those files, allowing users to recover them. There are several other applications built on top of the Tahoe-LAFS filesystem (see the `RelatedProjects `_ page of the wiki for a list). The Key-Value Store =================== The key-value store is implemented by a grid of Tahoe-LAFS storage servers -- user-space processes. Tahoe-LAFS storage clients communicate with the storage servers over TCP. Storage servers hold data in the form of "shares". Shares are encoded pieces of files. There are a configurable number of shares for each file, 10 by default. 
Normally, each share is stored on a separate server, but in some cases a
single server can hold multiple shares of a file.

Nodes learn about each other through an "introducer". Each server connects
to the introducer at startup and announces its presence. Each client
connects to the introducer at startup, and receives a list of all servers
from it. Each client then connects to every server, creating a "bi-clique"
topology. In the current release, nodes behind NAT boxes will connect to all
nodes that they can open connections to, but they cannot open connections to
other nodes behind NAT boxes. Therefore, the more nodes behind NAT boxes,
the less the topology resembles the intended bi-clique topology.

The introducer is a Single Point of Failure ("SPoF"), in that clients who
never connect to the introducer will be unable to connect to any storage
servers, but once a client has been introduced to everybody, it does not
need the introducer again until it is restarted. The danger of a SPoF is
further reduced in two ways. First, the introducer is defined by a hostname
and a private key, which are easy to move to a new host in case the original
one suffers an unrecoverable hardware problem. Second, even if the private
key is lost, clients can be reconfigured to use a new introducer.

For future releases, we have plans to decentralize introduction, allowing
any server to tell a new client about all the others.

File Encoding
=============

When a client stores a file on the grid, it first encrypts the file. It then
breaks the encrypted file into small segments, in order to reduce the memory
footprint, and to decrease the lag between initiating a download and
receiving the first part of the file; for example the lag between hitting
"play" and a movie actually starting.

The client then erasure-codes each segment, producing blocks of which only a
subset are needed to reconstruct the segment (3 out of 10, with the default
settings). It sends one block from each segment to a given server. The set
of blocks on a given server constitutes a "share". Therefore a subset of the
shares (3 out of 10, by default) are needed to reconstruct the file.

A hash of the encryption key is used to form the "storage index", which is
used for both server selection (described below) and to index shares within
the Storage Servers on the selected nodes.

The client computes secure hashes of the ciphertext and of the shares. It
uses `Merkle Trees`_ so that it is possible to verify the correctness of a
subset of the data without requiring all of the data. For example, this
allows you to verify the correctness of the first segment of a movie file
and then begin playing the movie file in your movie viewer before the entire
movie file has been downloaded.

These hashes are stored in a small datastructure named the Capability
Extension Block which is stored on the storage servers alongside each share.

The capability contains the encryption key, the hash of the Capability
Extension Block, and any encoding parameters necessary to perform the
eventual decoding process. For convenience, it also contains the size of the
file being stored.

To download, the client that wishes to turn a capability into a sequence of
bytes will obtain the blocks from storage servers, use erasure-decoding to
turn them into segments of ciphertext, use the decryption key to convert
that into plaintext, then emit the plaintext bytes to the output target.

..
_`Merkle Trees`: http://systems.cs.colorado.edu/grunwald/Classes/Fall2003-InformationStorage/Papers/merkle-tree.pdf Capabilities ============ Capabilities to immutable files represent a specific set of bytes. Think of it like a hash function: you feed in a bunch of bytes, and you get out a capability, which is deterministically derived from the input data: changing even one bit of the input data will result in a completely different capability. Read-only capabilities to mutable files represent the ability to get a set of bytes representing some version of the file, most likely the latest version. Each read-only capability is unique. In fact, each mutable file has a unique public/private key pair created when the mutable file is created, and the read-only capability to that file includes a secure hash of the public key. Read-write capabilities to mutable files represent the ability to read the file (just like a read-only capability) and also to write a new version of the file, overwriting any extant version. Read-write capabilities are unique -- each one includes the secure hash of the private key associated with that mutable file. The capability provides both "location" and "identification": you can use it to retrieve a set of bytes, and then you can use it to validate ("identify") that these potential bytes are indeed the ones that you were looking for. The "key-value store" layer doesn't include human-meaningful names. Capabilities sit on the "global+secure" edge of `Zooko's Triangle`_. They are self-authenticating, meaning that nobody can trick you into accepting a file that doesn't match the capability you used to refer to that file. The filesystem layer (described below) adds human-meaningful names atop the key-value layer. .. _`Zooko's Triangle`: https://en.wikipedia.org/wiki/Zooko%27s_triangle Server Selection ================ When a file is uploaded, the encoded shares are sent to some servers. But to which ones? The "server selection" algorithm is used to make this choice. The storage index is used to consistently-permute the set of all servers nodes (by sorting them by ``HASH(storage_index+nodeid)``). Each file gets a different permutation, which (on average) will evenly distribute shares among the grid and avoid hotspots. Each server has announced its available space when it connected to the introducer, and we use that available space information to remove any servers that cannot hold an encoded share for our file. Then we ask some of the servers thus removed if they are already holding any encoded shares for our file; we use this information later. (We ask any servers which are in the first 2*``N`` elements of the permuted list.) We then use the permuted list of servers to ask each server, in turn, if it will hold a share for us (a share that was not reported as being already present when we talked to the full servers earlier, and that we have not already planned to upload to a different server). We plan to send a share to a server by sending an 'allocate_buckets() query' to the server with the number of that share. Some will say yes they can hold that share, others (those who have become full since they announced their available space) will say no; when a server refuses our request, we take that share to the next server on the list. In the response to allocate_buckets() the server will also inform us of any shares of that file that it already has. We keep going until we run out of shares that need to be stored. 
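The consistent permutation at the heart of server selection can be sketched
in a few lines. This is only an illustration: the real implementation has its
own hash-function and peer-id details, but the essential idea (a stable,
per-storage-index ordering of the servers) is the same::

  import hashlib

  def permuted_servers(storage_index, server_ids):
      # Sort servers by HASH(storage_index + serverid); each storage index
      # therefore yields its own stable ordering, spreading shares evenly
      # across the grid and avoiding hotspots.
      return sorted(server_ids,
                    key=lambda sid: hashlib.sha256(storage_index + sid).digest())

  si = "\x01" * 16                              # a made-up 16-byte storage index
  servers = ["serverid-%d" % i for i in range(5)]
  print permuted_servers(si, servers)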
At the end of the process, we'll have a table that maps each share number to a server, and then we can begin the encode and push phase, using the table to decide where each share should be sent. Most of the time, this will result in one share per server, which gives us maximum reliability. If there are fewer writable servers than there are unstored shares, we'll be forced to loop around, eventually giving multiple shares to a single server. If we have to loop through the node list a second time, we accelerate the query process, by asking each node to hold multiple shares on the second pass. In most cases, this means we'll never send more than two queries to any given node. If a server is unreachable, or has an error, or refuses to accept any of our shares, we remove it from the permuted list, so we won't query it again for this file. If a server already has shares for the file we're uploading, we add that information to the share-to-server table. This lets us do less work for files which have been uploaded once before, while making sure we still wind up with as many shares as we desire. Before a file upload is called successful, it has to pass an upload health check. For immutable files, we check to see that a condition called 'servers-of-happiness' is satisfied. When satisfied, 'servers-of-happiness' assures us that enough pieces of the file are distributed across enough servers on the grid to ensure that the availability of the file will not be affected if a few of those servers later fail. For mutable files and directories, we check to see that all of the encoded shares generated during the upload process were successfully placed on the grid. This is a weaker check than 'servers-of-happiness'; it does not consider any information about how the encoded shares are placed on the grid, and cannot detect situations in which all or a majority of the encoded shares generated during the upload process reside on only one storage server. We hope to extend 'servers-of-happiness' to mutable files in a future release of Tahoe-LAFS. If, at the end of the upload process, the appropriate upload health check fails, the upload is considered a failure. The current defaults use ``k`` = 3, ``servers_of_happiness`` = 7, and ``N`` = 10. ``N`` = 10 means that we'll try to place 10 shares. ``k`` = 3 means that we need any three shares to recover the file. ``servers_of_happiness`` = 7 means that we'll consider an immutable file upload to be successful if we can place shares on enough servers that there are 7 different servers, the correct functioning of any ``k`` of which guarantee the availability of the immutable file. ``N`` = 10 and ``k`` = 3 means there is a 3.3x expansion factor. On a small grid, you should set ``N`` about equal to the number of storage servers in your grid; on a large grid, you might set it to something smaller to avoid the overhead of contacting every server to place a file. In either case, you should then set ``k`` such that ``N``/``k`` reflects your desired availability goals. The best value for ``servers_of_happiness`` will depend on how you use Tahoe-LAFS. In a friendnet with a variable number of servers, it might make sense to set it to the smallest number of servers that you expect to have online and accepting shares at any given time. In a stable environment without much server churn, it may make sense to set ``servers_of_happiness`` = ``N``. When downloading a file, the current version just asks all known servers for any shares they might have. 
Once it has received enough responses that it knows where to find the needed k shares, it downloads at least the first segment from those servers. This means that it tends to download shares from the fastest servers. If some servers had more than one share, it will continue sending "Do You Have Block" requests to other servers, so that it can download subsequent segments from distinct servers (sorted by their DYHB round-trip times), if possible. *future work* A future release will use the server selection algorithm to reduce the number of queries that must be sent out. Other peer-node selection algorithms are possible. One earlier version (known as "Tahoe 3") used the permutation to place the nodes around a large ring, distributed the shares evenly around the same ring, then walked clockwise from 0 with a basket. Each time it encountered a share, it put it in the basket, each time it encountered a server, give it as many shares from the basket as they'd accept. This reduced the number of queries (usually to 1) for small grids (where ``N`` is larger than the number of nodes), but resulted in extremely non-uniform share distribution, which significantly hurt reliability (sometimes the permutation resulted in most of the shares being dumped on a single node). Another algorithm (known as "denver airport" [#naming]_) uses the permuted hash to decide on an approximate target for each share, then sends lease requests via Chord routing. The request includes the contact information of the uploading node, and asks that the node which eventually accepts the lease should contact the uploader directly. The shares are then transferred over direct connections rather than through multiple Chord hops. Download uses the same approach. This allows nodes to avoid maintaining a large number of long-term connections, at the expense of complexity and latency. .. [#naming] all of these names are derived from the location where they were concocted, in this case in a car ride from Boulder to DEN. To be precise, "Tahoe 1" was an unworkable scheme in which everyone who holds shares for a given file would form a sort of cabal which kept track of all the others, "Tahoe 2" is the first-100-nodes in the permuted hash described in this document, and "Tahoe 3" (or perhaps "Potrero hill 1") was the abandoned ring-with-many-hands approach. Swarming Download, Trickling Upload =================================== Because the shares being downloaded are distributed across a large number of nodes, the download process will pull from many of them at the same time. The current encoding parameters require 3 shares to be retrieved for each segment, which means that up to 3 nodes will be used simultaneously. For larger networks, 8-of-22 encoding could be used, meaning 8 nodes can be used simultaneously. This allows the download process to use the sum of the available nodes' upload bandwidths, resulting in downloads that take full advantage of the common 8x disparity between download and upload bandwith on modern ADSL lines. On the other hand, uploads are hampered by the need to upload encoded shares that are larger than the original data (3.3x larger with the current default encoding parameters), through the slow end of the asymmetric connection. This means that on a typical 8x ADSL line, uploading a file will take about 32 times longer than downloading it again later. Smaller expansion ratios can reduce this upload penalty, at the expense of reliability (see `Reliability`_, below). 
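To make the expansion numbers in this section concrete, the small sketch
below just evaluates the N/k storage expansion factor for the two encodings
mentioned above; it is plain arithmetic, not Tahoe code::

  def expansion_factor(k, n):
      # N shares are stored in total, and any k of them suffice to rebuild
      # the file, so each share holds roughly 1/k of the original data.
      return float(n) / k

  for (k, n) in [(3, 10), (8, 22)]:
      print "%d-of-%d encoding stores about %.2fx the original file size" % (
          k, n, expansion_factor(k, n))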
By using an "upload helper", this penalty is eliminated: the client does a 1x upload of encrypted data to the helper, then the helper performs encoding and pushes the shares to the storage servers. This is an improvement if the helper has significantly higher upload bandwidth than the client, so it makes the most sense for a commercially-run grid for which all of the storage servers are in a colo facility with high interconnect bandwidth. In this case, the helper is placed in the same facility, so the helper-to-storage-server bandwidth is huge. See ``_ for details about the upload helper. The Filesystem Layer ==================== The "filesystem" layer is responsible for mapping human-meaningful pathnames (directories and filenames) to pieces of data. The actual bytes inside these files are referenced by capability, but the filesystem layer is where the directory names, file names, and metadata are kept. The filesystem layer is a graph of directories. Each directory contains a table of named children. These children are either other directories or files. All children are referenced by their capability. A directory has two forms of capability: read-write caps and read-only caps. The table of children inside the directory has a read-write and read-only capability for each child. If you have a read-only capability for a given directory, you will not be able to access the read-write capability of its children. This results in "transitively read-only" directory access. By having two different capabilities, you can choose which you want to share with someone else. If you create a new directory and share the read-write capability for it with a friend, then you will both be able to modify its contents. If instead you give them the read-only capability, then they will *not* be able to modify the contents. Any capability that you receive can be linked in to any directory that you can modify, so very powerful shared+published directory structures can be built from these components. This structure enable individual users to have their own personal space, with links to spaces that are shared with specific other users, and other spaces that are globally visible. Leases, Refreshing, Garbage Collection ====================================== When a file or directory in the virtual filesystem is no longer referenced, the space that its shares occupied on each storage server can be freed, making room for other shares. Tahoe-LAFS uses a garbage collection ("GC") mechanism to implement this space-reclamation process. Each share has one or more "leases", which are managed by clients who want the file/directory to be retained. The storage server accepts each share for a pre-defined period of time, and is allowed to delete the share if all of the leases are cancelled or allowed to expire. Garbage collection is not enabled by default: storage servers will not delete shares without being explicitly configured to do so. When GC is enabled, clients are responsible for renewing their leases on a periodic basis at least frequently enough to prevent any of the leases from expiring before the next renewal pass. See ``_ for further information, and for how to configure garbage collection. File Repairer ============= Shares may go away because the storage server hosting them has suffered a failure: either temporary downtime (affecting availability of the file), or a permanent data loss (affecting the preservation of the file). Hard drives crash, power supplies explode, coffee spills, and asteroids strike. 
The goal of a robust distributed filesystem is to survive these setbacks. To work against this slow, continual loss of shares, a File Checker is used to periodically count the number of shares still available for any given file. A more extensive form of checking known as the File Verifier can download the ciphertext of the target file and perform integrity checks (using strong hashes) to make sure the data is still intact. When the file is found to have decayed below some threshold, the File Repairer can be used to regenerate and re-upload the missing shares. These processes are conceptually distinct (the repairer is only run if the checker/verifier decides it is necessary), but in practice they will be closely related, and may run in the same process.

The repairer process does not get the full capability of the file to be maintained: it merely gets the "repairer capability" subset, which does not include the decryption key. The File Verifier uses that data to find out which nodes ought to hold shares for this file, and to see if those nodes are still around and willing to provide the data. If the file is not healthy enough, the File Repairer is invoked to download the ciphertext, regenerate any missing shares, and upload them to new nodes. The goal of the File Repairer is to finish up with a full set of ``N`` shares.

There are a number of engineering issues to be resolved here. The bandwidth, disk IO, and CPU time consumed by the verification/repair process must be balanced against the robustness that it provides to the grid. The nodes involved in repair will have very different access patterns than normal nodes, such that these processes may need to be run on hosts with more memory or network connectivity than usual. The frequency of repair will directly affect the resources consumed. In some cases, verification of multiple files can be performed at the same time, and repair of files can be delegated off to other nodes.

*future work*

Currently there are two modes of checking on the health of your file: "Checker" simply asks storage servers which shares they have and does nothing to try to verify that they aren't lying. "Verifier" downloads and cryptographically verifies every bit of every share of the file from every server, which costs a lot of network and CPU. A future improvement would be to make a random-sampling verifier which downloads and cryptographically verifies only a few randomly-chosen blocks from each server. This would require much less network and CPU, but it could make it extremely unlikely that any sort of corruption -- even malicious corruption intended to evade detection -- would go undetected. This would be an instance of a cryptographic notion called "Proof of Retrievability". Note that to implement this requires no change to the server or to the cryptographic data structure -- with the current data structure and the current protocol it is up to the client which blocks they choose to download, so this would be solely a change in client behavior.

Security
========

The design goal for this project is that an attacker may be able to deny service (i.e.
prevent you from recovering a file that was uploaded earlier) but can accomplish none of the following three attacks: 1) violate confidentiality: the attacker gets to view data to which you have not granted them access 2) violate integrity: the attacker convinces you that the wrong data is actually the data you were intending to retrieve 3) violate unforgeability: the attacker gets to modify a mutable file or directory (either the pathnames or the file contents) to which you have not given them write permission Integrity (the promise that the downloaded data will match the uploaded data) is provided by the hashes embedded in the capability (for immutable files) or the digital signature (for mutable files). Confidentiality (the promise that the data is only readable by people with the capability) is provided by the encryption key embedded in the capability (for both immutable and mutable files). Data availability (the hope that data which has been uploaded in the past will be downloadable in the future) is provided by the grid, which distributes failures in a way that reduces the correlation between individual node failure and overall file recovery failure, and by the erasure-coding technique used to generate shares. Many of these security properties depend upon the usual cryptographic assumptions: the resistance of AES and RSA to attack, the resistance of SHA-256 to collision attacks and pre-image attacks, and upon the proximity of 2^-128 and 2^-256 to zero. A break in AES would allow a confidentiality violation, a collision break in SHA-256 would allow a consistency violation, and a break in RSA would allow a mutability violation. There is no attempt made to provide anonymity, neither of the origin of a piece of data nor the identity of the subsequent downloaders. In general, anyone who already knows the contents of a file will be in a strong position to determine who else is uploading or downloading it. Also, it is quite easy for a sufficiently large coalition of nodes to correlate the set of nodes who are all uploading or downloading the same file, even if the attacker does not know the contents of the file in question. Also note that the file size and (when convergence is being used) a keyed hash of the plaintext are not protected. Many people can determine the size of the file you are accessing, and if they already know the contents of a given file, they will be able to determine that you are uploading or downloading the same one. The capability-based security model is used throughout this project. Directory operations are expressed in terms of distinct read- and write- capabilities. Knowing the read-capability of a file is equivalent to the ability to read the corresponding data. The capability to validate the correctness of a file is strictly weaker than the read-capability (possession of read-capability automatically grants you possession of validate-capability, but not vice versa). These capabilities may be expressly delegated (irrevocably) by simply transferring the relevant secrets. The application layer can provide whatever access model is desired, built on top of this capability access model. The first big user of this system so far is allmydata.com. The allmydata.com access model currently works like a normal web site, using username and password to give a user access to her "virtual drive". In addition, allmydata.com users can share individual files (using a file sharing interface built on top of the immutable file read capabilities). 
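The "strictly weaker" relationship between write-, read-, and validate-capabilities can be modelled as a chain of one-way derivations: holders of a stronger capability can compute the weaker ones, but not the reverse. The sketch below illustrates that structure only; the real Tahoe-LAFS capability formats and derivation functions are different and are specified elsewhere in the documentation. ::

    import hashlib

    def derive(tag, secret):
        # One-way derivation: easy to go "down" the chain, infeasible to go
        # back "up".
        return hashlib.sha256(tag + b":" + secret).hexdigest()

    class WriteCap:
        def __init__(self, secret):
            self.secret = secret
        def readcap(self):
            return ReadCap(derive(b"read", self.secret))

    class ReadCap:
        def __init__(self, secret):
            self.secret = secret
        def verifycap(self):
            return VerifyCap(derive(b"verify", self.secret))

    class VerifyCap:
        def __init__(self, secret):
            self.secret = secret

    wcap = WriteCap(b"top-secret-write-key")
    rcap = wcap.readcap()      # a write-cap holder can derive the read-cap
    vcap = rcap.verifycap()    # a read-cap holder can derive the verify-cap
    # ...but a verify-cap holder cannot recover the read- or write-cap, which
    # is what makes delegation by handing over a capability string attenuable.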
Reliability
===========

File encoding and peer-node selection parameters can be adjusted to achieve different goals. Each choice results in a number of properties; there are many tradeoffs.

First, some terms: the erasure-coding algorithm is described as ``k``-out-of-``N`` (for this release, the default values are ``k`` = 3 and ``N`` = 10). Each grid will have some number of nodes; this number will rise and fall over time as nodes join, drop out, come back, and leave forever. Files are of various sizes, some are popular, others are unpopular. Nodes have various capacities, variable upload/download bandwidths, and network latency. Most of the mathematical models that look at node failure assume some average (and independent) probability 'P' of a given node being available: this can be high (servers tend to be online and available >90% of the time) or low (laptops tend to be turned on for an hour then disappear for several days). Files are encoded in segments of a given maximum size, which affects memory usage.

The ratio of ``N``/``k`` is the "expansion factor". Higher expansion factors improve reliability very quickly (the binomial distribution curve is very sharp), but consume much more grid capacity. When P=50%, the absolute value of ``k`` affects the granularity of the binomial curve (1-out-of-2 is much worse than 50-out-of-100), but high values asymptotically approach a constant (i.e. 500-of-1000 is not much better than 50-of-100). When P is high and the expansion factor is held constant, higher values of ``k`` and ``N`` give much better reliability (for P=99%, 50-out-of-100 is much much better than 5-of-10, roughly 10^50 times better), because there are more shares that can be lost without losing the file.

Likewise, the total number of nodes in the network affects the same granularity: having only one node means a single point of failure, no matter how many copies of the file you make. Independent nodes (with uncorrelated failures) are necessary to hit the mathematical ideals: if you have 100 nodes but they are all in the same office building, then a single power failure will take out all of them at once. Pseudospoofing, also called a "Sybil Attack", is where a single attacker convinces you that they are actually multiple servers, so that you think you are using a large number of independent nodes, but in fact you have a single point of failure (where the attacker turns off all their machines at once). Large grids, with lots of truly independent nodes, will enable the use of lower expansion factors to achieve the same reliability, but will increase overhead because each node needs to know something about every other, and the rate at which nodes come and go will be higher (requiring network maintenance traffic). Also, the File Repairer work will increase with larger grids, although then the job can be distributed out to more nodes.

Higher values of ``N`` increase overhead: more shares means more Merkle hashes that must be included with the data, and more nodes to contact to retrieve the shares. Smaller segment sizes reduce memory usage (since each segment must be held in memory while erasure coding runs) and improve "alacrity" (since downloading can validate a smaller piece of data faster, delivering it to the target sooner), but also increase overhead (because more blocks means more Merkle hashes to validate them).

In general, small private grids should work well, but the participants will have to decide between storage overhead and reliability.
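That tradeoff can be quantified with the binomial model referred to above: if each server is independently available with probability p, a file is recoverable when at least ``k`` of its ``N`` shares sit on available servers. The following is a minimal sketch of that standard calculation, not code from Tahoe itself (it needs Python 3.8+ for ``math.comb``)::

    from math import comb

    def p_recoverable(k, n, p):
        """Probability that at least k of n independently-available shares survive."""
        return sum(comb(n, i) * p**i * (1 - p)**(n - i)
                   for i in range(k, n + 1))

    for k, n in [(1, 2), (3, 10), (5, 10), (25, 100), (50, 100)]:
        for p in (0.5, 0.9, 0.99):
            print("%3d-of-%-3d  p=%.2f  P(recoverable) = %.15f"
                  % (k, n, p, p_recoverable(k, n, p)))

Running this at p=0.99 shows why 50-of-100 is enormously more reliable than 5-of-10 at the same 2x expansion; it also shows that the model is only as good as the independence assumption discussed above.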
Large stable grids will be able to reduce the expansion factor down to a bare minimum while still retaining high reliability, but large unstable grids (where nodes are coming and going very quickly) may require more repair/verification bandwidth than actual upload/download traffic. tahoe-lafs-1.10.0/docs/backdoors.rst000066400000000000000000000040751221140116300172420ustar00rootroot00000000000000Statement on Backdoors ====================== October 5, 2010 The New York Times has `recently reported`_ that the current U.S. administration is proposing a bill that would apparently, if passed, require communication systems to facilitate government wiretapping and access to encrypted data. (login required; username/password pairs available at `bugmenot`_). .. _recently reported: https://www.nytimes.com/2010/09/27/us/27wiretap.html .. _bugmenot: http://www.bugmenot.com/view/nytimes.com Commentary by the `Electronic Frontier Foundation`_, `Peter Suderman / Reason`_, `Julian Sanchez / Cato Institute`_. .. _Electronic Frontier Foundation: https://www.eff.org/deeplinks/2010/09/government-seeks .. _Peter Suderman / Reason: http://reason.com/blog/2010/09/27/obama-administration-frustrate .. _Julian Sanchez / Cato Institute: http://www.cato-at-liberty.org/designing-an-insecure-internet/ The core Tahoe developers promise never to change Tahoe-LAFS to facilitate government access to data stored or transmitted by it. Even if it were desirable to facilitate such access -- which it is not -- we believe it would not be technically feasible to do so without severely compromising Tahoe-LAFS' security against other attackers. There have been many examples in which backdoors intended for use by government have introduced vulnerabilities exploitable by other parties (a notable example being the Greek cellphone eavesdropping scandal in 2004/5). RFCs `1984`_ and `2804`_ elaborate on the security case against such backdoors. .. _1984: https://tools.ietf.org/html/rfc1984 .. _2804: https://tools.ietf.org/html/rfc2804 Note that since Tahoe-LAFS is open-source software, forks by people other than the current core developers are possible. In that event, we would try to persuade any such forks to adopt a similar policy. The following Tahoe-LAFS developers agree with this statement: David-Sarah Hopwood [Daira Hopwood] Zooko Wilcox-O'Hearn Brian Warner Kevan Carstensen Frédéric Marti Jack Lloyd François Deppierraz Yu Xue Marc Tooley Peter Secor Shawn Willden Terrell Russell tahoe-lafs-1.10.0/docs/backupdb.rst000066400000000000000000000202061221140116300170400ustar00rootroot00000000000000================== The Tahoe BackupDB ================== 1. `Overview`_ 2. `Schema`_ 3. `Upload Operation`_ 4. `Directory Operations`_ Overview ======== To speed up backup operations, Tahoe maintains a small database known as the "backupdb". This is used to avoid re-uploading files which have already been uploaded recently. This database lives in ``~/.tahoe/private/backupdb.sqlite``, and is a SQLite single-file database. It is used by the "``tahoe backup``" command. In the future, it may optionally be used by other commands such as "``tahoe cp``". The purpose of this database is twofold: to manage the file-to-cap translation (the "upload" step) and the directory-to-cap translation (the "mkdir-immutable" step). The overall goal of optimizing backup is to reduce the work required when the source disk has not changed (much) since the last backup. 
In the ideal case, running "``tahoe backup``" twice in a row, with no intervening changes to the disk, will not require any network traffic. Minimal changes to the source disk should result in minimal traffic. This database is optional. If it is deleted, the worst effect is that a subsequent backup operation may use more effort (network bandwidth, CPU cycles, and disk IO) than it would have without the backupdb. The database uses sqlite3, which is included as part of the standard Python library with Python 2.5 and later. For Python 2.4, Tahoe will try to install the "pysqlite" package at build-time, but this will succeed only if sqlite3 with development headers is already installed. On Debian and Debian derivatives you can install the "python-pysqlite2" package (which, despite the name, actually provides sqlite3 rather than sqlite2). On old distributions such as Debian etch (4.0 "oldstable") or Ubuntu Edgy (6.10) the "python-pysqlite2" package won't work, but the "sqlite3-dev" package will. Schema ====== The database contains the following tables:: CREATE TABLE version ( version integer # contains one row, set to 1 ); CREATE TABLE local_files ( path varchar(1024), PRIMARY KEY -- index, this is an absolute UTF-8-encoded local filename size integer, -- os.stat(fn)[stat.ST_SIZE] mtime number, -- os.stat(fn)[stat.ST_MTIME] ctime number, -- os.stat(fn)[stat.ST_CTIME] fileid integer ); CREATE TABLE caps ( fileid integer PRIMARY KEY AUTOINCREMENT, filecap varchar(256) UNIQUE -- URI:CHK:... ); CREATE TABLE last_upload ( fileid INTEGER PRIMARY KEY, last_uploaded TIMESTAMP, last_checked TIMESTAMP ); CREATE TABLE directories ( dirhash varchar(256) PRIMARY KEY, dircap varchar(256), last_uploaded TIMESTAMP, last_checked TIMESTAMP ); Upload Operation ================ The upload process starts with a pathname (like ``~/.emacs``) and wants to end up with a file-cap (like ``URI:CHK:...``). The first step is to convert the path to an absolute form (``/home/warner/.emacs``) and do a lookup in the local_files table. If the path is not present in this table, the file must be uploaded. The upload process is: 1. record the file's size, ctime (which is the directory-entry change time or file creation time depending on OS) and modification time 2. upload the file into the grid, obtaining an immutable file read-cap 3. add an entry to the 'caps' table, with the read-cap, to get a fileid 4. add an entry to the 'last_upload' table, with the current time 5. add an entry to the 'local_files' table, with the fileid, the path, and the local file's size/ctime/mtime If the path *is* present in 'local_files', the easy-to-compute identifying information is compared: file size and ctime/mtime. If these differ, the file must be uploaded. The row is removed from the local_files table, and the upload process above is followed. If the path is present but ctime or mtime differs, the file may have changed. If the size differs, then the file has certainly changed. At this point, a future version of the "backup" command might hash the file and look for a match in an as-yet-defined table, in the hopes that the file has simply been moved from somewhere else on the disk. This enhancement requires changes to the Tahoe upload API before it can be significantly more efficient than simply handing the file to Tahoe and relying upon the normal convergence to notice the similarity. If ctime, mtime, or size is different, the client will upload the file, as above. 
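Using the schema above, the lookup-and-compare step can be sketched with the stdlib ``sqlite3`` module. This illustrates the decision logic only and is not the code behind "``tahoe backup``"; in particular, ``upload_to_grid`` is a hypothetical stand-in for the real upload call, and the periodic filecheck described next is omitted. ::

    import os, sqlite3, stat

    def check_file(conn, path, upload_to_grid):
        """Return a filecap for `path`, re-uploading only if it looks changed."""
        path = os.path.abspath(path)
        s = os.stat(path)
        size, mtime, ctime = s[stat.ST_SIZE], s[stat.ST_MTIME], s[stat.ST_CTIME]
        row = conn.execute("SELECT size, mtime, ctime, fileid"
                           " FROM local_files WHERE path=?", (path,)).fetchone()
        if row and row[0] == size and row[1] == mtime and row[2] == ctime:
            # Identifiers match: assume unchanged and re-use the old cap
            # (subject to the periodic filecheck described below).
            return conn.execute("SELECT filecap FROM caps WHERE fileid=?",
                                (row[3],)).fetchone()[0]
        # New or changed file: upload it and record the new cap.
        if row:
            conn.execute("DELETE FROM local_files WHERE path=?", (path,))
        filecap = upload_to_grid(path)        # hypothetical upload helper
        cur = conn.execute("INSERT INTO caps (filecap) VALUES (?)", (filecap,))
        fileid = cur.lastrowid
        conn.execute("INSERT INTO last_upload (fileid, last_uploaded, last_checked)"
                     " VALUES (?, datetime('now'), datetime('now'))", (fileid,))
        conn.execute("INSERT INTO local_files (path, size, mtime, ctime, fileid)"
                     " VALUES (?, ?, ?, ?, ?)", (path, size, mtime, ctime, fileid))
        conn.commit()
        return filecap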
If these identifiers are the same, the client will assume that the file is unchanged (unless the ``--ignore-timestamps`` option is provided, in which case the client always re-uploads the file), and it may be allowed to skip the upload. For safety, however, we require the client periodically perform a filecheck on these probably-already-uploaded files, and re-upload anything that doesn't look healthy. The client looks the fileid up in the 'last_checked' table, to see how long it has been since the file was last checked. A "random early check" algorithm should be used, in which a check is performed with a probability that increases with the age of the previous results. E.g. files that were last checked within a month are not checked, files that were checked 5 weeks ago are re-checked with 25% probability, 6 weeks with 50%, more than 8 weeks are always checked. This reduces the "thundering herd" of filechecks-on-everything that would otherwise result when a backup operation is run one month after the original backup. If a filecheck reveals the file is not healthy, it is re-uploaded. If the filecheck shows the file is healthy, or if the filecheck was skipped, the client gets to skip the upload, and uses the previous filecap (from the 'caps' table) to add to the parent directory. If a new file is uploaded, a new entry is put in the 'caps' and 'last_upload' table, and an entry is made in the 'local_files' table to reflect the mapping from local disk pathname to uploaded filecap. If an old file is re-uploaded, the 'last_upload' entry is updated with the new timestamps. If an old file is checked and found healthy, the 'last_upload' entry is updated. Relying upon timestamps is a compromise between efficiency and safety: a file which is modified without changing the timestamp or size will be treated as unmodified, and the "``tahoe backup``" command will not copy the new contents into the grid. The ``--no-timestamps`` option can be used to disable this optimization, forcing every byte of the file to be hashed and encoded. Directory Operations ==================== Once the contents of a directory are known (a filecap for each file, and a dircap for each directory), the backup process must find or create a tahoe directory node with the same contents. The contents are hashed, and the hash is queried in the 'directories' table. If found, the last-checked timestamp is used to perform the same random-early-check algorithm described for files above, but no new upload is performed. Since "``tahoe backup``" creates immutable directories, it is perfectly safe to re-use a directory from a previous backup. If not found, the web-API "mkdir-immutable" operation is used to create a new directory, and an entry is stored in the table. The comparison operation ignores timestamps and metadata, and pays attention solely to the file names and contents. By using a directory-contents hash, the "``tahoe backup``" command is able to re-use directories from other places in the backed up data, or from old backups. This means that renaming a directory and moving a subdirectory to a new parent both count as "minor changes" and will result in minimal Tahoe operations and subsequent network traffic (new directories will be created for the modified directory and all of its ancestors). It also means that you can perform a backup ("#1"), delete a file or directory, perform a backup ("#2"), restore it, and then the next backup ("#3") will re-use the directories from backup #1. 
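The directory half of the process can be sketched the same way. The serialization fed to the hash here (sorted name/cap pairs joined by newlines) is only one plausible encoding and not necessarily the one "``tahoe backup``" actually uses; ``mkdir_immutable`` is a hypothetical stand-in for the web-API "mkdir-immutable" call, and ``check_probability`` is one possible realization of the random-early-check schedule described above. ::

    import hashlib, random, time

    def dirhash(contents):
        # contents: dict mapping child name -> filecap or dircap
        lines = ["%s %s" % (name, cap) for name, cap in sorted(contents.items())]
        return hashlib.sha256("\n".join(lines).encode("utf-8")).hexdigest()

    def check_probability(age_days):
        """Random-early-check schedule like the one described above."""
        if age_days <= 28:      # checked within the last month: skip
            return 0.0
        if age_days >= 56:      # more than 8 weeks old: always check
            return 1.0
        return (age_days - 28) / 28.0   # ~25% at 5 weeks, ~50% at 6 weeks

    def find_or_create_dir(conn, contents, mkdir_immutable):
        h = dirhash(contents)
        row = conn.execute("SELECT dircap, last_checked FROM directories"
                           " WHERE dirhash=?", (h,)).fetchone()
        if row:
            dircap, last_checked = row
            age_days = (time.time() - last_checked) / 86400.0
            if random.random() < check_probability(age_days):
                pass   # would run a filecheck (and repair if needed) -- omitted
            return dircap            # re-use the old immutable directory
        dircap = mkdir_immutable(contents)   # hypothetical web-API helper
        conn.execute("INSERT OR REPLACE INTO directories"
                     " (dirhash, dircap, last_uploaded, last_checked)"
                     " VALUES (?, ?, ?, ?)", (h, dircap, time.time(), time.time()))
        conn.commit()
        return dircap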
The best case is a null backup, in which nothing has changed. This will result in minimal network bandwidth: one directory read and two modifies. The ``Archives/`` directory must be read to locate the latest backup, and must be modified to add a new snapshot, and the ``Latest/`` directory will be updated to point to that same snapshot. tahoe-lafs-1.10.0/docs/cautions.rst000066400000000000000000000062111221140116300171120ustar00rootroot00000000000000 ======================================================= Things To Be Careful About As We Venture Boldly Forth ======================================================= See also known_issues.rst_. .. _known_issues.rst: file:known_issues.rst Timing Attacks ============== Asymmetric-key cryptography operations are particularly sensitive to side-channel attacks. Unless the library is carefully hardened against timing attacks, it is dangerous to allow an attacker to measure how long signature and pubkey-derivation operations take. With enough samples, the attacker can deduce the private signing key from these measurements. (Note that verification operations are only sensitive if the verifying key is secret, which is not the case for anything in Tahoe). We currently use private-key operations in mutable-file writes, and anticipate using them in signed-introducer announcements and accounting setup. Mutable-file writes can reveal timing information to the attacker because the signature operation takes place in the middle of a read-modify-write cycle. Modifying a directory requires downloading the old contents of the mutable file, modifying the contents, signing the new contents, then uploading the new contents. By observing the elapsed time between the receipt of the last packet for the download, and the emission of the first packet of the upload, the attacker will learn information about how long the signature took. The attacker might ensure that they run one of the servers, and delay responding to the download request so that their packet is the last one needed by the client. They might also manage to be the first server to which a new upload packet is sent. This attack gives the adversary timing information about one signature operation per mutable-file write. Note that the UCWE automatic-retry response (used by default in directory modification code) can cause multiple mutable-file read-modify-write cycles per user-triggered operation, giving the adversary a slightly higher multiplier. The signed-introducer announcement involves a signature made as the client node is booting, before the first connection is established to the Introducer. This might reveal timing information if any information is revealed about the client's exact boot time: the signature operation starts a fixed number of cycles after node startup, and the first packet to the Introducer is sent a fixed number of cycles after the signature is made. An adversary who can compare the node boot time against the transmission time of the first packet will learn information about the signature operation, one measurement per reboot. We currently do not provide boot-time information in Introducer messages or other client-to-server data. In general, we are not worried about these leakages, because timing-channel attacks typically require thousands or millions of measurements to detect the (presumably) small timing variations exposed by our asymmetric crypto operations, which would require thousands of mutable-file writes or thousands of reboots to be of use to the adversary. 
However, future authors should take care to not make changes that could provide additional information to attackers. tahoe-lafs-1.10.0/docs/configuration.rst000066400000000000000000000707231221140116300201450ustar00rootroot00000000000000============================= Configuring a Tahoe-LAFS node ============================= 1. `Node Types`_ 2. `Overall Node Configuration`_ 3. `Client Configuration`_ 4. `Storage Server Configuration`_ 5. `Frontend Configuration`_ 6. `Running A Helper`_ 7. `Running An Introducer`_ 8. `Other Files in BASEDIR`_ 9. `Other files`_ 10. `Example`_ A Tahoe-LAFS node is configured by writing to files in its base directory. These files are read by the node when it starts, so each time you change them, you need to restart the node. The node also writes state to its base directory, so it will create files on its own. This document contains a complete list of the config files that are examined by the client node, as well as the state files that you'll observe in its base directory. The main file is named "``tahoe.cfg``", and is an ".INI"-style configuration file (parsed by the Python stdlib 'ConfigParser' module: "``[name]``" section markers, lines with "``key.subkey: value``", rfc822-style continuations). There are also other files containing information that does not easily fit into this format. The "``tahoe create-node``" or "``tahoe create-client``" command will create an initial ``tahoe.cfg`` file for you. After creation, the node will never modify the ``tahoe.cfg`` file: all persistent state is put in other files. The item descriptions below use the following types: ``boolean`` one of (True, yes, on, 1, False, off, no, 0), case-insensitive ``strports string`` a Twisted listening-port specification string, like "``tcp:80``" or "``tcp:3456:interface=127.0.0.1``". For a full description of the format, see `the Twisted strports documentation `_. Please note, if interface= is not specified, Tahoe-LAFS will attempt to bind the port specified on all interfaces. ``FURL string`` a Foolscap endpoint identifier, like ``pb://soklj4y7eok5c3xkmjeqpw@192.168.69.247:44801/eqpwqtzm`` Node Types ========== A node can be a client/server, an introducer, a statistics gatherer, or a key generator. Client/server nodes provide one or more of the following services: * web-API service * SFTP service * FTP service * drop-upload service * helper service * storage service. A client/server that provides storage service (i.e. storing shares for clients) is called a "storage server". If it provides any of the other services, it is a "storage client" (a node can be both a storage server and a storage client). A client/server node that provides web-API service is called a "gateway". Overall Node Configuration ========================== This section controls the network behavior of the node overall: which ports and IP addresses are used, when connections are timed out, etc. This configuration applies to all node types and is independent of the services that the node is offering. If your node is behind a firewall or NAT device and you want other clients to connect to it, you'll need to open a port in the firewall or NAT, and specify that port number in the tub.port option. If behind a NAT, you *may* need to set the ``tub.location`` option described below. ``[node]`` ``nickname = (UTF-8 string, optional)`` This value will be displayed in management tools as this node's "nickname". If not provided, the nickname will be set to "". This string shall be a UTF-8 encoded Unicode string. 
``web.port = (strports string, optional)`` This controls where the node's web server should listen, providing node status and, if the node is a client/server, providing web-API service as defined in `webapi.rst _`. This file contains a Twisted "strports" specification such as "``3456``" or "``tcp:3456:interface=127.0.0.1``". The "``tahoe create-node``" or "``tahoe create-client``" commands set the ``web.port`` to "``tcp:3456:interface=127.0.0.1``" by default; this is overridable by the ``--webport`` option. You can make it use SSL by writing "``ssl:3456:privateKey=mykey.pem:certKey=cert.pem``" instead. If this is not provided, the node will not run a web server. ``web.static = (string, optional)`` This controls where the ``/static`` portion of the URL space is served. The value is a directory name (``~username`` is allowed, and non-absolute names are interpreted relative to the node's basedir), which can contain HTML and other files. This can be used to serve a Javascript-based frontend to the Tahoe-LAFS node, or other services. The default value is "``public_html``", which will serve ``BASEDIR/public_html`` . With the default settings, ``http://127.0.0.1:3456/static/foo.html`` will serve the contents of ``BASEDIR/public_html/foo.html`` . ``tub.port = (integer, optional)`` This controls which port the node uses to accept Foolscap connections from other nodes. If not provided, the node will ask the kernel for any available port. The port will be written to a separate file (named ``client.port`` or ``introducer.port``), so that subsequent runs will re-use the same port. ``tub.location = (string, optional)`` In addition to running as a client, each Tahoe-LAFS node also runs as a server, listening for connections from other Tahoe-LAFS clients. The node announces its location by publishing a "FURL" (a string with some connection hints) to the Introducer. The string it publishes can be found in ``BASEDIR/private/storage.furl`` . The ``tub.location`` configuration controls what location is published in this announcement. If you don't provide ``tub.location``, the node will try to figure out a useful one by itself, by using tools like "``ifconfig``" to determine the set of IP addresses on which it can be reached from nodes both near and far. It will also include the TCP port number on which it is listening (either the one specified by ``tub.port``, or whichever port was assigned by the kernel when ``tub.port`` is left unspecified). You might want to override this value if your node lives behind a firewall that is doing inbound port forwarding, or if you are using other proxies such that the local IP address or port number is not the same one that remote clients should use to connect. You might also want to control this when using a Tor proxy to avoid revealing your actual IP address through the Introducer announcement. 
The value is a comma-separated string of host:port location hints, like this:: 123.45.67.89:8098,tahoe.example.com:8098,127.0.0.1:8098 A few examples: * Emulate default behavior, assuming your host has IP address 123.45.67.89 and the kernel-allocated port number was 8098:: tub.port = 8098 tub.location = 123.45.67.89:8098,127.0.0.1:8098 * Use a DNS name so you can change the IP address more easily:: tub.port = 8098 tub.location = tahoe.example.com:8098 * Run a node behind a firewall (which has an external IP address) that has been configured to forward port 7912 to our internal node's port 8098:: tub.port = 8098 tub.location = external-firewall.example.com:7912 * Run a node behind a Tor proxy (perhaps via ``torsocks``), in client-only mode (i.e. we can make outbound connections, but other nodes will not be able to connect to us). The literal '``unreachable.example.org``' will not resolve, but will serve as a reminder to human observers that this node cannot be reached. "Don't call us.. we'll call you":: tub.port = 8098 tub.location = unreachable.example.org:0 * Run a node behind a Tor proxy, and make the server available as a Tor "hidden service". (This assumes that other clients are running their node with ``torsocks``, such that they are prepared to connect to a ``.onion`` address.) The hidden service must first be configured in Tor, by giving it a local port number and then obtaining a ``.onion`` name, using something in the ``torrc`` file like:: HiddenServiceDir /var/lib/tor/hidden_services/tahoe HiddenServicePort 29212 127.0.0.1:8098 once Tor is restarted, the ``.onion`` hostname will be in ``/var/lib/tor/hidden_services/tahoe/hostname``. Then set up your ``tahoe.cfg`` like:: tub.port = 8098 tub.location = ualhejtq2p7ohfbb.onion:29212 Most users will not need to set ``tub.location``. ``log_gatherer.furl = (FURL, optional)`` If provided, this contains a single FURL string that is used to contact a "log gatherer", which will be granted access to the logport. This can be used to gather operational logs in a single place. Note that in previous releases of Tahoe-LAFS, if an old-style ``BASEDIR/log_gatherer.furl`` file existed it would also be used in addition to this value, allowing multiple log gatherers to be used at once. As of Tahoe-LAFS v1.9.0, an old-style file is ignored and a warning will be emitted if one is detected. This means that as of Tahoe-LAFS v1.9.0 you can have at most one log gatherer per node. See ticket `#1423`_ about lifting this restriction and letting you have multiple log gatherers. .. _`#1423`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1423 ``timeout.keepalive = (integer in seconds, optional)`` ``timeout.disconnect = (integer in seconds, optional)`` If ``timeout.keepalive`` is provided, it is treated as an integral number of seconds, and sets the Foolscap "keepalive timer" to that value. For each connection to another node, if nothing has been heard for a while, we will attempt to provoke the other end into saying something. The duration of silence that passes before sending the PING will be between KT and 2*KT. This is mainly intended to keep NAT boxes from expiring idle TCP sessions, but also gives TCP's long-duration keepalive/disconnect timers some traffic to work with. The default value is 240 (i.e. 4 minutes). If timeout.disconnect is provided, this is treated as an integral number of seconds, and sets the Foolscap "disconnect timer" to that value. For each connection to another node, if nothing has been heard for a while, we will drop the connection. 
The duration of silence that passes before dropping the connection will be between DT-2*KT and 2*DT+2*KT (please see ticket `#521`_ for more details). If we are sending a large amount of data to the other end (which takes more than DT-2*KT to deliver), we might incorrectly drop the connection. The default behavior (when this value is not provided) is to disable the disconnect timer. See ticket `#521`_ for a discussion of how to pick these timeout values. Using 30 minutes means we'll disconnect after 22 to 68 minutes of inactivity. Receiving data will reset this timeout, however if we have more than 22min of data in the outbound queue (such as 800kB in two pipelined segments of 10 shares each) and the far end has no need to contact us, our ping might be delayed, so we may disconnect them by accident. .. _`#521`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/521 ``ssh.port = (strports string, optional)`` ``ssh.authorized_keys_file = (filename, optional)`` This enables an SSH-based interactive Python shell, which can be used to inspect the internal state of the node, for debugging. To cause the node to accept SSH connections on port 8022 from the same keys as the rest of your account, use:: [tub] ssh.port = 8022 ssh.authorized_keys_file = ~/.ssh/authorized_keys ``tempdir = (string, optional)`` This specifies a temporary directory for the web-API server to use, for holding large files while they are being uploaded. If a web-API client attempts to upload a 10GB file, this tempdir will need to have at least 10GB available for the upload to complete. The default value is the ``tmp`` directory in the node's base directory (i.e. ``BASEDIR/tmp``), but it can be placed elsewhere. This directory is used for files that usually (on a Unix system) go into ``/tmp``. The string will be interpreted relative to the node's base directory. Client Configuration ==================== ``[client]`` ``introducer.furl = (FURL string, mandatory)`` This FURL tells the client how to connect to the introducer. Each Tahoe-LAFS grid is defined by an introducer. The introducer's FURL is created by the introducer node and written into its private base directory when it starts, whereupon it should be published to everyone who wishes to attach a client to that grid ``helper.furl = (FURL string, optional)`` If provided, the node will attempt to connect to and use the given helper for uploads. See ``_ for details. ``key_generator.furl = (FURL string, optional)`` If provided, the node will attempt to connect to and use the given key-generator service, using RSA keys from the external process rather than generating its own. ``stats_gatherer.furl = (FURL string, optional)`` If provided, the node will connect to the given stats gatherer and provide it with operational statistics. ``shares.needed = (int, optional) aka "k", default 3`` ``shares.total = (int, optional) aka "N", N >= k, default 10`` ``shares.happy = (int, optional) 1 <= happy <= N, default 7`` These three values set the default encoding parameters. Each time a new file is uploaded, erasure-coding is used to break the ciphertext into separate shares. There will be ``N`` (i.e. ``shares.total``) shares created, and the file will be recoverable if any ``k`` (i.e. ``shares.needed``) shares are retrieved. The default values are 3-of-10 (i.e. ``shares.needed = 3``, ``shares.total = 10``). Setting ``k`` to 1 is equivalent to simple replication (uploading ``N`` copies of the file). These values control the tradeoff between storage overhead and reliability. 
To a first approximation, a 1MB file will use (1MB * ``N``/``k``) of backend storage space (the actual value will be a bit more, because of other forms of overhead). Up to ``N``-``k`` shares can be lost before the file becomes unrecoverable. So large ``N``/``k`` ratios are more reliable, and small ``N``/``k`` ratios use less disk space. ``N`` cannot be larger than 256, because of the 8-bit erasure-coding algorithm that Tahoe-LAFS uses. ``k`` can not be greater than ``N``. See ``_ for more details. ``shares.happy`` allows you control over how well to "spread out" the shares of an immutable file. For a successful upload, shares are guaranteed to be initially placed on at least ``shares.happy`` distinct servers, the correct functioning of any ``k`` of which is sufficient to guarantee the availability of the uploaded file. This value should not be larger than the number of servers on your grid. A value of ``shares.happy`` <= ``k`` is allowed, but this is not guaranteed to provide any redundancy if some servers fail or lose shares. It may still provide redundancy in practice if ``N`` is greater than the number of connected servers, because in that case there will typically be more than one share on at least some storage nodes. However, since a successful upload only guarantees that at least ``shares.happy`` shares have been stored, the worst case is still that there is no redundancy. (Mutable files use a different share placement algorithm that does not currently consider this parameter.) ``mutable.format = sdmf or mdmf`` This value tells Tahoe-LAFS what the default mutable file format should be. If ``mutable.format=sdmf``, then newly created mutable files will be in the old SDMF format. This is desirable for clients that operate on grids where some peers run older versions of Tahoe-LAFS, as these older versions cannot read the new MDMF mutable file format. If ``mutable.format`` is ``mdmf``, then newly created mutable files will use the new MDMF format, which supports efficient in-place modification and streaming downloads. You can overwrite this value using a special mutable-type parameter in the webapi. If you do not specify a value here, Tahoe-LAFS will use SDMF for all newly-created mutable files. Note that this parameter applies only to files, not to directories. Mutable directories, which are stored in mutable files, are not controlled by this parameter and will always use SDMF. We may revisit this decision in future versions of Tahoe-LAFS. See ``_ for details about mutable file formats. Frontend Configuration ====================== The Tahoe client process can run a variety of frontend file-access protocols. You will use these to create and retrieve files from the virtual filesystem. Configuration details for each are documented in the following protocol-specific guides: HTTP Tahoe runs a webserver by default on port 3456. This interface provides a human-oriented "WUI", with pages to create, modify, and browse directories and files, as well as a number of pages to check on the status of your Tahoe node. It also provides a machine-oriented "WAPI", with a REST-ful HTTP interface that can be used by other programs (including the CLI tools). Please see ``_ for full details, and the ``web.port`` and ``web.static`` config variables above. The ``_ document also describes a few WUI status pages. CLI The main "bin/tahoe" executable includes subcommands for manipulating the filesystem, uploading/downloading files, and creating/running Tahoe nodes. See ``_ for details. 
SFTP, FTP Tahoe can also run both SFTP and FTP servers, and map a username/password pair to a top-level Tahoe directory. See ``_ for instructions on configuring these services, and the ``[sftpd]`` and ``[ftpd]`` sections of ``tahoe.cfg``. Drop-Upload As of Tahoe-LAFS v1.9.0, a node running on Linux can be configured to automatically upload files that are created or changed in a specified local directory. See ``_ for details. Storage Server Configuration ============================ ``[storage]`` ``enabled = (boolean, optional)`` If this is ``True``, the node will run a storage server, offering space to other clients. If it is ``False``, the node will not run a storage server, meaning that no shares will be stored on this node. Use ``False`` for clients who do not wish to provide storage service. The default value is ``True``. ``readonly = (boolean, optional)`` If ``True``, the node will run a storage server but will not accept any shares, making it effectively read-only. Use this for storage servers that are being decommissioned: the ``storage/`` directory could be mounted read-only, while shares are moved to other servers. Note that this currently only affects immutable shares. Mutable shares (used for directories) will be written and modified anyway. See ticket `#390 `_ for the current status of this bug. The default value is ``False``. ``reserved_space = (str, optional)`` If provided, this value defines how much disk space is reserved: the storage server will not accept any share that causes the amount of free disk space to drop below this value. (The free space is measured by a call to ``statvfs(2)`` on Unix, or ``GetDiskFreeSpaceEx`` on Windows, and is the space available to the user account under which the storage server runs.) This string contains a number, with an optional case-insensitive scale suffix, optionally followed by "B" or "iB". The supported scale suffixes are "K", "M", "G", "T", "P" and "E", and a following "i" indicates to use powers of 1024 rather than 1000. So "100MB", "100 M", "100000000B", "100000000", and "100000kb" all mean the same thing. Likewise, "1MiB", "1024KiB", "1024 Ki", and "1048576 B" all mean the same thing. "``tahoe create-node``" generates a tahoe.cfg with "``reserved_space=1G``", but you may wish to raise, lower, or remove the reservation to suit your needs. ``expire.enabled =`` ``expire.mode =`` ``expire.override_lease_duration =`` ``expire.cutoff_date =`` ``expire.immutable =`` ``expire.mutable =`` These settings control garbage collection, in which the server will delete shares that no longer have an up-to-date lease on them. Please see ``_ for full details. Running A Helper ================ A "helper" is a regular client node that also offers the "upload helper" service. ``[helper]`` ``enabled = (boolean, optional)`` If ``True``, the node will run a helper (see ``_ for details). The helper's contact FURL will be placed in ``private/helper.furl``, from which it can be copied to any clients that wish to use it. Clearly nodes should not both run a helper and attempt to use one: do not create ``helper.furl`` and also define ``[helper]enabled`` in the same node. The default is ``False``. Running An Introducer ===================== The introducer node uses a different ``.tac`` file (named "``introducer.tac``"), and pays attention to the ``[node]`` section, but not the others. The Introducer node maintains some different state than regular client nodes. 
``BASEDIR/private/introducer.furl`` This is generated the first time the introducer node is started, and used again on subsequent runs, to give the introduction service a persistent long-term identity. This file should be published and copied into new client nodes before they are started for the first time. Other Files in BASEDIR ====================== Some configuration is not kept in ``tahoe.cfg``, for the following reasons: * it is generated by the node at startup, e.g. encryption keys. The node never writes to ``tahoe.cfg``. * it is generated by user action, e.g. the "``tahoe create-alias``" command. In addition, non-configuration persistent state is kept in the node's base directory, next to the configuration knobs. This section describes these other files. ``private/node.pem`` This contains an SSL private-key certificate. The node generates this the first time it is started, and re-uses it on subsequent runs. This certificate allows the node to have a cryptographically-strong identifier (the Foolscap "TubID"), and to establish secure connections to other nodes. ``storage/`` Nodes that host StorageServers will create this directory to hold shares of files on behalf of other clients. There will be a directory underneath it for each StorageIndex for which this node is holding shares. There is also an "incoming" directory where partially-completed shares are held while they are being received. ``tahoe-client.tac`` This file defines the client, by constructing the actual Client instance each time the node is started. It is used by the "``twistd``" daemonization program (in the ``-y`` mode), which is run internally by the "``tahoe start``" command. This file is created by the "``tahoe create-node``" or "``tahoe create-client``" commands. ``tahoe-introducer.tac`` This file is used to construct an introducer, and is created by the "``tahoe create-introducer``" command. ``tahoe-key-generator.tac`` This file is used to construct a key generator, and is created by the "``tahoe create-key-gernerator``" command. ``tahoe-stats-gatherer.tac`` This file is used to construct a statistics gatherer, and is created by the "``tahoe create-stats-gatherer``" command. ``private/control.furl`` This file contains a FURL that provides access to a control port on the client node, from which files can be uploaded and downloaded. This file is created with permissions that prevent anyone else from reading it (on operating systems that support such a concept), to insure that only the owner of the client node can use this feature. This port is intended for debugging and testing use. ``private/logport.furl`` This file contains a FURL that provides access to a 'log port' on the client node, from which operational logs can be retrieved. Do not grant logport access to strangers, because occasionally secret information may be placed in the logs. ``private/helper.furl`` If the node is running a helper (for use by other clients), its contact FURL will be placed here. See ``_ for more details. ``private/root_dir.cap`` (optional) The command-line tools will read a directory cap out of this file and use it, if you don't specify a '--dir-cap' option or if you specify '--dir-cap=root'. ``private/convergence`` (automatically generated) An added secret for encrypting immutable files. Everyone who has this same string in their ``private/convergence`` file encrypts their immutable files in the same way when uploading them. 
This causes identical files to "converge" -- to share the same storage space since they have identical ciphertext -- which conserves space and optimizes upload time, but it also exposes file contents to the possibility of a brute-force attack by people who know that string. In this attack, if the attacker can guess most of the contents of a file, then they can use brute-force to learn the remaining contents. So the set of people who know your ``private/convergence`` string is the set of people who converge their storage space with you when you and they upload identical immutable files, and it is also the set of people who could mount such an attack. The content of the ``private/convergence`` file is a base-32 encoded string. If the file doesn't exist, then when the Tahoe-LAFS client starts up it will generate a random 256-bit string and write the base-32 encoding of this string into the file. If you want to converge your immutable files with as many people as possible, put the empty string (so that ``private/convergence`` is a zero-length file). Other files =========== ``logs/`` Each Tahoe-LAFS node creates a directory to hold the log messages produced as the node runs. These logfiles are created and rotated by the "``twistd``" daemonization program, so ``logs/twistd.log`` will contain the most recent messages, ``logs/twistd.log.1`` will contain the previous ones, ``logs/twistd.log.2`` will be older still, and so on. ``twistd`` rotates logfiles after they grow beyond 1MB in size. If the space consumed by logfiles becomes troublesome, they should be pruned: a cron job to delete all files that were created more than a month ago in this ``logs/`` directory should be sufficient. ``my_nodeid`` this is written by all nodes after startup, and contains a base32-encoded (i.e. human-readable) NodeID that identifies this specific node. This NodeID is the same string that gets displayed on the web page (in the "which peers am I connected to" list), and the shortened form (the first few characters) is recorded in various log messages. ``access.blacklist`` Gateway nodes may find it necessary to prohibit access to certain files. The web-API has a facility to block access to filecaps by their storage index, returning a 403 "Forbidden" error instead of the original file. For more details, see the "Access Blacklist" section of ``_. Example ======= The following is a sample ``tahoe.cfg`` file, containing values for some of the keys described in the previous section. Note that this is not a recommended configuration (most of these are not the default values), merely a legal one. :: [node] nickname = Bob's Tahoe-LAFS Node tub.port = 34912 tub.location = 123.45.67.89:8098,44.55.66.77:8098 web.port = 3456 log_gatherer.furl = pb://soklj4y7eok5c3xkmjeqpw@192.168.69.247:44801/eqpwqtzm timeout.keepalive = 240 timeout.disconnect = 1800 ssh.port = 8022 ssh.authorized_keys_file = ~/.ssh/authorized_keys [client] introducer.furl = pb://ok45ssoklj4y7eok5c3xkmj@tahoe.example:44801/ii3uumo helper.furl = pb://ggti5ssoklj4y7eok5c3xkmj@helper.tahoe.example:7054/kk8lhr [storage] enabled = True readonly = True reserved_space = 10000000000 [helper] enabled = True Old Configuration Files ======================= Tahoe-LAFS releases before v1.3.0 had no ``tahoe.cfg`` file, and used distinct files for each item. This is no longer supported and if you have configuration in the old format you must manually convert it to the new format for Tahoe-LAFS to detect it. See ``_. 
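Because ``tahoe.cfg`` is ordinary .INI syntax, it can also be inspected from your own scripts with the stdlib ConfigParser module mentioned at the top of this document. The sketch below reads a few values from a file like the sample above, falling back to the documented defaults when a key is absent; it is a convenience for external tooling, not the node's own configuration-loading code, and the base directory path is just an example. ::

    try:                                  # Python 3
        from configparser import ConfigParser
    except ImportError:                   # Python 2, as used by Tahoe-LAFS 1.10
        from ConfigParser import SafeConfigParser as ConfigParser

    def read_tahoe_cfg(basedir):
        cfg = ConfigParser()
        cfg.read(basedir + "/tahoe.cfg")
        get = lambda s, k, default: (cfg.get(s, k)
                                     if cfg.has_option(s, k) else default)
        return {
            "nickname": get("node", "nickname", ""),
            "web.port": get("node", "web.port", None),
            "k":        int(get("client", "shares.needed", 3)),
            "happy":    int(get("client", "shares.happy", 7)),
            "N":        int(get("client", "shares.total", 10)),
            "storage":  str(get("storage", "enabled", "true")).lower()
                        in ("true", "yes", "on", "1"),
        }

    print(read_tahoe_cfg("/home/bob/.tahoe"))   # example base directory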
tahoe-lafs-1.10.0/docs/convergence-secret.rst000066400000000000000000000070541221140116300210540ustar00rootroot00000000000000 What Is It? ----------- The identifier of a file (also called the "capability" to a file) is derived from two pieces of information when the file is uploaded: the content of the file and the upload client's "convergence secret". By default, the convergence secret is randomly generated by the client when it first starts up, then stored in the client's base directory (/private/convergence) and re-used after that. So the same file content uploaded from the same client will always have the same cap. Uploading the file from a different client with a different convergence secret would result in a different cap -- and in a second copy of the file's contents stored on the grid. If you want files you upload to converge (also known as "deduplicate") with files uploaded by someone else, just make sure you're using the same convergence secret when you upload files as them. The advantages of deduplication should be clear, but keep in mind that the convergence secret was created to protect confidentiality. There are two attacks that can be used against you by someone who knows the convergence secret you use. The first one is called the "Confirmation-of-a-File Attack". Someone who knows the convergence secret that you used when you uploaded a file, and who has a copy of that file themselves, can check whether you have a copy of that file. This is usually not a problem, but it could be if that file is, for example, a book or movie that is banned in your country. The second attack is more subtle. It is called the "Learn-the-Remaining-Information Attack". Suppose you've received a confidential document, such as a PDF from your bank which contains many pages of boilerplate text as well as containing your bank account number and balance. Someone who knows your convergence secret can generate a file with all of the boilerplate text (perhaps they would open an account with the same bank so they receive the same document with their account number and balance). Then they can try a "brute force search" to find your account number and your balance. The defense against these attacks is that only someone who knows the convergence secret that you used on each file can perform these attacks on that file. Both of these attacks and the defense are described in more detail in `Drew Perttula's Hack Tahoe-LAFS Hall Of Fame entry`_ .. _`Drew Perttula's Hack Tahoe-LAFS Hall Of Fame entry`: https://tahoe-lafs.org/hacktahoelafs/drew_perttula.html What If I Change My Convergence Secret? --------------------------------------- All your old file capabilities will still work, but the new data that you upload will not be deduplicated with the old data. If you upload all of the same things to the grid, you will end up using twice the space until garbage collection kicks in (if it's enabled). Changing the convergence secret that a storage client uses for uploads can be though of as moving the client to a new "deduplication domain". How To Use It ------------- To enable deduplication between different clients, **securely** copy the convergence secret file from one client to all the others. 
For example, if you are on host A and have an account on host B and you have scp installed, run: *scp ~/.tahoe/private/convergence my_other_account@B:.tahoe/private/convergence* If you have two different clients on a single computer, say one for each disk, you would do: *cp /tahoe1/private/convergence /tahoe2/private/convergence* After you change the convergence secret file, you must restart the client before it will stop using the old one and read the new one from the file. tahoe-lafs-1.10.0/docs/debian.rst000066400000000000000000000044011221140116300165060ustar00rootroot00000000000000========================= Debian and Ubuntu Support ========================= 1. `Overview`_ 2. `Dependency Packages`_ Overview ======== Tahoe-LAFS is provided as a ``.deb`` package in current Debian (>= wheezy) and Ubuntu (>= lucid) releases. Before official packages were added, the Tahoe source tree provided support for building unofficial packages for a variety of popular Debian/Ubuntu versions. The project also ran buildbots to create ``.debs`` of current trunk for ease of testing. As of version 1.9, the source tree no longer provides these tools. To construct a ``.deb`` from current trunk, your best bet is to apply the current Debian diff from the latest upstream package and invoke the ``debian/rules`` as usual. Debian's standard ``apt-get`` tool can be used to fetch the current source package (including the Debian-specific diff): run "``apt-get source tahoe-lafs``". That will fetch three files: the ``.dsc`` control file, the main Tahoe tarball, and the Debian-specific ``.debian.tar.gz`` file. Just unpack the ``.debian.tar.gz`` file inside your Tahoe source tree, modify the version number in ``debian/changelog``, then run "``fakeroot ./debian/rules binary``", and a new ``.deb`` will be placed in the parent directory. Dependency Packages =================== Tahoe depends upon a number of additional libraries. When building Tahoe from source, any dependencies that are not already present in the environment will be downloaded (via ``easy_install``) and stored in the ``support/lib`` directory. The ``.deb`` packages, of course, rely solely upon other ``.deb`` packages. For reference, here is a list of the debian package names that provide Tahoe's dependencies as of the 1.9 release: * python * python-zfec * python-pycryptopp * python-foolscap * python-openssl (needed by foolscap) * python-twisted * python-nevow * python-mock * python-simplejson * python-setuptools * python-support (for Debian-specific install-time tools) When building your own Debian packages, a convenient way to get all these dependencies installed is to first install the official "tahoe-lafs" package, then uninstall it, leaving the dependencies behind. You may also find it useful to run "``apt-get build-dep tahoe-lafs``" to make sure all the usual build-essential tools are installed. tahoe-lafs-1.10.0/docs/filesystem-notes.rst000066400000000000000000000017221221140116300206010ustar00rootroot00000000000000========================= Filesystem-specific notes ========================= 1. ext3_ Tahoe storage servers use a large number of subdirectories to store their shares on local disk. This format is simple and robust, but depends upon the local filesystem to provide fast access to those directories. ext3 ==== For moderate- or large-sized storage servers, you'll want to make sure the "directory index" feature is enabled on your ext3 directories, otherwise share lookup may be very slow. 
Recent versions of ext3 enable this automatically, but older filesystems may not have it enabled:: $ sudo tune2fs -l /dev/sda1 |grep feature Filesystem features: has_journal ext_attr resize_inode dir_index filetype needs_recovery sparse_super large_file If "dir_index" is present in the "features:" line, then you're all set. If not, you'll need to use tune2fs and e2fsck to enable and build the index. See ``_ for some hints. tahoe-lafs-1.10.0/docs/frontends/000077500000000000000000000000001221140116300165355ustar00rootroot00000000000000tahoe-lafs-1.10.0/docs/frontends/CLI.rst000066400000000000000000000557141221140116300177120ustar00rootroot00000000000000=========================== The Tahoe-LAFS CLI commands =========================== 1. `Overview`_ 2. `CLI Command Overview`_ 1. `Unicode Support`_ 3. `Node Management`_ 4. `Filesystem Manipulation`_ 1. `Starting Directories`_ 2. `Command Syntax Summary`_ 3. `Command Examples`_ 5. `Storage Grid Maintenance`_ 6. `Debugging`_ Overview ======== Tahoe-LAFS provides a single executable named "``tahoe``", which can be used to create and manage client/server nodes, manipulate the filesystem, and perform several debugging/maintenance tasks. This executable lives in the source tree at "``bin/tahoe``". Once you've done a build (by running "``make``" or "``python setup.py build``"), ``bin/tahoe`` can be run in-place: if it discovers that it is being run from within a Tahoe-LAFS source tree, it will modify ``sys.path`` as necessary to use all the source code and dependent libraries contained in that tree. If you've installed Tahoe-LAFS (using "``make install``" or "``python setup.py install``", or by installing a binary package), then the ``tahoe`` executable will be available somewhere else, perhaps in ``/usr/bin/tahoe``. In this case, it will use your platform's normal PYTHONPATH search path to find the Tahoe-LAFS code and other libraries. CLI Command Overview ==================== The "``tahoe``" tool provides access to three categories of commands. * node management: create a client/server node, start/stop/restart it * filesystem manipulation: list files, upload, download, unlink, rename * debugging: unpack cap-strings, examine share files To get a list of all commands, just run "``tahoe``" with no additional arguments. "``tahoe --help``" might also provide something useful. Running "``tahoe --version``" will display a list of version strings, starting with the "allmydata" module (which contains the majority of the Tahoe-LAFS functionality) and including versions for a number of dependent libraries, like Twisted, Foolscap, pycryptopp, and zfec. "``tahoe --version-and-path``" will also show the path from which each library was imported. On Unix systems, the shell expands filename wildcards (``'*'`` and ``'?'``) before the program is able to read them, which may produce unexpected results for many ``tahoe`` comands. We recommend, if you use wildcards, to start the path with "``./``", for example "``tahoe cp -r ./* somewhere:``". This prevents the expanded filename from being interpreted as an option or as an alias, allowing filenames that start with a dash or contain colons to be handled correctly. On Windows, a single letter followed by a colon is treated as a drive specification rather than an alias (and is invalid unless a local path is allowed in that context). Wildcards cannot be used to specify multiple filenames to ``tahoe`` on Windows. 
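The same precaution is useful when the CLI is driven from a script rather than an interactive shell: give each local path an explicit "``./``" prefix so that filenames beginning with a dash, or containing a colon, are not parsed as options or aliases. A rough sketch (the ``photos`` directory and ``backup:`` alias are made-up examples)::

  # Sketch: invoke the tahoe CLI from a script, prefixing each local
  # filename with "./" so names starting with "-" or containing ":" are
  # not misread as options or aliases.  "photos" and "backup:" are
  # illustrative placeholders.
  import os
  import subprocess

  local_dir = "photos"
  for name in os.listdir(local_dir):
      local_path = os.path.join(".", local_dir, name)
      subprocess.check_call(["tahoe", "cp", local_path, "backup:"])

Passing the arguments as a list (rather than as a single shell string) also sidesteps the shell's wildcard expansion entirely.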
Unicode Support --------------- As of Tahoe-LAFS v1.7.0 (v1.8.0 on Windows), the ``tahoe`` tool supports non-ASCII characters in command lines and output. On Unix, the command-line arguments are assumed to use the character encoding specified by the current locale (usually given by the ``LANG`` environment variable). If a name to be output contains control characters or characters that cannot be represented in the encoding used on your terminal, it will be quoted. The quoting scheme used is similar to `POSIX shell quoting`_: in a "double-quoted" string, backslashes introduce escape sequences (like those in Python strings), but in a 'single-quoted' string all characters stand for themselves. This quoting is only used for output, on all operating systems. Your shell interprets any quoting or escapes used on the command line. .. _`POSIX shell quoting`: http://pubs.opengroup.org/onlinepubs/009695399/utilities/xcu_chap02.html Node Management =============== "``tahoe create-node [NODEDIR]``" is the basic make-a-new-node command. It creates a new directory and populates it with files that will allow the "``tahoe start``" command to use it later on. This command creates nodes that have client functionality (upload/download files), web API services (controlled by the '[node]web.port' configuration), and storage services (unless ``--no-storage`` is specified). NODEDIR defaults to ``~/.tahoe/`` , and newly-created nodes default to publishing a web server on port 3456 (limited to the loopback interface, at 127.0.0.1, to restrict access to other programs on the same host). All of the other "``tahoe``" subcommands use corresponding defaults (with the exception that "``tahoe run``" defaults to running a node in the current directory). "``tahoe create-client [NODEDIR]``" creates a node with no storage service. That is, it behaves like "``tahoe create-node --no-storage [NODEDIR]``". (This is a change from versions prior to v1.6.0.) "``tahoe create-introducer [NODEDIR]``" is used to create the Introducer node. This node provides introduction services and nothing else. When started, this node will produce a ``private/introducer.furl`` file, which should be published to all clients. "``tahoe create-key-generator [NODEDIR]``" is used to create a special "key-generation" service, which allows a client to offload their RSA key generation to a separate process. Since RSA key generation takes several seconds, and must be done each time a directory is created, moving it to a separate process allows the first process (perhaps a busy web-API server) to continue servicing other requests. The key generator exports a FURL that can be copied into a node to enable this functionality. "``tahoe run [NODEDIR]``" will start a previously-created node in the foreground. "``tahoe start [NODEDIR]``" will launch a previously-created node. It will launch the node into the background, using the standard Twisted "``twistd``" daemon-launching tool. On some platforms (including Windows) this command is unable to run a daemon in the background; in that case it behaves in the same way as "``tahoe run``". "``tahoe stop [NODEDIR]``" will shut down a running node. "``tahoe restart [NODEDIR]``" will stop and then restart a running node. This is most often used by developers who have just modified the code and want to start using their changes. Filesystem Manipulation ======================= These commands let you exmaine a Tahoe-LAFS filesystem, providing basic list/upload/download/unlink/rename/mkdir functionality. 
They can be used as primitives by other scripts. Most of these commands are fairly thin wrappers around web-API calls, which are described in ``__. By default, all filesystem-manipulation commands look in ``~/.tahoe/`` to figure out which Tahoe-LAFS node they should use. When the CLI command makes web-API calls, it will use ``~/.tahoe/node.url`` for this purpose: a running Tahoe-LAFS node that provides a web-API port will write its URL into this file. If you want to use a node on some other host, just create ``~/.tahoe/`` and copy that node's web-API URL into this file, and the CLI commands will contact that node instead of a local one. These commands also use a table of "aliases" to figure out which directory they ought to use a starting point. This is explained in more detail below. Starting Directories -------------------- As described in `docs/architecture.rst <../architecture.rst>`__, the Tahoe-LAFS distributed filesystem consists of a collection of directories and files, each of which has a "read-cap" or a "write-cap" (also known as a URI). Each directory is simply a table that maps a name to a child file or directory, and this table is turned into a string and stored in a mutable file. The whole set of directory and file "nodes" are connected together into a directed graph. To use this collection of files and directories, you need to choose a starting point: some specific directory that we will refer to as a "starting directory". For a given starting directory, the "``ls [STARTING_DIR]``" command would list the contents of this directory, the "``ls [STARTING_DIR]/dir1``" command would look inside this directory for a child named "``dir1``" and list its contents, "``ls [STARTING_DIR]/dir1/subdir2``" would look two levels deep, etc. Note that there is no real global "root" directory, but instead each starting directory provides a different, possibly overlapping perspective on the graph of files and directories. Each Tahoe-LAFS node remembers a list of starting points, called "aliases", which are short Unicode strings that stand in for a directory read- or write- cap. They are stored (encoded as UTF-8) in the file ``NODEDIR/private/aliases`` . If you use the command line "``tahoe ls``" without any "[STARTING_DIR]" argument, then it will use the default alias, which is ``tahoe:``, therefore "``tahoe ls``" has the same effect as "``tahoe ls tahoe:``". The same goes for the other commands that can reasonably use a default alias: ``get``, ``put``, ``mkdir``, ``mv``, and ``rm``. For backwards compatibility with Tahoe-LAFS v1.0, if the ``tahoe:`` alias is not found in ``~/.tahoe/private/aliases``, the CLI will use the contents of ``~/.tahoe/private/root_dir.cap`` instead. Tahoe-LAFS v1.0 had only a single starting point, and stored it in this ``root_dir.cap`` file, so v1.1 and later will use it if necessary. However, once you've set a ``tahoe:`` alias with "``tahoe set-alias``", that will override anything in the old ``root_dir.cap`` file. The Tahoe-LAFS CLI commands use the same path syntax as ``scp`` and ``rsync`` -- an optional ``ALIAS:`` prefix, followed by the pathname or filename. Some commands (like "``tahoe cp``") use the lack of an alias to mean that you want to refer to a local file, instead of something from the Tahoe-LAFS filesystem. [TODO] Another way to indicate this is to start the pathname with a dot, slash, or tilde. When you're dealing a single starting directory, the ``tahoe:`` alias is all you need. 
But when you want to refer to something that isn't yet attached to the graph rooted at that starting directory, you need to refer to it by its capability. The way to do that is either to use its capability directory as an argument on the command line, or to add an alias to it, with the "``tahoe add-alias``" command. Once you've added an alias, you can use that alias as an argument to commands. The best way to get started with Tahoe-LAFS is to create a node, start it, then use the following command to create a new directory and set it as your ``tahoe:`` alias:: tahoe create-alias tahoe After that you can use "``tahoe ls tahoe:``" and "``tahoe cp local.txt tahoe:``", and both will refer to the directory that you've just created. SECURITY NOTE: For users of shared systems `````````````````````````````````````````` Another way to achieve the same effect as the above "``tahoe create-alias``" command is:: tahoe add-alias tahoe `tahoe mkdir` However, command-line arguments are visible to other users (through the ``ps`` command or ``/proc`` filesystem, or the Windows Process Explorer tool), so if you are using a Tahoe-LAFS node on a shared host, your login neighbors will be able to see (and capture) any directory caps that you set up with the "``tahoe add-alias``" command. The "``tahoe create-alias``" command avoids this problem by creating a new directory and putting the cap into your aliases file for you. Alternatively, you can edit the ``NODEDIR/private/aliases`` file directly, by adding a line like this:: fun: URI:DIR2:ovjy4yhylqlfoqg2vcze36dhde:4d4f47qko2xm5g7osgo2yyidi5m4muyo2vjjy53q4vjju2u55mfa By entering the dircap through the editor, the command-line arguments are bypassed, and other users will not be able to see them. Once you've added the alias, no other secrets are passed through the command line, so this vulnerability becomes less significant: they can still see your filenames and other arguments you type there, but not the caps that Tahoe-LAFS uses to permit access to your files and directories. Command Syntax Summary ---------------------- ``tahoe add-alias ALIAS[:] DIRCAP`` ``tahoe create-alias ALIAS[:]`` ``tahoe list-aliases`` ``tahoe mkdir`` ``tahoe mkdir PATH`` ``tahoe ls [PATH]`` ``tahoe webopen [PATH]`` ``tahoe put [--mutable] [FROMLOCAL|-]`` ``tahoe put [--mutable] FROMLOCAL|- TOPATH`` ``tahoe put [FROMLOCAL|-] mutable-file-writecap`` ``tahoe get FROMPATH [TOLOCAL|-]`` ``tahoe cp [-r] FROMPATH TOPATH`` ``tahoe rm PATH`` ``tahoe mv FROMPATH TOPATH`` ``tahoe ln FROMPATH TOPATH`` ``tahoe backup FROMLOCAL TOPATH`` In these summaries, ``PATH``, ``TOPATH`` or ``FROMPATH`` can be one of: * ``[SUBDIRS/]FILENAME`` for a path relative to the default ``tahoe:`` alias; * ``ALIAS:[SUBDIRS/]FILENAME`` for a path relative to another alias; * ``DIRCAP/[SUBDIRS/]FILENAME`` or ``DIRCAP:./[SUBDIRS/]FILENAME`` for a path relative to a directory cap. See `CLI Command Overview`_ above for information on using wildcards with local paths, and different treatment of colons between Unix and Windows. ``FROMLOCAL`` or ``TOLOCAL`` is a path in the local filesystem. Command Examples ---------------- ``tahoe add-alias ALIAS[:] DIRCAP`` An example would be:: tahoe add-alias fun URI:DIR2:ovjy4yhylqlfoqg2vcze36dhde:4d4f47qko2xm5g7osgo2yyidi5m4muyo2vjjy53q4vjju2u55mfa This creates an alias ``fun:`` and configures it to use the given directory cap. Once this is done, "``tahoe ls fun:``" will list the contents of this directory. 
Use "``tahoe add-alias tahoe DIRCAP``" to set the contents of the default ``tahoe:`` alias. Since Tahoe-LAFS v1.8.2, the alias name can be given with or without the trailing colon. On Windows, the alias should not be a single character, because it would be confused with the drive letter of a local path. ``tahoe create-alias fun`` This combines "``tahoe mkdir``" and "``tahoe add-alias``" into a single step. ``tahoe list-aliases`` This displays a table of all configured aliases. ``tahoe mkdir`` This creates a new empty unlinked directory, and prints its write-cap to stdout. The new directory is not attached to anything else. ``tahoe mkdir subdir`` ``tahoe mkdir /subdir`` This creates a new empty directory and attaches it below the root directory of the default ``tahoe:`` alias with the name "``subdir``". ``tahoe ls`` ``tahoe ls /`` ``tahoe ls tahoe:`` ``tahoe ls tahoe:/`` All four list the root directory of the default ``tahoe:`` alias. ``tahoe ls subdir`` This lists a subdirectory of your filesystem. ``tahoe webopen`` ``tahoe webopen tahoe:`` ``tahoe webopen tahoe:subdir/`` ``tahoe webopen subdir/`` This uses the python 'webbrowser' module to cause a local web browser to open to the web page for the given directory. This page offers interfaces to add, download, rename, and unlink files and subdirectories in that directory. If no alias or path is given, this command opens the root directory of the default ``tahoe:`` alias. ``tahoe put file.txt`` ``tahoe put ./file.txt`` ``tahoe put /tmp/file.txt`` ``tahoe put ~/file.txt`` These upload the local file into the grid, and prints the new read-cap to stdout. The uploaded file is not attached to any directory. All one-argument forms of "``tahoe put``" perform an unlinked upload. ``tahoe put -`` ``tahoe put`` These also perform an unlinked upload, but the data to be uploaded is taken from stdin. ``tahoe put file.txt uploaded.txt`` ``tahoe put file.txt tahoe:uploaded.txt`` These upload the local file and add it to your ``tahoe:`` root with the name "``uploaded.txt``". ``tahoe put file.txt subdir/foo.txt`` ``tahoe put - subdir/foo.txt`` ``tahoe put file.txt tahoe:subdir/foo.txt`` ``tahoe put file.txt DIRCAP/foo.txt`` ``tahoe put file.txt DIRCAP/subdir/foo.txt`` These upload the named file and attach them to a subdirectory of the given root directory, under the name "``foo.txt``". When a directory write-cap is given, you can use either ``/`` (as shown above) or ``:./`` to separate it from the following path. When the source file is named "``-``", the contents are taken from stdin. ``tahoe put file.txt --mutable`` Create a new (SDMF) mutable file, fill it with the contents of ``file.txt``, and print the new write-cap to stdout. ``tahoe put file.txt MUTABLE-FILE-WRITECAP`` Replace the contents of the given mutable file with the contents of ``file.txt`` and print the same write-cap to stdout. ``tahoe cp file.txt tahoe:uploaded.txt`` ``tahoe cp file.txt tahoe:`` ``tahoe cp file.txt tahoe:/`` ``tahoe cp ./file.txt tahoe:`` These upload the local file and add it to your ``tahoe:`` root with the name "``uploaded.txt``". ``tahoe cp tahoe:uploaded.txt downloaded.txt`` ``tahoe cp tahoe:uploaded.txt ./downloaded.txt`` ``tahoe cp tahoe:uploaded.txt /tmp/downloaded.txt`` ``tahoe cp tahoe:uploaded.txt ~/downloaded.txt`` This downloads the named file from your ``tahoe:`` root, and puts the result on your local filesystem. 
``tahoe cp tahoe:uploaded.txt fun:stuff.txt`` This copies a file from your ``tahoe:`` root to a different directory, set up earlier with "``tahoe add-alias fun DIRCAP``" or "``tahoe create-alias fun``". ``tahoe unlink uploaded.txt`` ``tahoe unlink tahoe:uploaded.txt`` This unlinks a file from your ``tahoe:`` root (that is, causes there to no longer be an entry ``uploaded.txt`` in the root directory that points to it). Note that this does not delete the file from the grid. For backward compatibility, ``tahoe rm`` is accepted as a synonym for ``tahoe unlink``. ``tahoe mv uploaded.txt renamed.txt`` ``tahoe mv tahoe:uploaded.txt tahoe:renamed.txt`` These rename a file within your ``tahoe:`` root directory. ``tahoe mv uploaded.txt fun:`` ``tahoe mv tahoe:uploaded.txt fun:`` ``tahoe mv tahoe:uploaded.txt fun:uploaded.txt`` These move a file from your ``tahoe:`` root directory to the directory set up earlier with "``tahoe add-alias fun DIRCAP``" or "``tahoe create-alias fun``". ``tahoe backup ~ work:backups`` This command performs a full versioned backup of every file and directory underneath your "``~``" home directory, placing an immutable timestamped snapshot in e.g. ``work:backups/Archives/2009-02-06_04:00:05Z/`` (note that the timestamp is in UTC, hence the "Z" suffix), and a link to the latest snapshot in work:backups/Latest/ . This command uses a small SQLite database known as the "backupdb", stored in ``~/.tahoe/private/backupdb.sqlite``, to remember which local files have been backed up already, and will avoid uploading files that have already been backed up. It compares timestamps and filesizes when making this comparison. It also re-uses existing directories which have identical contents. This lets it run faster and reduces the number of directories created. If you reconfigure your client node to switch to a different grid, you should delete the stale backupdb.sqlite file, to force "``tahoe backup``" to upload all files to the new grid. ``tahoe backup --exclude=*~ ~ work:backups`` Same as above, but this time the backup process will ignore any filename that will end with '~'. ``--exclude`` will accept any standard Unix shell-style wildcards, as implemented by the `Python fnmatch module `__. You may give multiple ``--exclude`` options. Please pay attention that the pattern will be matched against any level of the directory tree; it's still impossible to specify absolute path exclusions. ``tahoe backup --exclude-from=/path/to/filename ~ work:backups`` ``--exclude-from`` is similar to ``--exclude``, but reads exclusion patterns from ``/path/to/filename``, one per line. ``tahoe backup --exclude-vcs ~ work:backups`` This command will ignore any file or directory name known to be used by version control systems to store metadata. The excluded names are: * CVS * RCS * SCCS * .git * .gitignore * .cvsignore * .svn * .arch-ids * {arch} * =RELEASE-ID * =meta-update * =update * .bzr * .bzrignore * .bzrtags * .hg * .hgignore * _darcs Storage Grid Maintenance ======================== ``tahoe manifest tahoe:`` ``tahoe manifest --storage-index tahoe:`` ``tahoe manifest --verify-cap tahoe:`` ``tahoe manifest --repair-cap tahoe:`` ``tahoe manifest --raw tahoe:`` This performs a recursive walk of the given directory, visiting every file and directory that can be reached from that point. It then emits one line to stdout for each object it encounters. The default behavior is to print the access cap string (like ``URI:CHK:..`` or ``URI:DIR2:..``), followed by a space, followed by the full path name. 
If ``--storage-index`` is added, each line will instead contain the object's storage index. This (string) value is useful to determine which share files (on the server) are associated with this directory tree. The ``--verify-cap`` and ``--repair-cap`` options are similar, but emit a verify-cap and repair-cap, respectively. If ``--raw`` is provided instead, the output will be a JSON-encoded dictionary that includes keys for pathnames, storage index strings, and cap strings. The last line of the ``--raw`` output will be a JSON encoded deep-stats dictionary. ``tahoe stats tahoe:`` This performs a recursive walk of the given directory, visiting every file and directory that can be reached from that point. It gathers statistics on the sizes of the objects it encounters, and prints a summary to stdout. Debugging ========= For a list of all debugging commands, use "``tahoe debug``". For more detailed help on any of these commands, use "``tahoe debug COMMAND --help``". "``tahoe debug find-shares STORAGEINDEX NODEDIRS..``" will look through one or more storage nodes for the share files that are providing storage for the given storage index. "``tahoe debug catalog-shares NODEDIRS..``" will look through one or more storage nodes and locate every single share they contain. It produces a report on stdout with one line per share, describing what kind of share it is, the storage index, the size of the file is used for, etc. It may be useful to concatenate these reports from all storage hosts and use it to look for anomalies. "``tahoe debug dump-share SHAREFILE``" will take the name of a single share file (as found by "``tahoe find-shares``") and print a summary of its contents to stdout. This includes a list of leases, summaries of the hash tree, and information from the UEB (URI Extension Block). For mutable file shares, it will describe which version (seqnum and root-hash) is being stored in this share. "``tahoe debug dump-cap CAP``" will take any Tahoe-LAFS URI and unpack it into separate pieces. The most useful aspect of this command is to reveal the storage index for any given URI. This can be used to locate the share files that are holding the encoded+encrypted data for this file. "``tahoe debug repl``" will launch an interactive Python interpreter in which the Tahoe-LAFS packages and modules are available on ``sys.path`` (e.g. by using '``import allmydata``'). This is most useful from a source tree: it simply sets the PYTHONPATH correctly and runs the Python executable. "``tahoe debug corrupt-share SHAREFILE``" will flip a bit in the given sharefile. This can be used to test the client-side verification/repair code. Obviously, this command should not be used during normal operation. "``tahoe debug trial [OPTIONS] [TESTSUITE]``" will run the tests specified by TESTSUITE (defaulting to the whole Tahoe test suite), using Twisted Trial. tahoe-lafs-1.10.0/docs/frontends/FTP-and-SFTP.rst000066400000000000000000000264631221140116300212450ustar00rootroot00000000000000================================= Tahoe-LAFS SFTP and FTP Frontends ================================= 1. `SFTP/FTP Background`_ 2. `Tahoe-LAFS Support`_ 3. `Creating an Account File`_ 4. `Running An Account Server (accounts.url)`_ 5. `Configuring SFTP Access`_ 6. `Configuring FTP Access`_ 7. `Dependencies`_ 8. `Immutable and Mutable Files`_ 9. `Known Issues`_ SFTP/FTP Background =================== FTP is the venerable internet file-transfer protocol, first developed in 1971. The FTP server usually listens on port 21. 
A separate connection is used for the actual data transfers, either in the same direction as the initial client-to-server connection (for PORT mode), or in the reverse direction (for PASV) mode. Connections are unencrypted, so passwords, file names, and file contents are visible to eavesdroppers. SFTP is the modern replacement, developed as part of the SSH "secure shell" protocol, and runs as a subchannel of the regular SSH connection. The SSH server usually listens on port 22. All connections are encrypted. Both FTP and SFTP were developed assuming a UNIX-like server, with accounts and passwords, octal file modes (user/group/other, read/write/execute), and ctime/mtime timestamps. We recommend SFTP over FTP, because the protocol is better, and the server implementation in Tahoe-LAFS is more complete. See `Known Issues`_, below, for details. Tahoe-LAFS Support ================== All Tahoe-LAFS client nodes can run a frontend SFTP server, allowing regular SFTP clients (like ``/usr/bin/sftp``, the ``sshfs`` FUSE plugin, and many others) to access the virtual filesystem. They can also run an FTP server, so FTP clients (like ``/usr/bin/ftp``, ``ncftp``, and others) can too. These frontends sit at the same level as the web-API interface. Since Tahoe-LAFS does not use user accounts or passwords, the SFTP/FTP servers must be configured with a way to first authenticate a user (confirm that a prospective client has a legitimate claim to whatever authorities we might grant a particular user), and second to decide what directory cap should be used as the root directory for a log-in by the authenticated user. A username and password is used for this purpose. (The SFTP protocol is also capable of using client RSA or DSA public keys, but this is not currently implemented in Tahoe-LAFS.) Tahoe-LAFS provides two mechanisms to perform this user-to-cap mapping. The first is a simple flat file with one account per line. The second is an HTTP-based login mechanism, backed by simple PHP script and a database. Creating an Account File ======================== To use the first form, create a file (for example ``BASEDIR/private/accounts``) in which each non-comment/non-blank line is a space-separated line of (USERNAME, PASSWORD, ROOTCAP), like so:: % cat BASEDIR/private/accounts # This is a password line, (username, password, cap) alice password URI:DIR2:ioej8xmzrwilg772gzj4fhdg7a:wtiizszzz2rgmczv4wl6bqvbv33ag4kvbr6prz3u6w3geixa6m6a bob sekrit URI:DIR2:6bdmeitystckbl9yqlw7g56f4e:serp5ioqxnh34mlbmzwvkp3odehsyrr7eytt5f64we3k9hhcrcja Future versions of Tahoe-LAFS may support using client public keys for SFTP. The words "ssh-rsa" and "ssh-dsa" after the username are reserved to specify the public key format, so users cannot have a password equal to either of these strings. Now add an ``accounts.file`` directive to your ``tahoe.cfg`` file, as described in the next sections. Running An Account Server (accounts.url) ======================================== The accounts.url directive allows access requests to be controlled by an HTTP-based login service, useful for centralized deployments. This was used by AllMyData to provide web-based file access, where the service used a simple PHP script and database lookups to map an account email address and password to a Tahoe-LAFS directory cap. The service will receive a multipart/form-data POST, just like one created with a
and fields, with three parameters: • action: "authenticate" (this is a static string, for backwards compatibility with the old AllMyData authentication service) • email: USERNAME (Tahoe-LAFS has no notion of email addresses, but the authentication service uses them as account names, so the interface presents this argument as "email" rather than "username"). • passwd: PASSWORD It should return a single string that either contains a Tahoe-LAFS directory cap (URI:DIR2:...), or "0" to indicate a login failure. Tahoe-LAFS recommends the service be secure, preferably localhost-only. This makes it harder for attackers to brute force the password or use DNS poisoning to cause the Tahoe-LAFS gateway to talk with the wrong server, thereby revealing the usernames and passwords. Configuring SFTP Access ======================= The Tahoe-LAFS SFTP server requires a host keypair, just like the regular SSH server. It is important to give each server a distinct keypair, to prevent one server from masquerading as different one. The first time a client program talks to a given server, it will store the host key it receives, and will complain if a subsequent connection uses a different key. This reduces the opportunity for man-in-the-middle attacks to just the first connection. Exercise caution when connecting to the SFTP server remotely. The AES implementation used by the SFTP code does not have defenses against timing attacks. The code for encrypting the SFTP connection was not written by the Tahoe-LAFS team, and we have not reviewed it as carefully as we have reviewed the code for encrypting files and directories in Tahoe-LAFS itself. If you can connect to the SFTP server (which is provided by the Tahoe-LAFS gateway) only from a client on the same host, then you would be safe from any problem with the SFTP connection security. The examples given below enforce this policy by including ":interface=127.0.0.1" in the "port" option, which causes the server to only accept connections from localhost. You will use directives in the tahoe.cfg file to tell the SFTP code where to find these keys. To create one, use the ``ssh-keygen`` tool (which comes with the standard OpenSSH client distribution):: % cd BASEDIR % ssh-keygen -f private/ssh_host_rsa_key The server private key file must not have a passphrase. Then, to enable the SFTP server with an accounts file, add the following lines to the BASEDIR/tahoe.cfg file:: [sftpd] enabled = true port = tcp:8022:interface=127.0.0.1 host_pubkey_file = private/ssh_host_rsa_key.pub host_privkey_file = private/ssh_host_rsa_key accounts.file = private/accounts The SFTP server will listen on the given port number and on the loopback interface only. The "accounts.file" pathname will be interpreted relative to the node's BASEDIR. Or, to use an account server instead, do this:: [sftpd] enabled = true port = tcp:8022:interface=127.0.0.1 host_pubkey_file = private/ssh_host_rsa_key.pub host_privkey_file = private/ssh_host_rsa_key accounts.url = https://example.com/login You can provide both accounts.file and accounts.url, although it probably isn't very useful except for testing. For further information on SFTP compatibility and known issues with various clients and with the sshfs filesystem, see wiki:SftpFrontend_ .. 
_wiki:SftpFrontend: https://tahoe-lafs.org/trac/tahoe-lafs/wiki/SftpFrontend Configuring FTP Access ====================== To enable the FTP server with an accounts file, add the following lines to the BASEDIR/tahoe.cfg file:: [ftpd] enabled = true port = tcp:8021:interface=127.0.0.1 accounts.file = private/accounts The FTP server will listen on the given port number and on the loopback interface only. The "accounts.file" pathname will be interpreted relative to the node's BASEDIR. To enable the FTP server with an account server instead, provide the URL of that server in an "accounts.url" directive:: [ftpd] enabled = true port = tcp:8021:interface=127.0.0.1 accounts.url = https://example.com/login You can provide both accounts.file and accounts.url, although it probably isn't very useful except for testing. FTP provides no security, and so your password or caps could be eavesdropped if you connect to the FTP server remotely. The examples above include ":interface=127.0.0.1" in the "port" option, which causes the server to only accept connections from localhost. Dependencies ============ The Tahoe-LAFS SFTP server requires the Twisted "Conch" component (a "conch" is a twisted shell, get it?). Many Linux distributions package the Conch code separately: debian puts it in the "python-twisted-conch" package. Conch requires the "pycrypto" package, which is a Python+C implementation of many cryptographic functions (the debian package is named "python-crypto"). Note that "pycrypto" is different than the "pycryptopp" package that Tahoe-LAFS uses (which is a Python wrapper around the C++ -based Crypto++ library, a library that is frequently installed as /usr/lib/libcryptopp.a, to avoid problems with non-alphanumerics in filenames). Immutable and Mutable Files =========================== All files created via SFTP (and FTP) are immutable files. However, files can only be created in writeable directories, which allows the directory entry to be relinked to a different file. Normally, when the path of an immutable file is opened for writing by SFTP, the directory entry is relinked to another file with the newly written contents when the file handle is closed. The old file is still present on the grid, and any other caps to it will remain valid. (See `docs/garbage-collection.rst`_ for how to reclaim the space used by files that are no longer needed.) The 'no-write' metadata field of a directory entry can override this behaviour. If the 'no-write' field holds a true value, then a permission error will occur when trying to write to the file, even if it is in a writeable directory. This does not prevent the directory entry from being unlinked or replaced. When using sshfs, the 'no-write' field can be set by clearing the 'w' bits in the Unix permissions, for example using the command ``chmod 444 path/to/file``. Note that this does not mean that arbitrary combinations of Unix permissions are supported. If the 'w' bits are cleared on a link to a mutable file or directory, that link will become read-only. If SFTP is used to write to an existing mutable file, it will publish a new version when the file handle is closed. .. _docs/garbage-collection.rst: file:../garbage-collection.rst Known Issues ============ Known Issues in the SFTP Frontend --------------------------------- Upload errors may not be reported when writing files using SFTP via sshfs (`ticket #1059`_). Non-ASCII filenames are supported with SFTP only if the client encodes filenames as UTF-8 (`ticket #1089`_). See also wiki:SftpFrontend_. .. 
_ticket #1059: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1059 .. _ticket #1089: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1089 Known Issues in the FTP Frontend -------------------------------- Mutable files are not supported by the FTP frontend (`ticket #680`_). Non-ASCII filenames are not supported by FTP (`ticket #682`_). The FTP frontend sometimes fails to report errors, for example if an upload fails because it does meet the "servers of happiness" threshold (`ticket #1081`_). .. _ticket #680: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/680 .. _ticket #682: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/682 .. _ticket #1081: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1081 tahoe-lafs-1.10.0/docs/frontends/download-status.rst000066400000000000000000000122201221140116300224140ustar00rootroot00000000000000=============== Download status =============== Introduction ============ The WUI will display the "status" of uploads and downloads. The Welcome Page has a link entitled "Recent Uploads and Downloads" which goes to this URL: http://$GATEWAY/status Each entry in the list of recent operations has a "status" link which will take you to a page describing that operation. For immutable downloads, the page has a lot of information, and this document is to explain what it all means. It was written by Brian Warner, who wrote the v1.8.0 downloader code and the code which generates this status report about the v1.8.0 downloader's behavior. Brian posted it to the trac: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1169#comment:1 Then Zooko lightly edited it while copying it into the docs/ directory. What's involved in a download? ============================== Downloads are triggered by read() calls, each with a starting offset (defaults to 0) and a length (defaults to the whole file). A regular web-API GET request will result in a whole-file read() call. Each read() call turns into an ordered sequence of get_segment() calls. A whole-file read will fetch all segments, in order, but partial reads or multiple simultaneous reads will result in random-access of segments. Segment reads always return ciphertext: the layer above that (in read()) is responsible for decryption. Before we can satisfy any segment reads, we need to find some shares. ("DYHB" is an abbreviation for "Do You Have Block", and is the message we send to storage servers to ask them if they have any shares for us. The name is historical, from Mojo Nation/Mnet/Mountain View, but nicely distinctive. Tahoe-LAFS's actual message name is remote_get_buckets().). Responses come back eventually, or don't. Once we get enough positive DYHB responses, we have enough shares to start downloading. We send "block requests" for various pieces of the share. Responses come back eventually, or don't. When we get enough block-request responses for a given segment, we can decode the data and satisfy the segment read. When the segment read completes, some or all of the segment data is used to satisfy the read() call (if the read call started or ended in the middle of a segment, we'll only use part of the data, otherwise we'll use all of it). Data on the download-status page ================================ DYHB Requests ------------- This shows every Do-You-Have-Block query sent to storage servers and their results. Each line shows the following: * the serverid to which the request was sent * the time at which the request was sent. 
Note that all timestamps are relative to the start of the first read() call and indicated with a "+" sign * the time at which the response was received (if ever) * the share numbers that the server has, if any * the elapsed time taken by the request Also, each line is colored according to the serverid. This color is also used in the "Requests" section below. Read Events ----------- This shows all the FileNode read() calls and their overall results. Each line shows: * the range of the file that was requested (as [OFFSET:+LENGTH]). A whole-file GET will start at 0 and read the entire file. * the time at which the read() was made * the time at which the request finished, either because the last byte of data was returned to the read() caller, or because they cancelled the read by calling stopProducing (i.e. closing the HTTP connection) * the number of bytes returned to the caller so far * the time spent on the read, so far * the total time spent in AES decryption * total time spend paused by the client (pauseProducing), generally because the HTTP connection filled up, which most streaming media players will do to limit how much data they have to buffer * effective speed of the read(), not including paused time Segment Events -------------- This shows each get_segment() call and its resolution. This table is not well organized, and my post-1.8.0 work will clean it up a lot. In its present form, it records "request" and "delivery" events separately, indicated by the "type" column. Each request shows the segment number being requested and the time at which the get_segment() call was made. Each delivery shows: * segment number * range of file data (as [OFFSET:+SIZE]) delivered * elapsed time spent doing ZFEC decoding * overall elapsed time fetching the segment * effective speed of the segment fetch Requests -------- This shows every block-request sent to the storage servers. Each line shows: * the server to which the request was sent * which share number it is referencing * the portion of the share data being requested (as [OFFSET:+SIZE]) * the time the request was sent * the time the response was received (if ever) * the amount of data that was received (which might be less than SIZE if we tried to read off the end of the share) * the elapsed time for the request (RTT=Round-Trip-Time) Also note that each Request line is colored according to the serverid it was sent to. And all timestamps are shown relative to the start of the first read() call: for example the first DYHB message was sent at +0.001393s about 1.4 milliseconds after the read() call started everything off. tahoe-lafs-1.10.0/docs/frontends/drop-upload.rst000066400000000000000000000155041221140116300215220ustar00rootroot00000000000000=============================== Tahoe-LAFS Drop-Upload Frontend =============================== 1. `Introduction`_ 2. `Configuration`_ 3. `Known Issues and Limitations`_ Introduction ============ The drop-upload frontend allows an upload to a Tahoe-LAFS grid to be triggered automatically whenever a file is created or changed in a specific local directory. This is a preview of a feature that we expect to support across several platforms, but it currently works only on Linux. The implementation was written as a prototype at the First International Tahoe-LAFS Summit in June 2011, and is not currently in as mature a state as the other frontends (web, CLI, SFTP and FTP). 
This means that you probably should not keep important data in the upload directory, and should not rely on all changes to files in the local directory to result in successful uploads. There might be (and have been) incompatible changes to how the feature is configured. There is even the possibility that it may be abandoned, for example if unsolveable reliability issues are found. We are very interested in feedback on how well this feature works for you, and suggestions to improve its usability, functionality, and reliability. Configuration ============= The drop-upload frontend runs as part of a gateway node. To set it up, you need to choose the local directory to monitor for file changes, and a mutable directory on the grid to which files will be uploaded. These settings are configured in the ``[drop_upload]`` section of the gateway's ``tahoe.cfg`` file. ``[drop_upload]`` ``enabled = (boolean, optional)`` If this is ``True``, drop-upload will be enabled. The default value is ``False``. ``local.directory = (UTF-8 path)`` This specifies the local directory to be monitored for new or changed files. If the path contains non-ASCII characters, it should be encoded in UTF-8 regardless of the system's filesystem encoding. Relative paths will be interpreted starting from the node's base directory. In addition, the file ``private/drop_upload_dircap`` must contain a writecap pointing to an existing mutable directory to be used as the target of uploads. It will start with ``URI:DIR2:``, and cannot include an alias or path. After setting the above fields and starting or restarting the gateway, you can confirm that the feature is working by copying a file into the local directory. Then, use the WUI or CLI to check that it has appeared in the upload directory with the same filename. A large file may take some time to appear, since it is only linked into the directory after the upload has completed. The 'Operational Statistics' page linked from the Welcome page shows counts of the number of files uploaded, the number of change events currently queued, and the number of failed uploads. The 'Recent Uploads and Downloads' page and the node log_ may be helpful to determine the cause of any failures. .. _log: ../logging.rst Known Issues and Limitations ============================ This frontend only works on Linux. There is an even-more-experimental implementation for Windows (`#1431`_), and a ticket to add support for Mac OS X and BSD-based systems (`#1432`_). Subdirectories of the local directory are not monitored. If a subdirectory is created, it will be ignored. (`#1433`_) If files are created or changed in the local directory just after the gateway has started, it might not have connected to a sufficient number of servers when the upload is attempted, causing the upload to fail. (`#1449`_) Files that were created or changed in the local directory while the gateway was not running, will not be uploaded. (`#1458`_) The only way to determine whether uploads have failed is to look at the 'Operational Statistics' page linked from the Welcome page. This only shows a count of failures, not the names of files. Uploads are never retried. The drop-upload frontend performs its uploads sequentially (i.e. it waits until each upload is finished before starting the next), even when there would be enough memory and bandwidth to efficiently perform them in parallel. A drop-upload can occur in parallel with an upload by a different frontend, though. 
(`#1459`_) If there are a large number of near-simultaneous file creation or change events (greater than the number specified in the file ``/proc/sys/fs/inotify/max_queued_events``), it is possible that some events could be missed. This is fairly unlikely under normal circumstances, because the default value of ``max_queued_events`` in most Linux distributions is 16384, and events are removed from this queue immediately without waiting for the corresponding upload to complete. (`#1430`_) Some filesystems may not support the necessary change notifications. So, it is recommended for the local directory to be on a directly attached disk-based filesystem, not a network filesystem or one provided by a virtual machine. Attempts to read the mutable directory at about the same time as an uploaded file is being linked into it, might fail, even if they are done through the same gateway. (`#1105`_) When a local file is changed and closed several times in quick succession, it may be uploaded more times than necessary to keep the remote copy up-to-date. (`#1440`_) Files deleted from the local directory will not be unlinked from the upload directory. (`#1710`_) The ``private/drop_upload_dircap`` file cannot use an alias or path to specify the upload directory. (`#1711`_) Files are always uploaded as immutable. If there is an existing mutable file of the same name in the upload directory, it will be unlinked and replaced with an immutable file. (`#1712`_) If a file in the upload directory is changed (actually relinked to a new file), then the old file is still present on the grid, and any other caps to it will remain valid. See `docs/garbage-collection.rst <../garbage-collection.rst>`_ for how to reclaim the space used by files that are no longer needed. Unicode names are supported, but the local name of a file must be encoded correctly in order for it to be uploaded. The expected encoding is that printed by ``python -c "import sys; print sys.getfilesystemencoding()"``. .. _`#1105`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1105 .. _`#1430`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1430 .. _`#1431`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1431 .. _`#1432`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1432 .. _`#1433`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1433 .. _`#1440`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1440 .. _`#1449`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1449 .. _`#1458`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1458 .. _`#1459`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1459 .. _`#1710`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1710 .. _`#1711`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1711 .. _`#1712`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1712 tahoe-lafs-1.10.0/docs/frontends/webapi.rst000066400000000000000000003203671221140116300205510ustar00rootroot00000000000000========================== The Tahoe REST-ful Web API ========================== 1. `Enabling the web-API port`_ 2. `Basic Concepts: GET, PUT, DELETE, POST`_ 3. `URLs`_ 1. `Child Lookup`_ 4. `Slow Operations, Progress, and Cancelling`_ 5. `Programmatic Operations`_ 1. `Reading a file`_ 2. `Writing/Uploading a File`_ 3. `Creating a New Directory`_ 4. `Getting Information About a File Or Directory (as JSON)`_ 5. `Attaching an Existing File or Directory by its read- or write-cap`_ 6. `Adding Multiple Files or Directories to a Parent Directory at Once`_ 7. `Unlinking a File or Directory`_ 6. `Browser Operations: Human-Oriented Interfaces`_ 1. 
`Viewing a Directory (as HTML)`_ 2. `Viewing/Downloading a File`_ 3. `Getting Information About a File Or Directory (as HTML)`_ 4. `Creating a Directory`_ 5. `Uploading a File`_ 6. `Attaching an Existing File Or Directory (by URI)`_ 7. `Unlinking a Child`_ 8. `Renaming a Child`_ 9. `Relinking ("Moving") a Child`_ 10. `Other Utilities`_ 11. `Debugging and Testing Features`_ 7. `Other Useful Pages`_ 8. `Static Files in /public_html`_ 9. `Safety and Security Issues -- Names vs. URIs`_ 10. `Concurrency Issues`_ 11. `Access Blacklist`_ Enabling the web-API port ========================= Every Tahoe node is capable of running a built-in HTTP server. To enable this, just write a port number into the "[node]web.port" line of your node's tahoe.cfg file. For example, writing "web.port = 3456" into the "[node]" section of $NODEDIR/tahoe.cfg will cause the node to run a webserver on port 3456. This string is actually a Twisted "strports" specification, meaning you can get more control over the interface to which the server binds by supplying additional arguments. For more details, see the documentation on `twisted.application.strports `_. Writing "tcp:3456:interface=127.0.0.1" into the web.port line does the same but binds to the loopback interface, ensuring that only the programs on the local host can connect. Using "ssl:3456:privateKey=mykey.pem:certKey=cert.pem" runs an SSL server. This webport can be set when the node is created by passing a --webport option to the 'tahoe create-node' command. By default, the node listens on port 3456, on the loopback (127.0.0.1) interface. Basic Concepts: GET, PUT, DELETE, POST ====================================== As described in `docs/architecture.rst <../architecture.rst>`_, each file and directory in a Tahoe virtual filesystem is referenced by an identifier that combines the designation of the object with the authority to do something with it (such as read or modify the contents). This identifier is called a "read-cap" or "write-cap", depending upon whether it enables read-only or read-write access. These "caps" are also referred to as URIs (which may be confusing because they are not currently `RFC3986 `_-compliant URIs). The Tahoe web-based API is "REST-ful", meaning it implements the concepts of "REpresentational State Transfer": the original scheme by which the World Wide Web was intended to work. Each object (file or directory) is referenced by a URL that includes the read- or write- cap. HTTP methods (GET, PUT, and DELETE) are used to manipulate these objects. You can think of the URL as a noun, and the method as a verb. In REST, the GET method is used to retrieve information about an object, or to retrieve some representation of the object itself. When the object is a file, the basic GET method will simply return the contents of that file. Other variations (generally implemented by adding query parameters to the URL) will return information about the object, such as metadata. GET operations are required to have no side-effects. PUT is used to upload new objects into the filesystem, or to replace an existing link or the contents of a mutable file. DELETE is used to unlink objects from directories. Both PUT and DELETE are required to be idempotent: performing the same operation multiple times must have the same side-effects as only performing it once. POST is used for more complicated actions that cannot be expressed as a GET, PUT, or DELETE. POST operations can be thought of as a method call: sending some message to the object referenced by the URL. 
In Tahoe, POST is also used for operations that must be triggered by an HTML form (including upload and unlinking), because otherwise a regular web browser has no way to accomplish these tasks. In general, everything that can be done with a PUT or DELETE can also be done with a POST. Tahoe's web API is designed for two different kinds of consumer. The first is a program that needs to manipulate the virtual file system. Such programs are expected to use the RESTful interface described above. The second is a human using a standard web browser to work with the filesystem. This user is given a series of HTML pages with links to download files, and forms that use POST actions to upload, rename, and unlink files. When an error occurs, the HTTP response code will be set to an appropriate 400-series code (like 404 Not Found for an unknown childname, or 400 Bad Request when the parameters to a web-API operation are invalid), and the HTTP response body will usually contain a few lines of explanation as to the cause of the error and possible responses. Unusual exceptions may result in a 500 Internal Server Error as a catch-all, with a default response body containing a Nevow-generated HTML-ized representation of the Python exception stack trace that caused the problem. CLI programs which want to copy the response body to stderr should provide an "Accept: text/plain" header to their requests to get a plain text stack trace instead. If the Accept header contains ``*/*``, or ``text/*``, or text/html (or if there is no Accept header), HTML tracebacks will be generated. URLs ==== Tahoe uses a variety of read- and write- caps to identify files and directories. The most common of these is the "immutable file read-cap", which is used for most uploaded files. These read-caps look like the following:: URI:CHK:ime6pvkaxuetdfah2p2f35pe54:4btz54xk3tew6nd4y2ojpxj4m6wxjqqlwnztgre6gnjgtucd5r4a:3:10:202 The next most common is a "directory write-cap", which provides both read and write access to a directory, and look like this:: URI:DIR2:djrdkfawoqihigoett4g6auz6a:jx5mplfpwexnoqff7y5e4zjus4lidm76dcuarpct7cckorh2dpgq There are also "directory read-caps", which start with "URI:DIR2-RO:", and give read-only access to a directory. Finally there are also mutable file read- and write- caps, which start with "URI:SSK", and give access to mutable files. (Later versions of Tahoe will make these strings shorter, and will remove the unfortunate colons, which must be escaped when these caps are embedded in URLs.) To refer to any Tahoe object through the web API, you simply need to combine a prefix (which indicates the HTTP server to use) with the cap (which indicates which object inside that server to access). Since the default Tahoe webport is 3456, the most common prefix is one that will use a local node listening on this port:: http://127.0.0.1:3456/uri/ + $CAP So, to access the directory named above, the URL would be:: http://127.0.0.1:3456/uri/URI%3ADIR2%3Adjrdkfawoqihigoett4g6auz6a%3Ajx5mplfpwexnoqff7y5e4zjus4lidm76dcuarpct7cckorh2dpgq/ (note that the colons in the directory-cap are url-encoded into "%3A" sequences). Likewise, to access the file named above, use:: http://127.0.0.1:3456/uri/URI%3ACHK%3Aime6pvkaxuetdfah2p2f35pe54%3A4btz54xk3tew6nd4y2ojpxj4m6wxjqqlwnztgre6gnjgtucd5r4a%3A3%3A10%3A202 In the rest of this document, we'll use "$DIRCAP" as shorthand for a read-cap or write-cap that refers to a directory, and "$FILECAP" to abbreviate a cap that refers to a file (whether mutable or immutable). 
So those URLs above can be abbreviated as:: http://127.0.0.1:3456/uri/$DIRCAP/ http://127.0.0.1:3456/uri/$FILECAP The operation summaries below will abbreviate these further, by eliding the server prefix. They will be displayed like this:: /uri/$DIRCAP/ /uri/$FILECAP /cap can be used as a synonym for /uri. If interoperability with older web-API servers is required, /uri should be used. Child Lookup ------------ Tahoe directories contain named child entries, just like directories in a regular local filesystem. These child entries, called "dirnodes", consist of a name, metadata, a write slot, and a read slot. The write and read slots normally contain a write-cap and read-cap referring to the same object, which can be either a file or a subdirectory. The write slot may be empty (actually, both may be empty, but that is unusual). If you have a Tahoe URL that refers to a directory, and want to reference a named child inside it, just append the child name to the URL. For example, if our sample directory contains a file named "welcome.txt", we can refer to that file with:: http://127.0.0.1:3456/uri/$DIRCAP/welcome.txt (or http://127.0.0.1:3456/uri/URI%3ADIR2%3Adjrdkfawoqihigoett4g6auz6a%3Ajx5mplfpwexnoqff7y5e4zjus4lidm76dcuarpct7cckorh2dpgq/welcome.txt) Multiple levels of subdirectories can be handled this way:: http://127.0.0.1:3456/uri/$DIRCAP/tahoe-source/docs/architecture.rst In this document, when we need to refer to a URL that references a file using this child-of-some-directory format, we'll use the following string:: /uri/$DIRCAP/[SUBDIRS../]FILENAME The "[SUBDIRS../]" part means that there are zero or more (optional) subdirectory names in the middle of the URL. The "FILENAME" at the end means that this whole URL refers to a file of some sort, rather than to a directory. When we need to refer specifically to a directory in this way, we'll write:: /uri/$DIRCAP/[SUBDIRS../]SUBDIR Note that all components of pathnames in URLs are required to be UTF-8 encoded, so "resume.doc" (with an acute accent on both E's) would be accessed with:: http://127.0.0.1:3456/uri/$DIRCAP/r%C3%A9sum%C3%A9.doc Also note that the filenames inside upload POST forms are interpreted using whatever character set was provided in the conventional '_charset' field, and defaults to UTF-8 if not otherwise specified. The JSON representation of each directory contains native Unicode strings. Tahoe directories are specified to contain Unicode filenames, and cannot contain binary strings that are not representable as such. All Tahoe operations that refer to existing files or directories must include a suitable read- or write- cap in the URL: the web-API server won't add one for you. If you don't know the cap, you can't access the file. This allows the security properties of Tahoe caps to be extended across the web-API interface. Slow Operations, Progress, and Cancelling ========================================= Certain operations can be expected to take a long time. The "t=deep-check", described below, will recursively visit every file and directory reachable from a given starting point, which can take minutes or even hours for extremely large directory structures. A single long-running HTTP request is a fragile thing: proxies, NAT boxes, browsers, and users may all grow impatient with waiting and give up on the connection. For this reason, long-running operations have an "operation handle", which can be used to poll for status/progress messages while the operation proceeds. This handle can also be used to cancel the operation. 
These handles are created by the client, and passed in as a an "ophandle=" query argument to the POST or PUT request which starts the operation. The following operations can then be used to retrieve status: ``GET /operations/$HANDLE?output=HTML (with or without t=status)`` ``GET /operations/$HANDLE?output=JSON (same)`` These two retrieve the current status of the given operation. Each operation presents a different sort of information, but in general the page retrieved will indicate: * whether the operation is complete, or if it is still running * how much of the operation is complete, and how much is left, if possible Note that the final status output can be quite large: a deep-manifest of a directory structure with 300k directories and 200k unique files is about 275MB of JSON, and might take two minutes to generate. For this reason, the full status is not provided until the operation has completed. The HTML form will include a meta-refresh tag, which will cause a regular web browser to reload the status page about 60 seconds later. This tag will be removed once the operation has completed. There may be more status information available under /operations/$HANDLE/$ETC : i.e., the handle forms the root of a URL space. ``POST /operations/$HANDLE?t=cancel`` This terminates the operation, and returns an HTML page explaining what was cancelled. If the operation handle has already expired (see below), this POST will return a 404, which indicates that the operation is no longer running (either it was completed or terminated). The response body will be the same as a GET /operations/$HANDLE on this operation handle, and the handle will be expired immediately afterwards. The operation handle will eventually expire, to avoid consuming an unbounded amount of memory. The handle's time-to-live can be reset at any time, by passing a retain-for= argument (with a count of seconds) to either the initial POST that starts the operation, or the subsequent GET request which asks about the operation. For example, if a 'GET /operations/$HANDLE?output=JSON&retain-for=600' query is performed, the handle will remain active for 600 seconds (10 minutes) after the GET was received. In addition, if the GET includes a release-after-complete=True argument, and the operation has completed, the operation handle will be released immediately. If a retain-for= argument is not used, the default handle lifetimes are: * handles will remain valid at least until their operation finishes * uncollected handles for finished operations (i.e. handles for operations that have finished but for which the GET page has not been accessed since completion) will remain valid for four days, or for the total time consumed by the operation, whichever is greater. * collected handles (i.e. the GET page has been retrieved at least once since the operation completed) will remain valid for one day. Many "slow" operations can begin to use unacceptable amounts of memory when operating on large directory structures. The memory usage increases when the ophandle is polled, as the results must be copied into a JSON string, sent over the wire, then parsed by a client. So, as an alternative, many "slow" operations have streaming equivalents. These equivalents do not use operation handles. Instead, they emit line-oriented status results immediately. Client code can cancel the operation by simply closing the HTTP connection. 
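As a rough illustration, a polling client might look like the following sketch (Python with the ``requests`` library; the node address and directory cap are placeholders, and the handle value is simply invented by the client)::

  # Sketch: start a slow operation with a client-chosen ophandle, then poll it.
  # Assumes a local node on 127.0.0.1:3456 and a placeholder directory cap.
  import os
  import time
  import urllib.parse
  import requests

  BASE = "http://127.0.0.1:3456"
  DIRCAP = "URI:DIR2:..."            # placeholder
  handle = os.urandom(8).hex()       # the client invents the handle

  # Kick off a deep-check; the response redirects to /operations/$HANDLE.
  requests.post(
      BASE + "/uri/" + urllib.parse.quote(DIRCAP, safe="") + "/",
      params={"t": "start-deep-check", "ophandle": handle},
  )

  # Poll the status page, renewing the handle's time-to-live on each poll.
  while True:
      status = requests.get(
          BASE + "/operations/" + handle,
          params={"output": "JSON", "retain-for": "600"},
      ).json()
      if status["finished"]:
          break
      time.sleep(10)

  # To abandon a still-running operation instead:
  #   requests.post(BASE + "/operations/" + handle, params={"t": "cancel"})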
Programmatic Operations ======================= Now that we know how to build URLs that refer to files and directories in a Tahoe virtual filesystem, what sorts of operations can we do with those URLs? This section contains a catalog of GET, PUT, DELETE, and POST operations that can be performed on these URLs. This set of operations are aimed at programs that use HTTP to communicate with a Tahoe node. A later section describes operations that are intended for web browsers. Reading a File -------------- ``GET /uri/$FILECAP`` ``GET /uri/$DIRCAP/[SUBDIRS../]FILENAME`` This will retrieve the contents of the given file. The HTTP response body will contain the sequence of bytes that make up the file. To view files in a web browser, you may want more control over the Content-Type and Content-Disposition headers. Please see the next section "Browser Operations", for details on how to modify these URLs for that purpose. Writing/Uploading a File ------------------------ ``PUT /uri/$FILECAP`` ``PUT /uri/$DIRCAP/[SUBDIRS../]FILENAME`` Upload a file, using the data from the HTTP request body, and add whatever child links and subdirectories are necessary to make the file available at the given location. Once this operation succeeds, a GET on the same URL will retrieve the same contents that were just uploaded. This will create any necessary intermediate subdirectories. To use the /uri/$FILECAP form, $FILECAP must be a write-cap for a mutable file. In the /uri/$DIRCAP/[SUBDIRS../]FILENAME form, if the target file is a writeable mutable file, that file's contents will be overwritten in-place. If it is a read-cap for a mutable file, an error will occur. If it is an immutable file, the old file will be discarded, and a new one will be put in its place. If the target file is a writable mutable file, you may also specify an "offset" parameter -- a byte offset that determines where in the mutable file the data from the HTTP request body is placed. This operation is relatively efficient for MDMF mutable files, and is relatively inefficient (but still supported) for SDMF mutable files. If no offset parameter is specified, then the entire file is replaced with the data from the HTTP request body. For an immutable file, the "offset" parameter is not valid. When creating a new file, you can control the type of file created by specifying a format= argument in the query string. format=MDMF creates an MDMF mutable file. format=SDMF creates an SDMF mutable file. format=CHK creates an immutable file. The value of the format argument is case-insensitive. If no format is specified, the newly-created file will be immutable (but see below). For compatibility with previous versions of Tahoe-LAFS, the web-API will also accept a mutable=true argument in the query string. If mutable=true is given, then the new file will be mutable, and its format will be the default mutable file format, as configured by the [client]mutable.format option of tahoe.cfg on the Tahoe-LAFS node hosting the webapi server. Use of mutable=true is discouraged; new code should use format= instead of mutable=true (unless it needs to be compatible with web-API servers older than v1.9.0). If neither format= nor mutable=true are given, the newly-created file will be immutable. This returns the file-cap of the resulting file. If a new file was created by this method, the HTTP response code (as dictated by rfc2616) will be set to 201 CREATED. If an existing file was replaced or modified, the response code will be 200 OK. 
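The following sketch (Python with ``requests``; the directory cap and filename are placeholders) shows one way to drive this operation, creating an MDMF mutable file and then writing at an offset; the curl one-liner noted below covers the simpler immutable case::

  # Sketch: create an MDMF mutable file at a given name, then overwrite a
  # slice of it with the offset= parameter. Placeholder cap; assumes requests.
  import urllib.parse
  import requests

  BASE = "http://127.0.0.1:3456"
  DIRCAP = "URI:DIR2:..."            # placeholder write-cap
  url = BASE + "/uri/" + urllib.parse.quote(DIRCAP, safe="") + "/notes.txt"

  # format=MDMF makes the new file mutable (MDMF); omit it for an immutable file.
  resp = requests.put(url, params={"format": "MDMF"}, data=b"first draft\n")
  print(resp.status_code)        # 201 if the file was created, 200 if replaced
  filecap = resp.text            # the write-cap of the new mutable file

  # Overwrite bytes starting at offset 6, leaving the rest of the file intact.
  requests.put(url, params={"offset": "6"}, data=b"DRAFT\n")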
Note that the 'curl -T localfile http://127.0.0.1:3456/uri/$DIRCAP/foo.txt' command can be used to invoke this operation. ``PUT /uri`` This uploads a file, and produces a file-cap for the contents, but does not attach the file into the filesystem. No directories will be modified by this operation. The file-cap is returned as the body of the HTTP response. This method accepts format= and mutable=true as query string arguments, and interprets those arguments in the same way as the linked forms of PUT described immediately above. Creating a New Directory ------------------------ ``POST /uri?t=mkdir`` ``PUT /uri?t=mkdir`` Create a new empty directory and return its write-cap as the HTTP response body. This does not make the newly created directory visible from the filesystem. The "PUT" operation is provided for backwards compatibility: new code should use POST. This supports a format= argument in the query string. The format= argument, if specified, controls the format of the directory. format=MDMF indicates that the directory should be stored as an MDMF file; format=SDMF indicates that the directory should be stored as an SDMF file. The value of the format= argument is case-insensitive. If no format= argument is given, the directory's format is determined by the default mutable file format, as configured on the Tahoe-LAFS node responding to the request. ``POST /uri?t=mkdir-with-children`` Create a new directory, populated with a set of child nodes, and return its write-cap as the HTTP response body. The new directory is not attached to any other directory: the returned write-cap is the only reference to it. The format of the directory can be controlled with the format= argument in the query string, as described above. Initial children are provided as the body of the POST form (this is more efficient than doing separate mkdir and set_children operations). If the body is empty, the new directory will be empty. If not empty, the body will be interpreted as a UTF-8 JSON-encoded dictionary of children with which the new directory should be populated, using the same format as would be returned in the 'children' value of the t=json GET request, described below. Each dictionary key should be a child name, and each value should be a list of [TYPE, PROPDICT], where PROPDICT contains "rw_uri", "ro_uri", and "metadata" keys (all others are ignored). For example, the PUT request body could be:: { "Fran\u00e7ais": [ "filenode", { "ro_uri": "URI:CHK:...", "metadata": { "ctime": 1202777696.7564139, "mtime": 1202777696.7564139, "tahoe": { "linkcrtime": 1202777696.7564139, "linkmotime": 1202777696.7564139 } } } ], "subdir": [ "dirnode", { "rw_uri": "URI:DIR2:...", "ro_uri": "URI:DIR2-RO:...", "metadata": { "ctime": 1202778102.7589991, "mtime": 1202778111.2160511, "tahoe": { "linkcrtime": 1202777696.7564139, "linkmotime": 1202777696.7564139 } } } ] } For forward-compatibility, a mutable directory can also contain caps in a format that is unknown to the web-API server. When such caps are retrieved from a mutable directory in a "ro_uri" field, they will be prefixed with the string "ro.", indicating that they must not be decoded without checking that they are read-only. The "ro." prefix must not be stripped off without performing this check. (Future versions of the web-API server will perform it where necessary.) 
If both the "rw_uri" and "ro_uri" fields are present in a given PROPDICT, and the web-API server recognizes the rw_uri as a write cap, then it will reset the ro_uri to the corresponding read cap and discard the original contents of ro_uri (in order to ensure that the two caps correspond to the same object and that the ro_uri is in fact read-only). However this may not happen for caps in a format unknown to the web-API server. Therefore, when writing a directory the web-API client should ensure that the contents of "rw_uri" and "ro_uri" for a given PROPDICT are a consistent (write cap, read cap) pair if possible. If the web-API client only has one cap and does not know whether it is a write cap or read cap, then it is acceptable to set "rw_uri" to that cap and omit "ro_uri". The client must not put a write cap into a "ro_uri" field. The metadata may have a "no-write" field. If this is set to true in the metadata of a link, it will not be possible to open that link for writing via the SFTP frontend; see ``_ for details. Also, if the "no-write" field is set to true in the metadata of a link to a mutable child, it will cause the link to be diminished to read-only. Note that the web-API-using client application must not provide the "Content-Type: multipart/form-data" header that usually accompanies HTML form submissions, since the body is not formatted this way. Doing so will cause a server error as the lower-level code misparses the request body. Child file names should each be expressed as a Unicode string, then used as keys of the dictionary. The dictionary should then be converted into JSON, and the resulting string encoded into UTF-8. This UTF-8 bytestring should then be used as the POST body. ``POST /uri?t=mkdir-immutable`` Like t=mkdir-with-children above, but the new directory will be deep-immutable. This means that the directory itself is immutable, and that it can only contain objects that are treated as being deep-immutable, like immutable files, literal files, and deep-immutable directories. For forward-compatibility, a deep-immutable directory can also contain caps in a format that is unknown to the web-API server. When such caps are retrieved from a deep-immutable directory in a "ro_uri" field, they will be prefixed with the string "imm.", indicating that they must not be decoded without checking that they are immutable. The "imm." prefix must not be stripped off without performing this check. (Future versions of the web-API server will perform it where necessary.) The cap for each child may be given either in the "rw_uri" or "ro_uri" field of the PROPDICT (not both). If a cap is given in the "rw_uri" field, then the web-API server will check that it is an immutable read-cap of a *known* format, and give an error if it is not. If a cap is given in the "ro_uri" field, then the web-API server will still check whether known caps are immutable, but for unknown caps it will simply assume that the cap can be stored, as described above. Note that an attacker would be able to store any cap in an immutable directory, so this check when creating the directory is only to help non-malicious clients to avoid accidentally giving away more authority than intended. A non-empty request body is mandatory, since after the directory is created, it will not be possible to add more children to it. 
``POST /uri/$DIRCAP/[SUBDIRS../]SUBDIR?t=mkdir`` ``PUT /uri/$DIRCAP/[SUBDIRS../]SUBDIR?t=mkdir`` Create new directories as necessary to make sure that the named target ($DIRCAP/SUBDIRS../SUBDIR) is a directory. This will create additional intermediate mutable directories as necessary. If the named target directory already exists, this will make no changes to it. If the final directory is created, it will be empty. This accepts a format= argument in the query string, which controls the format of the named target directory, if it does not already exist. format= is interpreted in the same way as in the POST /uri?t=mkdir form. Note that format= only controls the format of the named target directory; intermediate directories, if created, are created based on the default mutable type, as configured on the Tahoe-LAFS server responding to the request. This operation will return an error if a blocking file is present at any of the parent names, preventing the server from creating the necessary parent directory; or if it would require changing an immutable directory. The write-cap of the new directory will be returned as the HTTP response body. ``POST /uri/$DIRCAP/[SUBDIRS../]SUBDIR?t=mkdir-with-children`` Like /uri?t=mkdir-with-children, but the final directory is created as a child of an existing mutable directory. This will create additional intermediate mutable directories as necessary. If the final directory is created, it will be populated with initial children from the POST request body, as described above. This accepts a format= argument in the query string, which controls the format of the target directory, if the target directory is created as part of the operation. format= is interpreted in the same way as in the POST /uri?t=mkdir-with-children operation. Note that format= only controls the format of the named target directory; intermediate directories, if created, are created using the default mutable type setting, as configured on the Tahoe-LAFS server responding to the request. This operation will return an error if a blocking file is present at any of the parent names, preventing the server from creating the necessary parent directory; or if it would require changing an immutable directory; or if the immediate parent directory already has a child named SUBDIR. ``POST /uri/$DIRCAP/[SUBDIRS../]SUBDIR?t=mkdir-immutable`` Like /uri?t=mkdir-immutable, but the final directory is created as a child of an existing mutable directory. The final directory will be deep-immutable, and will be populated with the children specified as a JSON dictionary in the POST request body. In Tahoe 1.6 this operation creates intermediate mutable directories if necessary, but that behaviour should not be relied on; see ticket #920. This operation will return an error if the parent directory is immutable, or already has a child named SUBDIR. ``POST /uri/$DIRCAP/[SUBDIRS../]?t=mkdir&name=NAME`` Create a new empty mutable directory and attach it to the given existing directory. This will create additional intermediate directories as necessary. This accepts a format= argument in the query string, which controls the format of the named target directory, if it does not already exist. format= is interpreted in the same way as in the POST /uri?t=mkdir form. Note that format= only controls the format of the named target directory; intermediate directories, if created, are created based on the default mutable type, as configured on the Tahoe-LAFS server responding to the request.
This operation will return an error if a blocking file is present at any of the parent names, preventing the server from creating the necessary parent directory, or if it would require changing any immutable directory. The URL of this operation points to the parent of the bottommost new directory, whereas the /uri/$DIRCAP/[SUBDIRS../]SUBDIR?t=mkdir operation above has a URL that points directly to the bottommost new directory. ``POST /uri/$DIRCAP/[SUBDIRS../]?t=mkdir-with-children&name=NAME`` Like /uri/$DIRCAP/[SUBDIRS../]?t=mkdir&name=NAME, but the new directory will be populated with initial children via the POST request body. This command will create additional intermediate mutable directories as necessary. This accepts a format= argument in the query string, which controls the format of the target directory, if the target directory is created as part of the operation. format= is interpreted in the same way as in the POST /uri?t=mkdir-with-children operation. Note that format= only controls the format of the named target directory; intermediate directories, if created, are created using the default mutable type setting, as configured on the Tahoe-LAFS server responding to the request. This operation will return an error if a blocking file is present at any of the parent names, preventing the server from creating the necessary parent directory; or if it would require changing an immutable directory; or if the immediate parent directory already has a child named NAME. Note that the name= argument must be passed as a queryarg, because the POST request body is used for the initial children JSON. ``POST /uri/$DIRCAP/[SUBDIRS../]?t=mkdir-immutable&name=NAME`` Like /uri/$DIRCAP/[SUBDIRS../]?t=mkdir-with-children&name=NAME, but the final directory will be deep-immutable. The children are specified as a JSON dictionary in the POST request body. Again, the name= argument must be passed as a queryarg. In Tahoe 1.6 this operation creates intermediate mutable directories if necessary, but that behaviour should not be relied on; see ticket #920. This operation will return an error if the parent directory is immutable, or already has a child named NAME. Getting Information About a File Or Directory (as JSON) ------------------------------------------------------- ``GET /uri/$FILECAP?t=json`` ``GET /uri/$DIRCAP?t=json`` ``GET /uri/$DIRCAP/[SUBDIRS../]SUBDIR?t=json`` ``GET /uri/$DIRCAP/[SUBDIRS../]FILENAME?t=json`` This returns a machine-parseable JSON-encoded description of the given object. The JSON always contains a list, and the first element of the list is always a flag that indicates whether the referenced object is a file or a directory.
If it is a capability to a file, then the information includes file size and URI, like this:: GET /uri/$FILECAP?t=json : [ "filenode", { "ro_uri": file_uri, "verify_uri": verify_uri, "size": bytes, "mutable": false, "format": "CHK" } ] If it is a capability to a directory followed by a path from that directory to a file, then the information also includes metadata from the link to the file in the parent directory, like this:: GET /uri/$DIRCAP/[SUBDIRS../]FILENAME?t=json [ "filenode", { "ro_uri": file_uri, "verify_uri": verify_uri, "size": bytes, "mutable": false, "format": "CHK", "metadata": { "ctime": 1202777696.7564139, "mtime": 1202777696.7564139, "tahoe": { "linkcrtime": 1202777696.7564139, "linkmotime": 1202777696.7564139 } } } ] If it is a directory, then it includes information about the children of this directory, as a mapping from child name to a set of data about the child (the same data that would appear in a corresponding GET?t=json of the child itself). The child entries also include metadata about each child, including link-creation- and link-change- timestamps. The output looks like this:: GET /uri/$DIRCAP?t=json : GET /uri/$DIRCAP/[SUBDIRS../]SUBDIR?t=json : [ "dirnode", { "rw_uri": read_write_uri, "ro_uri": read_only_uri, "verify_uri": verify_uri, "mutable": true, "format": "SDMF", "children": { "foo.txt": [ "filenode", { "ro_uri": uri, "size": bytes, "metadata": { "ctime": 1202777696.7564139, "mtime": 1202777696.7564139, "tahoe": { "linkcrtime": 1202777696.7564139, "linkmotime": 1202777696.7564139 } } } ], "subdir": [ "dirnode", { "rw_uri": rwuri, "ro_uri": rouri, "metadata": { "ctime": 1202778102.7589991, "mtime": 1202778111.2160511, "tahoe": { "linkcrtime": 1202777696.7564139, "linkmotime": 1202777696.7564139 } } } ] } } ] In the above example, note how 'children' is a dictionary in which the keys are child names and the values depend upon whether the child is a file or a directory. The value is mostly the same as the JSON representation of the child object (except that directories do not recurse -- the "children" entry of the child is omitted, and the directory view includes the metadata that is stored on the directory edge). The rw_uri field will be present in the information about a directory if and only if you have read-write access to that directory. The verify_uri field will be present if and only if the object has a verify-cap (non-distributed LIT files do not have verify-caps). If the cap is of an unknown format, then the file size and verify_uri will not be available:: GET /uri/$UNKNOWNCAP?t=json : [ "unknown", { "ro_uri": unknown_read_uri } ] GET /uri/$DIRCAP/[SUBDIRS../]UNKNOWNCHILDNAME?t=json : [ "unknown", { "rw_uri": unknown_write_uri, "ro_uri": unknown_read_uri, "mutable": true, "metadata": { "ctime": 1202777696.7564139, "mtime": 1202777696.7564139, "tahoe": { "linkcrtime": 1202777696.7564139, "linkmotime": 1202777696.7564139 } } } ] As in the case of file nodes, the metadata will only be present when the capability is to a directory followed by a path. The "mutable" field is also not always present; when it is absent, the mutability of the object is not known. About the metadata `````````````````` The value of the 'tahoe':'linkmotime' key is updated whenever a link to a child is set. The value of the 'tahoe':'linkcrtime' key is updated whenever a link to a child is created -- i.e. when there was not previously a link under that name. 
Note however, that if the edge in the Tahoe filesystem points to a mutable file and the contents of that mutable file is changed, then the 'tahoe':'linkmotime' value on that edge will *not* be updated, since the edge itself wasn't updated -- only the mutable file was. The timestamps are represented as a number of seconds since the UNIX epoch (1970-01-01 00:00:00 UTC), with leap seconds not being counted in the long term. In Tahoe earlier than v1.4.0, 'mtime' and 'ctime' keys were populated instead of the 'tahoe':'linkmotime' and 'tahoe':'linkcrtime' keys. Starting in Tahoe v1.4.0, the 'linkmotime'/'linkcrtime' keys in the 'tahoe' sub-dict are populated. However, prior to Tahoe v1.7beta, a bug caused the 'tahoe' sub-dict to be deleted by web-API requests in which new metadata is specified, and not to be added to existing child links that lack it. From Tahoe v1.7.0 onward, the 'mtime' and 'ctime' fields are no longer populated or updated (see ticket #924), except by "tahoe backup" as explained below. For backward compatibility, when an existing link is updated and 'tahoe':'linkcrtime' is not present in the previous metadata but 'ctime' is, the old value of 'ctime' is used as the new value of 'tahoe':'linkcrtime'. The reason we added the new fields in Tahoe v1.4.0 is that there is a "set_children" API (described below) which you can use to overwrite the values of the 'mtime'/'ctime' pair, and this API is used by the "tahoe backup" command (in Tahoe v1.3.0 and later) to set the 'mtime' and 'ctime' values when backing up files from a local filesystem into the Tahoe filesystem. As of Tahoe v1.4.0, the set_children API cannot be used to set anything under the 'tahoe' key of the metadata dict -- if you include 'tahoe' keys in your 'metadata' arguments then it will silently ignore those keys. Therefore, if the 'tahoe' sub-dict is present, you can rely on the 'linkcrtime' and 'linkmotime' values therein to have the semantics described above. (This is assuming that only official Tahoe clients have been used to write those links, and that their system clocks were set to what you expected -- there is nothing preventing someone from editing their Tahoe client or writing their own Tahoe client which would overwrite those values however they like, and there is nothing to constrain their system clock from taking any value.) When an edge is created or updated by "tahoe backup", the 'mtime' and 'ctime' keys on that edge are set as follows: * 'mtime' is set to the timestamp read from the local filesystem for the "mtime" of the local file in question, which means the last time the contents of that file were changed. * On Windows, 'ctime' is set to the creation timestamp for the file read from the local filesystem. On other platforms, 'ctime' is set to the UNIX "ctime" of the local file, which means the last time that either the contents or the metadata of the local file was changed. There are several ways that the 'ctime' field could be confusing: 1. You might be confused about whether it reflects the time of the creation of a link in the Tahoe filesystem (by a version of Tahoe < v1.7.0) or a timestamp copied in by "tahoe backup" from a local filesystem. 2. You might be confused about whether it is a copy of the file creation time (if "tahoe backup" was run on a Windows system) or of the last contents-or-metadata change (if "tahoe backup" was run on a different operating system). 3. 
You might be confused by the fact that changing the contents of a mutable file in Tahoe doesn't have any effect on any links pointing at that file in any directories, although "tahoe backup" sets the link 'ctime'/'mtime' to reflect timestamps about the local file corresponding to the Tahoe file to which the link points. 4. Also, quite apart from Tahoe, you might be confused about the meaning of the "ctime" in UNIX local filesystems, which people sometimes think means file creation time, but which actually means, in UNIX local filesystems, the most recent time that the file contents or the file metadata (such as owner, permission bits, extended attributes, etc.) has changed. Note that although "ctime" does not mean file creation time in UNIX, links created by a version of Tahoe prior to v1.7.0, and never written by "tahoe backup", will have 'ctime' set to the link creation time. Attaching an Existing File or Directory by its read- or write-cap ----------------------------------------------------------------- ``PUT /uri/$DIRCAP/[SUBDIRS../]CHILDNAME?t=uri`` This attaches a child object (either a file or directory) to a specified location in the virtual filesystem. The child object is referenced by its read- or write- cap, as provided in the HTTP request body. This will create intermediate directories as necessary. This is similar to a UNIX hardlink: by referencing a previously-uploaded file (or previously-created directory) instead of uploading/creating a new one, you can create two references to the same object. The read- or write- cap of the child is provided in the body of the HTTP request, and this same cap is returned in the response body. The default behavior is to overwrite any existing object at the same location. To prevent this (and make the operation return an error instead of overwriting), add a "replace=false" argument, as "?t=uri&replace=false". With replace=false, this operation will return an HTTP 409 "Conflict" error if there is already an object at the given location, rather than overwriting the existing object. To allow the operation to overwrite a file, but return an error when trying to overwrite a directory, use "replace=only-files" (this behavior is closer to the traditional UNIX "mv" command). Note that "true", "t", and "1" are all synonyms for "True", and "false", "f", and "0" are synonyms for "False", and the parameter is case-insensitive. Note that this operation does not take its child cap in the form of separate "rw_uri" and "ro_uri" fields. Therefore, it cannot accept a child cap in a format unknown to the web-API server, unless its URI starts with "ro." or "imm.". This restriction is necessary because the server is not able to attenuate an unknown write cap to a read cap. Unknown URIs starting with "ro." or "imm.", on the other hand, are assumed to represent read caps. The client should not prefix a write cap with "ro." or "imm." and pass it to this operation, since that would result in granting the cap's write authority to holders of the directory read cap. Adding Multiple Files or Directories to a Parent Directory at Once ------------------------------------------------------------------ ``POST /uri/$DIRCAP/[SUBDIRS..]?t=set_children`` ``POST /uri/$DIRCAP/[SUBDIRS..]?t=set-children`` (Tahoe >= v1.6) This command adds multiple children to a directory in a single operation. It reads the request body and interprets it as a JSON-encoded description of the child names and read/write-caps that should be added. 
The body should be a JSON-encoded dictionary, in the same format as the "children" value returned by the "GET /uri/$DIRCAP?t=json" operation described above. In this format, each key is a child names, and the corresponding value is a tuple of (type, childinfo). "type" is ignored, and "childinfo" is a dictionary that contains "rw_uri", "ro_uri", and "metadata" keys. You can take the output of "GET /uri/$DIRCAP1?t=json" and use it as the input to "POST /uri/$DIRCAP2?t=set_children" to make DIR2 look very much like DIR1 (except for any existing children of DIR2 that were not overwritten, and any existing "tahoe" metadata keys as described below). When the set_children request contains a child name that already exists in the target directory, this command defaults to overwriting that child with the new value (both child cap and metadata, but if the JSON data does not contain a "metadata" key, the old child's metadata is preserved). The command takes a boolean "overwrite=" query argument to control this behavior. If you use "?t=set_children&overwrite=false", then an attempt to replace an existing child will instead cause an error. Any "tahoe" key in the new child's "metadata" value is ignored. Any existing "tahoe" metadata is preserved. The metadata["tahoe"] value is reserved for metadata generated by the tahoe node itself. The only two keys currently placed here are "linkcrtime" and "linkmotime". For details, see the section above entitled "Getting Information About a File Or Directory (as JSON)", in the "About the metadata" subsection. Note that this command was introduced with the name "set_children", which uses an underscore rather than a hyphen as other multi-word command names do. The variant with a hyphen is now accepted, but clients that desire backward compatibility should continue to use "set_children". Unlinking a File or Directory ----------------------------- ``DELETE /uri/$DIRCAP/[SUBDIRS../]CHILDNAME`` This removes the given name from its parent directory. CHILDNAME is the name to be removed, and $DIRCAP/SUBDIRS.. indicates the directory that will be modified. Note that this does not actually delete the file or directory that the name points to from the tahoe grid -- it only unlinks the named reference from this directory. If there are other names in this directory or in other directories that point to the resource, then it will remain accessible through those paths. Even if all names pointing to this object are removed from their parent directories, then someone with possession of its read-cap can continue to access the object through that cap. The object will only become completely unreachable once 1: there are no reachable directories that reference it, and 2: nobody is holding a read- or write- cap to the object. (This behavior is very similar to the way hardlinks and anonymous files work in traditional UNIX filesystems). This operation will not modify more than a single directory. Intermediate directories which were implicitly created by PUT or POST methods will *not* be automatically removed by DELETE. This method returns the file- or directory- cap of the object that was just removed. Browser Operations: Human-oriented interfaces ============================================= This section describes the HTTP operations that provide support for humans running a web browser. Most of these operations use HTML forms that use POST to drive the Tahoe node. 
This section is intended for HTML authors who want to write web pages that contain forms and buttons which manipulate the Tahoe filesystem. Note that for all POST operations, the arguments listed can be provided either as URL query arguments or as form body fields. URL query arguments are separated from the main URL by "?", and from each other by "&". For example, "POST /uri/$DIRCAP?t=upload&mutable=true". Form body fields are usually specified by using <input type="hidden"> elements. For clarity, the descriptions below display the most significant arguments as URL query args. Viewing a Directory (as HTML) ----------------------------- ``GET /uri/$DIRCAP/[SUBDIRS../]`` This returns an HTML page, intended to be displayed to a human by a web browser, which contains HREF links to all files and directories reachable from this directory. These HREF links do not have a t= argument, meaning that a human who follows them will get pages also meant for a human. It also contains forms to upload new files, and to unlink files and directories from their parent directory. Those forms use POST methods to do their job. Viewing/Downloading a File -------------------------- ``GET /uri/$FILECAP`` ``GET /uri/$DIRCAP/[SUBDIRS../]FILENAME`` This will retrieve the contents of the given file. The HTTP response body will contain the sequence of bytes that make up the file. If you want the HTTP response to include a useful Content-Type header, either use the second form (which starts with a $DIRCAP), or add a "filename=foo" query argument, like "GET /uri/$FILECAP?filename=foo.jpg". The bare "GET /uri/$FILECAP" does not give the Tahoe node enough information to determine a Content-Type (since Tahoe immutable files are merely sequences of bytes, not typed+named file objects). If the URL has both filename= and "save=true" in the query arguments, then the server will add a "Content-Disposition: attachment" header, along with a filename= parameter. When a user clicks on such a link, most browsers will offer to let the user save the file instead of displaying it inline (indeed, most browsers will refuse to display it inline). "true", "t", "1", and other case-insensitive equivalents are all treated the same. Character-set handling in URLs and HTTP headers is a dubious art [1]_. For maximum compatibility, Tahoe simply copies the bytes from the filename= argument into the Content-Disposition header's filename= parameter, without trying to interpret them in any particular way. ``GET /named/$FILECAP/FILENAME`` This is an alternate download form which makes it easier to get the correct filename. The Tahoe server will provide the contents of the given file, with a Content-Type header derived from the given filename. This form is used to get browsers to use the "Save Link As" feature correctly, and also helps command-line tools like "wget" and "curl" use the right filename. Note that this form can *only* be used with file caps; it is an error to use a directory cap after the /named/ prefix. URLs may also use /file/$FILECAP/FILENAME as a synonym for /named/$FILECAP/FILENAME. Getting Information About a File Or Directory (as HTML) ------------------------------------------------------- ``GET /uri/$FILECAP?t=info`` ``GET /uri/$DIRCAP/?t=info`` ``GET /uri/$DIRCAP/[SUBDIRS../]SUBDIR/?t=info`` ``GET /uri/$DIRCAP/[SUBDIRS../]FILENAME?t=info`` This returns a human-oriented HTML page with more detail about the selected file or directory object.
This page contains the following items: * object size * storage index * JSON representation * raw contents (text/plain) * access caps (URIs): verify-cap, read-cap, write-cap (for mutable objects) * check/verify/repair form * deep-check/deep-size/deep-stats/manifest (for directories) * replace-contents form (for mutable files) Creating a Directory -------------------- ``POST /uri?t=mkdir`` This creates a new empty directory, but does not attach it to the virtual filesystem. If a "redirect_to_result=true" argument is provided, then the HTTP response will cause the web browser to be redirected to a /uri/$DIRCAP page that gives access to the newly-created directory. If you bookmark this page, you'll be able to get back to the directory again in the future. This is the recommended way to start working with a Tahoe server: create a new unlinked directory (using redirect_to_result=true), then bookmark the resulting /uri/$DIRCAP page. There is a "create directory" button on the Welcome page to invoke this action. This accepts a format= argument in the query string. Refer to the documentation of the PUT /uri?t=mkdir operation in `Creating A New Directory`_ for information on the behavior of the format= argument. If "redirect_to_result=true" is not provided (or is given a value of "false"), then the HTTP response body will simply be the write-cap of the new directory. ``POST /uri/$DIRCAP/[SUBDIRS../]?t=mkdir&name=CHILDNAME`` This creates a new empty directory as a child of the designated SUBDIR. This will create additional intermediate directories as necessary. This accepts a format= argument in the query string. Refer to the documentation of POST /uri/$DIRCAP/[SUBDIRS../]?t=mkdir&name=CHILDNAME in `Creating a New Directory`_ for information on the behavior of the format= argument. If a "when_done=URL" argument is provided, the HTTP response will cause the web browser to redirect to the given URL. This provides a convenient way to return the browser to the directory that was just modified. Without a when_done= argument, the HTTP response will simply contain the write-cap of the directory that was just created. Uploading a File ---------------- ``POST /uri?t=upload`` This uploads a file, and produces a file-cap for the contents, but does not attach the file into the filesystem. No directories will be modified by this operation. The file must be provided as the "file" field of an HTML encoded form body, produced in response to an HTML form like this:: If a "when_done=URL" argument is provided, the response body will cause the browser to redirect to the given URL. If the when_done= URL has the string "%(uri)s" in it, that string will be replaced by a URL-escaped form of the newly created file-cap. (Note that without this substitution, there is no way to access the file that was just uploaded). The default (in the absence of when_done=) is to return an HTML page that describes the results of the upload. This page will contain information about which storage servers were used for the upload, how long each operation took, etc. This accepts format= and mutable=true query string arguments. Refer to `Writing/Uploading a File`_ for information on the behavior of format= and mutable=true. ``POST /uri/$DIRCAP/[SUBDIRS../]?t=upload`` This uploads a file, and attaches it as a new child of the given directory, which must be mutable. The file must be provided as the "file" field of an HTML-encoded form body, produced in response to an HTML form like this::
A "name=" argument can be provided to specify the new child's name, otherwise it will be taken from the "filename" field of the upload form (most web browsers will copy the last component of the original file's pathname into this field). To avoid confusion, name= is not allowed to contain a slash. If there is already a child with that name, and it is a mutable file, then its contents are replaced with the data being uploaded. If it is not a mutable file, the default behavior is to remove the existing child before creating a new one. To prevent this (and make the operation return an error instead of overwriting the old child), add a "replace=false" argument, as "?t=upload&replace=false". With replace=false, this operation will return an HTTP 409 "Conflict" error if there is already an object at the given location, rather than overwriting the existing object. Note that "true", "t", and "1" are all synonyms for "True", and "false", "f", and "0" are synonyms for "False". the parameter is case-insensitive. This will create additional intermediate directories as necessary, although since it is expected to be triggered by a form that was retrieved by "GET /uri/$DIRCAP/[SUBDIRS../]", it is likely that the parent directory will already exist. This accepts format= and mutable=true query string arguments. Refer to `Writing/Uploading a File`_ for information on the behavior of format= and mutable=true. If a "when_done=URL" argument is provided, the HTTP response will cause the web browser to redirect to the given URL. This provides a convenient way to return the browser to the directory that was just modified. Without a when_done= argument, the HTTP response will simply contain the file-cap of the file that was just uploaded (a write-cap for mutable files, or a read-cap for immutable files). ``POST /uri/$DIRCAP/[SUBDIRS../]FILENAME?t=upload`` This also uploads a file and attaches it as a new child of the given directory, which must be mutable. It is a slight variant of the previous operation, as the URL refers to the target file rather than the parent directory. It is otherwise identical: this accepts mutable= and when_done= arguments too. ``POST /uri/$FILECAP?t=upload`` This modifies the contents of an existing mutable file in-place. An error is signalled if $FILECAP does not refer to a mutable file. It behaves just like the "PUT /uri/$FILECAP" form, but uses a POST for the benefit of HTML forms in a web browser. Attaching An Existing File Or Directory (by URI) ------------------------------------------------ ``POST /uri/$DIRCAP/[SUBDIRS../]?t=uri&name=CHILDNAME&uri=CHILDCAP`` This attaches a given read- or write- cap "CHILDCAP" to the designated directory, with a specified child name. This behaves much like the PUT t=uri operation, and is a lot like a UNIX hardlink. It is subject to the same restrictions as that operation on the use of cap formats unknown to the web-API server. This will create additional intermediate directories as necessary, although since it is expected to be triggered by a form that was retrieved by "GET /uri/$DIRCAP/[SUBDIRS../]", it is likely that the parent directory will already exist. This accepts the same replace= argument as POST t=upload. Unlinking a Child ----------------- ``POST /uri/$DIRCAP/[SUBDIRS../]?t=delete&name=CHILDNAME`` ``POST /uri/$DIRCAP/[SUBDIRS../]?t=unlink&name=CHILDNAME`` (Tahoe >= v1.9) This instructs the node to remove a child object (file or subdirectory) from the given directory, which must be mutable. 
Note that the entire subtree is unlinked from the parent. Unlike deleting a subdirectory in a UNIX local filesystem, the subtree need not be empty; if it isn't, then other references into the subtree will see that the child subdirectories are not modified by this operation. Only the link from the given directory to its child is severed. In Tahoe-LAFS v1.9.0 and later, t=unlink can be used as a synonym for t=delete. If interoperability with older web-API servers is required, t=delete should be used. Renaming a Child ---------------- ``POST /uri/$DIRCAP/[SUBDIRS../]?t=rename&from_name=OLD&to_name=NEW`` This instructs the node to rename a child of the given directory, which must be mutable. This has a similar effect to removing the child, then adding the same child-cap under the new name, except that it preserves metadata. This operation cannot move the child to a different directory. The default behavior is to overwrite any existing link at the destination (replace=true). To prevent this (and make the operation return an error instead of overwriting), add a "replace=false" argument. With replace=false, this operation will return an HTTP 409 "Conflict" error if the destination is not the same link as the source and there is already a link at the destination, rather than overwriting the existing link. To allow the operation to overwrite a link to a file, but return an HTTP 409 error when trying to overwrite a link to a directory, use "replace=only-files" (this behavior is closer to the traditional UNIX "mv" command). Note that "true", "t", and "1" are all synonyms for "True"; "false", "f", and "0" are synonyms for "False"; and the parameter is case-insensitive. Relinking ("Moving") a Child ---------------------------- ``POST /uri/$DIRCAP/[SUBDIRS../]?t=relink&from_name=OLD&to_dir=$NEWDIRCAP/[NEWSUBDIRS../]&to_name=NEW`` ``[&replace=true|false|only-files]`` (Tahoe >= v1.10) This instructs the node to move a child of the given source directory, into a different directory and/or to a different name. The command is named ``relink`` because what it does is add a new link to the child from the new location, then remove the old link. Nothing is actually "moved": the child is still reachable through any path from which it was formerly reachable, and the storage space occupied by its ciphertext is not affected. The source and destination directories must be writeable. If ``to_dir`` is not present, the child link is renamed within the same directory. If ``to_name`` is not present then it defaults to ``from_name``. If the destination link (directory and name) is the same as the source link, the operation has no effect. Metadata from the source directory entry is preserved. Multiple levels of descent in the source and destination paths are supported. This operation will return an HTTP 404 "Not Found" error if ``$DIRCAP/[SUBDIRS../]``, the child being moved, or the destination directory does not exist. It will return an HTTP 400 "Bad Request" error if any entry in the source or destination paths is not a directory. The default behavior is to overwrite any existing link at the destination (replace=true). To prevent this (and make the operation return an error instead of overwriting), add a "replace=false" argument. With replace=false, this operation will return an HTTP 409 "Conflict" error if the destination is not the same link as the source and there is already a link at the destination, rather than overwriting the existing link.
To allow the operation to overwrite a link to a file, but return an HTTP 409 error when trying to overwrite a link to a directory, use "replace=only-files" (this behavior is closer to the traditional UNIX "mv" command). Note that "true", "t", and "1" are all synonyms for "True"; "false", "f", and "0" are synonyms for "False"; and the parameter is case-insensitive. When relinking into a different directory, for safety, the child link is not removed from the old directory until it has been successfully added to the new directory. This implies that in case of a crash or failure, the link to the child will not be lost, but it could be linked at both the old and new locations. The source link should not be the same as any link (directory and child name) in the ``to_dir`` path. This restriction is not enforced, but it may be enforced in a future version. If it were violated then the result would be to create a cycle in the directory structure that is not necessarily reachable from the root of the destination path (``$NEWDIRCAP``), which could result in data loss, as described in ticket `#943`_. .. _`#943`: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/943 Other Utilities --------------- ``GET /uri?uri=$CAP`` This causes a redirect to /uri/$CAP, and retains any additional query arguments (like filename= or save=). This is for the convenience of web forms which allow the user to paste in a read- or write- cap (obtained through some out-of-band channel, like IM or email). Note that this form merely redirects to the specific file or directory indicated by the $CAP: unlike the GET /uri/$DIRCAP form, you cannot traverse to children by appending additional path segments to the URL. ``GET /uri/$DIRCAP/[SUBDIRS../]?t=rename-form&name=$CHILDNAME`` This provides a useful facility to browser-based user interfaces. It returns a page containing a form targetting the "POST $DIRCAP t=rename" functionality described above, with the provided $CHILDNAME present in the 'from_name' field of that form. I.e. this presents a form offering to rename $CHILDNAME, requesting the new name, and submitting POST rename. This same URL format can also be used with "move-form" with the expected results. ``GET /uri/$DIRCAP/[SUBDIRS../]CHILDNAME?t=uri`` This returns the file- or directory- cap for the specified object. ``GET /uri/$DIRCAP/[SUBDIRS../]CHILDNAME?t=readonly-uri`` This returns a read-only file- or directory- cap for the specified object. If the object is an immutable file, this will return the same value as t=uri. Debugging and Testing Features ------------------------------ These URLs are less-likely to be helpful to the casual Tahoe user, and are mainly intended for developers. ``POST $URL?t=check`` This triggers the FileChecker to determine the current "health" of the given file or directory, by counting how many shares are available. The page that is returned will display the results. This can be used as a "show me detailed information about this file" page. If a verify=true argument is provided, the node will perform a more intensive check, downloading and verifying every single bit of every share. If an add-lease=true argument is provided, the node will also add (or renew) a lease to every share it encounters. Each lease will keep the share alive for a certain period of time (one month by default). Once the last lease expires or is explicitly cancelled, the storage server is allowed to delete the share. 
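As a rough sketch (Python with ``requests``; the cap is a placeholder), a scripted check might be issued like this, using the output=JSON form that is described next::

  # Sketch: ask the node to check, verify, and renew leases on an object.
  # Placeholder cap; assumes a local node and requests.
  import urllib.parse
  import requests

  BASE = "http://127.0.0.1:3456"
  FILECAP = "URI:CHK:..."   # placeholder

  resp = requests.post(
      BASE + "/uri/" + urllib.parse.quote(FILECAP, safe=""),
      params={"t": "check", "verify": "true", "add-lease": "true", "output": "JSON"},
  )
  report = resp.json()
  print(report["summary"])
  print(report["results"]["count-shares-good"])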
If an output=JSON argument is provided, the response will be machine-readable JSON instead of human-oriented HTML. The data is a dictionary with the following keys:: storage-index: a base32-encoded string with the object's storage index, or an empty string for LIT files summary: a string, with a one-line summary of the stats of the file results: a dictionary that describes the state of the file. For LIT files, this dictionary has only the 'healthy' key, which will always be True. For distributed files, this dictionary has the following keys: count-shares-good: the number of good shares that were found count-shares-needed: 'k', the number of shares required for recovery count-shares-expected: 'N', the number of total shares generated count-good-share-hosts: the number of distinct storage servers with good shares. Note that a high value does not necessarily imply good share distribution, because some of these servers may only hold duplicate shares. count-wrong-shares: for mutable files, the number of shares for versions other than the 'best' one (highest sequence number, highest roothash). These are either old, or created by an uncoordinated or not fully successful write. count-recoverable-versions: for mutable files, the number of recoverable versions of the file. For a healthy file, this will equal 1. count-unrecoverable-versions: for mutable files, the number of unrecoverable versions of the file. For a healthy file, this will be 0. count-corrupt-shares: the number of shares with integrity failures list-corrupt-shares: a list of "share locators", one for each share that was found to be corrupt. Each share locator is a list of (serverid, storage_index, sharenum). needs-rebalancing: (bool) This field is intended to be True iff reliability could be improved for this file by rebalancing, i.e. by moving some shares to other servers. It may be incorrect in some cases for Tahoe-LAFS up to and including v1.10, and its precise definition is expected to change. servers-responding: list of base32-encoded storage server identifiers, one for each server which responded to the share query. healthy: (bool) True if the file is completely healthy, False otherwise. Healthy files have at least N good shares. Overlapping shares do not currently cause a file to be marked unhealthy. If there are at least N good shares, then corrupt shares do not cause the file to be marked unhealthy, although the corrupt shares will be listed in the results (list-corrupt-shares) and should be manually removed to avoid wasting time in subsequent downloads (as the downloader rediscovers the corruption and uses alternate shares). Future compatibility: the meaning of this field may change to reflect whether the servers-of-happiness criterion is met (see ticket #614). sharemap: dict mapping share identifier to list of serverids (base32-encoded strings). This indicates which servers are holding which shares. For immutable files, the shareid is an integer (the share number, from 0 to N-1). For mutable files, it is a string of the form 'seq%d-%s-sh%d', containing the sequence number, the roothash, and the share number. ``POST $URL?t=start-deep-check`` (must add &ophandle=XYZ) This initiates a recursive walk of all files and directories reachable from the target, performing a check on each one just like t=check. The result page will contain a summary of the results, including details on any file/directory that was not fully healthy. t=start-deep-check can only be invoked on a directory.
An error (400 BAD_REQUEST) will be signalled if it is invoked on a file. The recursive walker will deal with loops safely. This accepts the same verify= and add-lease= arguments as t=check. Since this operation can take a long time (perhaps a second per object), the ophandle= argument is required (see "Slow Operations, Progress, and Cancelling" above). The response to this POST will be a redirect to the corresponding /operations/$HANDLE page (with output=HTML or output=JSON to match the output= argument given to the POST). The deep-check operation will continue to run in the background, and the /operations page should be used to find out when the operation is done. Detailed check results for non-healthy files and directories will be available under /operations/$HANDLE/$STORAGEINDEX, and the HTML status will contain links to these detailed results. The HTML /operations/$HANDLE page for incomplete operations will contain a meta-refresh tag, set to 60 seconds, so that a browser which uses deep-check will automatically poll until the operation has completed. The JSON page (/operations/$HANDLE?output=JSON) will contain a machine-readable JSON dictionary with the following keys:: finished: a boolean, True if the operation is complete, else False. Some of the remaining keys may not be present until the operation is complete. root-storage-index: a base32-encoded string with the storage index of the starting point of the deep-check operation count-objects-checked: count of how many objects were checked. Note that non-distributed objects (i.e. small immutable LIT files) are not checked, since for these objects, the data is contained entirely in the URI. count-objects-healthy: how many of those objects were completely healthy count-objects-unhealthy: how many were damaged in some way count-corrupt-shares: how many shares were found to have corruption, summed over all objects examined list-corrupt-shares: a list of "share identifiers", one for each share that was found to be corrupt. Each share identifier is a list of (serverid, storage_index, sharenum). list-unhealthy-files: a list of (pathname, check-results) tuples, for each file that was not fully healthy. 'pathname' is a list of strings (which can be joined by "/" characters to turn it into a single string), relative to the directory on which deep-check was invoked. The 'check-results' field is the same as that returned by t=check&output=JSON, described above. stats: a dictionary with the same keys as the t=start-deep-stats command (described below) ``POST $URL?t=stream-deep-check`` This initiates a recursive walk of all files and directories reachable from the target, performing a check on each one just like t=check. For each unique object (duplicates are skipped), a single line of JSON is emitted to the HTTP response channel (or an error indication, see below). When the walk is complete, a final line of JSON is emitted which contains the accumulated file-size/count "deep-stats" data. This command takes the same arguments as t=start-deep-check. A CLI tool can split the response stream on newlines into "response units", and parse each response unit as JSON. Each such parsed unit will be a dictionary, and will contain at least the "type" key: a string, one of "file", "directory", or "stats".
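A minimal consumer of this stream (a sketch only: the gateway address and directory cap are placeholders, and the ``ERROR:`` convention it checks for is described below) might look like::

    import json
    import urllib.request

    # Placeholders: local gateway and a URL-encoded directory cap.
    url = "http://127.0.0.1:3456/uri/URI%3ADIR2%3A.../?t=stream-deep-check"
    req = urllib.request.Request(url, data=b"", method="POST")

    with urllib.request.urlopen(req) as resp:
        for raw in resp:                          # one response unit per line
            line = raw.decode("utf-8").strip()
            if not line:
                continue
            if line.startswith("ERROR:"):         # traversal stopped; the rest of
                print(line)                       # the body is a traceback
                break
            unit = json.loads(line)
            if unit["type"] in ("file", "directory"):
                path = "/".join(unit["path"]) or "<root>"
                if not unit["check-results"]["results"]["healthy"]:
                    print("unhealthy:", path)
            else:                                 # the final "stats" unit
                print("deep-stats unit:", unit)

The per-unit keys that such a consumer can rely on are listed next.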
For all units that have a type of "file" or "directory", the dictionary will contain the following keys:: "path": a list of strings, with the path that is traversed to reach the object "cap": a write-cap URI for the file or directory, if available, else a read-cap URI "verifycap": a verify-cap URI for the file or directory "repaircap": an URI for the weakest cap that can still be used to repair the object "storage-index": a base32 storage index for the object "check-results": a copy of the dictionary which would be returned by t=check&output=json, with three top-level keys: "storage-index", "summary", and "results", and a variety of counts and sharemaps in the "results" value. Note that non-distributed files (i.e. LIT files) will have values of None for verifycap, repaircap, and storage-index, since these files can neither be verified nor repaired, and are not stored on the storage servers. Likewise the check-results dictionary will be limited: an empty string for storage-index, and a results dictionary with only the "healthy" key. The last unit in the stream will have a type of "stats", and will contain the keys described in the "start-deep-stats" operation, below. If any errors occur during the traversal (specifically if a directory is unrecoverable, such that further traversal is not possible), an error indication is written to the response body, instead of the usual line of JSON. This error indication line will begin with the string "ERROR:" (in all caps), and contain a summary of the error on the rest of the line. The remaining lines of the response body will be a python exception. The client application should look for the ERROR: and stop processing JSON as soon as it is seen. Note that neither a file being unrecoverable nor a directory merely being unhealthy will cause traversal to stop. The line just before the ERROR: will describe the directory that was untraversable, since the unit is emitted to the HTTP response body before the child is traversed. ``POST $URL?t=check&repair=true`` This performs a health check of the given file or directory, and if the checker determines that the object is not healthy (some shares are missing or corrupted), it will perform a "repair". During repair, any missing shares will be regenerated and uploaded to new servers. This accepts the same verify=true and add-lease= arguments as t=check. When an output=JSON argument is provided, the machine-readable JSON response will contain the following keys:: storage-index: a base32-encoded string with the objects's storage index, or an empty string for LIT files repair-attempted: (bool) True if repair was attempted repair-successful: (bool) True if repair was attempted and the file was fully healthy afterwards. False if no repair was attempted, or if a repair attempt failed. pre-repair-results: a dictionary that describes the state of the file before any repair was performed. This contains exactly the same keys as the 'results' value of the t=check response, described above. post-repair-results: a dictionary that describes the state of the file after any repair was performed. If no repair was performed, post-repair-results and pre-repair-results will be the same. This contains exactly the same keys as the 'results' value of the t=check response, described above. ``POST $URL?t=start-deep-check&repair=true`` (must add &ophandle=XYZ) This triggers a recursive walk of all files and directories, performing a t=check&repair=true on each one. 
Like t=start-deep-check without the repair= argument, this can only be invoked on a directory. An error (400 BAD_REQUEST) will be signalled if it is invoked on a file. The recursive walker will deal with loops safely. This accepts the same verify= and add-lease= arguments as t=start-deep-check. It uses the same ophandle= mechanism as start-deep-check. When an output=JSON argument is provided, the response will contain the following keys:: finished: (bool) True if the operation has completed, else False root-storage-index: a base32-encoded string with the storage index of the starting point of the deep-check operation count-objects-checked: count of how many objects were checked count-objects-healthy-pre-repair: how many of those objects were completely healthy, before any repair count-objects-unhealthy-pre-repair: how many were damaged in some way count-objects-healthy-post-repair: how many of those objects were completely healthy, after any repair count-objects-unhealthy-post-repair: how many were damaged in some way count-repairs-attempted: repairs were attempted on this many objects. count-repairs-successful: how many repairs resulted in healthy objects count-repairs-unsuccessful: how many repairs did not result in completely healthy objects count-corrupt-shares-pre-repair: how many shares were found to have corruption, summed over all objects examined, before any repair count-corrupt-shares-post-repair: how many shares were found to have corruption, summed over all objects examined, after any repair list-corrupt-shares: a list of "share identifiers", one for each share that was found to be corrupt (before any repair). Each share identifier is a list of (serverid, storage_index, sharenum). list-remaining-corrupt-shares: like list-corrupt-shares, but mutable shares that were successfully repaired are not included. These are shares that need manual processing. Since immutable shares cannot be modified by clients, all corruption in immutable shares will be listed here. list-unhealthy-files: a list of (pathname, check-results) tuples, for each file that was not fully healthy. 'pathname' is relative to the directory on which deep-check was invoked. The 'check-results' field is the same as that returned by t=check&repair=true&output=JSON, described above. stats: a dictionary with the same keys as the t=start-deep-stats command (described below) ``POST $URL?t=stream-deep-check&repair=true`` This triggers a recursive walk of all files and directories, performing a t=check&repair=true on each one. For each unique object (duplicates are skipped), a single line of JSON is emitted to the HTTP response channel (or an error indication). When the walk is complete, a final line of JSON is emitted which contains the accumulated file-size/count "deep-stats" data. This emits the same data as t=stream-deep-check (without the repair=true), except that the "check-results" field is replaced with a "check-and-repair-results" field, which contains the keys returned by t=check&repair=true&output=json (i.e. repair-attempted, repair-successful, pre-repair-results, and post-repair-results). The output does not contain the summary dictionary that is provided by t=start-deep-check&repair=true (the one with count-objects-checked and list-unhealthy-files), since the receiving client is expected to calculate those values itself from the stream of per-object check-and-repair-results.
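A sketch of such client-side accumulation (placeholders as in the earlier sketches, and using ``.get()`` defensively since LIT files carry only limited check results)::

    import json
    import urllib.request

    url = ("http://127.0.0.1:3456/uri/URI%3ADIR2%3A.../"
           "?t=stream-deep-check&repair=true")        # placeholder dircap
    req = urllib.request.Request(url, data=b"", method="POST")

    checked = attempted = successful = 0
    with urllib.request.urlopen(req) as resp:
        for raw in resp:
            line = raw.decode("utf-8").strip()
            if not line:
                continue
            if line.startswith("ERROR:"):
                break                                 # remaining lines are a traceback
            unit = json.loads(line)
            if unit["type"] not in ("file", "directory"):
                continue
            checked += 1                              # counts every file/dir unit seen
            crr = unit.get("check-and-repair-results", {})
            if crr.get("repair-attempted"):
                attempted += 1
                if crr.get("repair-successful"):
                    successful += 1

    print("%d objects checked, %d repairs attempted, %d successful"
          % (checked, attempted, successful))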
Note that the "ERROR:" indication will only be emitted if traversal stops, which will only occur if an unrecoverable directory is encountered. If a file or directory repair fails, the traversal will continue, and the repair failure will be indicated in the JSON data (in the "repair-successful" key). ``POST $DIRURL?t=start-manifest`` (must add &ophandle=XYZ) This operation generates a "manfest" of the given directory tree, mostly for debugging. This is a table of (path, filecap/dircap), for every object reachable from the starting directory. The path will be slash-joined, and the filecap/dircap will contain a link to the object in question. This page gives immediate access to every object in the virtual filesystem subtree. This operation uses the same ophandle= mechanism as deep-check. The corresponding /operations/$HANDLE page has three different forms. The default is output=HTML. If output=text is added to the query args, the results will be a text/plain list. The first line is special: it is either "finished: yes" or "finished: no"; if the operation is not finished, you must periodically reload the page until it completes. The rest of the results are a plaintext list, with one file/dir per line, slash-separated, with the filecap/dircap separated by a space. If output=JSON is added to the queryargs, then the results will be a JSON-formatted dictionary with six keys. Note that because large directory structures can result in very large JSON results, the full results will not be available until the operation is complete (i.e. until output["finished"] is True):: finished (bool): if False then you must reload the page until True origin_si (base32 str): the storage index of the starting point manifest: list of (path, cap) tuples, where path is a list of strings. verifycaps: list of (printable) verify cap strings storage-index: list of (base32) storage index strings stats: a dictionary with the same keys as the t=start-deep-stats command (described below) ``POST $DIRURL?t=start-deep-size`` (must add &ophandle=XYZ) This operation generates a number (in bytes) containing the sum of the filesize of all directories and immutable files reachable from the given directory. This is a rough lower bound of the total space consumed by this subtree. It does not include space consumed by mutable files, nor does it take expansion or encoding overhead into account. Later versions of the code may improve this estimate upwards. The /operations/$HANDLE status output consists of two lines of text:: finished: yes size: 1234 ``POST $DIRURL?t=start-deep-stats`` (must add &ophandle=XYZ) This operation performs a recursive walk of all files and directories reachable from the given directory, and generates a collection of statistics about those objects. 
The result (obtained from the /operations/$OPHANDLE page) is a JSON-serialized dictionary with the following keys (note that some of these keys may be missing until 'finished' is True):: finished: (bool) True if the operation has finished, else False count-immutable-files: count of how many CHK files are in the set count-mutable-files: same, for mutable files (does not include directories) count-literal-files: same, for LIT files (data contained inside the URI) count-files: sum of the above three count-directories: count of directories count-unknown: count of unrecognized objects (perhaps from the future) size-immutable-files: total bytes for all CHK files in the set, =deep-size size-mutable-files (TODO): same, for current version of all mutable files size-literal-files: same, for LIT files size-directories: size of directories (includes size-literal-files) size-files-histogram: list of (minsize, maxsize, count) buckets, with a histogram of filesizes, 5dB/bucket, for both literal and immutable files largest-directory: number of children in the largest directory largest-immutable-file: number of bytes in the largest CHK file size-mutable-files is not implemented, because it would require extra queries to each mutable file to get their size. This may be implemented in the future. Assuming no sharing, the basic space consumed by a single root directory is the sum of size-immutable-files, size-mutable-files, and size-directories. The actual disk space used by the shares is larger, because of the following sources of overhead:: integrity data expansion due to erasure coding share management data (leases) backend (ext3) minimum block size ``POST $URL?t=stream-manifest`` This operation performs a recursive walk of all files and directories reachable from the given starting point. For each such unique object (duplicates are skipped), a single line of JSON is emitted to the HTTP response channel (or an error indication, see below). When the walk is complete, a final line of JSON is emitted which contains the accumulated file-size/count "deep-stats" data. A CLI tool can split the response stream on newlines into "response units", and parse each response unit as JSON. Each such parsed unit will be a dictionary, and will contain at least the "type" key: a string, one of "file", "directory", or "stats". For all units that have a type of "file" or "directory", the dictionary will contain the following keys:: "path": a list of strings, with the path that is traversed to reach the object "cap": a write-cap URI for the file or directory, if available, else a read-cap URI "verifycap": a verify-cap URI for the file or directory "repaircap": an URI for the weakest cap that can still be used to repair the object "storage-index": a base32 storage index for the object Note that non-distributed files (i.e. LIT files) will have values of None for verifycap, repaircap, and storage-index, since these files can neither be verified nor repaired, and are not stored on the storage servers. The last unit in the stream will have a type of "stats", and will contain the keys described in the "start-deep-stats" operation, below. If any errors occur during the traversal (specifically if a directory is unrecoverable, such that further traversal is not possible), an error indication is written to the response body, instead of the usual line of JSON. This error indication line will begin with the string "ERROR:" (in all caps), and contain a summary of the error on the rest of the line. 
The remaining lines of the response body will be a python exception. The client application should look for the ERROR: and stop processing JSON as soon as it is seen. The line just before the ERROR: will describe the directory that was untraversable, since the manifest entry is emitted to the HTTP response body before the child is traversed. Other Useful Pages ================== The portion of the web namespace that begins with "/uri" (and "/named") is dedicated to giving users (both humans and programs) access to the Tahoe virtual filesystem. The rest of the namespace provides status information about the state of the Tahoe node. ``GET /`` (the root page) This is the "Welcome Page", and contains a few distinct sections:: Node information: library versions, local nodeid, services being provided. Filesystem Access Forms: create a new directory, view a file/directory by URI, upload a file (unlinked), download a file by URI. Grid Status: introducer information, helper information, connected storage servers. ``GET /status/`` This page lists all active uploads and downloads, and contains a short list of recent upload/download operations. Each operation has a link to a page that describes file sizes, servers that were involved, and the time consumed in each phase of the operation. A GET of /status/?t=json will contain a machine-readable subset of the same data. It returns a JSON-encoded dictionary. The only key defined at this time is "active", with a value that is a list of operation dictionaries, one for each active operation. Once an operation is completed, it will no longer appear in data["active"] . Each op-dict contains a "type" key, one of "upload", "download", "mapupdate", "publish", or "retrieve" (the first two are for immutable files, while the latter three are for mutable files and directories). The "upload" op-dict will contain the following keys:: type (string): "upload" storage-index-string (string): a base32-encoded storage index total-size (int): total size of the file status (string): current status of the operation progress-hash (float): 1.0 when the file has been hashed progress-ciphertext (float): 1.0 when the file has been encrypted. progress-encode-push (float): 1.0 when the file has been encoded and pushed to the storage servers. For helper uploads, the ciphertext value climbs to 1.0 first, then encoding starts. For unassisted uploads, ciphertext and encode-push progress will climb at the same pace. The "download" op-dict will contain the following keys:: type (string): "download" storage-index-string (string): a base32-encoded storage index total-size (int): total size of the file status (string): current status of the operation progress (float): 1.0 when the file has been fully downloaded Front-ends which want to report progress information are advised to simply average together all the progress-* indicators. A slightly more accurate value can be found by ignoring the progress-hash value (since the current implementation hashes synchronously, so clients will probably never see progress-hash!=1.0). ``GET /helper_status/`` If the node is running a helper (i.e. if [helper]enabled is set to True in tahoe.cfg), then this page will provide a list of all the helper operations currently in progress. If "?t=json" is added to the URL, it will return a JSON-formatted list of helper statistics, which can then be used to produce graphs to indicate how busy the helper is. 
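Returning to the upload/download status data above: a front-end could, for example, poll /status/?t=json and derive a rough percentage for each active upload along these lines (a sketch; the gateway address is assumed)::

    import json
    import urllib.request

    with urllib.request.urlopen("http://127.0.0.1:3456/status/?t=json") as resp:
        data = json.loads(resp.read().decode("utf-8"))

    for op in data["active"]:
        if op["type"] != "upload":
            continue
        # progress-hash is effectively always 1.0, so ignore it as suggested above
        fraction = (op["progress-ciphertext"] + op["progress-encode-push"]) / 2.0
        print("%s: %d%%" % (op["storage-index-string"], int(fraction * 100)))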
``GET /statistics/`` This page provides "node statistics", which are collected from a variety of sources:: load_monitor: every second, the node schedules a timer for one second in the future, then measures how late the subsequent callback is. The "load_average" is this tardiness, measured in seconds, averaged over the last minute. It is an indication of a busy node, one which is doing more work than can be completed in a timely fashion. The "max_load" value is the highest value that has been seen in the last 60 seconds. cpu_monitor: every minute, the node uses time.clock() to measure how much CPU time it has used, and it uses this value to produce 1min/5min/15min moving averages. These values range from 0% (0.0) to 100% (1.0), and indicate what fraction of the CPU has been used by the Tahoe node. Not all operating systems provide meaningful data to time.clock(): they may report 100% CPU usage at all times. uploader: this counts how many immutable files (and bytes) have been uploaded since the node was started downloader: this counts how many immutable files have been downloaded since the node was started publishes: this counts how many mutable files (including directories) have been modified since the node was started retrieves: this counts how many mutable files (including directories) have been read since the node was started There are other statistics that are tracked by the node. The "raw stats" section shows a formatted dump of all of them. By adding "?t=json" to the URL, the node will return a JSON-formatted dictionary of stats values, which can be used by other tools to produce graphs of node behavior. The misc/munin/ directory in the source distribution provides some tools to produce these graphs. ``GET /`` (introducer status) For Introducer nodes, the welcome page displays information about both clients and servers which are connected to the introducer. Servers make "service announcements", and these are listed in a table. Clients will subscribe to hear about service announcements, and these subscriptions are listed in a separate table. Both tables contain information about what version of Tahoe is being run by the remote node, their advertised and outbound IP addresses, their nodeid and nickname, and how long they have been available. By adding "?t=json" to the URL, the node will return a JSON-formatted dictionary of stats values, which can be used to produce graphs of connected clients over time. This dictionary has the following keys:: ["subscription_summary"] : a dictionary mapping service name (like "storage") to an integer with the number of clients that have subscribed to hear about that service ["announcement_summary"] : a dictionary mapping service name to an integer with the number of servers which are announcing that service ["announcement_distinct_hosts"] : a dictionary mapping service name to an integer which represents the number of distinct hosts that are providing that service. If two servers have announced FURLs which use the same hostnames (but different ports and tubids), they are considered to be on the same host. Static Files in /public_html ============================ The web-API server will take any request for a URL that starts with /static and serve it from a configurable directory which defaults to $BASEDIR/public_html . This is configured by setting the "[node]web.static" value in $BASEDIR/tahoe.cfg . 
If this is left at the default value of "public_html", then http://127.0.0.1:3456/static/subdir/foo.html will be served with the contents of the file $BASEDIR/public_html/subdir/foo.html . This can be useful to serve a javascript application which provides a prettier front-end to the rest of the Tahoe web-API. Safety and Security Issues -- Names vs. URIs ============================================ Summary: use explicit file- and dir- caps whenever possible, to reduce the potential for surprises when the filesystem structure is changed. Tahoe provides a mutable filesystem, but the ways that the filesystem can change are limited. The only thing that can change is that the mapping from child names to child objects that each directory contains can be changed by adding a new child name pointing to an object, removing an existing child name, or changing an existing child name to point to a different object. Obviously if you query Tahoe for information about the filesystem and then act to change the filesystem (such as by getting a listing of the contents of a directory and then adding a file to the directory), then the filesystem might have been changed after you queried it and before you acted upon it. However, if you use the URI instead of the pathname of an object when you act upon the object, then the only change that can happen is if the object is a directory then the set of child names it has might be different. If, on the other hand, you act upon the object using its pathname, then a different object might be in that place, which can result in more kinds of surprises. For example, suppose you are writing code which recursively downloads the contents of a directory. The first thing your code does is fetch the listing of the contents of the directory. For each child that it fetched, if that child is a file then it downloads the file, and if that child is a directory then it recurses into that directory. Now, if the download and the recurse actions are performed using the child's name, then the results might be wrong, because for example a child name that pointed to a sub-directory when you listed the directory might have been changed to point to a file (in which case your attempt to recurse into it would result in an error and the file would be skipped), or a child name that pointed to a file when you listed the directory might now point to a sub-directory (in which case your attempt to download the child would result in a file containing HTML text describing the sub-directory!). If your recursive algorithm uses the uri of the child instead of the name of the child, then those kinds of mistakes just can't happen. Note that both the child's name and the child's URI are included in the results of listing the parent directory, so it isn't any harder to use the URI for this purpose. The read and write caps in a given directory node are separate URIs, and can't be assumed to point to the same object even if they were retrieved in the same operation (although the web-API server attempts to ensure this in most cases). If you need to rely on that property, you should explicitly verify it. More generally, you should not make assumptions about the internal consistency of the contents of mutable directories. 
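To make the recursive-download example above concrete, here is a compressed sketch that recurses by capability rather than by name (it assumes the ``?t=json`` directory-listing format described earlier in this document, uses a hypothetical local gateway, and omits error handling)::

    import json
    import urllib.request
    from urllib.parse import quote

    GATEWAY = "http://127.0.0.1:3456"        # assumed local gateway

    def fetch_json(cap):
        url = "%s/uri/%s?t=json" % (GATEWAY, quote(cap, safe=""))
        with urllib.request.urlopen(url) as resp:
            return json.loads(resp.read().decode("utf-8"))

    def download_tree(dircap):
        nodetype, info = fetch_json(dircap)      # ["dirnode", {...}] for directories
        for name, (childtype, child) in sorted(info["children"].items()):
            childcap = child.get("ro_uri") or child.get("rw_uri")
            if childtype == "dirnode":
                download_tree(childcap)          # recurse using the child's cap,
            elif childtype == "filenode":        # not its name
                url = "%s/uri/%s" % (GATEWAY, quote(childcap, safe=""))
                data = urllib.request.urlopen(url).read()
                print(name, len(data), "bytes")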
As a result of the signatures on mutable object versions, it is guaranteed that a given version was written in a single update, but -- as in the case of a file -- the contents may have been chosen by a malicious writer in a way that is designed to confuse applications that rely on their consistency. In general, use names if you want "whatever object (whether file or directory) is found by following this name (or sequence of names) when my request reaches the server". Use URIs if you want "this particular object". Concurrency Issues ================== Tahoe uses both mutable and immutable files. Mutable files can be created explicitly by doing an upload with ?mutable=true added, or implicitly by creating a new directory (since a directory is just a special way to interpret a given mutable file). Mutable files suffer from the same consistency-vs-availability tradeoff that all distributed data storage systems face. It is not possible to simultaneously achieve perfect consistency and perfect availability in the face of network partitions (servers being unreachable or faulty). Tahoe tries to achieve a reasonable compromise, but there is a basic rule in place, known as the Prime Coordination Directive: "Don't Do That". What this means is that if write-access to a mutable file is available to several parties, then those parties are responsible for coordinating their activities to avoid multiple simultaneous updates. This could be achieved by having these parties talk to each other and using some sort of locking mechanism, or by serializing all changes through a single writer. The consequences of performing uncoordinated writes can vary. Some of the writers may lose their changes, as somebody else wins the race condition. In many cases the file will be left in an "unhealthy" state, meaning that there are not as many redundant shares as we would like (reducing the reliability of the file against server failures). In the worst case, the file can be left in such an unhealthy state that no version is recoverable, even the old ones. It is this small possibility of data loss that prompts us to issue the Prime Coordination Directive. Tahoe nodes implement internal serialization to make sure that a single Tahoe node cannot conflict with itself. For example, it is safe to issue two directory modification requests to a single tahoe node's web-API server at the same time, because the Tahoe node will internally delay one of them until after the other has finished being applied. (This feature was introduced in Tahoe-1.1; back with Tahoe-1.0 the web client was responsible for serializing web requests themselves). For more details, please see the "Consistency vs Availability" and "The Prime Coordination Directive" sections of `mutable.rst <../specifications/mutable.rst>`_. Access Blacklist ================ Gateway nodes may find it necessary to prohibit access to certain files. The web-API has a facility to block access to filecaps by their storage index, returning a 403 "Forbidden" error instead of the original file. This blacklist is recorded in $NODEDIR/access.blacklist, and contains one blocked file per line. Comment lines (starting with ``#``) are ignored. Each line consists of the storage-index (in the usual base32 format as displayed by the "More Info" page, or by the "tahoe debug dump-cap" command), followed by whitespace, followed by a reason string, which will be included in the 403 error message. This could hold a URL to a page that explains why the file is blocked, for example. 
So for example, if you found a need to block access to a file with filecap ``URI:CHK:n7r3m6wmomelk4sep3kw5cvduq:os7ijw5c3maek7pg65e5254k2fzjflavtpejjyhshpsxuqzhcwwq:3:20:14861``, you could do the following:: tahoe debug dump-cap URI:CHK:n7r3m6wmomelk4sep3kw5cvduq:os7ijw5c3maek7pg65e5254k2fzjflavtpejjyhshpsxuqzhcwwq:3:20:14861 -> storage index: whpepioyrnff7orecjolvbudeu echo "whpepioyrnff7orecjolvbudeu my puppy told me to" >>$NODEDIR/access.blacklist tahoe restart $NODEDIR tahoe get URI:CHK:n7r3m6wmomelk4sep3kw5cvduq:os7ijw5c3maek7pg65e5254k2fzjflavtpejjyhshpsxuqzhcwwq:3:20:14861 -> error, 403 Access Prohibited: my puppy told me to The ``access.blacklist`` file will be checked each time a file or directory is accessed: the file's ``mtime`` is used to decide whether it need to be reloaded. Therefore no node restart is necessary when creating the initial blacklist, nor when adding second, third, or additional entries to the list. When modifying the file, be careful to update it atomically, otherwise a request may arrive while the file is only halfway written, and the partial file may be incorrectly parsed. The blacklist is applied to all access paths (including SFTP, FTP, and CLI operations), not just the web-API. The blacklist also applies to directories. If a directory is blacklisted, the gateway will refuse access to both that directory and any child files/directories underneath it, when accessed via "DIRCAP/SUBDIR/FILENAME" -style URLs. Users who go directly to the child file/dir will bypass the blacklist. The node will log the SI of the file being blocked, and the reason code, into the ``logs/twistd.log`` file. .. [1] URLs and HTTP and UTF-8, Oh My HTTP does not provide a mechanism to specify the character set used to encode non-ASCII names in URLs (`RFC3986#2.1 `_). We prefer the convention that the ``filename=`` argument shall be a URL-escaped UTF-8 encoded Unicode string. For example, suppose we want to provoke the server into using a filename of "f i a n c e-acute e" (i.e. f i a n c U+00E9 e). The UTF-8 encoding of this is 0x66 0x69 0x61 0x6e 0x63 0xc3 0xa9 0x65 (or "fianc\\xC3\\xA9e", as python's ``repr()`` function would show). To encode this into a URL, the non-printable characters must be escaped with the urlencode ``%XX`` mechanism, giving us "fianc%C3%A9e". Thus, the first line of the HTTP request will be "``GET /uri/CAP...?save=true&filename=fianc%C3%A9e HTTP/1.1``". Not all browsers provide this: IE7 by default uses the Latin-1 encoding, which is "fianc%E9e" (although it has a configuration option to send URLs as UTF-8). The response header will need to indicate a non-ASCII filename. The actual mechanism to do this is not clear. For ASCII filenames, the response header would look like:: Content-Disposition: attachment; filename="english.txt" If Tahoe were to enforce the UTF-8 convention, it would need to decode the URL argument into a Unicode string, and then encode it back into a sequence of bytes when creating the response header. One possibility would be to use unencoded UTF-8. 
Developers suggest that IE7 might accept this:: #1: Content-Disposition: attachment; filename="fianc\xC3\xA9e" (note, the last four bytes of that line, not including the newline, are 0xC3 0xA9 0x65 0x22) `RFC2231#4 `_ (dated 1997): suggests that the following might work, and `some developers have reported `_ that it is supported by Firefox (but not IE7):: #2: Content-Disposition: attachment; filename*=utf-8''fianc%C3%A9e My reading of `RFC2616#19.5.1 `_ (which defines Content-Disposition) says that the filename= parameter is defined to be wrapped in quotes (presumably to allow spaces without breaking the parsing of subsequent parameters), which would give us:: #3: Content-Disposition: attachment; filename*=utf-8''"fianc%C3%A9e" However this is contrary to the examples in the email thread listed above. Developers report that IE7 (when it is configured for UTF-8 URL encoding, which is not the default in Asian countries), will accept:: #4: Content-Disposition: attachment; filename=fianc%C3%A9e However, for maximum compatibility, Tahoe simply copies bytes from the URL into the response header, rather than enforcing the UTF-8 convention. This means it does not try to decode the filename from the URL argument, nor does it encode the filename into the response header. tahoe-lafs-1.10.0/docs/garbage-collection.rst000066400000000000000000000337141221140116300210160ustar00rootroot00000000000000=========================== Garbage Collection in Tahoe =========================== 1. `Overview`_ 2. `Client-side Renewal`_ 3. `Server Side Expiration`_ 4. `Expiration Progress`_ 5. `Future Directions`_ Overview ======== When a file or directory in the virtual filesystem is no longer referenced, the space that its shares occupied on each storage server can be freed, making room for other shares. Tahoe currently uses a garbage collection ("GC") mechanism to implement this space-reclamation process. Each share has one or more "leases", which are managed by clients who want the file/directory to be retained. The storage server accepts each share for a pre-defined period of time, and is allowed to delete the share if all of the leases expire. Garbage collection is not enabled by default: storage servers will not delete shares without being explicitly configured to do so. When GC is enabled, clients are responsible for renewing their leases on a periodic basis at least frequently enough to prevent any of the leases from expiring before the next renewal pass. There are several tradeoffs to be considered when choosing the renewal timer and the lease duration, and there is no single optimal pair of values. See the ``_ diagram to get an idea for the tradeoffs involved. If lease renewal occurs quickly and with 100% reliability, than any renewal time that is shorter than the lease duration will suffice, but a larger ratio of duration-over-renewal-time will be more robust in the face of occasional delays or failures. The current recommended values for a small Tahoe grid are to renew the leases once a week, and give each lease a duration of 31 days. In the current release, there is not yet a way to create a lease with a different duration, but the server can use the ``expire.override_lease_duration`` configuration setting to increase or decrease the effective duration (when the lease is processed) to something other than 31 days. Renewing leases can be expected to take about one second per file/directory, depending upon the number of servers and the network speeds involved. 
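As a rough illustration of what that means for scheduling (an estimate only, using the one-second-per-object figure above)::

    100,000 files/directories  x  ~1 second each  =  ~28 hours per renewal pass

so even a fairly large rootcap can be renewed comfortably within a weekly cycle.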
Client-side Renewal =================== If all of the files and directories which you care about are reachable from a single starting point (usually referred to as a "rootcap"), and you store that rootcap as an alias (via "``tahoe create-alias``" for example), then the simplest way to renew these leases is with the following CLI command:: tahoe deep-check --add-lease ALIAS: This will recursively walk every directory under the given alias and renew the leases on all files and directories. (You may want to add a ``--repair`` flag to perform repair at the same time.) Simply run this command once a week (or whatever other renewal period your grid recommends) and make sure it completes successfully. As a side effect, a manifest of all unique files and directories will be emitted to stdout, as well as a summary of file sizes and counts. It may be useful to track these statistics over time. Note that newly uploaded files (and newly created directories) get an initial lease too: the ``--add-lease`` process is only needed to ensure that all older objects have up-to-date leases on them. A separate "rebalancing manager/service" is also planned -- see ticket `#543 `_. The exact details of what this service will do are not settled, but it is likely to work by acquiring manifests from rootcaps on a periodic basis, keeping track of checker results, managing lease-addition, and prioritizing repair and rebalancing of shares. Eventually it may use multiple worker nodes to perform these jobs in parallel. Server Side Expiration ====================== Expiration must be explicitly enabled on each storage server, since the default behavior is to never expire shares. Expiration is enabled by adding config keys to the ``[storage]`` section of the ``tahoe.cfg`` file (as described below) and restarting the server node. Each lease has two parameters: a create/renew timestamp and a duration. The timestamp is updated when the share is first uploaded (i.e. the file or directory is created), and updated again each time the lease is renewed (i.e. "``tahoe check --add-lease``" is performed). The duration is currently fixed at 31 days, and the "nominal lease expiration time" is simply $duration seconds after the $create_renew timestamp. (In a future release of Tahoe, the client will get to request a specific duration, and the server will accept or reject the request depending upon its local configuration, so that servers can achieve better control over their storage obligations.) The lease-expiration code has two modes of operation. The first is age-based: leases are expired when their age is greater than their duration. This is the preferred mode: as long as clients consistently update their leases on a periodic basis, and that period is shorter than the lease duration, then all active files and directories will be preserved, and the garbage will collected in a timely fashion. Since there is not yet a way for clients to request a lease duration of other than 31 days, there is a ``tahoe.cfg`` setting to override the duration of all leases. If, for example, this alternative duration is set to 60 days, then clients could safely renew their leases with an add-lease operation perhaps once every 50 days: even though nominally their leases would expire 31 days after the renewal, the server would not actually expire the leases until 60 days after renewal. 
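For example, a storage server implementing the 60-day policy just described might carry a ``[storage]`` section like this (an illustrative sketch; the individual keys are documented below)::

    [storage]
    enabled = true
    expire.enabled = true
    expire.mode = age
    expire.override_lease_duration = 60 days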
The other mode is an absolute-date-cutoff: it compares the create/renew timestamp against some absolute date, and expires any lease which was not created or renewed since the cutoff date. If all clients have performed an add-lease some time after March 20th, you could tell the storage server to expire all leases that were created or last renewed on March 19th or earlier. This is most useful if you have a manual (non-periodic) add-lease process. Note that there is not much point to running a storage server in this mode for a long period of time: once the lease-checker has examined all shares and expired whatever it is going to expire, the second and subsequent passes are not going to find any new leases to remove. The ``tahoe.cfg`` file uses the following keys to control lease expiration: ``[storage]`` ``expire.enabled = (boolean, optional)`` If this is ``True``, the storage server will delete shares on which all leases have expired. Other controls dictate when leases are considered to have expired. The default is ``False``. ``expire.mode = (string, "age" or "cutoff-date", required if expiration enabled)`` If this string is "age", the age-based expiration scheme is used, and the ``expire.override_lease_duration`` setting can be provided to influence the lease ages. If it is "cutoff-date", the absolute-date-cutoff mode is used, and the ``expire.cutoff_date`` setting must be provided to specify the cutoff date. The mode setting currently has no default: you must provide a value. In a future release, this setting is likely to default to "age", but in this release it was deemed safer to require an explicit mode specification. ``expire.override_lease_duration = (duration string, optional)`` When age-based expiration is in use, a lease will be expired if its ``lease.create_renew`` timestamp plus its ``lease.duration`` time is earlier/older than the current time. This key, if present, overrides the duration value for all leases, changing the algorithm from:: if (lease.create_renew_timestamp + lease.duration) < now: expire_lease() to:: if (lease.create_renew_timestamp + override_lease_duration) < now: expire_lease() The value of this setting is a "duration string", which is a number of days, months, or years, followed by a units suffix, and optionally separated by a space, such as one of the following:: 7days 31day 60 days 2mo 3 month 12 months 2years This key is meant to compensate for the fact that clients do not yet have the ability to ask for leases that last longer than 31 days. A grid which wants to use faster or slower GC than a 31-day lease timer permits can use this parameter to implement it. This key is only valid when age-based expiration is in use (i.e. when ``expire.mode = age`` is used). It will be rejected if cutoff-date expiration is in use. ``expire.cutoff_date = (date string, required if mode=cutoff-date)`` When cutoff-date expiration is in use, a lease will be expired if its create/renew timestamp is older than the cutoff date. This string will be a date in the following format:: 2009-01-16 (January 16th, 2009) 2008-02-02 2007-12-25 The actual cutoff time shall be midnight UTC at the beginning of the given day. Lease timers should naturally be generous enough to not depend upon differences in timezone: there should be at least a few days between the last renewal time and the cutoff date. This key is only valid when cutoff-based expiration is in use (i.e. when "expire.mode = cutoff-date"). It will be rejected if age-based expiration is in use. 
``expire.immutable = (boolean, optional)`` If this is ``False``, then immutable shares will never be deleted, even if their leases have expired. This can be used in special situations to perform GC on mutable files but not immutable ones. The default is ``True``. ``expire.mutable = (boolean, optional)`` If this is ``False``, then mutable shares will never be deleted, even if their leases have expired. This can be used in special situations to perform GC on immutable files but not mutable ones. The default is ``True``. Expiration Progress =================== In the current release, leases are stored as metadata in each share file, and no separate database is maintained. As a result, checking and expiring leases on a large server may require multiple reads from each of several million share files. This process can take a long time and be very disk-intensive, so a "share crawler" is used. The crawler limits the amount of time looking at shares to a reasonable percentage of the storage server's overall usage: by default it uses no more than 10% CPU, and yields to other code after 100ms. A typical server with 1.1M shares was observed to take 3.5 days to perform this rate-limited crawl through the whole set of shares, with expiration disabled. It is expected to take perhaps 4 or 5 days to do the crawl with expiration turned on. The crawler's status is displayed on the "Storage Server Status Page", a web page dedicated to the storage server. This page resides at $NODEURL/storage, and there is a link to it from the front "welcome" page. The "Lease Expiration crawler" section of the status page shows the progress of the current crawler cycle, expected completion time, amount of space recovered, and details of how many shares have been examined. The crawler's state is persistent: restarting the node will not cause it to lose significant progress. The state is stored in two files ($BASEDIR/storage/lease_checker.state and lease_checker.history), and the crawler can be forcibly reset by stopping the node, deleting these two files, then restarting the node. Future Directions ================= Tahoe's GC mechanism is undergoing significant changes. The global mark-and-sweep garbage-collection scheme can require considerable network traffic for large grids, interfering with the bandwidth available for regular uploads and downloads (and for non-Tahoe users of the network). A preferable method might be to have a timer-per-client instead of a timer-per-lease: the leases would not be expired until/unless the client had not checked in with the server for a pre-determined duration. This would reduce the network traffic considerably (one message per week instead of thousands), but retain the same general failure characteristics. In addition, using timers is not fail-safe (from the client's point of view), in that a client which leaves the network for an extended period of time may return to discover that all of their files have been garbage-collected. (It *is* fail-safe from the server's point of view, in that a server is not obligated to provide disk space in perpetuity to an unresponsive client). It may be useful to create a "renewal agent" to which a client can pass a list of renewal-caps: the agent then takes the responsibility for keeping these leases renewed, so the client can go offline safely. Of course, this requires a certain amount of coordination: the renewal agent should not be keeping files alive that the client has actually deleted.
The client can send the renewal-agent a manifest of renewal caps, and each new manifest should replace the previous set. The GC mechanism is also not immediate: a client which deletes a file will nevertheless be consuming extra disk space (and might be charged or otherwise held accountable for it) until the ex-file's leases finally expire on their own. In the current release, these leases are each associated with a single "node secret" (stored in $BASEDIR/private/secret), which is used to generate renewal-secrets for each lease. Two nodes with different secrets will produce separate leases, and will not be able to renew each others' leases. Once the Accounting project is in place, leases will be scoped by a sub-delegatable "account id" instead of a node secret, so clients will be able to manage multiple leases per file. In addition, servers will be able to identify which shares are leased by which clients, so that clients can safely reconcile their idea of which files/directories are active against the server's list, and explicitly cancel leases on objects that aren't on the active list. By reducing the size of the "lease scope", the coordination problem is made easier. In general, mark-and-sweep is easier to implement (it requires mere vigilance, rather than coordination), so unless the space used by deleted files is not expiring fast enough, the renew/expire timed lease approach is recommended. tahoe-lafs-1.10.0/docs/helper.rst000066400000000000000000000205071221140116300165500ustar00rootroot00000000000000======================= The Tahoe Upload Helper ======================= 1. `Overview`_ 2. `Setting Up A Helper`_ 3. `Using a Helper`_ 4. `Other Helper Modes`_ Overview ======== As described in the "Swarming Download, Trickling Upload" section of `architecture.rst`_, Tahoe uploads require more bandwidth than downloads: you must push the redundant shares during upload, but you do not need to retrieve them during download. With the default 3-of-10 encoding parameters, this means that an upload will require about 3.3x the traffic as a download of the same file. .. _architecture.rst: file:architecture.rst Unfortunately, this "expansion penalty" occurs in the same upstream direction that most consumer DSL lines are slow anyways. Typical ADSL lines get 8 times as much download capacity as upload capacity. When the ADSL upstream penalty is combined with the expansion penalty, the result is uploads that can take up to 32 times longer than downloads. The "Helper" is a service that can mitigate the expansion penalty by arranging for the client node to send data to a central Helper node instead of sending it directly to the storage servers. It sends ciphertext to the Helper, so the security properties remain the same as with non-Helper uploads. The Helper is responsible for applying the erasure encoding algorithm and placing the resulting shares on the storage servers. Of course, the helper cannot mitigate the ADSL upstream penalty. The second benefit of using an upload helper is that clients who lose their network connections while uploading a file (because of a network flap, or because they shut down their laptop while an upload was in progress) can resume their upload rather than needing to start again from scratch. The helper holds the partially-uploaded ciphertext on disk, and when the client tries to upload the same file a second time, it discovers that the partial ciphertext is already present. The client then only needs to upload the remaining ciphertext. 
This reduces the "interrupted upload penalty" to a minimum. This also serves to reduce the number of active connections between the client and the outside world: most of their traffic flows over a single TCP connection to the helper. This can improve TCP fairness, and should allow other applications that are sharing the same uplink to compete more evenly for the limited bandwidth. Setting Up A Helper =================== Who should consider running a helper? * Benevolent entities which wish to provide better upload speed for clients that have slow uplinks * Folks which have machines with upload bandwidth to spare. * Server grid operators who want clients to connect to a small number of helpers rather than a large number of storage servers (a "multi-tier" architecture) What sorts of machines are good candidates for running a helper? * The Helper needs to have good bandwidth to the storage servers. In particular, it needs to have at least 3.3x better upload bandwidth than the client does, or the client might as well upload directly to the storage servers. In a commercial grid, the helper should be in the same colo (and preferably in the same rack) as the storage servers. * The Helper will take on most of the CPU load involved in uploading a file. So having a dedicated machine will give better results. * The Helper buffers ciphertext on disk, so the host will need at least as much free disk space as there will be simultaneous uploads. When an upload is interrupted, that space will be used for a longer period of time. To turn a Tahoe-LAFS node into a helper (i.e. to run a helper service in addition to whatever else that node is doing), edit the tahoe.cfg file in your node's base directory and set "enabled = true" in the section named "[helper]". Then restart the node. This will signal the node to create a Helper service and listen for incoming requests. Once the node has started, there will be a file named private/helper.furl which contains the contact information for the helper: you will need to give this FURL to any clients that wish to use your helper. :: cat $BASEDIR/private/helper.furl | mail -s "helper furl" friend@example.com You can tell if your node is running a helper by looking at its web status page. Assuming that you've set up the 'webport' to use port 3456, point your browser at ``http://localhost:3456/`` . The welcome page will say "Helper: 0 active uploads" or "Not running helper" as appropriate. The http://localhost:3456/helper_status page will also provide details on what the helper is currently doing. The helper will store the ciphertext that is is fetching from clients in $BASEDIR/helper/CHK_incoming/ . Once all the ciphertext has been fetched, it will be moved to $BASEDIR/helper/CHK_encoding/ and erasure-coding will commence. Once the file is fully encoded and the shares are pushed to the storage servers, the ciphertext file will be deleted. If a client disconnects while the ciphertext is being fetched, the partial ciphertext will remain in CHK_incoming/ until they reconnect and finish sending it. If a client disconnects while the ciphertext is being encoded, the data will remain in CHK_encoding/ until they reconnect and encoding is finished. For long-running and busy helpers, it may be a good idea to delete files in these directories that have not been modified for a week or two. Future versions of tahoe will try to self-manage these files a bit better. Using a Helper ============== Who should consider using a Helper? 
* clients with limited upstream bandwidth, such as a consumer ADSL line * clients who believe that the helper will give them faster uploads than they could achieve with a direct upload * clients who experience problems with TCP connection fairness: if other programs or machines in the same home are getting less than their fair share of upload bandwidth. If the connection is being shared fairly, then a Tahoe upload that is happening at the same time as a single FTP upload should get half the bandwidth. * clients who have been given the helper.furl by someone who is running a Helper and is willing to let them use it To take advantage of somebody else's Helper, take the helper furl that they give you, and edit your tahoe.cfg file. Enter the helper's furl into the value of the key "helper.furl" in the "[client]" section of tahoe.cfg, as described in the "Client Configuration" section of configuration.rst_. .. _configuration.rst: file:configuration.rst Then restart the node. This will signal the client to try and connect to the helper. Subsequent uploads will use the helper rather than using direct connections to the storage server. If the node has been configured to use a helper, that node's HTTP welcome page (``http://localhost:3456/``) will say "Helper: $HELPERFURL" instead of "Helper: None". If the helper is actually running and reachable, the bullet to the left of "Helper" will be green. The helper is optional. If a helper is connected when an upload begins, the upload will use the helper. If there is no helper connection present when an upload begins, that upload will connect directly to the storage servers. The client will automatically attempt to reconnect to the helper if the connection is lost, using the same exponential-backoff algorithm as all other tahoe/foolscap connections. The upload/download status page (``http://localhost:3456/status``) will announce the using-helper-or-not state of each upload, in the "Helper?" column. Other Helper Modes ================== The Tahoe Helper only currently helps with one kind of operation: uploading immutable files. There are three other things it might be able to help with in the future: * downloading immutable files * uploading mutable files (such as directories) * downloading mutable files (like directories) Since mutable files are currently limited in size, the ADSL upstream penalty is not so severe for them. There is no ADSL penalty to downloads, but there may still be benefit to extending the helper interface to assist with them: fewer connections to the storage servers, and better TCP fairness. A future version of the Tahoe helper might provide assistance with these other modes. If it were to help with all four modes, then the clients would not need direct connections to the storage servers at all: clients would connect to helpers, and helpers would connect to servers. For a large grid with tens of thousands of clients, this might make the grid more scalable. tahoe-lafs-1.10.0/docs/historical/000077500000000000000000000000001221140116300166745ustar00rootroot00000000000000tahoe-lafs-1.10.0/docs/historical/configuration.rst000066400000000000000000000057371221140116300223110ustar00rootroot00000000000000======================= Old Configuration Files ======================= Tahoe-LAFS releases before v1.3.0 had no ``tahoe.cfg`` file, and used distinct files for each item listed below. If Tahoe-LAFS v1.9.0 or above detects the old configuration files at start up it emits a warning and aborts the start up. (This was issue ticket #1385.) 
=============================== =================================== ================= Config setting File Comment =============================== =================================== ================= ``[node]nickname`` ``BASEDIR/nickname`` ``[node]web.port`` ``BASEDIR/webport`` ``[node]tub.port`` ``BASEDIR/client.port`` (for Clients, not Introducers) ``[node]tub.port`` ``BASEDIR/introducer.port`` (for Introducers, not Clients) (note that, unlike other keys, ``tahoe.cfg`` overrode this file from Tahoe-LAFS v1.3.0 up to and including Tahoe-LAFS v1.8.2) ``[node]tub.location`` ``BASEDIR/advertised_ip_addresses`` ``[node]log_gatherer.furl`` ``BASEDIR/log_gatherer.furl`` (one per line) ``[node]timeout.keepalive`` ``BASEDIR/keepalive_timeout`` ``[node]timeout.disconnect`` ``BASEDIR/disconnect_timeout`` ``[client]introducer.furl`` ``BASEDIR/introducer.furl`` ``[client]helper.furl`` ``BASEDIR/helper.furl`` ``[client]key_generator.furl`` ``BASEDIR/key_generator.furl`` ``[client]stats_gatherer.furl`` ``BASEDIR/stats_gatherer.furl`` ``[storage]enabled`` ``BASEDIR/no_storage`` (``False`` if ``no_storage`` exists) ``[storage]readonly`` ``BASEDIR/readonly_storage`` (``True`` if ``readonly_storage`` exists) ``[storage]sizelimit`` ``BASEDIR/sizelimit`` ``[storage]debug_discard`` ``BASEDIR/debug_discard_storage`` ``[helper]enabled`` ``BASEDIR/run_helper`` (``True`` if ``run_helper`` exists) =============================== =================================== ================= Note: the functionality of ``[node]ssh.port`` and ``[node]ssh.authorized_keys_file`` were previously (before Tahoe-LAFS v1.3.0) combined, controlled by the presence of a ``BASEDIR/authorized_keys.SSHPORT`` file, in which the suffix of the filename indicated which port the ssh server should listen on, and the contents of the file provided the ssh public keys to accept. Support for these files has been removed completely. To ``ssh`` into your Tahoe-LAFS node, add ``[node]ssh.port`` and ``[node].ssh_authorized_keys_file`` statements to your ``tahoe.cfg``. Likewise, the functionality of ``[node]tub.location`` is a variant of the now (since Tahoe-LAFS v1.3.0) unsupported ``BASEDIR/advertised_ip_addresses`` . The old file was additive (the addresses specified in ``advertised_ip_addresses`` were used in addition to any that were automatically discovered), whereas the new ``tahoe.cfg`` directive is not (``tub.location`` is used verbatim). tahoe-lafs-1.10.0/docs/historical/historical_known_issues.txt000066400000000000000000000323261221140116300244130ustar00rootroot00000000000000= Known Issues = Below is a list of known issues in older releases of Tahoe-LAFS, and how to manage them. The current version of this file can be found at https://tahoe-lafs.org/source/tahoe/trunk/docs/historical/historical_known_issues.txt Issues in newer releases of Tahoe-LAFS can be found at: https://tahoe-lafs.org/source/tahoe/trunk/docs/known_issues.rst == issues in Tahoe v1.8.2, released 30-Jan-2011 == Unauthorized deletion of an immutable file by its storage index --------------------------------------------------------------- Due to a flaw in the Tahoe-LAFS storage server software in v1.3.0 through v1.8.2, a person who knows the "storage index" that identifies an immutable file can cause the server to delete its shares of that file. If an attacker can cause enough shares to be deleted from enough storage servers, this deletes the file. 
This vulnerability does not enable anyone to read file contents without authorization (confidentiality), nor to change the contents of a file (integrity). A person could learn the storage index of a file in several ways: 1. By being granted the authority to read the immutable file—i.e. by being granted a read capability to the file. They can determine the file's storage index from its read capability. 2. By being granted a verify capability to the file. They can determine the file's storage index from its verify capability. This case probably doesn't happen often because users typically don't share verify caps. 3. By operating a storage server, and receiving a request from a client that has a read cap or a verify cap. If the client attempts to upload, download, or verify the file with their storage server, even if it doesn't actually have the file, then they can learn the storage index of the file. 4. By gaining read access to an existing storage server's local filesystem, and inspecting the directory structure that it stores its shares in. They can thus learn the storage indexes of all files that the server is holding at least one share of. Normally only the operator of an existing storage server would be able to inspect its local filesystem, so this requires either being such an operator of an existing storage server, or somehow gaining the ability to inspect the local filesystem of an existing storage server. *how to manage it* Tahoe-LAFS version v1.8.3 or newer (except v1.9a1) no longer has this flaw; if you upgrade a storage server to a fixed release then that server is no longer vulnerable to this problem. Note that the issue is local to each storage server independently of other storage servers—when you upgrade a storage server then that particular storage server can no longer be tricked into deleting its shares of the target file. If you can't immediately upgrade your storage server to a version of Tahoe-LAFS that eliminates this vulnerability, then you could temporarily shut down your storage server. This would of course negatively impact availability—clients would not be able to upload or download shares to that particular storage server while it was shut down—but it would protect the shares already stored on that server from being deleted as long as the server is shut down. If the servers that store shares of your file are running a version of Tahoe-LAFS with this vulnerability, then you should think about whether someone can learn the storage indexes of your files by one of the methods described above. A person can not exploit this vulnerability unless they have received a read cap or verify cap, or they control a storage server that has been queried about this file by a client that has a read cap or a verify cap. Tahoe-LAFS does not currently have a mechanism to limit which storage servers can connect to your grid, but it does have a way to see which storage servers have been connected to the grid. The Introducer's front page in the Web User Interface has a list of all storage servers that the Introducer has ever seen and the first time and the most recent time that it saw them. Each Tahoe-LAFS gateway maintains a similar list on its front page in its Web User Interface, showing all of the storage servers that it learned about from the Introducer, when it first connected to that storage server, and when it most recently connected to that storage server. These lists are stored in memory and are reset to empty when the process is restarted. 
See ticket `#1528`_ for technical details. .. _#1528: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1528 == issues in Tahoe v1.1.0, released 2008-06-11 == (Tahoe v1.1.0 was superceded by v1.2.0 which was released 2008-07-21.) === more than one file can match an immutable file cap === In Tahoe v1.0 and v1.1, a flaw in the cryptographic integrity check makes it possible for the original uploader of an immutable file to produce more than one immutable file matching the same capability, so that different downloads using the same capability could result in different files. This flaw can be exploited only by the original uploader of an immutable file, which means that it is not a severe vulnerability: you can still rely on the integrity check to make sure that the file you download with a given capability is a file that the original uploader intended. The only issue is that you can't assume that every time you use the same capability to download a file you'll get the same file. ==== how to manage it ==== This was fixed in Tahoe v1.2.0, released 2008-07-21, under ticket #491. Upgrade to that release of Tahoe and then you can rely on the property that there is only one file that you can download using a given capability. If you are still using Tahoe v1.0 or v1.1, then remember that the original uploader could produce multiple files that match the same capability, so for example if someone gives you a capability, and you use it to download a file, and you give that capability to your friend, and he uses it to download a file, you and your friend could get different files. === server out of space when writing mutable file === If a v1.0 or v1.1 storage server runs out of disk space or is otherwise unable to write to its local filesystem, then problems can ensue. For immutable files, this will not lead to any problem (the attempt to upload that share to that server will fail, the partially uploaded share will be deleted from the storage server's "incoming shares" directory, and the client will move on to using another storage server instead). If the write was an attempt to modify an existing mutable file, however, a problem will result: when the attempt to write the new share fails (e.g. due to insufficient disk space), then it will be aborted and the old share will be left in place. If enough such old shares are left, then a subsequent read may get those old shares and see the file in its earlier state, which is a "rollback" failure. With the default parameters (3-of-10), six old shares will be enough to potentially lead to a rollback failure. ==== how to manage it ==== Make sure your Tahoe storage servers don't run out of disk space. This means refusing storage requests before the disk fills up. There are a couple of ways to do that with v1.1. First, there is a configuration option named "sizelimit" which will cause the storage server to do a "du" style recursive examination of its directories at startup, and then if the sum of the size of files found therein is greater than the "sizelimit" number, it will reject requests by clients to write new immutable shares. However, that can take a long time (something on the order of a minute of examination of the filesystem for each 10 GB of data stored in the Tahoe server), and the Tahoe server will be unavailable to clients during that time. Another option is to set the "readonly_storage" configuration option on the storage server before startup. This will cause the storage server to reject all requests to upload new immutable shares. 
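As a concrete illustration: on a v1.1-era storage server these settings lived in bare files in the node's base directory (per the "Old Configuration Files" table elsewhere in these docs). The commands below are only a sketch; in particular, the exact value syntax accepted by the sizelimit file is not spelled out here, so treat the number as a placeholder.

{{{
# refuse all new immutable shares outright:
touch $BASEDIR/readonly_storage

# or cap the space used for shares (triggers the "du"-style scan at startup):
echo "10000000000" > $BASEDIR/sizelimit

# then restart the storage node so the setting takes effect
}}}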
Note that neither of these configurations affect mutable shares: even if sizelimit is configured and the storage server currently has greater space used than allowed, or even if readonly_storage is configured, servers will continue to accept new mutable shares and will continue to accept requests to overwrite existing mutable shares. Mutable files are typically used only for directories, and are usually much smaller than immutable files, so if you use one of these configurations to stop the influx of immutable files while there is still sufficient disk space to receive an influx of (much smaller) mutable files, you may be able to avoid the potential for "rollback" failure. A future version of Tahoe will include a fix for this issue. Here is [https://tahoe-lafs.org/pipermail/tahoe-dev/2008-May/000630.html the mailing list discussion] about how that future version will work. === pyOpenSSL/Twisted defect causes false alarms in tests === The combination of Twisted v8.0 or Twisted v8.1 with pyOpenSSL v0.7 causes the Tahoe v1.1 unit tests to fail, even though the behavior of Tahoe itself which is being tested is correct. ==== how to manage it ==== If you are using Twisted v8.0 or Twisted v8.1 and pyOpenSSL v0.7, then please ignore ERROR "Reactor was unclean" in test_system and test_introducer. Upgrading to a newer version of Twisted or pyOpenSSL will cause those false alarms to stop happening (as will downgrading to an older version of either of those packages). == issues in Tahoe v1.0.0, released 2008-03-25 == (Tahoe v1.0 was superceded by v1.1 which was released 2008-06-11.) === server out of space when writing mutable file === In addition to the problems caused by insufficient disk space described above, v1.0 clients which are writing mutable files when the servers fail to write to their filesystem are likely to think the write succeeded, when it in fact failed. This can cause data loss. ==== how to manage it ==== Upgrade client to v1.1, or make sure that servers are always able to write to their local filesystem (including that there is space available) as described in "server out of space when writing mutable file" above. === server out of space when writing immutable file === Tahoe v1.0 clients are using v1.0 servers which are unable to write to their filesystem during an immutable upload will correctly detect the first failure, but if they retry the upload without restarting the client, or if another client attempts to upload the same file, the second upload may appear to succeed when it hasn't, which can lead to data loss. ==== how to manage it ==== Upgrading either or both of the client and the server to v1.1 will fix this issue. Also it can be avoided by ensuring that the servers are always able to write to their local filesystem (including that there is space available) as described in "server out of space when writing mutable file" above. === large directories or mutable files of certain sizes === If a client attempts to upload a large mutable file with a size greater than about 3,139,000 and less than or equal to 3,500,000 bytes then it will fail but appear to succeed, which can lead to data loss. (Mutable files larger than 3,500,000 are refused outright). The symptom of the failure is very high memory usage (3 GB of memory) and 100% CPU for about 5 minutes, before it appears to succeed, although it hasn't. 
Directories are stored in mutable files, and a directory of approximately 9000 entries may fall into this range of mutable file sizes (depending on the size of the filenames or other metadata associated with the entries). ==== how to manage it ==== This was fixed in v1.1, under ticket #379. If the client is upgraded to v1.1, then it will fail cleanly instead of falsely appearing to succeed when it tries to write a file whose size is in this range. If the server is also upgraded to v1.1, then writes of mutable files whose size is in this range will succeed. (If the server is upgraded to v1.1 but the client is still v1.0 then the client will still suffer this failure.) === uploading files greater than 12 GiB === If a Tahoe v1.0 client uploads a file greater than 12 GiB in size, the file will be silently corrupted so that it is not retrievable, but the client will think that it succeeded. This is a "data loss" failure. ==== how to manage it ==== Don't upload files larger than 12 GiB. If you have previously uploaded files of that size, assume that they have been corrupted and are not retrievable from the Tahoe storage grid. Tahoe v1.1 clients will refuse to upload files larger than 12 GiB with a clean failure. A future release of Tahoe will remove this limitation so that larger files can be uploaded. === pycryptopp defect resulting in data corruption === Versions of pycryptopp earlier than pycryptopp-0.5.0 had a defect which, when compiled with some compilers, would cause AES-256 encryption and decryption to be computed incorrectly. This could cause data corruption. Tahoe v1.0 required, and came with a bundled copy of, pycryptopp v0.3. ==== how to manage it ==== You can detect whether pycryptopp-0.3 has this failure when it is compiled by your compiler. Run the unit tests that come with pycryptopp-0.3: unpack the "pycryptopp-0.3.tar" file that comes in the Tahoe v1.0 {{{misc/dependencies}}} directory, cd into the resulting {{{pycryptopp-0.3.0}}} directory, and execute {{{python ./setup.py test}}}. If the tests pass, then your compiler does not trigger this failure. tahoe-lafs-1.10.0/docs/historical/peer-selection-tahoe2.txt000066400000000000000000000065701221140116300235430ustar00rootroot00000000000000= THIS PAGE DESCRIBES HISTORICAL DESIGN CHOICES. SEE docs/architecture.rst FOR CURRENT DOCUMENTATION = When a file is uploaded, the encoded shares are sent to other peers. But to which ones? The PeerSelection algorithm is used to make this choice. Early in 2007, we were planning to use the following "Tahoe Two" algorithm. By the time we released 0.2.0, we switched to "tahoe3", but when we released v0.6, we switched back (ticket #132). As in Tahoe Three, the verifierid is used to consistently-permute the set of all peers (by sorting the peers by HASH(verifierid+peerid)). Each file gets a different permutation, which (on average) will evenly distribute shares among the grid and avoid hotspots. With our basket of (usually 10) shares to distribute in hand, we start at the beginning of the list and ask each peer in turn if they are willing to hold on to one of our shares (the "lease request"). If they say yes, we remove that share from the basket and remember who agreed to host it. Then we go to the next peer in the list and ask them the same question about another share. If a peer says no, we remove them from the list. If a peer says that they already have one or more shares for this file, we remove those shares from the basket. If we reach the end of the list, we start again at the beginning. 
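The loop just described can be sketched in Python roughly as follows. This is illustrative only, not the real Tahoe code: sha256 stands in for whatever hash the implementation actually uses, the caller-supplied ask_peer_to_hold(peerid, share) callable stands in for the remote lease request, and the "peer already has some shares of this file" case is omitted for brevity.

  import hashlib

  def place_shares(verifierid, peerids, shares, ask_peer_to_hold):
      # consistent per-file permutation: sort peers by HASH(verifierid + peerid)
      ring = sorted(peerids,
                    key=lambda pid: hashlib.sha256(verifierid + pid).digest())
      basket = list(shares)
      placements = {}
      while basket and ring:
          for peerid in list(ring):          # one pass around the permuted list
              if not basket:
                  break
              if ask_peer_to_hold(peerid, basket[0]):   # hypothetical lease request
                  placements[basket.pop(0)] = peerid
              else:
                  ring.remove(peerid)        # peer is full/unwilling; drop it
      return placements, basket              # leftover shares found no home
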
If we run out of peers before we run out of shares, we fail unless we've managed to place at least some number of the shares: the likely threshold is to attempt to place 10 shares (out of which we'll need 3 to recover the file), and be content if we can find homes for at least 7 of them. In small networks, this approach will loop around several times and place several shares with each node (e.g. in a 5-host network with plenty of space, each node will get 2 shares). In large networks with plenty of space, the shares will be placed with the first 10 peers in the permuted list. In large networks that are somewhat full, we'll need to traverse more of the list before we find homes for the shares. The average number of peers that we'll need to talk to is vaguely equal to 10 / (1-utilization), with a bunch of other terms that relate to the distribution of free space on the peers and the size of the shares being offered. Small files with small shares will fit anywhere, large files with large shares will only fit on certain peers, so the mesh may have free space but no holes large enough for a very large file, which might indicate that we should try again with a larger number of (smaller) shares. When it comes time to download, we compute a similar list of permuted peerids, and start asking for shares beginning with the start of the list. Each peer gives us a list of the shareids that they are holding. Eventually (depending upon how much churn the peerlist has experienced), we'll find holders for at least 3 shares, or we'll run out of peers. If the mesh is very large and we want to fail faster, we can establish an upper bound on how many peers we should talk to (perhaps by recording the permuted peerid of the last node to which we sent a share, or a count of the total number of peers we talked to during upload). I suspect that this approach handles churn more efficiently than tahoe3, but I haven't gotten my head around the math that could be used to show it. On the other hand, it takes a lot more round trips to find homes in small meshes (one per share, whereas tahoe three can do just one per node). tahoe-lafs-1.10.0/docs/historical/peer-selection-tahoe3.txt000066400000000000000000000101151221140116300235320ustar00rootroot00000000000000= THIS PAGE DESCRIBES HISTORICAL ARCHITECTURE CHOICES: THE CURRENT CODE DOES NOT WORK AS DESCRIBED HERE. = When a file is uploaded, the encoded shares are sent to other peers. But to which ones? The PeerSelection algorithm is used to make this choice. In the old (May 2007) version, the verifierid is used to consistently-permute the set of all peers (by sorting the peers by HASH(verifierid+peerid)). Each file gets a different permutation, which (on average) will evenly distribute shares among the grid and avoid hotspots. This permutation places the peers around a 2^256^-sized ring, like the rim of a big clock. The 100-or-so shares are then placed around the same ring (at 0, 1/100*2^256^, 2/100*2^256^, ... 99/100*2^256^). Imagine that we start at 0 with an empty basket in hand and proceed clockwise. When we come to a share, we pick it up and put it in the basket. When we come to a peer, we ask that peer if they will give us a lease for every share in our basket. The peer will grant us leases for some of those shares and reject others (if they are full or almost full). If they reject all our requests, we remove them from the ring, because they are full and thus unhelpful. Each share they accept is removed from the basket. 
The remainder stay in the basket as we continue walking clockwise. We keep walking, accumulating shares and distributing them to peers, until either we find a home for all shares, or there are no peers left in the ring (because they are all full). If we run out of peers before we run out of shares, the upload may be considered a failure, depending upon how many shares we were able to place. The current parameters try to place 100 shares, of which 25 must be retrievable to recover the file, and the peer selection algorithm is happy if it was able to place at least 75 shares. These numbers are adjustable: 25-out-of-100 means an expansion factor of 4x (every file in the grid consumes four times as much space when totalled across all StorageServers), but is highly reliable (the actual reliability is a binomial distribution function of the expected availability of the individual peers, but in general it goes up very quickly with the expansion factor). If the file has been uploaded before (or if two uploads are happening at the same time), a peer might already have shares for the same file we are proposing to send to them. In this case, those shares are removed from the list and assumed to be available (or will be soon). This reduces the number of uploads that must be performed. When downloading a file, the current release just asks all known peers for any shares they might have, chooses the minimal necessary subset, then starts downloading and processing those shares. A later release will use the full algorithm to reduce the number of queries that must be sent out. This algorithm uses the same consistent-hashing permutation as on upload, but instead of one walker with one basket, we have 100 walkers (one per share). They each proceed clockwise in parallel until they find a peer, and put that one on the "A" list: out of all peers, this one is the most likely to be the same one to which the share was originally uploaded. The next peer that each walker encounters is put on the "B" list, etc. All the "A" list peers are asked for any shares they might have. If enough of them can provide a share, the download phase begins and those shares are retrieved and decoded. If not, the "B" list peers are contacted, etc. This routine will eventually find all the peers that have shares, and will find them quickly if there is significant overlap between the set of peers that were present when the file was uploaded and the set of peers that are present as it is downloaded (i.e. if the "peerlist stability" is high). Some limits may be imposed in large grids to avoid querying a million peers; this provides a tradeoff between the work spent to discover that a file is unrecoverable and the probability that a retrieval will fail when it could have succeeded if we had just tried a little bit harder. The appropriate value of this tradeoff will depend upon the size of the grid, and will change over time. tahoe-lafs-1.10.0/docs/historical/peer-selection.txt000066400000000000000000000030641221140116300223560ustar00rootroot00000000000000When a file is uploaded, the encoded shares are sent to other peers. But to which ones? Likewise, when we want to download a file, which peers should we ask for shares? The "peer selection" algorithm is used to make these choices. During the first tahoe meeting, (actualy on the drive back home), we designed the now-abandoned "tahoe1" algorithm, which involved a "cabal" for each file, where the peers involved would check up on each other to make sure the data was still available. 
The big limitation was the expense of tracking which nodes were parts of which cabals. When we release 0.2.0, we used the "tahoe3" algorithm (see peer-selection-tahoe3.txt), but in v0.6 (ticket #132) we switched back to "tahoe2" (see peer-selection-tahoe2.txt, and the PEER SELECTION section of docs/architecture.rst), which uses a permuted peerid list and packs the shares into the first 10 or so members of this list. (It is named "tahoe2" because it was designed before "tahoe3" was.) In the future, we might move to an algorithm known as "denver airport", which uses Chord-like routing to minimize the number of active connections. Different peer selection algorithms result in different properties: * how well do we handle nodes leaving or joining the mesh (differences in the peer list)? * how many connections do we need to keep open? * how many nodes must we speak to when uploading a file? * if a file is unrecoverable, how long will it take for us to discover this fact? * how expensive is a file-checking operation? * how well can we accomodate changes to encoding parameters? tahoe-lafs-1.10.0/docs/how_to_make_a_tahoe-lafs_release.org000066400000000000000000000056541221140116300236550ustar00rootroot00000000000000* Tahoe Release Checklist [0/19] - [ ] update doc files - relnotes.txt - CREDITS - docs/known_issues.rst - NEWS.rst: Add release name and date to top-most item in NEWS. - [ ] change docs/quickstart.rst to point to just the current allmydata-tahoe-X.Y.Z.zip source code file, or else to point to a directory which contains only allmydata-tahoe-X.Y.Z.* source code files - [ ] git pull - [ ] git tag allmydata-tahoe-X.Y.Z - [ ] build locally to make sure the release is reporting itself as the intended version - [ ] make sure buildbot is green - [ ] make sure other people aren't committing at that moment - [ ] push tag along with some other documentation-only patch (typically to relnotes.txt) to trigger buildslaves - git push --tags official; git push official - that will build tarballs - [ ] make sure buildbot is green - [ ] make sure a sumo sdist tarball got built and uploaded properly - [ ] download tarballs, sign with "gpg -ba -u 68666a7a TAR", upload *.asc - [ ] symlink the release tarball on tahoe-lafs.org: /var/www/source/tahoe-lafs/releases/ - [ ] update Wiki: front page news, news, old news, parade of release notes - [ ] send out relnotes.txt to: - tahoe-announce@tahoe-lafs.org - tahoe-dev@tahoe-lafs.org - [ ] update https://tahoe-lafs.org/hacktahoelafs/ - [ ] make an "announcement of new release" on freshmeat - [ ] upload to pypi with "python ./setup.py sdist upload register" - currently broken (with git). And we want signed tarballs, and we want the tarballs to match the ones on tahoe-lafs.org. 
So instead do this: - [ ] login to pypi - [ ] from Edit, add new release - [ ] upload .tar.gz, .asc - [ ] make an "announcement of new release" on launchpad - [ ] close the Milestone on the trac Roadmap - [ ] send out relnotes.txt to: - p2p-hackers@lists.zooko.com - lwn@lwn.net - a Google+ page - cap-talk@mail.eros-os.org - cryptography@metzdown.com - cryptography@randombit.net - twisted-python@twistedmatrix.com - owncloud@kde.org - liberationtech@lists.stanford.edu - the "decentralization" group on groups.yahoo.com - pycrypto mailing list - fuse-devel@lists.sourceforge.net - fuse-sshfs@lists.sourceforge.net - duplicity-talk@nongnu.org - news@phoronix.com - python-list@python.org - cygwin@cygwin.com - The Boulder Linux Users' Group - The Boulder Hackerspace mailing list - cryptopp-users@googlegroups.com - tiddlywiki - hdfs-dev@hadoop.apache.org - bzr - mercurial - http://listcultures.org/pipermail/p2presearch_listcultures.org/ - deltacloud - libcloud - swift@lists.launchpad.net - stephen@fosketts.net - Chris Mellor of The Register - nosql@mypopescu.com - The H Open - fans/customers of cleversafe - fans/customers of bitcasa - fans/customers of wuala - fans/customers of spideroak tahoe-lafs-1.10.0/docs/known_issues.rst000066400000000000000000000447611221140116300200300ustar00rootroot00000000000000 See also cautions.rst_. .. _cautions.rst: file:cautions.rst ============ Known Issues ============ Below is a list of known issues in recent releases of Tahoe-LAFS, and how to manage them. The current version of this file can be found at https://tahoe-lafs.org/source/tahoe-lafs/trunk/docs/known_issues.rst . If you've been using Tahoe-LAFS since v1.1 (released 2008-06-11) or if you're just curious about what sort of mistakes we've made in the past, then you might want to read `the "historical known issues" document`_. .. _the "historical known issues" document: historical/historical_known_issues.txt Known Issues in Tahoe-LAFS v1.10, released 01-May-2013 ====================================================== * `Unauthorized access by JavaScript in unrelated files`_ * `Disclosure of file through embedded hyperlinks or JavaScript in that file`_ * `Command-line arguments are leaked to other local users`_ * `Capabilities may be leaked to web browser phishing filter / "safe browsing" servers`_ * `Known issues in the FTP and SFTP frontends`_ * `Traffic analysis based on sizes of files/directories, storage indices, and timing`_ * `Privacy leak via Google Chart API link in map-update timing web page`_ ---- Unauthorized access by JavaScript in unrelated files ---------------------------------------------------- If you view a file stored in Tahoe-LAFS through a web user interface, JavaScript embedded in that file can, in some circumstances, access other files or directories stored in Tahoe-LAFS that you view through the same web user interface. Such a script would be able to send the contents of those other files or directories to the author of the script, and if you have the ability to modify the contents of those files or directories, then that script could modify or delete those files or directories. This attack is known to be possible when an attacking tab or window could reach a tab or window containing a Tahoe URI by navigating back or forward in the history, either from itself or from any frame with a known name (as specified by the "target" attribute of an HTML link). It might be possible in other cases depending on the browser. 
*how to manage it* For future versions of Tahoe-LAFS, we are considering ways to close off this leakage of authority while preserving ease of use -- the discussion of this issue is ticket `#615`_. For the present, either do not view files stored in Tahoe-LAFS through a web user interface, or turn off JavaScript in your web browser before doing so, or limit your viewing to files which you know don't contain malicious JavaScript. .. _#615: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/615 ---- Disclosure of file through embedded hyperlinks or JavaScript in that file ------------------------------------------------------------------------- If there is a file stored on a Tahoe-LAFS storage grid, and that file gets downloaded and displayed in a web browser, then JavaScript or hyperlinks within that file can leak the capability to that file to a third party, which means that third party gets access to the file. If there is JavaScript in the file, then it could deliberately leak the capability to the file out to some remote listener. If there are hyperlinks in the file, and they get followed, then whichever server they point to receives the capability to the file. Note that IMG tags are typically followed automatically by web browsers, so being careful which hyperlinks you click on is not sufficient to prevent this from happening. *how to manage it* For future versions of Tahoe-LAFS, we are considering ways to close off this leakage of authority while preserving ease of use -- the discussion of this issue is ticket `#127`_. For the present, a good work-around is that if you want to store and view a file on Tahoe-LAFS and you want that file to remain private, then remove from that file any hyperlinks pointing to other people's servers and remove any JavaScript unless you are sure that the JavaScript is not written to maliciously leak access. .. _#127: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/127 ---- Command-line arguments are leaked to other local users ------------------------------------------------------ Remember that command-line arguments are visible to other users (through the 'ps' command, or the windows Process Explorer tool), so if you are using a Tahoe-LAFS node on a shared host, other users on that host will be able to see (and copy) any caps that you pass as command-line arguments. This includes directory caps that you set up with the "tahoe add-alias" command. *how to manage it* As of Tahoe-LAFS v1.3.0 there is a "tahoe create-alias" command that does the following technique for you. Bypass add-alias and edit the NODEDIR/private/aliases file directly, by adding a line like this: fun: URI:DIR2:ovjy4yhylqlfoqg2vcze36dhde:4d4f47qko2xm5g7osgo2yyidi5m4muyo2vjjy53q4vjju2u55mfa By entering the dircap through the editor, the command-line arguments are bypassed, and other users will not be able to see them. Once you've added the alias, if you use that alias instead of a cap itself on the command-line, then no secrets are passed through the command line. Then other processes on the system can still see your filenames and other arguments you type there, but not the caps that Tahoe-LAFS uses to permit access to your files and directories. 
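As a concrete illustration (assuming the default node directory ``~/.tahoe``; the directory cap is the same example value shown above, and the here-document is just one way to get the line into the file without passing it as a command-line argument -- note that your shell's history file may still record it)::

    # preferred: let Tahoe create a new directory and the alias for you
    tahoe create-alias fun

    # or paste an existing cap into the aliases file via stdin rather than argv
    cat >> ~/.tahoe/private/aliases <<'EOF'
    fun: URI:DIR2:ovjy4yhylqlfoqg2vcze36dhde:4d4f47qko2xm5g7osgo2yyidi5m4muyo2vjjy53q4vjju2u55mfa
    EOF

    # later commands can then name the alias instead of the cap
    tahoe put localfile.txt fun:remotefile.txt
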
---- Capabilities may be leaked to web browser phishing filter / "safe browsing" servers ----------------------------------------------------------------------------------- Firefox, Internet Explorer, and Chrome include a "phishing filter" or "safe browing" component, which is turned on by default, and which sends any URLs that it deems suspicious to a central server. Microsoft gives `a brief description of their filter's operation`_. Firefox and Chrome both use Google's `"safe browsing API"`_ (`specification`_). This of course has implications for the privacy of general web browsing (especially in the cases of Firefox and Chrome, which send your main personally identifying Google cookie along with these requests without your explicit consent, as described in `Firefox bugzilla ticket #368255`_. The reason for documenting this issue here, though, is that when using the Tahoe-LAFS web user interface, it could also affect confidentiality and integrity by leaking capabilities to the filter server. Since IE's filter sends URLs by SSL/TLS, the exposure of caps is limited to the filter server operators (or anyone able to hack the filter server) rather than to network eavesdroppers. The "safe browsing API" protocol used by Firefox and Chrome, on the other hand, is *not* encrypted, although the URL components are normally hashed. Opera also has a similar facility that is disabled by default. A previous version of this file stated that Firefox had abandoned their phishing filter; this was incorrect. .. _a brief description of their filter's operation: https://blogs.msdn.com/ie/archive/2005/09/09/463204.aspx .. _"safe browsing API": https://code.google.com/apis/safebrowsing/ .. _specification: https://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec .. _Firefox bugzilla ticket #368255: https://bugzilla.mozilla.org/show_bug.cgi?id=368255 *how to manage it* If you use any phishing filter or "safe browsing" feature, consider either disabling it, or not using the WUI via that browser. Phishing filters have `very limited effectiveness`_ , and phishing or malware attackers have learnt how to bypass them. .. _very limited effectiveness: http://lorrie.cranor.org/pubs/ndss-phish-tools-final.pdf To disable the filter in IE7 or IE8: ++++++++++++++++++++++++++++++++++++ - Click Internet Options from the Tools menu. - Click the Advanced tab. - If an "Enable SmartScreen Filter" option is present, uncheck it. If a "Use Phishing Filter" or "Phishing Filter" option is present, set it to Disable. - Confirm (click OK or Yes) out of all dialogs. If you have a version of IE that splits the settings between security zones, do this for all zones. To disable the filter in Firefox: +++++++++++++++++++++++++++++++++ - Click Options from the Tools menu. - Click the Security tab. - Uncheck both the "Block reported attack sites" and "Block reported web forgeries" options. - Click OK. To disable the filter in Chrome: ++++++++++++++++++++++++++++++++ - Click Options from the Tools menu. - Click the "Under the Hood" tab and find the "Privacy" section. - Uncheck the "Enable phishing and malware protection" option. - Click Close. ---- Known issues in the FTP and SFTP frontends ------------------------------------------ These are documented in `docs/frontends/FTP-and-SFTP.rst`_ and on `the SftpFrontend page`_ on the wiki. .. _docs/frontends/FTP-and-SFTP.rst: frontends/FTP-and-SFTP.rst .. 
_the SftpFrontend page: https://tahoe-lafs.org/trac/tahoe-lafs/wiki/SftpFrontend ---- Traffic analysis based on sizes of files/directories, storage indices, and timing --------------------------------------------------------------------------------- Files and directories stored by Tahoe-LAFS are encrypted, but the ciphertext reveals the exact size of the original file or directory representation. This information is available to passive eavesdroppers and to server operators. For example, a large data set with known file sizes could probably be identified with a high degree of confidence. Uploads and downloads of the same file or directory can be linked by server operators, even without making assumptions based on file size. Anyone who knows the introducer furl for a grid may be able to act as a server operator. This implies that if such an attacker knows which file/directory is being accessed in a particular request (by some other form of surveillance, say), then they can identify later or earlier accesses of the same file/directory. Observing requests during a directory traversal (such as a deep-check operation) could reveal information about the directory structure, i.e. which files and subdirectories are linked from a given directory. Attackers can combine the above information with inferences based on timing correlations. For instance, two files that are accessed close together in time are likely to be related even if they are not linked in the directory structure. Also, users that access the same files may be related to each other. ---- Privacy leak via Google Chart API link in map-update timing web page -------------------------------------------------------------------- The Tahoe web-based user interface includes a diagnostic page known as the "map-update timing page". It is reached through the "Recent and Active Operations" link on the front welcome page, then through the "Status" column for "map-update" operations (which occur when mutable files, including directories, are read or written). This page contains per-server response times, as lines of text, and includes an image which displays the response times in graphical form. The image is generated by constructing a URL for the `Google Chart API `_, which is then served by the `chart.apis.google.com` internet server. When you view this page, several parties may learn information about your Tahoe activities. The request will typically include a "Referer" header, revealing the URL of the mapupdate status page (which is typically something like "http://127.0.0.1:3456/status/mapupdate-123") to network observers and the Google API server. The image returned by this server is typically a PNG file, but either the server or a MitM attacker could replace it with something malicious that attempts to exploit a browser rendering bug or buffer overflow. (Note that browsers do not execute scripts inside IMG tags, even for SVG images). In addition, if your Tahoe node connects to its grid over Tor or i2p, but the web browser you use to access your node does not, then this image link may reveal your use of Tahoe (and that grid) to the outside world. It is not recommended to use a browser in this way, because other links in Tahoe-stored content would reveal even more information (e.g. an attacker could store an HTML file with unique CSS references into a shared Tahoe grid, then send your pseudonym a message with its URI, then observe your browser loading that CSS file, and thus link the source IP address of your web client to that pseudonym). 
A future version of Tahoe will probably replace the Google Chart API link (which was deprecated by Google in April 2012) with client-side javascript using d3.js, removing the information leak but requiring JS to see the chart. See ticket `#1942`_ for details. .. _#1942: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1942 ---- Known Issues in Tahoe-LAFS v1.9.0, released 31-Oct-2011 ======================================================= Integrity Failure during Mutable Downloads ------------------------------------------ Under certain circumstances, the integrity-verification code of the mutable downloader could be bypassed. Clients who receive carefully crafted shares (from attackers) will emit incorrect file contents, and the usual share-corruption errors would not be raised. This only affects mutable files (not immutable), and only affects downloads that use doctored shares. It is not persistent: the threat is resolved once you upgrade your client to a version without the bug. However, read-modify-write operations (such as directory manipulations) performed by vulnerable clients could cause the attacker's modifications to be written back out to the mutable file, making the corruption permanent. The attacker's ability to manipulate the file contents is limited. They can modify FEC-encoded ciphertext in all but one share. This gives them the ability to blindly flip bits in roughly 2/3rds of the file (for the default k=3 encoding parameter). Confidentiality remains intact, unless the attacker can deduce the file's contents by observing your reactions to corrupted downloads. This bug was introduced in 1.9.0, as part of the MDMF-capable downloader, and affects both SDMF and MDMF files. It was not present in 1.8.3. *how to manage it* There are three options: * Upgrade to 1.9.1, which fixes the bug * Downgrade to 1.8.3, which does not contain the bug * If using 1.9.0, do not trust the contents of mutable files (whether SDMF or MDMF) that the 1.9.0 client emits, and do not modify directories (which could write the corrupted data back into place, making the damage persistent) .. _#1654: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1654 ---- Known Issues in Tahoe-LAFS v1.8.2, released 30-Jan-2011 ======================================================= Unauthorized deletion of an immutable file by its storage index --------------------------------------------------------------- Due to a flaw in the Tahoe-LAFS storage server software in v1.3.0 through v1.8.2, a person who knows the "storage index" that identifies an immutable file can cause the server to delete its shares of that file. If an attacker can cause enough shares to be deleted from enough storage servers, this deletes the file. This vulnerability does not enable anyone to read file contents without authorization (confidentiality), nor to change the contents of a file (integrity). A person could learn the storage index of a file in several ways: 1. By being granted the authority to read the immutable file: i.e. by being granted a read capability to the file. They can determine the file's storage index from its read capability. 2. By being granted a verify capability to the file. They can determine the file's storage index from its verify capability. This case probably doesn't happen often because users typically don't share verify caps. 3. By operating a storage server, and receiving a request from a client that has a read cap or a verify cap. 
If the client attempts to upload, download, or verify the file with their storage server, even if it doesn't actually have the file, then they can learn the storage index of the file. 4. By gaining read access to an existing storage server's local filesystem, and inspecting the directory structure that it stores its shares in. They can thus learn the storage indexes of all files that the server is holding at least one share of. Normally only the operator of an existing storage server would be able to inspect its local filesystem, so this requires either being such an operator of an existing storage server, or somehow gaining the ability to inspect the local filesystem of an existing storage server. *how to manage it* Tahoe-LAFS version v1.8.3 or newer (except v1.9a1) no longer has this flaw; if you upgrade a storage server to a fixed release then that server is no longer vulnerable to this problem. Note that the issue is local to each storage server independently of other storage servers: when you upgrade a storage server then that particular storage server can no longer be tricked into deleting its shares of the target file. If you can't immediately upgrade your storage server to a version of Tahoe-LAFS that eliminates this vulnerability, then you could temporarily shut down your storage server. This would of course negatively impact availability -- clients would not be able to upload or download shares to that particular storage server while it was shut down -- but it would protect the shares already stored on that server from being deleted as long as the server is shut down. If the servers that store shares of your file are running a version of Tahoe-LAFS with this vulnerability, then you should think about whether someone can learn the storage indexes of your files by one of the methods described above. A person can not exploit this vulnerability unless they have received a read cap or verify cap, or they control a storage server that has been queried about this file by a client that has a read cap or a verify cap. Tahoe-LAFS does not currently have a mechanism to limit which storage servers can connect to your grid, but it does have a way to see which storage servers have been connected to the grid. The Introducer's front page in the Web User Interface has a list of all storage servers that the Introducer has ever seen and the first time and the most recent time that it saw them. Each Tahoe-LAFS gateway maintains a similar list on its front page in its Web User Interface, showing all of the storage servers that it learned about from the Introducer, when it first connected to that storage server, and when it most recently connected to that storage server. These lists are stored in memory and are reset to empty when the process is restarted. See ticket `#1528`_ for technical details. .. _#1528: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1528 tahoe-lafs-1.10.0/docs/lease-tradeoffs.svg000066400000000000000000000445301221140116300203260ustar00rootroot00000000000000 image/svg+xml leaserenewaltime leaseexpirationtime daily day week month year weekly monthly yearly less traffic more traffic lessgarbage moregarbage less safe more safe tahoe-lafs-1.10.0/docs/logging.rst000066400000000000000000000341231221140116300167160ustar00rootroot00000000000000============= Tahoe Logging ============= 1. `Overview`_ 2. `Realtime Logging`_ 3. `Incidents`_ 4. `Working with flogfiles`_ 5. `Gatherers`_ 1. `Incident Gatherer`_ 2. `Log Gatherer`_ 6. `Local twistd.log files`_ 7. `Adding log messages`_ 8. 
`Log Messages During Unit Tests`_ Overview ======== Tahoe uses the Foolscap logging mechanism (known as the "flog" subsystem) to record information about what is happening inside the Tahoe node. This is primarily for use by programmers and grid operators who want to find out what went wrong. The Foolscap logging system is documented at ``__. The Foolscap distribution includes a utility named "``flogtool``" that is used to get access to many Foolscap logging features. This command only works when foolscap and its dependencies are installed correctly. Tahoe-LAFS v1.10.0 and later include a ``tahoe debug flogtool`` command that can be used even when foolscap is not installed; to use this, prefix all of the example commands below with ``tahoe debug``. For earlier versions since Tahoe-LAFS v1.8.2, installing Foolscap v0.6.1 or later and then running ``bin/tahoe @flogtool`` from the root of a Tahoe-LAFS source distribution may work (but only on Unix, not Windows). Realtime Logging ================ When you are working on Tahoe code, and want to see what the node is doing, the easiest tool to use is "``flogtool tail``". This connects to the Tahoe node and subscribes to hear about all log events. These events are then displayed to stdout, and optionally saved to a file. "``flogtool tail``" connects to the "logport", for which the FURL is stored in ``BASEDIR/private/logport.furl`` . The following command will connect to this port and start emitting log information:: flogtool tail BASEDIR/private/logport.furl The ``--save-to FILENAME`` option will save all received events to a file, where then can be examined later with "``flogtool dump``" or "``flogtool web-viewer``". The ``--catch-up`` option will ask the node to dump all stored events before subscribing to new ones (without ``--catch-up``, you will only hear about events that occur after the tool has connected and subscribed). Incidents ========= Foolscap keeps a short list of recent events in memory. When something goes wrong, it writes all the history it has (and everything that gets logged in the next few seconds) into a file called an "incident". These files go into ``BASEDIR/logs/incidents/`` , in a file named "``incident-TIMESTAMP-UNIQUE.flog.bz2``". The default definition of "something goes wrong" is the generation of a log event at the ``log.WEIRD`` level or higher, but other criteria could be implemented. The typical "incident report" we've seen in a large Tahoe grid is about 40kB compressed, representing about 1800 recent events. These "flogfiles" have a similar format to the files saved by "``flogtool tail --save-to``". They are simply lists of log events, with a small header to indicate which event triggered the incident. The "``flogtool dump FLOGFILE``" command will take one of these ``.flog.bz2`` files and print their contents to stdout, one line per event. The raw event dictionaries can be dumped by using "``flogtool dump --verbose FLOGFILE``". The "``flogtool web-viewer``" command can be used to examine the flogfile in a web browser. It runs a small HTTP server and emits the URL on stdout. This view provides more structure than the output of "``flogtool dump``": the parent/child relationships of log events is displayed in a nested format. "``flogtool web-viewer``" is still fairly immature. Working with flogfiles ====================== The "``flogtool filter``" command can be used to take a large flogfile (perhaps one created by the log-gatherer, see below) and copy a subset of events into a second file. 
This smaller flogfile may be easier to work with than the original. The arguments to "``flogtool filter``" specify filtering criteria: a predicate that each event must match to be copied into the target file. ``--before`` and ``--after`` are used to exclude events outside a given window of time. ``--above`` will retain events above a certain severity level. ``--from`` retains events send by a specific tubid. ``--strip-facility`` removes events that were emitted with a given facility (like ``foolscap.negotiation`` or ``tahoe.upload``). Gatherers ========= In a deployed Tahoe grid, it is useful to get log information automatically transferred to a central log-gatherer host. This offloads the (admittedly modest) storage requirements to a different host and provides access to logfiles from multiple nodes (web-API, storage, or helper) in a single place. There are two kinds of gatherers: "log gatherer" and "stats gatherer". Each produces a FURL which needs to be placed in the ``NODEDIR/tahoe.cfg`` file of each node that is to publish to the gatherer, under the keys "log_gatherer.furl" and "stats_gatherer.furl" respectively. When the Tahoe node starts, it will connect to the configured gatherers and offer its logport: the gatherer will then use the logport to subscribe to hear about events. The gatherer will write to files in its working directory, which can then be examined with tools like "``flogtool dump``" as described above. Incident Gatherer ----------------- The "incident gatherer" only collects Incidents: records of the log events that occurred just before and slightly after some high-level "trigger event" was recorded. Each incident is classified into a "category": a short string that summarizes what sort of problem took place. These classification functions are written after examining a new/unknown incident. The idea is to recognize when the same problem is happening multiple times. A collection of classification functions that are useful for Tahoe nodes are provided in ``misc/incident-gatherer/support_classifiers.py`` . There is roughly one category for each ``log.WEIRD``-or-higher level event in the Tahoe source code. The incident gatherer is created with the "``flogtool create-incident-gatherer WORKDIR``" command, and started with "``tahoe start``". The generated "``gatherer.tac``" file should be modified to add classifier functions. The incident gatherer writes incident names (which are simply the relative pathname of the ``incident-\*.flog.bz2`` file) into ``classified/CATEGORY``. For example, the ``classified/mutable-retrieve-uncoordinated-write-error`` file contains a list of all incidents which were triggered by an uncoordinated write that was detected during mutable file retrieval (caused when somebody changed the contents of the mutable file in between the node's mapupdate step and the retrieve step). The ``classified/unknown`` file contains a list of all incidents that did not match any of the classification functions. At startup, the incident gatherer will automatically reclassify any incident report which is not mentioned in any of the ``classified/\*`` files. So the usual workflow is to examine the incidents in ``classified/unknown``, add a new classification function, delete ``classified/unknown``, then bound the gatherer with "``tahoe restart WORKDIR``". 
The incidents which can be classified with the new functions will be added to their own ``classified/FOO`` lists, and the remaining ones will be put in ``classified/unknown``, where the process can be repeated until all events are classifiable. The incident gatherer is still fairly immature: future versions will have a web interface and an RSS feed, so operations personnel can track problems in the storage grid. In our experience, each incident takes about two seconds to transfer from the node that generated it to the gatherer. The gatherer will automatically catch up to any incidents which occurred while it is offline. Log Gatherer ------------ The "Log Gatherer" subscribes to hear about every single event published by the connected nodes, regardless of severity. This server writes these log events into a large flogfile that is rotated (closed, compressed, and replaced with a new one) on a periodic basis. Each flogfile is named according to the range of time it represents, with names like "``from-2008-08-26-132256--to-2008-08-26-162256.flog.bz2``". The flogfiles contain events from many different sources, making it easier to correlate things that happened on multiple machines (such as comparing a client node making a request with the storage servers that respond to that request). Create the Log Gatherer with the "``flogtool create-gatherer WORKDIR``" command, and start it with "``tahoe start``". Then copy the contents of the ``log_gatherer.furl`` file it creates into the ``BASEDIR/tahoe.cfg`` file (under the key ``log_gatherer.furl`` of the section ``[node]``) of all nodes that should be sending it log events. (See ``__.) The "``flogtool filter``" command, described above, is useful to cut down the potentially large flogfiles into a more focussed form. Busy nodes, particularly web-API nodes which are performing recursive deep-size/deep-stats/deep-check operations, can produce a lot of log events. To avoid overwhelming the node (and using an unbounded amount of memory for the outbound TCP queue), publishing nodes will start dropping log events when the outbound queue grows too large. When this occurs, there will be gaps (non-sequential event numbers) in the log-gatherer's flogfiles. Local twistd.log files ====================== [TODO: not yet true, requires foolscap-0.3.1 and a change to ``allmydata.node``] In addition to the foolscap-based event logs, certain high-level events will be recorded directly in human-readable text form, in the ``BASEDIR/logs/twistd.log`` file (and its rotated old versions: ``twistd.log.1``, ``twistd.log.2``, etc). This form does not contain as much information as the flogfiles available through the means described previously, but they are immediately available to the curious developer, and are retained until the twistd.log.NN files are explicitly deleted. Only events at the ``log.OPERATIONAL`` level or higher are bridged to ``twistd.log`` (i.e. not the ``log.NOISY`` debugging events). In addition, foolscap internal events (like connection negotiation messages) are not bridged to ``twistd.log``. Adding log messages =================== When adding new code, the Tahoe developer should add a reasonable number of new log events. For details, please see the Foolscap logging documentation, but a few notes are worth stating here: * use a facility prefix of "``tahoe.``", like "``tahoe.mutable.publish``" * assign each severe (``log.WEIRD`` or higher) event a unique message identifier, as the ``umid=`` argument to the ``log.msg()`` call. 
The ``misc/coding_tools/make_umid`` script may be useful for this purpose. This will make it easier to write a classification function for these messages. * use the ``parent=`` argument whenever the event is causally/temporally clustered with its parent. For example, a download process that involves three sequential hash fetches could announce the send and receipt of those hash-fetch messages with a ``parent=`` argument that ties them to the overall download process. However, each new web-API download request should be unparented. * use the ``format=`` argument in preference to the ``message=`` argument. E.g. use ``log.msg(format="got %(n)d shares, need %(k)d", n=n, k=k)`` instead of ``log.msg("got %d shares, need %d" % (n,k))``. This will allow later tools to analyze the event without needing to scrape/reconstruct the structured data out of the formatted string. * Pass extra information as extra keyword arguments, even if they aren't included in the ``format=`` string. This information will be displayed in the "``flogtool dump --verbose``" output, as well as being available to other tools. The ``umid=`` argument should be passed this way. * use ``log.err`` for the catch-all ``addErrback`` that gets attached to the end of any given Deferred chain. When used in conjunction with ``LOGTOTWISTED=1``, ``log.err()`` will tell Twisted about the error-nature of the log message, causing Trial to flunk the test (with an "ERROR" indication that prints a copy of the Failure, including a traceback). Don't use ``log.err`` for events that are ``BAD`` but handled (like hash failures: since these are often deliberately provoked by test code, they should not cause test failures): use ``log.msg(level=BAD)`` for those instead. Log Messages During Unit Tests ============================== If a test is failing and you aren't sure why, start by enabling ``FLOGTOTWISTED=1`` like this:: make test FLOGTOTWISTED=1 With ``FLOGTOTWISTED=1``, sufficiently-important log events will be written into ``_trial_temp/test.log``, which may give you more ideas about why the test is failing. By default, ``_trial_temp/test.log`` will not receive messages below the ``level=OPERATIONAL`` threshold. You can change the threshold via the ``FLOGLEVEL`` variable, e.g.:: make test FLOGLEVEL=10 FLOGTOTWISTED=1 (The level numbers are listed in src/allmydata/util/log.py.) To look at the detailed foolscap logging messages, run the tests like this:: make test FLOGFILE=flog.out.bz2 FLOGLEVEL=1 FLOGTOTWISTED=1 The first environment variable will cause foolscap log events to be written to ``./flog.out.bz2`` (instead of merely being recorded in the circular buffers for the use of remote subscribers or incident reports). The second will cause all log events to be written out, not just the higher-severity ones. The third will cause twisted log events (like the markers that indicate when each unit test is starting and stopping) to be copied into the flogfile, making it easier to correlate log events with unit tests. Enabling this form of logging appears to roughly double the runtime of the unit tests. The ``flog.out.bz2`` file is approximately 2MB. You can then use "``flogtool dump``" or "``flogtool web-viewer``" on the resulting ``flog.out`` file. ("``flogtool tail``" and the log-gatherer are not useful during unit tests, since there is no single Tub to which all the log messages are published). It is possible for setting these environment variables to cause spurious test failures in tests with race condition bugs. 
All known instances of this have been fixed as of Tahoe-LAFS v1.7.1. tahoe-lafs-1.10.0/docs/man/000077500000000000000000000000001221140116300153065ustar00rootroot00000000000000tahoe-lafs-1.10.0/docs/man/tahoe.1000066400000000000000000000125611221140116300164750ustar00rootroot00000000000000.TH TAHOE 1 "July 2011" "Tahoe-LAFS \[em] tahoe command" "User Commands" .SH NAME .PP tahoe - Secure distributed filesystem. .SH SYNOPSIS .PP tahoe \f[I]COMMAND\f[] [\f[I]OPTION\f[]]... [\f[I]PARAMETER\f[]]... .SH GENERAL OPTIONS .TP .B \f[B]-q,\ --quiet\f[] Operate silently. .RS .RE .TP .B \f[B]-V,\ --version\f[] Display version numbers. .RS .RE .TP .B \f[B]--version-and-path\f[] Display version numbers and paths to their locations. .RS .RE .TP .B \f[B]-d,\ --node-directory\f[] Specify which Tahoe node directory should be used. (default for most commands: `$HOME/.tahoe') .RS .RE .SH COMMANDS .PP The \f[B]tahoe\f[] runner can be used for various tasks depending on the command used. .SS ADMINISTRATION .PP tahoe \f[I]COMMAND\f[] [\f[I]OPTION\f[]]... [\f[I]NODEDIR\f[]] .RS .SS COMMANDS .TP .B \f[B]create-node\f[] Create a node that acts as a client, server or both. .TP .B \f[B]create-client\f[] Create a client node (with storage initially disabled). .TP .B \f[B]create-introducer\f[] Create an introducer node. .TP .B \f[B]create-key-generator\f[] Create a key generator service. .TP .B \f[B]create-stats-gatherer\f[] Create a stats-gatherer service. .SS OPTIONS .TP .B \f[B]-C,\ --basedir=\f[] Same as \[em]node-directory. .TP .B \f[B]-d,\ --node-directory=\f[] Specify which Tahoe node directory should be used. (default for \f[B]create-node\f[] and \f[B]create-client\f[]: `$HOME/.tahoe/'). .TP .B \f[B]-n,\ --nickname=\f[] Specify the nickname for this node (\f[B]create-node\f[] and \f[B]create-client\f[] only). .TP .B \f[B]-i,\ --introducer=\f[] Specify the introducer FURL to use (\f[B]create-node\f[] and \f[B]create-client\f[] only). .TP .B \f[B]-p,\ --webport=\f[] Specify which TCP port to run the HTTP interface on. Use `none' to disable. Default: `tcp:3456:interface=127.0.0.1' (\f[B]create-node\f[] and \f[B]create-client\f[] only). .TP .B \f[B]--no-storage\f[] Do not offer storage service to other nodes (\f[B]create-node\f[] only). .RE .SS CONTROLLING NODES .PP tahoe \f[I]COMMAND\f[] [\f[I]OPTION\f[]]... [\f[I]NODEDIR\f[]] .SS COMMANDS .TP .B \f[B]start\f[] Start a node (of any type). .RS .RE .TP .B \f[B]stop\f[] Stop a node. .RS .RE .TP .B \f[B]restart\f[] Restart a node. .RS .RE .TP .B \f[B]run\f[] Run a node synchronously. .RS .RE .SS OPTIONS .TP .B \f[B]-p,\ --profile\f[] Run under the Python profiler, putting results in `profiling_results.prof' (use with \f[B]start\f[] or \f[B]restart\f[] only). .RS .RE .TP .B \f[B]--syslog\f[] Tell the node to log to syslog, not a file (use with \f[B]start\f[] or \f[B]restart\f[] only). .RS .RE .TP .B \f[B]-C,\ --basedir=\f[] Same as \[em]node-directory. .RS .RE .TP .B \f[B]-d,\ --node-directory=\f[] Specify which Tahoe node directory should be used (default for commands other than \f[B]run\f[]: `$HOME/.tahoe/'). .RS .RE .TP .B \f[B]--help\f[] Display help and exit .RS .RE .SS USING THE FILESYSTEM .TP .B \f[B]mkdir\f[] Create a new directory. .RS .RE .TP .B \f[B]add-alias\f[] Add a new alias cap. .RS .RE .TP .B \f[B]create-alias\f[] Create a new alias cap. .RS .RE .TP .B \f[B]list-aliases\f[] List all alias caps. .RS .RE .TP .B \f[B]ls\f[] List a directory. .RS .RE .TP .B \f[B]get\f[] Retrieve a file from the grid. .RS .RE .TP .B \f[B]put\f[] Upload a file into the grid. 
.RS .RE .TP .B \f[B]cp\f[] Copy one or more files or directories. .RS .RE .TP .B \f[B]unlink\f[] Unlink a file or directory on the grid. .RS .RE .TP .B \f[B]rm\f[] Unlink a file or directory on the grid (same as \f[B]unlink\f[]). .RS .RE .TP .B \f[B]mv\f[] Move a file within the grid. .RS .RE .TP .B \f[B]ln\f[] Make an additional link to an existing file or directory. .RS .RE .TP .B \f[B]backup\f[] Make target dir look like local dir. .RS .RE .TP .B \f[B]webopen\f[] Open a web browser to a grid file or directory. .RS .RE .TP .B \f[B]manifest\f[] List all files/directories in a subtree. .RS .RE .TP .B \f[B]stats\f[] Print statistics about all files/directories in a subtree. .RS .RE .TP .B \f[B]check\f[] Check a single file or directory. .RS .RE .TP .B \f[B]deep-check\f[] Check all files/directories reachable from a starting point .RS .RE .SS OPTIONS .PP Please run `tahoe \f[I]COMMAND\f[] --help' for more details on each command. .SS DEBUGGING .PP tahoe debug \f[I]SUBCOMMAND\f[] [\f[I]OPTION\f[]]... [\f[I]PARAMETER\f[]]... .SS SUBCOMMANDS .TP .B \f[B]dump-share\f[] Unpack and display the contents of a share. .RS .RE .TP .B \f[B]dump-cap\f[] Unpack a read-cap or write-cap. .RS .RE .TP .B \f[B]find-shares\f[] Locate sharefiles in node directories. .RS .RE .TP .B \f[B]catalog-shares\f[] Describe all shares in node dirs. .RS .RE .TP .B \f[B]corrupt-share\f[] Corrupt a share by flipping a bit. .RS .RE .TP .B \f[B]repl\f[] Open a Python interpreter. .RS .RE .TP .B \f[B]trial\f[] Run tests using Twisted Trial with the right imports. .RS .RE .PP Please run e.g.\ `tahoe debug dump-share --help' for more details on each subcommand. .SH AUTHORS .PP Tahoe-LAFS has been written by Brian Warner, Zooko Wilcox-O'Hearn and dozens of others. This manpage was originally written by bertagaz. .SH REPORTING BUGS .PP Please see . .PP For known security issues see . .PP Tahoe-LAFS home page: .PP tahoe-dev mailing list: .SH COPYRIGHT .PP Copyright \@ 2006\[en]2013 The Tahoe-LAFS Software Foundation

tahoe-lafs-1.10.0/docs/network-and-reliance-topology.svg

[Figure: "Tahoe-LAFS network topology" (SVG, text labels only recoverable). It shows Tahoe-LAFS clients (web browser, command-line tool, tahoe backup tool, JavaScript frontends, duplicity, GridBackup (incomplete), FTP and SFTP clients, FUSE via sshfs) talking to a Tahoe-LAFS gateway via the Tahoe-LAFS web-API, HTTP(S), FTP, and SFTP servers, and the gateway talking to Tahoe-LAFS storage servers over the Tahoe-LAFS storage protocol (TCP/SSL), with a disk backend and a cloud backend under development (S3, OpenStack, Google, Azure). Red links/machines are ones you rely on for confidentiality and integrity (whoever controls them can see your files and change their contents); black ones are not relied upon for confidentiality and integrity.]

tahoe-lafs-1.10.0/docs/nodekeys.rst

=======================
Node Keys in Tahoe-LAFS
=======================

"Node Keys" are cryptographic signing/verifying keypairs used to identify Tahoe-LAFS nodes (client-only and client+server). The private signing key is stored in NODEDIR/private/node.privkey , and is used to sign the announcements that are distributed to all nodes by the Introducer.
The public verifying key is used to identify the sending node from those other systems: it is displayed as a "Node ID" that looks like "v0-abc234xyz567..", which ends with a long base32-encoded string. These node keys were introduced in the 1.10 release (April 2013), as part of ticket #466. In previous releases, announcements were unsigned, and nodes were identified by their Foolscap "Tub ID" (a somewhat shorter base32 string, with no "v0-" prefix). Why Announcements Are Signed ---------------------------- All nodes (both client-only and client+server) publish announcements to the Introducer, which then relays them to all other nodes. These announcements contain information about the publishing node's nickname, how to reach the node, what services it offers, and what version of code it is running. The new private node key is used to sign these announcements, preventing the Introducer from modifying their contents en-route. This will enable future versions of Tahoe-LAFS to use other forms of introduction (gossip, multiple introducers) without weakening the security model. The Node ID is useful as a handle with which to talk about a node. For example, when clients eventually gain the ability to control which storage servers they are willing to use (#467), the configuration file might simply include a list of Node IDs for the approved servers. TubIDs are currently also suitable for this job, but they depend upon having a Foolscap connection to the server. Since our goal is to move away from Foolscap towards a simpler (faster and more portable) protocol, we want to reduce our dependence upon TubIDs. Node IDs and Ed25519 signatures can be used for non-Foolscap non-SSL based protocols. How The Node ID Is Computed --------------------------- The long-form Node ID is the Ed25519 public verifying key, 256 bits (32 bytes) long, base32-encoded, with a "v0-" prefix appended, and the trailing "=" padding removed, like so: v0-rlj3jnxqv4ee5rtpyngvzbhmhuikjfenjve7j5mzmfcxytwmyf6q The Node ID is displayed in this long form on the node's front Welcome page, and on the Introducer's status page. In most other places (share-placement lists, file health displays), the "short form" is used instead. This is simply the first 8 characters of the base32 portion, frequently enclosed in square brackets, like this: [rlj3jnxq] In contrast, old-style TubIDs are usually displayed with just 6 base32 characters. Version Compatibility, Fallbacks For Old Versions ------------------------------------------------- Since Tahoe-LAFS 1.9 does not know about signed announcements, 1.10 includes backwards-compatibility code to allow old and new versions to interoperate. There are three relevant participants: the node publishing an announcement, the Introducer which relays them, and the node receiving the (possibly signed) announcement. When a 1.10 node connects to an old Introducer (version 1.9 or earlier), it sends downgraded non-signed announcements. It likewise accepts non-signed announcements from the Introducer. The non-signed announcements use TubIDs to identify the sending node. The new 1.10 Introducer, when it connects to an old node, downgrades any signed announcements to non-signed ones before delivery. As a result, the only way to receive signed announcements is for all three systems to be running the new 1.10 code. In a grid with a mixture of old and new nodes, if the Introducer is old, then all nodes will see unsigned TubIDs. If the Introducer is new, then nodes will see signed Node IDs whenever possible. 
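For illustration only (this is not the code Tahoe-LAFS itself uses), the derivation described in "How The Node ID Is Computed" above — base32-encode the 32-byte Ed25519 verifying key, strip the trailing "=" padding, and prepend "v0-" — can be sketched as::

    import base64

    def format_node_id(verifying_key_bytes):
        # Illustrative sketch: verifying_key_bytes is the 32-byte Ed25519
        # public verifying key.  Tahoe-LAFS's own implementation lives in
        # the allmydata source tree.
        b32 = base64.b32encode(verifying_key_bytes).decode("ascii").lower().rstrip("=")
        long_form = "v0-" + b32             # e.g. "v0-rlj3jnxq..."
        short_form = "[" + b32[:8] + "]"    # first 8 base32 characters, in brackets
        return long_form, short_form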
Share Placement --------------- Tahoe-LAFS uses a "permuted ring" algorithm to decide where to place shares for any given file. For each potential server, it uses that server's "permutation seed" to compute a pseudo-random but deterministic location on a ring, then walks the ring in clockwise order, asking each server in turn to hold a share until all are placed. When downloading a file, the servers are accessed in the same order. This minimizes the number of queries that must be done to download a file, and tolerates "churn" (nodes being added and removed from the grid) fairly well. This property depends upon server nodes having a stable permutation seed. If a server's permutation seed were to change, it would effectively wind up at a randomly selected place on the permuted ring. Downloads would still complete, but clients would spend more time asking other servers before querying the correct one. In the old 1.9 code, the permutation-seed was always equal to the TubID. In 1.10, servers include their permutation-seed as part of their announcement. To improve stability for existing grids, if an old server (one with existing shares) is upgraded to run the 1.10 codebase, it will use its old TubID as its permutation-seed. When a new empty server runs the 1.10 code, it will use its Node ID instead. In both cases, once the node has picked a permutation-seed, it will continue using that value forever. To be specific, when a node wakes up running the 1.10 code, it will look for a recorded NODEDIR/permutation-seed file, and use its contents if present. If that file does not exist, it creates it (with the TubID if it has any shares, otherwise with the Node ID), and uses the contents as the permutation-seed. There is one unfortunate consequence of this pattern. If new 1.10 server is created in a grid that has an old client, or has a new client but an old Introducer, then that client will see downgraded non-signed announcements, and thus will first upload shares with the TubID-based permutation-seed. Later, when the client and/or Introducer is upgraded, the client will start seeing signed announcements with the NodeID-based permutation-seed, and will then look for shares in the wrong place. This will hurt performance in a large grid, but should not affect reliability. This effect shouldn't even be noticeable in grids for which the number of servers is close to the "N" shares.total number (e.g. where num-servers < 3*N). And the as-yet-unimplemented "share rebalancing" feature should repair the misplacement. If you wish to avoid this effect, try to upgrade both Introducers and clients at about the same time. (Upgrading servers does not matter: they will continue to use the old permutation-seed). tahoe-lafs-1.10.0/docs/performance.rst000066400000000000000000000172231221140116300175730ustar00rootroot00000000000000============================================ Performance costs for some common operations ============================================ 1. `Publishing an A-byte immutable file`_ 2. `Publishing an A-byte mutable file`_ 3. `Downloading B bytes of an A-byte immutable file`_ 4. `Downloading B bytes of an A-byte mutable file`_ 5. `Modifying B bytes of an A-byte mutable file`_ 6. `Inserting/Removing B bytes in an A-byte mutable file`_ 7. `Adding an entry to an A-entry directory`_ 8. `Listing an A entry directory`_ 9. `Checking an A-byte file`_ 10. `Verifying an A-byte file (immutable)`_ 11. 
`Repairing an A-byte file (mutable or immutable)`_ ``K`` indicates the number of shares required to reconstruct the file (default: 3) ``N`` indicates the total number of shares produced (default: 10) ``S`` indicates the segment size (default: 128 KiB) ``A`` indicates the number of bytes in a file ``B`` indicates the number of bytes of a file that are being read or written ``G`` indicates the number of storage servers on your grid Most of these cost estimates may have a further constant multiplier: when a formula says ``N/K*S``, the cost may actually be ``2*N/K*S`` or ``3*N/K*S``. Also note that all references to mutable files are for SDMF-formatted files; this document has not yet been updated to describe the MDMF format. Publishing an ``A``-byte immutable file ======================================= when the file is already uploaded --------------------------------- If the file is already uploaded with the exact same contents, same erasure coding parameters (K, N), and same added convergence secret, then it reads the whole file from disk one time while hashing it to compute the storage index, then contacts about N servers to ask each one to store a share. All of the servers reply that they already have a copy of that share, and the upload is done. disk: A cpu: ~A network: ~N memory footprint: S when the file is not already uploaded ------------------------------------- If the file is not already uploaded with the exact same contents, same erasure coding parameters (K, N), and same added convergence secret, then it reads the whole file from disk one time while hashing it to compute the storage index, then contacts about N servers to ask each one to store a share. Then it uploads each share to a storage server. disk: 2*A cpu: 2*~A network: N/K*A memory footprint: N/K*S Publishing an ``A``-byte mutable file ===================================== cpu: ~A + a large constant for RSA keypair generation network: A memory footprint: N/K*A notes: Tahoe-LAFS generates a new RSA keypair for each mutable file that it publishes to a grid. This takes up to 1 or 2 seconds on a typical desktop PC. Part of the process of encrypting, encoding, and uploading a mutable file to a Tahoe-LAFS grid requires that the entire file be in memory at once. For larger files, this may cause Tahoe-LAFS to have an unacceptably large memory footprint (at least when uploading a mutable file). Downloading ``B`` bytes of an ``A``-byte immutable file ======================================================= cpu: ~B network: B notes: When Tahoe-LAFS 1.8.0 or later is asked to read an arbitrary range of an immutable file, only the S-byte segments that overlap the requested range will be downloaded. (Earlier versions would download from the beginning of the file up until the end of the requested range, and then continue to download the rest of the file even after the request was satisfied.) Downloading ``B`` bytes of an ``A``-byte mutable file ===================================================== cpu: ~A network: A memory footprint: A notes: As currently implemented, mutable files must be downloaded in their entirety before any part of them can be read. We are exploring fixes for this; see ticket #393 for more information. 
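As an illustration of the immutable-download note above (only the ``S``-byte segments that overlap the requested range are fetched), the segments touched by a partial read can be computed as follows; ``segments_touched`` is a hypothetical helper, not a Tahoe-LAFS function::

    def segments_touched(offset, B, S=128 * 1024):
        """Indices of the S-byte segments that overlap [offset, offset+B)."""
        if B <= 0:
            return []
        first = offset // S
        last = (offset + B - 1) // S
        return list(range(first, last + 1))

    # Reading 1 byte at offset 0 of a large immutable file touches one segment:
    segments_touched(0, 1)                      # [0]
    # Reading 200 KiB starting at offset 100 KiB spans three segments:
    segments_touched(100 * 1024, 200 * 1024)    # [0, 1, 2]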
Modifying ``B`` bytes of an ``A``-byte mutable file =================================================== cpu: ~A network: A memory footprint: N/K*A notes: If you upload a changed version of a mutable file that you earlier put onto your grid with, say, 'tahoe put --mutable', Tahoe-LAFS will replace the old file with the new file on the grid, rather than attempting to modify only those portions of the file that have changed. Modifying a file in this manner is essentially uploading the file over again, except that it re-uses the existing RSA keypair instead of generating a new one. Inserting/Removing ``B`` bytes in an ``A``-byte mutable file ============================================================ cpu: ~A network: A memory footprint: N/K*A notes: Modifying any part of a mutable file in Tahoe-LAFS requires that the entire file be downloaded, modified, held in memory while it is encrypted and encoded, and then re-uploaded. A future version of the mutable file layout ("LDMF") may provide efficient inserts and deletes. Note that this sort of modification is mostly used internally for directories, and isn't something that the WUI, CLI, or other interfaces will do -- instead, they will simply overwrite the file to be modified, as described in "Modifying B bytes of an A-byte mutable file". Adding an entry to an ``A``-entry directory =========================================== cpu: ~A network: ~A memory footprint: N/K*~A notes: In Tahoe-LAFS, directories are implemented as specialized mutable files. So adding an entry to a directory is essentially adding B (actually, 300-330) bytes somewhere in an existing mutable file. Listing an ``A`` entry directory ================================ cpu: ~A network: ~A memory footprint: N/K*~A notes: Listing a directory requires that the mutable file storing the directory be downloaded from the grid. So listing an A entry directory requires downloading a (roughly) 330 * A byte mutable file, since each directory entry is about 300-330 bytes in size. Checking an ``A``-byte file =========================== cpu: ~G network: ~G memory footprint: negligible notes: To check a file, Tahoe-LAFS queries all the servers that it knows about. Note that neither of these values directly depend on the size of the file. This is relatively inexpensive, compared to the verify and repair operations. Verifying an A-byte file (immutable) ==================================== cpu: ~N/K*A network: N/K*A memory footprint: N/K*S notes: To verify a file, Tahoe-LAFS downloads all of the ciphertext shares that were originally uploaded to the grid and integrity checks them. This is (for grids with good redundancy) more expensive than downloading an A-byte file, since only a fraction of these shares would be necessary to recover the file. Verifying an A-byte file (mutable) ================================== cpu: ~N/K*A network: N/K*A memory footprint: N/K*A notes: To verify a file, Tahoe-LAFS downloads all of the ciphertext shares that were originally uploaded to the grid and integrity checks them. This is (for grids with good redundancy) more expensive than downloading an A-byte file, since only a fraction of these shares would be necessary to recover the file. 
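To make the ``N/K`` expansion factor in these estimates concrete, here is a small illustrative helper (not part of Tahoe-LAFS); it ignores the constant multipliers and per-request overhead mentioned at the top of this document::

    def rough_network_costs(A, K=3, N=10):
        """Back-of-envelope network costs, in bytes, from the tables above."""
        expansion = N / K
        return {
            "publish_immutable": expansion * A,  # N/K * A
            "verify": expansion * A,             # every share is downloaded
        }

    # With the default 3-of-10 encoding, verifying a 90 MB file moves ~300 MB:
    rough_network_costs(90 * 10**6)["verify"]    # 300000000.0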
Repairing an ``A``-byte file (mutable or immutable) =================================================== cpu: variable, between ~A and ~N/K*A network: variable; between A and N/K*A memory footprint (immutable): (1+N/K)*S (SDMF mutable): (1+N/K)*A notes: To repair a file, Tahoe-LAFS downloads the file, and generates/uploads missing shares in the same way as when it initially uploads the file. So, depending on how many shares are missing, this can cost as little as a download or as much as a download followed by a full upload. Since SDMF files have only one segment, which must be processed in its entirety, repair requires a full-file download followed by a full-file upload. tahoe-lafs-1.10.0/docs/proposed/000077500000000000000000000000001221140116300163665ustar00rootroot00000000000000tahoe-lafs-1.10.0/docs/proposed/GridID.txt000066400000000000000000000262151221140116300202370ustar00rootroot00000000000000= Grid Identifiers = What makes up a Tahoe "grid"? The rough answer is a fairly-stable set of Storage Servers. The read- and write- caps that point to files and directories are scoped to a particular set of servers. The Tahoe peer-selection and erasure-coding algorithms provide high availability as long as there is significant overlap between the servers that were used for upload and the servers that are available for subsequent download. When new peers are added, the shares will get spread out in the search space, so clients must work harder to download their files. When peers are removed, shares are lost, and file health is threatened. Repair bandwidth must be used to generate new shares, so cost increases with the rate of server departure. If servers leave the grid too quickly, repair may not be able to keep up, and files will be lost. So to get long-term stability, we need that peer set to remain fairly stable. A peer which joins the grid needs to stick around for a while. == Multiple Grids == The current Tahoe read-cap format doesn't admit the existence of multiple grids. In fact, the "URI:" prefix implies that these cap strings are universal: it suggests that this string (plus some protocol definition) is completely sufficient to recover the file. However, there are a variety of reasons why we may want to have more than one Tahoe grid in the world: * scaling: there are a variety of problems that are likely to be encountered as we attempt to grow a Tahoe grid from a few dozen servers to a few thousand, some of which are easier to deal with than others. Maintaining connections to servers and keeping up-to-date on the locations of servers is one issue. There are design improvements that can work around these, but they will take time, and we may not want to wait for that work to be done. Begin able to deploy multiple grids may be the best way to get a large number of clients using tahoe at once. * managing quality of storage, storage allocation: the members of a friendnet may want to restrict access to storage space to just each other, and may want to run their grid without involving any external coordination * commercial goals: a company using Tahoe may want to restrict access to storage space to just their customers * protocol upgrades, development: new and experimental versions of the tahoe software may need to be deployed and analyzed in isolation from the grid that clients are using for active storage So if we define a grid to be a set of storage servers, then two distinct grids will have two distinct sets of storage servers. 
Clients are free to use whichever grid they like (and have permission to use), however each time they upload a file, they must choose a specific grid to put it in. Clients can upload the same file to multiple grids in two separate upload operations. == Grid IDs in URIs == Each URI needs to be scoped to a specific grid, to avoid confusion ("I looked for URI123 and it said File Not Found.. oh, which grid did you upload that into?"). To accomplish this, the URI will contain a "grid identifier" that references a specific Tahoe grid. The grid ID is shorthand for a relatively stable set of storage servers. To make the URIs actually Universal, there must be a way to get from the grid ID to the actual grid. This document defines a protocol by which a client that wants to download a file from a previously-unknown grid will be able to locate and connect to that grid. == Grid ID specification == The Grid ID is a string, using a fairly limited character set, alphanumerics plus possibly a few others. It can be very short: a gridid of just "0" can be used. The gridID will be copied into the cap string for every file that is uploaded to that grid, so there is pressure to keep them short. The cap format needs to be able to distinguish the gridID from the rest of the cap. This could be expressed in DNS-style dot notation, for example the directory write-cap with a write-key of "0ZrD.." that lives on gridID "foo" could be expressed as "D0ZrDNAHuxs0XhYJNmkdicBUFxsgiHzMdm.foo" . * design goals: non-word-breaking, double-click-pasteable, maybe human-readable (do humans need to know which grid is being used? probably not). * does not need to be Secure (i.e. long and unguessable), but we must analyze the sorts of DoS attack that can result if it is not (and even if it is) * does not need to be human-memorable, although that may assist debugging and discussion ("my file is on grid 4, where is yours?) * *does* need to be unique, but the total number of grids is fairly small (counted in the hundreds or thousands rather than millions or billions) and we can afford to coordinate the use of short names. Folks who don't like coordination can pick a largeish random string. Each announcement that a Storage Server publishes (to introducers) will include its grid id. If a server participates in multiple grids, it will make multiple announcements, each with a single grid id. Clients will be able to ask an introducer for information about all storage servers that participate in a specific grid. Clients are likely to have a default grid id, to which they upload files. If a client is adding a file to a directory that lives in a different grid, they may upload the file to that other grid instead of their default. == Getting from a Grid ID to a grid == When a client decides to download a file, it starts by unpacking the cap and extracting the grid ID. Then it attempts to connect to at least one introducer for that grid, by leveraging DNS: hash $GRIDID id (with some tag) to get a long base32-encoded string: $HASH GET http://tahoe-$HASH.com/introducer/gridid/$GRIDID the results should be a JSON-encoded list of introducer FURLs for extra redundancy, if that query fails, perform the following additional queries: GET http://tahoe-$HASH.net/introducer/gridid/$GRIDID GET http://tahoe-$HASH.org/introducer/gridid/$GRIDID GET http://tahoe-$HASH.tv/introducer/gridid/$GRIDID GET http://tahoe-$HASH.info/introducer/gridid/$GRIDID etc. 
GET http://grids.tahoe-lafs.org/introducer/gridid/$GRIDID The first few introducers should be able to announce other introducers, via the distributed gossip-based introduction scheme of #68. Properties: * claiming a grid ID is cheap: a single domain name registration (in an uncontested namespace), and a simple web server. allmydata.com can publish introducer FURLs for grids that don't want to register their own domain. * lookup is at least as robust as DNS. By using benevolent public services like tahoe-grids.allmydata.com, reliability can be increased further. The HTTP fetch can return a list of every known server node, all of which can act as introducers. * not secure: anyone who can interfere with DNS lookups (or claims tahoe-$HASH.com before you do) can cause clients to connect to their servers instead of yours. This admits a moderate DoS attack against download availability. Performing multiple queries (to .net, .org, etc) and merging the results may mitigate this (you'll get their servers *and* your servers; the download search will be slower but is still likely to succeed). It may admit an upload DoS attack as well, or an upload file-reliability attack (trick you into uploading to unreliable servers) depending upon how the "server selection policy" (see below) is implemented. Once the client is connected to an introducer, it will see if there is a Helper who is willing to assist with the upload or download. (For download, this might reduce the number of connections that the grid's storage servers must deal with). If not, ask the introducers for storage servers, and connect to them directly. == Controlling Access == The introducers are not used to enforce access control. Instead, a system of public keys are used. There are a few kinds of access control that we might want to implement: * protect storage space: only let authorized clients upload/consume storage * protect download bandwidth: only give shares to authorized clients * protect share reliability: only upload shares to "good" servers The first two are implemented by the server, to protect their resources. The last is implemented by the client, to avoid uploading shares to unreliable servers (specifically, to maximize the utility of the client's limited upload bandwidth: there's no problem with putting shares on unreliable peers per se, but it is a problem if doing so means the client won't put a share on a more reliable peer). The first limitation (protect storage space) will be implemented by public keys and signed "storage authority" certificates. The client will present some credentials to the storage server to convince it that the client deserves the space. When storage servers are in this mode, they will have a certificate that names a public key, and any credentials that can demonstrate a path from that key will be accepted. This scheme is described in docs/proposed/old-accounts-pubkey.txt . The second limitation is unexplored. The read-cap does not currently contain any notion of who must pay for the bandwidth incurred. The third limitation (only upload to "good" servers), when enabled, is implemented by a "server selection policy" on the client side, which defines which server credentials will be accepted. This is just like the first limitation in reverse. Before clients consider including a server in their peer selection algorithm, they check the credentials, and ignore any that do not meet them. This means that a client may not wish to upload anything to "foreign grids", because they have no promise of reliability. 
The reasons that a client might want to upload to a foreign grid need to be examined: reliability may not be important, or it might be good enough to upload the file to the client's "home grid" instead. The server selection policy is intended to be fairly open-ended: we can imagine a policy that says "upload to any server that has a good reputation among group X", or more complicated schemes that require less and less centralized management. One important and simple scheme is to simply have a list of acceptable keys: a friendnet with 5 members would include 5 such keys in each policy, enabling every member to use the services of the others, without having a single central manager with unilateral control over the definition of the group. == Closed Grids == To implement these access controls, each client needs to be configured with three things: * home grid ID (used to find introducers, helpers, storage servers) * storage authority (certificate to enable uploads) * server selection policy (identify good/reliable servers) If the server selection policy indicates centralized control (i.e. there is some single key X which is used to sign the credentials for all "good" servers), then this could be built in to the grid ID. By using the base32 hash of the pubkey as the grid ID, clients would only need to be configured with two things: the grid ID, and their storage authority. In this case, the introducer would provide the pubkey, and the client would compare the hashes to make sure they match. This is analogous to how a TubID is used in a FURL. Such grids would have significantly larger grid IDs, 24 characters or more. tahoe-lafs-1.10.0/docs/proposed/README.lossmodel000066400000000000000000000022341221140116300212470ustar00rootroot00000000000000The lossmodel.lyx file is the source document for an in-progress paper that analyzes the probability of losing files stored in a Tahoe Least-acces File System under various scenarios. It describes: 1. How to estimate peer reliabilities, based on peer MTBF failure data. 2. How to compute file loss probabilities, based on a given set of shares stored on peers with estimated reliabilities. The peer reliabilities do not have to be uniform, and the model takes into account the file repair process. 3. How to estimate Tahoe parameters for k (shares needed), n (shares distributed) and A (repair interval) to achieve a file reliability target. 4. How to compute the estimated repair cost over time, discounted at a fixed rate, of maintaining a file for a time period T. Future work will also address the latter three issues in the context of "non-aggressive" repair, where repair will only be performed if too many shares are lost, and it will also extend the repair cost estimation model to suggest cost functions appropriate for common network architectures. A PDF of the current version of the file may be downloaded from: http://willden.org/~shawn/lossmodel.pdftahoe-lafs-1.10.0/docs/proposed/accounting-overview.txt000066400000000000000000001076041221140116300231350ustar00rootroot00000000000000 = Accounting = "Accounting" is the arena of the Tahoe system that concerns measuring, controlling, and enabling the ability to upload and download files, and to create new directories. In contrast with the capability-based access control model, which dictates how specific files and directories may or may not be manipulated, Accounting is concerned with resource consumption: how much disk space a given person/account/entity can use. 
Tahoe releases up to and including 1.4.1 have a nearly-unbounded resource usage model. Anybody who can talk to the Introducer gets to talk to all the Storage Servers, and anyone who can talk to a Storage Server gets to use as much disk space as they want (up to the reserved_space= limit imposed by the server, which affects all users equally). Not only is the per-user space usage unlimited, it is also unmeasured: the owner of the Storage Server has no way to find out how much space Alice or Bob is using. The goals of the Accounting system are thus: * allow the owner of a storage server to control who gets to use disk space, with separate limits per user * allow both the server owner and the user to measure how much space the user is consuming, in an efficient manner * provide grid-wide aggregation tools, so a set of cooperating server operators can easily measure how much a given user is consuming across all servers. This information should also be available to the user in question. For the purposes of this document, the terms "Account" and "User" are mostly interchangeable. The fundamental unit of Accounting is the "Account", in that usage and quota enforcement is performed separately for each account. These accounts might correspond to individual human users, or they might be shared among a group, or a user might have an arbitrary number of accounts. Accounting interacts with Garbage Collection. To protect their shares from GC, clients maintain limited-duration leases on those shares: when the last lease expires, the share is deleted. Each lease has a "label", which indicates the account or user which wants to keep the share alive. A given account's "usage" (their per-server aggregate usage) is simply the sum of the sizes of all shares on which they hold a lease. The storage server may limit the user to a fixed "quota" (an upper bound on their usage). To keep a file alive, the user must be willing to use up some of their quota. Note that a popular file might have leases from multiple users, in which case one user might take a chance and decline to add their own lease, saving some of their quota and hoping that the other leases continue to keep the file alive despite their personal unwillingness to contribute to the effort. One could imagine a "pro-rated quotas" scheme, in which a 10MB file with 5 leaseholders would deduct 2MB from each leaseholder's quota. We have decided to not implement pro-rated quotas, because such a scheme would make usage values hard to predict: a given account might suddenly go over quota solely because of a third party's actions. == Authority Flow == The authority to consume space on the storage server originates, of course, with the storage server operator. These operators start with complete control over their space, and delegate portions of it to others: either directly to clients who want to upload files, or to intermediaries who can then delegate attenuated authority onwards. The operators have various reasons for wanting to share their space: monetary consideration, expectations of in-kind exchange, or simple generosity. But the final authority always rests with the operator. The server operator grants limited authority over their space by configuring their server to accept requests that demonstrate knowledge of certain secrets. They then share those secrets with the client who intends to use this space, or with an intermediary who will generate still more secrets and share those with the client. 
Eventually, an upload or create-directory operation will be performed that needs this authority. Part of the operation will involve proving knowledge of the secret to the storage server, and the server will require this proof before accepting the uploaded share or adding a new lease. The authority is expressed as a string, containing cryptographically-signed messages and keys. The string also contains "restrictions", which are annotations that explain the limits imposed upon this authority, either by the original grantor (the storage server operator) or by one of the intermediaries. Authority can be reduced but not increased. Any holder of a given authority can delegate some or all of it to another party. The authority string may be short enough to include as an argument to a CLI command (--with-authority ABCDE), or it may be long enough that it must be stashed in a file and referenced in some other fashion (--with-authority-file ~/.my_authority). There are CLI tools to create brand new authority strings, to derive attenuated authorities from an existing one, and to explain the contents of an authority string. These authority strings can be shared with others just like filecaps and dircaps: knowledge of the authority string is both necessary and complete to wield the authority it represents. Web-API requests will include the authority necessary to complete the operation. When used by a CLI tool, the authority is likely to come from ~/.tahoe/private/authority (i.e. it is ambient to the user who has access to that node, just like aliases provide similar access to a specific "root directory"). When used by the browser-oriented WUI, the authority will [TODO] somehow be retained on each page in a way that minimizes the risk of CSRF attacks and allows safe sharing (cut-and-paste of a URL without sharing the storage authority too). The client node receiving the web-API request will extract the authority string from the request and use it to build the storage server messages that it sends to fulfill that request. == Definition Of Authority == The term "authority" is used here in the object-capability sense: it refers to the ability of some principal to cause some action to occur, whether because they can do it themselves, or because they can convince some other principal to do it for them. In Tahoe terms, "storage authority" is the ability to do one of the following actions: * upload a new share, thus consuming storage space * adding a new lease to a share, thus preventing space from being reclaimed * modify an existing mutable share, potentially increasing the space consumed The Accounting effort may involve other kinds of authority that get limited in a similar manner as storage authority, like the ability to download a share or query whether a given share is present: anything that may consume CPU time, disk bandwidth, or other limited resources. The authority to renew or cancel a lease may be controlled in a similar fashion. Storage authority, as granted from a server operator to a client, is not simply a binary "use space or not" grant. Instead, it is parameterized by a number of "restrictions". The most important of these restrictions (with respect to the goals of Accounting) is the "Account Label". === Account Labels === A Tahoe "Account" is defined by a variable-length sequence of small integers. (they are not required to be small, the actual limit is 2**64, but neither are they required to be unguessable). 
For the purposes of discussion, these lists will be expressed as period-joined strings: the two-element list (1,4) will be displayed here as "1.4". These accounts are arranged in a hierarchy: the account identifier 1.4 is considered to be a "parent" of 1.4.2 . There is no relationship between the values used by unrelated accounts: 1.4 is unrelated to 2.4, despite both coincidentally using a "4" in the second element. Each lease has a label, which contains the Account identifier. The storage server maintains an aggregate size count for each label prefix: when asked about account 1.4, it will report the amount of space used by shares labeled 1.4, 1.4.2, 1.4.7, 1.4.7.8, etc (but *not* 1 or 1.5). The "Account Label" restriction allows a client to apply any label it wants, as long as that label begins with a specific prefix. If account 1 is associated with Alice, then Alice will receive a storage authority string that contains a "must start with 1" restriction, enabling her to to use storage space but obligating her to lease her shares with a label that can be traced back to her. She can delegate part of her authority to others (perhaps with other non-label restrictions, such as a space restriction or time limit) with or without an additional label restriction. For example, she might delegate some of her authority to her friend Amy, with a 1.4 label restriction. Amy could then create labels with 1.4 or 1.4.7, but she could not create labels with the same 1 identifier that Alice can do, nor could she create labels with 1.5 (which Alice might have given to her other friend Annette). The storage server operator can ask about the usage of 1 to find out how much Alice is responsible for (which includes the space that she has delegated to Amy and Annette), and none of the A-users can avoid being counted in this total. But Alice can ask the storage server about the usage of 1.4 to find out how much Amy has taken advantage of her gift. Likewise, Alice has control over any lease with a label that begins with 1, so she can cancel Amy's leases and free the space they were consuming. If this seems surprising, consider that the storage server operator considered Alice to be responsible for that space anyways: with great responsibility (for space consumed) comes great power (to stop consuming that space). === Server Space Restriction === The storage server's basic control over how space usage (apart from the binary use-it-or-not authority granted by handing out an authority string at all) is implemented by keeping track of the space used by any given account identifier. If account 1.4 sends a request to allocate a 1MB share, but that 1MB would bring the 1.4 usage over its quota, the request will be denied. For this to be useful, the storage server must give each usage-limited principal a separate account, and it needs to configure a size limit at the same time as the authority string is minted. For a friendnet, the CLI "add account" tool can do both at once: tahoe server add-account --quota 5GB Alice --> Please give the following authority string to "Alice", who should provide it to the "tahoe add-authority" command (authority string..) This command will allocate an account identifier, add Alice to the "pet name table" to associate it with the new account, and establish the 5GB sizelimit. Both the sizelimit and the petname can be changed later. Note that this restriction is independent for each server: some additional mechanism must be used to provide a grid-wide restriction. 
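A minimal sketch of the bookkeeping described in this section — aggregate usage by account-label prefix, then refuse any allocation that would push that prefix over its quota — might look like the following (the helper names and the in-memory layout are assumptions for illustration, not the proposed on-disk or wire format):

    def usage_for_prefix(leases, prefix):
        """Sum share sizes for every lease whose account label starts with
        `prefix`.  `leases` maps label tuples, e.g. (1, 4, 7), to sizes."""
        prefix = tuple(prefix)
        return sum(size for label, size in leases.items()
                   if label[:len(prefix)] == prefix)

    def allow_allocation(leases, account, share_size, quota):
        """Server-side check: deny the request if the new share would push
        the account (including all of its sub-accounts) over its quota."""
        return usage_for_prefix(leases, account) + share_size <= quota

    leases = {(1,): 3 * 10**6, (1, 4): 10**6, (2,): 5 * 10**5}
    usage_for_prefix(leases, (1,))      # 4000000: includes the (1,4) sub-account
    usage_for_prefix(leases, (1, 4))    # 1000000: only the delegated sub-account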
Also note that this restriction is not expressed in the authority string. It is purely local to the storage server. === Attenuated Server Space Restriction === TODO (or not) The server-side space restriction described above can only be applied by the storage server, and cannot be attenuated by other delegates. Alice might be allowed to use 5GB on this server, but she cannot use that restriction to delegate, say, just 1GB to Amy. Instead, Alice's sub-delegation should include a "server_size" restriction key, which contains a size limit. The storage server will only honor a request that uses this authority string if it does not cause the aggregate usage of this authority string's account prefix to rise above the given size limit. Note that this will not enforce the desired restriction if the size limits are not consistent across multiple delegated authorities for the same label. For example, if Amy ends up with two delagations, A1 (which gives her a size limit of 1GB) and A2 (which gives her 5GB), then she can consume 5GB despite the limit in A1. === Other Restrictions === Many storage authority restrictions are meant for internal use by tahoe tools as they delegate short-lived subauthorities to each other, and are not likely to be set by end users. * "SI": a storage index string. The authority can only be used to upload shares of a single file. * "serverid": a server identifier. The authority can only be used when talking to a specific server * "UEB_hash": a binary hash. The authority can only be used to upload shares of a single file, identified by its share's contents. (note: this restricton would require the server to parse the share and validate the hash) * "before": a timestamp. The authority is only valid until a specific time. Requires synchronized clocks or a better definition of "timestamp". * "delegate_to_furl": a string, used to acquire a FURL for an object that contains the attenuated authority. When it comes time to actually use the authority string to do something, this is the first step. * "delegate_to_key": an ECDSA pubkey, used to grant attenuated authority to a separate private key. == User Experience == The process starts with Bob the storage server operator, who has just created a new Storage Server: tahoe create-node --> creates ~/.tahoe # edit ~/.tahoe/tahoe.cfg, add introducer.furl, configure storage, etc Now Bob decides that he wants to let his friend Alice use 5GB of space on his new server. tahoe server add-account --quota=5GB Alice --> Please give the following authority string to "Alice", who should provide it to the "tahoe add-authority" command (authority string XYZ..) Bob copies the new authority string into an email message and sends it to Alice. Meanwhile, Alice has created her own client, and attached it to the same Introducer as Bob. When she gets the email, she pastes the authority string into her local client: tahoe client add-authority (authority string XYZ..) --> new authority added: account (1) Now all CLI commands that Alice runs with her node will take advantage of Bob's space grant. Once Alice's node connects to Bob's, any upload which needs to send a share to Bob's server will search her list of authorities to find one that allows her to use Bob's server. When Alice uses her WUI, upload will be disabled until and unless she pastes one or more authority strings into a special "storage authority" box. TODO: Once pasted, we'll use some trick to keep the authority around in a convenient-yet-safe fashion. 
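The authority-search step mentioned above — the client looking through its list of authorities for one that permits use of a particular server — could be sketched as follows; the dict-based representation and the helper name are illustrative assumptions, not the actual authority-string format:

    import time

    def usable_authorities(authorities, serverid, now=None):
        """Pick the authorities whose restrictions do not rule out talking
        to `serverid` right now.  Each authority is represented here as a
        dict of restriction key/values (see "Other Restrictions" above);
        a missing key means "unrestricted"."""
        now = time.time() if now is None else now
        def permits(restrictions):
            if restrictions.get("serverid") not in (None, serverid):
                return False
            if "before" in restrictions and now >= restrictions["before"]:
                return False
            return True
        return [a for a in authorities if permits(a)]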
When Alice uses her javascript-based web drive, the javascript program will be launched with some trick to hand it the storage authorities, perhaps via a fragment identifier (http://server/path#fragment).

If Alice decides that she wants Amy to have some space, she takes the authority string that Bob gave her and uses it to create one for Amy:

 tahoe authority dump (authority string XYZ..)
 --> explanation of what is in XYZ
 tahoe authority delegate --account 4,1 --space 2GB (authority string XYZ..)
 --> (new authority string ABC..)

Alice sends the ABC string to Amy, who uses "tahoe client add-authority" to start using it.

Later, Bob would like to find out how much space Alice is using. He brings up his node's Storage Server Web Status page. In addition to the overall usage numbers, the page will have a collapsible-treeview table with lines like:

 AccountID  Usage   TotalUsage  Petname
 (1)        1.5GB   2.5GB       Alice
 +(1,4)     1.0GB   1.0GB       ?

This indicates that Alice, as a whole, is using 2.5GB. It also indicates that Alice has delegated some space to a (1,4) account, and that delegation has used 1.0GB. Alice has used 1.5GB on her own, but is responsible for the full 2.5GB. If Alice tells Bob that the subaccount is for Amy, then Bob can assign a pet name for (1,4) with "tahoe server add-pet-name 1,4 Amy". Note that Bob is not aware of the 2GB limit that Alice has imposed upon Amy: the size restriction may have appeared on all the requests that have shown up thus far, but Bob has no way of being sure that a less-restrictive delegation hasn't been created, so his UI does not attempt to remember or present the restrictions it has seen before.

=== Friendnet ===

A "friendnet" is a set of nodes, each of which is both a storage server and a client, each operated by a separate person, all of which have granted storage rights to the others.

The simplest way to get a friendnet started is to simply grant storage authority to everybody. "tahoe server enable-ambient-storage-authority" will configure the storage server to give space to anyone who asks. This behaves just like a 1.3.0 server, without accounting of any sort.

The next step is to restrict server use to just the participants. "tahoe server disable-ambient-storage-authority" will undo the previous step, then there are two basic approaches:

* "full mesh": each node grants authority directly to all the others. First, agree upon a userid number for each participant (the value doesn't matter, as long as it is unique). Each user should then use "tahoe server add-account" for all the accounts (including themselves, if they want some of their shares to land on their own machine), including a quota if they wish to restrict individuals:

   tahoe server add-account --account 1 --quota 5GB Alice
   --> authority string for Alice
   tahoe server add-account --account 2 --quota 5GB Bob
   --> authority string for Bob
   tahoe server add-account --account 3 --quota 5GB Carol
   --> authority string for Carol

  Then email Alice's string to Alice, Bob's string to Bob, etc. Once all users have used "tahoe client add-authority" on everything, each server will accept N distinct authorities, and each client will hold N distinct authorities.

* "account manager": the group designates somebody to be the "AM", or "account manager". The AM generates a keypair and publishes the public key to all the participants, who create a local authority which delegates full storage rights to the corresponding private key.
The AM then delegates account-restricted authority to each user, sending them their personal authority string: AM: tahoe authority create-authority --write-private-to=private.txt --> public.txt # email public.txt to all members AM: tahoe authority delegate --from-file=private.txt --account 1 --quota 5GB --> alice_authority.txt # email this to Alice tahoe authority delegate --from-file=private.txt --account 2 --quota 5GB --> bob_authority.txt # email this to Bob tahoe authority delegate --from-file=private.txt --account 3 --quota 5GB --> carol_authority.txt # email this to Carol ... Alice: # receives alice_authority.txt tahoe client add-authority --from-file=alice_authority.txt # receives public.txt tahoe server add-authorization --from-file=public.txt Bob: # receives bob_authority.txt tahoe client add-authority --from-file=bob_authority.txt # receives public.txt tahoe server add-authorization --from-file=public.txt Carol: # receives carol_authority.txt tahoe client add-authority --from-file=carol_authority.txt # receives public.txt tahoe server add-authorization --from-file=public.txt If the members want to see names next to their local usage totals, they can set local petnames for the accounts: tahoe server set-petname 1 Alice tahoe server set-petname 2 Bob tahoe server set-petname 3 Carol Alternatively, the AM could provide a usage aggregator, which will collect usage values from all the storage servers and show the totals in a single place, and add the petnames to that display instead. The AM gets more authority than anyone else (they can spoof everybody), but each server has just a single authorization instead of N, and each client has a single authority instead of N. When a new member joins the group, the amount of work that must be done is significantly less, and only two parties are involved instead of all N: AM: tahoe authority delegate --from-file=private.txt --account 4 --quota 5GB --> dave_authority.txt # email this to Dave Dave: # receives dave_authority.txt tahoe client add-authority --from-file=dave_authority.txt # receives public.txt tahoe server add-authorization --from-file=public.txt Another approach is to let everybody be the AM: instead of keeping the private.txt file secret, give it to all members of the group (but not to outsiders). This lets current members bring new members into the group without depending upon anybody else doing work. It also renders any notion of enforced quotas meaningless, so it is only appropriate for actual friends who are voluntarily refraining from spoofing each other. === Commercial Grid === A "commercial grid", like the one that allmydata.com manages as a for-profit service, is characterized by a large number of independent clients (who do not know each other), and by all of the storage servers being managed by a single entity. In this case, we use an Account Manager like above, to collapse the potential N*M explosion of authorities into something smaller. We also create a dummy "parent" account, and give all the real clients subaccounts under it, to give the operations personnel a convenient "total space used" number. Each time a new customer joins, the AM is directed to create a new authority for them, and the resulting string is provided to the customer's client node. 
AM: tahoe authority create-authority --account 1 \ --write-private-to=AM-private.txt --write-public-to=AM-public.txt Each time a new storage server is brought up: SERVER: tahoe server add-authorization --from-file=AM-public.txt Each time a new client joins: AM: N = next_account++ tahoe authority delegate --from-file=AM-private.txt --account 1,N --> new_client_authority.txt # give this to new client == Programmatic Interfaces == The storage authority can be passed as a string in a single serialized form, which is cut-and-pasteable and printable. It uses minimal punctuation, to make it possible to include it as a URL query argument or HTTP header field without requiring character-escaping. Before passing it over HTTP, however, note that revealing the authority string to someone is equivalent to irrevocably delegating all that authority to them. While this is appropriate when transferring authority from, say, a receptive storage server to your local agent, it is not appropriate when using a foreign tahoe node, or when asking a Helper to upload a specific file. Attenuations (see below) should be used to limit the delegated authority in these cases. In the programmatic web-API, any operation that consumes storage will accept a storage-authority= query argument, the value of which will be the printable form of an authority string. This includes all PUT operations, POST t=upload and t=mkdir, and anything which creates a new file, creates a directory (perhaps an intermediate one), or modifies a mutable file. Alternatively, the authority string can also be passed through an HTTP header. A single "X-Tahoe-Storage-Authority:" header can be used with the printable authority string. If the string is too large to fit in a single header, the application can provide a series of numbered "X-Tahoe-Storage-Authority-1:", "X-Tahoe-Storage-Authority-2:", etc, headers, and these will be sorted in alphabetical order (please use 08/09/10/11 rather than 8/9/10/11), stripped of leading and trailing whitespace, and concatenated. The HTTP header form can accomodate larger authority strings, since these strings can grow too large to pass as a query argument (especially when several delegations or attenuations are involved). However, depending upon the HTTP client library being used, passing extra HTTP headers may be more complicated than simply modifying the URL, and may be impossible in some cases (such as javascript running in a web browser). TODO: we may add a stored-token form of authority-passing to handle environments in which query-args won't work and headers are not available. This approach would use a special PUT which takes the authority string as the HTTP body, and remembers it on the server side in associated with a brief-but-unguessable token. Later operations would then use the authority by passing a --storage-authority-token=XYZ query argument. These authorities would expire after some period. == Quota Management, Aggregation, Reporting == The storage server will maintain enough information to efficiently compute usage totals for each account referenced in all of their leases, as well as all their parent accounts. 
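As a rough illustration (the function name, data layout, and byte values below are illustrative, not the actual Tahoe schema or API), a server could maintain these totals by charging every lease both to its own account id and to every parent prefix of that id:

  from collections import defaultdict

  def rollup_usage(leases):
      """leases: iterable of (account_id, size_in_bytes) pairs, where
      account_id is a tuple of integers such as (1,) or (1, 4).  Returns a
      dict mapping every account id, and each of its parent prefixes, to the
      aggregate usage charged against it."""
      totals = defaultdict(int)
      for account_id, size in leases:
          # charge the account itself and every ancestor prefix
          for i in range(1, len(account_id) + 1):
              totals[account_id[:i]] += size
      return dict(totals)

  # Example matching the Web Status table shown earlier: Alice (account 1)
  # has stored 1.5GB under her own account, Amy (subaccount 1,4) 1.0GB:
  leases = [((1,), 1500000000), ((1, 4), 1000000000)]
  print(rollup_usage(leases))
  # {(1,): 2500000000, (1, 4): 1000000000}

Keeping the per-prefix totals up to date as leases are added and removed lets the server answer both "how much is account (1,4) using" and "how much is account (1) responsible for" without rescanning all leases.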
This information is used for several purposes: * enforce server-space restrictions, by selectively rejecting storage requests which would cause the account-usage-total to rise above the limit specified in the enabling authorization string * report individual account usage to the account-holder (if a client can consume space under account A, they are also allowed to query usage for account A or a subaccount). * report individual account usage to the storage-server operator, possibly associated with a pet name * report usage for all accounts to the storage-server operator, possibly associated with a pet name, in the form of a large table * report usage for all accounts to an external aggregator The external aggregator would take usage information from all the storage servers in a single grid and sum them together, providing a grid-wide usage number for each account. This could be used by e.g. clients in a commercial grid to report overall-space-used to the end user. There will be web-API URLs available for all of these reports. TODO: storage servers might also have a mechanism to apply space-usage limits to specific account ids directly, rather than requiring that these be expressed only through authority-string limitation fields. This would let a storage server operator revoke their space-allocation after delivering the authority string. == Low-Level Formats == This section describes the low-level formats used by the Accounting process, beginning with the storage-authority data structure and working upwards. This section is organized to follow the storage authority, starting from the point of grant. The discussion will thus begin at the storage server (where the authority is first created), work back to the client (which receives the authority as a web-API argument), then follow the authority back to the servers as it is used to enable specific storage operations. It will then detail the accounting tables that the storage server is obligated to maintain, and describe the interfaces through which these tables are accessed by other parties. === Storage Authority === ==== Terminology ==== Storage Authority is represented as a chain of certificates and a private key. Each certificate authorizes and restricts a specific private key. The initial certificate in the chain derives its authority by being placed in the storage server's tahoe.cfg file (i.e. by being authorized by the storage server operator). All subsequent certificates are signed by the authorized private key that was identified in the previous certificate: they derive their authority by delegation. Each certificate has restrictions which limit the authority being delegated. authority: ([cert[0], cert[1], cert[2] ...], privatekey) The "restrictions dictionary" is a table which establishes an upper bound on how this authority (or any attenuations thereof) may be used. It is effectively a set of key-value pairs. A "signing key" is an EC-DSA192 private key string, as supplied to the pycryptopp SigningKey() constructor, and is 12 bytes long. A "verifying key" is an EC-DSA192 public key string, as produced by pycryptopp, and is 24 bytes long. A "key identifier" is a string which securely identifies a specific signing/verifying keypair: for long RSA keys it would be a secure hash of the public key, but since ECDSA192 keys are so short, we simply use the full verifying key verbatim. 
A "key hint" is a variable-length prefix of the key identifier, perhaps zero bytes long, used to help a recipient reduce the number of verifying keys that it must search to find one that matches a signed message. ==== Authority Chains ==== The authority chain consists of a list of certificates, each of which has a serialized restrictions dictionary. Each dictionary will have a "delegate-to-key" field, which delegates authority to a private key, referenced with a key identifier. In addition, the non-initial certs are signed, so they each contain a signature and a key hint: cert[0]: serialized(restrictions_dictionary) cert[1]: serialized(restrictions_dictionary), signature, keyhint cert[2]: serialized(restrictions_dictionary), signature, keyhint In this example, suppose cert[0] contains a delegate-to-key field that identifies a keypair sign_A/verify_A. In this case, cert[1] will have a signature that was made with sign_A, and the keyhint in cert[1] will reference verify_A. cert[0].restrictions[delegate-to-key] = A_keyid cert[1].signature = SIGN(sign_A, serialized(cert[0].restrictions)) cert[1].keyhint = verify_A cert[1].restrictions[delegate-to-key] = B_keyid cert[2].signature = SIGN(sign_B, serialized(cert[1].restrictions)) cert[2].keyhint = verify_B cert[2].restrictions[delete-to-key] = C_keyid In this example, the full storage authority consists of the cert[0,1,2] chain and the sign_C private key: anyone who is in possession of both will be able to exert this authority. To wield the authority, a client will present the cert[0,1,2] chain and an action message signed by sign_C; the server will validate the chain and the signature before performing the requested action. The only circumstances that might prompt the client to share the sign_C private key with another party (including the server) would be if it wanted to irrevocably share its full authority with that party. ==== Restriction Dictionaries ==== Within a restriction dictionary, the following keys are defined. Their full meanings are defined later. 'accountid': an arbitrary-length sequence of integers >=0, restricting the accounts which can be manipulated or used in leases 'SI': a storage index (binary string), controlling which file may be manipulated 'serverid': binary string, limiting which server will accept requests 'UEB-hash': binary string, limiting the content of the file being manipulated 'before': timestamp (seconds since epoch), limits the lifetime of this authority 'server-size': integer >0, maximum aggregate storage (in bytes) per account 'delegate-to-key': binary string (DSA pubkey identifier) 'furl-to': printable FURL string ==== Authority Serialization ==== There is only one form of serialization: a somewhat-compact URL-safe cut-and-pasteable printable form. We are interested in minimizing the size of the resulting authority, so rather than using a general-purpose (perhaps JSON-based) serialization scheme, we use one that is specialized for this task. This URL-safe form will use minimal punctuation to avoid quoting issues when used in a URL query argument. It would be nice to avoid word-breaking characters that make cut-and-paste troublesome, however this is more difficult because most non-alphanumeric characters are word-breaking in at least one application. The serialized storage authority as a whole contains a single version identifier and magic number at the beginning. None of the internal components contain redundant version numbers: they are implied by the container. 
If components are serialized independently for other reasons, they may contain version identifiers in that form.

Signing keys (i.e. private keys) are URL-safe-serialized using Zooko's base62 alphabet, which offers almost the same density as standard base64 but without any non-URL-safe or word-breaking characters. Since we use fixed-format keys (EC-DSA, 192bit, with SHA256), the private keys are fixed-length (96 bits or 12 bytes), so there is no length indicator: all URL-safe-serialized signing keys are 17 base62 characters long. The 192-bit verifying keys (i.e. public keys) use the same approach: the URL-safe form is 33 characters long.

An account-id sequence (a variable-length sequence of non-negative numbers) is serialized by representing each number in decimal ASCII, then joining the pieces with commas. The string is terminated by the first non-[0-9,] character encountered, which will either be the key-identifier letter of the next field, or the dictionary-terminating character at the end. Any single integral decimal number (such as the "before" timestamp field, or the "server-size" field) is serialized as a variable-length sequence of ASCII decimal digits, terminated by any non-digit.

The restrictions dictionary is serialized as a concatenated series of key-identifier-letter / value string pairs, ending with the marker "E.". The URL-safe form uses a single printable letter to indicate which key is being serialized. Each type of value string is serialized differently:

  "A": accountid: variable-length sequence of comma-joined numbers
  "I": storage index: fixed-length 26-character *base32*-encoded storage index
  "P": server id (peer id): fixed-length 32-character *base32* encoded serverid (matching the printable Tub.tubID string that Foolscap provides)
  "U": UEB hash: fixed-length 43-character base62 encoded UEB hash
  "B": before: variable-length sequence of decimal digits, seconds-since-epoch.
  "S": server-size: variable-length sequence of decimal digits, max size in bytes
  "D": delegate-to-key: ECDSA public key, 33 base62 characters.
  "F": furl-to: variable-length FURL string, wrapped in a netstring: "%d:%s," % (len(FURL), FURL). Note that this is rarely pasted.
  "E.": end-of-dictionary marker

The ECDSA signature is serialized as a variable number of base62 characters, terminated by a period. We expect the signature to be about 384 bits (48 bytes) long, or 65 base62 characters. A missing signature (such as for the initial cert) is represented as a single period. The key hint is serialized as a base62-encoded hint string (a byte-quantized prefix of the serialized public key), terminated by a period. An empty hint would thus be serialized as a single period. For the current design, we expect the key hint to be empty.

The full storage authority string consists of a certificate chain and a delegate private key. Given the single-certificate serialization scheme described above, the full authority is serialized as follows:

 * version prefix: depends upon the application, but for storage-authority chains this will be "sa0-", for Storage-Authority Version 0.
 * serialized certificates, concatenated together
 * serialized private key (to which the last certificate delegates authority)

Note that this serialization form does not have an explicit terminator, so the environment must provide a length indicator or some other way to identify the end of the authority string.
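For concreteness, here is a minimal sketch of how the letter-coded fields above might be emitted. It is illustrative only, not the real implementation: the values are assumed to arrive already encoded (base32 storage index, base62 key id, and so on), so only the framing and the "E." terminator are shown.

  # Letter codes from the table above; values are pre-encoded strings except
  # where noted.  This sketch ignores signatures, key hints, and parsing.
  FIELD_LETTERS = {
      "accountid": "A",        # tuple of ints, joined with commas
      "SI": "I",               # 26-character base32 storage index
      "serverid": "P",         # 32-character base32 server (peer) id
      "UEB-hash": "U",         # 43-character base62 UEB hash
      "before": "B",           # decimal seconds-since-epoch
      "server-size": "S",      # decimal byte count
      "delegate-to-key": "D",  # 33-character base62 ECDSA public key
      "furl-to": "F",          # FURL, wrapped in a netstring
  }

  def serialize_restrictions(restrictions):
      parts = []
      for key, value in restrictions.items():
          letter = FIELD_LETTERS[key]
          if key == "accountid":
              value = ",".join(str(n) for n in value)
          elif key in ("before", "server-size"):
              value = str(int(value))
          elif key == "furl-to":
              value = "%d:%s," % (len(value), value)
          parts.append(letter + value)
      return "".join(parts) + "E."   # end-of-dictionary marker

  # e.g. subaccount (1,4,7), a 5GB size limit, and a (fake) 33-character key id:
  print(serialize_restrictions({"accountid": (1, 4, 7),
                                "server-size": 5 * 10**9,
                                "delegate-to-key": "x" * 33}))
  # prints "A1,4,7S5000000000D", then 33 x's, then the "E." marker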
The benefit of this approach is that the full string will begin and end with alphanumeric characters, making cut-and-paste easier (increasing the size of the mouse target: anywhere within the final component will work). Also note that the period is a reserved delimiter: it cannot appear in the serialized restrictions dictionary. The parser can remove the version prefix, split the rest on periods, and expect to see 3*k+1 fields, consisting of k (restriction-dictionary,signature,keyhint) 3-tuples and a single private key at the end. Some examples: (example A) cert[0] delegates account 1,4 to (pubkey ZlFA / privkey 1f2S): sa0-A1,4D2lFA6LboL2xx0ldQH2K1TdSrwuqMMiME3E...1f2SI9UJPXvb7vdJ1 (example B) cert[0] delegates account 1,4 to ZlFA/1f2S cert[1] subdelegates 5GB and subaccount 1,4,7 to pubkey 0BPo/06rt: sa0-A1,4D2lFA6LboL2xx0ldQH2K1TdSrwuqMMiME3E...A1,4,7S5000000000D0BPoGxJ3M4KWrmdpLnknhJABrWip5e9kPE,7cyhQvv5axdeihmOzIHjs85TcUIYiWHdsxNz50GTerEOR5ucj2TITPXxyaCUli1oF...06rtcPQotR3q4f2cT == Problems == Problems which have thus far been identified with this approach: * allowing arbitrary subaccount generation will permit a DoS attack, in which an authorized uploader consumes lots of DB space by creating an unbounded number of randomly-generated subaccount identifiers. OTOH, they can already attach an unbounded number of leases to any file they like, consuming a lot of space. tahoe-lafs-1.10.0/docs/proposed/denver.txt000066400000000000000000000243571221140116300204250ustar00rootroot00000000000000The "Denver Airport" Protocol (discussed whilst returning robk to DEN, 12/1/06) This is a scaling improvement on the "Select Peers" phase of Tahoe2. The problem it tries to address is the storage and maintenance of the 1M-long peer list, and the relative difficulty of gathering long-term reliability information on a useful numbers of those peers. In DEN, each node maintains a Chord-style set of connections to other nodes: log2(N) "finger" connections to distant peers (the first of which is halfway across the ring, the second is 1/4 across, then 1/8th, etc). These connections need to be kept alive with relatively short timeouts (5s?), so any breaks can be rejoined quickly. In addition to the finger connections, each node must also remain aware of K "successor" nodes (those which are immediately clockwise of the starting point). The node is not required to maintain connections to these, but it should remain informed about their contact information, so that it can create connections when necessary. We probably need a connection open to the immediate successor at all times. Since inbound connections exist too, each node has something like 2*log2(N) plus up to 2*K connections. Each node keeps history of uptime/availability of the nodes that it remains connected to. Each message that is sent to these peers includes an estimate of that peer's availability from the point of view of the outside world. The receiving node will average these reports together to determine what kind of reliability they should announce to anyone they accept leases for. This reliability is expressed as a percentage uptime: P=1.0 means the peer is available 24/7, P=0.0 means it is almost never reachable. When a node wishes to publish a file, it creates a list of (verifierid, sharenum) tuples, and computes a hash of each tuple. 
These hashes then represent starting points for the landlord search: starting_points = [(sharenum,sha(verifierid + str(sharenum))) for sharenum in range(256)] The node then constructs a reservation message that contains enough information for the potential landlord to evaluate the lease, *and* to make a connection back to the starting node: message = [verifierid, sharesize, requestor_furl, starting_points] The node looks through its list of finger connections and splits this message into up to log2(N) smaller messages, each of which contains only the starting points that should be sent to that finger connection. Specifically we sent a starting_point to a finger A if the nodeid of that finger is <= the starting_point and if the next finger B is > starting_point. Each message sent out can contain multiple starting_points, each for a different share. When a finger node receives this message, it performs the same splitting algorithm, sending each starting_point to other fingers. Eventually a starting_point is received by a node that knows that the starting_point lies between itself and its immediate successor. At this point the message switches from the "hop" mode (following fingers) to the "search" mode (following successors). While in "search" mode, each node interprets the message as a lease request. It checks its storage pool to see if it can accomodate the reservation. If so, it uses requestor_furl to contact the originator and announces its willingness to host the given sharenum. This message will include the reliability measurement derived from the host's counterclockwise neighbors. If the recipient cannot host the share, it forwards the request on to the next successor, which repeats the cycle. Each message has a maximum hop count which limits the number of peers which may be searched before giving up. If a node sees itself to be the last such hop, it must establish a connection to the originator and let them know that this sharenum could not be hosted. The originator sends out something like 100 or 200 starting points, and expects to get back responses (positive or negative) in a reasonable amount of time. (perhaps if we receive half of the responses in time T, wait for a total of 2T for the remaining ones). If no response is received with the timeout, either re-send the requests for those shares (to different fingers) or send requests for completely different shares. Each share represents some fraction of a point "S", such that the points for enough shares to reconstruct the whole file total to 1.0 points. I.e., if we construct 100 shares such that we need 25 of them to reconstruct the file, then each share represents .04 points. As the positive responses come in, we accumulate two counters: the capacity counter (which gets a full S points for each positive response), and the reliability counter (which gets S*(reliability-of-host) points). The capacity counter is not allowed to go above some limit (like 4x), as determined by provisioning. The node keeps adding leases until the reliability counter has gone above some other threshold (larger but close to 1.0). [ at download time, each host will be able to provide the share back with probability P times an exponential decay factor related to peer death. Sum these probabilities to get the average number of shares that will be available. The interesting thing is actually the distribution of these probabilities, and what threshold you have to pick to get a sufficiently high chance of recovering the file. 
If there are N identical peers with probability P, the number of recovered shares will have a gaussian distribution with an average of N*P and a stddev of (??). The PMF of this function is an S-curve, with a sharper slope when N is large. The probability of recovering the file is the value of this S curve at the threshold value (the number of necessary shares). P is not actually constant across all peers, rather we assume that it has its own distribution: maybe gaussian, more likely exponential (power law). This changes the shape of the S-curve. Assuming that we can characterize the distribution of P with perhaps two parameters (say meanP and stddevP), the S-curve is a function of meanP, stddevP, N, and threshold... To get 99.99% or 99.999% recoverability, we must choose a threshold value high enough to accomodate the random variations and uncertainty about the real values of P for each of the hosts we've selected. By counting reliability points, we are trying to estimate meanP/stddevP, so we know which S-curve to look at. The threshold is fixed at 1.0, since that's what erasure coding tells us we need to recover the file. The job is then to add hosts (increasing N and possibly changing meanP/stddevP) until our recoverability probability is as high as we want. ] The originator takes all acceptance messages and adds them in order to the list of landlords that will be used to host the file. It stops when it gets enough reliability points. Note that it does *not* discriminate against unreliable hosts: they are less likely to have been found in the first place, so we don't need to discriminate against them a second time. We do, however, use the reliability points to acknowledge that sending data to an unreliable peer is not as useful as sending it to a reliable one (there is still value in doing so, though). The remaining reservation-acceptance messages are cancelled and then put aside: if we need to make a second pass, we ask those peers first. Shares are then created and published as in Tahoe2. If we lose a connection during the encoding, that share is lost. If we lose enough shares, we might want to generate more to make up for them: this is done by using the leftover acceptance messages first, then triggering a new Chord search for the as-yet-unaccepted sharenums. These new peers will get shares from all segments that have not yet been finished, then a second pass will be made to catch them up on the earlier segments. Properties of this approach: the total number of peers that each node must know anything about is bounded to something like 2*log2(N) + K, probably on the order of 50 to 100 total. This is the biggest advantage, since in tahoe2 each node must know at least the nodeid of all 1M peers. The maintenance traffic should be much less as a result. each node must maintain open (keep-alived) connections to something like 2*log2(N) peers. In tahoe2, this number is 0 (well, probably 1 for the introducer). during upload, each node must actively use 100 connections to a random set of peers to push data (just like tahoe2). The probability that any given share-request gets a response is equal to the number of hops it travels through times the chance that a peer dies while holding on to the message. This should be pretty small, as the message should only be held by a peer for a few seconds (more if their network is busy). In tahoe2, each share-request always gets a response, since they are made directly to the target. 
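Returning to the starting-point routing rule described earlier, here is a minimal sketch of how a node might split a batch of starting points among its finger connections. It is illustrative only: the SHA-1 hash, the 160-bit ring size, and all function names are assumptions, not part of the protocol description.

  import hashlib

  RING = 2 ** 160   # assume nodeids and starting points live on a 160-bit ring

  def starting_points_for(verifierid, num_shares=256):
      # sha(verifierid + str(sharenum)); verifierid is bytes, SHA-1 is assumed
      return [(i, int.from_bytes(hashlib.sha1(verifierid + str(i).encode()).digest(), "big"))
              for i in range(num_shares)]

  def in_arc(point, start, end):
      """True if point lies in the clockwise half-open arc [start, end)."""
      if start == end:
          return True               # a single finger owns the entire ring
      if start <= end:
          return start <= point < end
      return point >= start or point < end   # the arc wraps past zero

  def split_among_fingers(points, fingers):
      """fingers: finger-table nodeids in clockwise order from this node.
      Returns {finger_nodeid: [(sharenum, point), ...]}: each starting point
      goes to the finger whose nodeid is <= the point while the next finger's
      nodeid is beyond it, as described above."""
      batches = {f: [] for f in fingers}
      arcs = list(zip(fingers, fingers[1:] + fingers[:1]))
      for sharenum, point in points:
          for a, b in arcs:
              if in_arc(point, a, b):
                  batches[a].append((sharenum, point))
                  break
      return batches

  # e.g. distribute one file's starting points among three made-up fingers:
  fingers = sorted(int.from_bytes(hashlib.sha1(n).digest(), "big")
                   for n in [b"peer1", b"peer2", b"peer3"])
  batches = split_among_fingers(starting_points_for(b"verifierid-1"), fingers)

Each batch would then be bundled into a single message to that finger, which repeats the same split against its own finger table until the starting point reaches the successor region and the search mode takes over.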
I visualize the peer-lookup process as the originator creating a message-in-a-bottle for each share. Each message says "Dear Sir/Madam, I would like to store X bytes of data for file Y (share #Z) on a system close to (but not below) nodeid STARTING_POINT. If you find this amenable, please contact me at FURL so we can make arrangements.". These messages are then bundled together according to their rough destination (STARTING_POINT) and sent somewhere in the right direction. Download happens the same way: lookup messages are disseminated towards the STARTING_POINT and then search one successor at a time from there. There are two ways that the share might go missing: if the node is now offline (or has for some reason lost its shares), or if new nodes have joined since the original upload and the search depth (maximum hop count) is too small to accomodate the churn. Both result in the same amount of localized traffic. In the latter case, a storage node might want to migrate the share closer to the starting point, or perhaps just send them a note to remember a pointer for the share. Checking: anyone who wishes to do a filecheck needs to send out a lookup message for every potential share. These lookup messages could have a higher search depth than usual. It would be useful to know how many peers each message went through before being returned: this might be useful to perform repair by instructing the old host (which is further from the starting point than you'd like) to push their share closer towards the starting point. tahoe-lafs-1.10.0/docs/proposed/lossmodel.lyx000066400000000000000000001575041221140116300211410ustar00rootroot00000000000000#LyX 1.6.2 created this file. For more info see http://www.lyx.org/ \lyxformat 345 \begin_document \begin_header \textclass amsart \use_default_options true \begin_modules theorems-ams theorems-ams-extended \end_modules \language english \inputencoding auto \font_roman default \font_sans default \font_typewriter default \font_default_family default \font_sc false \font_osf false \font_sf_scale 100 \font_tt_scale 100 \graphics default \float_placement h \paperfontsize default \spacing single \use_hyperref false \papersize default \use_geometry false \use_amsmath 1 \use_esint 1 \cite_engine basic \use_bibtopic false \paperorientation portrait \secnumdepth 3 \tocdepth 3 \paragraph_separation indent \defskip medskip \quotes_language english \papercolumns 1 \papersides 1 \paperpagestyle default \tracking_changes false \output_changes false \author "" \author "" \end_header \begin_body \begin_layout Title Tahoe Distributed Filesharing System Loss Model \end_layout \begin_layout Author Shawn Willden \end_layout \begin_layout Date 07/22/2009 \end_layout \begin_layout Address South Weber, Utah \end_layout \begin_layout Email shawn@willden.org \end_layout \begin_layout Abstract The abstract goes here \end_layout \begin_layout Section Problem Statement \end_layout \begin_layout Standard The allmydata Tahoe distributed file system uses Reed-Solomon erasure coding to split files into \begin_inset Formula $N$ \end_inset shares which are delivered to randomly-selected peers in a distributed network. The file can later be reassembled from any \begin_inset Formula $k\leq N$ \end_inset of the shares, if they are available. \end_layout \begin_layout Standard Over time shares are lost for a variety of reasons. Storage servers may crash, be destroyed or simply be removed from the network. 
To mitigate such losses, Tahoe network clients employ a repair agent which scans the peers once per time period \begin_inset Formula $A$ \end_inset and determines how many of the shares remain. If less than \begin_inset Formula $L$ \end_inset ( \begin_inset Formula $k\leq L\leq N$ \end_inset ) shares remain, then the repairer reconstructs the file shares and redistribute s the missing ones, bringing the availability back up to full. \end_layout \begin_layout Standard The question we're trying to answer is "What is the probability that we'll be able to reassemble the file at some later time \begin_inset Formula $T$ \end_inset ?". We'd also like to be able to determine what values we should choose for \begin_inset Formula $k$ \end_inset , \begin_inset Formula $N$ \end_inset , \begin_inset Formula $A$ \end_inset , and \begin_inset Formula $L$ \end_inset in order to ensure \begin_inset Formula $Pr[loss]\leq r$ \end_inset for some threshold probability \begin_inset Formula $r$ \end_inset . This is an optimization problem because although we could obtain very low \begin_inset Formula $Pr[loss]$ \end_inset by selecting conservative parameters, these choices have costs. The peer storage and bandwidth consumed by the share distribution process are approximately \begin_inset Formula $\nicefrac{N}{k}$ \end_inset times the size of the original file, so we would like to minimize \begin_inset Formula $\nicefrac{N}{k}$ \end_inset , consistent with \begin_inset Formula $Pr[loss]\leq r$ \end_inset . Likewise, a frequent and aggressive repair process keeps the number of shares available close to \begin_inset Formula $N,$ \end_inset but at a cost in bandwidth and processing time as the repair agent downloads \begin_inset Formula $k$ \end_inset shares, reconstructs the file and uploads new shares to replace those that are lost. \end_layout \begin_layout Section Reliability \end_layout \begin_layout Standard The probability that the file becomes unrecoverable is dependent upon the probability that the peers to whom we send shares are able to return those copies on demand. Shares that are corrupted are detected and discarded, so there is no need to distinguish between corruption and loss. \end_layout \begin_layout Standard Many factors affect share availability. Availability can be temporarily interrupted by peer unavailability due to network outages, power failures or administrative shutdown, among other reasons. Availability can be permanently lost due to failure or corruption of storage media, catastrophic damage to the peer system, administrative error, withdrawal from the network, malicious corruption, etc. \end_layout \begin_layout Standard The existence of intermittent failure modes motivates the introduction of a distinction between \noun on availability \noun default and \noun on reliability \noun default . Reliability is the probability that a share is retrievable assuming intermitten t failures can be waited out, so reliability considers only permanent failures. Availability considers all failures, and is focused on the probability of retrieval within some defined time frame. \end_layout \begin_layout Standard Another consideration is that some failures affect multiple shares. If multiple shares of a file are stored on a single hard drive, for example, failure of that drive may lose them all. Catastrophic damage to a data center may destroy all shares on all peers in that data center. 
\end_layout \begin_layout Standard While the types of failures that may occur are quite consistent across peers, their probabilities differ dramatically. A professionally-administered server with redundant storage, power and Internet located in a carefully-monitored data center with automatic fire suppression systems is much less likely to become either temporarily or permanently unavailable than the typical virus and malware-ridden home computer on a single cable modem connection. A variety of situations in between exist as well, such as the case of the author's home file server, which is administered by an IT professional and uses RAID level 6 redundant storage, but runs on old, cobbled-together equipment, and has a consumer-grade Internet connection. \end_layout \begin_layout Standard To begin with, let's use a simple definition of reliability: \end_layout \begin_layout Definition \noun on Reliability \noun default is the probability \begin_inset Formula $p_{i}$ \end_inset that a share \begin_inset Formula $s_{i}$ \end_inset will survive to (be retrievable at) time \begin_inset Formula $T=A$ \end_inset , ignoring intermittent failures. That is, the probability that the share will be retrievable at the end of the current repair cycle, and therefore usable by the repairer to regenerate any lost shares. \end_layout \begin_layout Standard Reliability \begin_inset Formula $p_{i}$ \end_inset is clearly dependent on \begin_inset Formula $A$ \end_inset . Short repair cycles offer less time for shares to \begin_inset Quotes eld \end_inset decay \begin_inset Quotes erd \end_inset into unavailability. \end_layout \begin_layout Subsection Peer Reliability \end_layout \begin_layout Standard Since peer reliability is the basis for any computations we may do on share and file reliability, we must have a way to estimate it. Reliability modeling of hardware, software and human performance are each complex topics, the subject of much ongoing research. In particular, the reliability of one of the key components of any peer from our perspective -- the hard drive where file shares are stored -- is the subject of much current debate. \end_layout \begin_layout Standard A common assumption about hardware failure is that it follows the \begin_inset Quotes eld \end_inset bathtub curve \begin_inset Quotes erd \end_inset , with frequent failures during the first few months, a constant failure rate for a few years and then a rising failure rate as the hardware wears out. This curve is often flattened by burn-in stress testing, and by periodic replacement that assures that in-service components never reach \begin_inset Quotes eld \end_inset old age \begin_inset Quotes erd \end_inset . \end_layout \begin_layout Standard In any case, we're generally going to ignore all of that complexity and focus on the bottom of the bathtub, assuming constant failure rates. This is a particularly reasonable assumption as long as we're focused on failures during a particular, relatively short interval \begin_inset Formula $A$ \end_inset . Towards the end of this paper, as we examine failures over many repair intervals, the assumption becomes more tenuous, and we note some of the issues. 
\end_layout \begin_layout Subsubsection Estimate Adaptation \end_layout \begin_layout Standard Even assuming constant failure rates, however, it will be rare that the duration of \begin_inset Formula $A$ \end_inset coincides with the available failure rate data, particularly since we want to view \begin_inset Formula $A$ \end_inset as a tunable parameter. It's necessary to be able adapt failure rates baselined against any given duration to the selected value of \begin_inset Formula $A$ \end_inset . \end_layout \begin_layout Standard Another issue is that failure rates of hardware, etc., are necessarily continuous in nature, while the per-interval failure/survival rates that are of interest for file reliability calculations are discrete -- a peer either survives or fails during the interval. The continuous nature of failure rates means that the common and obvious methods for estimating failure rates result in values that follow continuous, not discrete distributions. The difference is minor for small failure probabilities, and converges to zero as the number of intervals goes to infinity, but is important enough in some cases to be worth correcting for. \end_layout \begin_layout Standard Continuous failure rates are described in terms of mean time to failure, and under the assumption that failure rates are constant, are exponentially distributed. Under these assumptions, the probability that a machine fails at time \begin_inset Formula $t$ \end_inset , is \begin_inset Formula \[ f\left(t\right)=\lambda e^{-\lambda t}\] \end_inset where \begin_inset Formula $\lambda$ \end_inset represents the per unit-time failure rate. The probability that a machine fails at or before time \begin_inset Formula $A$ \end_inset is therefore \begin_inset Formula \begin{align} F\left(t\right) & =\int_{0}^{A}f\left(x\right)dx\nonumber \\ & =\int_{0}^{A}\lambda e^{-\lambda x}dx\nonumber \\ & =1-e^{-\lambda A}\label{eq:failure-time}\end{align} \end_inset \end_layout \begin_layout Standard Note that \begin_inset Formula $A$ \end_inset and \begin_inset Formula $\lambda$ \end_inset in \begin_inset CommandInset ref LatexCommand ref reference "eq:failure-time" \end_inset must be expressed in consistent time units. If they're different, unit conversions should be applied in the normal way. For example, if the estimate for \begin_inset Formula $\lambda$ \end_inset is 750 failures per million hours, and \begin_inset Formula $A$ \end_inset is one month, then either \begin_inset Formula $A$ \end_inset should be represented as \begin_inset Formula $30\cdot24/1000000=.00072$ \end_inset , or \begin_inset Formula $\lambda$ \end_inset should be converted to failures per month. Or both may be converted to hours. \end_layout \begin_layout Subsubsection Acquiring Peer Reliability Estimates \end_layout \begin_layout Standard Need to write this. \end_layout \begin_layout Subsection Uniform Reliability \begin_inset CommandInset label LatexCommand label name "sub:Fixed-Reliability" \end_inset \end_layout \begin_layout Standard In the simplest case, the peers holding the file shares all have the same reliability \begin_inset Formula $p$ \end_inset , and are all independent from one another. Let \begin_inset Formula $K$ \end_inset be a random variable that represents the number of shares that survive \begin_inset Formula $A$ \end_inset . 
Each share's survival can be viewed as an independent Bernoulli trial with a success probability of \begin_inset Formula $p$ \end_inset , which means that \begin_inset Formula $K$ \end_inset follows the binomial distribution with parameters \begin_inset Formula $N$ \end_inset and \begin_inset Formula $p$ \end_inset . That is, \begin_inset Formula $K\sim B(N,p)$ \end_inset . \end_layout \begin_layout Theorem Binomial Distribution Theorem \end_layout \begin_layout Theorem Consider \begin_inset Formula $n$ \end_inset independent Bernoulli trials \begin_inset Foot status collapsed \begin_layout Plain Layout A Bernoulli trial is simply a test of some sort that results in one of two outcomes, one of which is designated success and the other failure. The classic example of a Bernoulli trial is a coin toss. \end_layout \end_inset that succeed with probability \begin_inset Formula $p$ \end_inset , and let \begin_inset Formula $K$ \end_inset be a random variable that represents the number, \begin_inset Formula $m$ \end_inset , of successes, \begin_inset Formula $0\le m\le n$ \end_inset . We say that \begin_inset Formula $K$ \end_inset follows the Binomial Distribution with parameters n and p, denoted \begin_inset Formula $K\sim B(n,p)$ \end_inset . The probability mass function (PMF) of K is a function that gives the probabili ty that \begin_inset Formula $K$ \end_inset takes a particular value \begin_inset Formula $m$ \end_inset (the probability that there are exactly \begin_inset Formula $m$ \end_inset successful trials, and therefore \begin_inset Formula $n-m$ \end_inset failures). The PMF of K is \begin_inset Formula \begin{equation} Pr[K=m]=f(m;n,p)=\binom{n}{m}p^{m}(1-p)^{n-m}\label{eq:binomial-pmf}\end{equation} \end_inset \end_layout \begin_layout Proof Consider the specific case of exactly \begin_inset Formula $m$ \end_inset successes followed by \begin_inset Formula $n-m$ \end_inset failures, because each success has probability \begin_inset Formula $p$ \end_inset , each failure has probability \begin_inset Formula $1-p$ \end_inset , and the trials are independent, the probability of this exact case occurring is \begin_inset Formula $p^{m}\left(1-p\right)^{\left(n-m\right)}$ \end_inset , the product of the probabilities of the outcome of each trial. \end_layout \begin_layout Proof Now consider any reordering of these \begin_inset Formula $m$ \end_inset successes and \begin_inset Formula $n$ \end_inset failures. Any such reordering occurs with the same probability \begin_inset Formula $p^{m}\left(1-p\right)^{\left(n-m\right)}$ \end_inset , but with the terms of the product reordered. Since multiplication is commutative, each such reordering has the same probability. There are n-choose-m such orderings, and each ordering is an independent event, meaning we can sum the probabilities of the individual orderings, so the probability that any ordering of \begin_inset Formula $m$ \end_inset successes and \begin_inset Formula $n-m$ \end_inset failures occurs is given by \begin_inset Formula \[ \binom{n}{m}p^{m}\left(1-p\right)^{\left(n-m\right)}\] \end_inset which is the right-hand-side of equation \begin_inset CommandInset ref LatexCommand ref reference "eq:binomial-pmf" \end_inset . \end_layout \begin_layout Standard A file survives if at least \begin_inset Formula $k$ \end_inset of the \begin_inset Formula $N$ \end_inset shares survive. 
Equation \begin_inset CommandInset ref LatexCommand ref reference "eq:binomial-pmf" \end_inset gives the probability that exactly \begin_inset Formula $i$ \end_inset shares survive, for any \begin_inset Formula $1\leq i\leq n$ \end_inset , so the probability that fewer than \begin_inset Formula $k$ \end_inset survive is the sum of the probabilities that \begin_inset Formula $0,1,2,\ldots,k-1$ \end_inset shares survive. That is: \end_layout \begin_layout Standard \begin_inset Formula \begin{equation} Pr[file\, lost]=\sum_{i=0}^{k-1}\binom{n}{i}p^{i}(1-p)^{n-i}\label{eq:simple-failure}\end{equation} \end_inset \end_layout \begin_layout Subsection Independent Reliability \begin_inset CommandInset label LatexCommand label name "sub:Independent-Reliability" \end_inset \end_layout \begin_layout Standard Equation \begin_inset CommandInset ref LatexCommand ref reference "eq:simple-failure" \end_inset assumes that all shares have the same probability of survival, but as explained above, this is not necessarily true. A more accurate model allows each share \begin_inset Formula $s_{i}$ \end_inset an independent probability of survival \begin_inset Formula $p_{i}$ \end_inset . Each share's survival can still be treated as an independent Bernoulli trial, but with success probability \begin_inset Formula $p_{i}$ \end_inset . Under this assumption, \begin_inset Formula $K$ \end_inset follows a generalized binomial distribution with parameters \begin_inset Formula $N$ \end_inset and \begin_inset Formula $p_{1},p_{2},\dots,p_{N}$ \end_inset . \end_layout \begin_layout Standard The PMF for this generalized \begin_inset Formula $K$ \end_inset does not have a simple closed-form representation. However, the PMFs for random variables representing individual share survival do. Let \begin_inset Formula $K_{i}$ \end_inset be a random variable such that: \end_layout \begin_layout Standard \begin_inset Formula \[ K_{i}=\begin{cases} 1 & \textnormal{if }s_{i}\textnormal{ survives}\\ 0 & \textnormal{if }s_{i}\textnormal{ fails}\end{cases}\] \end_inset \end_layout \begin_layout Standard The PMF for \begin_inset Formula $K_{i}$ \end_inset is very simple: \begin_inset Formula \[ Pr[K_{i}=j]=\begin{cases} p_{i} & j=1\\ 1-p_{i} & j=0\end{cases}\] \end_inset which can also be expressed as \begin_inset Formula \[ Pr[K_{i}=j]=f\left(j\right)=\left(1-p_{i}\right)\left(1-j\right)+p_{i}\left(j\right)\] \end_inset \end_layout \begin_layout Standard Note that since each \begin_inset Formula $K_{i}$ \end_inset represents the count of shares \begin_inset Formula $s_{i}$ \end_inset that survives (either 0 or 1), if we add up all of the individual survivor counts, we get the group survivor count. That is: \begin_inset Formula \[ \sum_{i=1}^{N}K_{i}=K\] \end_inset Effectively, we have separated \begin_inset Formula $K$ \end_inset into the series of Bernoulli trials that make it up. \end_layout \begin_layout Theorem Discrete Convolution Theorem \end_layout \begin_layout Theorem Let \begin_inset Formula $X$ \end_inset and \begin_inset Formula $Y$ \end_inset be discrete random variables with probability mass functions given by \begin_inset Formula $Pr\left[X=x\right]=f(x)$ \end_inset and \begin_inset Formula $Pr\left[Y=y\right]=g(y).$ \end_inset Let \begin_inset Formula $Z$ \end_inset be the discrete random random variable obtained by summing \begin_inset Formula $X$ \end_inset and \begin_inset Formula $Y$ \end_inset . 
\end_layout \begin_layout Theorem The probability mass function of \begin_inset Formula $Z$ \end_inset is given by \begin_inset Formula \[ Pr[Z=z]=h(z)=\left(f\star g\right)(z)\] \end_inset where \begin_inset Formula $\star$ \end_inset denotes the discrete convolution operation: \begin_inset Formula \[ \left(f\star g\right)\left(n\right)=\sum_{m=-\infty}^{\infty}f\left(m\right)g\left(m-n\right)\] \end_inset \end_layout \begin_layout Proof The proof is beyond the scope of this paper. \end_layout \begin_layout Standard If we denote the PMF of \begin_inset Formula $K$ \end_inset with \begin_inset Formula $f$ \end_inset and the PMF of \begin_inset Formula $K_{i}$ \end_inset with \begin_inset Formula $g_{i}$ \end_inset (more formally, \begin_inset Formula $Pr[K=x]=f(x)$ \end_inset and \begin_inset Formula $Pr[K_{i}=x]=g_{i}(x)$ \end_inset ) then since \begin_inset Formula $K=\sum_{i=1}^{N}K_{i}$ \end_inset , according to the discrete convolution theorem \begin_inset Formula $f=g_{1}\star g_{2}\star g_{3}\star\ldots\star g_{N}$ \end_inset . Since convolution is associative, this can also be written as \begin_inset Formula $ $ \end_inset \begin_inset Formula \begin{equation} f=(\ldots((g_{1}\star g_{2})\star g_{3})\star\ldots)\star g_{N})\label{eq:convolution}\end{equation} \end_inset Therefore, \begin_inset Formula $f$ \end_inset can be computed as a sequence of convolution operations on the simple PMFs of the random variables \begin_inset Formula $K_{i}$ \end_inset . In fact, for large \begin_inset Formula $N$ \end_inset , equation \begin_inset CommandInset ref LatexCommand ref reference "eq:convolution" \end_inset turns out to be a more effective means of computing the PMF of \begin_inset Formula $K$ \end_inset than the binomial theorem. even in the case of shares with identical survival probability. The reason it's better is because the calculation of \begin_inset Formula $\binom{n}{m}$ \end_inset in equation \begin_inset CommandInset ref LatexCommand ref reference "eq:binomial-pmf" \end_inset produces very large values that overflow unless arbitrary precision numeric representations are used. \end_layout \begin_layout Standard Note also that it is not necessary to have very simple PMFs like those of the \begin_inset Formula $K_{i}$ \end_inset . Any share or set of shares that has a known PMF can be combined with any other set with a known PMF by convolution, as long as the two share sets are independent. The reverse holds as well; given a group with an empirically-derived PMF, in it's theoretically possible to solve for an individual PMF, and thereby determine \begin_inset Formula $p_{i}$ \end_inset even when per-share data is unavailable. \end_layout \begin_layout Subsection Multiple Failure Modes \begin_inset CommandInset label LatexCommand label name "sub:Multiple-Failure-Modes" \end_inset \end_layout \begin_layout Standard In modeling share survival probabilities, it's useful to be able to analyze separately each of the various failure modes. For example, if reliable statistics for disk failure can be obtained, then a probability mass function for that form of failure can be generated. Similarly, statistics on other hardware failures, administrative errors, network losses, etc., can all be estimated independently. If those estimates can then be combined into a single PMF for a share, then we can use it to predict failures for that share. \end_layout \begin_layout Standard Combining independent failure modes for a single share is straightforward. 
If \begin_inset Formula $p_{i,j}$ \end_inset is the probability of survival of the \begin_inset Formula $j$ \end_inset th failure mode of share \begin_inset Formula $i$ \end_inset , \begin_inset Formula $1\leq j\leq m$ \end_inset , then \begin_inset Formula \[ Pr[K_{i}=k]=f_{i}(k)=\begin{cases} \prod_{j=1}^{m}p_{i,j} & k=1\\ 1-\prod_{j=1}^{m}p_{i,j} & k=0\end{cases}\] \end_inset is the survival PMF. \end_layout \begin_layout Subsection Multi-share failures \begin_inset CommandInset label LatexCommand label name "sub:Multi-share-failures" \end_inset \end_layout \begin_layout Standard If there are failure modes that affect multiple computers, we can also construct the PMF that predicts their survival. The key observation is that the PMF has non-zero probabilities only for \begin_inset Formula $0$ \end_inset survivors and \begin_inset Formula $n$ \end_inset survivors, where \begin_inset Formula $n$ \end_inset is the number of shares in the set. If \begin_inset Formula $p$ \end_inset is the probability of survival, the PMF of \begin_inset Formula $K$ \end_inset , a random variable representing the number of survivors is \begin_inset Formula \[ Pr[K=k]=f(k)=\begin{cases} p & k=n\\ 0 & 0 \begin_inset Text \begin_layout Plain Layout \begin_inset Formula $k$ \end_inset \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout \begin_inset Formula $Pr[K=k]$ \end_inset \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout \begin_inset Formula $Pr[file\, loss]=Pr[K \begin_inset Text \begin_layout Plain Layout \begin_inset Formula $N/k$ \end_inset \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout 1 \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout \begin_inset Formula $1.60\times10^{-9}$ \end_inset \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout \begin_inset Formula $2.53\times10^{-11}$ \end_inset \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout 12 \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout 2 \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout \begin_inset Formula $3.80\times10^{-8}$ \end_inset \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout \begin_inset Formula $1.63\times10^{-9}$ \end_inset \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout 6 \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout 3 \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout \begin_inset Formula $4.04\times10^{-7}$ \end_inset \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout \begin_inset Formula $3.70\times10^{-8}$ \end_inset \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout 4 \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout 4 \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout \begin_inset Formula $2.06\times10^{-6}$ \end_inset \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout \begin_inset Formula $4.44\times10^{-7}$ \end_inset \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout 3 \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout 5 \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout \begin_inset Formula $2.10\times10^{-5}$ \end_inset \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout \begin_inset Formula $2.50\times10^{-6}$ \end_inset \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout 2.4 \end_layout \end_inset \begin_inset Text \begin_layout Plain 
Layout 6 \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout \begin_inset Formula $0.000428$ \end_inset \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout \begin_inset Formula $2.35\times10^{-5}$ \end_inset \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout 2 \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout 7 \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout \begin_inset Formula $0.00417$ \end_inset \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout \begin_inset Formula $0.000452$ \end_inset \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout 1.7 \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout 8 \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout \begin_inset Formula $0.0157$ \end_inset \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout \begin_inset Formula $0.00462$ \end_inset \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout 1.5 \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout 9 \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout \begin_inset Formula $0.00127$ \end_inset \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout \begin_inset Formula $0.0203$ \end_inset \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout 1.3 \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout 10 \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout \begin_inset Formula $0.0230$ \end_inset \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout \begin_inset Formula $0.0216$ \end_inset \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout 1.2 \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout 11 \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout \begin_inset Formula $0.208$ \end_inset \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout \begin_inset Formula $0.0446$ \end_inset \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout 1.1 \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout 12 \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout \begin_inset Formula $0.747$ \end_inset \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout \begin_inset Formula $0.253$ \end_inset \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout 1 \end_layout \end_inset \end_inset \end_layout \begin_layout Plain Layout \begin_inset Caption \begin_layout Plain Layout \align left \begin_inset CommandInset label LatexCommand label name "tab:Example-PMF" \end_inset Example PMF \end_layout \end_inset \end_layout \begin_layout Plain Layout \end_layout \end_inset \end_layout \begin_layout Standard The table demonstrates the importance of the selection of \begin_inset Formula $k$ \end_inset , and the tradeoff against file size expansion. Note that the survival of exactly 9 servers is significantly less likely than the survival of 8 or 10 servers. This is, again, an artifact of the group failure modes. Because of this, there is no reason to choose \begin_inset Formula $k=9$ \end_inset over \begin_inset Formula $k=10$ \end_inset . Normally, reducing the number of shares needed for reassembly improve the file's chances of survival, but in this case it provides a minuscule gain in reliability at the cost of a 10% increase in bandwidth and storage consumed. 
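\end_layout

\begin_layout Standard
As a concrete illustration of equation 
\begin_inset CommandInset ref
LatexCommand ref
reference "eq:convolution"

\end_inset

, the following Python sketch (illustrative pseudocode, not part of Tahoe) builds the PMF of 
\begin_inset Formula $K$
\end_inset

 by convolving the two-point PMFs of the individual shares, then sums the low tail to obtain 
\begin_inset Formula $Pr[K<k]$
\end_inset

.
\end_layout

\begin_layout LyX-Code
def convolve(f, g):
\end_layout

\begin_layout LyX-Code
    h = [0.0] * (len(f) + len(g) - 1)
\end_layout

\begin_layout LyX-Code
    for i, fi in enumerate(f):
\end_layout

\begin_layout LyX-Code
        for j, gj in enumerate(g):
\end_layout

\begin_layout LyX-Code
            h[i + j] += fi * gj
\end_layout

\begin_layout LyX-Code
    return h
\end_layout

\begin_layout LyX-Code

\end_layout

\begin_layout LyX-Code
def pr_file_loss(share_probs, k):
\end_layout

\begin_layout LyX-Code
    pmf = [1.0]
\end_layout

\begin_layout LyX-Code
    for p in share_probs:
\end_layout

\begin_layout LyX-Code
        pmf = convolve(pmf, [1.0 - p, p])
\end_layout

\begin_layout LyX-Code
    return sum(pmf[:k])
\end_layout

\begin_layout Standard
For 
\begin_inset Formula $N=10$
\end_inset

, 
\begin_inset Formula $k=3$
\end_inset

 and 
\begin_inset Formula $p=0.9$
\end_inset

, pr_file_loss([0.9] * 10, 3) returns approximately 
\begin_inset Formula $3.74\times10^{-7}$
\end_inset

, matching the example in the next subsection.
\end_layout

\begin_layout Standard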
\end_layout \begin_layout Subsection Share Duplication \end_layout \begin_layout Standard Before moving on to consider issues other than single-interval file loss, let's analyze one more possibility, that of \begin_inset Quotes eld \end_inset cheap \begin_inset Quotes erd \end_inset file repair via share duplication. \end_layout \begin_layout Standard Initially, files are split using erasure coding, which creates \begin_inset Formula $N$ \end_inset unique shares, any \begin_inset Formula $k$ \end_inset of which can be used to to reconstruct the file. When shares are lost, proper repair downloads some \begin_inset Formula $k$ \end_inset shares, reconstructs the original file and then uses the erasure coding algorithm to reconstruct the lost shares, then redeploys them to peers in the network. This is a somewhat expensive process. \end_layout \begin_layout Standard A cheaper repair option is simply to direct some peer that has share \begin_inset Formula $s_{i}$ \end_inset to send a copy to another peer, thus increasing by one the number of shares in the network. This is not as good as actually replacing the lost share, though. Suppose that more shares were lost, leaving only \begin_inset Formula $k$ \end_inset shares remaining. If two of those shares are identical, because one was duplicated in this fashion, then only \begin_inset Formula $k-1$ \end_inset shares truly remain, and the file can no longer be reconstructed. \end_layout \begin_layout Standard However, such cheap repair is not completely pointless; it does increase file survivability. But by how much? \end_layout \begin_layout Standard Effectively, share duplication simply increases the probability that \begin_inset Formula $s_{i}$ \end_inset will survive, by providing two locations from which to retrieve it. We can view the two copies of the single share as one, but with a higher probability of survival than would be provided by either of the two peers. In particular, if \begin_inset Formula $p_{1}$ \end_inset and \begin_inset Formula $p_{2}$ \end_inset are the probabilities that the two peers will survive, respectively, then \begin_inset Formula \[ Pr[s_{i}\, survives]=p_{1}+p_{2}-p_{1}p_{2}\] \end_inset \end_layout \begin_layout Standard More generally, if a single share is deployed on \begin_inset Formula $n$ \end_inset peers, each with a PMF \begin_inset Formula $f_{i}(j),0\leq j\leq1,1\leq i\leq n$ \end_inset , the share survival count is a random variable \begin_inset Formula $K$ \end_inset and the probability of share loss is \begin_inset Formula \[ Pr[K=0]=(f_{1}\star f_{2}\star\ldots\star f_{n})(0)\] \end_inset \end_layout \begin_layout Standard From that, we can construct a share PMF in the obvious way, which can then be convolved with the other share PMFs to produce the share set PMF. \end_layout \begin_layout Example Suppose a file has \begin_inset Formula $N=10,k=3$ \end_inset and that all servers have survival probability \begin_inset Formula $p=.9$ \end_inset . Given a full complement of shares, \begin_inset Formula $Pr[\textrm{file\, loss}]=3.74\times10^{-7}$ \end_inset . Suppose that four shares are lost, which increases \begin_inset Formula $Pr[\textrm{file\, loss}]$ \end_inset to \begin_inset Formula $.00127$ \end_inset , a value \begin_inset Formula $3400$ \end_inset times greater. 
Rather than doing a proper reconstruction, we could direct four peers still holding shares to send a copy of their share to a new peer, which changes the composition of the share set from six unique \begin_inset Quotes eld \end_inset standard \begin_inset Quotes erd \end_inset shares to two standard shares, each with survival probability \begin_inset Formula $.9$ \end_inset , and four \begin_inset Quotes eld \end_inset doubled \begin_inset Quotes erd \end_inset shares, each with survival probability \begin_inset Formula $2p-p^{2}\approxeq.99$ \end_inset . \end_layout \begin_layout Example Combining the two single-peer share PMFs with the four doubled-share PMFs gives a new file loss probability of \begin_inset Formula $6.64\times10^{-6}$ \end_inset . Not as good as a full repair, but still quite respectable. Also, if storage were not a concern, all six shares could be duplicated, for a \begin_inset Formula $Pr[file\, loss]=1.48\times10^{-7}$ \end_inset , which is actually about 2.5 times better than the nominal case. \end_layout \begin_layout Example The reason such cheap repairs may be attractive in many cases is that distributed bandwidth is cheaper than bandwidth through a single peer. This is particularly true if that single peer has a very slow connection, which is common for home computers -- especially in the outbound direction. \end_layout \begin_layout Section Long-Term Reliability \end_layout \begin_layout Standard Thus far, we've focused entirely on the probability that a file survives the interval \begin_inset Formula $A$ \end_inset between repair times. The probability that a file survives long-term, though, is also important. As long as the probability of failure during a repair period is non-zero, a given file will eventually be lost. We want to know the probability of surviving for time \begin_inset Formula $t$ \end_inset , and how the parameters \begin_inset Formula $A$ \end_inset (time between repairs) and \begin_inset Formula $L$ \end_inset (allowed share low watermark) affect survival time. \end_layout \begin_layout Standard To model file survival time, let \begin_inset Formula $T$ \end_inset be a random variable denoting the time at which a given file becomes unrecoverable, and \begin_inset Formula $R(t)=Pr[T>t]$ \end_inset be a function that gives the probability that the file survives to time \begin_inset Formula $t$ \end_inset . \begin_inset Formula $R(t)$ \end_inset is the survival function (the complement of the cumulative distribution function) of \begin_inset Formula $T$ \end_inset . \end_layout \begin_layout Standard Most survival functions are continuous, but \begin_inset Formula $R(t)$ \end_inset is inherently discrete and stochastic. The time steps are the repair intervals, each of length \begin_inset Formula $A$ \end_inset , so \begin_inset Formula $T$ \end_inset -values are multiples of \begin_inset Formula $A$ \end_inset . During each interval, the file's shares degrade according to the probability mass function of \begin_inset Formula $K$ \end_inset . \end_layout \begin_layout Subsection Aggressive Repair \end_layout \begin_layout Standard Let's first consider the case of an aggressive repairer. Every interval, this repairer checks the file for share losses and restores them. Thus, at the beginning of each interval, the file always has \begin_inset Formula $N$ \end_inset shares, distributed on servers with various individual and group failure probabilities, which will survive or fail per the output of random variable \begin_inset Formula $K$ \end_inset .
\end_layout \begin_layout Standard For any interval, then, the probability that the file will survive is \begin_inset Formula $f\left(k\right)=Pr[K\geq k]$ \end_inset . Since each interval's success or failure is independent, and assuming the share reliabilities remain constant over time, \begin_inset Formula \begin{equation} R\left(t\right)=f(k)^{t}\end{equation} \end_inset \end_layout \begin_layout Standard This simple survival function makes it easy to select parameters \begin_inset Formula $N$ \end_inset and \begin_inset Formula $k$ \end_inset such that \begin_inset Formula $R(t)\geq r$ \end_inset , where \begin_inset Formula $r$ \end_inset is a user-specified parameter indicating the desired probability of survival to time \begin_inset Formula $t$ \end_inset . Specifically, we can solve for \begin_inset Formula $f\left(k\right)$ \end_inset in \begin_inset Formula $r\leq f\left(k\right)^{t}$ \end_inset , giving: \begin_inset Formula \begin{equation} f\left(k\right)\geq\sqrt[t]{r}\end{equation} \end_inset \end_layout \begin_layout Standard So, given a PMF \begin_inset Formula $f\left(k\right)$ \end_inset , to assure the survival of a file to time \begin_inset Formula $t$ \end_inset with probability at least \begin_inset Formula $r$ \end_inset , choose \begin_inset Formula $k$ \end_inset such that \begin_inset Formula $f\left(k\right)\geq\sqrt[t]{r}$ \end_inset . For example, if \begin_inset Formula $A$ \end_inset is one month, \begin_inset Formula $r=1-\nicefrac{1}{10^{6}}$ \end_inset , and \begin_inset Formula $t=120$ \end_inset , or 10 years, we calculate \begin_inset Formula $f\left(k\right)\geq\sqrt[120]{.999999}\approx0.999999992$ \end_inset . Per the PMF of table \begin_inset CommandInset ref LatexCommand ref reference "tab:Example-PMF" \end_inset , this means \begin_inset Formula $k=2$ \end_inset achieves the goal, at the cost of a six-fold expansion in stored file size. If the lesser goal of no more than \begin_inset Formula $\nicefrac{1}{1000}$ \end_inset probability of loss is taken, then since \begin_inset Formula $\sqrt[120]{.999}\approx.999992$ \end_inset , \begin_inset Formula $k=5$ \end_inset achieves the goal with an expansion factor of \begin_inset Formula $2.4$ \end_inset . \end_layout \begin_layout Subsection Repair Cost \end_layout \begin_layout Standard The simplicity and predictability of aggressive repair are attractive, but there is a downside: repairs cost processing power and bandwidth. The processing power is proportional to the size of the file, since the whole file must be reconstructed and then re-processed using the Reed-Solomon algorithm, while the bandwidth cost is proportional to the number of missing shares that must be replaced, \begin_inset Formula $N-K$ \end_inset . \end_layout \begin_layout Standard Let \begin_inset Formula $c\left(s,d,k\right)$ \end_inset be a cost function that combines the processing cost of regenerating a file of size \begin_inset Formula $s$ \end_inset and the bandwidth cost of downloading a file of size \begin_inset Formula $s$ \end_inset and uploading \begin_inset Formula $d$ \end_inset shares each of size \begin_inset Formula $\nicefrac{s}{k}$ \end_inset . Also, let \begin_inset Formula $D$ \end_inset denote the random variable \begin_inset Formula $N-K$ \end_inset , which is the number of shares that must be redistributed to bring the file share set back up to \begin_inset Formula $N$ \end_inset after degrading during an interval. The probability mass function of \begin_inset Formula $D$ \end_inset is \begin_inset Formula \[ Pr[D=d]=f(d)=\begin{cases} Pr\left[K=N\right]+Pr\left[K<k\right] & d=0\\ Pr\left[K=N-d\right] & 0<d\leq N-k\\ 0 & \textrm{otherwise}\end{cases}\] \end_inset \end_layout
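\begin_layout Standard One way to summarize these definitions is the expected repair cost per interval, which simply weights the cost of each possible repair by its probability (no repair, and hence no repair cost, is incurred when \begin_inset Formula $D=0$ \end_inset ): \begin_inset Formula \[ E\left[\textrm{repair\, cost}\right]=\sum_{d=1}^{N-k}Pr\left[D=d\right]\, c\left(s,d,k\right)\] \end_inset \end_layout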
[figure mutable-DSA.svg (text labels recovered from the original drawing): DSA private key (256 bit string), DSA public key (2048+ bit string), salt, read-cap, write-cap, verify cap, deep-verify cap, storage index (SI:A, SI:B), AES readkey / writekey / deepverifykey, encrypted salt, data, crypttext, FEC, shares, hashes (H), merkle trees, DSA signature, pubkey hash.]
tahoe-lafs-1.10.0/docs/proposed/mutable-DSA.txt000066400000000000000000000503121221140116300211660ustar00rootroot00000000000000
(protocol proposal, work-in-progress, not authoritative)

(this document describes DSA-based mutable files, as opposed to the RSA-based mutable files that were introduced in tahoe-0.7.0. This proposal has not yet been implemented. Please see mutable-DSA.svg for a quick picture of the crypto scheme described herein)

This file shows only the differences from RSA-based mutable files to (EC)DSA-based mutable files. You have to read and understand docs/specifications/mutable.rst before reading this file (mutable-DSA.txt).

== new design criteria ==

* provide for variable number of semiprivate sections?
* put e.g. filenames in one section, readcaps in another, writecaps in a third (ideally, to modify a filename you'd only have to modify one section, and we'd make encrypting/hashing more efficient by doing it on larger blocks of data, preferably one segment at a time instead of one writecap at a time)
* cleanly distinguish between "container" (leases, write-enabler) and "slot contents" (everything that comes from share encoding)
* sign all slot bits (to allow server-side verification)
* someone reading the whole file should be able to read the share in a single linear pass with just a single seek to zero
* writing the file should occur in two passes (3 seeks) in mostly linear order
   1: write version/pubkey/topbits/salt
   2: write zeros / seek+prefill where the hashchain/tree goes
   3: write blocks
   4: seek back
   5: write hashchain/tree
* storage format: consider putting container bits in a separate file
   - $SI.index (contains list of shnums, leases, other-cabal-members, WE, etc)
   - $SI-$shnum.share (actual share data)
* possible layout:
   - version
   - pubkey
   - topbits (k, N, size, segsize, etc)
   - salt? (salt tree root?)
   - share hash root
   - share hash chain
   - block hash tree
   - (salts?) (salt tree?)
   - blocks
   - signature (of [version .. share hash root])

=== SDMF slots overview ===

Each SDMF slot is created with a DSA public/private key pair, using a system-wide common modulus and generator, in which the private key is a random 256 bit number, and the public key is a larger value (about 2048 bits) that can be derived with a bit of math from the private key. The public key is known as the "verification key", while the private key is called the "signature key". The 256-bit signature key is used verbatim as the "write capability". This can be converted into the 2048ish-bit verification key through a fairly cheap set of modular exponentiation operations; this is done any time the holder of the write-cap wants to read the data. (Note that the signature key can either be a newly-generated random value, or the hash of something else, if we found a need for a capability that's stronger than the write-cap).
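As a rough illustration of why that conversion is cheap (and not as a description of any existing Tahoe code), the whole keypair amounts to one random draw plus one modular exponentiation. The names p, q, g below stand for the grid-wide DSA modulus, subgroup order, and generator mentioned above, and make_dsa_keypair() is a hypothetical helper:

    import os

    def make_dsa_keypair(p, q, g):
        # signature key / write-cap: a random value modulo the ~256-bit subgroup order
        x = 1 + int.from_bytes(os.urandom(32), "big") % (q - 1)
        # verification key: a single modular exponentiation, cheap enough to
        # recompute on demand whenever the write-cap holder wants to read
        y = pow(g, x, p)   # roughly 2048-bit result
        return x, y        # (signature key, verification key)

Since the signature key doubles as the write capability, the much larger verification key never needs to be stored by the writer; it can be regenerated from the 256-bit value whenever it is needed.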
This results in a write-cap which is 256 bits long and can thus be expressed in an ASCII/transport-safe encoded form (base62 encoding, fits in 72 characters, including a local-node http: convenience prefix). The private key is hashed to form a 256-bit "salt". The public key is also hashed to form a 256-bit "pubkey hash". These two values are concatenated, hashed, and truncated to 192 bits to form the first 192 bits of the read-cap. The pubkey hash is hashed by itself and truncated to 64 bits to form the last 64 bits of the read-cap. The full read-cap is 256 bits long, just like the write-cap. The first 192 bits of the read-cap are hashed and truncated to form the first 192 bits of the "traversal cap". The last 64 bits of the read-cap are hashed to form the last 64 bits of the traversal cap. This gives us a 256-bit traversal cap. The first 192 bits of the traversal-cap are hashed and truncated to form the first 64 bits of the storage index. The last 64 bits of the traversal-cap are hashed to form the last 64 bits of the storage index. This gives us a 128-bit storage index. The verification-cap is the first 64 bits of the storage index plus the pubkey hash, 320 bits total. The verification-cap doesn't need to be expressed in a printable transport-safe form, so it's ok that it's longer. The read-cap is hashed one way to form an AES encryption key that is used to encrypt the salt; this key is called the "salt key". The encrypted salt is stored in the share. The private key never changes, therefore the salt never changes, and the salt key is only used for a single purpose, so there is no need for an IV. The read-cap is hashed a different way to form the master data encryption key. A random "data salt" is generated each time the share's contents are replaced, and the master data encryption key is concatenated with the data salt, then hashed, to form the AES CTR-mode "read key" that will be used to encrypt the actual file data. This is to avoid key-reuse. An outstanding issue is how to avoid key reuse when files are modified in place instead of being replaced completely; this is not done in SDMF but might occur in MDMF. The master data encryption key is used to encrypt data that should be visible to holders of a write-cap or a read-cap, but not to holders of a traversal-cap. The private key is hashed one way to form the salt, and a different way to form the "write enabler master". For each storage server on which a share is kept, the write enabler master is concatenated with the server's nodeid and hashed, and the result is called the "write enabler" for that particular server. Note that multiple shares of the same slot stored on the same server will all get the same write enabler, i.e. the write enabler is associated with the "bucket", rather than the individual shares. The private key is hashed a third way to form the "data write key", which can be used by applications which wish to store some data in a form that is only available to those with a write-cap, and not to those with merely a read-cap. This is used to implement transitive read-onlyness of dirnodes. The traversal cap is hashed to work the "traversal key", which can be used by applications that wish to store data in a form that is available to holders of a write-cap, read-cap, or traversal-cap. The idea is that dirnodes will store child write-caps under the writekey, child names and read-caps under the read-key, and verify-caps (for files) or deep-verify-caps (for directories) under the traversal key. 
This would give the holder of a root deep-verify-cap the ability to create a verify manifest for everything reachable from the root, but not the ability to see any plaintext or filenames. This would make it easier to delegate filechecking and repair to a not-fully-trusted agent. The public key is stored on the servers, as is the encrypted salt, the (non-encrypted) data salt, the encrypted data, and a signature. The container records the write-enabler, but of course this is not visible to readers. To make sure that every byte of the share can be verified by a holder of the verify-cap (and also by the storage server itself), the signature covers the version number, the sequence number, the root hash "R" of the share merkle tree, the encoding parameters, and the encrypted salt. "R" itself covers the hash trees and the share data. The read-write URI is just the private key. The read-only URI is the read-cap key. The deep-verify URI is the traversal-cap. The verify-only URI contains the the pubkey hash and the first 64 bits of the storage index. FMW:b2a(privatekey) FMR:b2a(readcap) FMT:b2a(traversalcap) FMV:b2a(storageindex[:64])b2a(pubkey-hash) Note that this allows the read-only, deep-verify, and verify-only URIs to be derived from the read-write URI without actually retrieving any data from the share, but instead by regenerating the public key from the private one. Users of the read-only, deep-verify, or verify-only caps must validate the public key against their pubkey hash (or its derivative) the first time they retrieve the pubkey, before trusting any signatures they see. The SDMF slot is allocated by sending a request to the storage server with a desired size, the storage index, and the write enabler for that server's nodeid. If granted, the write enabler is stashed inside the slot's backing store file. All further write requests must be accompanied by the write enabler or they will not be honored. The storage server does not share the write enabler with anyone else. The SDMF slot structure will be described in more detail below. The important pieces are: * a sequence number * a root hash "R" * the data salt * the encoding parameters (including k, N, file size, segment size) * a signed copy of [seqnum,R,data_salt,encoding_params] (using signature key) * the verification key (not encrypted) * the share hash chain (part of a Merkle tree over the share hashes) * the block hash tree (Merkle tree over blocks of share data) * the share data itself (erasure-coding of read-key-encrypted file data) * the salt, encrypted with the salt key The access pattern for read (assuming we hold the write-cap) is: * generate public key from the private one * hash private key to get the salt, hash public key, form read-cap * form storage-index * use storage-index to locate 'k' shares with identical 'R' values * either get one share, read 'k' from it, then read k-1 shares * or read, say, 5 shares, discover k, either get more or be finished * or copy k into the URIs * .. jump to "COMMON READ", below To read (assuming we only hold the read-cap), do: * hash read-cap pieces to generate storage index and salt key * use storage-index to locate 'k' shares with identical 'R' values * retrieve verification key and encrypted salt * decrypt salt * hash decrypted salt and pubkey to generate another copy of the read-cap, make sure they match (this validates the pubkey) * .. 
jump to "COMMON READ" * COMMON READ: * read seqnum, R, data salt, encoding parameters, signature * verify signature against verification key * hash data salt and read-cap to generate read-key * read share data, compute block-hash Merkle tree and root "r" * read share hash chain (leading from "r" to "R") * validate share hash chain up to the root "R" * submit share data to erasure decoding * decrypt decoded data with read-key * submit plaintext to application The access pattern for write is: * generate pubkey, salt, read-cap, storage-index as in read case * generate data salt for this update, generate read-key * encrypt plaintext from application with read-key * application can encrypt some data with the data-write-key to make it only available to writers (used for transitively-readonly dirnodes) * erasure-code crypttext to form shares * split shares into blocks * compute Merkle tree of blocks, giving root "r" for each share * compute Merkle tree of shares, find root "R" for the file as a whole * create share data structures, one per server: * use seqnum which is one higher than the old version * share hash chain has log(N) hashes, different for each server * signed data is the same for each server * include pubkey, encrypted salt, data salt * now we have N shares and need homes for them * walk through peers * if share is not already present, allocate-and-set * otherwise, try to modify existing share: * send testv_and_writev operation to each one * testv says to accept share if their(seqnum+R) <= our(seqnum+R) * count how many servers wind up with which versions (histogram over R) * keep going until N servers have the same version, or we run out of servers * if any servers wound up with a different version, report error to application * if we ran out of servers, initiate recovery process (described below) ==== Cryptographic Properties ==== This scheme protects the data's confidentiality with 192 bits of key material, since the read-cap contains 192 secret bits (derived from an encrypted salt, which is encrypted using those same 192 bits plus some additional public material). The integrity of the data (assuming that the signature is valid) is protected by the 256-bit hash which gets included in the signature. The privilege of modifying the data (equivalent to the ability to form a valid signature) is protected by a 256 bit random DSA private key, and the difficulty of computing a discrete logarithm in a 2048-bit field. There are a few weaker denial-of-service attacks possible. If N-k+1 of the shares are damaged or unavailable, the client will be unable to recover the file. Any coalition of more than N-k shareholders will be able to effect this attack by merely refusing to provide the desired share. The "write enabler" shared secret protects existing shares from being displaced by new ones, except by the holder of the write-cap. One server cannot affect the other shares of the same file, once those other shares are in place. The worst DoS attack is the "roadblock attack", which must be made before those shares get placed. Storage indexes are effectively random (being derived from the hash of a random value), so they are not guessable before the writer begins their upload, but there is a window of vulnerability during the beginning of the upload, when some servers have heard about the storage index but not all of them. 
The roadblock attack we want to prevent is when the first server that the uploader contacts quickly runs to all the other selected servers and places a bogus share under the same storage index, before the uploader can contact them. These shares will normally be accepted, since storage servers create new shares on demand. The bogus shares would have randomly-generated write-enablers, which will of course be different than the real uploader's write-enabler, since the malicious server does not know the write-cap. If this attack were successful, the uploader would be unable to place any of their shares, because the slots have already been filled by the bogus shares. The uploader would probably try for peers further and further away from the desired location, but eventually they will hit a preconfigured distance limit and give up. In addition, the further the writer searches, the less likely it is that a reader will search as far. So a successful attack will either cause the file to be uploaded but not be reachable, or it will cause the upload to fail. If the uploader tries again (creating a new privkey), they may get lucky and the malicious servers will appear later in the query list, giving sufficient honest servers a chance to see their share before the malicious one manages to place bogus ones. The first line of defense against this attack is the timing challenges: the attacking server must be ready to act the moment a storage request arrives (which will only occur for a certain percentage of all new-file uploads), and only has a few seconds to act before the other servers will have allocated the shares (and recorded the write-enabler, terminating the window of vulnerability). The second line of defense is post-verification, and is possible because the storage index is partially derived from the public key hash. A storage server can, at any time, verify every public bit of the container as being signed by the verification key (this operation is recommended as a continual background process, when disk usage is minimal, to detect disk errors). The server can also hash the verification key to derive 64 bits of the storage index. If it detects that these 64 bits do not match (but the rest of the share validates correctly), then the implication is that this share was stored to the wrong storage index, either due to a bug or a roadblock attack. If an uploader finds that they are unable to place their shares because of "bad write enabler errors" (as reported by the prospective storage servers), it can "cry foul", and ask the storage server to perform this verification on the share in question. If the pubkey and storage index do not match, the storage server can delete the bogus share, thus allowing the real uploader to place their share. Of course the origin of the offending bogus share should be logged and reported to a central authority, so corrective measures can be taken. It may be necessary to have this "cry foul" protocol include the new write-enabler, to close the window during which the malicious server can re-submit the bogus share during the adjudication process. If the problem persists, the servers can be placed into pre-verification mode, in which this verification is performed on all potential shares before being committed to disk. This mode is more CPU-intensive (since normally the storage server ignores the contents of the container altogether), but would solve the problem completely. The mere existence of these potential defenses should be sufficient to deter any actual attacks. 
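The post-verification check described above is easy to sketch. The exact hash chain below is a simplification (Tahoe's real hashes are tagged SHA-256d and the truncation points follow the cap-derivation rules given earlier), and none of these function names exist in the codebase; the sketch only shows the shape of the test a storage server could run in the background or in pre-verification mode:

    from hashlib import sha256

    def si_tail_from_pubkey(verification_key_bytes):
        # simplified stand-in for: pubkey hash -> last 64 bits of read-cap ->
        # last 64 bits of traversal cap -> last 64 bits of storage index
        pubkey_hash = sha256(verification_key_bytes).digest()
        readcap_tail = sha256(pubkey_hash).digest()[:8]
        traversal_tail = sha256(readcap_tail).digest()[:8]
        return sha256(traversal_tail).digest()[:8]

    def looks_like_roadblock(verification_key_bytes, claimed_storage_index):
        # the share's contents validate against its own pubkey, but the pubkey
        # does not correspond to the storage index it was stored under
        return claimed_storage_index[8:] != si_tail_from_pubkey(verification_key_bytes)

A share whose signature and hash trees check out but for which looks_like_roadblock() returns True was either stored under the wrong index by a buggy client or planted by a roadblock attacker, and is the kind of share the server can delete when an uploader "cries foul".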
Note that the storage index only has 64 bits of pubkey-derived data in it, which is below the usual crypto guidelines for security factors. In this case it's a pre-image attack which would be needed, rather than a collision, and the actual attack would be to find a keypair for which the public key can be hashed three times to produce the desired portion of the storage index. We believe that 64 bits of material is sufficiently resistant to this form of pre-image attack to serve as a suitable deterrent.

=== SMDF Slot Format ===

This SMDF data lives inside a server-side MutableSlot container. The server is generally oblivious to this format, but it may look inside the container when verification is desired. This data is tightly packed. There are no gaps left between the different fields, and the offset table is mainly present to allow future flexibility of key sizes.

 #    offset  size    name
 1    0       1       version byte, \x01 for this format
 2    1       8       sequence number. 2^64-1 must be handled specially, TBD
 3    9       32      "R" (root of share hash Merkle tree)
 4    41      32      data salt (readkey is H(readcap+data_salt))
 5    73      32      encrypted salt (AESenc(key=H(readcap), salt)
 6    105     18      encoding parameters:
      105     1        k
      106     1        N
      107     8        segment size
      115     8        data length (of original plaintext)
 7    123     36      offset table:
      127     4        (9) signature
      131     4        (10) share hash chain
      135     4        (11) block hash tree
      139     4        (12) share data
      143     8        (13) EOF
 8    151     256     verification key (2048bit DSA key)
 9    407     40      signature=DSAsig(H([1,2,3,4,5,6]))
10    447     (a)     share hash chain, encoded as:
                       "".join([pack(">H32s", shnum, hash) for (shnum,hash) in needed_hashes])
11    ??      (b)     block hash tree, encoded as:
                       "".join([pack(">32s",hash) for hash in block_hash_tree])
12    ??      LEN     share data
13    ??      --      EOF

(a) The share hash chain contains ceil(log(N)) hashes, each 32 bytes long. This is the set of hashes necessary to validate this share's leaf in the share Merkle tree. For N=10, this is 4 hashes, i.e. 128 bytes.

(b) The block hash tree contains ceil(length/segsize) hashes, each 32 bytes long. This is the set of hashes necessary to validate any given block of share data up to the per-share root "r". Each "r" is a leaf of the share hash tree (with root "R"), from which a minimal subset of hashes is put in the share hash chain in (8).

== TODO ==

Every node in a given tahoe grid must have the same common DSA moduli and exponent, but different grids could use different parameters. We haven't figured out how to define a "grid id" yet, but I think the DSA parameters should be part of that identifier. In practical terms, this might mean that the Introducer tells each node what parameters to use, or perhaps the node could have a config file which specifies them instead.

The shares MUST have a ciphertext hash of some sort (probably a merkle tree over the blocks, and/or a flat hash of the ciphertext), just like immutable files do. Without this, a malicious publisher could produce some shares that result in file A, and other shares that result in file B, and upload both of them (incorporating both into the share hash tree). The result would be a read-cap that would sometimes resolve to file A, and sometimes to file B, depending upon which servers were used for the download. By including a ciphertext hash in the SDMF data structure, the publisher must commit to just a single ciphertext, closing this hole. See ticket #492 for more details.
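To make the fixed-size portion of the layout table above concrete, here is a sketch of how a reader might unpack the first 123 bytes of a share. It is illustrative only: the names are not taken from any existing module, and details such as the special-cased 2^64-1 sequence number are ignored.

    import struct

    # version(1) seqnum(8) R(32) data_salt(32) encrypted_salt(32) k(1) N(1) segsize(8) datalen(8)
    HEADER_FMT = ">B Q 32s 32s 32s B B Q Q"
    HEADER_SIZE = struct.calcsize(HEADER_FMT)    # 123 bytes, matching the table

    def parse_smdf_header(share_bytes):
        (version, seqnum, root_hash_R, data_salt, encrypted_salt,
         k, N, segment_size, data_length) = struct.unpack(HEADER_FMT,
                                                          share_bytes[:HEADER_SIZE])
        if version != 0x01:
            raise ValueError("not an SMDF v1 share")
        # the offset table and the variable-length fields (verification key,
        # signature, hash chains, share data) follow at byte 123
        return dict(seqnum=seqnum, R=root_hash_R, data_salt=data_salt,
                    encrypted_salt=encrypted_salt, k=k, N=N,
                    segment_size=segment_size, data_length=data_length)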
tahoe-lafs-1.10.0/docs/proposed/mutsemi.svg000066400000000000000000002226401221140116300206000ustar00rootroot00000000000000
[figure mutsemi.svg (text labels recovered from the original drawing): signing (private) key, semi-private key, verifying (public) key; read-write cap, read-only cap, verify cap; salt, encryption key, plaintext, ciphertext (AES-CTR), FEC into share 1 - share 4, Merkle Tree, SHA256d (truncated) hashes.]
tahoe-lafs-1.10.0/docs/proposed/old-accounts-introducer.txt000066400000000000000000000157651221140116300237110ustar00rootroot00000000000000
This is a proposal for handling accounts and quotas in Tahoe. Nothing is final yet; we are still evaluating the options.

= Account Management: Introducer-based =

A Tahoe grid can be configured in several different modes. The simplest mode (which is also the default) is completely permissive: all storage servers will accept shares from all clients, and no attempt is made to keep track of who is storing what. Access to the grid is mostly equivalent to having access to the Introducer (or convincing one of the existing members to give you a list of all their storage server FURLs).

This mode, while a good starting point, does not accommodate any sort of auditing or quota management. Even in a small friendnet, operators might like to know how much of their storage space is being consumed by Alice, so they might be able to ask her to cut back when overall disk usage is getting too high. In a larger commercial deployment, a service provider needs to be able to get accurate usage numbers so they can bill the user appropriately. In addition, the operator may want the ability to delete all of Bob's shares (i.e. cancel any outstanding leases) when he terminates his account.

There are several lease-management/garbage-collection/deletion strategies possible for a Tahoe grid, but the most efficient ones require knowledge of lease ownership, so that renewals and expiration can take place on a per-account basis rather than a (more numerous) per-share basis.

== Accounts ==

To accomplish this, "Accounts" can be established in a Tahoe grid. There is nominally one account per human user of the grid, but of course a user might use multiple accounts, or an account might be shared between multiple users. The Account is the smallest unit of quota and lease management.

Accounts are created by an "Account Manager". In a commercial network there will be just one (centralized) account manager, and all storage nodes will be configured to require a valid account before providing storage services. In a friendnet, each peer can run their own account manager, and servers will accept accounts from any of the managers (this mode is permissive but allows quota-tracking of non-malicious users).

The account manager is free to manage the accounts as it pleases. Large systems will probably use a database to correlate things like username, storage consumed, billing status, etc.

== Overview ==

The Account Manager ("AM") replaces the normal Introducer node: grids which use an Account Manager will not run an Introducer, and the participating nodes will not be configured with an "introducer.furl". Instead, each client will be configured with a different "account.furl", which gives that client access to a specific account. These account FURLs point to an object inside the Account Manager which exists solely for the benefit of that one account.
When the client needs access to storage servers, it will use this account object to acquire personalized introductions to a per-account "Personal Storage Server" facet, one per storage server node. For example, Alice would wind up with PSS[1A] on server 1, and PSS[2A] on server 2. Bob would get PSS[1B] and PSS[2B]. These PSS facets provide the same remote methods as the old generic SS facet, except that every time they create a lease object, the account information of the holder is recorded in that lease. The client stores a list of these PSS facet FURLs in persistent storage, and uses them in the "get_permuted_peers" function that all uploads and downloads use to figure out who to talk to when looking for shares or shareholders. Each Storage Server has a private facet that it gives to the Account Manager. This facet allows the AM to create PSS facets for a specific account. In particular, the AM tells the SS "please create account number 42, and tell me the PSS FURL that I should give to the client". The SS creates an object which remembers the account number, creates a FURL for it, and returns the FURL. If there is a single central account manager, then account numbers can be small integers. (if there are multiple ones, they need to be large random strings to ensure uniqueness). To avoid requiring large (accounts*servers) lookup tables, a given account should use the same identifer for all the servers it talks to. When this can be done, the PSS and Account FURLs are generated as MAC'ed copies of the account number. More specifically, the PSS FURL is a MAC'ed copy of the account number: each SS has a private secret "S", and it creates a string "%d-%s" % (accountnum, b2a(hash(S+accountnum))) to use as the swissnum part of the FURL. The SS uses tub.registerNameLookupHandler to add a function that tries to validate inbound FURLs against this scheme: if successful, it creates a new PSS object with the account number stashed inside. This allows the server to minimize their per-user storage requirements but still insure that PSS FURLs are unguessable. Account FURLs are created by the Account Manager in a similar fashion, using a MAC of the account number. The Account Manager can use the same account number to index other information in a database, like account status, billing status, etc. The mechanism by which Account FURLs are minted is left up to the account manager, but the simple AM that the 'tahoe create-account-manager' command makes has a "new-account" FURL which accepts a username and creates an account for them. The 'tahoe create-account' command is a CLI frontend to this facility. In a friendnet, you could publish this FURL to your friends, allowing everyone to make their own account. In a commercial grid, this facility would be reserved use by the same code which handles billing. == Creating the Account Manager == The 'tahoe create-account-manager' command is used to create a simple account manager node. When started, this node will write several FURLs to its private/ directory, some of which should be provided to other services. 
* new-account.furl : this FURL allows the holder to create new accounts * manage-accounts.furl : this FURL allows the holder to list and modify all existing accounts * serverdesk.furl : this FURL is used by storage servers to make themselves available to all account holders == Configuring the Storage Servers == To use an account manager, each storage server node should be given access to the AM's serverdesk (by simply copying "serverdesk.furl" into the storage server's base directory). In addition, it should *not* be given an introducer.furl . The serverdesk FURL tells the SS that it should allow the AM to create PSS facets for each account, and the lack of an introducer FURL tells the SS to not make its generic SS facet available to anyone. The combination means that clients must acquire PSS facets instead of using the generic one. == Configuring Clients == Each client should be configured to use a specific account by copying their account FURL into their basedir, in a file named "account.furl". In addition, these client nodes should *not* have an "introducer.furl". This combination tells the client to ask the AM for ... tahoe-lafs-1.10.0/docs/proposed/old-accounts-pubkey.txt000066400000000000000000001036151221140116300230250ustar00rootroot00000000000000This is a proposal for handing accounts and quotas in Tahoe. Nothing is final yet.. we are still evaluating the options. = Accounts = The basic Tahoe account is defined by a DSA key pair. The holder of the private key has the ability to consume storage in conjunction with a specific account number. The Account Server has a long-term keypair. Valid accounts are marked as such by the Account Server's signature on a "membership card", which binds a specific pubkey to an account number and declares that this pair is a valid account. Each Storage Server which participates in the AS's domain will have the AS's pubkey in its list of valid AS keys, and will thus accept membership cards that were signed by that AS. If the SS accepts multiple ASs, then it will give each a distinct number, and leases will be labled with an (AS#,Account#) pair. If there is only one AS, then leases will be labeled with just the Account#. Each client node is given the FURL of their personal Account object. The Account will accept a DSA public key and return a signed membership card that authorizes the corresponding private key to consume storage on behalf of the account. The client will create its own DSA keypair the first time it connects to the Account, and will then use the resulting membership card for all subsequent storage operations. == Storage Server Goals == The Storage Server cares about two things: 1: maintaining an accurate refcount on each bucket, so it can delete the bucket when the refcount goes to zero 2: being able to answer questions about aggregate usage per account The SS conceptually maintains a big matrix of lease information: one column per account, one row per storage index. The cells contain a boolean (has-lease or no-lease). If the grid uses per-lease timers, then each has-lease cell also contains a lease timer. This matrix may be stored in a variety of ways: entries in each share file, or items in a SQL database, according to the desired tradeoff between complexity, robustness, read speed, and write speed. Each client (by virtue of their knowledge of an authorized private key) gets to manipulate their column of this matrix in any way they like: add lease, renew lease, delete lease. 
(TODO: for reconcilliation purposes, the should also be able to enumerate leases). == Storage Operations == Side-effect-causing storage operations come in three forms: 1: allocate bucket / add lease to existing bucket arguments: storage_index=, storage_server=, ueb_hash=, account= 2: renew lease arguments: storage_index=, storage_server=, account= 3: cancel lease arguments: storage_index=, storage_server=, account= (where lease renewal is only relevant for grids which use per-lease timers). Clients do add-lease when they upload a file, and cancel-lease when they remove their last reference to it. Storage Servers publish a "public storage port" through the introducer, which does not actually enable storage operations, but is instead used in a rights-amplification pattern to grant authorized parties access to a "personal storage server facet". This personal facet is the one that implements allocate_bucket. All clients get access to the same public storage port, which means that we can improve the introduction mechanism later (to use a gossip-based protocol) without affecting the authority-granting protocols. The public storage port accepts signed messages asking for storage authority. It responds by creating a personal facet and making it available to the requester. The account number is curried into the facet, so that all lease-creating operations will record this account number into the lease. By restricting the nature of the personal facets that a client can access, we restrict them to using their designated account number. ======================================== There are two kinds of signed messages: use (other names: connection, FURLification, activation, reification, grounding, specific-making, ?), and delegation. The FURLification message results in a FURL that points to an object which can actually accept RIStorageServer methods. The delegation message results in a new signed message. The furlification message looks like: (pubkey, signed(serialized({limitations}, beneficiary_furl))) The delegation message looks like: (pubkey, signed(serialized({limitations}, delegate_pubkey))) The limitations dict indicates what the resulting connection or delegation can be used for. All limitations for the cert chain are applied, and the result must be restricted to their overall minimum. The following limitation keys are defined: 'account': a number. All resulting leases must be tagged with this account number. A chain with multiple distinct 'account' limitations is an error (the result will not permit leases) 'SI': a storage index (binary string). Leases may only be created for this specific storage index, no other. 'serverid': a peerid (binary string). Leases may only be created on the storage server identified by this serverid. 'UEB_hash': (binary string): Leases may only be created for shares which contain a matching UEB_hash. Note: this limitation is a nuisance to implement correctly: it requires that the storage server parse the share and verify all hashes. 'before': a timestamp (seconds since epoch). All leases must be made before this time. In addition, all liverefs and FURLs must expire and cease working at this time. 'server_size': a number, measuring share size (in bytes). A storage server which sees this message should keep track of how much storage space has been consumed using this liveref/FURL, and throw an exception when receiving a lease request that would bring this total above 'server_size'. 
Note: this limitation is a nuisance to implement (it works best if 'before' is used and provides a short lifetime). Actually, let's merge the two, and put the type in the limitations dict. 'furl_to' and 'delegate_key' are mutually exclusive. 'furl_to': (string): Used only on furlification messages. This requests the recipient to create an object which implements the given access, then send a FURL which references this object to an RIFURLReceiver.furl() call at the given 'furl_to' FURL. To reduce the number of extra roundtrips, both foolscap calls include an extra (ignored) argument that will carry the object being referenced by the FURL, used to pre-load the recipient's foolscap table. In addition, the response message will contain a nonce, to allow the same beneficiary to be used for multiple messages: def process(limitations, nonce, ignored): facet = create_storage_facet(limitations) facet_furl = tub.registerReference(facet) d = tub.getReference(limitations['furl_to']) d.addCallback(lambda rref: rref.furl(facet_furl, nonce, facet)) The server must always send the facet/facet_furl to the furl_to beneficiary, and never to the 'ignored' argument (even though for well-behaved clients these will both refer to the same target). This is to prevent a rogue server from echoing a client's signed message to some other server, to try to steal the client's authority. The facet_furl should be persistent, so to reduce storage space, facet_furl should contain an HMAC'ed list of all limitations, and create_storage_facet() should be deferred until the client actually tries to use the furl. This leads to 150-200 byte base32 swissnums. 'delegate_key': (binary string, a DSA pubkey). Used only on delegation messages. This requests all observers to accept messages signed by the given public key and to apply the associated limitations. I also want to keep the message size small, so I'm going to define a custom netstring-based encoding format for it (JSON expands binary data by about 3.5x). Each dict entry will be encoded as netstring(key)+netstring(value). The container is responsible for providing the size of this serialized structure. The actual message will then look like: def make_message(privkey, limitations): message_to_sign = "".join([ netstring(k) + netstring(v) for k,v in limitations ]) signature = privkey.sign(message_to_sign) pubkey = privkey.get_public_key() msg = netstring(message_to_sign) + netstring(signature) + netstring(pubkey) return msg The deserialization code MUST throw an exception if the same limitations key appears twice, to ensure that everybody interprets the dict the same way. These messages are passed over foolscap connections as a single string. They are also saved to disk in this format. Code should only store them in a deserialized form if the signature has been verified, the cert chain verified, and the limitations accumulated. The membership card is just the following: membership_card = make_message(account_server_privkey, {'account': account_number, 'before': time.time() + 1*MONTH, 'delegate_key': client_pubkey}) This card is provided on demand by the given user's Account facet, for whatever pubkey they submit. 
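A sketch of the receiving side of this encoding may help. The netstring helpers below assume the conventional "<length>:<bytes>," framing, dsa_verify() is a placeholder for whichever signature primitive is eventually chosen, and the conflict handling is deliberately cruder than described above (a real implementation would take the minimum of 'before' values and the intersection of other limitations rather than rejecting any conflict):

    def parse_netstring(data, pos):
        # "<length>:<payload>," -- returns (payload, next_position)
        colon = data.index(":", pos)
        length = int(data[pos:colon])
        start = colon + 1
        if data[start+length:start+length+1] != ",":
            raise ValueError("malformed netstring")
        return data[start:start+length], start + length + 1

    def parse_signed_message(msg, expected_pubkey):
        # msg = netstring(limitations) + netstring(signature) + netstring(pubkey)
        body, pos = parse_netstring(msg, 0)
        signature, pos = parse_netstring(msg, pos)
        pubkey, pos = parse_netstring(msg, pos)
        if pubkey != expected_pubkey:
            raise ValueError("message signed by unexpected key")
        if not dsa_verify(pubkey, body, signature):   # placeholder primitive
            raise ValueError("bad signature")
        limitations = {}
        pos = 0
        while pos < len(body):
            key, pos = parse_netstring(body, pos)
            value, pos = parse_netstring(body, pos)
            if key in limitations:
                raise ValueError("duplicate limitation key: %r" % key)
            limitations[key] = value
        return limitations

    def verify_chain(messages, account_server_pubkey):
        # messages[0] is the membership card, signed by the Account Server;
        # each later message must be signed by its parent's 'delegate_key'
        combined = {}
        signer = account_server_pubkey
        for msg in messages:
            limitations = parse_signed_message(msg, signer)
            signer = limitations.pop('delegate_key', None)
            for key, value in limitations.items():
                if combined.get(key, value) != value:
                    raise ValueError("conflicting %r limitations" % key)
                combined[key] = value
        return combined

The output of verify_chain() is the accumulated limitations dictionary that the server curries into the facet (or applies to the delegation) it hands back.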
When a client learns about a new storage server, they create a new receiver object (and stash the peerid in it), and submit the following message to the RIStorageServerWelcome.get_personal_facet() method: class Receiver(foolscap.Referenceable): def remote_furl(self, facet_furl, nonce, ignored_facet): self.stash = facet_furl receiver = Receiver() nonce = make_nonce() mymsg = make_message(client_privkey, {'furl_to': receiver_furl}) send([membership_card, mymsg], nonce, receiver) Note that the receiver_furl will probably not have a routeable address, but this won't matter because the client is already attached, so foolscap can use the existing connection. The receiver should use facet_furl in preference to ignored_facet for consistency, but (unlike the server's use of receiver_furl) there is no security risk in using ignored_facet (since both are coming from the same source). The server will validate the cert chain (see below) and wind up with a complete list of limitations that are to be applied to the facet it will provide to the caller. This list must combine limitations from the entire chain: in particular it must enforce the account= limitation from the membership card. The server will then serialize this limitation dict into a string, compute a fixed-size HMAC code using a server-private secret, then base32 encode the (hmac+limitstring) value (and prepend a "0-" version indicator). The resulting string is used as the swissnum portion of the FURL that is sent to the furl_to target. Later, when the client tries to dereference this FURL, a Tub.registerNameLookupHandler hook will notice the attempt, claim the "0-" namespace, base32decode the string, check the HMAC, decode the limitation dict, then create and return an RIStorageServer facet with these limitations. The client should cache the (peerid, FURL) mapping in persistent storage. Later, when it learns about this storage server again, it will use the cached FURL instead of signing another message. If the getReference or the storage operation fails with StorageAuthorityExpiredError, the cache entry should be removed and the client should sign a new message to obtain a new one. (security note: an evil storage server can take 'mymsg' and present it to someone else, but other servers will only send the resulting authority to the client's receiver_furl, so the evil server cannot benefit from this. The receiver object has the serverid curried into it, so the evil server can only affect the client's mapping for this one serverid, not anything else, so the server cannot hurt the client in any way other than denying service to itself. It might be a good idea to include serverid= in the message, but it isn't clear that it really helps anything). When the client wants to use a Helper, it needs to delegate some amount of storage authority to the helper. The first phase has the client send the storage index to the helper, so it can query servers and decide whether the file needs to be uploaded or not. If it decides yes, the Helper creates a new Uploader object and a receiver object, and sends the Uploader liveref and the receiver FURL to the client. The client then creates a message for the helper to use: helper_msg = make_message(client_privkey, {'furl_to': helper_rx_furl, 'SI': storage_index, 'before': time.time() + 1*DAY, #? 'server_size': filesize/k+overhead, }) The client then sends (membership_card, helper_msg) to the helper. The Helper sends (membership_card, helper_msg) to each storage server that it needs to use for the upload. 
This gives the Helper access to a limited facet on each storage server. This facet gives the helper the authority to upload data for a specific storage index, for a limited time, using leases that are tagged by the user's account number. The helper cannot use the client's storage authority for any other file. The size limit prevents the helper from storing some other (larger) file of its own using this authority. The time restriction allows the storage servers to expire their 'server_size' table entry quickly, and prevents the helper from hanging on to the storage authority indefinitely. The Helper only gets one furl_to target, which must be used for multiple SS peerids. The helper's receiver must parse the FURL that gets returned to determine which server is which. [problems: an evil server could deliver a bogus FURL which points to a different server. The Helper might reject the real server's good FURL as a duplicate. This allows an evil server to block access to a good server. Queries could be sent sequentially, which would partially mitigate this problem (an evil server could send multiple requests). Better: if the cert-chain send message could include a nonce, which is supposed to be returned with the FURL, then the helper could use this to correlate sends and receives.] === repair caps === There are three basic approaches to provide a Repairer with the storage authority that it needs. The first is to give the Repairer complete authority: allow it to place leases for whatever account number it wishes. This is simple and requires the least overhead, but of course it give the Repairer the ability to abuse everyone's quota. The second is to give the Repairer no user authority: instead, give the repairer its own account, and build it to keep track of which leases it is holding on behalf of one of its customers. This repairer will slowly accumulate quota space over time, as it creates new shares to replace ones that have decayed. Eventually, when the client comes back online, the client should establish its own leases on these new shares and allow the repairer to cancel its temporary ones. The third approach is in between the other two: give the repairer some limited authority over the customer's account, but not enough to let it consume the user's whole quota. To create the storage-authority portion of a (one-month) repair-cap, the client creates a new DSA keypair (repair_privkey, repair_pubkey), and then creates a signed message and bundles it into the repaircap: repair_msg = make_message(client_privkey, {'delegate_key': repair_pubkey, 'SI': storage_index, 'UEB_hash': file_ueb_hash}) repair_cap = (verify_cap, repair_privkey, (membership_card, repair_msg)) This gives the holder of the repair cap a time-limited authority to upload shares for the given storage index which contain the given data. This prohibits the repair-cap from being used to upload or repair any other file. When the repairer needs to upload a new share, it will use the delegated key to create its own signed message: upload_msg = make_message(repair_privkey, {'furl_to': repairer_rx_furl}) send(membership_card, repair_msg, upload_msg) The biggest problem with the low-authority approaches is the expiration time of the membership card, which limits the duration for which the repair-cap authority is valid. It would be nice if repair-caps could last a long time, years perhaps, so that clients can be offline for a similar period of time. 
However to retain a reasonable revocation interval for users, the membership card's before= timeout needs to be closer to a month. [it might be reasonable to use some sort of rights-amplification: the repairer has a special cert which allows it to remove the before= value from a chain]. === chain verification === The server will create a chain that starts with the AS's certificate: an unsigned message which derives its authority from being manually placed in the SS's configdir. The only limitation in the AS certificate will be on some kind of meta-account, in case we want to use multiple account servers and allow their account numbers to live in distinct number spaces (think sub-accounts or business partners to buy storage in bulk and resell it to users). The rest of the chain comes directly from what the client sent. The server walks the chain, keeping an accumulated limitations dictionary along the way. At each step it knows the pubkey that was delegated by the previous step. == client config == Clients are configured with an Account FURL that points to a private facet on the Account Server. The client generates a private key at startup. It sends the pubkey to the AS facet, which will return a signed delegate_key message (the "membership card") that grants the client's privkey any storage authority it wishes (as long as the account number is set to a specific value). The client stores this membership card in private/membership.cert . RIStorageServer messages will accept an optional account= argument. If left unspecified, the value is taken from the limitations that were curried into the SS facet. In all cases, the value used must meet those limitations. The value must not be None: Helpers/Repairers or other super-powered storage clients are obligated to specify an account number. == server config == Storage servers are configured with an unsigned root authority message. This is like the output of make_message(account_server_privkey, {}) but has empty 'signature' and 'pubkey' strings. This root goes into NODEDIR/storage_authority_root.cert . It is prepended to all chains that arrive. [if/when we accept multiple authorities, storage_authority_root.cert will turn into a storage_authority_root/ directory with *.cert files, and each arriving chain will cause a search through these root certs for a matching pubkey. The empty limitations will be replaced by {domain=X}, which is used as a sort of meta-account.. the details depend upon whether we express account numbers as an int (with various ranges) or as a tuple] The root authority message is published by the Account Server through its web interface, and also into a local file: NODEDIR/storage_authority_root.cert . The admin of the storage server is responsible for copying this file into place, thus enabling clients to use storage services. ---------------------------------------- -- Text beyond this point is out-of-date, and exists purely for background -- Each storage server offers a "public storage port", which only accepts signed messages. The Introducer mechanism exists to give clients a reference to a set of these public storage ports. All clients get access to the same ports. If clients did all their work themselves, these public storage ports would be enough, and no further code would be necessary (all storage requests would we signed the same way). Fundamentally, each storage request must be signed by the account's private key, giving the SS an authenticated Account Number to go with the request. 
This is used to index the correct cell in the lease matrix. The holder of the account privkey is allowed to manipulate their column of the matrix in any way they like: add leases, renew leases, delete leases. (TODO: for reconcilliation purposes, they should also be able to enumerate leases). The storage request is sent in the form of a signed request message, accompanied by the membership card. For example: req = SIGN("allocate SI=123 SSID=abc", accountprivkey) , membership_card -> RemoteBucketWriter reference Upon receipt of this request, the storage server will return a reference to a RemoteBucketWriter object, which the client can use to fill and close the bucket. The SS must perform two DSA signature verifications before accepting this request. The first is to validate the membership card: the Account Server's pubkey is used to verify the membership card's signature, from which an account pubkey and account# is extracted. The second is to validate the request: the account pubkey is used to verify the request signature. If both are valid, the full request (with account# and storage index) is delivered to the internal StorageServer object. Note that the signed request message includes the Storage Server's node ID, to prevent this storage server from taking the signed message and echoing to other storage servers. Each SS will ignore any request that is not addressed to the right SSID. Also note that the SI= and SSID= fields may contain wildcards, if the signing client so chooses. == Caching Signature Verification == We add some complexity to this simple model to achieve two goals: to enable fine-grained delegation of storage capabilities (specifically for renewers and repairers), and to reduce the number of public-key crypto operations that must be performed. The first enhancement is to allow the SS to cache the results of the verification step. To do this, the client creates a signed message which asks the SS to return a FURL of an object which can be used to execute further operations *without* a DSA signature. The FURL is expected to contain a MAC'ed string that contains the account# and the argument restrictions, effectively currying a subset of arguments into the RemoteReference. Clients which do all their operations themselves would use this to obtain a private storage port for each public storage port, stashing the FURLs in a local table, and then later storage operations would be done to those FURLs instead of creating signed requests. For example: req = SIGN("FURL(allocate SI=* SSID=abc)", accountprivkey), membership_card -> FURL Tub.getReference(FURL).allocate(SI=123) -> RemoteBucketWriter reference == Renewers and Repairers A brief digression is in order, to motivate the other enhancement. The "manifest" is a list of caps, one for each node that is reachable from the user's root directory/directories. The client is expected to generate the manifest on a periodic basis (perhaps once a day), and to keep track of which files/dirnodes have been added and removed. Items which have been removed must be explicitly dereferenced to reclaim their storage space. For grids which use per-file lease timers, the manifest is used to drive the Renewer: a process which renews the lease timers on a periodic basis (perhaps once a week). The manifest can also be used to drive a Checker, which in turn feeds work into the Repairer. The manifest should contain the minimum necessary authority to do its job, which generally means it contains the "verify cap" for each node. 
For immutable files, the verify cap contains the storage index and the UEB hash: enough information to retrieve and validate the ciphertext but not enough to decrypt it. For mutable files, the verify cap contains the storage index and the pubkey hash, which also serves to retrieve and validate ciphertext but not decrypt it.

If the client does its own Renewing and Repairing, then a verifycap-based manifest is sufficient. However, if the user wants to be able to turn their computer off for a few months and still keep their files around, they need to delegate this job off to some other willing node. In a commercial network, there will be centralized (and perhaps trusted) Renewer/Repairer nodes, but in a friendnet these may not be available, and the user will depend upon one of their friends being willing to run this service for them while they are away. In either of these cases, the verifycaps are not enough: the Renewer will need additional authority to renew the client's leases, and the Repairer will need the authority to create new shares (in the client's name) when necessary.

A trusted central service could be given all-account superpowers, allowing it to exercise storage authority on behalf of all users as it pleases. If this is the case, the verifycaps are sufficient. But if we desire to grant less authority to the Renewer/Repairer, then we need a mechanism to attenuate this authority.

The usual objcap approach is to create a proxy: an intermediate object which itself is given full authority, but which is unwilling to exercise more than a portion of that authority in response to incoming requests. The not-fully-trusted service is then only given access to the proxy, not the final authority. For example::

    class Proxy(RemoteReference):
        def __init__(self, original, storage_index):
            self.original = original
            self.storage_index = storage_index
        def remote_renew_leases(self):
            return self.original.renew_leases(self.storage_index)

    renewer.grant(Proxy(target, "abcd"))

But this approach interposes the proxy in the calling chain, requiring the machine which hosts the proxy to be available and on-line at all times, which runs opposite to our use case (turning the client off for a month).

== Creating Attenuated Authorities ==

The other enhancement is to use more public-key operations to allow the delegation of reduced authority to external helper services. Specifically, we want to give the Renewer the ability to renew leases for a specific file, rather than giving it lease-renewal power for all files. Likewise, the Repairer should have the ability to create new shares, but only for the file that is being repaired, not for unrelated files.

If we do not mind giving the storage servers the ability to replay their inbound message to other storage servers, then the client can simply generate a signed message with a wildcard SSID= argument and leave it in the care of the Renewer or Repairer. For example, the Renewer would get::

    SIGN("renew-lease SI=123 SSID=*", accountprivkey), membership_card

Then, when the Renewer needed to renew a lease, it would deliver this signed request message to the storage server. The SS would verify the signatures just as if the message came from the original client, find them good, and perform the desired operation. With this approach, the manifest that is delivered to the remote Renewer process needs to include a signed lease-renewal request for each file: we use the term "renew-cap" for this combined (verifycap + signed lease-renewal request) message.
Likewise the "repair-cap" would be the verifycap plus a signed allocate-bucket message. A renew-cap manifest would be enough for a remote Renewer to do its job, a repair-cap manifest would provide a remote Repairer with enough authority, and a cancel-cap manifest would be used for a remote Canceller (used, e.g., to make sure that file has been dereferenced even if the client does not stick around long enough to track down and inform all of the storage servers involved). The only concern is that the SS could also take this exact same renew-lease message and deliver it to other storage servers. This wouldn't cause a concern for mere lease renewal, but the allocate-share message might be a bit less comfortable (you might not want to grant the first storage server the ability to claim space in your name on all other storage servers). Ideally we'd like to send a different message to each storage server, each narrowed in scope to a single SSID, since then none of these messages would be useful on any other SS. If the client knew the identities of all the storage servers in the system ahead of time, it might create a whole slew of signed messages, but a) this is a lot of signatures, only a fraction of which will ever actually be used, and b) new servers might be introduced after the manifest is created, particularly if we're talking about repair-caps instead of renewal-caps. The Renewer can't generate these one-per-SSID messages from the SSID=* message, because it doesn't have a privkey to make the correct signatures. So without some other mechanism, we're stuck with these relatively coarse authorities. If we want to limit this sort of authority, then we need to introduce a new method. The client begins by generating a new DSA keypair. Then it signs a message that declares the new pubkey to be valid for a specific subset of storage operations (such as "renew-lease SI=123 SSID=*"). Then it delivers the new privkey, the declaration message, and the membership card to the Renewer. The renewer uses the new privkey to sign its own one-per-SSID request message for each server, then sends the (signed request, declaration, membership card) triple to the server. The server needs to perform three verification checks per message: first the membership card, then the declaration message, then the actual request message. == Other Enhancements == If a given authority is likely to be used multiple times, the same give-me-a-FURL trick can be used to cut down on the number of public key operations that must be performed. This is trickier with the per-SI messages. When storing the manifest, things like the membership card should be amortized across a set of common entries. An isolated renew-cap needs to contain the verifycap, the signed renewal request, and the membership card. But a manifest with a thousand entries should only include one copy of the membership card. It might be sensible to define a signed renewal request that grants authority for a set of storage indicies, so that the signature can be shared among several entries (to save space and perhaps processing time). The request could include a Bloom filter of authorized SI values: when the request is actually sent to the server, the renewer would add a list of actual SI values to renew, and the server would accept all that are contained in the filter. == Revocation == The lifetime of the storage authority included in the manifest's renew-caps or repair-caps will determine the lifetime of those caps. 
In particular, if we implement account revocation by using time-limited membership cards (requiring the client to get a new card once a month), then the repair-caps won't work for more than a month, which kind of defeats the purpose.

A related issue is the FURL-shortcut: the MAC'ed message needs to include a validity period of some sort, and if the client tries to use an old FURL they should get an error message that will prompt them to try to acquire a newer one.

------------------------------

The client can produce a repair-cap manifest for a specific Repairer's pubkey, so it can produce a signed message that includes the pubkey (instead of needing to generate a new privkey just for this purpose). The result is not a capability, since it can only be used by the holder of the corresponding privkey.

So the generic form of the storage operation message is the request (which has all the argument values filled in), followed by a chain of authorizations. The first authorization must be signed by the Account Server's key. Each authorization must be signed by the key mentioned in the previous one. Each one adds a new limitation on the power of the following ones. The actual request is bounded by all the limitations of the chain.

The membership card is an authorization that simply limits the account number that can be used: "op=* SI=* SSID=* account=4 signed-by=CLIENT-PUBKEY".

So a repair manifest created for a Repairer with pubkey ABCD could consist of a list of verifycaps plus a single authorization (using a Bloom filter to identify the SIs that were allowed)::

    SIGN("allocate SI=[bloom] SSID=* signed-by=ABCD")

If/when the Repairer needed to allocate a share, it would use its own privkey to sign an additional message and send the whole list to the SS::

    request=allocate SI=1234 SSID=EEFS account=4 shnum=2
    SIGN("allocate SI=1234 SSID=EEFS", ABCD)
    SIGN("allocate SI=[bloom] SSID=* signed-by=ABCD", clientkey)
    membership: SIGN("op=* SI=* SSID=* account=4 signed-by=clientkey", ASkey)
    [implicit]: ASkey

----------------------------------------

Things would be a lot simpler if the Repairer (actually the Re-Leaser) had everybody's account authority.

One simplifying approach: the Repairer/Re-Leaser has its own account, and the shares it creates are leased under that account number. The R/R keeps track of which leases it has created for whom. When the client eventually comes back online, it is told to perform a re-leasing run, and after that occurs the R/R can cancel its own temporary leases.

This would effectively transfer storage quota from the original client to the R/R over time (as shares are regenerated by the R/R while the client remains offline). If the R/R is centrally managed, the quota mechanism can sum the R/R's numbers with the SS's numbers when determining how much storage is consumed by any given account. Not quite as clean as storing the exact information in the SS's lease tables directly, but:

* the R/R no longer needs any special account authority (it merely needs an accurate account number, which can be supplied by giving the client a specific facet that is bound to that account number)
* the verify-cap manifest is sufficient to perform repair
* no extra DSA keys are necessary
* account authority could be implemented with either DSA keys or personal SS facets: i.e. we don't need the delegability aspects of DSA keys for use by the repair mechanism (we might still want them to simplify introduction).

I *think* this would eliminate all that complexity of chained authorization messages.
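A minimal sketch of that Re-Leaser bookkeeping, using purely hypothetical names (none of these classes or methods exist in the Tahoe-LAFS codebase), might look like::

    class ReLeaser:
        def __init__(self, own_account_number):
            self.account = own_account_number
            # maps original account number -> set of (server_id, storage_index)
            self.temporary_leases = {}

        def lease_for(self, original_account, server, storage_index):
            # Lease the regenerated share under the R/R's own account, and
            # remember that it was made on someone else's behalf.
            server.add_lease(storage_index, account=self.account)
            self.temporary_leases.setdefault(original_account, set()).add(
                (server.server_id, storage_index))

        def client_back_online(self, original_account, servers_by_id):
            # The original client has re-leased its shares itself, so the
            # R/R's temporary leases can now be cancelled.
            for server_id, si in self.temporary_leases.pop(original_account, set()):
                servers_by_id[server_id].cancel_lease(si, account=self.account)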
tahoe-lafs-1.10.0/docs/quickstart.rst

==================
Getting Tahoe-LAFS
==================

Welcome to `the Tahoe-LAFS project`_, a secure, decentralized, fault-tolerant storage system. `about Tahoe-LAFS `__

.. _the Tahoe-LAFS project: https://tahoe-lafs.org

How To Get Tahoe-LAFS
=====================

This procedure has been verified to work on Windows, Mac, OpenSolaris, and too many flavors of Linux and of BSD to list. It's likely to work on other platforms.

In Case Of Trouble
------------------

There are a few 3rd party libraries that Tahoe-LAFS depends on that might not be easy to set up on your platform. If the following instructions don't Just Work without any further effort on your part, then please write to `the tahoe-dev mailing list`_ where friendly hackers will help you out.

.. _the tahoe-dev mailing list: https://tahoe-lafs.org/cgi-bin/mailman/listinfo/tahoe-dev

Install Python
--------------

Check if you already have an adequate version of Python installed by running ``python -V``. Python v2.6 (v2.6.6 or greater recommended) or Python v2.7 will work. Python v3 does not work. On Windows, we recommend the use of native Python v2.7, not Cygwin Python. If you don't have one of these versions of Python installed, download and install `Python v2.7`_. Make sure that the path to the installation directory has no spaces in it (e.g. on Windows, do not install Python in the "Program Files" directory).

.. _Python v2.7: http://www.python.org/download/releases/2.7.4/

Get Tahoe-LAFS
--------------

Download the latest stable release, `Tahoe-LAFS v1.10.0`_.

.. _Tahoe-LAFS v1.10.0: https://tahoe-lafs.org/source/tahoe-lafs/releases/allmydata-tahoe-1.10.0.zip

Set Up Tahoe-LAFS
-----------------

Unpack the zip file and cd into the top-level directory.

Run ``python setup.py build`` to generate the ``tahoe`` executable in a subdirectory of the current directory named ``bin``. This will download and build anything you need from various websites.

On Windows, the ``build`` step might tell you to open a new Command Prompt (or, on XP and earlier, to log out and back in again). This is needed the first time you set up Tahoe-LAFS on a particular installation of Windows.

Run ``bin/tahoe --version`` (on Windows, ``bin\tahoe --version``) to verify that the executable tool prints out the right version number after "``allmydata-tahoe:``".

Optionally run ``python setup.py trial`` to verify that it passes all of its self-tests.

Run Tahoe-LAFS
--------------

Now you are ready to deploy a decentralized filesystem. The ``tahoe`` executable in the ``bin`` directory can configure and launch your Tahoe-LAFS nodes. See ``__ for instructions on how to do that.

tahoe-lafs-1.10.0/docs/running.rst

=====================
How To Run Tahoe-LAFS
=====================

Intro
=====

This is how to run a Tahoe-LAFS client or a complete Tahoe-LAFS grid. First you have to install the Tahoe-LAFS software, as documented in `quickstart.rst `_.

The ``tahoe`` program in the ``bin`` directory is used to create, start, and stop nodes. Each node lives in a separate base directory, in which there is a configuration file named ``tahoe.cfg``. Nodes read and write files within this base directory.

A grid consists of a set of *storage nodes* and *client nodes* running the Tahoe-LAFS code.
There is also an *introducer node* that is responsible for getting the other nodes talking to each other. If you're getting started we recommend you try connecting to the `public test grid `_ as you only need to create a client node. When you want to create your own grid you'll need to create the introducer and several initial storage nodes (see the note about small grids below). If the Tahoe-LAFS ``bin`` directory is not on your PATH, then in all the command lines below, specify the full path to ``bin/tahoe``. To construct a client node, run "``tahoe create-client``", which will create ``~/.tahoe`` to be the node's base directory. Acquire the ``introducer.furl`` (see below if you are running your own introducer, or use the one from the `TestGrid page `_), and paste it after ``introducer.furl =`` in the ``[client]`` section of ``~/.tahoe/tahoe.cfg``. Then use "``tahoe run ~/.tahoe``". After that, the node should be off and running. The first thing it will do is connect to the introducer and get itself connected to all other nodes on the grid. By default, "``tahoe create-client``" creates a client-only node, that does not offer its disk space to other nodes. To configure other behavior, use "``tahoe create-node``" or see `configuration.rst `_. To construct an introducer, create a new base directory for it (the name of the directory is up to you), ``cd`` into it, and run "``tahoe create-introducer .``". Now run the introducer using "``tahoe start .``". After it starts, it will write a file named ``introducer.furl`` into the ``private/`` subdirectory of that base directory. This file contains the URL the other nodes must use in order to connect to this introducer. (Note that "``tahoe run .``" doesn't work for introducers, this is a known issue: `#937 `_.) The "``tahoe run``" command above will run the node in the foreground. On Unix, you can run it in the background instead by using the "``tahoe start``" command. To stop a node started in this way, use "``tahoe stop``". ``tahoe --help`` gives a summary of all commands. See `configuration.rst `_ for more details about how to configure Tahoe-LAFS, including how to get other clients to connect to your node if it is behind a firewall or NAT device. A note about small grids ------------------------ By default, Tahoe-LAFS ships with the configuration parameter ``shares.happy`` set to 7. If you are using Tahoe-LAFS on a grid with fewer than 7 storage nodes, this won't work well for you -- none of your uploads will succeed. To fix this, see `configuration.rst `_ to learn how to set ``shares.happy`` to a more suitable value for your grid. Do Stuff With It ================ This is how to use your Tahoe-LAFS node. The WUI ------- Point your web browser to `http://127.0.0.1:3456 `_ -- which is the URL of the gateway running on your own local computer -- to use your newly created node. Create a new directory (with the button labelled "create a directory"). Your web browser will load the new directory. Now if you want to be able to come back to this directory later, you have to bookmark it, or otherwise save a copy of the URL. If you lose the URL to this directory, then you can never again come back to this directory. You can do more or less everything you want to do with a decentralized filesystem through the WUI. The CLI ------- Prefer the command-line? Run "``tahoe --help``" (the same command-line tool that is used to start and stop nodes serves to navigate and use the decentralized filesystem). 
To get started, create a new directory and mark it as the 'tahoe:' alias by running "``tahoe create-alias tahoe``". Once you've done that, you can do "``tahoe ls tahoe:``" and "``tahoe cp LOCALFILE tahoe:foo.txt``" to work with your filesystem. The Tahoe-LAFS CLI uses similar syntax to the well-known scp and rsync tools. See `CLI.rst `_ for more details.

As with the WUI (and with all current interfaces to Tahoe-LAFS), you are responsible for remembering directory capabilities yourself. If you create a new directory and lose the capability to it, then you cannot access that directory ever again.

The SFTP and FTP frontends
--------------------------

You can access your Tahoe-LAFS grid via any `SFTP `_ or `FTP `_ client. See `FTP-and-SFTP.rst `_ for how to set this up. On most Unix platforms, you can also use SFTP to plug Tahoe-LAFS into your computer's local filesystem via ``sshfs``.

The `SftpFrontend `_ page on the wiki has more information about using SFTP with Tahoe-LAFS.

The WAPI
--------

Want to program your Tahoe-LAFS node to do your bidding? Easy! See `webapi.rst `_.

Socialize
=========

You can chat with other users of and hackers of this software on the #tahoe-lafs IRC channel at ``irc.freenode.net``, or on the `tahoe-dev mailing list `_.

tahoe-lafs-1.10.0/docs/specifications/CHK-hashes.svg

[SVG diagram, "CHK File Hashes": data (plaintext, crypttext) and shares, with plaintext/crypttext/share hash trees feeding the URI Extension Block; the URI holds the encryption key, storage index, and UEB hash; AES and FEC steps shown; legend distinguishes hash-derivation, encryption/erasure-coding, and index arrows; note "plaintext hashes removed, see #453".]

tahoe-lafs-1.10.0/docs/specifications/Makefile

SOURCES = CHK-hashes.svg file-encoding1.svg file-encoding2.svg \
	file-encoding3.svg file-encoding4.svg file-encoding5.svg \
	file-encoding6.svg

PNGS = $(patsubst %.svg,%.png,$(SOURCES))
EPSS = $(patsubst %.svg,%.eps,$(SOURCES))

.PHONY: images-png images-eps

all: $(PNGS) $(EPSS)
images-png: $(PNGS)
images-eps: $(EPSS)

%.png: %.svg
	inkscape -b white -d 90 -D --export-png $@ $<
%.eps: %.svg
	inkscape --export-eps $@ $<

clean:
	rm -f *.png *.eps

tahoe-lafs-1.10.0/docs/specifications/URI-extension.rst

===================
URI Extension Block
===================

This block is a serialized dictionary with string keys and string values (some of which represent numbers, some of which are SHA-256 hashes). All buckets hold an identical copy. The hash of the serialized data is kept in the URI.

The download process must obtain a valid copy of this data before any decoding can take place. The download process must also obtain other data before incremental validation can be performed. Full-file validation (for clients who do not wish to do incremental validation) can be performed solely with the data from this block.
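As a rough illustration of that full-file validation step (a sketch only, with assumed helper names; the real implementation applies a tag before hashing and lives in the Tahoe-LAFS codebase), a downloader can refuse to use the block unless its hash matches the value carried in the URI::

    import hashlib

    def sha256d(data):
        # SHA-256d: SHA-256 applied twice, as used elsewhere in these docs.
        return hashlib.sha256(hashlib.sha256(data).digest()).digest()

    def check_uri_extension(serialized_ueb, ueb_hash_from_uri):
        # The URI carries the hash of the serialized URI Extension Block;
        # the block must not be trusted unless the hashes match.
        # (The tagged-hash detail is omitted in this sketch.)
        if sha256d(serialized_ueb) != ueb_hash_from_uri:
            raise ValueError("URI Extension Block does not match URI hash")
        return serialized_ueb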
At the moment, this data block contains the following keys (and an estimate on their sizes)::

    size                5
    segment_size        7
    num_segments        2
    needed_shares       2
    total_shares        3
    codec_name          3
    codec_params        5+1+2+1+3=12
    tail_codec_params   12
    share_root_hash     32 (binary) or 52 (base32-encoded) each
    plaintext_hash
    plaintext_root_hash
    crypttext_hash
    crypttext_root_hash

Some pieces are needed elsewhere (size should be visible without pulling the block, the Tahoe3 algorithm needs total_shares to find the right peers, all peer selection algorithms need needed_shares to ask a minimal set of peers). Some pieces are arguably redundant but are convenient to have present (test_encode.py makes use of num_segments).

The rule for this data block is that it should be a constant size for all files, regardless of file size. Therefore hash trees (which have a size that depends linearly upon the number of segments) are stored elsewhere in the bucket, with only the hash tree root stored in this data block.

This block will be serialized as follows::

    assert that all keys match ^[a-zA-Z_\-]+$
    sort all the keys lexicographically
    for k in keys:
        write("%s:" % k)
        write(netstring(data[k]))

Serialized size::

    dense binary (but decimal) packing: 160+46=206
    including 'key:' (185) and netstring (6*3+7*4=46) on values: 231
    including 'key:%d\n' (185+13=198) and printable values (46+5*52=306)=504

We'll go with the 231-sized block, and provide a tool to dump it as text if we really want one.

tahoe-lafs-1.10.0/docs/specifications/backends/raic.rst

=============================================================
Redundant Array of Independent Clouds: Share To Cloud Mapping
=============================================================

Introduction
============

This document describes a proposed design for the mapping of LAFS shares to objects in a cloud storage service. It also analyzes the costs for each of the functional requirements, including network, disk, storage and API usage costs.

Terminology
===========

*LAFS share*
A Tahoe-LAFS share representing part of a file after encryption and erasure encoding.

*LAFS shareset*
The set of shares stored by a LAFS storage server for a given storage index. The shares within a shareset are numbered by a small integer.

*Cloud storage service*
A service such as Amazon S3 `²`_, Rackspace Cloud Files `³`_, Google Cloud Storage `⁴`_, or Windows Azure `⁵`_, that provides cloud storage.

*Cloud storage interface*
A protocol interface supported by a cloud storage service, such as the S3 interface `⁶`_, the OpenStack Object Storage interface `⁷`_, the Google Cloud Storage interface `⁸`_, or the Azure interface `⁹`_. There may be multiple services implementing a given cloud storage interface. In this design, only REST-based APIs `¹⁰`_ over HTTP will be used as interfaces.

*Cloud object*
A file-like abstraction provided by a cloud storage service, storing a sequence of bytes. Cloud objects are mutable in the sense that the contents and metadata of the cloud object with a given name in a given cloud container can be replaced. Cloud objects are called “blobs” in the Azure interface, and “objects” in the other interfaces.

*Cloud container*
A container for cloud objects provided by a cloud service.
Cloud containers are called “buckets” in the S3 and Google Cloud Storage interfaces, and “containers” in the Azure and OpenStack Storage interfaces. Functional Requirements ======================= * *Upload*: a LAFS share can be uploaded to an appropriately configured Tahoe-LAFS storage server and the data is stored to the cloud storage service. * *Scalable shares*: there is no hard limit on the size of LAFS share that can be uploaded. If the cloud storage interface offers scalable files, then this could be implemented by using that feature of the specific cloud storage interface. Alternately, it could be implemented by mapping from the LAFS abstraction of an unlimited-size immutable share to a set of size-limited cloud objects. * *Streaming upload*: the size of the LAFS share that is uploaded can exceed the amount of RAM and even the amount of direct attached storage on the storage server. I.e., the storage server is required to stream the data directly to the ultimate cloud storage service while processing it, instead of to buffer the data until the client is finished uploading and then transfer the data to the cloud storage service. * *Download*: a LAFS share can be downloaded from an appropriately configured Tahoe-LAFS storage server, and the data is loaded from the cloud storage service. * *Streaming download*: the size of the LAFS share that is downloaded can exceed the amount of RAM and even the amount of direct attached storage on the storage server. I.e. the storage server is required to stream the data directly to the client while processing it, instead of to buffer the data until the cloud storage service is finished serving and then transfer the data to the client. * *Modify*: a LAFS share can have part of its contents modified. If the cloud storage interface offers scalable mutable files, then this could be implemented by using that feature of the specific cloud storage interface. Alternately, it could be implemented by mapping from the LAFS abstraction of an unlimited-size mutable share to a set of size-limited cloud objects. * *Efficient modify*: the size of the LAFS share being modified can exceed the amount of RAM and even the amount of direct attached storage on the storage server. I.e. the storage server is required to download, patch, and upload only the segment(s) of the share that are being modified, instead of to download, patch, and upload the entire share. * *Tracking leases*: The Tahoe-LAFS storage server is required to track when each share has its lease renewed so that unused shares (shares whose lease has not been renewed within a time limit, e.g. 30 days) can be garbage collected. This does not necessarily require code specific to each cloud storage interface, because the lease tracking can be performed in the storage server's generic component rather than in the component supporting each interface. Mapping ======= This section describes the mapping between LAFS shares and cloud objects. A LAFS share will be split into one or more “chunks” that are each stored in a cloud object. A LAFS share of size `C` bytes will be stored as `ceiling(C / chunksize)` chunks. The last chunk has a size between 1 and `chunksize` bytes inclusive. (It is not possible for `C` to be zero, because valid shares always have a header, so, there is at least one chunk for each share.) For an existing share, the chunk size is determined by the size of the first chunk. For a new share, it is a parameter that may depend on the storage interface. 
It is an error for any chunk to be larger than the first chunk, or for any chunk other than the last to be smaller than the first chunk. If a mutable share with total size less than the default chunk size for the storage interface is being modified, the new contents are split using the default chunk size. *Rationale*: this design allows the `chunksize` parameter to be changed for new shares written via a particular storage interface, without breaking compatibility with existing stored shares. All cloud storage interfaces return the sizes of cloud objects with requests to list objects, and so the size of the first chunk can be determined without an additional request. The name of the cloud object for chunk `i` > 0 of a LAFS share with storage index `STORAGEINDEX` and share number `SHNUM`, will be shares/`ST`/`STORAGEINDEX`/`SHNUM.i` where `ST` is the first two characters of `STORAGEINDEX`. When `i` is 0, the `.0` is omitted. *Rationale*: this layout maintains compatibility with data stored by the prototype S3 backend, for which Least Authority Enterprises has existing customers. This prototype always used a single cloud object to store each share, with name shares/`ST`/`STORAGEINDEX`/`SHNUM` By using the same prefix “shares/`ST`/`STORAGEINDEX`/” for old and new layouts, the storage server can obtain a list of cloud objects associated with a given shareset without having to know the layout in advance, and without having to make multiple API requests. This also simplifies sharing of test code between the disk and cloud backends. Mutable and immutable shares will be “chunked” in the same way. Rationale for Chunking ---------------------- Limiting the amount of data received or sent in a single request has the following advantages: * It is unnecessary to write separate code to take advantage of the “large object” features of each cloud storage interface, which differ significantly in their design. * Data needed for each PUT request can be discarded after it completes. If a PUT request fails, it can be retried while only holding the data for that request in memory. Costs ===== In this section we analyze the costs of the proposed design in terms of network, disk, memory, cloud storage, and API usage. Network usage: bandwidth and number-of-round-trips -------------------------------------------------- When a Tahoe-LAFS storage client allocates a new share on a storage server, the backend will request a list of the existing cloud objects with the appropriate prefix. This takes one HTTP request in the common case, but may take more for the S3 interface, which has a limit of 1000 objects returned in a single “GET Bucket” request. If the share is to be read, the client will make a number of calls each specifying the offset and length of the required span of bytes. On the first request that overlaps a given chunk of the share, the server will make an HTTP GET request for that cloud object. The server may also speculatively make GET requests for cloud objects that are likely to be needed soon (which can be predicted since reads are normally sequential), in order to reduce latency. Each read will be satisfied as soon as the corresponding data is available, without waiting for the rest of the chunk, in order to minimize read latency. All four cloud storage interfaces support GET requests using the Range HTTP header. This could be used to optimize reads where the Tahoe-LAFS storage client requires only part of a share. 
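To make that read path concrete, here is a small sketch (the helper and variable names are assumptions, not part of the proposed implementation) of how a requested span of a share could be mapped onto the cloud object names described under "Mapping" above and onto HTTP Range headers::

    def chunk_requests(storage_index, shnum, chunksize, offset, length):
        # Map an (offset, length) span of a share onto cloud object names of
        # the form shares/ST/STORAGEINDEX/SHNUM.i (with ".0" omitted), plus
        # the byte range to request from each object.
        prefix = "shares/%s/%s/" % (storage_index[:2], storage_index)
        start, end = offset, offset + length - 1        # inclusive positions
        requests = []
        for i in range(start // chunksize, end // chunksize + 1):
            name = prefix + str(shnum) + ("" if i == 0 else ".%d" % i)
            chunk_start = i * chunksize
            lo = max(start, chunk_start) - chunk_start
            hi = min(end, chunk_start + chunksize - 1) - chunk_start
            requests.append((name, "bytes=%d-%d" % (lo, hi)))
        return requests

For example, with a hypothetical 1 MiB chunk size, a read that straddles a chunk boundary produces two GET requests, one against each neighbouring cloud object, each with its own Range header.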
If the share is to be written, the server will make an HTTP PUT request for each chunk that has been completed. Tahoe-LAFS clients only write immutable shares sequentially, and so we can rely on that property to simplify the implementation. When modifying shares of an existing mutable file, the storage server will be able to make PUT requests only for chunks that have changed. (Current Tahoe-LAFS v1.9 clients will not take advantage of this ability, but future versions will probably do so for MDMF files.) In some cases, it may be necessary to retry a request (see the `Structure of Implementation`_ section below). In the case of a PUT request, at the point at which a retry is needed, the new chunk contents to be stored will still be in memory and so this is not problematic. In the absence of retries, the maximum number of GET requests that will be made when downloading a file, or the maximum number of PUT requests when uploading or modifying a file, will be equal to the number of chunks in the file. If the new mutable share content has fewer chunks than the old content, then the remaining cloud objects for old chunks must be deleted (using one HTTP request each). When reading a share, the backend must tolerate the case where these cloud objects have not been deleted successfully. The last write to a share will be reported as successful only when all corresponding HTTP PUTs and DELETEs have completed successfully. Disk usage (local to the storage server) ---------------------------------------- It is never necessary for the storage server to write the content of share chunks to local disk, either when they are read or when they are written. Each chunk is held only in memory. A proposed change to the Tahoe-LAFS storage server implementation uses a sqlite database to store metadata about shares. In that case the same database would be used for the cloud backend. This would enable lease tracking to be implemented in the same way for disk and cloud backends. Memory usage ------------ The use of chunking simplifies bounding the memory usage of the storage server when handling files that may be larger than memory. However, this depends on limiting the number of chunks that are simultaneously held in memory. Multiple chunks can be held in memory either because of pipelining of requests for a single share, or because multiple shares are being read or written (possibly by multiple clients). For immutable shares, the Tahoe-LAFS storage protocol requires the client to specify in advance the maximum amount of data it will write. Also, a cooperative client (including all existing released versions of the Tahoe-LAFS code) will limit the amount of data that is pipelined, currently to 50 KiB. Since the chunk size will be greater than that, it is possible to ensure that for each allocation, the maximum chunk data memory usage is the lesser of two chunks, and the allocation size. (There is some additional overhead but it is small compared to the chunk data.) If the maximum memory usage of a new allocation would exceed the memory available, the allocation can be delayed or possibly denied, so that the total memory usage is bounded. It is not clear that the existing protocol allows allocations for mutable shares to be bounded in general; this may be addressed in a future protocol change. The above discussion assumes that clients do not maliciously send large messages as a denial-of-service attack. 
Foolscap (the protocol layer underlying the Tahoe-LAFS storage protocol) does not attempt to resist denial of service. Storage ------- The storage requirements, including not-yet-collected garbage shares, are the same as for the Tahoe-LAFS disk backend. That is, the total size of cloud objects stored is equal to the total size of shares that the disk backend would store. Erasure coding causes the size of shares for each file to be a factor `shares.total` / `shares.needed` times the file size, plus overhead that is logarithmic in the file size `¹¹`_. API usage --------- Cloud storage backends typically charge a small fee per API request. The number of requests to the cloud storage service for various operations is discussed under “network usage” above. Structure of Implementation =========================== A generic “cloud backend”, based on the prototype S3 backend but with support for chunking as described above, will be written. An instance of the cloud backend can be attached to one of several “cloud interface adapters”, one for each cloud storage interface. These adapters will operate only on chunks, and need not distinguish between mutable and immutable shares. They will be a relatively “thin” abstraction layer over the HTTP APIs of each cloud storage interface, similar to the S3Bucket abstraction in the prototype. For some cloud storage services it may be necessary to transparently retry requests in order to recover from transient failures. (Although the erasure coding may enable a file to be retrieved even when shares are not stored by or not readable from all cloud storage services used in a Tahoe-LAFS grid, it may be desirable to retry cloud storage service requests in order to improve overall reliability.) Support for this will be implemented in the generic cloud backend, and used whenever a cloud storage adaptor reports a transient failure. Our experience with the prototype suggests that it is necessary to retry on transient failures for Amazon's S3 service. There will also be a “mock” cloud interface adaptor, based on the prototype's MockS3Bucket. This allows tests of the generic cloud backend to be run without a connection to a real cloud service. The mock adaptor will be able to simulate transient and non-transient failures. Known Issues ============ This design worsens a known “write hole” issue in Tahoe-LAFS when updating the contents of mutable files. An update to a mutable file can require changing the contents of multiple chunks, and if the client fails or is disconnected during the operation the resulting state of the stored cloud objects may be inconsistent: no longer containing all of the old version, but not yet containing all of the new version. A mutable share can be left in an inconsistent state even by the existing Tahoe-LAFS disk backend if it fails during a write, but that has a smaller chance of occurrence because the current client behavior leads to mutable shares being written to disk in a single system call. The best fix for this issue probably requires changing the Tahoe-LAFS storage protocol, perhaps by extending it to use a two-phase or three-phase commit (ticket #1755). References =========== ¹ omitted .. _²: ² “Amazon S3” Amazon (2012) https://aws.amazon.com/s3/ .. _³: ³ “Rackspace Cloud Files” Rackspace (2012) https://www.rackspace.com/cloud/cloud_hosting_products/files/ .. _⁴: ⁴ “Google Cloud Storage” Google (2012) https://developers.google.com/storage/ .. 
_⁵: ⁵ “Windows Azure Storage” Microsoft (2012) https://www.windowsazure.com/en-us/develop/net/fundamentals/cloud-storage/

.. _⁶: ⁶ “Amazon Simple Storage Service (Amazon S3) API Reference: REST API” Amazon (2012) http://docs.amazonwebservices.com/AmazonS3/latest/API/APIRest.html

.. _⁷: ⁷ “OpenStack Object Storage” openstack.org (2012) http://openstack.org/projects/storage/

.. _⁸: ⁸ “Google Cloud Storage Reference Guide” Google (2012) https://developers.google.com/storage/docs/reference-guide

.. _⁹: ⁹ “Windows Azure Storage Services REST API Reference” Microsoft (2012) http://msdn.microsoft.com/en-us/library/windowsazure/dd179355.aspx

.. _¹⁰: ¹⁰ “Representational state transfer” English Wikipedia (2012) https://en.wikipedia.org/wiki/Representational_state_transfer

.. _¹¹: ¹¹ “Performance costs for some common operations” tahoe-lafs.org (2012) https://tahoe-lafs.org/trac/tahoe-lafs/browser/trunk/docs/performance.rst

tahoe-lafs-1.10.0/docs/specifications/dirnodes.rst

==========================
Tahoe-LAFS Directory Nodes
==========================

As explained in the architecture docs, Tahoe-LAFS can be roughly viewed as a collection of three layers. The lowest layer is the key-value store: it provides operations that accept files and upload them to the grid, creating a URI in the process which securely references the file's contents. The middle layer is the filesystem, creating a structure of directories and filenames resembling the traditional unix/windows filesystems. The top layer is the application layer, which uses the lower layers to provide useful services to users, like a backup application, or a way to share files with friends.

This document examines the middle layer, the "filesystem".

1. `Key-value Store Primitives`_
2. `Filesystem goals`_
3. `Dirnode goals`_
4. `Dirnode secret values`_
5. `Dirnode storage format`_
6. `Dirnode sizes, mutable-file initial read sizes`_
7. `Design Goals, redux`_

   1. `Confidentiality leaks in the storage servers`_
   2. `Integrity failures in the storage servers`_
   3. `Improving the efficiency of dirnodes`_
   4. `Dirnode expiration and leases`_

8. `Starting Points: root dirnodes`_
9. `Mounting and Sharing Directories`_
10. `Revocation`_

Key-value Store Primitives
==========================

In the lowest layer (key-value store), there are two operations that reference immutable data (which we refer to as "CHK URIs" or "CHK read-capabilities" or "CHK read-caps"). One puts data into the grid (but only if it doesn't exist already), the other retrieves it::

    chk_uri = put(data)
    data = get(chk_uri)

We also have three operations which reference mutable data (which we refer to as "mutable slots", or "mutable write-caps and read-caps", or sometimes "SSK slots"). One creates a slot with some initial contents, a second replaces the contents of a pre-existing slot, and the third retrieves the contents::

    mutable_uri = create(initial_data)
    replace(mutable_uri, new_data)
    data = get(mutable_uri)

Filesystem Goals
================

The main goal for the middle (filesystem) layer is to give users a way to organize the data that they have uploaded into the grid. The traditional way to do this in computer filesystems is to put this data into files, give those files names, and collect these names into directories.

Each directory is a set of name-entry pairs, each of which maps a "child name" to a directory entry pointing to an object of some kind.
Those child objects might be files, or they might be other directories. Each directory entry also contains metadata. The directory structure is therefore a directed graph of nodes, in which each node might be a directory node or a file node. All file nodes are terminal nodes. Dirnode Goals ============= What properties might be desirable for these directory nodes? In no particular order: 1. functional. Code which does not work doesn't count. 2. easy to document, explain, and understand 3. confidential: it should not be possible for others to see the contents of a directory 4. integrity: it should not be possible for others to modify the contents of a directory 5. available: directories should survive host failure, just like files do 6. efficient: in storage, communication bandwidth, number of round-trips 7. easy to delegate individual directories in a flexible way 8. updateness: everybody looking at a directory should see the same contents 9. monotonicity: everybody looking at a directory should see the same sequence of updates Some of these goals are mutually exclusive. For example, availability and consistency are opposing, so it is not possible to achieve #5 and #8 at the same time. Moreover, it takes a more complex architecture to get close to the available-and-consistent ideal, so #2/#6 is in opposition to #5/#8. Tahoe-LAFS v0.7.0 introduced distributed mutable files, which use public-key cryptography for integrity, and erasure coding for availability. These achieve roughly the same properties as immutable CHK files, but their contents can be replaced without changing their identity. Dirnodes are then just a special way of interpreting the contents of a specific mutable file. Earlier releases used a "vdrive server": this server was abolished in the v0.7.0 release. For details of how mutable files work, please see ``_ in this directory. For releases since v0.7.0, we achieve most of our desired properties. The integrity and availability of dirnodes is equivalent to that of regular (immutable) files, with the exception that there are more simultaneous-update failure modes for mutable slots. Delegation is quite strong: you can give read-write or read-only access to any subtree, and the data format used for dirnodes is such that read-only access is transitive: i.e. if you grant Bob read-only access to a parent directory, then Bob will get read-only access (and *not* read-write access) to its children. Relative to the previous "vdrive-server" based scheme, the current distributed dirnode approach gives better availability, but cannot guarantee updateness quite as well, and requires far more network traffic for each retrieval and update. Mutable files are somewhat less available than immutable files, simply because of the increased number of combinations (shares of an immutable file are either present or not, whereas there are multiple versions of each mutable file, and you might have some shares of version 1 and other shares of version 2). In extreme cases of simultaneous update, mutable files might suffer from non-monotonicity. Dirnode secret values ===================== As mentioned before, dirnodes are simply a special way to interpret the contents of a mutable file, so the secret keys and capability strings described in ``_ are all the same. Each dirnode contains an RSA public/private keypair, and the holder of the "write capability" will be able to retrieve the private key (as well as the AES encryption key used for the data itself). 
The holder of the "read capability" will be able to obtain the public key and the AES data key, but not the RSA private key needed to modify the data. The "write capability" for a dirnode grants read-write access to its contents. This is expressed on concrete form as the "dirnode write cap": a printable string which contains the necessary secrets to grant this access. Likewise, the "read capability" grants read-only access to a dirnode, and can be represented by a "dirnode read cap" string. For example, URI:DIR2:swdi8ge1s7qko45d3ckkyw1aac%3Aar8r5j99a4mezdojejmsfp4fj1zeky9gjigyrid4urxdimego68o is a write-capability URI, while URI:DIR2-RO:buxjqykt637u61nnmjg7s8zkny:ar8r5j99a4mezdojejmsfp4fj1zeky9gjigyrid4urxdimego68o is a read-capability URI, both for the same dirnode. Dirnode storage format ====================== Each dirnode is stored in a single mutable file, distributed in the Tahoe-LAFS grid. The contents of this file are a serialized list of netstrings, one per child. Each child is a list of four netstrings: (name, rocap, rwcap, metadata). (Remember that the contents of the mutable file are encrypted by the read-cap, so this section describes the plaintext contents of the mutable file, *after* it has been decrypted by the read-cap.) The name is simple a UTF-8 -encoded child name. The 'rocap' is a read-only capability URI to that child, either an immutable (CHK) file, a mutable file, or a directory. It is also possible to store 'unknown' URIs that are not recognized by the current version of Tahoe-LAFS. The 'rwcap' is a read-write capability URI for that child, encrypted with the dirnode's write-cap: this enables the "transitive readonlyness" property, described further below. The 'metadata' is a JSON-encoded dictionary of type,value metadata pairs. Some metadata keys are pre-defined, the rest are left up to the application. Each rwcap is stored as IV + ciphertext + MAC. The IV is a 16-byte random value. The ciphertext is obtained by using AES in CTR mode on the rwcap URI string, using a key that is formed from a tagged hash of the IV and the dirnode's writekey. The MAC is written only for compatibility with older Tahoe-LAFS versions and is no longer verified. If Bob has read-only access to the 'bar' directory, and he adds it as a child to the 'foo' directory, then he will put the read-only cap for 'bar' in both the rwcap and rocap slots (encrypting the rwcap contents as described above). If he has full read-write access to 'bar', then he will put the read-write cap in the 'rwcap' slot, and the read-only cap in the 'rocap' slot. Since other users who have read-only access to 'foo' will be unable to decrypt its rwcap slot, this limits those users to read-only access to 'bar' as well, thus providing the transitive readonlyness that we desire. Dirnode sizes, mutable-file initial read sizes ============================================== How big are dirnodes? When reading dirnode data out of mutable files, how large should our initial read be? If we guess exactly, we can read a dirnode in a single round-trip, and update one in two RTT. If we guess too high, we'll waste some amount of bandwidth. If we guess low, we need to make a second pass to get the data (or the encrypted privkey, for writes), which will cost us at least another RTT. Assuming child names are between 10 and 99 characters long, how long are the various pieces of a dirnode? 
::

    netstring(name) ~= 4+len(name)
    chk-cap = 97 (for 4-char filesizes)
    dir-rw-cap = 88
    dir-ro-cap = 91
    netstring(cap) = 4+len(cap)
    encrypted(cap) = 16+cap+32
    JSON({}) = 2
    JSON({ctime=float,mtime=float,'tahoe':{linkcrtime=float,linkmotime=float}}): 137
    netstring(metadata) = 4+137 = 141

so a CHK entry is::

    5+ 4+len(name) + 4+97 + 5+16+97+32 + 4+137

And a 15-byte filename gives a 416-byte entry. When the entry points at a subdirectory instead of a file, the entry is a little bit smaller.

So an empty directory uses 0 bytes, a directory with one child uses about 416 bytes, a directory with two children uses about 832, etc.

When the dirnode data is encoded using our default 3-of-10, that means we get 139ish bytes of data in each share per child. The pubkey, signature, and hashes form the first 935ish bytes of the container, then comes our data, then about 1216 bytes of encprivkey.

So if we read the first::

    1kB: we get 65bytes of dirnode data : only empty directories
    2kB: 1065bytes: about 8
    3kB: 2065bytes: about 15 entries, or 6 entries plus the encprivkey
    4kB: 3065bytes: about 22 entries, or about 13 plus the encprivkey

So we've written the code to do an initial read of 4kB from each share when we read the mutable file, which should give good performance (one RTT) for small directories.

Design Goals, redux
===================

How well does this design meet the goals?

1. functional: YES: the code works and has extensive unit tests
2. documentable: YES: this document is the existence proof
3. confidential: YES: see below
4. integrity: MOSTLY: a coalition of storage servers can rollback individual mutable files, but not a single one. No server can substitute fake data as genuine.
5. availability: YES: as long as 'k' storage servers are present and have the same version of the mutable file, the dirnode will be available.
6. efficient: MOSTLY:
   network: single dirnode lookup is very efficient, since clients can fetch specific keys rather than being required to get or set the entire dirnode each time. Traversing many directories takes a lot of roundtrips, and these can't be collapsed with promise-pipelining because the intermediate values must only be visible to the client. Modifying many dirnodes at once (e.g. importing a large pre-existing directory tree) is pretty slow, since each graph edge must be created independently.
   storage: each child has a separate IV, which makes them larger than if all children were aggregated into a single encrypted string
7. delegation: VERY: each dirnode is a completely independent object, to which clients can be granted separate read-write or read-only access
8. updateness: VERY: with only a single point of access, and no caching, each client operation starts by fetching the current value, so there are no opportunities for staleness
9. monotonicity: VERY: the single point of access also protects against retrograde motion

Confidentiality leaks in the storage servers
--------------------------------------------

Dirnodes (and the mutable files upon which they are based) are very private against other clients: traffic between the client and the storage servers is protected by the Foolscap SSL connection, so they can observe very little. Storage index values are hashes of secrets and thus unguessable, and they are not made public, so other clients cannot snoop through encrypted dirnodes that they have not been told about.

Storage servers can observe access patterns and see ciphertext, but they cannot see the plaintext (of child names, metadata, or URIs).
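That protection relies on the encryption described under "Dirnode storage format" above. As a rough sketch of the rwcap encryption (using the ``cryptography`` package for illustration; the tag string, key size, and counter handling here are assumptions, and Tahoe-LAFS itself uses its own crypto wrappers)::

    import os, hashlib
    from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes

    TAG = b"illustrative-rwcap-tag"   # not the real tag string

    def encrypt_rwcap(writekey, rwcap_bytes):
        iv = os.urandom(16)
        # The key is a tagged hash of the IV and the dirnode's writekey; the
        # exact tagging scheme is an assumption in this sketch.
        key = hashlib.sha256(TAG + iv + writekey).digest()
        encryptor = Cipher(algorithms.AES(key), modes.CTR(b"\x00" * 16)).encryptor()
        ciphertext = encryptor.update(rwcap_bytes) + encryptor.finalize()
        # The legacy MAC field (no longer verified) is omitted here.
        return iv + ciphertext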
If an attacker operates a significant number of storage servers, they can infer the shape of the directory structure by assuming that directories are usually accessed from root to leaf in rapid succession. Since filenames are usually much shorter than read-caps and write-caps, the attacker can use the length of the ciphertext to guess the number of children of each node, and might be able to guess the length of the child names (or at least their sum). From this, the attacker may be able to build up a graph with the same shape as the plaintext filesystem, but with unlabeled edges and unknown file contents.

Integrity failures in the storage servers
-----------------------------------------

The mutable file's integrity mechanism (RSA signature on the hash of the file contents) prevents the storage server from modifying the dirnode's contents without detection. Therefore the storage servers can make the dirnode unavailable, but not corrupt it.

A sufficient number of colluding storage servers can perform a rollback attack: replace all shares of the whole mutable file with an earlier version. To prevent this, when retrieving the contents of a mutable file, the client queries more servers than necessary and uses the highest available version number. This ensures that one or two misbehaving storage servers cannot cause this rollback on their own.

Improving the efficiency of dirnodes
------------------------------------

The current mutable-file-based dirnode scheme suffers from certain inefficiencies. A very large directory (with thousands or millions of children) will take a significant time to extract any single entry, because the whole file must be downloaded first, then parsed and searched to find the desired child entry. Likewise, modifying a single child will require the whole file to be re-uploaded.

The current design assumes (and in some cases, requires) that dirnodes remain small. The mutable files on which dirnodes are based are currently using "SDMF" ("Small Distributed Mutable File") design rules, which state that the size of the data shall remain below one megabyte. More advanced forms of mutable files (MDMF and LDMF) are in the design phase to allow efficient manipulation of larger mutable files. This would reduce the work needed to modify a single entry in a large directory.

Judicious caching may help improve the reading-large-directory case. Some form of mutable index at the beginning of the dirnode might help as well. The MDMF design rules allow for efficient random-access reads from the middle of the file, which would give the index something useful to point at.

The current SDMF design generates a new RSA public/private keypair for each directory. This takes considerable time and CPU effort, generally one or two seconds per directory. We have designed (but not yet built) a DSA-based mutable file scheme which will use shared parameters to reduce the directory-creation effort to a bare minimum (picking a random number instead of generating two random primes).

When a backup program is run for the first time, it needs to copy a large amount of data from a pre-existing filesystem into reliable storage. This means that a large and complex directory structure needs to be duplicated in the dirnode layer. With the one-object-per-dirnode approach described here, this requires as many operations as there are edges in the imported filesystem graph.

Another approach would be to aggregate multiple directories into a single storage object.
This object would contain a serialized graph rather than a single name-to-child dictionary. Most directory operations would fetch the whole block of data (and presumably cache it for a while to avoid lots of re-fetches), and modification operations would need to replace the whole thing at once. This "realm" approach would have the added benefit of combining more data into a single encrypted bundle (perhaps hiding the shape of the graph from a determined attacker), and would reduce round-trips when performing deep directory traversals (assuming the realm was already cached). It would also prevent fine-grained rollback attacks from working: a coalition of storage servers could change the entire realm to look like an earlier state, but it could not independently roll back individual directories.

The drawbacks of this aggregation would be that small accesses (adding a single child, looking up a single child) would require pulling or pushing a lot of unrelated data, increasing network overhead (and necessitating test-and-set semantics for the modification side, which increases the chances that a user operation will fail, making it more challenging to provide promises of atomicity to the user).

It would also make it much more difficult to enable the delegation ("sharing") of specific directories. Since each aggregate "realm" provides all-or-nothing access control, the act of delegating any directory from the middle of the realm would require the realm first be split into the upper piece that isn't being shared and the lower piece that is. This splitting would have to be done in response to what is essentially a read operation, which is not traditionally supposed to be a high-effort action. On the other hand, it may be possible to aggregate the ciphertext, but use distinct encryption keys for each component directory, to get the benefits of both schemes at once.

Dirnode expiration and leases
-----------------------------

Dirnodes are created any time a client wishes to add a new directory. How long do they live? What's to keep them from sticking around forever, taking up space that nobody can reach any longer?

Mutable files are created with limited-time "leases", which keep the shares alive until the last lease has expired or been cancelled. Clients which know and care about specific dirnodes can ask to keep them alive for a while, by renewing a lease on them (with a typical period of one month). Clients are expected to assist in the deletion of dirnodes by canceling their leases as soon as they are done with them. This means that when a client unlinks a directory, it should also cancel its lease on that directory. When the lease count on a given share goes to zero, the storage server can delete the related storage. Multiple clients may all have leases on the same dirnode: the server may delete the shares only after all of the leases have gone away.

We expect that clients will periodically create a "manifest": a list of so-called "refresh capabilities" for all of the dirnodes and files that they can reach. They will give this manifest to the "repairer", which is a service that keeps files (and dirnodes) alive on behalf of clients who cannot take on this responsibility for themselves. These refresh capabilities include the storage index, but do *not* include the readkeys or writekeys, so the repairer does not get to read the files or directories that it is helping to keep alive.
After each change to the user's vdrive, the client creates a manifest and looks for differences from their previous version. Anything which was removed prompts the client to send out lease-cancellation messages, allowing the data to be deleted. Starting Points: root dirnodes ============================== Any client can record the URI of a directory node in some external form (say, in a local file) and use it as the starting point of later traversal. Each Tahoe-LAFS user is expected to create a new (unattached) dirnode when they first start using the grid, and record its URI for later use. Mounting and Sharing Directories ================================ The biggest benefit of this dirnode approach is that sharing individual directories is almost trivial. Alice creates a subdirectory that she wants to use to share files with Bob. This subdirectory is attached to Alice's filesystem at "~alice/share-with-bob". She asks her filesystem for the read-write directory URI for that new directory, and emails it to Bob. When Bob receives the URI, he asks his own local vdrive to attach the given URI, perhaps at a place named "~bob/shared-with-alice". Every time either party writes a file into this directory, the other will be able to read it. If Alice prefers, she can give a read-only URI to Bob instead, and then Bob will be able to read files but not change the contents of the directory. Neither Alice nor Bob will get access to any files above the mounted directory: there are no 'parent directory' pointers. If Alice creates a nested set of directories, "~alice/share-with-bob/subdir2", and gives a read-only URI to share-with-bob to Bob, then Bob will be unable to write to either share-with-bob/ or subdir2/. A suitable UI needs to be created to allow users to easily perform this sharing action: dragging a folder their vdrive to an IM or email user icon, for example. The UI will need to give the sending user an opportunity to indicate whether they want to grant read-write or read-only access to the recipient. The recipient then needs an interface to drag the new folder into their vdrive and give it a home. Revocation ========== When Alice decides that she no longer wants Bob to be able to access the shared directory, what should she do? Suppose she's shared this folder with both Bob and Carol, and now she wants Carol to retain access to it but Bob to be shut out. Ideally Carol should not have to do anything: her access should continue unabated. The current plan is to have her client create a deep copy of the folder in question, delegate access to the new folder to the remaining members of the group (Carol), asking the lucky survivors to replace their old reference with the new one. Bob may still have access to the old folder, but he is now the only one who cares: everyone else has moved on, and he will no longer be able to see their new changes. In a strict sense, this is the strongest form of revocation that can be accomplished: there is no point trying to force Bob to forget about the files that he read a moment before being kicked out. In addition it must be noted that anyone who can access the directory can proxy for Bob, reading files to him and accepting changes whenever he wants. Preventing delegation between communication parties is just as pointless as asking Bob to forget previously accessed files. However, there may be value to configuring the UI to ask Carol to not share files with Bob, or to removing all files from Bob's view at the same time his access is revoked. 
tahoe-lafs-1.10.0/docs/specifications/file-encoding.rst000066400000000000000000000173321221140116300230010ustar00rootroot00000000000000============= File Encoding ============= When the client wishes to upload an immutable file, the first step is to decide upon an encryption key. There are two methods: convergent or random. The goal of the convergent-key method is to make sure that multiple uploads of the same file will result in only one copy on the grid, whereas the random-key method does not provide this "convergence" feature. The convergent-key method computes the SHA-256d hash of a single-purpose tag, the encoding parameters, a "convergence secret", and the contents of the file. It uses a portion of the resulting hash as the AES encryption key. There are security concerns with using convergence this approach (the "partial-information guessing attack", please see ticket #365 for some references), so Tahoe uses a separate (randomly-generated) "convergence secret" for each node, stored in NODEDIR/private/convergence . The encoding parameters (k, N, and the segment size) are included in the hash to make sure that two different encodings of the same file will get different keys. This method requires an extra IO pass over the file, to compute this key, and encryption cannot be started until the pass is complete. This means that the convergent-key method will require at least two total passes over the file. The random-key method simply chooses a random encryption key. Convergence is disabled, however this method does not require a separate IO pass, so upload can be done with a single pass. This mode makes it easier to perform streaming upload. Regardless of which method is used to generate the key, the plaintext file is encrypted (using AES in CTR mode) to produce a ciphertext. This ciphertext is then erasure-coded and uploaded to the servers. Two hashes of the ciphertext are generated as the encryption proceeds: a flat hash of the whole ciphertext, and a Merkle tree. These are used to verify the correctness of the erasure decoding step, and can be used by a "verifier" process to make sure the file is intact without requiring the decryption key. The encryption key is hashed (with SHA-256d and a single-purpose tag) to produce the "Storage Index". This Storage Index (or SI) is used to identify the shares produced by the method described below. The grid can be thought of as a large table that maps Storage Index to a ciphertext. Since the ciphertext is stored as erasure-coded shares, it can also be thought of as a table that maps SI to shares. Anybody who knows a Storage Index can retrieve the associated ciphertext: ciphertexts are not secret. .. image:: file-encoding1.svg The ciphertext file is then broken up into segments. The last segment is likely to be shorter than the rest. Each segment is erasure-coded into a number of "blocks". This takes place one segment at a time. (In fact, encryption and erasure-coding take place at the same time, once per plaintext segment). Larger segment sizes result in less overhead overall, but increase both the memory footprint and the "alacrity" (the number of bytes we have to receive before we can deliver validated plaintext to the user). The current default segment size is 128KiB. One block from each segment is sent to each shareholder (aka leaseholder, aka landlord, aka storage node, aka peer). The "share" held by each remote shareholder is nominally just a collection of these blocks. 
The file will be recoverable when a certain number of shares have been retrieved. .. image:: file-encoding2.svg The blocks are hashed as they are generated and transmitted. These block hashes are put into a Merkle hash tree. When the last share has been created, the merkle tree is completed and delivered to the peer. Later, when we retrieve these blocks, the peer will send many of the merkle hash tree nodes ahead of time, so we can validate each block independently. The root of this block hash tree is called the "block root hash" and used in the next step. .. image:: file-encoding3.svg There is a higher-level Merkle tree called the "share hash tree". Its leaves are the block root hashes from each share. The root of this tree is called the "share root hash" and is included in the "URI Extension Block", aka UEB. The ciphertext hash and Merkle tree are also put here, along with the original file size, and the encoding parameters. The UEB contains all the non-secret values that could be put in the URI, but would have made the URI too big. So instead, the UEB is stored with the share, and the hash of the UEB is put in the URI. The URI then contains the secret encryption key and the UEB hash. It also contains the basic encoding parameters (k and N) and the file size, to make download more efficient (by knowing the number of required shares ahead of time, sufficient download queries can be generated in parallel). The URI (also known as the immutable-file read-cap, since possessing it grants the holder the capability to read the file's plaintext) is then represented as a (relatively) short printable string like so:: URI:CHK:auxet66ynq55naiy2ay7cgrshm:6rudoctmbxsmbg7gwtjlimd6umtwrrsxkjzthuldsmo4nnfoc6fa:3:10:1000000 .. image:: file-encoding4.svg During download, when a peer begins to transmit a share, it first transmits all of the parts of the share hash tree that are necessary to validate its block root hash. Then it transmits the portions of the block hash tree that are necessary to validate the first block. Then it transmits the first block. It then continues this loop: transmitting any portions of the block hash tree to validate block#N, then sending block#N. .. image:: file-encoding5.svg So the "share" that is sent to the remote peer actually consists of three pieces, sent in a specific order as they become available, and retrieved during download in a different order according to when they are needed. The first piece is the blocks themselves, one per segment. The last block will likely be shorter than the rest, because the last segment is probably shorter than the rest. The second piece is the block hash tree, consisting of a total of two SHA-1 hashes per block. The third piece is a hash chain from the share hash tree, consisting of log2(numshares) hashes. During upload, all blocks are sent first, followed by the block hash tree, followed by the share hash chain. During download, the share hash chain is delivered first, followed by the block root hash. The client then uses the hash chain to validate the block root hash. Then the peer delivers enough of the block hash tree to validate the first block, followed by the first block itself. The block hash chain is used to validate the block, then it is passed (along with the first block from several other peers) into decoding, to produce the first segment of crypttext, which is then decrypted to produce the first segment of plaintext, which is finally delivered to the user. .. 
image:: file-encoding6.svg Hashes ====== All hashes use SHA-256d, as defined in Practical Cryptography (by Ferguson and Schneier). All hashes use a single-purpose tag, e.g. the hash that converts an encryption key into a storage index is defined as follows:: SI = SHA256d(netstring("allmydata_immutable_key_to_storage_index_v1") + key) When two separate values need to be combined together in a hash, we wrap each in a netstring. Using SHA-256d (instead of plain SHA-256) guards against length-extension attacks. Using the tag protects our Merkle trees against attacks in which the hash of a leaf is confused with a hash of two children (allowing an attacker to generate corrupted data that nevertheless appears to be valid), and is simply good "cryptograhic hygiene". The `"Chosen Protocol Attack" by Kelsey, Schneier, and Wagner `_ is relevant. Putting the tag in a netstring guards against attacks that seek to confuse the end of the tag with the beginning of the subsequent value. tahoe-lafs-1.10.0/docs/specifications/file-encoding1.svg000066400000000000000000000513301221140116300230450ustar00rootroot00000000000000 image/svg+xml FILE (plaintext) convergentencryptionkey AES-CTR FILE (crypttext) tag storageindex SHA-256 SHA-256 tag encoding parameters randomencryptionkey or tahoe-lafs-1.10.0/docs/specifications/file-encoding2.svg000066400000000000000000001233631221140116300230540ustar00rootroot00000000000000 image/svg+xml FILE (crypttext) segA segB segC segD FEC block A1 block A2 block A3 block A4 FEC block B1 block B2 block B3 block B4 FEC block C1 block C2 block C3 block C4 FEC block D1 block D2 block D3 block D4 share4 peer 4 tahoe-lafs-1.10.0/docs/specifications/file-encoding3.svg000066400000000000000000000531161221140116300230530ustar00rootroot00000000000000 image/svg+xml SHA SHA SHA SHA SHA SHA SHA share A4 share B4 share C4 share D4 share4 peer 4 Merkle Tree block hash tree "block root hash" tahoe-lafs-1.10.0/docs/specifications/file-encoding4.svg000066400000000000000000000745341221140116300230630ustar00rootroot00000000000000 image/svg+xml blockroot hashes SHA s1 s2 s3 s4 SHA SHA shares share1 share2 share3 share4 Merkle Tree share hash tree "share root hash" URI Extension Block file size encoding parameters share root hash URI / "file read-cap" UEB hash encryption key SHA other hashes tahoe-lafs-1.10.0/docs/specifications/file-encoding5.svg000066400000000000000000000654661221140116300230700ustar00rootroot00000000000000 image/svg+xml blockroot hashes SHA s1 s2 s3 s4 SHA SHA share hash tree SHA s5 s6 s7 s8 SHA SHA Merkle Tree "share root hash" SHA merkle hash chainto validate s1 tahoe-lafs-1.10.0/docs/specifications/file-encoding6.svg000066400000000000000000001066431221140116300230620ustar00rootroot00000000000000 image/svg+xml SHA SHA SHA SHA SHA SHA share A4 share B4 share C4 share D4 share4 peer 4 Merkle Tree block hash tree "block root hash" blockroot hashes SHA s1 s2 s3 s4 SHA SHA Merkle Tree share hash tree "share root hash" merkle hash chainto validate s4 s4 tahoe-lafs-1.10.0/docs/specifications/mut.svg000066400000000000000000003320741221140116300210750ustar00rootroot00000000000000 image/svg+xml shares Merkle Tree AES-CTR SHA256d SHA256d SHA256d FEC salt encryption key write key read key verifying (public) key signing (private) key encrypted signing key verify cap read-write cap verify cap write key read-only cap verify cap read key plaintext ciphertext SHA256dtruncated SHA256dtruncated SHA256dtruncated SHA256dtruncated AES-CTR share 1 share 2 share 3 share 4 
tahoe-lafs-1.10.0/docs/specifications/mutable.rst000066400000000000000000001055121221140116300217250ustar00rootroot00000000000000============= Mutable Files ============= 1. `Mutable Formats`_ 2. `Consistency vs. Availability`_ 3. `The Prime Coordination Directive: "Don't Do That"`_ 4. `Small Distributed Mutable Files`_ 1. `SDMF slots overview`_ 2. `Server Storage Protocol`_ 3. `Code Details`_ 4. `SMDF Slot Format`_ 5. `Recovery`_ 5. `Medium Distributed Mutable Files`_ 6. `Large Distributed Mutable Files`_ 7. `TODO`_ Mutable files are places with a stable identifier that can hold data that changes over time. In contrast to immutable slots, for which the identifier/capability is derived from the contents themselves, the mutable file identifier remains fixed for the life of the slot, regardless of what data is placed inside it. Each mutable file is referenced by two different caps. The "read-write" cap grants read-write access to its holder, allowing them to put whatever contents they like into the slot. The "read-only" cap is less powerful, only granting read access, and not enabling modification of the data. The read-write cap can be turned into the read-only cap, but not the other way around. The data in these files is distributed over a number of servers, using the same erasure coding that immutable files use, with 3-of-10 being a typical choice of encoding parameters. The data is encrypted and signed in such a way that only the holders of the read-write cap will be able to set the contents of the slot, and only the holders of the read-only cap will be able to read those contents. Holders of either cap will be able to validate the contents as being written by someone with the read-write cap. The servers who hold the shares are not automatically given the ability read or modify them: the worst they can do is deny service (by deleting or corrupting the shares), or attempt a rollback attack (which can only succeed with the cooperation of at least k servers). Mutable Formats =============== History ------- When mutable files first shipped in Tahoe-0.8.0 (15-Feb-2008), the only version available was "SDMF", described below. This was a limited-functionality placeholder, intended to be replaced with improved-efficiency "MDMF" files shortly afterwards. The development process took longer than expected, and MDMF didn't ship until Tahoe-1.9.0 (31-Oct-2011), and even then it was opt-in (not used by default). SDMF was intended for relatively small mutable files, up to a few megabytes. It uses only one segment, so alacrity (the measure of how quickly the first byte of plaintext is returned to the client) suffers, as the whole file must be downloaded even if you only want to get a single byte. The memory used by both clients and servers also scales with the size of the file, instead of being limited to the half-a-MB-or-so that immutable file operations use, so large files cause significant memory usage. To discourage the use of SDMF outside it's design parameters, the early versions of Tahoe enforced a maximum size on mutable files (maybe 10MB). Since most directories are built out of mutable files, this imposed a limit of about 30k entries per directory. In subsequent releases, this limit was removed, but the performance problems inherent in the SDMF implementation remained. In the summer of 2010, Google-Summer-of-Code student Kevan Carstensen took on the project of finally implementing MDMF. 
Because of my (Brian) design mistake in SDMF (not including a separate encryption seed in each segment), the share format for SDMF could not be used for MDMF, resulting in a larger gap between the two implementations (my original intention had been to make SDMF a clean subset of MDMF, where any single-segment MDMF file could be handled by the old SDMF code). In the fall of 2011, Kevan's code was finally integrated, and first made available in the Tahoe-1.9.0 release. SDMF vs. MDMF ------------- The improvement of MDMF is the use of multiple segments: individual 128-KiB sections of the file can be retrieved or modified independently. The improvement can be seen when fetching just a portion of the file (using a Range: header on the webapi), or when modifying a portion (again with a Range: header). It can also be seen indirectly when fetching the whole file: the first segment of data should be delivered faster from a large MDMF file than from an SDMF file, although the overall download will then proceed at the same rate. We've decided to make it opt-in for now: mutable files default to SDMF format unless explicitly configured to use MDMF, either in ``tahoe.cfg`` (see ``__) or in the WUI or CLI command that created a new mutable file. The code can read and modify existing files of either format without user intervention. We expect to make MDMF the default in a subsequent release, perhaps 2.0. Which format should you use? SDMF works well for files up to a few MB, and can be handled by older versions (Tahoe-1.8.3 and earlier). If you do not need to support older clients, want to efficiently work with mutable files, and have code which will use Range: headers that make partial reads and writes, then MDMF is for you. Consistency vs. Availability ============================ There is an age-old battle between consistency and availability. Epic papers have been written, elaborate proofs have been established, and generations of theorists have learned that you cannot simultaneously achieve guaranteed consistency with guaranteed reliability. In addition, the closer to 0 you get on either axis, the cost and complexity of the design goes up. Tahoe's design goals are to largely favor design simplicity, then slightly favor read availability, over the other criteria. As we develop more sophisticated mutable slots, the API may expose multiple read versions to the application layer. The tahoe philosophy is to defer most consistency recovery logic to the higher layers. Some applications have effective ways to merge multiple versions, so inconsistency is not necessarily a problem (i.e. directory nodes can usually merge multiple "add child" operations). The Prime Coordination Directive: "Don't Do That" ================================================= The current rule for applications which run on top of Tahoe is "do not perform simultaneous uncoordinated writes". That means you need non-tahoe means to make sure that two parties are not trying to modify the same mutable slot at the same time. For example: * don't give the read-write URI to anyone else. Dirnodes in a private directory generally satisfy this case, as long as you don't use two clients on the same account at the same time * if you give a read-write URI to someone else, stop using it yourself. An inbox would be a good example of this. * if you give a read-write URI to someone else, call them on the phone before you write into it * build an automated mechanism to have your agents coordinate writes. 
For example, we expect a future release to include a FURL for a "coordination server" in the dirnodes. The rule can be that you must contact the coordination server and obtain a lock/lease on the file before you're allowed to modify it. If you do not follow this rule, Bad Things will happen. The worst-case Bad Thing is that the entire file will be lost. A less-bad Bad Thing is that one or more of the simultaneous writers will lose their changes. An observer of the file may not see monotonically-increasing changes to the file, i.e. they may see version 1, then version 2, then 3, then 2 again. Tahoe takes some amount of care to reduce the badness of these Bad Things. One way you can help nudge it from the "lose your file" case into the "lose some changes" case is to reduce the number of competing versions: multiple versions of the file that different parties are trying to establish as the one true current contents. Each simultaneous writer counts as a "competing version", as does the previous version of the file. If the count "S" of these competing versions is larger than N/k, then the file runs the risk of being lost completely. [TODO] If at least one of the writers remains running after the collision is detected, it will attempt to recover, but if S>(N/k) and all writers crash after writing a few shares, the file will be lost. Note that Tahoe uses serialization internally to make sure that a single Tahoe node will not perform simultaneous modifications to a mutable file. It accomplishes this by using a weakref cache of the MutableFileNode (so that there will never be two distinct MutableFileNodes for the same file), and by forcing all mutable file operations to obtain a per-node lock before they run. The Prime Coordination Directive therefore applies to inter-node conflicts, not intra-node ones. Small Distributed Mutable Files =============================== SDMF slots are suitable for small (<1MB) files that are editing by rewriting the entire file. The three operations are: * allocate (with initial contents) * set (with new contents) * get (old contents) The first use of SDMF slots will be to hold directories (dirnodes), which map encrypted child names to rw-URI/ro-URI pairs. SDMF slots overview ------------------- Each SDMF slot is created with a public/private key pair. The public key is known as the "verification key", while the private key is called the "signature key". The private key is hashed and truncated to 16 bytes to form the "write key" (an AES symmetric key). The write key is then hashed and truncated to form the "read key". The read key is hashed and truncated to form the 16-byte "storage index" (a unique string used as an index to locate stored data). The public key is hashed by itself to form the "verification key hash". The write key is hashed a different way to form the "write enabler master". For each storage server on which a share is kept, the write enabler master is concatenated with the server's nodeid and hashed, and the result is called the "write enabler" for that particular server. Note that multiple shares of the same slot stored on the same server will all get the same write enabler, i.e. the write enabler is associated with the "bucket", rather than the individual shares. The private key is encrypted (using AES in counter mode) by the write key, and the resulting crypttext is stored on the servers. so it will be retrievable by anyone who knows the write key. 
The write key is not used to encrypt anything else, and the private key never changes, so we do not need an IV for this purpose. The actual data is encrypted (using AES in counter mode) with a key derived by concatenating the readkey with the IV, the hashing the results and truncating to 16 bytes. The IV is randomly generated each time the slot is updated, and stored next to the encrypted data. The read-write URI consists of the write key and the verification key hash. The read-only URI contains the read key and the verification key hash. The verify-only URI contains the storage index and the verification key hash. :: URI:SSK-RW:b2a(writekey):b2a(verification_key_hash) URI:SSK-RO:b2a(readkey):b2a(verification_key_hash) URI:SSK-Verify:b2a(storage_index):b2a(verification_key_hash) Note that this allows the read-only and verify-only URIs to be derived from the read-write URI without actually retrieving the public keys. Also note that it means the read-write agent must validate both the private key and the public key when they are first fetched. All users validate the public key in exactly the same way. The SDMF slot is allocated by sending a request to the storage server with a desired size, the storage index, and the write enabler for that server's nodeid. If granted, the write enabler is stashed inside the slot's backing store file. All further write requests must be accompanied by the write enabler or they will not be honored. The storage server does not share the write enabler with anyone else. The SDMF slot structure will be described in more detail below. The important pieces are: * a sequence number * a root hash "R" * the encoding parameters (including k, N, file size, segment size) * a signed copy of [seqnum,R,encoding_params], using the signature key * the verification key (not encrypted) * the share hash chain (part of a Merkle tree over the share hashes) * the block hash tree (Merkle tree over blocks of share data) * the share data itself (erasure-coding of read-key-encrypted file data) * the signature key, encrypted with the write key The access pattern for read is: * hash read-key to get storage index * use storage index to locate 'k' shares with identical 'R' values * either get one share, read 'k' from it, then read k-1 shares * or read, say, 5 shares, discover k, either get more or be finished * or copy k into the URIs * read verification key * hash verification key, compare against verification key hash * read seqnum, R, encoding parameters, signature * verify signature against verification key * read share data, compute block-hash Merkle tree and root "r" * read share hash chain (leading from "r" to "R") * validate share hash chain up to the root "R" * submit share data to erasure decoding * decrypt decoded data with read-key * submit plaintext to application The access pattern for write is: * hash write-key to get read-key, hash read-key to get storage index * use the storage index to locate at least one share * read verification key and encrypted signature key * decrypt signature key using write-key * hash signature key, compare against write-key * hash verification key, compare against verification key hash * encrypt plaintext from application with read-key * application can encrypt some data with the write-key to make it only available to writers (use this for transitive read-onlyness of dirnodes) * erasure-code crypttext to form shares * split shares into blocks * compute Merkle tree of blocks, giving root "r" for each share * compute Merkle tree of shares, find root 
"R" for the file as a whole * create share data structures, one per server: * use seqnum which is one higher than the old version * share hash chain has log(N) hashes, different for each server * signed data is the same for each server * now we have N shares and need homes for them * walk through peers * if share is not already present, allocate-and-set * otherwise, try to modify existing share: * send testv_and_writev operation to each one * testv says to accept share if their(seqnum+R) <= our(seqnum+R) * count how many servers wind up with which versions (histogram over R) * keep going until N servers have the same version, or we run out of servers * if any servers wound up with a different version, report error to application * if we ran out of servers, initiate recovery process (described below) Server Storage Protocol ----------------------- The storage servers will provide a mutable slot container which is oblivious to the details of the data being contained inside it. Each storage index refers to a "bucket", and each bucket has one or more shares inside it. (In a well-provisioned network, each bucket will have only one share). The bucket is stored as a directory, using the base32-encoded storage index as the directory name. Each share is stored in a single file, using the share number as the filename. The container holds space for a container magic number (for versioning), the write enabler, the nodeid which accepted the write enabler (used for share migration, described below), a small number of lease structures, the embedded data itself, and expansion space for additional lease structures:: # offset size name 1 0 32 magic verstr "Tahoe mutable container v1\n\x75\x09\x44\x03\x8e" 2 32 20 write enabler's nodeid 3 52 32 write enabler 4 84 8 data size (actual share data present) (a) 5 92 8 offset of (8) count of extra leases (after data) 6 100 368 four leases, 92 bytes each 0 4 ownerid (0 means "no lease here") 4 4 expiration timestamp 8 32 renewal token 40 32 cancel token 72 20 nodeid which accepted the tokens 7 468 (a) data 8 ?? 4 count of extra leases 9 ?? n*92 extra leases The "extra leases" field must be copied and rewritten each time the size of the enclosed data changes. The hope is that most buckets will have four or fewer leases and this extra copying will not usually be necessary. The (4) "data size" field contains the actual number of bytes of data present in field (7), such that a client request to read beyond 504+(a) will result in an error. This allows the client to (one day) read relative to the end of the file. The container size (that is, (8)-(7)) might be larger, especially if extra size was pre-allocated in anticipation of filling the container with a lot of data. The offset in (5) points at the *count* of extra leases, at (8). The actual leases (at (9)) begin 4 bytes later. If the container size changes, both (8) and (9) must be relocated by copying. The server will honor any write commands that provide the write token and do not exceed the server-wide storage size limitations. Read and write commands MUST be restricted to the 'data' portion of the container: the implementation of those commands MUST perform correct bounds-checking to make sure other portions of the container are inaccessible to the clients. 
The two methods provided by the storage server on these "MutableSlot" share objects are: * readv(ListOf(offset=int, length=int)) * returns a list of bytestrings, of the various requested lengths * offset < 0 is interpreted relative to the end of the data * spans which hit the end of the data will return truncated data * testv_and_writev(write_enabler, test_vector, write_vector) * this is a test-and-set operation which performs the given tests and only applies the desired writes if all tests succeed. This is used to detect simultaneous writers, and to reduce the chance that an update will lose data recently written by some other party (written after the last time this slot was read). * test_vector=ListOf(TupleOf(offset, length, opcode, specimen)) * the opcode is a string, from the set [gt, ge, eq, le, lt, ne] * each element of the test vector is read from the slot's data and compared against the specimen using the desired (in)equality. If all tests evaluate True, the write is performed * write_vector=ListOf(TupleOf(offset, newdata)) * offset < 0 is not yet defined, it probably means relative to the end of the data, which probably means append, but we haven't nailed it down quite yet * write vectors are executed in order, which specifies the results of overlapping writes * return value: * error: OutOfSpace * error: something else (io error, out of memory, whatever) * (True, old_test_data): the write was accepted (test_vector passed) * (False, old_test_data): the write was rejected (test_vector failed) * both 'accepted' and 'rejected' return the old data that was used for the test_vector comparison. This can be used by the client to detect write collisions, including collisions for which the desired behavior was to overwrite the old version. In addition, the storage server provides several methods to access these share objects: * allocate_mutable_slot(storage_index, sharenums=SetOf(int)) * returns DictOf(int, MutableSlot) * get_mutable_slot(storage_index) * returns DictOf(int, MutableSlot) * or raises KeyError We intend to add an interface which allows small slots to allocate-and-write in a single call, as well as do update or read in a single call. The goal is to allow a reasonably-sized dirnode to be created (or updated, or read) in just one round trip (to all N shareholders in parallel). migrating shares ```````````````` If a share must be migrated from one server to another, two values become invalid: the write enabler (since it was computed for the old server), and the lease renew/cancel tokens. Suppose that a slot was first created on nodeA, and was thus initialized with WE(nodeA) (= H(WEM+nodeA)). Later, for provisioning reasons, the share is moved from nodeA to nodeB. Readers may still be able to find the share in its new home, depending upon how many servers are present in the grid, where the new nodeid lands in the permuted index for this particular storage index, and how many servers the reading client is willing to contact. When a client attempts to write to this migrated share, it will get a "bad write enabler" error, since the WE it computes for nodeB will not match the WE(nodeA) that was embedded in the share. When this occurs, the "bad write enabler" message must include the old nodeid (e.g. nodeA) that was in the share. The client then computes H(nodeB+H(WEM+nodeA)), which is the same as H(nodeB+WE(nodeA)). The client sends this along with the new WE(nodeB), which is H(WEM+nodeB). Note that the client only sends WE(nodeB) to nodeB, never to anyone else. 
Also note that the client does not send a value to nodeB that would allow the node to impersonate the client to a third node: everything sent to nodeB will include something specific to nodeB in it. The server locally computes H(nodeB+WE(nodeA)), using its own node id and the old write enabler from the share. It compares this against the value supplied by the client. If they match, this serves as proof that the client was able to compute the old write enabler. The server then accepts the client's new WE(nodeB) and writes it into the container. This WE-fixup process requires an extra round trip, and requires the error message to include the old nodeid, but does not require any public key operations on either client or server. Migrating the leases will require a similar protocol. This protocol will be defined concretely at a later date. Code Details ------------ The MutableFileNode class is used to manipulate mutable files (as opposed to ImmutableFileNodes). These are initially generated with client.create_mutable_file(), and later recreated from URIs with client.create_node_from_uri(). Instances of this class will contain a URI and a reference to the client (for peer selection and connection). NOTE: this section is out of date. Please see src/allmydata/interfaces.py (the section on IMutableFilesystemNode) for more accurate information. The methods of MutableFileNode are: * download_to_data() -> [deferred] newdata, NotEnoughSharesError * if there are multiple retrieveable versions in the grid, get() returns the first version it can reconstruct, and silently ignores the others. In the future, a more advanced API will signal and provide access to the multiple heads. * update(newdata) -> OK, UncoordinatedWriteError, NotEnoughSharesError * overwrite(newdata) -> OK, UncoordinatedWriteError, NotEnoughSharesError download_to_data() causes a new retrieval to occur, pulling the current contents from the grid and returning them to the caller. At the same time, this call caches information about the current version of the file. This information will be used in a subsequent call to update(), and if another change has occured between the two, this information will be out of date, triggering the UncoordinatedWriteError. update() is therefore intended to be used just after a download_to_data(), in the following pattern:: d = mfn.download_to_data() d.addCallback(apply_delta) d.addCallback(mfn.update) If the update() call raises UCW, then the application can simply return an error to the user ("you violated the Prime Coordination Directive"), and they can try again later. Alternatively, the application can attempt to retry on its own. To accomplish this, the app needs to pause, download the new (post-collision and post-recovery) form of the file, reapply their delta, then submit the update request again. A randomized pause is necessary to reduce the chances of colliding a second time with another client that is doing exactly the same thing:: d = mfn.download_to_data() d.addCallback(apply_delta) d.addCallback(mfn.update) def _retry(f): f.trap(UncoordinatedWriteError) d1 = pause(random.uniform(5, 20)) d1.addCallback(lambda res: mfn.download_to_data()) d1.addCallback(apply_delta) d1.addCallback(mfn.update) return d1 d.addErrback(_retry) Enthusiastic applications can retry multiple times, using a randomized exponential backoff between each. A particularly enthusiastic application can retry forever, but such apps are encouraged to provide a means to the user of giving up after a while. 
UCW does not mean that the update was not applied, so it is also a good idea to skip the retry-update step if the delta was already applied:: d = mfn.download_to_data() d.addCallback(apply_delta) d.addCallback(mfn.update) def _retry(f): f.trap(UncoordinatedWriteError) d1 = pause(random.uniform(5, 20)) d1.addCallback(lambda res: mfn.download_to_data()) def _maybe_apply_delta(contents): new_contents = apply_delta(contents) if new_contents != contents: return mfn.update(new_contents) d1.addCallback(_maybe_apply_delta) return d1 d.addErrback(_retry) update() is the right interface to use for delta-application situations, like directory nodes (in which apply_delta might be adding or removing child entries from a serialized table). Note that any uncoordinated write has the potential to lose data. We must do more analysis to be sure, but it appears that two clients who write to the same mutable file at the same time (even if both eventually retry) will, with high probability, result in one client observing UCW and the other silently losing their changes. It is also possible for both clients to observe UCW. The moral of the story is that the Prime Coordination Directive is there for a reason, and that recovery/UCW/retry is not a subsitute for write coordination. overwrite() tells the client to ignore this cached version information, and to unconditionally replace the mutable file's contents with the new data. This should not be used in delta application, but rather in situations where you want to replace the file's contents with completely unrelated ones. When raw files are uploaded into a mutable slot through the Tahoe-LAFS web-API (using POST and the ?mutable=true argument), they are put in place with overwrite(). The peer-selection and data-structure manipulation (and signing/verification) steps will be implemented in a separate class in allmydata/mutable.py . SMDF Slot Format ---------------- This SMDF data lives inside a server-side MutableSlot container. The server is oblivious to this format. This data is tightly packed. In particular, the share data is defined to run all the way to the beginning of the encrypted private key (the encprivkey offset is used both to terminate the share data and to begin the encprivkey). :: # offset size name 1 0 1 version byte, \x00 for this format 2 1 8 sequence number. 2^64-1 must be handled specially, TBD 3 9 32 "R" (root of share hash Merkle tree) 4 41 16 IV (share data is AES(H(readkey+IV)) ) 5 57 18 encoding parameters: 57 1 k 58 1 N 59 8 segment size 67 8 data length (of original plaintext) 6 75 32 offset table: 75 4 (8) signature 79 4 (9) share hash chain 83 4 (10) block hash tree 87 4 (11) share data 91 8 (12) encrypted private key 99 8 (13) EOF 7 107 436ish verification key (2048 RSA key) 8 543ish 256ish signature=RSAsign(sigkey, H(version+seqnum+r+IV+encparm)) 9 799ish (a) share hash chain, encoded as: "".join([pack(">H32s", shnum, hash) for (shnum,hash) in needed_hashes]) 10 (927ish) (b) block hash tree, encoded as: "".join([pack(">32s",hash) for hash in block_hash_tree]) 11 (935ish) LEN share data (no gap between this and encprivkey) 12 ?? 1216ish encrypted private key= AESenc(write-key, RSA-key) 13 ?? -- EOF (a) The share hash chain contains ceil(log(N)) hashes, each 32 bytes long. This is the set of hashes necessary to validate this share's leaf in the share Merkle tree. For N=10, this is 4 hashes, i.e. 128 bytes. (b) The block hash tree contains ceil(length/segsize) hashes, each 32 bytes long. 
This is the set of hashes necessary to validate any given block of share data up to the per-share root "r". Each "r" is a leaf of the share has tree (with root "R"), from which a minimal subset of hashes is put in the share hash chain in (8). Recovery -------- The first line of defense against damage caused by colliding writes is the Prime Coordination Directive: "Don't Do That". The second line of defense is to keep "S" (the number of competing versions) lower than N/k. If this holds true, at least one competing version will have k shares and thus be recoverable. Note that server unavailability counts against us here: the old version stored on the unavailable server must be included in the value of S. The third line of defense is our use of testv_and_writev() (described below), which increases the convergence of simultaneous writes: one of the writers will be favored (the one with the highest "R"), and that version is more likely to be accepted than the others. This defense is least effective in the pathological situation where S simultaneous writers are active, the one with the lowest "R" writes to N-k+1 of the shares and then dies, then the one with the next-lowest "R" writes to N-2k+1 of the shares and dies, etc, until the one with the highest "R" writes to k-1 shares and dies. Any other sequencing will allow the highest "R" to write to at least k shares and establish a new revision. The fourth line of defense is the fact that each client keeps writing until at least one version has N shares. This uses additional servers, if necessary, to make sure that either the client's version or some newer/overriding version is highly available. The fifth line of defense is the recovery algorithm, which seeks to make sure that at least *one* version is highly available, even if that version is somebody else's. The write-shares-to-peers algorithm is as follows: * permute peers according to storage index * walk through peers, trying to assign one share per peer * for each peer: * send testv_and_writev, using "old(seqnum+R) <= our(seqnum+R)" as the test * this means that we will overwrite any old versions, and we will overwrite simultaenous writers of the same version if our R is higher. We will not overwrite writers using a higher seqnum. * record the version that each share winds up with. If the write was accepted, this is our own version. If it was rejected, read the old_test_data to find out what version was retained. * if old_test_data indicates the seqnum was equal or greater than our own, mark the "Simultanous Writes Detected" flag, which will eventually result in an error being reported to the writer (in their close() call). * build a histogram of "R" values * repeat until the histogram indicate that some version (possibly ours) has N shares. Use new servers if necessary. * If we run out of servers: * if there are at least shares-of-happiness of any one version, we're happy, so return. (the close() might still get an error) * not happy, need to reinforce something, goto RECOVERY Recovery: * read all shares, count the versions, identify the recoverable ones, discard the unrecoverable ones. * sort versions: locate max(seqnums), put all versions with that seqnum in the list, sort by number of outstanding shares. Then put our own version. (TODO: put versions with seqnum us ahead of us?). 
* for each version: * attempt to recover that version * if not possible, remove it from the list, go to next one * if recovered, start at beginning of peer list, push that version, continue until N shares are placed * if pushing our own version, bump up the seqnum to one higher than the max seqnum we saw * if we run out of servers: * schedule retry and exponential backoff to repeat RECOVERY * admit defeat after some period? presumeably the client will be shut down eventually, maybe keep trying (once per hour?) until then. Medium Distributed Mutable Files ================================ These are just like the SDMF case, but: * We actually take advantage of the Merkle hash tree over the blocks, by reading a single segment of data at a time (and its necessary hashes), to reduce the read-time alacrity. * We allow arbitrary writes to any range of the file. * We add more code to first read each segment that a write must modify. This looks exactly like the way a normal filesystem uses a block device, or how a CPU must perform a cache-line fill before modifying a single word. * We might implement some sort of copy-based atomic update server call, to allow multiple writev() calls to appear atomic to any readers. MDMF slots provide fairly efficient in-place edits of very large files (a few GB). Appending data is also fairly efficient. Large Distributed Mutable Files =============================== LDMF slots (not implemented) would use a fundamentally different way to store the file, inspired by Mercurial's "revlog" format. This would enable very efficient insert/remove/replace editing of arbitrary spans. Multiple versions of the file can be retained, in a revision graph that can have multiple heads. Each revision can be referenced by a cryptographic identifier. There are two forms of the URI, one that means "most recent version", and a longer one that points to a specific revision. Metadata can be attached to the revisions, like timestamps, to enable rolling back an entire tree to a specific point in history. LDMF1 provides deltas but tries to avoid dealing with multiple heads. LDMF2 provides explicit support for revision identifiers and branching. TODO ==== improve allocate-and-write or get-writer-buckets API to allow one-call (or maybe two-call) updates. The challenge is in figuring out which shares are on which machines. First cut will have lots of round trips. (eventually) define behavior when seqnum wraps. At the very least make sure it can't cause a security problem. "the slot is worn out" is acceptable. (eventually) define share-migration lease update protocol. Including the nodeid who accepted the lease is useful, we can use the same protocol as we do for updating the write enabler. However we need to know which lease to update.. maybe send back a list of all old nodeids that we find, then try all of them when we accept the update? We now do this in a specially-formatted IndexError exception: "UNABLE to renew non-existent lease. I have leases accepted by " + "nodeids: '12345','abcde','44221' ." confirm that a repairer can regenerate shares without the private key. Hmm, without the write-enabler they won't be able to write those shares to the servers.. although they could add immutable new shares to new servers. 
tahoe-lafs-1.10.0/docs/specifications/outline.rst000066400000000000000000000264671221140116300217660ustar00rootroot00000000000000============================== Specification Document Outline ============================== While we do not yet have a clear set of specification documents for Tahoe (explaining the file formats, so that others can write interoperable implementations), this document is intended to lay out an outline for what these specs ought to contain. Think of this as the ISO 7-Layer Model for Tahoe. We currently imagine 4 documents. 1. `#1: Share Format, Encoding Algorithm`_ 2. `#2: Share Exchange Protocol`_ 3. `#3: Server Selection Algorithm, filecap format`_ 4. `#4: Directory Format`_ #1: Share Format, Encoding Algorithm ==================================== This document will describe the way that files are encrypted and encoded into shares. It will include a specification of the share format, and explain both the encoding and decoding algorithms. It will cover both mutable and immutable files. The immutable encoding algorithm, as described by this document, will start with a plaintext series of bytes, encoding parameters "k" and "N", and either an encryption key or a mechanism for deterministically deriving the key from the plaintext (the CHK specification). The algorithm will end with a set of N shares, and a set of values that must be included in the filecap to provide confidentiality (the encryption key) and integrity (the UEB hash). The immutable decoding algorithm will start with the filecap values (key and UEB hash) and "k" shares. It will explain how to validate the shares against the integrity information, how to reverse the erasure-coding, and how to decrypt the resulting ciphertext. It will result in the original plaintext bytes (or some subrange thereof). The sections on mutable files will contain similar information. This document is *not* responsible for explaining the filecap format, since full filecaps may need to contain additional information as described in document #3. Likewise it it not responsible for explaining where to put the generated shares or where to find them again later. It is also not responsible for explaining the access control mechanisms surrounding share upload, download, or modification ("Accounting" is the business of controlling share upload to conserve space, and mutable file shares require some sort of access control to prevent non-writecap holders from destroying shares). We don't yet have a document dedicated to explaining these, but let's call it "Access Control" for now. #2: Share Exchange Protocol =========================== This document explains the wire-protocol used to upload, download, and modify shares on the various storage servers. Given the N shares created by the algorithm described in document #1, and a set of servers who are willing to accept those shares, the protocols in this document will be sufficient to get the shares onto the servers. Likewise, given a set of servers who hold at least k shares, these protocols will be enough to retrieve the shares necessary to begin the decoding process described in document #1. The notion of a "storage index" is used to reference a particular share: the storage index is generated by the encoding process described in document #1. This document does *not* describe how to identify or choose those servers, rather it explains what to do once they have been selected (by the mechanisms in document #3). 
This document also explains the protocols that a client uses to ask a server whether or not it is willing to accept an uploaded share, and whether it has a share available for download. These protocols will be used by the mechanisms in document #3 to help decide where the shares should be placed. Where cryptographic mechanisms are necessary to implement access-control policy, this document will explain those mechanisms. In the future, Tahoe will be able to use multiple protocols to speak to storage servers. There will be alternative forms of this document, one for each protocol. The first one to be written will describe the Foolscap-based protocol that tahoe currently uses, but we anticipate a subsequent one to describe a more HTTP-based protocol. #3: Server Selection Algorithm, filecap format ============================================== This document has two interrelated purposes. With a deeper understanding of the issues, we may be able to separate these more cleanly in the future. The first purpose is to explain the server selection algorithm. Given a set of N shares, where should those shares be uploaded? Given some information stored about a previously-uploaded file, how should a downloader locate and recover at least k shares? Given a previously-uploaded mutable file, how should a modifier locate all (or most of) the shares with a reasonable amount of work? This question implies many things, all of which should be explained in this document: * the notion of a "grid", nominally a set of servers who could potentially hold shares, which might change over time * a way to configure which grid should be used * a way to discover which servers are a part of that grid * a way to decide which servers are reliable enough to be worth sending shares * an algorithm to handle servers which refuse shares * a way for a downloader to locate which servers have shares * a way to choose which shares should be used for download The server-selection algorithm has several obviously competing goals: * minimize the amount of work that must be done during upload * minimize the total storage resources used * avoid "hot spots", balance load among multiple servers * maximize the chance that enough shares will be downloadable later, by uploading lots of shares, and by placing them on reliable servers * minimize the work that the future downloader must do * tolerate temporary server failures, permanent server departure, and new server insertions * minimize the amount of information that must be added to the filecap The server-selection algorithm is defined in some context: some set of expectations about the servers or grid with which it is expected to operate. Different algorithms are appropriate for different situtations, so there will be multiple alternatives of this document. The first version of this document will describe the algorithm that the current (1.3.0) release uses, which is heavily weighted towards the two main use case scenarios for which Tahoe has been designed: the small, stable friendnet, and the allmydata.com managed grid. In both cases, we assume that the storage servers are online most of the time, they are uniformly highly reliable, and that the set of servers does not change very rapidly. The server-selection algorithm for this environment uses a permuted server list to achieve load-balancing, uses all servers identically, and derives the permutation key from the storage index to avoid adding a new field to the filecap. 
An alternative algorithm could give clients more precise control over share placement, for example by a user who wished to make sure that k+1 shares are located in each datacenter (to allow downloads to take place using only local bandwidth). This algorithm could skip the permuted list and use other mechanisms to accomplish load-balancing (or ignore the issue altogether). It could add additional information to the filecap (like a list of which servers received the shares) in lieu of performing a search at download time, perhaps at the expense of allowing a repairer to move shares to a new server after the initial upload. It might make up for this by storing "location hints" next to each share, to indicate where other shares are likely to be found, and obligating the repairer to update these hints. The second purpose of this document is to explain the format of the file capability string (or "filecap" for short). There are multiple kinds of capabilties (read-write, read-only, verify-only, repaircap, lease-renewal cap, traverse-only, etc). There are multiple ways to represent the filecap (compressed binary, human-readable, clickable-HTTP-URL, "tahoe:" URL, etc), but they must all contain enough information to reliably retrieve a file (given some context, of course). It must at least contain the confidentiality and integrity information from document #1 (i.e. the encryption key and the UEB hash). It must also contain whatever additional information the upload-time server-selection algorithm generated that will be required by the downloader. For some server-selection algorithms, the additional information will be minimal. For example, the 1.3.0 release uses the hash of the encryption key as a storage index, and uses the storage index to permute the server list, and uses an Introducer to learn the current list of servers. This allows a "close-enough" list of servers to be compressed into a filecap field that is already required anyways (the encryption key). It also adds k and N to the filecap, to speed up the downloader's search (the downloader knows how many shares it needs, so it can send out multiple queries in parallel). But other server-selection algorithms might require more information. Each variant of this document will explain how to encode that additional information into the filecap, and how to extract and use that information at download time. These two purposes are interrelated. A filecap that is interpreted in the context of the allmydata.com commercial grid, which uses tahoe-1.3.0, implies a specific peer-selection algorithm, a specific Introducer, and therefore a fairly-specific set of servers to query for shares. A filecap which is meant to be interpreted on a different sort of grid would need different information. Some filecap formats can be designed to contain more information (and depend less upon context), such as the way an HTTP URL implies the existence of a single global DNS system. Ideally a tahoe filecap should be able to specify which "grid" it lives in, with enough information to allow a compatible implementation of Tahoe to locate that grid and retrieve the file (regardless of which server-selection algorithm was used for upload). This more-universal format might come at the expense of reliability, however. Tahoe-1.3.0 filecaps do not contain hostnames, because the failure of DNS or an individual host might then impact file availability (however the Introducer contains DNS names or IP addresses). 
#4: Directory Format
====================

Tahoe directories are a special way of interpreting and managing the contents
of a file (either mutable or immutable). These "dirnode" files are basically
serialized tables that map child name to filecap/dircap. This document
describes the format of these files.

Tahoe-1.3.0 directories are "transitively readonly", which is accomplished by
applying an additional layer of encryption to the list of child writecaps. The
key for this encryption is derived from the containing file's writecap. This
document must explain how to derive this key and apply it to the appropriate
portion of the table.

Future versions of the directory format are expected to contain
"deep-traversal caps", which allow verification/repair of files without
exposing their plaintext to the repair agent. This document will be
responsible for explaining traversal caps too.

Future versions of the directory format will probably contain an index and
more advanced data structures (for efficiency and fast lookups), instead of a
simple flat list of (childname, childcap). This document will also need to
describe metadata formats, including what access-control policies are defined
for the metadata.

tahoe-lafs-1.10.0/docs/specifications/servers-of-happiness.rst000066400000000000000000000115771221140116300243660ustar00rootroot00000000000000====================
Servers of Happiness
====================

When you upload a file to a Tahoe-LAFS grid, you expect that it will stay
there for a while, and that it will do so even if a few of the peers on the
grid stop working, or if something else goes wrong. An upload health metric
helps to make sure that this actually happens. An upload health metric is a
test that looks at a file on a Tahoe-LAFS grid and says whether or not that
file is healthy; that is, whether it is distributed on the grid in such a way
as to ensure that it will probably survive in good enough shape to be
recoverable, even if a few things go wrong between the time of the test and
the time that it is recovered.

Our current upload health metric for immutable files is called
'servers-of-happiness'; its predecessor was called 'shares-of-happiness'.

shares-of-happiness used the number of encoded shares generated by a file
upload to say whether or not it was healthy. If there were more shares than a
user-configurable threshold, the file was reported to be healthy; otherwise,
it was reported to be unhealthy. In normal situations, the upload process
would distribute shares fairly evenly over the peers in the grid, and in that
case shares-of-happiness worked fine. However, because it only considered the
number of shares, and not where they were on the grid, it could not detect
situations where a file was unhealthy because most or all of the shares
generated from the file were stored on one or two peers.

servers-of-happiness addresses this by extending the share-focused upload
health metric to also consider the location of the shares on the grid.
servers-of-happiness looks at the mapping of peers to the shares that they
hold, and compares the cardinality of the largest happy subset of those to a
user-configurable threshold. A happy subset of peers has the property that
any k (where k is as in k-of-n encoding) peers within the subset can
reconstruct the source file. This definition of file health provides a
stronger assurance of file availability over time; with 3-of-10 encoding, and
happy=7, a healthy file is still guaranteed to be available even if 4 peers
fail.
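The next section explains how this metric is measured in terms of a bipartite
matching between peers and shares. As an illustrative sketch (not the code
Tahoe uses), the value can be computed with a standard augmenting-path
maximum-matching over the peer-to-shares map::

  def servers_of_happiness(sharemap):
      # `sharemap` maps a peer id to the set of share numbers it will hold
      # (or already holds). Returns the size of a maximum matching between
      # peers and shares, which is compared against the 'happy' threshold.
      match = {}  # share number -> peer currently matched to it

      def try_assign(peer, visited):
          # Classic augmenting-path step: try to give `peer` a share of
          # its own, recursively re-homing peers that already claimed it.
          for share in sharemap[peer]:
              if share in visited:
                  continue
              visited.add(share)
              if share not in match or try_assign(match[share], visited):
                  match[share] = peer
                  return True
          return False

      return sum(1 for peer in sharemap if try_assign(peer, set()))

  # Ten servers each holding one distinct share: happiness is 10, so an
  # upload with happy=7 would be declared healthy.
  spread = {"s%d" % i: set([i]) for i in range(10)}
  assert servers_of_happiness(spread) == 10

  # Ten servers each holding the same three shares (k = 3): the maximum
  # matching has only 3 edges, so the reported happiness is 3.
  dense = {"s%d" % i: set([0, 1, 2]) for i in range(10)}
  assert servers_of_happiness(dense) == 3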
Measuring Servers of Happiness ============================== We calculate servers-of-happiness by computing a matching on a bipartite graph that is related to the layout of shares on the grid. One set of vertices is the peers on the grid, and one set of vertices is the shares. An edge connects a peer and a share if the peer will (or does, for existing shares) hold the share. The size of the maximum matching on this graph is the size of the largest happy peer set that exists for the upload. First, note that a bipartite matching of size n corresponds to a happy subset of size n. This is because a bipartite matching of size n implies that there are n peers such that each peer holds a share that no other peer holds. Then any k of those peers collectively hold k distinct shares, and can restore the file. A bipartite matching of size n is not necessary for a happy subset of size n, however (so it is not correct to say that the size of the maximum matching on this graph is the size of the largest happy subset of peers that exists for the upload). For example, consider a file with k = 3, and suppose that each peer has all three of those pieces. Then, since any peer from the original upload can restore the file, if there are 10 peers holding shares, and the happiness threshold is 7, the upload should be declared happy, because there is a happy subset of size 10, and 10 > 7. However, since a maximum matching on the bipartite graph related to this layout has only 3 edges, Tahoe-LAFS declares the upload unhealthy. Though it is not unhealthy, a share layout like this example is inefficient; for k = 3, and if there are n peers, it corresponds to an expansion factor of 10x. Layouts that are declared healthy by the bipartite graph matching approach have the property that they correspond to uploads that are either already relatively efficient in their utilization of space, or can be made to be so by deleting shares; and that place all of the shares that they generate, enabling redistribution of shares later without having to re-encode the file. Also, it is computationally reasonable to compute a maximum matching in a bipartite graph, and there are well-studied algorithms to do that. Issues ====== The uploader is good at detecting unhealthy upload layouts, but it doesn't always know how to make an unhealthy upload into a healthy upload if it is possible to do so; it attempts to redistribute shares to achieve happiness, but only in certain circumstances. The redistribution algorithm isn't optimal, either, so even in these cases it will not always find a happy layout if one can be arrived at through redistribution. We are investigating improvements to address these issues. We don't use servers-of-happiness for mutable files yet; this fix will likely come in Tahoe-LAFS version 1.8. tahoe-lafs-1.10.0/docs/specifications/uri.rst000066400000000000000000000214261221140116300210740ustar00rootroot00000000000000========== Tahoe URIs ========== 1. `File URIs`_ 1. `CHK URIs`_ 2. `LIT URIs`_ 3. `Mutable File URIs`_ 2. `Directory URIs`_ 3. `Internal Usage of URIs`_ Each file and directory in a Tahoe filesystem is described by a "URI". There are different kinds of URIs for different kinds of objects, and there are different kinds of URIs to provide different kinds of access to those objects. Each URI is a string representation of a "capability" or "cap", and there are read-caps, write-caps, verify-caps, and others. Each URI provides both ``location`` and ``identification`` properties. 
``location`` means that holding the URI is sufficient to locate the data it
represents (this means it contains a storage index or a lookup key, whatever
is necessary to find the place or places where the data is being kept).
``identification`` means that the URI also serves to validate the data: an
attacker who wants to trick you into using the wrong data will be limited in
their abilities by the identification properties of the URI.

Some URIs are subsets of others. In particular, if you know a URI which
allows you to modify some object, you can produce a weaker read-only URI and
give it to someone else, and they will be able to read that object but not
modify it. Directories, for example, have a read-cap which is derived from
the write-cap: anyone with read/write access to the directory can produce a
limited URI that grants read-only access, but not the other way around.

src/allmydata/uri.py is the main place where URIs are processed. It is the
authoritative definition point for all the URI types described herein.

File URIs
=========

The lowest layer of the Tahoe architecture (the "grid") is responsible for
mapping URIs to data. This is basically a distributed hash table, in which
the URI is the key, and some sequence of bytes is the value.

There are two kinds of entries in this table: immutable and mutable. For
immutable entries, the URI represents a fixed chunk of data. The URI itself
is derived from the data when it is uploaded into the grid, and can be used
to locate and download that data from the grid at some time in the future.
For mutable entries, the URI identifies a "slot" or "container", which can be
filled with different pieces of data at different times.

It is important to note that the "files" described by these URIs are just a
bunch of bytes, and that **no** filenames or other metadata is retained at
this layer. The vdrive layer (which sits above the grid layer) is entirely
responsible for directories and filenames and the like.

CHK URIs
--------

CHK (Content Hash Keyed) files are immutable sequences of bytes. They are
uploaded in a distributed fashion using a "storage index" (for the "location"
property), and encrypted using a "read key". A secure hash of the data is
computed to help validate the data afterwards (providing the "identification"
property). All of these pieces, plus information about the file's size and
the number of shares into which it has been distributed, are put into the
"CHK" uri. The storage index is derived by hashing the read key (using a
tagged SHA-256d hash, then truncated to 128 bits), so it does not need to be
physically present in the URI.

The current format for CHK URIs is the concatenation of the following
strings::

 URI:CHK:(key):(hash):(needed-shares):(total-shares):(size)

Where (key) is the base32 encoding of the 16-byte AES read key, (hash) is the
base32 encoding of the SHA-256 hash of the URI Extension Block,
(needed-shares) is an ascii decimal representation of the number of shares
required to reconstruct this file, (total-shares) is the same representation
of the total number of shares created, and (size) is an ascii decimal
representation of the size of the data represented by this URI. All base32
encodings are expressed in lower-case, with the trailing '=' signs removed.
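As a sketch of what unpacking this format involves (the helper names below
are illustrative, not the API of src/allmydata/uri.py, which remains the
authoritative implementation)::

  import base64, re

  CHK_RE = re.compile(r"^URI:CHK:(?P<key>[a-z2-7]+):(?P<uebhash>[a-z2-7]+)"
                      r":(?P<k>\d+):(?P<n>\d+):(?P<size>\d+)$")

  def b32decode_nopad(s):
      # Tahoe strips the trailing '=' padding and uses lower case; restore
      # both before handing the string to the stdlib decoder.
      s = s.upper()
      s += "=" * ((8 - len(s) % 8) % 8)
      return base64.b32decode(s)

  def unpack_chk(uri):
      m = CHK_RE.match(uri)
      if not m:
          raise ValueError("not a CHK URI: %r" % (uri,))
      return {
          "readkey": b32decode_nopad(m.group("key")),       # 16-byte AES key
          "ueb_hash": b32decode_nopad(m.group("uebhash")),  # 32-byte SHA-256
          "needed_shares": int(m.group("k")),
          "total_shares": int(m.group("n")),
          "size": int(m.group("size")),
      }

Applied to the example URI shown next, this yields a 16-byte read key, a
32-byte URI Extension Block hash, needed-shares=3, total-shares=10, and a
size of 28733 bytes.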
For example, the following is a CHK URI, generated from a previous version of the contents of `<../architecture.rst>`_:: URI:CHK:ihrbeov7lbvoduupd4qblysj7a:bg5agsdt62jb34hxvxmdsbza6do64f4fg5anxxod2buttbo6udzq:3:10:28733 Historical note: The name "CHK" is somewhat inaccurate and continues to be used for historical reasons. "Content Hash Key" means that the encryption key is derived by hashing the contents, which gives the useful property that encoding the same file twice will result in the same URI. However, this is an optional step: by passing a different flag to the appropriate API call, Tahoe will generate a random encryption key instead of hashing the file: this gives the useful property that the URI or storage index does not reveal anything about the file's contents (except filesize), which improves privacy. The URI:CHK: prefix really indicates that an immutable file is in use, without saying anything about how the key was derived. LIT URIs -------- LITeral files are also an immutable sequence of bytes, but they are so short that the data is stored inside the URI itself. These are used for files of 55 bytes or shorter, which is the point at which the LIT URI is the same length as a CHK URI would be. LIT URIs do not require an upload or download phase, as their data is stored directly in the URI. The format of a LIT URI is simply a fixed prefix concatenated with the base32 encoding of the file's data:: URI:LIT:bjuw4y3movsgkidbnrwg26lemf2gcl3xmvrc6kropbuhi3lmbi The LIT URI for an empty file is "URI:LIT:", and the LIT URI for a 5-byte file that contains the string "hello" is "URI:LIT:nbswy3dp". Mutable File URIs ----------------- The other kind of DHT entry is the "mutable slot", in which the URI names a container to which data can be placed and retrieved without changing the identity of the container. These slots have write-caps (which allow read/write access), read-caps (which only allow read-access), and verify-caps (which allow a file checker/repairer to confirm that the contents exist, but does not let it decrypt the contents). Mutable slots use public key technology to provide data integrity, and put a hash of the public key in the URI. As a result, the data validation is limited to confirming that the data retrieved matches *some* data that was uploaded in the past, but not _which_ version of that data. The format of the write-cap for mutable files is:: URI:SSK:(writekey):(fingerprint) Where (writekey) is the base32 encoding of the 16-byte AES encryption key that is used to encrypt the RSA private key, and (fingerprint) is the base32 encoded 32-byte SHA-256 hash of the RSA public key. For more details about the way these keys are used, please see ``_. The format for mutable read-caps is:: URI:SSK-RO:(readkey):(fingerprint) The read-cap is just like the write-cap except it contains the other AES encryption key: the one used for encrypting the mutable file's contents. This second key is derived by hashing the writekey, which allows the holder of a write-cap to produce a read-cap, but not the other way around. The fingerprint is the same in both caps. Historical note: the "SSK" prefix is a perhaps-inaccurate reference to "Sub-Space Keys" from the Freenet project, which uses a vaguely similar structure to provide mutable file access. Directory URIs ============== The grid layer provides a mapping from URI to data. 
To turn this into a graph of directories and files, the "vdrive" layer (which sits on top of the grid layer) needs to keep track of "directory nodes", or "dirnodes" for short. ``_ describes how these work. Dirnodes are contained inside mutable files, and are thus simply a particular way to interpret the contents of these files. As a result, a directory write-cap looks a lot like a mutable-file write-cap:: URI:DIR2:(writekey):(fingerprint) Likewise directory read-caps (which provide read-only access to the directory) look much like mutable-file read-caps:: URI:DIR2-RO:(readkey):(fingerprint) Historical note: the "DIR2" prefix is used because the non-distributed dirnodes in earlier Tahoe releases had already claimed the "DIR" prefix. Internal Usage of URIs ====================== The classes in source:src/allmydata/uri.py are used to pack and unpack these various kinds of URIs. Three Interfaces are defined (IURI, IFileURI, and IDirnodeURI) which are implemented by these classes, and string-to-URI-class conversion routines have been registered as adapters, so that code which wants to extract e.g. the size of a CHK or LIT uri can do:: print IFileURI(uri).get_size() If the URI does not represent a CHK or LIT uri (for example, if it was for a directory instead), the adaptation will fail, raising a TypeError inside the IFileURI() call. Several utility methods are provided on these objects. The most important is ``to_string()``, which returns the string form of the URI. Therefore ``IURI(uri).to_string == uri`` is true for any valid URI. See the IURI class in source:src/allmydata/interfaces.py for more details. tahoe-lafs-1.10.0/docs/stats.rst000066400000000000000000000352361221140116300164340ustar00rootroot00000000000000================ Tahoe Statistics ================ 1. `Overview`_ 2. `Statistics Categories`_ 3. `Running a Tahoe Stats-Gatherer Service`_ 4. `Using Munin To Graph Stats Values`_ Overview ======== Each Tahoe node collects and publishes statistics about its operations as it runs. These include counters of how many files have been uploaded and downloaded, CPU usage information, performance numbers like latency of storage server operations, and available disk space. The easiest way to see the stats for any given node is use the web interface. From the main "Welcome Page", follow the "Operational Statistics" link inside the small "This Client" box. If the welcome page lives at http://localhost:3456/, then the statistics page will live at http://localhost:3456/statistics . This presents a summary of the stats block, along with a copy of the raw counters. To obtain just the raw counters (in JSON format), use /statistics?t=json instead. Statistics Categories ===================== The stats dictionary contains two keys: 'counters' and 'stats'. 'counters' are strictly counters: they are reset to zero when the node is started, and grow upwards. 'stats' are non-incrementing values, used to measure the current state of various systems. Some stats are actually booleans, expressed as '1' for true and '0' for false (internal restrictions require all stats values to be numbers). Under both the 'counters' and 'stats' dictionaries, each individual stat has a key with a dot-separated name, breaking them up into groups like 'cpu_monitor' and 'storage_server'. The currently available stats (as of release 1.6.0 or so) are described here: **counters.storage_server.\*** this group counts inbound storage-server operations. 
They are not provided by client-only nodes which have been configured to not run a storage server (with [storage]enabled=false in tahoe.cfg) allocate, write, close, abort these are for immutable file uploads. 'allocate' is incremented when a client asks if it can upload a share to the server. 'write' is incremented for each chunk of data written. 'close' is incremented when the share is finished. 'abort' is incremented if the client abandons the upload. get, read these are for immutable file downloads. 'get' is incremented when a client asks if the server has a specific share. 'read' is incremented for each chunk of data read. readv, writev these are for immutable file creation, publish, and retrieve. 'readv' is incremented each time a client reads part of a mutable share. 'writev' is incremented each time a client sends a modification request. add-lease, renew, cancel these are for share lease modifications. 'add-lease' is incremented when an 'add-lease' operation is performed (which either adds a new lease or renews an existing lease). 'renew' is for the 'renew-lease' operation (which can only be used to renew an existing one). 'cancel' is used for the 'cancel-lease' operation. bytes_freed this counts how many bytes were freed when a 'cancel-lease' operation removed the last lease from a share and the share was thus deleted. bytes_added this counts how many bytes were consumed by immutable share uploads. It is incremented at the same time as the 'close' counter. **stats.storage_server.\*** allocated this counts how many bytes are currently 'allocated', which tracks the space that will eventually be consumed by immutable share upload operations. The stat is increased as soon as the upload begins (at the same time the 'allocated' counter is incremented), and goes back to zero when the 'close' or 'abort' message is received (at which point the 'disk_used' stat should incremented by the same amount). disk_total, disk_used, disk_free_for_root, disk_free_for_nonroot, disk_avail, reserved_space these all reflect disk-space usage policies and status. 'disk_total' is the total size of disk where the storage server's BASEDIR/storage/shares directory lives, as reported by /bin/df or equivalent. 'disk_used', 'disk_free_for_root', and 'disk_free_for_nonroot' show related information. 'reserved_space' reports the reservation configured by the tahoe.cfg [storage]reserved_space value. 'disk_avail' reports the remaining disk space available for the Tahoe server after subtracting reserved_space from disk_avail. All values are in bytes. accepting_immutable_shares this is '1' if the storage server is currently accepting uploads of immutable shares. It may be '0' if a server is disabled by configuration, or if the disk is full (i.e. disk_avail is less than reserved_space). total_bucket_count this counts the number of 'buckets' (i.e. unique storage-index values) currently managed by the storage server. It indicates roughly how many files are managed by the server. latencies.*.* these stats keep track of local disk latencies for storage-server operations. A number of percentile values are tracked for many operations. For example, 'storage_server.latencies.readv.50_0_percentile' records the median response time for a 'readv' request. All values are in seconds. These are recorded by the storage server, starting from the time the request arrives (post-deserialization) and ending when the response begins serialization. As such, they are mostly useful for measuring disk speeds. 
The operations tracked are the same as the counters.storage_server.* counter values (allocate, write, close, get, read, add-lease, renew, cancel, readv, writev). The percentile values tracked are: mean, 01_0_percentile, 10_0_percentile, 50_0_percentile, 90_0_percentile, 95_0_percentile, 99_0_percentile, 99_9_percentile. (the last value, 99.9 percentile, means that 999 out of the last 1000 operations were faster than the given number, and is the same threshold used by Amazon's internal SLA, according to the Dynamo paper). Percentiles are only reported in the case of a sufficient number of observations for unambiguous interpretation. For example, the 99.9th percentile is (at the level of thousandths precision) 9 thousandths greater than the 99th percentile for sample sizes greater than or equal to 1000, thus the 99.9th percentile is only reported for samples of 1000 or more observations. **counters.uploader.files_uploaded** **counters.uploader.bytes_uploaded** **counters.downloader.files_downloaded** **counters.downloader.bytes_downloaded** These count client activity: a Tahoe client will increment these when it uploads or downloads an immutable file. 'files_uploaded' is incremented by one for each operation, while 'bytes_uploaded' is incremented by the size of the file. **counters.mutable.files_published** **counters.mutable.bytes_published** **counters.mutable.files_retrieved** **counters.mutable.bytes_retrieved** These count client activity for mutable files. 'published' is the act of changing an existing mutable file (or creating a brand-new mutable file). 'retrieved' is the act of reading its current contents. **counters.chk_upload_helper.\*** These count activity of the "Helper", which receives ciphertext from clients and performs erasure-coding and share upload for files that are not already in the grid. The code which implements these counters is in src/allmydata/immutable/offloaded.py . upload_requests incremented each time a client asks to upload a file upload_already_present: incremented when the file is already in the grid upload_need_upload incremented when the file is not already in the grid resumes incremented when the helper already has partial ciphertext for the requested upload, indicating that the client is resuming an earlier upload fetched_bytes this counts how many bytes of ciphertext have been fetched from uploading clients encoded_bytes this counts how many bytes of ciphertext have been encoded and turned into successfully-uploaded shares. If no uploads have failed or been abandoned, encoded_bytes should eventually equal fetched_bytes. **stats.chk_upload_helper.\*** These also track Helper activity: active_uploads how many files are currently being uploaded. 0 when idle. incoming_count how many cache files are present in the incoming/ directory, which holds ciphertext files that are still being fetched from the client incoming_size total size of cache files in the incoming/ directory incoming_size_old total size of 'old' cache files (more than 48 hours) encoding_count how many cache files are present in the encoding/ directory, which holds ciphertext files that are being encoded and uploaded encoding_size total size of cache files in the encoding/ directory encoding_size_old total size of 'old' cache files (more than 48 hours) **stats.node.uptime** how many seconds since the node process was started **stats.cpu_monitor.\*** 1min_avg, 5min_avg, 15min_avg estimate of what percentage of system CPU time was consumed by the node process, over the given time interval. 
Expressed as a float, 0.0 for 0%, 1.0 for 100% total estimate of total number of CPU seconds consumed by node since the process was started. Ticket #472 indicates that .total may sometimes be negative due to wraparound of the kernel's counter. **stats.load_monitor.\*** When enabled, the "load monitor" continually schedules a one-second callback, and measures how late the response is. This estimates system load (if the system is idle, the response should be on time). This is only enabled if a stats-gatherer is configured. avg_load average "load" value (seconds late) over the last minute max_load maximum "load" value over the last minute Running a Tahoe Stats-Gatherer Service ====================================== The "stats-gatherer" is a simple daemon that periodically collects stats from several tahoe nodes. It could be useful, e.g., in a production environment, where you want to monitor dozens of storage servers from a central management host. It merely gatherers statistics from many nodes into a single place: it does not do any actual analysis. The stats gatherer listens on a network port using the same Foolscap_ connection library that Tahoe clients use to connect to storage servers. Tahoe nodes can be configured to connect to the stats gatherer and publish their stats on a periodic basis. (In fact, what happens is that nodes connect to the gatherer and offer it a second FURL which points back to the node's "stats port", which the gatherer then uses to pull stats on a periodic basis. The initial connection is flipped to allow the nodes to live behind NAT boxes, as long as the stats-gatherer has a reachable IP address.) .. _Foolscap: http://foolscap.lothar.com/trac The stats-gatherer is created in the same fashion as regular tahoe client nodes and introducer nodes. Choose a base directory for the gatherer to live in (but do not create the directory). Then run: :: tahoe create-stats-gatherer $BASEDIR and start it with "tahoe start $BASEDIR". Once running, the gatherer will write a FURL into $BASEDIR/stats_gatherer.furl . To configure a Tahoe client/server node to contact the stats gatherer, copy this FURL into the node's tahoe.cfg file, in a section named "[client]", under a key named "stats_gatherer.furl", like so: :: [client] stats_gatherer.furl = pb://qbo4ktl667zmtiuou6lwbjryli2brv6t@192.168.0.8:49997/wxycb4kaexzskubjnauxeoptympyf45y or simply copy the stats_gatherer.furl file into the node's base directory (next to the tahoe.cfg file): it will be interpreted in the same way. The first time it is started, the gatherer will listen on a random unused TCP port, so it should not conflict with anything else that you have running on that host at that time. On subsequent runs, it will re-use the same port (to keep its FURL consistent). To explicitly control which port it uses, write the desired portnumber into a file named "portnum" (i.e. $BASEDIR/portnum), and the next time the gatherer is started, it will start listening on the given port. The portnum file is actually a "strports specification string", as described in `docs/configuration.rst `_. Once running, the stats gatherer will create a standard python "pickle" file in $BASEDIR/stats.pickle . Once a minute, the gatherer will pull stats information from every connected node and write them into the pickle. The pickle will contain a dictionary, in which node identifiers (known as "tubid" strings) are the keys, and the values are a dict with 'timestamp', 'nickname', and 'stats' keys. 
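A small external tool can load this pickle directly, for example to total up
the available storage space reported by every connected node (the next
paragraph suggests exactly this kind of use). A sketch, assuming the
'counters'/'stats' layout described in the Statistics Categories section::

  import pickle

  def total_disk_avail(pickle_path):
      # Load the gatherer's most recent snapshot. Keys are tubid strings;
      # each value holds 'timestamp', 'nickname', and 'stats', where
      # 'stats' is the same structure served at /statistics?t=json
      # (i.e. it contains 'counters' and 'stats' sub-dictionaries).
      with open(pickle_path, "rb") as f:
          data = pickle.load(f)
      total = 0
      for tubid, entry in data.items():
          node_stats = entry["stats"]["stats"]
          total += node_stats.get("storage_server.disk_avail", 0)
      return total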
d[tubid][stats] will contain the stats dictionary as made available at http://localhost:3456/statistics?t=json . The pickle file will only contain the most recent update from each node. Other tools can be built to examine these stats and render them into something useful. For example, a tool could sum the "storage_server.disk_avail' values from all servers to compute a total-disk-available number for the entire grid (however, the "disk watcher" daemon, in misc/operations_helpers/spacetime/, is better suited for this specific task). Using Munin To Graph Stats Values ================================= The misc/munin/ directory contains various plugins to graph stats for Tahoe nodes. They are intended for use with the Munin_ system-management tool, which typically polls target systems every 5 minutes and produces a web page with graphs of various things over multiple time scales (last hour, last month, last year). .. _Munin: http://munin-monitoring.org/ Most of the plugins are designed to pull stats from a single Tahoe node, and are configured with the e.g. http://localhost:3456/statistics?t=json URL. The "tahoe_stats" plugin is designed to read from the pickle file created by the stats-gatherer. Some plugins are to be used with the disk watcher, and a few (like tahoe_nodememory) are designed to watch the node processes directly (and must therefore run on the same host as the target node). Please see the docstrings at the beginning of each plugin for details, and the "tahoe-conf" file for notes about configuration and installing these plugins into a Munin environment. tahoe-lafs-1.10.0/docs/subtree1.svg000066400000000000000000000450201221140116300170070ustar00rootroot00000000000000 image/svg+xml File CHK: URI... File CHK: URI... File CHK: URI... foo bar baz.jpg My Music My Stuff cool.mp3 boring.mp3 ROOT tahoe-lafs-1.10.0/docs/write_coordination.rst000066400000000000000000000016041221140116300211700ustar00rootroot00000000000000================================== Avoiding Write Collisions in Tahoe ================================== Tahoe does not provide locking of mutable files and directories. If there is more than one simultaneous attempt to change a mutable file or directory, then an ``UncoordinatedWriteError`` may result. This might, in rare cases, cause the file or directory contents to be accidentally deleted. The user is expected to ensure that there is at most one outstanding write or update request for a given file or directory at a time. One convenient way to accomplish this is to make a different file or directory for each person or process that wants to write. If mutable parts of a filesystem are accessed via sshfs, only a single sshfs mount should be used. There may be data loss if mutable files or directories are accessed via two sshfs mounts, or written both via sshfs and from other clients. tahoe-lafs-1.10.0/misc/000077500000000000000000000000001221140116300145365ustar00rootroot00000000000000tahoe-lafs-1.10.0/misc/awesome_weird_stuff/000077500000000000000000000000001221140116300205775ustar00rootroot00000000000000tahoe-lafs-1.10.0/misc/awesome_weird_stuff/boodlegrid.tac000066400000000000000000000140101221140116300233760ustar00rootroot00000000000000# -*- python -*- """Monitor a Tahoe grid, by playing sounds in response to remote events. To install: 1: install Boodler, from http://www.eblong.com/zarf/boodler/ 2: run "boodler.py -l listen.Sounds". 
This will run a daemon that listens on a network socket (31863 by default) and accepts commands in the form of "sound bird/crow1.aiff\n" 3: copy this file into a new directory, which we'll call $BASEDIR 4: write one or more logport FURLs into files named *.furl or *.furls, one per line. All logports from all such files will be used. 5: launch this daemon with 'cd $BASEDIR && twistd -y boodlegrid.tac' """ import os, time from zope.interface import implements from twisted.application import service from twisted.internet import protocol, reactor, defer from foolscap import Tub, Referenceable from foolscap.logging.interfaces import RILogObserver from twisted.python import log class Listener: def __init__(self): self.boodler = None # filled in when we connect to boodler self.last = {} def sound(self, name, slot=None, max=0.100): if not self.boodler: return now = time.time() if slot is None: slot = name if now < self.last.get(slot, 0) + max: return # too soon self.last[slot] = now self.boodler.write("sound %s\n" % name) def msg(self, m, furl): #print "got it", m message = m.get("message", m.get("format", "")) format = m.get("format", "") facility = m.get("facility", "") # messages emitted by the Introducer: client join/leave if message.startswith("introducer: subscription[storage] request"): print "new client" self.sound("voice/hooray.aiff") if message.startswith("introducer: unsubscribing"): print "unsubscribe" self.sound("electro/zaptrill-fade.aiff") # messages from the helper if message == "file already found in grid": print "already found" self.sound("mech/ziplash-high.aiff") #if message == "upload done": if format == "plaintext_hash=%(plaintext_hash)s, SI=%(SI)s, size=%(size)d": size = m.get("size") print "upload done, size", size self.sound("mech/ziplash-low.aiff") if "fetching " in message: # helper grabbing ciphertext from client self.sound("voice/phoneme/sh.aiff", max=0.5) # messages from storage servers if message.startswith("storage: slot_readv"): #self.sound("voice/phoneme/r.aiff") self.sound("percussion/wood-tap-hollow.aiff") # messages from webapi if message.startswith("Retrieve") and "starting" in message: self.sound("mech/metal-clack.aiff") if message.startswith("Publish") and "starting" in message: self.sound("mech/door-slam.aiff") #self.sound("mech/metal-clash.aiff") if ("web: %(clientip)s" in format and m.get("method") == "POST" and ("t=set_children" in m.get("uri", "") # FIXME: may give false-positives or "t=set-children" in m.get("uri", ""))): self.sound("mech/clock-clang.aiff") # generic messages #if m['level'] < 20: # self.sound("mech/keyboard-1.aiff") if "_check_for_done but we're not running" in message: pass elif format == "excessive reactor delay (%ss)": self.sound("animal/frog-cheep.aiff") print "excessive delay %s: %s" % (m['args'][0], furl) elif format == "excessive reactor delay (%(delay)ss)": self.sound("animal/frog-cheep.aiff") print "excessive delay %s: %s" % (m['delay'], furl) elif facility == "foolscap.negotiation": if (message == "got offer for an existing connection" or "master told us to use a new connection" in message): print "foolscap: got offer for an existing connection", message, furl else: #print "foolscap:", message pass elif m['level'] > 30: # SCARY or BAD #self.sound("mech/alarm-bell.aiff") self.sound("environ/thunder-tense.aiff") print m, furl elif m['level'] == 30: # WEIRD self.sound("mech/glass-breaking.aiff") print m, furl elif m['level'] > 20: # UNUSUAL or INFREQUENT or CURIOUS self.sound("mech/telephone-ring-old.aiff") print m, furl class 
BoodleSender(protocol.Protocol): def connectionMade(self): print "connected to boodler" self.factory.listener.boodler = self.transport class Bridge(Referenceable): implements(RILogObserver) def __init__(self, furl, listener): self.furl = furl self.listener = listener def remote_msg(self, m): d = defer.maybeDeferred(self.listener.msg, m, self.furl) d.addErrback(log.err) # never send errors to the remote side class Monitor(service.MultiService): def __init__(self): service.MultiService.__init__(self) self.tub = Tub() self.tub.setServiceParent(self) self.listener = Listener() self.targets = [] for fn in os.listdir("."): if fn.endswith(".furl") or fn.endswith(".furls"): for i,line in enumerate(open(fn, "r").readlines()): target = line.strip() if target: self.tub.connectTo(target, self._got_logpublisher, fn, i, target) cf = protocol.ClientFactory() cf.listener = self.listener cf.protocol = BoodleSender reactor.connectTCP("localhost", 31863, cf) def _got_logpublisher(self, publisher, fn, i, target): print "connected to %s:%d, %s" % (fn, i, target) b = Bridge(target, self.listener) publisher.callRemote("subscribe_to_all", b) m = Monitor() application = service.Application("boodlegrid") m.setServiceParent(application) tahoe-lafs-1.10.0/misc/build_helpers/000077500000000000000000000000001221140116300173575ustar00rootroot00000000000000tahoe-lafs-1.10.0/misc/build_helpers/build-deb.py000066400000000000000000000074121221140116300215640ustar00rootroot00000000000000#!/bin/false # invoke this with a specific python import sys, shutil, os.path from subprocess import Popen, PIPE PYTHON = sys.executable ARCH = sys.argv[1] class SubprocessError(Exception): pass def get_output(*cmd, **kwargs): tolerate_stderr = kwargs.get("tolerate_stderr", False) print " " + " ".join(cmd) p = Popen(cmd, stdout=PIPE) (out,err) = p.communicate() rc = p.returncode if rc != 0: print >>sys.stderr, err raise SubprocessError("command %s exited with rc=%s", (cmd, rc)) if err and not tolerate_stderr: print >>sys.stderr, "stderr:", err raise SubprocessError("command emitted unexpected stderr") print " =>", out, return out def run(*cmd, **kwargs): print " " + " ".join(cmd) # if "stdin" in kwargs: # stdin = kwargs.pop("stdin") # p = Popen(cmd, stdin=PIPE, **kwargs) # p.stdin.write(stdin) # p.stdin.close() # else: # p = Popen(cmd, **kwargs) p = Popen(cmd, **kwargs) rc = p.wait() if rc != 0: raise SubprocessError("command %s exited with rc=%s", (cmd, rc)) # the very first time you run setup.py, it will download+build darcsver and # whatnot, emitting noise to stdout. Run it once (and throw away that junk) # to avoid treating that noise as the package name. run(PYTHON, "setup.py", "darcsver") NAME = get_output(PYTHON, "setup.py", "--name").strip() VERSION = get_output(PYTHON, "setup.py", "--version").strip() TARBALL = "%s-%s.tar.gz" % (NAME, VERSION) DEBIAN_TARBALL = "%s_%s.orig.tar.gz" % (NAME, VERSION) BUILDDIR = "build/debian/%s-%s" % (NAME, VERSION) run(PYTHON, "setup.py", "sdist", "--formats=gztar") if os.path.exists("build/debian"): shutil.rmtree("build/debian") os.makedirs("build/debian") shutil.copyfile("dist/%s" % TARBALL, "build/debian/%s" % DEBIAN_TARBALL) run("tar", "xf", DEBIAN_TARBALL, cwd="build/debian") # now modify the tree for debian packaging. This is an algorithmic way of # applying the debian .diff, which factors out some of the similarities # between various debian/ubuntu releases. Everything we do after this point # will show up in the generated .diff, and thus form the debian-specific part # of the source package. 
DEBDIR = os.path.join(BUILDDIR, "debian") os.makedirs(DEBDIR) # The 'aliases' section in setup.cfg causes problems, so get rid of it. We # could get rid of the whole file, but 1: find_links is still sort of useful, # and 2: dpkg-buildpackage prefers to ignore file removal (as opposed to # file-modification) #os.unlink(os.path.join(BUILDDIR, "setup.cfg")) SETUPCFG = os.path.join(BUILDDIR, "setup.cfg") lines = open(SETUPCFG, "r").readlines() f = open(SETUPCFG, "w") for l in lines: if l.startswith("[aliases]"): break f.write(l) f.close() for n in ["compat", "control", "copyright", "pycompat", "rules"]: fn = "misc/debian/%s.%s" % (n, ARCH) if not os.path.exists(fn): fn = "misc/debian/%s" % n assert os.path.exists(fn) shutil.copyfile(fn, os.path.join(DEBDIR, n)) if n == "rules": os.chmod(os.path.join(DEBDIR, n), 0755) # +x # We put "local package" on the first line of the changelog entry to suppress # the lintian NMU warnings (since debchange's new entry's "author" will # probably be different than the what the debian/control Maintainer: field # says) DISTRIBUTION_MAP = {"sid": "unstable"} run("debchange", "--create", "--package", NAME, "--newversion", VERSION+"-1", "--distribution", DISTRIBUTION_MAP.get(ARCH, ARCH), "local package: 'make deb' build", cwd=BUILDDIR) # the package is ready to build. 'debuild' will produce the source package # (.dsc+.diff.gz), then build the .deb and produce a .changes file ready for # upload to an APT archive. The build log will go into a .build file. run("debuild", "-uc", "-us", cwd=BUILDDIR) tahoe-lafs-1.10.0/misc/build_helpers/check-build.py000066400000000000000000000030061221140116300221020ustar00rootroot00000000000000#! /usr/bin/env python # This helper script is used with the 'test-desert-island' Makefile target. import sys good = True build_out = sys.argv[1] mode = sys.argv[2] print for line in open(build_out, "r"): if mode == "no-downloads": # when setup_requires= uses # misc/dependencies/setuptools-0.6c8.egg, it causes a # "Downloading: misc/dependencies/.." line to be emitted, # which doesn't count as a network download. Lines that start # with "Reading" indicate that it is fetching web pages in # order to check for newer versions of packages. As long as it # doesn't actually download any packages then it still passes # this test. That is: it *would* have succeeded if you were on # a Desert Island, an airplane with no network, behind a # corporate firewall that disallows such connections, or if # you had turned off your network prior to running "python # setup.py build". A stronger requirement would be that it # doesn't even try to check for new packages on remote hosts # if it has all the packages that it needs locally, but we # currently don't enforce that stronger requirement. if line.startswith("Downloading http:"): print line, good = False if good: if mode == "no-downloads": print "Good: build did not try to download any files" sys.exit(0) else: if mode == "no-downloads": print "Failed: build tried to download files" sys.exit(1) tahoe-lafs-1.10.0/misc/build_helpers/clean-up-after-fake-dists.py000066400000000000000000000002161221140116300245630ustar00rootroot00000000000000import glob, os, shutil if os.path.exists('support'): shutil.rmtree('support') [shutil.rmtree(p) for p in glob.glob('pycryptopp*.egg')] tahoe-lafs-1.10.0/misc/build_helpers/gen-package-table.py000066400000000000000000000134101221140116300231570ustar00rootroot00000000000000#!/usr/bin/env python # This script generates a table of dependencies in HTML format on stdout. 
# It expects to be run in the tahoe-lafs-dep-eggs directory. import re, os, sys import pkg_resources extensions = ('.egg', '.tar.bz2', '.tar.gz', '.exe') platform_aliases = [('i686','x86'), ('i386','x86'), ('i86pc','x86'), ('win32','windows-x86'), ('win-amd64','windows-x86_64'), ('amd64','x86_64')] FILENAME_RE = re.compile(r'([a-zA-Z_0-9\.]*)-([0-9\.a-vx-z_]*)(-py[0-9\.]*)?(-.*)?') FILENAME_RE2 = re.compile(r'([a-zA-Z_0-9\.]*)-([0-9\.a-vx-z_]*)(win32|win-amd64)?(-py[0-9\.]*)?') matrix = {} pkgs = set() platform_dependent_pkgs = set() python_versions = set() depdir = '.' if len(sys.argv) > 1: depdir = sys.argv[1] filenames = os.listdir(depdir) def add(d, k, v): if k in d: d[k] += [v] else: d[k] = [v] for fname in filenames: for ext in extensions: if fname.endswith(ext): m = FILENAME_RE.match(fname[:-len(ext)]) try: pkg = m.group(1) pythonver = (m.group(3) or '-py')[3:] platform = (m.group(4) or '-')[1:] except (IndexError, AttributeError, TypeError): continue if not pythonver: m = FILENAME_RE2.match(fname[:-len(ext)]) if m.group(3): try: platform = m.group(3) pythonver = (m.group(4) or '-py')[3:] except (IndexError, AttributeError, TypeError): continue for (alias, replacement) in platform_aliases: if platform.endswith(alias): platform = platform[:-len(alias)] + replacement break pkgs.add(pkg) if platform: platform_dependent_pkgs.add(pkg) if pythonver not in matrix: python_versions.add(pythonver) matrix[pythonver] = {} add(matrix[pythonver], platform, (pkg, fname)) break platform_independent_pkgs = pkgs - platform_dependent_pkgs width = 100 / (len(platform_independent_pkgs) + 1) greybgstyle = '; background-color: #E0E0E0' nobgstyle = '' print '' print '' print '' print ' ' print ' Software packages that Tahoe-LAFS depends on' print '' print '' print '

What is this?

' print '

See quickstart.rst, wiki:Installation, and wiki:CompileError.' print '

Software packages that Tahoe-LAFS depends on

' print for pyver in reversed(sorted(python_versions)): greybackground = False if pyver: print '

Packages for Python %s that have compiled C/C++ code:

' % (pyver,) print '' print ' ' print ' ' % (width,) for pkg in sorted(platform_dependent_pkgs): print ' ' % (width, pkg) print ' ' first = True for platform in sorted(matrix[pyver]): if greybackground: bgstyle = greybgstyle else: bgstyle = nobgstyle greybackground = not greybackground row_files = sorted(matrix[pyver][platform]) style1 = first and 'border-top: 2px solid #000000' or '' style1 += bgstyle style2 = first and 'border-top: 2px solid #000000' or '' style2 += bgstyle print ' ' print ' ' % (style1, platform,) for pkg in sorted(platform_dependent_pkgs): files = [n for (p, n) in row_files if pkg == p] bestfile = files and max([(pkg_resources.parse_version(x), x) for x in files])[1] or None if pkg == 'pywin32' and not platform.startswith('windows'): print ' ' % (style2,) else: print ' ' % (style2, bestfile and '%s' % (bestfile, bestfile) or '') print ' ' first = False print '
 Platform  %s 
 %s  n/a  %s
' print print '

Packages that are platform-independent or source-only:

' print '' print ' ' print ' ' print ' ' print ' ' style1 = 'border-top: 2px solid #000000; background-color:#FFFFF0;' style2 = 'border-top: 2px solid #000000;' m = matrix[''][''] for pkg in sorted(platform_independent_pkgs): print ' ' print ' ' % (style1, pkg) files = [n for (p, n) in m if pkg == p] print ' ' % (style2, '
 '.join(['%s' % (f, f) for f in files])) print ' ' print '
 Package  All Python versions 
 %s  %s
' # The document does validate, but not when it is included at the bottom of a directory listing. #print '
' #print 'Valid HTML 4.01 Transitional' print '' tahoe-lafs-1.10.0/misc/build_helpers/get-version.py000066400000000000000000000022771221140116300222030ustar00rootroot00000000000000#!/usr/bin/env python """Determine the version number of the current tree. This should be run after 'setup.py darcsver'. It will emit a single line of text to stdout, either of the form '0.2.0' if this is a release tree (i.e. no patches have been added since the last release tag), or '0.2.0-34' (if 34 patches have been added since the last release tag). If the tree does not have a well-formed version number, this will emit 'unknown'. The version string thus calculated should exactly match the version string determined by setup.py (when it creates eggs and source tarballs) and also the version available in the code image when you do: from allmydata import __version__ """ import os.path, re def get_version(): VERSIONFILE = "src/allmydata/_version.py" verstr = "unknown" if os.path.exists(VERSIONFILE): VSRE = re.compile("^verstr = ['\"]([^'\"]*)['\"]", re.M) verstrline = open(VERSIONFILE, "rt").read() mo = VSRE.search(verstrline) if mo: verstr = mo.group(1) else: raise ValueError("if version.py exists, it must be well-formed") return verstr if __name__ == '__main__': verstr = get_version() print verstr tahoe-lafs-1.10.0/misc/build_helpers/pyver.py000066400000000000000000000001171221140116300210750ustar00rootroot00000000000000#!/usr/bin/env python import sys print "python%d.%d" % (sys.version_info[:2]) tahoe-lafs-1.10.0/misc/build_helpers/run-with-pythonpath.py000066400000000000000000000023511221140116300237030ustar00rootroot00000000000000# -*- python -*- # you must invoke this with an explicit python, from the tree root """Run an arbitrary command with a PYTHONPATH that will include the Tahoe code, including dependent libraries. 
Run this like: python misc/build_helpers/run-with-pythonpath.py python foo.py """ import os, sys # figure out where support/lib/pythonX.X/site-packages is # add it to os.environ["PYTHONPATH"] # spawn the child process def pylibdir(prefixdir): pyver = "python%d.%d" % (sys.version_info[:2]) if sys.platform == "win32": return os.path.join(prefixdir, "Lib", "site-packages") else: return os.path.join(prefixdir, "lib", pyver, "site-packages") basedir = os.path.dirname(os.path.abspath(__file__)) supportlib = pylibdir(os.path.abspath("support")) oldpp = os.environ.get("PYTHONPATH", "").split(os.pathsep) if oldpp == [""]: # grr silly split() behavior oldpp = [] newpp = os.pathsep.join(oldpp + [supportlib,]) os.environ['PYTHONPATH'] = newpp from twisted.python.procutils import which cmd = sys.argv[1] if cmd and cmd[0] not in "/~.": cmds = which(cmd) if not cmds: print >>sys.stderr, "'%s' not found on PATH" % (cmd,) sys.exit(-1) cmd = cmds[0] os.execve(cmd, sys.argv[1:], os.environ) tahoe-lafs-1.10.0/misc/build_helpers/run_trial.py000066400000000000000000000071241221140116300217340ustar00rootroot00000000000000#!/usr/bin/env python import os, sys, re, glob def read_version_py(infname): try: verstrline = open(infname, "rt").read() except EnvironmentError: return None else: VSRE = r"^verstr = ['\"]([^'\"]*)['\"]" mo = re.search(VSRE, verstrline, re.M) if mo: return mo.group(1) version = read_version_py(os.path.join('..', 'src', 'allmydata', '_version.py')) if version is None: raise AssertionError("We don't know which version we're supposed to be testing.") APPNAME='allmydata-tahoe' adglobals = {} execfile(os.path.join('..', 'src', 'allmydata', '_auto_deps.py'), adglobals) install_requires = adglobals['install_requires'] test_requires = adglobals.get('test_requires', ['mock']) # setuptools/zetuptoolz looks in __main__.__requires__ for a list of # requirements. 
__requires__ = [APPNAME + '==' + version] + install_requires + test_requires print "Requirements: %r" % (__requires__,) eggz = glob.glob(os.path.join('..', 'setuptools-*.egg')) if len(eggz) > 0: egg = os.path.realpath(eggz[0]) print "Inserting egg on sys.path: %r" % (egg,) sys.path.insert(0, egg) import pkg_resources pkg_resources # hush pyflakes modulename = None for i in xrange(1, len(sys.argv)): if not sys.argv[i].startswith('-'): modulename = sys.argv[i] break if modulename is None: raise AssertionError("no test module specified") __import__(modulename) srcfile = sys.modules[modulename].__file__ srcdir = os.path.dirname(os.path.realpath(srcfile)) components = modulename.split('.') leaf = os.path.normcase(components[-1]) if os.path.normcase(os.path.basename(srcfile)) in (leaf + '.py', leaf + '.pyc'): # strip the leaf module name components = components[:-1] for i in components: srcdir = os.path.dirname(srcdir) if os.path.normcase(srcdir).endswith('.egg'): srcdir = os.path.dirname(srcdir) elif os.path.normcase(os.path.basename(srcdir)) == 'site-packages': srcdir = os.path.dirname(srcdir) if re.search(r'python.+\..+', os.path.normcase(os.path.basename(srcdir))): srcdir = os.path.dirname(srcdir) if os.path.normcase(os.path.basename(srcdir)) == 'lib': srcdir = os.path.dirname(srcdir) rootdir = os.path.normcase(os.path.normpath(srcdir)) if os.path.basename(rootdir) == 'src': rootdir = os.path.dirname(rootdir) root_from_cwd = os.path.normcase(os.path.normpath(os.getcwd())) if os.path.basename(root_from_cwd) == 'src': root_from_cwd = os.path.dirname(root_from_cwd) same = (root_from_cwd == rootdir) if not same: try: same = os.path.samefile(root_from_cwd, rootdir) except AttributeError, e: e # hush pyflakes if not same: msg = ("We seem to be testing the code at %r\n" "(according to the source filename %r),\n" "but expected to be testing the code at %r.\n" % (rootdir, srcfile, root_from_cwd)) root_from_cwdu = os.path.normcase(os.path.normpath(os.getcwdu())) if os.path.basename(root_from_cwdu) == u'src': root_from_cwdu = os.path.dirname(root_from_cwdu) if not isinstance(root_from_cwd, unicode) and root_from_cwd.decode(sys.getfilesystemencoding(), 'replace') != root_from_cwdu: msg += ("However, this may be a false alarm because the current directory path\n" "is not representable in the filesystem encoding. This script needs to be\n" "run from the source directory to be tested, at a non-Unicode path.") else: msg += "This script needs to be run from the source directory to be tested." raise AssertionError(msg) from twisted.scripts.trial import run run()tahoe-lafs-1.10.0/misc/build_helpers/show-tool-versions.py000066400000000000000000000133011221140116300235300ustar00rootroot00000000000000#! 
/usr/bin/env python import locale, os, platform, subprocess, sys, traceback added_zetuptoolz_egg = False try: import pkg_resources pkg_resources # hush pyflakes except ImportError: import glob eggz = glob.glob('setuptools-*.egg') if len(eggz) > 0: egg = os.path.realpath(eggz[0]) print >>sys.stderr, "Inserting egg on sys.path: %r" % (egg,) added_zetuptoolz_egg = True sys.path.insert(0, egg) def foldlines(s, numlines=None): lines = s.split("\n") if numlines is not None: lines = lines[:numlines] return " ".join(lines).replace("\r", "") def print_platform(): try: import platform out = platform.platform() print "platform:", foldlines(out) print "machine: ", platform.machine() if hasattr(platform, 'linux_distribution'): print "linux_distribution:", repr(platform.linux_distribution()) except EnvironmentError: sys.stderr.write("\nGot exception using 'platform'. Exception follows\n") traceback.print_exc(file=sys.stderr) sys.stderr.flush() pass def print_python_ver(): print "python:", foldlines(sys.version) print 'maxunicode: ' + str(sys.maxunicode) def print_python_encoding_settings(): print 'filesystem.encoding: ' + str(sys.getfilesystemencoding()) print 'locale.getpreferredencoding: ' + str(locale.getpreferredencoding()) try: print 'locale.defaultlocale: ' + str(locale.getdefaultlocale()) except ValueError, e: print 'got exception from locale.getdefaultlocale(): ', e print 'locale.locale: ' + str(locale.getlocale()) def print_stdout(cmdlist, label=None, numlines=None): try: if label is None: label = cmdlist[0] res = subprocess.Popen(cmdlist, stdin=open(os.devnull), stdout=subprocess.PIPE).communicate()[0] print label + ': ' + foldlines(res, numlines) except EnvironmentError, e: if isinstance(e, OSError) and e.errno == 2: print label + ': no such file or directory' return sys.stderr.write("\nGot exception invoking '%s'. Exception follows.\n" % (cmdlist[0],)) traceback.print_exc(file=sys.stderr) sys.stderr.flush() pass def print_as_ver(): if os.path.exists('a.out'): print "WARNING: a file named a.out exists, and getting the version of the 'as' assembler writes to that filename, so I'm not attempting to get the version of 'as'." return try: res = subprocess.Popen(['as', '-version'], stdin=open(os.devnull), stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate() print 'as: ' + foldlines(res[0]+' '+res[1]) if os.path.exists('a.out'): os.remove('a.out') except EnvironmentError: sys.stderr.write("\nGot exception invoking '%s'. Exception follows.\n" % ('as',)) traceback.print_exc(file=sys.stderr) sys.stderr.flush() pass def print_setuptools_ver(): if added_zetuptoolz_egg: # it would be misleading to report the bundled version of zetuptoolz as the installed version print "setuptools: using bundled egg" return try: import pkg_resources out = str(pkg_resources.require("setuptools")) print "setuptools:", foldlines(out) except (ImportError, EnvironmentError): sys.stderr.write("\nGot exception using 'pkg_resources' to get the version of setuptools. Exception follows\n") traceback.print_exc(file=sys.stderr) sys.stderr.flush() pass except pkg_resources.DistributionNotFound: print 'setuptools: DistributionNotFound' pass def print_py_pkg_ver(pkgname, modulename=None): if modulename is None: modulename = pkgname print try: import pkg_resources out = str(pkg_resources.require(pkgname)) print pkgname + ': ' + foldlines(out) except (ImportError, EnvironmentError): sys.stderr.write("\nGot exception using 'pkg_resources' to get the version of %s. 
Exception follows.\n" % (pkgname,)) traceback.print_exc(file=sys.stderr) sys.stderr.flush() pass except pkg_resources.DistributionNotFound: print pkgname + ': DistributionNotFound' pass try: __import__(modulename) except ImportError: pass else: modobj = sys.modules.get(modulename) print pkgname + ' module: ' + str(modobj) try: print pkgname + ' __version__: ' + str(modobj.__version__) except AttributeError: pass print_platform() print print_python_ver() print print_stdout(['locale']) print_python_encoding_settings() print print_stdout(['buildbot', '--version']) print_stdout(['buildslave', '--version']) if 'windows' in platform.system().lower(): print_stdout(['cl']) print_stdout(['gcc', '--version'], numlines=1) print_stdout(['g++', '--version'], numlines=1) print_stdout(['cryptest', 'V']) print_stdout(['git', '--version']) print_stdout(['openssl', 'version']) print_stdout(['darcs', '--version']) print_stdout(['darcs', '--exact-version'], label='darcs-exact-version') print_stdout(['flappclient', '--version']) print_stdout(['valgrind', '--version']) print_stdout(['lzip', '--version']) print_as_ver() print_setuptools_ver() print_py_pkg_ver('coverage') print_py_pkg_ver('trialcoverage') print_py_pkg_ver('pyflakes') print_py_pkg_ver('zope.interface') print_py_pkg_ver('setuptools_darcs') print_py_pkg_ver('darcsver') print_py_pkg_ver('Twisted', 'twisted') print_py_pkg_ver('TwistedCore', 'twisted.python') print_py_pkg_ver('TwistedWeb', 'twisted.web') print_py_pkg_ver('TwistedConch', 'twisted.conch') print_py_pkg_ver('pycryptopp') tahoe-lafs-1.10.0/misc/build_helpers/sub-ver.py000066400000000000000000000010021221140116300213050ustar00rootroot00000000000000#!/usr/bin/env python from allmydata import __version__ as v import sys if len(sys.argv) == 1: input = sys.stdin elif len(sys.argv) == 2: fname = sys.argv[1] input = file(fname, 'rb') else: raise ValueError('must provide 0 or 1 argument (stdin, or filename)') vern = { 'major': v.major or 0, 'minor': v.minor or 0, 'point': v.micro or 0, 'micro': v.micro or 0, 'revision' : v.revision or 0, 'build': str(v), } for line in input.readlines(): print line % vern, tahoe-lafs-1.10.0/misc/build_helpers/test-darcs-boringfile.py000066400000000000000000000003511221140116300241170ustar00rootroot00000000000000#!/usr/bin/env python import sys from subprocess import Popen, PIPE cmd = ["darcs", "whatsnew", "-l"] p = Popen(cmd, stdout=PIPE) output = p.communicate()[0] print output if output == "No changes!\n": sys.exit(0) sys.exit(1) test-dont-install-newer-dep-when-you-already-have-sufficiently-new-one.py000066400000000000000000000060011221140116300353010ustar00rootroot00000000000000tahoe-lafs-1.10.0/misc/build_helpers#!/usr/bin/env python import StringIO, os, platform, shutil, subprocess, sys, tarfile, zipfile import pkg_resources def test(): # We put a "fakedependency-1.0.0.egg" package and a # "fakedependency-9.9.99.tar.gz" into a directory, but the latter is # booby-trapped so it will raise an exception when you try to build it. # # Then we run # # python setup.py --fakedependency -v test -s buildtest.test_build_with_fake_dist # # which requires "fakedependency >= 1.0.0", imports fakedependency # and passes if fakedependency.__version__ == '1.0.0'. # # The goal is to turn red if the build system tries to build the # source dist when it could have used the binary dist. # # Note that for this test to make sense, Tahoe-LAFS needs to be asking # for a version of fakedependency which can be satisfied by 1.0.0. # The --fakedependency option to setup.py arranges that. 
fake_distdir = 'tahoe-deps' fake_distname = "fakedependency" fake_sdistversion = "9.9.99" fake_bdistversion = "1.0.0" sdist_setup = "raise Exception('Aha I caught you trying to build me. I am a fakedependency 9.9.99 sdist and you should be satisfied with a bdist.')" testsuite = "buildtest.test_build_with_fake_dist" dist_dirname = os.path.join(os.getcwd(), fake_distdir) try: os.makedirs(dist_dirname) except OSError: # probably already exists pass bdist_egg_name = os.path.join(dist_dirname, '%s-%s-py%s.%s-%s.egg' % (fake_distname, fake_bdistversion, platform.python_version_tuple()[0], platform.python_version_tuple()[1], pkg_resources.get_supported_platform())) try: bdist_egg = zipfile.ZipFile(bdist_egg_name, 'w') bdist_egg.writestr('fakedependency/__init__.py', '__version__ = "%s"\n' % (fake_bdistversion,)) bdist_egg.close() sdist_name = os.path.join(dist_dirname, '%s-%s.tar' % (fake_distname, fake_sdistversion)) sdist = tarfile.open(sdist_name, 'w:gz') sdist.errorlevel = 2 tarinfo = tarfile.TarInfo('setup.py') tarinfo.errorlevel = 2 tarinfo.size = len(sdist_setup) sdist.addfile(tarinfo, StringIO.StringIO(sdist_setup)) sdist.close() sys.exit(subprocess.call([sys.executable, "setup.py", "--fakedependency", "-v", "test", "-s", testsuite], env=os.environ)) finally: os.remove(bdist_egg_name) os.remove(sdist_name) cleanup() def cleanup(): egg_info = os.path.join('src', 'allmydata_tahoe.egg-info') bin_tahoe = os.path.join('bin', 'tahoe') bin_tahoe_pyscript = os.path.join('bin', 'tahoe.pyscript') if os.path.exists('build'): shutil.rmtree('build') if os.path.exists('support'): shutil.rmtree('support') if os.path.exists(egg_info): shutil.rmtree(egg_info) if os.path.exists(bin_tahoe): os.remove(bin_tahoe) if os.path.exists(bin_tahoe_pyscript): os.remove(bin_tahoe_pyscript) if __name__ == '__main__': test() tahoe-lafs-1.10.0/misc/build_helpers/test-dont-use-too-old-dep.py000066400000000000000000000066771221140116300246050ustar00rootroot00000000000000#!/usr/bin/env python import StringIO, os, platform, shutil, subprocess, sys, tarfile, zipfile, time import pkg_resources def test(): # We put a "fakedependency-0.9.9.egg" package and a # "fakedependency-1.0.0.tar.gz" into a directory, but the former is # booby-trapped so it will raise an exception when you try to import it. # # Then we run # # python setup.py --fakedependency -v test -s buildtest.test_build_with_fake_dist # # which requires "fakedependency >= 1.0.0", imports fakedependency # and passes if fakedependency.__version__ == '1.0.0'. # # The goal is to turn red if the build system tries to use the # source dist when it could have used the binary dist. # # Note that for this test to make sense, Tahoe-LAFS needs to be asking # for a version of fakedependency which can be satisfied by 1.0.0. # The --fakedependency option to setup.py arranges that. fake_distdir = 'tahoe-deps' fake_distname = "fakedependency" fake_sdistversion = "1.0.0" fake_bdistversion = "0.9.9" bdist_init = "raise Exception('Aha I caught you trying to import me. 
I am a fakedependency 0.9.9 package and you should not be satisfied with something < 1.0.0.')" sdist_setup = "import distutils.core\ndistutils.core.setup(name='fakedependency', version='1.0.0', packages=['fakedependency'])" sdist_init = "__version__ = '%s'" % (fake_sdistversion,) testsuite = "buildtest.test_build_with_fake_dist" dist_dirname = os.path.join(os.getcwd(), fake_distdir) try: os.makedirs(dist_dirname) except OSError: # probably already exists pass bdist_egg_name = os.path.join(dist_dirname, '%s-%s-py%s.%s-%s.egg' % (fake_distname, fake_bdistversion, platform.python_version_tuple()[0], platform.python_version_tuple()[1], pkg_resources.get_supported_platform())) try: bdist_egg = zipfile.ZipFile(bdist_egg_name, 'w') bdist_egg.writestr('fakedependency/__init__.py', bdist_init) bdist_egg.close() sdist_name = os.path.join(dist_dirname, '%s-%s.tar' % (fake_distname, fake_sdistversion)) sdist = tarfile.open(sdist_name, 'w:gz') sdist.errorlevel = 2 tarinfo = tarfile.TarInfo('setup.py') tarinfo.errorlevel = 2 tarinfo.mtime = time.time() tarinfo.size = len(sdist_setup) sdist.addfile(tarinfo, StringIO.StringIO(sdist_setup)) tarinfo = tarfile.TarInfo('fakedependency/__init__.py') tarinfo.errorlevel = 2 tarinfo.mtime = time.time() tarinfo.size = len(sdist_init) sdist.addfile(tarinfo, StringIO.StringIO(sdist_init)) sdist.close() sys.exit(subprocess.call([sys.executable, "setup.py", "--fakedependency", "-v", "test", "-s", testsuite], env=os.environ)) finally: os.remove(bdist_egg_name) os.remove(sdist_name) cleanup() def cleanup(): egg_info = os.path.join('src', 'allmydata_tahoe.egg-info') bin_tahoe = os.path.join('bin', 'tahoe') bin_tahoe_pyscript = os.path.join('bin', 'tahoe.pyscript') if os.path.exists('build'): shutil.rmtree('build') if os.path.exists('support'): shutil.rmtree('support') if os.path.exists(egg_info): shutil.rmtree(egg_info) if os.path.exists(bin_tahoe): os.remove(bin_tahoe) if os.path.exists(bin_tahoe_pyscript): os.remove(bin_tahoe_pyscript) if __name__ == '__main__': test() tahoe-lafs-1.10.0/misc/build_helpers/test-git-ignore.py000066400000000000000000000003411221140116300227500ustar00rootroot00000000000000#!/usr/bin/env python import sys from subprocess import Popen, PIPE cmd = ["git", "status", "--porcelain"] p = Popen(cmd, stdout=PIPE) output = p.communicate()[0] print output if output == "": sys.exit(0) sys.exit(1) tahoe-lafs-1.10.0/misc/build_helpers/test_mac_diskimage.py000066400000000000000000000111071221140116300235450ustar00rootroot00000000000000# This script uses hdiutil to attach a dmg (whose name is derived from the # appname and the version number passed in), asserts that it attached as # expected, cd's into the mounted filesystem, executes "$appname # --version-and-path", and checks whether the output of --version-and-path is # right. # If all of the paths listed therein are loaded from within the current PWD # then it exits with code 0. # If anything goes wrong then it exits with non-zero (failure). This is to # check that the Mac OS "DMG" (disk image) package that gets built is correctly # loading all of its packages from inside the image. 
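# As a hypothetical usage sketch (the app name and version below are made-up
# examples; the real callers of this helper are not shown in this excerpt):
#
#   from test_mac_diskimage import test_mac_diskimage
#   # expects to find mac/Allmydata-1.10.0.dmg relative to the current directory
#   test_mac_diskimage("Allmydata", "1.10.0")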
# Here is an example output from --version-and-path: # allmydata-tahoe: 1.4.1-r3916 (/home/zooko/playground/allmydata/tahoe/trunk/trunk/src), foolscap: 0.4.1 (/usr/local/lib/python2.6/dist-packages/foolscap-0.4.1-py2.6.egg), pycryptopp: 0.5.10 (/home/zooko/playground/allmydata/tahoe/trunk/trunk/support/lib/python2.6/site-packages/pycryptopp-0.5.10-py2.6-linux-x86_64.egg), zfec: 1.4.2 (/usr/local/lib/python2.6/dist-packages/zfec-1.4.2-py2.6-linux-x86_64.egg), Twisted: 8.2.0-r26987 (/usr/local/lib/python2.6/dist-packages/Twisted-8.2.0_r26987-py2.6-linux-x86_64.egg), Nevow: 0.9.32 (/home/zooko/playground/allmydata/tahoe/trunk/trunk/support/lib/python2.6/site-packages/Nevow-0.9.32-py2.6.egg), zope.interface: 3.4.0 (/usr/lib/python2.6/dist-packages), python: 2.6.2 (/usr/bin/python), platform: Linux-Ubuntu_9.04-x86_64-64bit_ELF (None), sqlite: 3.6.10 (unknown), simplejson: 2.0.1 (/usr/local/lib/python2.6/dist-packages/simplejson-2.0.1-py2.6-linux-x86_64.egg), argparse: 0.8.0 (/usr/local/lib/python2.6/dist-packages/argparse-0.8.0-py2.6.egg), pyOpenSSL: 0.7 (/home/zooko/playground/allmydata/tahoe/trunk/trunk/support/lib/python2.6/site-packages/pyOpenSSL-0.7-py2.6-linux-x86_64.egg), pyutil: 1.3.30 (/usr/local/lib/python2.6/dist-packages/pyutil-1.3.30-py2.6.egg), zbase32: 1.1.1 (/usr/local/lib/python2.6/dist-packages/zbase32-1.1.1-py2.6.egg), setuptools: 0.6c12dev (/home/zooko/playground/allmydata/tahoe/trunk/trunk/support/lib/python2.6/site-packages/setuptools-0.6c12dev.egg), pysqlite: 2.4.1 (/usr/lib/python2.6/sqlite3) import os, re, subprocess, time def test_mac_diskimage(appname, version): """ Return True on success, raise exception on failure. """ assert isinstance(appname, basestring), appname assert isinstance(version, basestring), version DMGNAME='mac/'+appname+'-'+version+'.dmg' cmd = ['hdiutil', 'attach', DMGNAME] attachit = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) rc = attachit.wait() if rc != 0: raise Exception("FAIL: hdiutil returned non-zero exit code: %r from command: %r" % (rc, cmd,)) stderrtxt = attachit.stderr.read() if stderrtxt: raise Exception("FAIL: hdiutil said something on stderr: %r" % (stderrtxt,)) stdouttxt = attachit.stdout.read() mo = re.search("^(/[^ ]+)\s+Apple_HFS\s+(/Volumes/.*)$", stdouttxt, re.UNICODE|re.MULTILINE) if not mo: raise Exception("FAIL: hdiutil said something on stdout that didn't match our expectations: %r" % (stdouttxt,)) DEV=mo.group(1) MOUNTPOINT=mo.group(2) callitpid = None try: basedir = MOUNTPOINT + '/' + appname + '.app/Contents/Resources' os.chdir(basedir) cmd = ['../MacOS/' + appname, '--version-and-path'] callit = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) callitpid = callit.pid assert callitpid deadline = time.time() + 2 # If it takes longer than 2 seconds to do this then it fails. while True: rc = callit.poll() if rc is not None: break if time.time() > deadline: os.kill(callitpid, 15) raise Exception("FAIL: it took longer than 2 seconds to invoke $appname --version-and-path. stdout: %s, stderr: %s" % (callit.stdout.read(), callit.stderr.read())) time.sleep(0.05) if rc != 0: raise Exception("FAIL: $appname --version-and-path returned non-zero exit code: %r" % (rc,)) stdouttxt = callit.stdout.read() PKG_VER_PATH_RE=re.compile("(\S+): (\S+) \((.+?)\), ", re.UNICODE) for mo in PKG_VER_PATH_RE.finditer(stdouttxt): if not mo.group(3).startswith(basedir): raise Exception("FAIL: found package not loaded from basedir (%s); package was: %s" % (basedir, mo.groups(),)) return True # success! 
finally: if callitpid: os.kill(callitpid, 9) os.waitpid(callitpid, 0) subprocess.call(['hdiutil', 'detach', '-Force', DEV]) tahoe-lafs-1.10.0/misc/coding_tools/000077500000000000000000000000001221140116300172215ustar00rootroot00000000000000tahoe-lafs-1.10.0/misc/coding_tools/check-interfaces.py000066400000000000000000000214631221140116300227770ustar00rootroot00000000000000 # To check a particular Tahoe source distribution, this should be invoked from # the root directory of that distribution as # # bin/tahoe @misc/coding_tools/check-interfaces.py import os, sys, re, platform import zope.interface as zi # We use the forked version of verifyClass below. #from zope.interface.verify import verifyClass from zope.interface.advice import addClassAdvisor interesting_modules = re.compile(r'(allmydata)|(foolscap)\..*') excluded_classnames = re.compile(r'(_)|(Mock)|(Fake)|(Dummy).*') excluded_file_basenames = re.compile(r'(check)|(bench)_.*') _other_modules_with_violations = set() _err = sys.stderr _report_argname_mismatch = False # very noisy and usually not important # deep magic def strictly_implements(*interfaces): frame = sys._getframe(1) f_locals = frame.f_locals # Try to make sure we were called from a class def. Assumes Python > 2.2. if f_locals is frame.f_globals or '__module__' not in f_locals: raise TypeError("implements can be used only from a class definition.") if '__implements_advice_data__' in f_locals: raise TypeError("implements can be used only once in a class definition.") def _implements_advice(cls): interfaces, classImplements = cls.__dict__['__implements_advice_data__'] del cls.__implements_advice_data__ classImplements(cls, *interfaces) if interesting_modules.match(cls.__module__): if not excluded_classnames.match(cls.__name__): for interface in interfaces: try: verifyClass(interface, cls) except Exception, e: print >>_err, ("%s.%s does not correctly implement %s.%s:\n%s" % (cls.__module__, cls.__name__, interface.__module__, interface.__name__, e)) else: _other_modules_with_violations.add(cls.__module__) return cls f_locals['__implements_advice_data__'] = interfaces, zi.classImplements addClassAdvisor(_implements_advice, depth=2) def check(): # patchee-monkey zi.implements = strictly_implements if len(sys.argv) >= 2: if sys.argv[1] == '--help' or len(sys.argv) > 2: print >>_err, "Usage: check-miscaptures.py [SOURCEDIR]" return srcdir = sys.argv[1] else: # import modules under src/ by default srcdir = 'src' # attempt to avoid side-effects from importing command scripts sys.argv = ['', '--help'] syslow = platform.system().lower() is_windows = 'cygwin' in syslow or 'windows' in syslow for (dirpath, dirnames, filenames) in os.walk(srcdir): for fn in filenames: (basename, ext) = os.path.splitext(fn) if ext in ('.pyc', '.pyo') and not os.path.exists(os.path.join(dirpath, basename+'.py')): print >>_err, ("Warning: no .py source file for %r.\n" % (os.path.join(dirpath, fn),)) if ext == '.py' and not excluded_file_basenames.match(basename): relpath = os.path.join(dirpath[len(srcdir)+1:], basename) module = relpath.replace(os.sep, '/').replace('/', '.') try: __import__(module) except ImportError, e: if not is_windows and (' _win' in str(e) or 'win32' in str(e)): print >>_err, ("Warning: %r imports a Windows-specific module, so we cannot check it (%s).\n" % (module, str(e))) else: import traceback traceback.print_exc(file=_err) print >>_err others = list(_other_modules_with_violations) others.sort() print >>_err, "There were also interface violations in:\n", ", ".join(others), "\n" # 
Forked from # http://svn.zope.org/*checkout*/Zope3/trunk/src/zope/interface/verify.py?content-type=text%2Fplain&rev=27687 # but modified to report all interface violations rather than just the first. ############################################################################## # # Copyright (c) 2001, 2002 Zope Corporation and Contributors. # All Rights Reserved. # # This software is subject to the provisions of the Zope Public License, # Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution. # THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED # WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED # WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS # FOR A PARTICULAR PURPOSE. # ############################################################################## """Verify interface implementations $Id$ """ from zope.interface.exceptions import DoesNotImplement from zope.interface.exceptions import BrokenMethodImplementation from types import FunctionType, MethodType from zope.interface.interface import fromMethod, fromFunction, Method # This will be monkey-patched when running under Zope 2, so leave this # here: MethodTypes = (MethodType, ) def _verify(iface, candidate, tentative=0, vtype=None): """Verify that 'candidate' might correctly implements 'iface'. This involves: o Making sure the candidate defines all the necessary methods o Making sure the methods have the correct signature o Making sure the candidate asserts that it implements the interface Note that this isn't the same as verifying that the class does implement the interface. If optional tentative is true, suppress the "is implemented by" test. """ if vtype == 'c': tester = iface.implementedBy else: tester = iface.providedBy violations = [] def format(e): return " " + str(e).strip() + "\n" if not tentative and not tester(candidate): violations.append(format(DoesNotImplement(iface))) # Here the `desc` is either an `Attribute` or `Method` instance for name, desc in iface.namesAndDescriptions(1): if not hasattr(candidate, name): if (not isinstance(desc, Method)) and vtype == 'c': # We can't verify non-methods on classes, since the # class may provide attrs in it's __init__. continue if isinstance(desc, Method): violations.append(" The %r method was not provided.\n" % (name,)) else: violations.append(" The %r attribute was not provided.\n" % (name,)) continue attr = getattr(candidate, name) if not isinstance(desc, Method): # If it's not a method, there's nothing else we can test continue if isinstance(attr, FunctionType): # should never get here, since classes should not provide functions meth = fromFunction(attr, iface, name=name) elif (isinstance(attr, MethodTypes) and type(attr.im_func) is FunctionType): meth = fromMethod(attr, iface, name) else: if not callable(attr): violations.append(format(BrokenMethodImplementation(name, "Not a method"))) # sigh, it's callable, but we don't know how to intrspect it, so # we have to give it a pass. continue # Make sure that the required and implemented method signatures are # the same. 
desc = desc.getSignatureInfo() meth = meth.getSignatureInfo() mess = _incompat(desc, meth) if mess: violations.append(format(BrokenMethodImplementation(name, mess))) if violations: raise Exception("".join(violations)) return True def verifyClass(iface, candidate, tentative=0): return _verify(iface, candidate, tentative, vtype='c') def verifyObject(iface, candidate, tentative=0): return _verify(iface, candidate, tentative, vtype='o') def _incompat(required, implemented): if len(implemented['required']) > len(required['required']): return 'implementation requires too many arguments' if ((len(implemented['positional']) < len(required['positional'])) and not implemented['varargs']): return "implementation doesn't allow enough arguments" if required['kwargs'] and not implemented['kwargs']: return "implementation doesn't support keyword arguments" if required['varargs'] and not implemented['varargs']: return "implementation doesn't support variable arguments" if (_report_argname_mismatch and required['positional'] != implemented['positional'][:len(required['positional'])] and implemented['kwargs'] is None): return 'implementation has different argument names' if __name__ == "__main__": check() # Avoid spurious warnings about ignored exceptions during shutdown by doing a hard exit. os._exit(0) tahoe-lafs-1.10.0/misc/coding_tools/check-miscaptures.py000066400000000000000000000144051221140116300232110ustar00rootroot00000000000000#! /usr/bin/python import os, sys, compiler from compiler.ast import Node, For, While, ListComp, AssName, Name, Lambda, Function def check_source(source): return check_thing(compiler.parse, source) def check_file(path): return check_thing(compiler.parseFile, path) def check_thing(parser, thing): try: ast = parser(thing) except SyntaxError, e: return e else: results = [] check_ast(ast, results) return results def check_ast(ast, results): """Check a node outside a loop.""" if isinstance(ast, (For, While, ListComp)): check_loop(ast, results) else: for child in ast.getChildNodes(): if isinstance(ast, Node): check_ast(child, results) def check_loop(ast, results): """Check a particular outer loop.""" # List comprehensions have a poorly designed AST of the form # ListComp(exprNode, [ListCompFor(...), ...]), in which the # result expression is outside the ListCompFor node even though # it is logically inside the loop(s). # There may be multiple ListCompFor nodes (in cases such as # [lambda: (a,b) for a in ... for b in ...] # ), and that case they are not nested in the AST. But these # warts (nonobviously) happen not to matter for our analysis. assigned = {} # maps name to lineno of topmost assignment nested = set() collect_assigned_and_nested(ast, assigned, nested) # For each nested function... for funcnode in nested: # Check for captured variables in this function. captured = set() collect_captured(funcnode, assigned, captured, False) for name in captured: # We want to report the outermost capturing function # (since that is where the workaround will need to be # added), and the topmost assignment to the variable. # Just one report per capturing function per variable # will do. results.append(make_result(funcnode, name, assigned[name])) # Check each node in the function body in case it # contains another 'for' loop. childnodes = funcnode.getChildNodes()[len(funcnode.defaults):] for child in childnodes: check_ast(funcnode, results) def collect_assigned_and_nested(ast, assigned, nested): """ Collect the names assigned in this loop, not including names assigned in nested functions. 
Also collect the nodes of functions that are nested one level deep. """ if isinstance(ast, AssName): if ast.name not in assigned or assigned[ast.name] > ast.lineno: assigned[ast.name] = ast.lineno else: childnodes = ast.getChildNodes() if isinstance(ast, (Lambda, Function)): nested.add(ast) # The default argument expressions are "outside" the # function, even though they are children of the # Lambda or Function node. childnodes = childnodes[:len(ast.defaults)] for child in childnodes: if isinstance(ast, Node): collect_assigned_and_nested(child, assigned, nested) def collect_captured(ast, assigned, captured, in_function_yet): """Collect any captured variables that are also in assigned.""" if isinstance(ast, Name): if ast.name in assigned: captured.add(ast.name) else: childnodes = ast.getChildNodes() if isinstance(ast, (Lambda, Function)): # Formal parameters of the function are excluded from # captures we care about in subnodes of the function body. new_assigned = assigned.copy() remove_argnames(ast.argnames, new_assigned) if len(new_assigned) > 0: for child in childnodes[len(ast.defaults):]: collect_captured(child, new_assigned, captured, True) # The default argument expressions are "outside" *this* # function, even though they are children of the Lambda or # Function node. if not in_function_yet: return childnodes = childnodes[:len(ast.defaults)] for child in childnodes: if isinstance(ast, Node): collect_captured(child, assigned, captured, True) def remove_argnames(names, fromset): for element in names: if element in fromset: del fromset[element] elif isinstance(element, (tuple, list)): remove_argnames(element, fromset) def make_result(funcnode, var_name, var_lineno): if hasattr(funcnode, 'name'): func_name = 'function %r' % (funcnode.name,) else: func_name = '' return (funcnode.lineno, func_name, var_name, var_lineno) def report(out, path, results): for r in results: print >>out, path + (":%r %s captures %r assigned at line %d" % r) def check(sources, out): class Counts: n = 0 processed_files = 0 suspect_files = 0 error_files = 0 counts = Counts() def _process(path): results = check_file(path) if isinstance(results, SyntaxError): print >>out, path + (" NOT ANALYSED due to syntax error: %s" % results) counts.error_files += 1 else: report(out, path, results) counts.n += len(results) counts.processed_files += 1 if len(results) > 0: counts.suspect_files += 1 for source in sources: print >>out, "Checking %s..." % (source,) if os.path.isfile(source): _process(source) else: for (dirpath, dirnames, filenames) in os.walk(source): for fn in filenames: (basename, ext) = os.path.splitext(fn) if ext == '.py': _process(os.path.join(dirpath, fn)) print >>out, ("%d suspiciously captured variables in %d out of %d file(s)." % (counts.n, counts.suspect_files, counts.processed_files)) if counts.error_files > 0: print >>out, ("%d file(s) not processed due to syntax errors." % (counts.error_files,)) return counts.n sources = ['src'] if len(sys.argv) > 1: sources = sys.argv[1:] if check(sources, sys.stderr) > 0: sys.exit(1) # TODO: self-tests tahoe-lafs-1.10.0/misc/coding_tools/check-umids.py000066400000000000000000000013751221140116300217750ustar00rootroot00000000000000#! 
/usr/bin/python # ./rumid.py foo.py import sys, re, os ok = True umids = {} for fn in sys.argv[1:]: fn = os.path.abspath(fn) for lineno,line in enumerate(open(fn, "r").readlines()): lineno = lineno+1 if "umid" not in line: continue mo = re.search("umid=[\"\']([^\"\']+)[\"\']", line) if mo: umid = mo.group(1) if umid in umids: oldfn, oldlineno = umids[umid] print "%s:%d: duplicate umid '%s'" % (fn, lineno, umid) print "%s:%d: first used here" % (oldfn, oldlineno) ok = False umids[umid] = (fn,lineno) if ok: print "all umids are unique" else: print "some umids were duplicates" sys.exit(1) tahoe-lafs-1.10.0/misc/coding_tools/coverage.el000066400000000000000000000110321221140116300213330ustar00rootroot00000000000000 (defvar coverage-annotation-file ".coverage.el") (defvar coverage-annotations nil) (defun find-coverage-annotation-file () (let ((dir (file-name-directory buffer-file-name)) (olddir "/")) (while (and (not (equal dir olddir)) (not (file-regular-p (concat dir coverage-annotation-file)))) (setq olddir dir dir (file-name-directory (directory-file-name dir)))) (and (not (equal dir olddir)) (concat dir coverage-annotation-file)) )) (defun load-coverage-annotations () (let* ((annotation-file (find-coverage-annotation-file)) (coverage (with-temp-buffer (insert-file-contents annotation-file) (let ((form (read (current-buffer)))) (eval form))))) (setq coverage-annotations coverage) coverage )) (defun coverage-unannotate () (save-excursion (dolist (ov (overlays-in (point-min) (point-max))) (delete-overlay ov)) (setq coverage-this-buffer-is-annotated nil) (message "Removed annotations") )) ;; in emacs22, it will be possible to put the annotations in the fringe. Set ;; a display property for one of the characters in the line, using ;; (right-fringe BITMAP FACE), where BITMAP should probably be right-triangle ;; or so, and FACE should probably be '(:foreground "red"). We can also ;; create new bitmaps, with faces. To do tartans will require a lot of ;; bitmaps, and you've only got about 8 pixels to work with. ;; unfortunately emacs21 gives us less control over the fringe. We can use ;; overlays to put letters on the left or right margins (in the text area, ;; overriding actual program text), and to modify the text being displayed ;; (by changing its background color, or adding a box around each word). (defun coverage-annotate (show-code) (let ((allcoverage (load-coverage-annotations)) (filename-key (expand-file-name buffer-file-truename)) thiscoverage code-lines covered-lines uncovered-code-lines ) (while (and (not (gethash filename-key allcoverage nil)) (string-match "/" filename-key)) ;; eat everything up to and including the first slash, then look again (setq filename-key (substring filename-key (+ 1 (string-match "/" filename-key))))) (setq thiscoverage (gethash filename-key allcoverage nil)) (if thiscoverage (progn (setq coverage-this-buffer-is-annotated t) (setq code-lines (nth 0 thiscoverage) covered-lines (nth 1 thiscoverage) uncovered-code-lines (nth 2 thiscoverage) ) (save-excursion (dolist (ov (overlays-in (point-min) (point-max))) (delete-overlay ov)) (if show-code (dolist (line code-lines) (goto-line line) ;;(add-text-properties (point) (line-end-position) '(face bold) ) (overlay-put (make-overlay (point) (line-end-position)) ;'before-string "C" ;'face '(background-color . 
"green") 'face '(:background "dark green") ) )) (dolist (line uncovered-code-lines) (goto-line line) (overlay-put (make-overlay (point) (line-end-position)) ;'before-string "D" ;'face '(:background "blue") ;'face '(:underline "blue") 'face '(:box "red") ) ) (message (format "Added annotations: %d uncovered lines" (safe-length uncovered-code-lines))) ) ) (message "unable to find coverage for this file")) )) (defun coverage-toggle-annotations (show-code) (interactive "P") (if coverage-this-buffer-is-annotated (coverage-unannotate) (coverage-annotate show-code)) ) (setq coverage-this-buffer-is-annotated nil) (make-variable-buffer-local 'coverage-this-buffer-is-annotated) (define-minor-mode coverage-annotation-minor-mode "Minor mode to annotate code-coverage information" nil " CA" '( ("\C-c\C-a" . coverage-toggle-annotations) ) () ; forms run on mode entry/exit ) (defun maybe-enable-coverage-mode () (if (string-match "/src/allmydata/" (buffer-file-name)) (coverage-annotation-minor-mode t) )) (add-hook 'python-mode-hook 'maybe-enable-coverage-mode) tahoe-lafs-1.10.0/misc/coding_tools/coverage2el.py000066400000000000000000000045151221140116300217760ustar00rootroot00000000000000 import os.path from coverage import coverage, summary, misc class ElispReporter(summary.SummaryReporter): def report(self): try: # coverage-3.4 has both omit= and include= . include= is applied # first, then omit= removes items from what's left. These are # tested with fnmatch, against fully-qualified filenames. self.find_code_units(None, omit=[os.path.abspath("src/allmydata/test/*")], include=[os.path.abspath("src/allmydata/*")]) except TypeError: # coverage-3.3 only had omit= self.find_code_units(None, ["/System", "/Library", "/usr/lib", "support/lib", "src/allmydata/test"]) out = open(".coverage.el", "w") out.write(""" ;; This is an elisp-readable form of the figleaf coverage data. It defines a ;; single top-level hash table in which the key is an asolute pathname, and ;; the value is a three-element list. The first element of this list is a ;; list of line numbers that represent actual code statements. The second is ;; a list of line numbers for lines which got used during the unit test. The ;; third is a list of line numbers for code lines that were not covered ;; (since 'code' and 'covered' start as sets, this last list is equal to ;; 'code - covered'). 
""") out.write("(let ((results (make-hash-table :test 'equal)))\n") for cu in self.code_units: f = cu.filename try: (fn, executable, missing, mf) = self.coverage.analysis(cu) except misc.NoSource: continue code_linenumbers = executable uncovered_code = missing covered_linenumbers = sorted(set(executable) - set(missing)) out.write(" (puthash \"%s\" '((%s) (%s) (%s)) results)\n" % (f, " ".join([str(ln) for ln in sorted(code_linenumbers)]), " ".join([str(ln) for ln in sorted(covered_linenumbers)]), " ".join([str(ln) for ln in sorted(uncovered_code)]), )) out.write(" results)\n") out.close() def main(): c = coverage() c.load() ElispReporter(c).report() if __name__ == '__main__': main() tahoe-lafs-1.10.0/misc/coding_tools/find-trailing-spaces.py000066400000000000000000000021141221140116300235740ustar00rootroot00000000000000#!/usr/bin/env python import os, sys from twisted.python import usage class Options(usage.Options): optFlags = [ ("recursive", "r", "Search for .py files recursively"), ] def parseArgs(self, *starting_points): self.starting_points = starting_points found = [False] def check(fn): f = open(fn, "r") for i,line in enumerate(f.readlines()): if line == "\n": continue if line[-1] == "\n": line = line[:-1] if line.rstrip() != line: # the %s:%d:%d: lets emacs' compile-mode jump to those locations print "%s:%d:%d: trailing whitespace" % (fn, i+1, len(line)+1) found[0] = True f.close() o = Options() o.parseOptions() if o['recursive']: for starting_point in o.starting_points: for root, dirs, files in os.walk(starting_point): for fn in [f for f in files if f.endswith(".py")]: fn = os.path.join(root, fn) check(fn) else: for fn in o.starting_points: check(fn) if found[0]: sys.exit(1) sys.exit(0) tahoe-lafs-1.10.0/misc/coding_tools/fixshebangs.py000066400000000000000000000015321221140116300220750ustar00rootroot00000000000000#!/usr/bin/env python from allmydata.util import fileutil import re, shutil, sys R=re.compile("^#! */usr/bin/python *$") for fname in sys.argv[1:]: inf = open(fname, "rU") rntf = fileutil.ReopenableNamedTemporaryFile() outf = open(rntf.name, "w") first = True for l in inf: if first and R.search(l): outf.write("#!/usr/bin/env python\n") else: outf.write(l) first = False outf.close() try: shutil.move(rntf.name, fname) except EnvironmentError: # Couldn't atomically overwrite, so just hope that this process doesn't die # and the target file doesn't get recreated in between the following two # operations: shutil.move(fname, fname + ".bak") shutil.move(rntf.name, fname) fileutil.remove_if_possible(fname + ".bak") tahoe-lafs-1.10.0/misc/coding_tools/make-canary-files.py000066400000000000000000000123101221140116300230600ustar00rootroot00000000000000#!/usr/bin/env python """ Given a list of nodeids and a 'convergence' file, create a bunch of files that will (when encoded at k=1,N=1) be uploaded to specific nodeids. Run this as follows: make-canary-files.py -c PATH/TO/convergence -n PATH/TO/nodeids -k 1 -N 1 It will create a directory named 'canaries', with one file per nodeid named '$NODEID-$NICKNAME.txt', that contains some random text. The 'nodeids' file should contain one base32 nodeid per line, followed by the optional nickname, like: --- 5yyqu2hbvbh3rgtsgxrmmg4g77b6p3yo server12 vb7vm2mneyid5jbyvcbk2wb5icdhwtun server13 ... --- The resulting 'canaries/5yyqu2hbvbh3rgtsgxrmmg4g77b6p3yo-server12.txt' file will, when uploaded with the given (convergence,k,N) pair, have its first share placed on the 5yyq/server12 storage server. 
If N>1, the other shares will be placed elsewhere, of course. This tool can be useful to construct a set of 'canary' files, which can then be uploaded to storage servers, and later downloaded to test a grid's health. If you are able to download the canary for server12 via some tahoe node X, then the following properties are known to be true: node X is running, and has established a connection to server12 server12 is running, and returning data for at least the given file Using k=1/N=1 creates a separate test for each server. The test process is then to download the whole directory of files (perhaps with a t=deep-check operation). Alternatively, you could upload with the usual k=3/N=10 and then move/delete shares to put all N shares on a single server. Note that any changes to the nodeid list will affect the placement of shares. Shares should be uploaded with the same nodeid list as this tool used when constructing the files. Also note that this tool uses the Tahoe codebase, so it should be run on a system where Tahoe is installed, or in a source tree with setup.py like this: setup.py run_with_pythonpath -p -c 'misc/make-canary-files.py ARGS..' """ import os, sha from twisted.python import usage from allmydata.immutable import upload from allmydata.util import base32 class Options(usage.Options): optParameters = [ ("convergence", "c", None, "path to NODEDIR/private/convergence"), ("nodeids", "n", None, "path to file with one base32 nodeid per line"), ("k", "k", 1, "number of necessary shares, defaults to 1", int), ("N", "N", 1, "number of total shares, defaults to 1", int), ] optFlags = [ ("verbose", "v", "Be noisy"), ] opts = Options() opts.parseOptions() verbose = bool(opts["verbose"]) nodes = {} for line in open(opts["nodeids"], "r").readlines(): line = line.strip() if not line or line.startswith("#"): continue pieces = line.split(None, 1) if len(pieces) == 2: nodeid_s, nickname = pieces else: nodeid_s = pieces[0] nickname = None nodeid = base32.a2b(nodeid_s) nodes[nodeid] = nickname if opts["k"] != 3 or opts["N"] != 10: print "note: using non-default k/N requires patching the Tahoe code" print "src/allmydata/client.py line 55, DEFAULT_ENCODING_PARAMETERS" convergence_file = os.path.expanduser(opts["convergence"]) convergence_s = open(convergence_file, "rb").read().strip() convergence = base32.a2b(convergence_s) def get_permuted_peers(key): results = [] for nodeid in nodes: permuted = sha.new(key + nodeid).digest() results.append((permuted, nodeid)) results.sort(lambda a,b: cmp(a[0], b[0])) return [ r[1] for r in results ] def find_share_for_target(target): target_s = base32.b2a(target) prefix = "The first share of this file will be placed on " + target_s + "\n" prefix += "This data is random: " attempts = 0 while True: attempts += 1 suffix = base32.b2a(os.urandom(10)) if verbose: print " trying", suffix, data = prefix + suffix + "\n" assert len(data) > 55 # no LIT files # now, what storage index will this get? u = upload.Data(data, convergence) eu = upload.EncryptAnUploadable(u) d = eu.get_storage_index() # this happens to run synchronously def _got_si(si, data=data): if verbose: print "SI", base32.b2a(si), peerlist = get_permuted_peers(si) if peerlist[0] == target: # great! if verbose: print " yay!" 
fn = base32.b2a(target) if nodes[target]: nickname = nodes[target].replace("/", "_") fn += "-" + nickname fn += ".txt" fn = os.path.join("canaries", fn) open(fn, "w").write(data) return True # nope, must try again if verbose: print " boo" return False d.addCallback(_got_si) # get sneaky and look inside the Deferred for the synchronous result if d.result: return attempts os.mkdir("canaries") attempts = [] for target in nodes: target_s = base32.b2a(target) print "working on", target_s attempts.append(find_share_for_target(target)) print "done" print "%d attempts total, avg %d per target, max %d" % \ (sum(attempts), 1.0* sum(attempts) / len(nodes), max(attempts)) tahoe-lafs-1.10.0/misc/coding_tools/make_umid000066400000000000000000000030501221140116300210750ustar00rootroot00000000000000#!/usr/bin/env python """Create a short probably-unique string for use as a umid= argument in a Foolscap log() call, to make it easier to locate the source code that generated the message. The main text of the log message is frequently unhelpful for this, and python doesn't make it cheap to compile in the filename and line number of logging calls. Given a message-unique-ID like 'aXoWcA', make your logging call look like: log.msg('OMG badness', level=log.WEIRD, umid='aXoWcA') Then later, if this message actually occurs, you can grep your source tree for aXoWcA to locate the code that caused it. Just stick to the convention that 'umid=' is reserved for this job. It is a good idea to make all the logging statements that could provoke an Incident (i.e. those at level=log.WEIRD or higher) have umid= arguments, to make it easier to write classifier functions for the incident-gatherer. """ ''' The following elisp code may be useful: (defun insert-umid () (interactive) (insert ", umid=\"") (call-process "make_umid" nil t) (delete-char -1) (insert "\"") ) (global-set-key (kbd "C-\`") 'insert-umid) ''' # ' # emacs gets confused by the odd number of single-quotes there import os, base64, sys def make_id(): while True: m = os.urandom(4) # this gives 6-character message ids m = base64.b64encode(m) if "/" in m or "+" in m: continue m = m.replace("=", "") break return m count = 1 if len(sys.argv) > 1: count = int(sys.argv[1]) for i in range(count): print make_id() tahoe-lafs-1.10.0/misc/dependencies/000077500000000000000000000000001221140116300171645ustar00rootroot00000000000000tahoe-lafs-1.10.0/misc/dependencies/COPYING.PSFL.txt000066400000000000000000000045271221140116300216100ustar00rootroot00000000000000PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 -------------------------------------------- 1. This LICENSE AGREEMENT is between the Python Software Foundation ("PSF"), and the Individual or Organization ("Licensee") accessing and otherwise using this software ("Python") in source or binary form and its associated documentation. 2. Subject to the terms and conditions of this License Agreement, PSF hereby grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, analyze, test, perform and/or display publicly, prepare derivative works, distribute, and otherwise use Python alone or in any derivative version, provided, however, that PSF's License Agreement and PSF's notice of copyright, i.e., "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006 Python Software Foundation; All Rights Reserved" are retained in Python alone or in any derivative version prepared by Licensee. 3. 
In the event Licensee prepares a derivative work that is based on or incorporates Python or any part thereof, and wants to make the derivative work available to others as provided herein, then Licensee hereby agrees to include in any such work a brief summary of the changes made to Python. 4. PSF is making Python available to Licensee on an "AS IS" basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 6. This License Agreement will automatically terminate upon a material breach of its terms and conditions. 7. Nothing in this License Agreement shall be deemed to create any relationship of agency, partnership, or joint venture between PSF and Licensee. This License Agreement does not grant permission to use PSF trademarks or trade name in a trademark sense to endorse or promote products or services of Licensee, or any third party. 8. By copying, installing or otherwise using Python, Licensee agrees to be bound by the terms and conditions of this License Agreement. tahoe-lafs-1.10.0/misc/dependencies/COPYING.ZPL.txt000066400000000000000000000040541221140116300215040ustar00rootroot00000000000000 ZPL 2.1 Zope Public License (ZPL) Version 2.1 A copyright notice accompanies this license document that identifies the copyright holders. This license has been certified as open source. It has also been designated as GPL compatible by the Free Software Foundation (FSF). Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions in source code must retain the accompanying copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the accompanying copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Names of the copyright holders must not be used to endorse or promote products derived from this software without prior written permission from the copyright holders. 4. The right to distribute this software or to use it for any purpose does not give you the right to use Servicemarks (sm) or Trademarks (tm) of the copyright holders. Use of them is covered by separate agreement with the copyright holders. 5. If any files are modified, you must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. Disclaimer THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. tahoe-lafs-1.10.0/misc/dependencies/COPYING.setuptools.txt000066400000000000000000000010301221140116300232470ustar00rootroot00000000000000Setuptools is available under either the You may use setuptools under the Zope Public License version 2.1 or, at your option, any later version. See the file COPYING.ZPL.txt for the terms of the Zope Public License version 2.1. You may use this package under the Python Software Foundation License version 2 or, at your option, any later version. See the file COPYING.PSFL.txt for the terms of the Python Software Foundation Licence version 2. (You may choose to use this package under the terms of either licence, at your option.) tahoe-lafs-1.10.0/misc/incident-gatherer/000077500000000000000000000000001221140116300201325ustar00rootroot00000000000000tahoe-lafs-1.10.0/misc/incident-gatherer/classify_tahoe.py000066400000000000000000000046321221140116300235060ustar00rootroot00000000000000 import re umidmap = { 'lp1vaQ': 'download-not-enough-shares', '3uuBUQ': 'download-connection-lost-in-get-buckets', 'LkD9Pw': 'user-incident-button', } def classify_incident(trigger): m = trigger.get('message', '') f = trigger.get('format', '') umid_value = umidmap.get(trigger.get('umid',''), None) if umid_value: return umid_value if re.search(r"^they had shares .* that we didn't know about$", m): # Publish._got_write_answer return "mutable-publish-surprise-shares" if m.startswith("error during query"): # there are a couple of different places that can generate this # message (the result of cut-and-paste error-handling), so it isn't # clear which is which if re.search(r'mutable/servermap\.py.*_do_query', m): # servermap.ServermapUpdater._query_failed() where = "mapupdate" elif re.search(r'mutable/retrieve\.py.*_got_results_one_share', m): where = "retrieve" else: where = "unknown" if ("Calling Stale Broker" in m and "DeadReferenceError" in m): # a storage server went offline while we were talking to it (or # because the client was shut off in the middle of an operation) what = "lost-server" elif "IOError" in m: what = "ioerror" elif ("UncoordinatedWriteError" in m and "someone wrote to the data since we read the servermap" in m): what = "uncoordinated-write-error" elif "ConnectionLost" in m: what = "lost-server" else: what = "unknown" return "mutable-" + where + "-query-" + what if (f.startswith("ran out of peers:") and "have" in trigger and "need" in trigger): return "mutable-retrieve-failure" if m.startswith("invalid privkey from "): # TODO: a UCW causes this, after the prefix has changed. Compare the # prefix before trying to validate the privkey, to avoid the # duplicate error. 
return "mutable-mapupdate-bad-privkey" if trigger.get('facility', '') == "tahoe.introducer": if (trigger.get('isError', False) and "ConnectionDone" in str(trigger.get('failure',''))): return "introducer-lost-connection" if "Initial Introducer connection failed" in m: return "introducer-connection-failed" return None tahoe-lafs-1.10.0/misc/operations_helpers/000077500000000000000000000000001221140116300204435ustar00rootroot00000000000000tahoe-lafs-1.10.0/misc/operations_helpers/cpu-watcher-poll.py000066400000000000000000000007521221140116300242070ustar00rootroot00000000000000#!/usr/bin/env python from foolscap import Tub, eventual from twisted.internet import reactor import sys import pprint def oops(f): print "ERROR" print f def fetch(furl): t = Tub() t.startService() d = t.getReference(furl) d.addCallback(lambda rref: rref.callRemote("get_averages")) d.addCallback(pprint.pprint) return d d = eventual.fireEventually(sys.argv[1]) d.addCallback(fetch) d.addErrback(oops) d.addBoth(lambda res: reactor.stop()) reactor.run() tahoe-lafs-1.10.0/misc/operations_helpers/cpu-watcher-subscribe.py000066400000000000000000000026431221140116300252230ustar00rootroot00000000000000# -*- python -*- from twisted.internet import reactor import sys import os.path, pprint from twisted.application import service from twisted.python import log from foolscap import Tub, Referenceable, RemoteInterface from foolscap.schema import ListOf, TupleOf from zope.interface import implements Averages = ListOf( TupleOf(str, float, float, float) ) class RICPUWatcherSubscriber(RemoteInterface): def averages(averages=Averages): return None class CPUWatcherSubscriber(service.MultiService, Referenceable): implements(RICPUWatcherSubscriber) def __init__(self, furlthing): service.MultiService.__init__(self) if furlthing.startswith("pb://"): furl = furlthing else: furlfile = os.path.expanduser(furlthing) if os.path.isdir(furlfile): furlfile = os.path.join(furlfile, "watcher.furl") furl = open(furlfile, "r").read().strip() tub = Tub() tub.setServiceParent(self) tub.connectTo(furl, self.connected) def connected(self, rref): print "subscribing" d = rref.callRemote("get_averages") d.addCallback(self.remote_averages) d.addErrback(log.err) d = rref.callRemote("subscribe", self) d.addErrback(log.err) def remote_averages(self, averages): pprint.pprint(averages) c = CPUWatcherSubscriber(sys.argv[1]) c.startService() reactor.run() tahoe-lafs-1.10.0/misc/operations_helpers/cpu-watcher.tac000066400000000000000000000203061221140116300233570ustar00rootroot00000000000000# -*- python -*- """ # run this tool on a linux box in its own directory, with a file named # 'pids.txt' describing which processes to watch. It will follow CPU usage of # the given processes, and compute 1/5/15-minute moving averages for each # process. These averages can be retrieved from a foolscap connection # (published at ./watcher.furl), or through an HTTP query (using ./webport). # Each line of pids.txt describes a single process. Blank lines and ones that # begin with '#' are ignored. Each line is either "PID" or "PID NAME" (space # separated). PID is either a numeric process ID, a pathname to a file that # contains a process ID, or a pathname to a directory that contains a # twistd.pid file (which contains a process ID). NAME is an arbitrary string # that will be used to describe the process to watcher.furl subscribers, and # defaults to PID if not provided. 
""" # TODO: # built-in graphs on web interface import pickle, os.path, time, pprint from twisted.application import internet, service, strports from twisted.web import server, resource, http from twisted.python import log import simplejson from foolscap import Tub, Referenceable, RemoteInterface, eventual from foolscap.schema import ListOf, TupleOf from zope.interface import implements def read_cpu_times(pid): data = open("/proc/%d/stat" % pid, "r").read() data = data.split() times = data[13:17] # the values in /proc/%d/stat are in ticks, I think. My system has # CONFIG_HZ_1000=y in /proc/config.gz but nevertheless the numbers in # 'stat' appear to be 10ms each. HZ = 100 userspace_seconds = int(times[0]) * 1.0 / HZ system_seconds = int(times[1]) * 1.0 / HZ child_userspace_seconds = int(times[2]) * 1.0 / HZ child_system_seconds = int(times[3]) * 1.0 / HZ return (userspace_seconds, system_seconds) def read_pids_txt(): processes = [] for line in open("pids.txt", "r").readlines(): line = line.strip() if not line or line[0] == "#": continue parts = line.split() pidthing = parts[0] if len(parts) > 1: name = parts[1] else: name = pidthing pid = None try: pid = int(pidthing) except ValueError: pidfile = os.path.expanduser(pidthing) if os.path.isdir(pidfile): pidfile = os.path.join(pidfile, "twistd.pid") try: pid = int(open(pidfile, "r").read().strip()) except EnvironmentError: pass if pid is not None: processes.append( (pid, name) ) return processes Averages = ListOf( TupleOf(str, float, float, float) ) class RICPUWatcherSubscriber(RemoteInterface): def averages(averages=Averages): return None class RICPUWatcher(RemoteInterface): def get_averages(): """Return a list of rows, one for each process I am watching. Each row is (name, 1-min-avg, 5-min-avg, 15-min-avg), where 'name' is a string, and the averages are floats from 0.0 to 1.0 . Each average is the percentage of the CPU that this process has used: the change in CPU time divided by the change in wallclock time. """ return Averages def subscribe(observer=RICPUWatcherSubscriber): """Arrange for the given observer to get an 'averages' message every time the averages are updated. 
This message will contain a single argument, the same list of tuples that get_averages() returns.""" return None class CPUWatcher(service.MultiService, resource.Resource, Referenceable): implements(RICPUWatcher) POLL_INTERVAL = 30 # seconds HISTORY_LIMIT = 15 * 60 # 15min AVERAGES = (1*60, 5*60, 15*60) # 1min, 5min, 15min def __init__(self): service.MultiService.__init__(self) resource.Resource.__init__(self) try: self.history = pickle.load(open("history.pickle", "rb")) except: self.history = {} self.current = [] self.observers = set() ts = internet.TimerService(self.POLL_INTERVAL, self.poll) ts.setServiceParent(self) def startService(self): service.MultiService.startService(self) try: desired_webport = open("webport", "r").read().strip() except EnvironmentError: desired_webport = None webport = desired_webport or "tcp:0" root = self serv = strports.service(webport, server.Site(root)) serv.setServiceParent(self) if not desired_webport: got_port = serv._port.getHost().port open("webport", "w").write("tcp:%d\n" % got_port) self.tub = Tub(certFile="watcher.pem") self.tub.setServiceParent(self) try: desired_tubport = open("tubport", "r").read().strip() except EnvironmentError: desired_tubport = None tubport = desired_tubport or "tcp:0" l = self.tub.listenOn(tubport) if not desired_tubport: got_port = l.getPortnum() open("tubport", "w").write("tcp:%d\n" % got_port) d = self.tub.setLocationAutomatically() d.addCallback(self._tub_ready) d.addErrback(log.err) def _tub_ready(self, res): self.tub.registerReference(self, furlFile="watcher.furl") def getChild(self, path, req): if path == "": return self return resource.Resource.getChild(self, path, req) def render(self, req): t = req.args.get("t", ["html"])[0] ctype = "text/plain" data = "" if t == "html": data = "# name, 1min, 5min, 15min\n" data += pprint.pformat(self.current) + "\n" elif t == "json": #data = str(self.current) + "\n" # isn't that convenient? almost. 
data = simplejson.dumps(self.current, indent=True) else: req.setResponseCode(http.BAD_REQUEST) data = "Unknown t= %s\n" % t req.setHeader("content-type", ctype) return data def remote_get_averages(self): return self.current def remote_subscribe(self, observer): self.observers.add(observer) def notify(self, observer): d = observer.callRemote("averages", self.current) def _error(f): log.msg("observer error, removing them") log.msg(f) self.observers.discard(observer) d.addErrback(_error) def poll(self): max_history = self.HISTORY_LIMIT / self.POLL_INTERVAL current = [] try: processes = read_pids_txt() except: log.err() return for (pid, name) in processes: if pid not in self.history: self.history[pid] = [] now = time.time() try: (user_seconds, sys_seconds) = read_cpu_times(pid) self.history[pid].append( (now, user_seconds, sys_seconds) ) while len(self.history[pid]) > max_history+1: self.history[pid].pop(0) except: log.msg("error reading process %s (%s), ignoring" % (pid, name)) log.err() try: pickle.dump(self.history, open("history.pickle.tmp", "wb")) os.rename("history.pickle.tmp", "history.pickle") except: pass for (pid, name) in processes: row = [name] for avg in self.AVERAGES: row.append(self._average_N(pid, avg)) current.append(tuple(row)) self.current = current print current for ob in self.observers: eventual.eventually(self.notify, ob) def _average_N(self, pid, seconds): num_samples = seconds / self.POLL_INTERVAL samples = self.history[pid] if len(samples) < num_samples+1: return None first = -num_samples-1 elapsed_wall = samples[-1][0] - samples[first][0] elapsed_user = samples[-1][1] - samples[first][1] elapsed_sys = samples[-1][2] - samples[first][2] if elapsed_wall == 0.0: return 0.0 return (elapsed_user+elapsed_sys) / elapsed_wall application = service.Application("cpu-watcher") CPUWatcher().setServiceParent(application) tahoe-lafs-1.10.0/misc/operations_helpers/find-share-anomalies.py000066400000000000000000000043571221140116300250140ustar00rootroot00000000000000#!/usr/bin/env python # feed this the results of 'tahoe catalog-shares' for all servers import sys chk_encodings = {} sdmf_encodings = {} sdmf_versions = {} for catalog in sys.argv[1:]: for line in open(catalog, "r").readlines(): line = line.strip() pieces = line.split() if pieces[0] == "CHK": ftype, si, kN, size, ueb_hash, expiration, filename = pieces if si not in chk_encodings: chk_encodings[si] = (set(), set()) chk_encodings[si][0].add( (si, kN) ) chk_encodings[si][1].add( line ) if pieces[0] == "SDMF": ftype, si, kN, size, ver, expiration, filename = pieces if si not in sdmf_encodings: sdmf_encodings[si] = (set(), set()) sdmf_encodings[si][0].add( (si, kN) ) sdmf_encodings[si][1].add( line ) if si not in sdmf_versions: sdmf_versions[si] = (set(), set()) sdmf_versions[si][0].add( ver ) sdmf_versions[si][1].add( line ) chk_multiple_encodings = [(si,lines) for si,(encodings,lines) in chk_encodings.items() if len(encodings) > 1] chk_multiple_encodings.sort() sdmf_multiple_encodings = [(si,lines) for si,(encodings,lines) in sdmf_encodings.items() if len(encodings) > 1 ] sdmf_multiple_encodings.sort() sdmf_multiple_versions = [(si,lines) for si,(versions,lines) in sdmf_versions.items() if len(versions) > 1] sdmf_multiple_versions.sort() if chk_multiple_encodings: print print "CHK multiple encodings:" for (si,lines) in chk_multiple_encodings: print " " + si for line in sorted(lines): print " " + line if sdmf_multiple_encodings: print print "SDMF multiple encodings:" for (si,lines) in sdmf_multiple_encodings: print " " + si 
for line in sorted(lines): print " " + line if sdmf_multiple_versions: print print "SDMF multiple versions:" for (si,lines) in sdmf_multiple_versions: print " " + si for line in sorted(lines): print " " + line tahoe-lafs-1.10.0/misc/operations_helpers/getmem.py000066400000000000000000000005751221140116300223020ustar00rootroot00000000000000#! /usr/bin/env python from foolscap import Tub from foolscap.eventual import eventually import sys from twisted.internet import reactor def go(): t = Tub() d = t.getReference(sys.argv[1]) d.addCallback(lambda rref: rref.callRemote("get_memory_usage")) def _got(res): print res reactor.stop() d.addCallback(_got) eventually(go) reactor.run() tahoe-lafs-1.10.0/misc/operations_helpers/munin/000077500000000000000000000000001221140116300215715ustar00rootroot00000000000000tahoe-lafs-1.10.0/misc/operations_helpers/munin/tahoe-conf000066400000000000000000000013771221140116300235470ustar00rootroot00000000000000# put a copy of this file in /etc/munin/plugin-conf.d/tahoe-conf to let these # plugins know where the node's base directories are. Modify the lines below # to match your nodes. [tahoe-files] env.basedir_bs5c1 /home/amduser/tahoe/bs5c1 env.basedir_bs5c2 /home/amduser/tahoe/bs5c2 env.basedir_bs5c3 /home/amduser/tahoe/bs5c3 env.basedir_bs5c4 /home/amduser/tahoe/bs5c4 [tahoe-sharesperfile] env.basedir_bs5c1 /home/amduser/tahoe/bs5c1 env.basedir_bs5c2 /home/amduser/tahoe/bs5c2 env.basedir_bs5c3 /home/amduser/tahoe/bs5c3 env.basedir_bs5c4 /home/amduser/tahoe/bs5c4 [tahoe-storagespace] env.basedir_bs5c1 /home/amduser/tahoe/bs5c1 env.basedir_bs5c2 /home/amduser/tahoe/bs5c2 env.basedir_bs5c3 /home/amduser/tahoe/bs5c3 env.basedir_bs5c4 /home/amduser/tahoe/bs5c4 tahoe-lafs-1.10.0/misc/operations_helpers/munin/tahoe-stats.plugin-conf000066400000000000000000000010441221140116300261670ustar00rootroot00000000000000[tahoe_storage_allocated] env.statsfile /home/robk/trees/tahoe/stats_gatherer/stats.pickle [tahoe_storage_consumed] env.statsfile /home/robk/trees/tahoe/stats_gatherer/stats.pickle [tahoe_runtime_load_avg] env.statsfile /home/robk/trees/tahoe/stats_gatherer/stats.pickle [tahoe_runtime_load_peak] env.statsfile /home/robk/trees/tahoe/stats_gatherer/stats.pickle [tahoe_storage_bytes_added] env.statsfile /home/robk/trees/tahoe/stats_gatherer/stats.pickle [tahoe_storage_bytes_freed] env.statsfile /home/robk/trees/tahoe/stats_gatherer/stats.pickle tahoe-lafs-1.10.0/misc/operations_helpers/munin/tahoe_cpu_watcher000066400000000000000000000013301221140116300251750ustar00rootroot00000000000000#!/usr/bin/env python import os, sys, re import urllib import simplejson url = os.environ["url"] current = simplejson.loads(urllib.urlopen(url).read()) configinfo = """\ graph_title Tahoe CPU Usage graph_vlabel CPU % graph_category tahoe graph_info This graph shows the 5min average of CPU usage for each process """ data = "" for (name, avg1, avg5, avg15) in current: dataname = re.sub(r'[^\w]', '_', name) configinfo += dataname + ".label " + name + "\n" configinfo += dataname + ".draw LINE2\n" if avg5 is not None: data += dataname + ".value %.2f\n" % (100.0 * avg5) if len(sys.argv) > 1: if sys.argv[1] == "config": print configinfo.rstrip() sys.exit(0) print data.rstrip() tahoe-lafs-1.10.0/misc/operations_helpers/munin/tahoe_diskleft000066400000000000000000000013751221140116300245070ustar00rootroot00000000000000#!/usr/bin/env python # This is a munin plugin which pulls data from the server in # misc/operations_helpers/spacetime/diskwatcher.tac . 
It produces a graph of how much free space # is left on all disks across the grid. The plugin should be configured with # env_url= pointing at the diskwatcher.tac webport. import os, sys, urllib, simplejson if len(sys.argv) > 1 and sys.argv[1] == "config": print """\ graph_title Tahoe Remaining Disk Space graph_vlabel bytes remaining graph_category tahoe graph_info This graph shows the total amount of disk space left available in the grid disk_left.label disk left disk_left.draw LINE1""" sys.exit(0) url = os.environ["url"] data = simplejson.load(urllib.urlopen(url))["available"] print "disk_left.value", data tahoe-lafs-1.10.0/misc/operations_helpers/munin/tahoe_disktotal000066400000000000000000000016251221140116300246760ustar00rootroot00000000000000#!/usr/bin/env python # This is a munin plugin which pulls data from the server in # misc/operations_helpers/spacetime/diskwatcher.tac . It produces a graph of how much space is # present on all disks across the grid, and how much space is actually being # used. The plugin should be configured with env_url= pointing at the # diskwatcher.tac webport. import os, sys, urllib, simplejson if len(sys.argv) > 1 and sys.argv[1] == "config": print """\ graph_title Tahoe Total Disk Space graph_vlabel bytes graph_category tahoe graph_info This graph shows the total amount of disk space present in the grid, and how much of it is currently being used. disk_total.label disk total disk_total.draw LINE2 disk_used.label disk used disk_used.draw LINE1""" sys.exit(0) url = os.environ["url"] data = simplejson.load(urllib.urlopen(url)) print "disk_total.value", data["total"] print "disk_used.value", data["used"] tahoe-lafs-1.10.0/misc/operations_helpers/munin/tahoe_diskusage000066400000000000000000000024411221140116300246540ustar00rootroot00000000000000#!/usr/bin/env python # This is a munin plugin which pulls data from the server in # misc/operations_helpers/spacetime/diskwatcher.tac . It produces a graph of how much disk space # is being used per unit time. The plugin should be configured with env_url= # pointing at the diskwatcher.tac webport. import os, sys, urllib, simplejson if len(sys.argv) > 1 and sys.argv[1] == "config": print """\ graph_title Tahoe Disk Usage Measurement graph_vlabel bytes per second graph_category tahoe graph_info This graph shows the estimated disk usage per unit time, totalled across all storage servers graph_args --lower-limit 0 --rigid rate_1hr.label (one hour sample) rate_1hr.draw LINE1 rate_1day.label (one day sample) rate_1day.draw LINE1 rate_2wk.label (two week sample) rate_2wk.draw LINE2 rate_4wk.label (four week sample) rate_4wk.draw LINE2""" sys.exit(0) url = os.environ["url"] timespans = simplejson.load(urllib.urlopen(url))["rates"] data = dict([(name, growth) for (name, timespan, growth, timeleft) in timespans]) # growth is in bytes per second if "1hr" in data: print "rate_1hr.value", data["1hr"] if "1day" in data: print "rate_1day.value", data["1day"] if "2wk" in data: print "rate_2wk.value", data["2wk"] if "4wk" in data: print "rate_4wk.value", data["4wk"] tahoe-lafs-1.10.0/misc/operations_helpers/munin/tahoe_diskused000066400000000000000000000013511221140116300245070ustar00rootroot00000000000000#!/usr/bin/env python # This is a munin plugin which pulls data from the server in # misc/operations_helpers/spacetime/diskwatcher.tac . It produces a graph of how much space is # used on all disks across the grid. The plugin should be configured with # env_url= pointing at the diskwatcher.tac webport. 
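# ---------------------------------------------------------------------------
# Illustrative sketch (standalone, not part of the original plugins): the
# diskwatcher-based plugins here (tahoe_diskleft, tahoe_disktotal,
# tahoe_diskusage, tahoe_diskused, tahoe_doomsday) all fetch the same JSON
# document from the diskwatcher.tac webport and each pick out one field.
# This sketch just dumps the fields they rely on; the URL shown is a
# placeholder assumption, normally supplied via env_url in the munin config.
import urllib, simplejson

def dump_diskwatcher(url="http://localhost:8080/"):  # assumed example URL
    data = simplejson.load(urllib.urlopen(url))
    print "total bytes:    ", data["total"]      # used by tahoe_disktotal
    print "used bytes:     ", data["used"]       # used by tahoe_diskused
    print "available bytes:", data["available"]  # used by tahoe_diskleft
    # "rates" is a list of (name, timespan, growth, timeleft) tuples,
    # consumed by tahoe_diskusage (growth) and tahoe_doomsday (timeleft)
    for (name, timespan, growth, timeleft) in data["rates"]:
        print name, "growth (B/s):", growth, "time left (s):", timeleft
# ---------------------------------------------------------------------------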
import os, sys, urllib, simplejson if len(sys.argv) > 1 and sys.argv[1] == "config": print """\ graph_title Tahoe Total Disk Space Used graph_vlabel bytes used graph_category tahoe graph_info This graph shows the total amount of disk space used across the grid disk_used.label disk used disk_used.draw LINE1""" sys.exit(0) url = os.environ["url"] data = simplejson.load(urllib.urlopen(url))["used"] print "disk_used.value", data tahoe-lafs-1.10.0/misc/operations_helpers/munin/tahoe_doomsday000066400000000000000000000025171221140116300245200ustar00rootroot00000000000000#!/usr/bin/env python # This is a munin plugin which pulls data from the server in # misc/operations_helpers/spacetime/diskwatcher.tac . It produces a graph of how much time is # left before the grid fills up. The plugin should be configured with # env_url= pointing at the diskwatcher.tac webport. import os, sys, urllib, simplejson if len(sys.argv) > 1 and sys.argv[1] == "config": print """\ graph_title Tahoe Remaining Time Predictor graph_vlabel days remaining graph_category tahoe graph_info This graph shows the estimated number of days left until storage space is exhausted days_1hr.label days left (one hour sample) days_1hr.draw LINE1 days_1day.label days left (one day sample) days_1day.draw LINE1 days_2wk.label days left (two week sample) days_2wk.draw LINE2 days_4wk.label days left (four week sample) days_4wk.draw LINE2""" sys.exit(0) url = os.environ["url"] timespans = simplejson.load(urllib.urlopen(url))["rates"] data = dict([(name, timeleft) for (name, timespan, growth, timeleft) in timespans if timeleft]) # timeleft is in seconds DAY = 24*60*60 if "1hr" in data: print "days_1hr.value", data["1hr"]/DAY if "1day" in data: print "days_1day.value", data["1day"]/DAY if "2wk" in data: print "days_2wk.value", data["2wk"]/DAY if "4wk" in data: print "days_4wk.value", data["4wk"]/DAY tahoe-lafs-1.10.0/misc/operations_helpers/munin/tahoe_estimate_files000066400000000000000000000030771221140116300257000ustar00rootroot00000000000000#!/usr/bin/env python import sys, os.path if len(sys.argv) > 1 and sys.argv[1] == "config": print """\ graph_title Tahoe File Estimate graph_vlabel files graph_category tahoe graph_info This graph shows the estimated number of files and directories present in the grid files.label files files.draw LINE2""" sys.exit(0) # Edit this to point at some subset of storage directories. node_dirs = [os.path.expanduser("~amduser/prodnet/storage1"), os.path.expanduser("~amduser/prodnet/storage2"), os.path.expanduser("~amduser/prodnet/storage3"), os.path.expanduser("~amduser/prodnet/storage4"), ] sections = ["aa", "ab", "ac", "ad", "ae", "af", "ag", "ah", "ai", "aj"] # and edit this to reflect your default encoding's "total_shares" value, and # the total number of servers. 
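# Worked example of the sampling correction computed below (using the values
# that follow: N=10 total shares, 20 servers, 4 sampled node_dirs, and 10 of
# the 32*32 = 1024 two-character prefix directories):
#   chance a given file puts a share on a given server = 10/20       = 0.5
#   chance it missed all 4 sampled servers             = (1-0.5)**4  = 0.0625
#   correction factor                                  = 1 + 0.0625  = 1.0625
#   estimated files ~= unique_strings * (1024/10) * 1.0625
# (Raising the estimate by 1+no_chance is a first-order approximation that
# is adequate while no_chance stays small.)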
N = 10 num_servers = 20 index_strings = set() for base in node_dirs: for section in sections: sampledir = os.path.join(base, "storage", "shares", section) indices = os.listdir(sampledir) index_strings.update(indices) unique_strings = len(index_strings) # the chance that any given file appears on any given server chance = 1.0 * N / num_servers # the chance that the file does *not* appear on the servers that we're # examining no_chance = (1-chance) ** len(node_dirs) # if a file has a 25% chance of not appearing in our sample, then we need to # raise our estimate by (1.25/1) correction = 1+no_chance #print "correction", correction files = unique_strings * (32*32/len(sections)) * correction print "files.value %d" % int(files) tahoe-lafs-1.10.0/misc/operations_helpers/munin/tahoe_files000066400000000000000000000033651221140116300240050ustar00rootroot00000000000000#!/usr/bin/env python # This is a munin plugin to track the number of files that each node's # StorageServer is holding on behalf of other nodes. Each file that has been # uploaded to the mesh (and has shares present on this node) will be counted # here. When there are <= 100 nodes in the mesh, this count will equal the # total number of files that are active in the entire mesh. When there are # 200 nodes present in the mesh, it will represent about half of the total # number. # Copy this plugin into /etc/munun/plugins/tahoe-files and then put # the following in your /etc/munin/plugin-conf.d/foo file to let it know # where to find the basedirectory for each node: # # [tahoe-files] # env.basedir_NODE1 /path/to/node1 # env.basedir_NODE2 /path/to/node2 # env.basedir_NODE3 /path/to/node3 # import os, sys nodedirs = [] for k,v in os.environ.items(): if k.startswith("basedir_"): nodename = k[len("basedir_"):] nodedirs.append( (nodename, v) ) nodedirs.sort() configinfo = \ """graph_title Allmydata Tahoe Filecount graph_vlabel files graph_category tahoe graph_info This graph shows the number of files hosted by this node's StorageServer """ for nodename, basedir in nodedirs: configinfo += "%s.label %s\n" % (nodename, nodename) configinfo += "%s.draw LINE2\n" % (nodename,) if len(sys.argv) > 1: if sys.argv[1] == "config": print configinfo.rstrip() sys.exit(0) for nodename, basedir in nodedirs: shares = 0 root = os.path.join(basedir, "storage", "shares") for dirpath, dirnames, filenames in os.walk(root, topdown=True): if dirpath == root and "incoming" in dirnames: dirnames.remove("incoming") shares += len(filenames) print "%s.value %d" % (nodename, shares) tahoe-lafs-1.10.0/misc/operations_helpers/munin/tahoe_helperstats_active000066400000000000000000000010701221140116300265630ustar00rootroot00000000000000#!/usr/bin/env python import os, sys import urllib import simplejson configinfo = """\ graph_title Tahoe Helper Stats - Active Files graph_vlabel bytes graph_category tahoe graph_info This graph shows the number of files being actively processed by the helper fetched.label Active Files fetched.draw LINE2 """ if len(sys.argv) > 1: if sys.argv[1] == "config": print configinfo.rstrip() sys.exit(0) url = os.environ["url"] data = simplejson.loads(urllib.urlopen(url).read()) print "fetched.value %d" % data["chk_upload_helper.active_uploads"] tahoe-lafs-1.10.0/misc/operations_helpers/munin/tahoe_helperstats_fetched000066400000000000000000000011151221140116300267120ustar00rootroot00000000000000#!/usr/bin/env python import os, sys import urllib import simplejson configinfo = """\ graph_title Tahoe Helper Stats - Bytes Fetched graph_vlabel bytes 
graph_category tahoe graph_info This graph shows the amount of data being fetched by the helper fetched.label Bytes Fetched fetched.type GAUGE fetched.draw LINE1 fetched.min 0 """ if len(sys.argv) > 1: if sys.argv[1] == "config": print configinfo.rstrip() sys.exit(0) url = os.environ["url"] data = simplejson.loads(urllib.urlopen(url).read()) print "fetched.value %d" % data["chk_upload_helper.fetched_bytes"] tahoe-lafs-1.10.0/misc/operations_helpers/munin/tahoe_introstats000066400000000000000000000015411221140116300251070ustar00rootroot00000000000000#!/usr/bin/env python import os, sys import urllib import simplejson configinfo = """\ graph_title Tahoe Introducer Stats graph_vlabel hosts graph_category tahoe graph_info This graph shows the number of hosts announcing and subscribing to various services storage_server.label Storage Servers storage_server.draw LINE1 storage_hosts.label Distinct Storage Hosts storage_hosts.draw LINE1 storage_client.label Clients storage_client.draw LINE2 """ if len(sys.argv) > 1: if sys.argv[1] == "config": print configinfo.rstrip() sys.exit(0) url = os.environ["url"] data = simplejson.loads(urllib.urlopen(url).read()) print "storage_server.value %d" % data["announcement_summary"]["storage"] print "storage_hosts.value %d" % data["announcement_distinct_hosts"]["storage"] print "storage_client.value %d" % data["subscription_summary"]["storage"] tahoe-lafs-1.10.0/misc/operations_helpers/munin/tahoe_nodememory000066400000000000000000000042141221140116300250530ustar00rootroot00000000000000#!/usr/bin/env python # This munin plugin isolates processes by looking for the 'pid' file created # by 'allmydata start', then extracts the amount of memory they consume (both # VmSize and VmRSS) from /proc import os, sys, re if 0: # for testing os.environ["nodememory_warner1"] = "run/warner1" os.environ["nodememory_warner2"] = "run/warner2" nodedirs = [] for k,v in os.environ.items(): if k.startswith("nodememory_"): nodename = k[len("nodememory_"):] nodedirs.append((nodename, v)) nodedirs.sort(lambda a,b: cmp(a[0],b[0])) pids = {} for node,nodedir in nodedirs: pidfile = os.path.join(nodedir, "twistd.pid") if os.path.exists(pidfile): pid = int(open(pidfile,"r").read()) pids[node] = pid fields = ["VmSize", "VmRSS"] if len(sys.argv) > 1: if sys.argv[1] == "config": configinfo = \ """graph_title Memory Consumed by Nodes graph_vlabel bytes graph_category Tahoe graph_info This graph shows the memory used by specific processes """ for nodename,nodedir in nodedirs: for f in fields: configinfo += "%s_%s.label %s used by %s\n" % (nodename, f, f, nodename) linetype = "LINE1" if f == "VmSize": linetype = "LINE2" configinfo += "%s_%s.draw %s\n" % (nodename, f, linetype) if f == "VmData": configinfo += "%s_%s.graph no\n" % (nodename, f) print configinfo sys.exit(0) nodestats = {} for node,pid in pids.items(): stats = {} statusfile = "/proc/%s/status" % pid if not os.path.exists(statusfile): continue for line in open(statusfile,"r").readlines(): for f in fields: if line.startswith(f + ":"): m = re.search(r'(\d+)', line) stats[f] = int(m.group(1)) nodestats[node] = stats for node,stats in nodestats.items(): for f,value in stats.items(): # TODO: not sure if /proc/%d/status means 1000 or 1024 when it says # 'kB' print "%s_%s.value %d" % (node, f, 1024*value) tahoe-lafs-1.10.0/misc/operations_helpers/munin/tahoe_overhead000066400000000000000000000056321221140116300244770ustar00rootroot00000000000000#!/usr/bin/env python # This is a munin plugin which pulls total-used data from the server in # 
misc/operations_helpers/spacetime/diskwatcher.tac, and a total-deep-size number from custom # PHP database-querying scripts on a different server. It produces a graph of # how much garbage/overhead is present in the grid: the ratio of total-used # over (total-deep-size*N/k), expressed as a percentage. No overhead would be # 0, using twice as much space as we'd prefer would be 100. This is the # percentage which could be saved if we made GC work perfectly and reduced # other forms of overhead to zero. This script assumes 3-of-10. # A second graph is produced with how much of the total-deep-size number # would be saved if we removed data from inactive accounts. This is also on a # percentage scale. # A separate number (without a graph) is produced with the "effective # expansion factor". If there were no overhead, with 3-of-10, this would be # 3.33 . # Overhead is caused by the following problems (in order of size): # uncollected garbage: files that are no longer referenced but not yet deleted # inactive accounts: files that are referenced by cancelled accounts # share storage overhead: bucket directories # filesystem overhead: 4kB minimum block sizes # share overhead: hashes, pubkeys, lease information # This plugin should be configured with env_diskwatcher_url= pointing at the # diskwatcher.tac webport, and env_deepsize_url= pointing at the PHP script. import os, sys, urllib, simplejson if len(sys.argv) > 1 and sys.argv[1] == "config": print """\ graph_title Tahoe Overhead Calculator graph_vlabel Percentage graph_category tahoe graph_info This graph shows the estimated amount of storage overhead (ratio of actual disk usage to ideal disk usage). The 'overhead' number is how much space we could save if we implemented GC, and the 'inactive' number is how much additional space we could save if we could delete data for cancelled accounts. 
overhead.label disk usage overhead overhead.draw LINE2 inactive.label inactive account usage inactive.draw LINE1 effective_expansion.label Effective Expansion Factor effective_expansion.graph no""" sys.exit(0) diskwatcher_url = os.environ["diskwatcher_url"] total = simplejson.load(urllib.urlopen(diskwatcher_url))["used"] deepsize_url = os.environ["deepsize_url"] deepsize = simplejson.load(urllib.urlopen(deepsize_url)) k = 3; N = 10 expansion = float(N) / k ideal = expansion * deepsize["all"] overhead = (total - ideal) / ideal if overhead > 0: # until all the storage-servers come online, this number will be nonsense print "overhead.value %f" % (100.0 * overhead) # same for this one effective_expansion = total / deepsize["all"] print "effective_expansion.value %f" % effective_expansion # this value remains valid, though inactive_savings = (deepsize["all"] - deepsize["active"]) / deepsize["active"] print "inactive.value %f" % (100.0 * inactive_savings) tahoe-lafs-1.10.0/misc/operations_helpers/munin/tahoe_rootdir_space000066400000000000000000000010141221140116300255250ustar00rootroot00000000000000#!/usr/bin/env python import os, sys import urllib import simplejson configinfo = """\ graph_title Tahoe Root Directory Size graph_vlabel bytes graph_category tahoe graph_info This graph shows the amount of space consumed by all files reachable from a given directory space.label Space space.draw LINE2 """ if len(sys.argv) > 1: if sys.argv[1] == "config": print configinfo.rstrip() sys.exit(0) url = os.environ["url"] data = int(urllib.urlopen(url).read().strip()) print "space.value %d" % data tahoe-lafs-1.10.0/misc/operations_helpers/munin/tahoe_server_latency_000066400000000000000000000060401221140116300260600ustar00rootroot00000000000000#!/usr/bin/env python # retrieve a latency statistic for a given operation and percentile from a # set of storage servers. # the OPERATION value should come from the following list: # allocate: allocate_buckets, first step to upload an immutable file # write: write data to an immutable share # close: finish writing to an immutable share # cancel: abandon a partial immutable share # get: get_buckets, first step to download an immutable file # read: read data from an immutable share # writev: slot_testv_and_readv_and_writev, modify/create a directory # readv: read a directory (or mutable file) # the PERCENTILE value should come from the following list: # 01_0: 1% # 10_0: 10% # 50_0: 50% (median) # 90_0: 90% # 99_0: 99% # 99_9: 99.9% # mean: # To use this, create a symlink from # /etc/munin/plugins/tahoe_server_latency_OPERATION_PERCENTILE to this # script. For example: # ln -s /usr/share/doc/allmydata-tahoe/munin/tahoe_server_latency_ \ # /etc/munin/plugins/tahoe_server_latency_allocate_99_9 # Also, you will need to put a list of node statistics URLs in the plugin's # environment, by adding a stanza like the following to a file in # /etc/munin/plugin-conf.d/, such as /etc/munin/plugin-conf.d/tahoe_latencies: # # [tahoe_server_latency*] # env.url_storage1 http://localhost:9011/statistics?t=json # env.url_storage2 http://localhost:9012/statistics?t=json # env.url_storage3 http://localhost:9013/statistics?t=json # env.url_storage4 http://localhost:9014/statistics?t=json # of course, these URLs must match the webports you have configured into the # storage nodes. 
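# ---------------------------------------------------------------------------
# Illustrative sketch (standalone, not part of the plugin below): the latency
# plugin derives both the operation and the percentile from the name of the
# symlink it is invoked through, and turns them into a stats key.  For a
# symlink named "tahoe_server_latency_allocate_99_9" that works out as:
#   operation  = "allocate"
#   percentile = "99_9"
#   stats key  = "storage_server.latencies.allocate.99_9_percentile"
def latency_stat_key(script_name):
    prefix = "tahoe_server_latency_"
    assert script_name.startswith(prefix)
    operation, percentile = script_name[len(prefix):].split("_", 1)
    p_key = "mean" if percentile == "mean" else percentile + "_percentile"
    return "storage_server.latencies.%s.%s" % (operation, p_key)
# latency_stat_key("tahoe_server_latency_allocate_99_9")
#   -> 'storage_server.latencies.allocate.99_9_percentile'
# ---------------------------------------------------------------------------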
import os, sys import urllib import simplejson node_urls = [] for k,v in os.environ.items(): if k.startswith("url_"): nodename = k[len("url_"):] node_urls.append( (nodename, v) ) node_urls.sort() my_name = os.path.basename(sys.argv[0]) PREFIX = "tahoe_server_latency_" assert my_name.startswith(PREFIX) my_name = my_name[len(PREFIX):] (operation, percentile) = my_name.split("_", 1) if percentile == "mean": what = "mean" else: what = percentile.replace("_", ".") + "th percentile" configinfo = \ """graph_title Tahoe Server '%(operation)s' Latency (%(what)s) graph_vlabel seconds graph_category tahoe graph_info This graph shows how long '%(operation)s' operations took on the storage server, the %(what)s delay between message receipt and response generation, calculated over the last thousand operations. """ % {'operation': operation, 'what': what} for nodename, url in node_urls: configinfo += "%s.label %s\n" % (nodename, nodename) configinfo += "%s.draw LINE2\n" % (nodename,) if len(sys.argv) > 1: if sys.argv[1] == "config": print configinfo.rstrip() sys.exit(0) for nodename, url in node_urls: data = simplejson.loads(urllib.urlopen(url).read()) if percentile == "mean": p_key = "mean" else: p_key = percentile + "_percentile" key = "storage_server.latencies.%s.%s" % (operation, p_key) value = data["stats"][key] print "%s.value %s" % (nodename, value) tahoe-lafs-1.10.0/misc/operations_helpers/munin/tahoe_server_operations_000066400000000000000000000050331221140116300266050ustar00rootroot00000000000000#!/usr/bin/env python # graph operations-per-second from a set of storage servers. # the OPERATION value should come from the following list: # allocate: allocate_buckets, first step to upload an immutable file # write: write data to an immutable share # close: finish writing to an immutable share # cancel: abandon a partial immutable share # get: get_buckets, first step to download an immutable file # read: read data from an immutable share # writev: slot_testv_and_readv_and_writev, modify/create a directory # readv: read a directory (or mutable file) # To use this, create a symlink from # /etc/munin/plugins/tahoe_server_operations_OPERATION to this script. For # example: # ln -s /usr/share/doc/allmydata-tahoe/munin/tahoe_server_operations_ \ # /etc/munin/plugins/tahoe_server_operations_allocate # Also, you will need to put a list of node statistics URLs in the plugin's # environment, by adding a stanza like the following to a file in # /etc/munin/plugin-conf.d/, such as /etc/munin/plugin-conf.d/tahoe_operations: # # [tahoe_server_operations*] # env.url_storage1 http://localhost:9011/statistics?t=json # env.url_storage2 http://localhost:9012/statistics?t=json # env.url_storage3 http://localhost:9013/statistics?t=json # env.url_storage4 http://localhost:9014/statistics?t=json # of course, these URLs must match the webports you have configured into the # storage nodes. 
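# ---------------------------------------------------------------------------
# Note (not part of the plugin below): the operations plugin emits raw,
# ever-increasing counters and declares them "type DERIVE" with "min 0", so
# munin/RRDtool converts successive samples into a per-second rate, roughly
#   rate = (counter_now - counter_prev) / (t_now - t_prev)
# with negative results (e.g. after a node restart) suppressed by the min 0
# setting.  For example, a counter going 12000 -> 12300 across munin's usual
# 5-minute (300 s) poll interval graphs as 1.0 operation per second.
# ---------------------------------------------------------------------------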
import os, sys import urllib import simplejson node_urls = [] for k,v in os.environ.items(): if k.startswith("url_"): nodename = k[len("url_"):] node_urls.append( (nodename, v) ) node_urls.sort() my_name = os.path.basename(sys.argv[0]) PREFIX = "tahoe_server_operations_" assert my_name.startswith(PREFIX) operation = my_name[len(PREFIX):] configinfo = \ """graph_title Tahoe Server '%(operation)s' Operations graph_vlabel ops per second graph_category tahoe graph_info This graph shows how many '%(operation)s' operations take place on the storage server """ % {'operation': operation} for nodename, url in node_urls: configinfo += "%s.label %s\n" % (nodename, nodename) configinfo += "%s.type DERIVE\n" % (nodename,) configinfo += "%s.min 0\n" % (nodename,) configinfo += "%s.draw LINE2\n" % (nodename,) if len(sys.argv) > 1: if sys.argv[1] == "config": print configinfo.rstrip() sys.exit(0) for nodename, url in node_urls: data = simplejson.loads(urllib.urlopen(url).read()) key = "storage_server.%s" % operation value = data["counters"][key] print "%s.value %s" % (nodename, value) tahoe-lafs-1.10.0/misc/operations_helpers/munin/tahoe_spacetime000066400000000000000000000070521221140116300246520ustar00rootroot00000000000000#!/usr/bin/env python # copy .rrd files from a remote munin master host, sum the 'df' stats from a # list of hosts, use them to estimate a rate-of-change for the past month, # then extrapolate to guess how many weeks/months/years of storage space we # have left, and output it to another munin graph import sys, os, time import rrdtool MUNIN_HOST = "munin.allmydata.com" PREFIX = "%s:/var/lib/munin/prodtahoe/" % MUNIN_HOST FILES = [ "prodtahoe%d.allmydata.com-df-_dev_sd%s3-g.rrd" % (a,b) for a in (1,2,3,4,5) for b in ("a", "b", "c", "d") ] REMOTEFILES = [ PREFIX + f for f in FILES ] LOCALFILES = ["/var/lib/munin/prodtahoe/" + f for f in FILES ] WEBFILE = "/var/www/tahoe/spacetime.json" def rsync_rrd(): # copy the RRD files from your munin master host to a local one cmd = "rsync %s rrds/" % (" ".join(REMOTEFILES)) rc = os.system(cmd) assert rc == 0, rc def format_time(t): return time.strftime("%b %d %H:%M", time.localtime(t)) def predict_future(past_s): start_df = [] end_df = [] durations = [] for fn in LOCALFILES: d = rrdtool.fetch(fn, "AVERAGE", "-s", "-"+past_s, "-e", "-1hr") # ((start, end, step), (name1, name2, ...), [(data1, data2, ..), ...]) (start_time, end_time ,step) = d[0] #print format_time(start_time), " - ", format_time(end_time), step names = d[1] #for points in d[2]: # point = points[0] # print point start_space = d[2][0][0] if start_space is None: return None # I don't know why, but the last few points are always bogus. Running # 'rrdtool fetch' on the command line is usually ok.. I blame the python # bindinds. 
end_space = d[2][-4][0] if end_space is None: return None end_time = end_time - (4*step) start_df.append(start_space) end_df.append(end_space) durations.append(end_time - start_time) avg_start_df = sum(start_df) / len(start_df) avg_end_df = sum(end_df) / len(end_df) avg_duration = sum(durations) / len(durations) #print avg_start_df, avg_end_df, avg_duration rate = (avg_end_df - avg_start_df) / avg_duration #print "Rate", rate, " %/s" #print "measured over", avg_duration / 86400, "days" remaining = 100 - avg_end_df remaining_seconds = remaining / rate #print "remaining seconds", remaining_seconds remaining_days = remaining_seconds / 86400 #print "remaining days", remaining_days return remaining_days def write_to_file(samples): # write a JSON-formatted dictionary f = open(WEBFILE + ".tmp", "w") f.write("{ ") f.write(", ".join(['"%s": %s' % (k, samples[k]) for k in sorted(samples.keys())])) f.write("}\n") f.close() os.rename(WEBFILE + ".tmp", WEBFILE) if len(sys.argv) > 1 and sys.argv[1] == "config": print """\ graph_title Tahoe Remaining Space Predictor graph_vlabel days remaining graph_category tahoe graph_info This graph shows the estimated number of days left until storage space is exhausted days_2wk.label days left (2wk sample) days_2wk.draw LINE2 days_4wk.label days left (4wk sample) days_4wk.draw LINE2""" sys.exit(0) #rsync_rrd() samples = {} remaining_4wk = predict_future("4wk") if remaining_4wk is not None: print "days_4wk.value", remaining_4wk samples["remaining_4wk"] = remaining_4wk remaining_2wk = predict_future("2wk") if remaining_2wk is not None: print "days_2wk.value", remaining_2wk samples["remaining_2wk"] = remaining_2wk write_to_file(samples) tahoe-lafs-1.10.0/misc/operations_helpers/munin/tahoe_stats000066400000000000000000000561371221140116300240460ustar00rootroot00000000000000#!/usr/bin/env python import os import pickle import re import sys import time STAT_VALIDITY = 300 # 5min limit on reporting stats PLUGINS = { # LOAD AVERAGE 'tahoe_runtime_load_avg': { 'statid': 'load_monitor.avg_load', 'category': 'stats', 'configheader': '\n'.join(['graph_title Tahoe Runtime Load Average', 'graph_vlabel load', 'graph_category tahoe', 'graph_info This graph shows average reactor delay', ]), 'graph_config': '\n'.join(['%(name)s.label %(name)s', '%(name)s.draw LINE1', ]), 'graph_render': '\n'.join(['%(name)s.value %(value)s', ]), }, 'tahoe_runtime_load_peak': { 'statid': 'load_monitor.max_load', 'category': 'stats', 'configheader': '\n'.join(['graph_title Tahoe Runtime Load Peak', 'graph_vlabel load', 'graph_category tahoe', 'graph_info This graph shows peak reactor delay', ]), 'graph_config': '\n'.join(['%(name)s.label %(name)s', '%(name)s.draw LINE1', ]), 'graph_render': '\n'.join(['%(name)s.value %(value)s', ]), }, # STORAGE ALLOCATION (BYTES) 'tahoe_storage_consumed': { 'statid': 'storage_server.consumed', 'category': 'stats', 'configheader': '\n'.join(['graph_title Tahoe Storage Server Space Consumed', 'graph_vlabel bytes', 'graph_category tahoe_storage_server', 'graph_info This graph shows space consumed', 'graph_args --base 1024', ]), 'graph_config': '\n'.join(['%(name)s.label %(name)s', '%(name)s.draw LINE1', ]), 'graph_render': '\n'.join(['%(name)s.value %(value)s', ]), }, 'tahoe_storage_allocated': { 'statid': 'storage_server.allocated', 'category': 'stats', 'configheader': '\n'.join(['graph_title Tahoe Storage Server Space Allocated', 'graph_vlabel bytes', 'graph_category tahoe_storage_server', 'graph_info This graph shows space allocated', 'graph_args --base 1024', ]), 
'graph_config': '\n'.join(['%(name)s.label %(name)s', '%(name)s.draw LINE1', ]), 'graph_render': '\n'.join(['%(name)s.value %(value)s', ]), }, 'tahoe_storage_bytes_added': { 'statid': 'storage_server.bytes_added', 'category': 'counters', 'configheader': '\n'.join(['graph_title Tahoe Storage Server Bytes Added', 'graph_vlabel bytes', 'graph_category tahoe_storage_server', 'graph_info This graph shows cummulative bytes added', ]), 'graph_config': '\n'.join(['%(name)s.label %(name)s', '%(name)s.draw LINE1', ]), 'graph_render': '\n'.join(['%(name)s.value %(value)s', ]), }, 'tahoe_storage_bytes_freed': { 'statid': 'storage_server.bytes_freed', 'category': 'counters', 'configheader': '\n'.join(['graph_title Tahoe Storage Server Bytes Removed', 'graph_vlabel bytes', 'graph_category tahoe_storage_server', 'graph_info This graph shows cummulative bytes removed', ]), 'graph_config': '\n'.join(['%(name)s.label %(name)s', '%(name)s.draw LINE1', ]), 'graph_render': '\n'.join(['%(name)s.value %(value)s', ]), }, 'tahoe_storage_operations_allocate': { 'statid': 'storage_server.allocate', 'category': 'counters', 'configheader': '\n'.join(['graph_title Tahoe Storage Server Allocate_Bucket Operations', 'graph_vlabel operations per second', 'graph_category tahoe_storage_server', 'graph_info This graph shows how many allocate_buckets operations occured per second. Each immutable file upload causes one such operation per server.', ]), 'graph_config': '\n'.join(['%(name)s.label %(name)s', '%(name)s.type DERIVE', '%(name)s.min 0', '%(name)s.draw LINE1', ]), 'graph_render': '\n'.join(['%(name)s.value %(value)s', ]), }, 'tahoe_storage_operations_get': { 'statid': 'storage_server.get', 'category': 'counters', 'configheader': '\n'.join(['graph_title Tahoe Storage Server get_bucket Operations', 'graph_vlabel operations per second', 'graph_category tahoe_storage_server', 'graph_info This graph shows how many get_bucket operations occured per second. Each immutable file download/check causes one such operation per server.', ]), 'graph_config': '\n'.join(['%(name)s.label %(name)s', '%(name)s.type DERIVE', '%(name)s.min 0', '%(name)s.draw LINE1', ]), 'graph_render': '\n'.join(['%(name)s.value %(value)s', ]), }, 'tahoe_storage_operations_writev': { 'statid': 'storage_server.writev', 'category': 'counters', 'configheader': '\n'.join(['graph_title Tahoe Storage Server writev Operations', 'graph_vlabel operations per second', 'graph_category tahoe_storage_server', 'graph_info This graph shows how many writev operations occured per second. Each mutable file / dirnode write causes one such operation per server.', ]), 'graph_config': '\n'.join(['%(name)s.label %(name)s', '%(name)s.type DERIVE', '%(name)s.min 0', '%(name)s.draw LINE1', ]), 'graph_render': '\n'.join(['%(name)s.value %(value)s', ]), }, 'tahoe_storage_operations_readv': { 'statid': 'storage_server.readv', 'category': 'counters', 'configheader': '\n'.join(['graph_title Tahoe Storage Server readv Operations', 'graph_vlabel operations per second', 'graph_category tahoe_storage_server', 'graph_info This graph shows how many readv operations occured per second. 
Each dirnode read causes one such operation per server.', ]), 'graph_config': '\n'.join(['%(name)s.label %(name)s', '%(name)s.type DERIVE', '%(name)s.min 0', '%(name)s.draw LINE1', ]), 'graph_render': '\n'.join(['%(name)s.value %(value)s', ]), }, # HELPER 'tahoe_helper_incoming_files': { 'statid': 'chk_upload_helper.incoming_count', 'category': 'stats', 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Incoming File Count', 'graph_vlabel n files', 'graph_category tahoe_helper', 'graph_info This graph shows number of incoming files', ]), 'graph_config': '\n'.join(['%(name)s.label %(name)s', '%(name)s.draw LINE1', ]), 'graph_render': '\n'.join(['%(name)s.value %(value)s', ]), }, 'tahoe_helper_incoming_filesize': { 'statid': 'chk_upload_helper.incoming_size', 'category': 'stats', 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Incoming File Size', 'graph_vlabel bytes', 'graph_category tahoe_helper', 'graph_info This graph shows total size of incoming files', ]), 'graph_config': '\n'.join(['%(name)s.label %(name)s', '%(name)s.draw LINE1', ]), 'graph_render': '\n'.join(['%(name)s.value %(value)s', ]), }, 'tahoe_helper_incoming_files_old': { 'statid': 'chk_upload_helper.incoming_size_old', 'category': 'stats', 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Incoming Old Files', 'graph_vlabel bytes', 'graph_category tahoe_helper', 'graph_info This graph shows total size of old incoming files', ]), 'graph_config': '\n'.join(['%(name)s.label %(name)s', '%(name)s.draw LINE1', ]), 'graph_render': '\n'.join(['%(name)s.value %(value)s', ]), }, 'tahoe_helper_encoding_files': { 'statid': 'chk_upload_helper.encoding_count', 'category': 'stats', 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Encoding File Count', 'graph_vlabel n files', 'graph_category tahoe_helper', 'graph_info This graph shows number of encoding files', ]), 'graph_config': '\n'.join(['%(name)s.label %(name)s', '%(name)s.draw LINE1', ]), 'graph_render': '\n'.join(['%(name)s.value %(value)s', ]), }, 'tahoe_helper_encoding_filesize': { 'statid': 'chk_upload_helper.encoding_size', 'category': 'stats', 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Encoding File Size', 'graph_vlabel bytes', 'graph_category tahoe_helper', 'graph_info This graph shows total size of encoding files', ]), 'graph_config': '\n'.join(['%(name)s.label %(name)s', '%(name)s.draw LINE1', ]), 'graph_render': '\n'.join(['%(name)s.value %(value)s', ]), }, 'tahoe_helper_encoding_files_old': { 'statid': 'chk_upload_helper.encoding_size_old', 'category': 'stats', 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Encoding Old Files', 'graph_vlabel bytes', 'graph_category tahoe_helper', 'graph_info This graph shows total size of old encoding files', ]), 'graph_config': '\n'.join(['%(name)s.label %(name)s', '%(name)s.draw LINE1', ]), 'graph_render': '\n'.join(['%(name)s.value %(value)s', ]), }, 'tahoe_helper_active_uploads': { 'statid': 'chk_upload_helper.active_uploads', 'category': 'stats', 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Active Files', 'graph_vlabel n files', 'graph_category tahoe_helper', 'graph_info This graph shows number of files actively being processed by the helper', ]), 'graph_config': '\n'.join(['%(name)s.label %(name)s', '%(name)s.draw LINE1', ]), 'graph_render': '\n'.join(['%(name)s.value %(value)s', ]), }, 'tahoe_helper_upload_requests': { 'statid': 'chk_upload_helper.upload_requests', 'category': 'counters', 'configheader': '\n'.join(['graph_title Tahoe Upload Helper 
Upload Requests', 'graph_vlabel requests', 'graph_category tahoe_helper', 'graph_info This graph shows the number of upload requests arriving at the helper', ]), 'graph_config': '\n'.join(['%(name)s.label %(name)s', '%(name)s.type DERIVE', '%(name)s.min 0', '%(name)s.draw LINE1', ]), 'graph_render': '\n'.join(['%(name)s.value %(value)s', ]), }, 'tahoe_helper_upload_already_present': { 'statid': 'chk_upload_helper.upload_already_present', 'category': 'counters', 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Uploads Already Present', 'graph_vlabel requests', 'graph_category tahoe_helper', 'graph_info This graph shows the number of uploads whose files are already present in the grid', ]), 'graph_config': '\n'.join(['%(name)s.label %(name)s', '%(name)s.type DERIVE', '%(name)s.min 0', '%(name)s.draw LINE1', ]), 'graph_render': '\n'.join(['%(name)s.value %(value)s', ]), }, 'tahoe_helper_upload_need_upload': { 'statid': 'chk_upload_helper.upload_need_upload', 'category': 'counters', 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Uploads Needing Upload', 'graph_vlabel requests', 'graph_category tahoe_helper', 'graph_info This graph shows the number of uploads whose files are not already present in the grid', ]), 'graph_config': '\n'.join(['%(name)s.label %(name)s', '%(name)s.type DERIVE', '%(name)s.min 0', '%(name)s.draw LINE1', ]), 'graph_render': '\n'.join(['%(name)s.value %(value)s', ]), }, 'tahoe_helper_encoded_bytes': { 'statid': 'chk_upload_helper.encoded_bytes', 'category': 'counters', 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Encoded Bytes', 'graph_vlabel bytes', 'graph_category tahoe_helper', 'graph_info This graph shows the number of bytes encoded by the helper', ]), 'graph_config': '\n'.join(['%(name)s.label %(name)s', '%(name)s.type DERIVE', '%(name)s.min 0', '%(name)s.draw LINE1', ]), 'graph_render': '\n'.join(['%(name)s.value %(value)s', ]), }, 'tahoe_helper_fetched_bytes': { 'statid': 'chk_upload_helper.fetched_bytes', 'category': 'counters', 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Fetched Bytes', 'graph_vlabel bytes', 'graph_category tahoe_helper', 'graph_info This graph shows the number of bytes fetched by the helper', ]), 'graph_config': '\n'.join(['%(name)s.label %(name)s', '%(name)s.type DERIVE', '%(name)s.min 0', '%(name)s.draw LINE1', ]), 'graph_render': '\n'.join(['%(name)s.value %(value)s', ]), }, # WEBAPI 'tahoe_uploader_bytes_uploaded': { 'statid': 'uploader.bytes_uploaded', 'category': 'counters', 'configheader': '\n'.join(['graph_title Tahoe Uploader Bytes Uploaded', 'graph_vlabel bytes', 'graph_category tahoe_traffic', 'graph_info This graph shows the number of bytes uploaded', ]), 'graph_config': '\n'.join(['%(name)s.label %(name)s', '%(name)s.type DERIVE', '%(name)s.min 0', '%(name)s.draw LINE1', ]), 'graph_render': '\n'.join(['%(name)s.value %(value)s', ]), }, 'tahoe_uploader_files_uploaded': { 'statid': 'uploader.files_uploaded', 'category': 'counters', 'configheader': '\n'.join(['graph_title Tahoe Uploader Bytes Uploaded', 'graph_vlabel files', 'graph_category tahoe_traffic', 'graph_info This graph shows the number of files uploaded', ]), 'graph_config': '\n'.join(['%(name)s.label %(name)s', '%(name)s.type DERIVE', '%(name)s.min 0', '%(name)s.draw LINE1', ]), 'graph_render': '\n'.join(['%(name)s.value %(value)s', ]), }, 'tahoe_mutable_files_published': { 'statid': 'mutable.files_published', 'category': 'counters', 'configheader': '\n'.join(['graph_title Tahoe Mutable Files Published', 'graph_vlabel 
files', 'graph_category tahoe_traffic', 'graph_info This graph shows the number of mutable files published', ]), 'graph_config': '\n'.join(['%(name)s.label %(name)s', '%(name)s.type DERIVE', '%(name)s.min 0', '%(name)s.draw LINE1', ]), 'graph_render': '\n'.join(['%(name)s.value %(value)s', ]), }, 'tahoe_mutable_files_retrieved': { 'statid': 'mutable.files_retrieved', 'category': 'counters', 'configheader': '\n'.join(['graph_title Tahoe Mutable Files Retrieved', 'graph_vlabel files', 'graph_category tahoe_traffic', 'graph_info This graph shows the number of files retrieved', ]), 'graph_config': '\n'.join(['%(name)s.label %(name)s', '%(name)s.type DERIVE', '%(name)s.min 0', '%(name)s.draw LINE1', ]), 'graph_render': '\n'.join(['%(name)s.value %(value)s', ]), }, } def smash_name(name): return re.sub('[^a-zA-Z0-9]', '_', name) def open_stats(fname): f = open(fname, 'rb') stats = pickle.load(f) f.close() return stats def main(argv): graph_name = os.path.basename(argv[0]) if graph_name.endswith('.py'): graph_name = graph_name[:-3] plugin_conf = PLUGINS.get(graph_name) for k,v in os.environ.items(): if k.startswith('statsfile'): stats_file = v break else: raise RuntimeError("No 'statsfile' env var found") stats = open_stats(stats_file) now = time.time() def output_nodes(output_section, check_time): for tubid, nodestats in stats.items(): if check_time and (now - nodestats.get('timestamp', 0)) > STAT_VALIDITY: continue name = smash_name("%s_%s" % (nodestats['nickname'], tubid[:4])) #value = nodestats['stats'][plugin_conf['category']].get(plugin_conf['statid']) category = plugin_conf['category'] statid = plugin_conf['statid'] value = nodestats['stats'][category].get(statid) if value is not None: args = { 'name': name, 'value': value } print plugin_conf[output_section] % args if len(argv) > 1: if sys.argv[1] == 'config': print plugin_conf['configheader'] output_nodes('graph_config', False) sys.exit(0) output_nodes('graph_render', True) if __name__ == '__main__': main(sys.argv) tahoe-lafs-1.10.0/misc/operations_helpers/munin/tahoe_storagespace000066400000000000000000000033401221140116300253540ustar00rootroot00000000000000#!/usr/bin/env python # This is a munin plugin to track the amount of disk space each node's # StorageServer is consuming on behalf of other nodes. This is where the # shares are kept. If there are N nodes present in the mesh, the total space # consumed by the entire mesh will be about N times the space reported by # this plugin. # Copy this plugin into /etc/munun/plugins/tahoe_storagespace and then put # the following in your /etc/munin/plugin-conf.d/foo file to let it know # where to find the basedirectory for each node: # # [tahoe_storagespace] # env.basedir_NODE1 /path/to/node1 # env.basedir_NODE2 /path/to/node2 # env.basedir_NODE3 /path/to/node3 # # Allmydata-tahoe must be installed on the system where this plugin is used, # since it imports a utility module from allmydata.utils . 
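# ---------------------------------------------------------------------------
# Illustrative sketch (standalone, not part of the plugin below): the plugin
# shells out to "du --bytes --summarize <basedir>/storage" to total up the
# space its shares occupy.  A rough pure-Python equivalent (apparent file
# sizes, as with du --bytes, rather than allocated blocks) would be:
import os

def storage_bytes(basedir):
    total = 0
    for dirpath, dirnames, filenames in os.walk(os.path.join(basedir, "storage")):
        for fn in filenames:
            try:
                total += os.path.getsize(os.path.join(dirpath, fn))
            except OSError:
                pass  # the file vanished between listing and stat; skip it
    return total
# ---------------------------------------------------------------------------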
import os, sys import commands nodedirs = [] for k,v in os.environ.items(): if k.startswith("basedir_"): nodename = k[len("basedir_"):] nodedirs.append( (nodename, v) ) nodedirs.sort() seriesname = "storage" configinfo = \ """graph_title Allmydata Tahoe Shareholder Space graph_vlabel bytes graph_category tahoe graph_info This graph shows the space consumed by this node's StorageServer """ for nodename, basedir in nodedirs: configinfo += "%s.label %s\n" % (nodename, nodename) configinfo += "%s.draw LINE2\n" % (nodename,) if len(sys.argv) > 1: if sys.argv[1] == "config": print configinfo.rstrip() sys.exit(0) for nodename, basedir in nodedirs: cmd = "du --bytes --summarize %s" % os.path.join(basedir, "storage") rc,out = commands.getstatusoutput(cmd) if rc != 0: sys.exit(rc) bytes, extra = out.split() usage = int(bytes) print "%s.value %d" % (nodename, usage) tahoe-lafs-1.10.0/misc/operations_helpers/provisioning/000077500000000000000000000000001221140116300231715ustar00rootroot00000000000000tahoe-lafs-1.10.0/misc/operations_helpers/provisioning/provisioning.py000066400000000000000000001072011221140116300262720ustar00rootroot00000000000000 from nevow import inevow, rend, loaders, tags as T import math import util # factorial and binomial copied from # http://mail.python.org/pipermail/python-list/2007-April/435718.html def div_ceil(n, d): """ The smallest integer k such that k*d >= n. """ return (n/d) + (n%d != 0) def factorial(n): """factorial(n): return the factorial of the integer n. factorial(0) = 1 factorial(n) with n<0 is -factorial(abs(n)) """ result = 1 for i in xrange(1, abs(n)+1): result *= i assert n >= 0 return result def binomial(n, k): assert 0 <= k <= n if k == 0 or k == n: return 1 # calculate n!/k! as one product, avoiding factors that # just get canceled P = k+1 for i in xrange(k+2, n+1): P *= i # if you are paranoid: # C, rem = divmod(P, factorial(n-k)) # assert rem == 0 # return C return P//factorial(n-k) class ProvisioningTool(rend.Page): addSlash = True docFactory = loaders.xmlfile(util.sibling("provisioning.xhtml")) def render_forms(self, ctx, data): req = inevow.IRequest(ctx) def getarg(name, astype=int): if req.method != "POST": return None if name in req.fields: return astype(req.fields[name].value) return None return self.do_forms(getarg) def do_forms(self, getarg): filled = getarg("filled", bool) def get_and_set(name, options, default=None, astype=int): current_value = getarg(name, astype) i_select = T.select(name=name) for (count, description) in options: count = astype(count) if ((current_value is not None and count == current_value) or (current_value is None and count == default)): o = T.option(value=str(count), selected="true")[description] else: o = T.option(value=str(count))[description] i_select = i_select[o] if current_value is None: current_value = default return current_value, i_select sections = {} def add_input(section, text, entry): if section not in sections: sections[section] = [] sections[section].extend([T.div[text, ": ", entry], "\n"]) def add_output(section, entry): if section not in sections: sections[section] = [] sections[section].extend([entry, "\n"]) def build_section(section): return T.fieldset[T.legend[section], sections[section]] def number(value, suffix=""): scaling = 1 if value < 1: fmt = "%1.2g%s" elif value < 100: fmt = "%.1f%s" elif value < 1000: fmt = "%d%s" elif value < 1e6: fmt = "%.2fk%s"; scaling = 1e3 elif value < 1e9: fmt = "%.2fM%s"; scaling = 1e6 elif value < 1e12: fmt = "%.2fG%s"; scaling = 1e9 elif value < 1e15: fmt = 
"%.2fT%s"; scaling = 1e12 elif value < 1e18: fmt = "%.2fP%s"; scaling = 1e15 else: fmt = "huge! %g%s" return fmt % (value / scaling, suffix) user_counts = [(5, "5 users"), (50, "50 users"), (200, "200 users"), (1000, "1k users"), (10000, "10k users"), (50000, "50k users"), (100000, "100k users"), (500000, "500k users"), (1000000, "1M users"), ] num_users, i_num_users = get_and_set("num_users", user_counts, 50000) add_input("Users", "How many users are on this network?", i_num_users) files_per_user_counts = [(100, "100 files"), (1000, "1k files"), (10000, "10k files"), (100000, "100k files"), (1e6, "1M files"), ] files_per_user, i_files_per_user = get_and_set("files_per_user", files_per_user_counts, 1000) add_input("Users", "How many files for each user? (avg)", i_files_per_user) space_per_user_sizes = [(1e6, "1MB"), (10e6, "10MB"), (100e6, "100MB"), (200e6, "200MB"), (1e9, "1GB"), (2e9, "2GB"), (5e9, "5GB"), (10e9, "10GB"), (100e9, "100GB"), (1e12, "1TB"), (2e12, "2TB"), (5e12, "5TB"), ] # Estimate ~5gb per user as a more realistic case space_per_user, i_space_per_user = get_and_set("space_per_user", space_per_user_sizes, 5e9) add_input("Users", "How much data for each user? (avg)", i_space_per_user) sharing_ratios = [(1.0, "1.0x"), (1.1, "1.1x"), (2.0, "2.0x"), ] sharing_ratio, i_sharing_ratio = get_and_set("sharing_ratio", sharing_ratios, 1.0, float) add_input("Users", "What is the sharing ratio? (1.0x is no-sharing and" " no convergence)", i_sharing_ratio) # Encoding parameters encoding_choices = [("3-of-10-5", "3.3x (3-of-10, repair below 5)"), ("3-of-10-8", "3.3x (3-of-10, repair below 8)"), ("5-of-10-7", "2x (5-of-10, repair below 7)"), ("8-of-10-9", "1.25x (8-of-10, repair below 9)"), ("27-of-30-28", "1.1x (27-of-30, repair below 28"), ("25-of-100-50", "4x (25-of-100, repair below 50)"), ] encoding_parameters, i_encoding_parameters = \ get_and_set("encoding_parameters", encoding_choices, "3-of-10-5", str) encoding_pieces = encoding_parameters.split("-") k = int(encoding_pieces[0]) assert encoding_pieces[1] == "of" n = int(encoding_pieces[2]) # we repair the file when the number of available shares drops below # this value repair_threshold = int(encoding_pieces[3]) add_input("Servers", "What are the default encoding parameters?", i_encoding_parameters) # Server info num_server_choices = [ (5, "5 servers"), (10, "10 servers"), (15, "15 servers"), (30, "30 servers"), (50, "50 servers"), (100, "100 servers"), (200, "200 servers"), (300, "300 servers"), (500, "500 servers"), (1000, "1k servers"), (2000, "2k servers"), (5000, "5k servers"), (10e3, "10k servers"), (100e3, "100k servers"), (1e6, "1M servers"), ] num_servers, i_num_servers = \ get_and_set("num_servers", num_server_choices, 30, int) add_input("Servers", "How many servers are there?", i_num_servers) # availability is measured in dBA = -dBF, where 0dBF is 100% failure, # 10dBF is 10% failure, 20dBF is 1% failure, etc server_dBA_choices = [ (10, "90% [10dBA] (2.4hr/day)"), (13, "95% [13dBA] (1.2hr/day)"), (20, "99% [20dBA] (14min/day or 3.5days/year)"), (23, "99.5% [23dBA] (7min/day or 1.75days/year)"), (30, "99.9% [30dBA] (87sec/day or 9hours/year)"), (40, "99.99% [40dBA] (60sec/week or 53min/year)"), (50, "99.999% [50dBA] (5min per year)"), ] server_dBA, i_server_availability = \ get_and_set("server_availability", server_dBA_choices, 20, int) add_input("Servers", "What is the server availability?", i_server_availability) drive_MTBF_choices = [ (40, "40,000 Hours"), ] drive_MTBF, i_drive_MTBF = \ get_and_set("drive_MTBF", 
drive_MTBF_choices, 40, int) add_input("Drives", "What is the hard drive MTBF?", i_drive_MTBF) # http://www.tgdaily.com/content/view/30990/113/ # http://labs.google.com/papers/disk_failures.pdf # google sees: # 1.7% of the drives they replaced were 0-1 years old # 8% of the drives they repalced were 1-2 years old # 8.6% were 2-3 years old # 6% were 3-4 years old, about 8% were 4-5 years old drive_size_choices = [ (100, "100 GB"), (250, "250 GB"), (500, "500 GB"), (750, "750 GB"), (1000, "1000 GB"), (2000, "2000 GB"), (3000, "3000 GB"), ] drive_size, i_drive_size = \ get_and_set("drive_size", drive_size_choices, 3000, int) drive_size = drive_size * 1e9 add_input("Drives", "What is the capacity of each hard drive?", i_drive_size) drive_failure_model_choices = [ ("E", "Exponential"), ("U", "Uniform"), ] drive_failure_model, i_drive_failure_model = \ get_and_set("drive_failure_model", drive_failure_model_choices, "E", str) add_input("Drives", "How should we model drive failures?", i_drive_failure_model) # drive_failure_rate is in failures per second if drive_failure_model == "E": drive_failure_rate = 1.0 / (drive_MTBF * 1000 * 3600) else: drive_failure_rate = 0.5 / (drive_MTBF * 1000 * 3600) # deletion/gc/ownership mode ownership_choices = [ ("A", "no deletion, no gc, no owners"), ("B", "deletion, no gc, no owners"), ("C", "deletion, share timers, no owners"), ("D", "deletion, no gc, yes owners"), ("E", "deletion, owner timers"), ] ownership_mode, i_ownership_mode = \ get_and_set("ownership_mode", ownership_choices, "A", str) add_input("Servers", "What is the ownership mode?", i_ownership_mode) # client access behavior access_rates = [ (1, "one file per day"), (10, "10 files per day"), (100, "100 files per day"), (1000, "1k files per day"), (10e3, "10k files per day"), (100e3, "100k files per day"), ] download_files_per_day, i_download_rate = \ get_and_set("download_rate", access_rates, 100, int) add_input("Users", "How many files are downloaded per day?", i_download_rate) download_rate = 1.0 * download_files_per_day / (24*60*60) upload_files_per_day, i_upload_rate = \ get_and_set("upload_rate", access_rates, 10, int) add_input("Users", "How many files are uploaded per day?", i_upload_rate) upload_rate = 1.0 * upload_files_per_day / (24*60*60) delete_files_per_day, i_delete_rate = \ get_and_set("delete_rate", access_rates, 10, int) add_input("Users", "How many files are deleted per day?", i_delete_rate) delete_rate = 1.0 * delete_files_per_day / (24*60*60) # the value is in days lease_timers = [ (1, "one refresh per day"), (7, "one refresh per week"), ] lease_timer, i_lease = \ get_and_set("lease_timer", lease_timers, 7, int) add_input("Users", "How frequently do clients refresh files or accounts? 
" "(if necessary)", i_lease) seconds_per_lease = 24*60*60*lease_timer check_timer_choices = [ (1, "every week"), (4, "every month"), (8, "every two months"), (16, "every four months"), ] check_timer, i_check_timer = \ get_and_set("check_timer", check_timer_choices, 4, int) add_input("Users", "How frequently should we check on each file?", i_check_timer) file_check_interval = check_timer * 7 * 24 * 3600 if filled: add_output("Users", T.div["Total users: %s" % number(num_users)]) add_output("Users", T.div["Files per user: %s" % number(files_per_user)]) file_size = 1.0 * space_per_user / files_per_user add_output("Users", T.div["Average file size: ", number(file_size)]) total_files = num_users * files_per_user / sharing_ratio add_output("Grid", T.div["Total number of files in grid: ", number(total_files)]) total_space = num_users * space_per_user / sharing_ratio add_output("Grid", T.div["Total volume of plaintext in grid: ", number(total_space, "B")]) total_shares = n * total_files add_output("Grid", T.div["Total shares in grid: ", number(total_shares)]) expansion = float(n) / float(k) total_usage = expansion * total_space add_output("Grid", T.div["Share data in grid: ", number(total_usage, "B")]) if n > num_servers: # silly configuration, causes Tahoe2 to wrap and put multiple # shares on some servers. add_output("Servers", T.div["non-ideal: more shares than servers" " (n=%d, servers=%d)" % (n, num_servers)]) # every file has at least one share on every server buckets_per_server = total_files shares_per_server = total_files * ((1.0 * n) / num_servers) else: # if nobody is full, then no lease requests will be turned # down for lack of space, and no two shares for the same file # will share a server. Therefore the chance that any given # file has a share on any given server is n/num_servers. buckets_per_server = total_files * ((1.0 * n) / num_servers) # since each such represented file only puts one share on a # server, the total number of shares per server is the same. shares_per_server = buckets_per_server add_output("Servers", T.div["Buckets per server: ", number(buckets_per_server)]) add_output("Servers", T.div["Shares per server: ", number(shares_per_server)]) # how much space is used on the storage servers for the shares? # the share data itself share_data_per_server = total_usage / num_servers add_output("Servers", T.div["Share data per server: ", number(share_data_per_server, "B")]) # this is determined empirically. H=hashsize=32, for a one-segment # file and 3-of-10 encoding share_validation_per_server = 266 * shares_per_server # this could be 423*buckets_per_server, if we moved the URI # extension into a separate file, but that would actually consume # *more* space (minimum filesize is 4KiB), unless we moved all # shares for a given bucket into a single file. share_uri_extension_per_server = 423 * shares_per_server # ownership mode adds per-bucket data H = 32 # depends upon the desired security of delete/refresh caps # bucket_lease_size is the amount of data needed to keep track of # the delete/refresh caps for each bucket. 
bucket_lease_size = 0 client_bucket_refresh_rate = 0 owner_table_size = 0 if ownership_mode in ("B", "C", "D", "E"): bucket_lease_size = sharing_ratio * 1.0 * H if ownership_mode in ("B", "C"): # refreshes per second per client client_bucket_refresh_rate = (1.0 * n * files_per_user / seconds_per_lease) add_output("Users", T.div["Client share refresh rate (outbound): ", number(client_bucket_refresh_rate, "Hz")]) server_bucket_refresh_rate = (client_bucket_refresh_rate * num_users / num_servers) add_output("Servers", T.div["Server share refresh rate (inbound): ", number(server_bucket_refresh_rate, "Hz")]) if ownership_mode in ("D", "E"): # each server must maintain a bidirectional mapping from # buckets to owners. One way to implement this would be to # put a list of four-byte owner numbers into each bucket, and # a list of four-byte share numbers into each owner (although # of course we'd really just throw it into a database and let # the experts take care of the details). owner_table_size = 2*(buckets_per_server * sharing_ratio * 4) if ownership_mode in ("E",): # in this mode, clients must refresh one timer per server client_account_refresh_rate = (1.0 * num_servers / seconds_per_lease) add_output("Users", T.div["Client account refresh rate (outbound): ", number(client_account_refresh_rate, "Hz")]) server_account_refresh_rate = (client_account_refresh_rate * num_users / num_servers) add_output("Servers", T.div["Server account refresh rate (inbound): ", number(server_account_refresh_rate, "Hz")]) # TODO: buckets vs shares here is a bit wonky, but in # non-wrapping grids it shouldn't matter share_lease_per_server = bucket_lease_size * buckets_per_server share_ownertable_per_server = owner_table_size share_space_per_server = (share_data_per_server + share_validation_per_server + share_uri_extension_per_server + share_lease_per_server + share_ownertable_per_server) add_output("Servers", T.div["Share space per server: ", number(share_space_per_server, "B"), " (data ", number(share_data_per_server, "B"), ", validation ", number(share_validation_per_server, "B"), ", UEB ", number(share_uri_extension_per_server, "B"), ", lease ", number(share_lease_per_server, "B"), ", ownertable ", number(share_ownertable_per_server, "B"), ")", ]) # rates client_download_share_rate = download_rate * k client_download_byte_rate = download_rate * file_size add_output("Users", T.div["download rate: shares = ", number(client_download_share_rate, "Hz"), " , bytes = ", number(client_download_byte_rate, "Bps"), ]) total_file_check_rate = 1.0 * total_files / file_check_interval client_check_share_rate = total_file_check_rate / num_users add_output("Users", T.div["file check rate: shares = ", number(client_check_share_rate, "Hz"), " (interval = %s)" % number(1 / client_check_share_rate, "s"), ]) client_upload_share_rate = upload_rate * n # TODO: doesn't include overhead client_upload_byte_rate = upload_rate * file_size * expansion add_output("Users", T.div["upload rate: shares = ", number(client_upload_share_rate, "Hz"), " , bytes = ", number(client_upload_byte_rate, "Bps"), ]) client_delete_share_rate = delete_rate * n server_inbound_share_rate = (client_upload_share_rate * num_users / num_servers) server_inbound_byte_rate = (client_upload_byte_rate * num_users / num_servers) add_output("Servers", T.div["upload rate (inbound): shares = ", number(server_inbound_share_rate, "Hz"), " , bytes = ", number(server_inbound_byte_rate, "Bps"), ]) add_output("Servers", T.div["share check rate (inbound): ", 
number(total_file_check_rate * n / num_servers, "Hz"), ]) server_share_modify_rate = ((client_upload_share_rate + client_delete_share_rate) * num_users / num_servers) add_output("Servers", T.div["share modify rate: shares = ", number(server_share_modify_rate, "Hz"), ]) server_outbound_share_rate = (client_download_share_rate * num_users / num_servers) server_outbound_byte_rate = (client_download_byte_rate * num_users / num_servers) add_output("Servers", T.div["download rate (outbound): shares = ", number(server_outbound_share_rate, "Hz"), " , bytes = ", number(server_outbound_byte_rate, "Bps"), ]) total_share_space = num_servers * share_space_per_server add_output("Grid", T.div["Share space consumed: ", number(total_share_space, "B")]) add_output("Grid", T.div[" %% validation: %.2f%%" % (100.0 * share_validation_per_server / share_space_per_server)]) add_output("Grid", T.div[" %% uri-extension: %.2f%%" % (100.0 * share_uri_extension_per_server / share_space_per_server)]) add_output("Grid", T.div[" %% lease data: %.2f%%" % (100.0 * share_lease_per_server / share_space_per_server)]) add_output("Grid", T.div[" %% owner data: %.2f%%" % (100.0 * share_ownertable_per_server / share_space_per_server)]) add_output("Grid", T.div[" %% share data: %.2f%%" % (100.0 * share_data_per_server / share_space_per_server)]) add_output("Grid", T.div["file check rate: ", number(total_file_check_rate, "Hz")]) total_drives = max(div_ceil(int(total_share_space), int(drive_size)), num_servers) add_output("Drives", T.div["Total drives: ", number(total_drives), " drives"]) drives_per_server = div_ceil(total_drives, num_servers) add_output("Servers", T.div["Drives per server: ", drives_per_server]) # costs if drive_size == 3000 * 1e9: add_output("Servers", T.div["3000GB drive: $250 each"]) drive_cost = 250 else: add_output("Servers", T.div[T.b["unknown cost per drive, assuming $100"]]) drive_cost = 100 if drives_per_server <= 4: add_output("Servers", T.div["1U box with <= 4 drives: $1500"]) server_cost = 1500 # typical 1U box elif drives_per_server <= 12: add_output("Servers", T.div["2U box with <= 12 drives: $2500"]) server_cost = 2500 # 2U box else: add_output("Servers", T.div[T.b["Note: too many drives per server, " "assuming $3000"]]) server_cost = 3000 server_capital_cost = (server_cost + drives_per_server * drive_cost) total_server_cost = float(num_servers * server_capital_cost) add_output("Servers", T.div["Capital cost per server: $", server_capital_cost]) add_output("Grid", T.div["Capital cost for all servers: $", number(total_server_cost)]) # $70/Mbps/mo # $44/server/mo power+space server_bandwidth = max(server_inbound_byte_rate, server_outbound_byte_rate) server_bandwidth_mbps = div_ceil(int(server_bandwidth*8), int(1e6)) server_monthly_cost = 70*server_bandwidth_mbps + 44 add_output("Servers", T.div["Monthly cost per server: $", server_monthly_cost]) add_output("Users", T.div["Capital cost per user: $", number(total_server_cost / num_users)]) # reliability any_drive_failure_rate = total_drives * drive_failure_rate any_drive_MTBF = 1 // any_drive_failure_rate # in seconds any_drive_MTBF_days = any_drive_MTBF / 86400 add_output("Drives", T.div["MTBF (any drive): ", number(any_drive_MTBF_days), " days"]) drive_replacement_monthly_cost = (float(drive_cost) * any_drive_failure_rate *30*86400) add_output("Grid", T.div["Monthly cost of replacing drives: $", number(drive_replacement_monthly_cost)]) total_server_monthly_cost = float(num_servers * server_monthly_cost + drive_replacement_monthly_cost) 
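# Worked example of the monthly cost model above (illustrative numbers only):
# a server whose peak inbound/outbound traffic is 4 MB/s needs
# div_ceil(4e6*8, 1e6) = 32 Mbps of bandwidth, so its monthly cost is
# 70*32 + 44 = $2284 for bandwidth plus power+space.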
add_output("Grid", T.div["Monthly cost for all servers: $", number(total_server_monthly_cost)]) add_output("Users", T.div["Monthly cost per user: $", number(total_server_monthly_cost / num_users)]) # availability file_dBA = self.file_availability(k, n, server_dBA) user_files_dBA = self.many_files_availability(file_dBA, files_per_user) all_files_dBA = self.many_files_availability(file_dBA, total_files) add_output("Users", T.div["availability of: ", "arbitrary file = %d dBA, " % file_dBA, "all files of user1 = %d dBA, " % user_files_dBA, "all files in grid = %d dBA" % all_files_dBA, ], ) time_until_files_lost = (n-k+1) / any_drive_failure_rate add_output("Grid", T.div["avg time until files are lost: ", number(time_until_files_lost, "s"), ", ", number(time_until_files_lost/86400, " days"), ]) share_data_loss_rate = any_drive_failure_rate * drive_size add_output("Grid", T.div["share data loss rate: ", number(share_data_loss_rate,"Bps")]) # the worst-case survival numbers occur when we do a file check # and the file is just above the threshold for repair (so we # decide to not repair it). The question is then: what is the # chance that the file will decay so badly before the next check # that we can't recover it? The resulting probability is per # check interval. # Note that the chances of us getting into this situation are low. P_disk_failure_during_interval = (drive_failure_rate * file_check_interval) disk_failure_dBF = 10*math.log10(P_disk_failure_during_interval) disk_failure_dBA = -disk_failure_dBF file_survives_dBA = self.file_availability(k, repair_threshold, disk_failure_dBA) user_files_survives_dBA = self.many_files_availability( \ file_survives_dBA, files_per_user) all_files_survives_dBA = self.many_files_availability( \ file_survives_dBA, total_files) add_output("Users", T.div["survival of: ", "arbitrary file = %d dBA, " % file_survives_dBA, "all files of user1 = %d dBA, " % user_files_survives_dBA, "all files in grid = %d dBA" % all_files_survives_dBA, " (per worst-case check interval)", ]) all_sections = [] all_sections.append(build_section("Users")) all_sections.append(build_section("Servers")) all_sections.append(build_section("Drives")) if "Grid" in sections: all_sections.append(build_section("Grid")) f = T.form(action=".", method="post", enctype="multipart/form-data") if filled: action = "Recompute" else: action = "Compute" f = f[T.input(type="hidden", name="filled", value="true"), T.input(type="submit", value=action), all_sections, ] try: from allmydata import reliability # we import this just to test to see if the page is available _hush_pyflakes = reliability del _hush_pyflakes f = [T.div[T.a(href="../reliability")["Reliability Math"]], f] except ImportError: pass return f def file_availability(self, k, n, server_dBA): """ The full formula for the availability of a specific file is:: 1 - sum([choose(N,i) * p**i * (1-p)**(N-i)] for i in range(k)]) Where choose(N,i) = N! / ( i! * (N-i)! ) . Note that each term of this summation is the probability that there are exactly 'i' servers available, and what we're doing is adding up the cases where i is too low. This is a nuisance to calculate at all accurately, especially once N gets large, and when p is close to unity. So we make an engineering approximation: if (1-p) is very small, then each [i] term is much larger than the [i-1] term, and the sum is dominated by the i=k-1 term. This only works for (1-p) < 10%, and when the choose() function doesn't rise fast enough to compensate. 
For high-expansion encodings (3-of-10, 25-of-100), the choose() function is rising at the same time as the (1-p)**(N-i) term, so that's not an issue. For low-expansion encodings (7-of-10, 75-of-100) the two values are moving in opposite directions, so more care must be taken. Note that the p**i term has only a minor effect as long as (1-p)*N is small, and even then the effect is attenuated by the 1-p term. """ assert server_dBA > 9 # >=90% availability to use the approximation factor = binomial(n, k-1) factor_dBA = 10 * math.log10(factor) exponent = n - k + 1 file_dBA = server_dBA * exponent - factor_dBA return file_dBA def many_files_availability(self, file_dBA, num_files): """The probability that 'num_files' independent bernoulli trials will succeed (i.e. we can recover all files in the grid at any given moment) is p**num_files . Since p is close to unity, we express in p in dBA instead, so we can get useful precision on q (=1-p), and then the formula becomes:: P_some_files_unavailable = 1 - (1 - q)**num_files That (1-q)**n expands with the usual binomial sequence, 1 - nq + Xq**2 ... + Xq**n . We use the same approximation as before, since we know q is close to zero, and we get to ignore all the terms past -nq. """ many_files_dBA = file_dBA - 10 * math.log10(num_files) return many_files_dBA tahoe-lafs-1.10.0/misc/operations_helpers/provisioning/provisioning.xhtml000066400000000000000000000010441221140116300267740ustar00rootroot00000000000000 Tahoe-LAFS - Provisioning Tool

Tahoe-LAFS Provisioning Tool

This page will help you determine how much disk space and network bandwidth will be required by various sizes and types of Tahoe-LAFS networks.

tahoe-lafs-1.10.0/misc/operations_helpers/provisioning/reliability.py000066400000000000000000000261071221140116300260620ustar00rootroot00000000000000#! /usr/bin/python import math from allmydata.util import statistics from numpy import array, matrix, dot DAY=24*60*60 MONTH=31*DAY YEAR=365*DAY class ReliabilityModel: """Generate a model of system-wide reliability, given several input parameters. This runs a simulation in which time is quantized down to 'delta' seconds (default is one month): a smaller delta will result in a more accurate simulation, but will take longer to run. 'report_span' simulated seconds will be run. The encoding parameters are provided as 'k' (minimum number of shares needed to recover the file) and 'N' (total number of shares generated). The default parameters are 3-of-10. The first step is to build a probability of individual drive loss during any given delta. This uses a simple exponential model, in which the average drive lifetime is specified by the 'drive_lifetime' parameter (default is 8 years). The second step is to calculate a 'transition matrix': a table of probabilities that shows, given A shares at the start of the delta, what the chances are of having B shares left at the end of the delta. The current code optimistically assumes all drives are independent. A subclass could override that assumption. An additional 'repair matrix' is created to show what happens when the Checker/Repairer is run. In the simulation, the Checker will be run every 'check_period' seconds (default is one month), and the Repairer will be run if it sees fewer than 'R' shares (default 7). The third step is to finally run the simulation. An initial probability vector is created (with a 100% chance of N shares and a 0% chance of fewer than N shares), then it is multiplied by the transition matrix for every delta of time. Each time the Checker is to be run, the repair matrix is multiplied in, and some additional stats are accumulated (average number of repairs that occur, average number of shares regenerated per repair). The output is a ReliabilityReport instance, which contains a table that samples the state of the simulation once each 'report_period' seconds (defaults to 3 months). Each row of this table will contain the probability vector for one sample period (chance of having X shares, from 0 to N, at the end of the period). The report will also contain other information. 
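    A minimal usage sketch (the keyword values shown are just the defaults
    documented above):

        report = ReliabilityModel.run(drive_lifetime=8*YEAR, k=3, R=7, N=10,
                                      delta=1*MONTH, check_period=1*MONTH,
                                      report_period=3*MONTH,
                                      report_span=5*YEAR)
        for row in report.samples:
            (when, unmaintained_shareprobs, maintained_shareprobs,
             P_repaired_last_check_period, cumulative_number_of_repairs,
             cumulative_number_of_new_shares, P_dead_unmaintained,
             P_dead_maintained) = row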
""" @classmethod def run(klass, drive_lifetime=8*YEAR, k=3, R=7, N=10, delta=1*MONTH, check_period=1*MONTH, report_period=3*MONTH, report_span=5*YEAR, ): self = klass() check_period = check_period-1 P = self.p_in_period(drive_lifetime, delta) decay = self.build_decay_matrix(N, P) repair = self.build_repair_matrix(k, N, R) #print "DECAY:", decay #print "OLD-POST-REPAIR:", old_post_repair #print "NEW-POST-REPAIR:", decay * repair #print "REPAIR:", repair #print "DIFF:", (old_post_repair - decay * repair) START = array([0]*N + [1]) DEAD = array([1]*k + [0]*(1+N-k)) REPAIRp = array([0]*k + [1]*(R-k) + [0]*(1+N-R)) REPAIR_newshares = array([0]*k + [N-i for i in range(k, R)] + [0]*(1+N-R)) assert REPAIR_newshares.shape[0] == N+1 #print "START", START #print "REPAIRp", REPAIRp #print "REPAIR_newshares", REPAIR_newshares unmaintained_state = START maintained_state = START last_check = 0 last_report = 0 P_repaired_last_check_period = 0.0 needed_repairs = [] needed_new_shares = [] report = ReliabilityReport() for t in range(0, report_span+delta, delta): # the .A[0] turns the one-row matrix back into an array unmaintained_state = (unmaintained_state * decay).A[0] maintained_state = (maintained_state * decay).A[0] if (t-last_check) > check_period: last_check = t # we do a check-and-repair this frequently need_repair = dot(maintained_state, REPAIRp) P_repaired_last_check_period = need_repair new_shares = dot(maintained_state, REPAIR_newshares) needed_repairs.append(need_repair) needed_new_shares.append(new_shares) maintained_state = (maintained_state * repair).A[0] if (t-last_report) > report_period: last_report = t P_dead_unmaintained = dot(unmaintained_state, DEAD) P_dead_maintained = dot(maintained_state, DEAD) cumulative_number_of_repairs = sum(needed_repairs) cumulative_number_of_new_shares = sum(needed_new_shares) report.add_sample(t, unmaintained_state, maintained_state, P_repaired_last_check_period, cumulative_number_of_repairs, cumulative_number_of_new_shares, P_dead_unmaintained, P_dead_maintained) # record one more sample at the end of the run P_dead_unmaintained = dot(unmaintained_state, DEAD) P_dead_maintained = dot(maintained_state, DEAD) cumulative_number_of_repairs = sum(needed_repairs) cumulative_number_of_new_shares = sum(needed_new_shares) report.add_sample(t, unmaintained_state, maintained_state, P_repaired_last_check_period, cumulative_number_of_repairs, cumulative_number_of_new_shares, P_dead_unmaintained, P_dead_maintained) #def yandm(seconds): # return "%dy.%dm" % (int(seconds/YEAR), int( (seconds%YEAR)/MONTH)) #needed_repairs_total = sum(needed_repairs) #needed_new_shares_total = sum(needed_new_shares) #print "at 2y:" #print " unmaintained", unmaintained_state #print " maintained", maintained_state #print " number of repairs", needed_repairs_total #print " new shares generated", needed_new_shares_total #repair_rate_inv = report_span / needed_repairs_total #print " avg repair rate: once every %s" % yandm(repair_rate_inv) #print " avg repair download: one share every %s" % yandm(repair_rate_inv/k) #print " avg repair upload: one share every %s" % yandm(report_span / needed_new_shares_total) return report def p_in_period(self, avg_lifetime, period): """Given an average lifetime of a disk (using an exponential model), what is the chance that a live disk will survive the next 'period' seconds?""" # eg p_in_period(8*YEAR, MONTH) = 98.94% return math.exp(-1.0*period/avg_lifetime) def build_decay_matrix(self, N, P): """Return a decay matrix. 
decay[start_shares][end_shares] is the conditional probability of finishing with end_shares, given that we started with start_shares.""" decay_rows = [] decay_rows.append( [0.0]*(N+1) ) for start_shares in range(1, (N+1)): end_shares = self.build_decay_row(start_shares, P) decay_row = end_shares + [0.0] * (N-start_shares) assert len(decay_row) == (N+1), len(decay_row) decay_rows.append(decay_row) decay = matrix(decay_rows) return decay def build_decay_row(self, start_shares, P): """Return a decay row 'end_shares'. end_shares[i] is the chance that we finish with i shares, given that we started with start_shares, for all i between 0 and start_shares, inclusive. This implementation assumes that all shares are independent (IID), but a more complex model could incorporate inter-share failure correlations like having two shares on the same server.""" end_shares = statistics.binomial_distribution_pmf(start_shares, P) return end_shares def build_repair_matrix(self, k, N, R): """Return a repair matrix. repair[start][end]: is the conditional probability of the repairer finishing with 'end' shares, given that it began with 'start' shares (repair if fewer than R shares). The repairer's behavior is deterministic, so all values in this matrix are either 0 or 1. This matrix should be applied *after* the decay matrix.""" new_repair_rows = [] for start_shares in range(0, N+1): new_repair_row = [0] * (N+1) if start_shares < k: new_repair_row[start_shares] = 1 elif start_shares < R: new_repair_row[N] = 1 else: new_repair_row[start_shares] = 1 new_repair_rows.append(new_repair_row) repair = matrix(new_repair_rows) return repair class ReliabilityReport: def __init__(self): self.samples = [] def add_sample(self, when, unmaintained_shareprobs, maintained_shareprobs, P_repaired_last_check_period, cumulative_number_of_repairs, cumulative_number_of_new_shares, P_dead_unmaintained, P_dead_maintained): """ when: the timestamp at the end of the report period unmaintained_shareprobs: a vector of probabilities, element[S] is the chance that there are S shares left at the end of the report period. This tracks what happens if no repair is ever done. maintained_shareprobs: same, but for 'maintained' grids, where check and repair is done at the end of each check period P_repaired_last_check_period: a float, with the probability that a repair was performed at the end of the most recent check period. cumulative_number_of_repairs: a float, with the average number of repairs that will have been performed by the end of the report period cumulative_number_of_new_shares: a float, with the average number of new shares that repair proceses generated by the end of the report period P_dead_unmaintained: a float, with the chance that the file will be unrecoverable at the end of the period P_dead_maintained: same, but for maintained grids """ row = (when, unmaintained_shareprobs, maintained_shareprobs, P_repaired_last_check_period, cumulative_number_of_repairs, cumulative_number_of_new_shares, P_dead_unmaintained, P_dead_maintained) self.samples.append(row) tahoe-lafs-1.10.0/misc/operations_helpers/provisioning/reliability.xhtml000066400000000000000000000040231221140116300265570ustar00rootroot00000000000000 Tahoe-LAFS - Reliability Tool

Tahoe-LAFS Reliability Tool

Given certain assumptions, this page calculates the probability of share loss over time, to help you make informed decisions about how much redundancy and repair bandwidth to configure on a Tahoe-LAFS grid.

Simulation Results

At the end of the report span (elapsed time ), the simulated file had the following properties:

  • Probability of loss (no maintenance):
  • Probability of loss (with maintenance):
  • Average repair frequency: once every secs
  • Average shares generated per repair:

This table shows how the following properties change over time:

  • P_repair: the chance that a repair was performed in the most recent check period.
  • P_dead (unmaintained): the chance that the file will be unrecoverable without periodic check+repair
  • P_dead (maintained): the chance that the file will be unrecoverable even with periodic check+repair
t P_repair P_dead (unmaintained) P_dead (maintained)
no simulation data!
tahoe-lafs-1.10.0/misc/operations_helpers/provisioning/run.py000066400000000000000000000023521221140116300243510ustar00rootroot00000000000000#!/usr/bin/env python # this depends upon Twisted and Nevow, but not upon Tahoe itself import webbrowser from twisted.application import strports from twisted.internet import reactor from nevow import appserver, rend, loaders from twisted.web import static import web_reliability, provisioning class Root(rend.Page): docFactory = loaders.xmlstr('''\ Tahoe-LAFS Provisioning/Reliability Calculator

Reliability Tool

Provisioning Tool

''') child_reliability = web_reliability.ReliabilityTool() child_provisioning = provisioning.ProvisioningTool() def run(portnum): root = Root() root.putChild("tahoe.css", static.File("tahoe.css")) site = appserver.NevowSite(root) s = strports.service("tcp:%d" % portnum, site) s.startService() reactor.callLater(1.0, webbrowser.open, "http://localhost:%d/" % portnum) reactor.run() if __name__ == '__main__': import sys portnum = 8070 if len(sys.argv) > 1: portnum = int(sys.argv[1]) run(portnum) tahoe-lafs-1.10.0/misc/operations_helpers/provisioning/tahoe.css000066400000000000000000000057121221140116300250100ustar00rootroot00000000000000 pre.overflow { background: #f7f7f7; border: 1px solid #d7d7d7; margin: 1em 1.75em; padding: .25em; overflow: auto; } /* ----------------------------------------------------------------------- */ /* colors borrowed from the Allmydata logo */ /* general style */ h1 { text-align: center; } table { margin: 1em auto; border: .2em solid #3289b4; border-spacing: 1px; } th { color: white; background-color: #58a1c3; } td { padding: .3em .3em; } th { padding: .3em .3em; } .table-headings-top th { text-align: center; } .table-headings-left th { text-align: right; vertical-align: top; } legend { font-weight: bold; } .connected-yes, .connected-True { border: 1px solid #75d24a; background-color: #EFE; } .connected-no, .connected-False { border: 1px solid #F00; background-color: #FBB; } .encoded, .nodeid { font-family: monospace; font-size: 80%; } .empty-marker { background-color: white; color: gray; } table td.empty-marker { padding: 6em 10em; text-align: center; vertical-align: center; } /* styles for server listings in tables (nickname above nodeid) */ th.nickname-and-peerid { text-align: left; } .nickname { font: inherit; font-family: sans-serif; font-weight: bold; } /* just in case, make sure floats don't stomp on big tables etc. 
*/ #section { clear: both; } /* section-specific styles - turn this client info into a sidebar */ #this-client { font-size: 60%; border: .2em solid #3289b4; float: right; width: 40%; margin: 0 0 .5em .5em; padding: 3px; } #this-client .nodeid { font-size: inherit; } #this-client h2 { text-align: center; background: #3289b4; color: white; margin: -2px -2px 0 -2px; /* matches padding */ padding: .3em; } #this-client table { font-size: inherit; margin: 0 -3px -3px -3px; /* matches padding */ } #this-client td > ul { list-style-type: outside; margin: 0 0 0 2.3em; padding-left: 0; } /* services table */ .services { } /* --- Directory page styles --- */ body.tahoe-directory-page { color: black; background: #c0d9e6; margin: 1em 0; /* zero margin so the table can be flush */ } table.tahoe-directory { color: black; background: white; width: 100%; /*border-left-color: #D7E0E5; border-right-color: #D7E0E5;*/ border-left: 0; border-right: 0; } .tahoe-directory-footer { color: black; background: #c0d9e6; margin: 0 1em; /* compensate for page 0 margin */ } /* directory-screen toolbar */ .toolbar { display: table; margin: .2em auto; text-align: center; /*width: 100%;*/ } .toolbar .toolbar-item { display: inline; text-align: center; padding: 0 1em; } /* recent upload/download status pages */ table.status-download-events { #border: 1px solid #aaa; margin: 1em auto; border: .2em solid #3289b4; border-spacing: 1px; } table.status-download-events td { border: 1px solid #a00; padding: 2px } tahoe-lafs-1.10.0/misc/operations_helpers/provisioning/test_provisioning.py000066400000000000000000000101601221140116300273260ustar00rootroot00000000000000 import unittest from allmydata import provisioning ReliabilityModel = None try: from allmydata.reliability import ReliabilityModel except ImportError: pass # might not be importable, since it needs NumPy from nevow import inevow from zope.interface import implements class MyRequest: implements(inevow.IRequest) pass class Provisioning(unittest.TestCase): def getarg(self, name, astype=int): if name in self.fields: return astype(self.fields[name]) return None def test_load(self): pt = provisioning.ProvisioningTool() self.fields = {} #r = MyRequest() #r.fields = self.fields #ctx = RequestContext() #unfilled = pt.renderSynchronously(ctx) lots_of_stan = pt.do_forms(self.getarg) self.failUnless(lots_of_stan is not None) self.fields = {'filled': True, "num_users": 50e3, "files_per_user": 1000, "space_per_user": 1e9, "sharing_ratio": 1.0, "encoding_parameters": "3-of-10-5", "num_servers": 30, "ownership_mode": "A", "download_rate": 100, "upload_rate": 10, "delete_rate": 10, "lease_timer": 7, } #filled = pt.renderSynchronously(ctx) more_stan = pt.do_forms(self.getarg) self.failUnless(more_stan is not None) # trigger the wraparound configuration self.fields["num_servers"] = 5 #filled = pt.renderSynchronously(ctx) more_stan = pt.do_forms(self.getarg) # and other ownership modes self.fields["ownership_mode"] = "B" more_stan = pt.do_forms(self.getarg) self.fields["ownership_mode"] = "E" more_stan = pt.do_forms(self.getarg) def test_provisioning_math(self): self.failUnlessEqual(provisioning.binomial(10, 0), 1) self.failUnlessEqual(provisioning.binomial(10, 1), 10) self.failUnlessEqual(provisioning.binomial(10, 2), 45) self.failUnlessEqual(provisioning.binomial(10, 9), 10) self.failUnlessEqual(provisioning.binomial(10, 10), 1) DAY=24*60*60 MONTH=31*DAY YEAR=365*DAY class Reliability(unittest.TestCase): def test_basic(self): if ReliabilityModel is None: raise 
unittest.SkipTest("reliability model requires NumPy") # test that numpy math works the way I think it does import numpy decay = numpy.matrix([[1,0,0], [.1,.9,0], [.01,.09,.9], ]) start = numpy.array([0,0,1]) g2 = (start * decay).A[0] self.failUnlessEqual(repr(g2), repr(numpy.array([.01,.09,.9]))) g3 = (g2 * decay).A[0] self.failUnlessEqual(repr(g3), repr(numpy.array([.028,.162,.81]))) # and the dot product recoverable = numpy.array([0,1,1]) P_recoverable_g2 = numpy.dot(g2, recoverable) self.failUnlessAlmostEqual(P_recoverable_g2, .9 + .09) P_recoverable_g3 = numpy.dot(g3, recoverable) self.failUnlessAlmostEqual(P_recoverable_g3, .81 + .162) r = ReliabilityModel.run(delta=100000, report_period=3*MONTH, report_span=5*YEAR) self.failUnlessEqual(len(r.samples), 20) last_row = r.samples[-1] #print last_row (when, unmaintained_shareprobs, maintained_shareprobs, P_repaired_last_check_period, cumulative_number_of_repairs, cumulative_number_of_new_shares, P_dead_unmaintained, P_dead_maintained) = last_row self.failUnless(isinstance(P_repaired_last_check_period, float)) self.failUnless(isinstance(P_dead_unmaintained, float)) self.failUnless(isinstance(P_dead_maintained, float)) self.failUnlessAlmostEqual(P_dead_unmaintained, 0.033591004555395272) self.failUnlessAlmostEqual(P_dead_maintained, 3.2983995819177542e-08) if __name__=='__main__': unittest.main() tahoe-lafs-1.10.0/misc/operations_helpers/provisioning/util.py000066400000000000000000000001661221140116300245230ustar00rootroot00000000000000 import os.path def sibling(filename): return os.path.join(os.path.dirname(os.path.abspath(__file__)), filename) tahoe-lafs-1.10.0/misc/operations_helpers/provisioning/web_reliability.py000066400000000000000000000143461221140116300267210ustar00rootroot00000000000000 from nevow import rend, loaders, tags as T from nevow.inevow import IRequest import reliability # requires NumPy import util def get_arg(ctx_or_req, argname, default=None, multiple=False): """Extract an argument from either the query args (req.args) or the form body fields (req.fields). If multiple=False, this returns a single value (or the default, which defaults to None), and the query args take precedence. If multiple=True, this returns a tuple of arguments (possibly empty), starting with all those in the query args. 
""" req = IRequest(ctx_or_req) results = [] if argname in req.args: results.extend(req.args[argname]) if req.fields and argname in req.fields: results.append(req.fields[argname].value) if multiple: return tuple(results) if results: return results[0] return default DAY=24*60*60 MONTH=31*DAY YEAR=365*DAY def is_available(): if reliability: return True return False def yandm(seconds): return "%dy.%dm" % (int(seconds/YEAR), int( (seconds%YEAR)/MONTH)) class ReliabilityTool(rend.Page): addSlash = True docFactory = loaders.xmlfile(util.sibling("reliability.xhtml")) DEFAULT_PARAMETERS = [ ("drive_lifetime", "8Y", "time", "Average drive lifetime"), ("k", 3, "int", "Minimum number of shares needed to recover the file"), ("R", 7, "int", "Repair threshold: repair will not occur until fewer than R shares " "are left"), ("N", 10, "int", "Total number of shares of the file generated"), ("delta", "1M", "time", "Amount of time between each simulation step"), ("check_period", "1M", "time", "How often to run the checker and repair if fewer than R shares"), ("report_period", "3M", "time", "Amount of time between result rows in this report"), ("report_span", "5Y", "time", "Total amount of time covered by this report"), ] def parse_time(self, s): if s.endswith("M"): return int(s[:-1]) * MONTH if s.endswith("Y"): return int(s[:-1]) * YEAR return int(s) def format_time(self, s): if s%YEAR == 0: return "%dY" % (s/YEAR) if s%MONTH == 0: return "%dM" % (s/MONTH) return "%d" % s def get_parameters(self, ctx): parameters = {} for (name,default,argtype,description) in self.DEFAULT_PARAMETERS: v = get_arg(ctx, name, default) if argtype == "time": value = self.parse_time(v) else: value = int(v) parameters[name] = value return parameters def renderHTTP(self, ctx): self.parameters = self.get_parameters(ctx) self.results = reliability.ReliabilityModel.run(**self.parameters) return rend.Page.renderHTTP(self, ctx) def make_input(self, name, old_value): return T.input(name=name, type="text", size="5", value=self.format_time(old_value)) def render_forms(self, ctx, data): f = T.form(action=".", method="get") table = [] for (name,default_value,argtype,description) in self.DEFAULT_PARAMETERS: old_value = self.parameters[name] i = self.make_input(name, old_value) table.append(T.tr[T.td[name+":"], T.td[i], T.td[description]]) go = T.input(type="submit", value="Recompute") return [T.h2["Simulation Parameters:"], f[T.table[table], go], ] def data_simulation_table(self, ctx, data): for row in self.results.samples: yield row def render_simulation_row(self, ctx, row): (when, unmaintained_shareprobs, maintained_shareprobs, P_repaired_last_check_period, cumulative_number_of_repairs, cumulative_number_of_new_shares, P_dead_unmaintained, P_dead_maintained) = row ctx.fillSlots("t", yandm(when)) ctx.fillSlots("P_repair", "%.6f" % P_repaired_last_check_period) ctx.fillSlots("P_dead_unmaintained", "%.6g" % P_dead_unmaintained) ctx.fillSlots("P_dead_maintained", "%.6g" % P_dead_maintained) return ctx.tag def render_report_span(self, ctx, row): (when, unmaintained_shareprobs, maintained_shareprobs, P_repaired_last_check_period, cumulative_number_of_repairs, cumulative_number_of_new_shares, P_dead_unmaintained, P_dead_maintained) = self.results.samples[-1] return ctx.tag[yandm(when)] def render_P_loss_unmaintained(self, ctx, row): (when, unmaintained_shareprobs, maintained_shareprobs, P_repaired_last_check_period, cumulative_number_of_repairs, cumulative_number_of_new_shares, P_dead_unmaintained, P_dead_maintained) = self.results.samples[-1] 
return ctx.tag["%.6g (%1.8f%%)" % (P_dead_unmaintained, 100*P_dead_unmaintained)] def render_P_loss_maintained(self, ctx, row): (when, unmaintained_shareprobs, maintained_shareprobs, P_repaired_last_check_period, cumulative_number_of_repairs, cumulative_number_of_new_shares, P_dead_unmaintained, P_dead_maintained) = self.results.samples[-1] return ctx.tag["%.6g (%1.8f%%)" % (P_dead_maintained, 100*P_dead_maintained)] def render_P_repair_rate(self, ctx, row): (when, unmaintained_shareprobs, maintained_shareprobs, P_repaired_last_check_period, cumulative_number_of_repairs, cumulative_number_of_new_shares, P_dead_unmaintained, P_dead_maintained) = self.results.samples[-1] freq = when / cumulative_number_of_repairs return ctx.tag["%.6g" % freq] def render_P_repair_shares(self, ctx, row): (when, unmaintained_shareprobs, maintained_shareprobs, P_repaired_last_check_period, cumulative_number_of_repairs, cumulative_number_of_new_shares, P_dead_unmaintained, P_dead_maintained) = self.results.samples[-1] generated_shares = cumulative_number_of_new_shares / cumulative_number_of_repairs return ctx.tag["%1.2f" % generated_shares] tahoe-lafs-1.10.0/misc/operations_helpers/spacetime/000077500000000000000000000000001221140116300224155ustar00rootroot00000000000000tahoe-lafs-1.10.0/misc/operations_helpers/spacetime/diskwatcher.py000066400000000000000000000017551221140116300253070ustar00rootroot00000000000000 from axiom.item import Item from axiom.attributes import text, integer, timestamp class Sample(Item): # we didn't originally set typeName, so it was generated from the # fully-qualified classname ("diskwatcher.Sample"), then Axiom # automatically lowercases and un-dot-ifies it to get # "diskwatcher_sample". Now we explicitly provide a name. typeName = "diskwatcher_sample" # version 2 added the 'total' field schemaVersion = 2 url = text(indexed=True) when = timestamp(indexed=True) total = integer() used = integer() avail = integer() def upgradeSample1to2(old): return old.upgradeVersion("diskwatcher_sample", 1, 2, url=old.url, when=old.when, total=0, used=old.used, avail=old.avail) from axiom.upgrade import registerUpgrader registerUpgrader(upgradeSample1to2, "diskwatcher_sample", 1, 2) tahoe-lafs-1.10.0/misc/operations_helpers/spacetime/diskwatcher.tac000066400000000000000000000352001221140116300254160ustar00rootroot00000000000000# -*- python -*- """ Run this tool with twistd in its own directory, with a file named 'urls.txt' describing which nodes to query. Make sure to copy diskwatcher.py into the same directory. It will request disk-usage numbers from the nodes once per hour (or slower), and store them in a local database. It will compute usage-per-unit time values over several time ranges and make them available through an HTTP query (using ./webport). It will also provide an estimate of how much time is left before the grid's storage is exhausted. There are munin plugins (named tahoe_doomsday and tahoe_diskusage) to graph the values this tool computes. Each line of urls.txt points to a single node. Each node should have its own dedicated disk: if multiple nodes share a disk, only list one of them in urls.txt (otherwise that space will be double-counted, confusing the results). 
Each line should be in the form: http://host:webport/statistics?t=json """ # TODO: # built-in graphs on web interface import os.path, urllib, time from datetime import timedelta from twisted.application import internet, service, strports from twisted.web import server, resource, http, client from twisted.internet import defer from twisted.python import log import simplejson from axiom.attributes import AND from axiom.store import Store from epsilon import extime from diskwatcher import Sample #from axiom.item import Item #from axiom.attributes import text, integer, timestamp #class Sample(Item): # url = text() # when = timestamp() # used = integer() # avail = integer() #s = Store("history.axiom") #ns = Store("new-history.axiom") #for sa in s.query(Sample): # diskwatcher.Sample(store=ns, # url=sa.url, when=sa.when, used=sa.used, avail=sa.avail) #print "done" HOUR = 3600 DAY = 24*3600 WEEK = 7*DAY MONTH = 30*DAY YEAR = 365*DAY class DiskWatcher(service.MultiService, resource.Resource): POLL_INTERVAL = 1*HOUR AVERAGES = {#"60s": 60, #"5m": 5*60, #"30m": 30*60, "1hr": 1*HOUR, "1day": 1*DAY, "2wk": 2*WEEK, "4wk": 4*WEEK, } def __init__(self): assert os.path.exists("diskwatcher.tac") # run from the right directory self.growth_cache = {} service.MultiService.__init__(self) resource.Resource.__init__(self) self.store = Store("history.axiom") self.store.whenFullyUpgraded().addCallback(self._upgrade_complete) service.IService(self.store).setServiceParent(self) # let upgrader run ts = internet.TimerService(self.POLL_INTERVAL, self.poll) ts.setServiceParent(self) def _upgrade_complete(self, ignored): print "Axiom store upgrade complete" def startService(self): service.MultiService.startService(self) try: desired_webport = open("webport", "r").read().strip() except EnvironmentError: desired_webport = None webport = desired_webport or "tcp:0" root = self serv = strports.service(webport, server.Site(root)) serv.setServiceParent(self) if not desired_webport: got_port = serv._port.getHost().port open("webport", "w").write("tcp:%d\n" % got_port) def get_urls(self): for url in open("urls.txt","r").readlines(): if "#" in url: url = url[:url.find("#")] url = url.strip() if not url: continue yield url def poll(self): log.msg("polling..") #return self.poll_synchronous() return self.poll_asynchronous() def poll_asynchronous(self): # this didn't actually seem to work any better than poll_synchronous: # logs are more noisy, and I got frequent DNS failures. But with a # lot of servers to query, this is probably the better way to go. A # significant advantage of this approach is that we can use a # timeout= argument to tolerate hanging servers. 
dl = [] for url in self.get_urls(): when = extime.Time() d = client.getPage(url, timeout=60) d.addCallback(self.got_response, when, url) dl.append(d) d = defer.DeferredList(dl) def _done(res): fetched = len([1 for (success, value) in res if success]) log.msg("fetched %d of %d" % (fetched, len(dl))) d.addCallback(_done) return d def poll_synchronous(self): attempts = 0 fetched = 0 for url in self.get_urls(): attempts += 1 try: when = extime.Time() # if a server accepts the connection and then hangs, this # will block forever data_json = urllib.urlopen(url).read() self.got_response(data_json, when, url) fetched += 1 except: log.msg("error while fetching: %s" % url) log.err() log.msg("fetched %d of %d" % (fetched, attempts)) def got_response(self, data_json, when, url): data = simplejson.loads(data_json) total = data[u"stats"][u"storage_server.disk_total"] used = data[u"stats"][u"storage_server.disk_used"] avail = data[u"stats"][u"storage_server.disk_avail"] print "%s : total=%s, used=%s, avail=%s" % (url, total, used, avail) Sample(store=self.store, url=unicode(url), when=when, total=total, used=used, avail=avail) def calculate_growth_timeleft(self): timespans = [] total_avail_space = self.find_total_available_space() pairs = [ (timespan,name) for name,timespan in self.AVERAGES.items() ] pairs.sort() for (timespan,name) in pairs: growth = self.growth(timespan) print name, total_avail_space, growth if growth is not None: timeleft = None if growth > 0: timeleft = total_avail_space / growth timespans.append( (name, timespan, growth, timeleft) ) return timespans def find_total_space(self): # this returns the sum of disk-avail stats for all servers that 1) # are listed in urls.txt and 2) have responded recently. now = extime.Time() recent = now - timedelta(seconds=2*self.POLL_INTERVAL) total_space = 0 for url in self.get_urls(): url = unicode(url) latest = list(self.store.query(Sample, AND(Sample.url == url, Sample.when > recent), sort=Sample.when.descending, limit=1)) if latest: total_space += latest[0].total return total_space def find_total_available_space(self): # this returns the sum of disk-avail stats for all servers that 1) # are listed in urls.txt and 2) have responded recently. now = extime.Time() recent = now - timedelta(seconds=2*self.POLL_INTERVAL) total_avail_space = 0 for url in self.get_urls(): url = unicode(url) latest = list(self.store.query(Sample, AND(Sample.url == url, Sample.when > recent), sort=Sample.when.descending, limit=1)) if latest: total_avail_space += latest[0].avail return total_avail_space def find_total_used_space(self): # this returns the sum of disk-used stats for all servers that 1) are # listed in urls.txt and 2) have responded recently. now = extime.Time() recent = now - timedelta(seconds=2*self.POLL_INTERVAL) total_used_space = 0 for url in self.get_urls(): url = unicode(url) latest = list(self.store.query(Sample, AND(Sample.url == url, Sample.when > recent), sort=Sample.when.descending, limit=1)) if latest: total_used_space += latest[0].used return total_used_space def growth(self, timespan): """Calculate the bytes-per-second growth of the total disk-used stat, over a period of TIMESPAN seconds (i.e. between the most recent sample and the latest one that's at least TIMESPAN seconds ago), summed over all nodes which 1) are listed in urls.txt, 2) have responded recently, and 3) have a response at least as old as TIMESPAN. 
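        For example (illustrative numbers): if the 4wk growth works out to
        1e6 bytes per second and find_total_available_space() reports 1e12
        bytes free, calculate_growth_timeleft() will estimate
        1e12/1e6 = 1e6 seconds (about 11.6 days) of remaining capacity for
        that timespan.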
If there are no nodes which meet these criteria, we'll return None; this is likely to happen for the longer timespans (4wk) until the gatherer has been running and collecting data for that long.""" # a note about workload: for our oldest storage servers, as of # 25-Jan-2009, the first DB query here takes about 40ms per server # URL (some take as little as 10ms). There are about 110 servers, and # two queries each, so the growth() function takes about 7s to run # for each timespan. We track 4 timespans, and find_total_*_space() # takes about 2.3s to run, so calculate_growth_timeleft() takes about # 27s. Each HTTP query thus takes 27s, and we have six munin plugins # which perform HTTP queries every 5 minutes. By adding growth_cache(), # I hope to reduce this: the first HTTP query will still take 27s, # but the subsequent five should be about 2.3s each. # we're allowed to cache this value for 3 minutes if timespan in self.growth_cache: (when, value) = self.growth_cache[timespan] if time.time() - when < 3*60: return value td = timedelta(seconds=timespan) now = extime.Time() then = now - td recent = now - timedelta(seconds=2*self.POLL_INTERVAL) total_growth = 0.0 num_nodes = 0 for url in self.get_urls(): url = unicode(url) latest = list(self.store.query(Sample, AND(Sample.url == url, Sample.when > recent), sort=Sample.when.descending, limit=1)) if not latest: #print "no latest sample from", url continue # skip this node latest = latest[0] old = list(self.store.query(Sample, AND(Sample.url == url, Sample.when < then), sort=Sample.when.descending, limit=1)) if not old: #print "no old sample from", url continue # skip this node old = old[0] duration = latest.when.asPOSIXTimestamp() - old.when.asPOSIXTimestamp() if not duration: print "only one sample from", url continue rate = float(latest.used - old.used) / duration #print url, rate total_growth += rate num_nodes += 1 if not num_nodes: return None self.growth_cache[timespan] = (time.time(), total_growth) return total_growth def getChild(self, path, req): if path == "": return self return resource.Resource.getChild(self, path, req) def abbreviate_time(self, s): def _plural(count, unit): count = int(count) if count == 1: return "%d %s" % (count, unit) return "%d %ss" % (count, unit) if s is None: return "unknown" if s < 120: return _plural(s, "second") if s < 3*HOUR: return _plural(s/60, "minute") if s < 2*DAY: return _plural(s/HOUR, "hour") if s < 2*MONTH: return _plural(s/DAY, "day") if s < 4*YEAR: return _plural(s/MONTH, "month") return _plural(s/YEAR, "year") def abbreviate_space2(self, s, SI=True): if s is None: return "unknown" if SI: U = 1000.0 isuffix = "B" else: U = 1024.0 isuffix = "iB" def r(count, suffix): return "%.2f %s%s" % (count, suffix, isuffix) if s < 1024: # 1000-1023 get emitted as bytes, even in SI mode return r(s, "") if s < U*U: return r(s/U, "k") if s < U*U*U: return r(s/(U*U), "M") if s < U*U*U*U: return r(s/(U*U*U), "G") if s < U*U*U*U*U: return r(s/(U*U*U*U), "T") return r(s/(U*U*U*U*U), "P") def abbreviate_space(self, s): return "(%s, %s)" % (self.abbreviate_space2(s, True), self.abbreviate_space2(s, False)) def render(self, req): t = req.args.get("t", ["html"])[0] ctype = "text/plain" data = "" if t == "html": data = "" for (name, timespan, growth, timeleft) in self.calculate_growth_timeleft(): data += "%f bytes per second (%sps), %s remaining (over %s)\n" % \ (growth, self.abbreviate_space2(growth, True), self.abbreviate_time(timeleft), name) used = self.find_total_used_space() data += "total used: %d bytes %s\n" % 
(used, self.abbreviate_space(used)) total = self.find_total_space() data += "total space: %d bytes %s\n" % (total, self.abbreviate_space(total)) elif t == "json": current = {"rates": self.calculate_growth_timeleft(), "total": self.find_total_space(), "used": self.find_total_used_space(), "available": self.find_total_available_space(), } data = simplejson.dumps(current, indent=True) else: req.setResponseCode(http.BAD_REQUEST) data = "Unknown t= %s\n" % t req.setHeader("content-type", ctype) return data application = service.Application("disk-watcher") DiskWatcher().setServiceParent(application) tahoe-lafs-1.10.0/misc/simulators/000077500000000000000000000000001221140116300167405ustar00rootroot00000000000000tahoe-lafs-1.10.0/misc/simulators/bench_spans.py000066400000000000000000000055751221140116300216110ustar00rootroot00000000000000""" To use this, get a trace file such as this one: wget http://tahoe-lafs.org/trac/tahoe-lafs/raw-attachment/ticket/1170/run-112-above28-flog-dump-sh8-on-nsziz.txt And run this command passing that trace file's name: python bench_spans.py run-112-above28-flog-dump-sh8-on-nsziz.txt """ from pyutil import benchutil from allmydata.util.spans import DataSpans import re, sys DUMP_S='_received spans trace .dump()' GET_R=re.compile('_received spans trace .get\(([0-9]*), ([0-9]*)\)') POP_R=re.compile('_received spans trace .pop\(([0-9]*), ([0-9]*)\)') REMOVE_R=re.compile('_received spans trace .remove\(([0-9]*), ([0-9]*)\)') GET_SPANS_S='_received spans trace .get_spans()' ADD_R=re.compile('_received spans trace .add\(([0-9]*), len=([0-9]*)\)') INIT_S='_received spans trace = DataSpans' class B(object): def __init__(self, inf): self.inf = inf def init(self, N): self.s = DataSpans() # self.stats = {} def run(self, N): count = 0 inline = self.inf.readline() while count < N and inline != '': if DUMP_S in inline: self.s.dump() # self.stats['dump'] = self.stats.get('dump', 0) + 1 elif GET_SPANS_S in inline: self.s.get_spans() # self.stats['get_spans'] = self.stats.get('get_spans', 0) + 1 elif ADD_R.search(inline): mo = ADD_R.search(inline) start = int(mo.group(1)) length = int(mo.group(2)) self.s.add(start, 'x'*length) # self.stats['add'] = self.stats.get('add', 0) + 1 elif GET_R.search(inline): mo = GET_R.search(inline) start = int(mo.group(1)) length = int(mo.group(2)) self.s.get(start, length) # self.stats['get'] = self.stats.get('get', 0) + 1 elif REMOVE_R.search(inline): mo = REMOVE_R.search(inline) start = int(mo.group(1)) length = int(mo.group(2)) self.s.remove(start, length) # self.stats['remove'] = self.stats.get('remove', 0) + 1 elif POP_R.search(inline): mo = POP_R.search(inline) start = int(mo.group(1)) length = int(mo.group(2)) self.s.pop(start, length) # self.stats['pop'] = self.stats.get('pop', 0) + 1 elif INIT_S in inline: pass else: print "Warning, didn't recognize this line: %r" % (inline,) count += 1 inline = self.inf.readline() # print self.stats benchutil.print_bench_footer(UNITS_PER_SECOND=1000000) print "(microseconds)" for N in [600, 6000, 60000]: b = B(open(sys.argv[1], 'rU')) print "%7d" % N, benchutil.rep_bench(b.run, N, b.init, UNITS_PER_SECOND=1000000) tahoe-lafs-1.10.0/misc/simulators/count_dirs.py000066400000000000000000000105661221140116300214730ustar00rootroot00000000000000#!/usr/bin/env python """ This tool estimates how much space would be consumed by a filetree into which a native directory was copied. One open question is how we should encode directories. 
One approach is to put a block of data on a server, one per directory, which effectively contains a dictionary that maps child names to targets (URIs for children which are files, slotnames for children which are directories). To prevent the server which hosts this data from either learning its contents or corrupting them, we can add encryption and integrity checks to the data, at the cost of storage overhead. This program is intended to estimate the size of these data blocks using real-world filenames and directories. You point it at a real directory, and it does a recursive walk of the filesystem, adding up the size of the filetree data structures that would be required to represent it. MODES: A: no confidentiality or integrity checking. Directories are serialized plaintext dictionaries which map file/subdir names to targets (either URIs or slotnames). Each entry can be changed independently. B1: child names and targets are encrypted. No integrity checks, so the server can still corrupt the contents undetectably. Each entry can still be changed independently. B2: same security properties as B1, but the dictionary is serialized before encryption. This reduces overhead at the cost of preventing independent updates of entries (all entries must be updated at the same time, so test-and-set operations are required to avoid data-losing races) C1: like B1, but adding HMACs to each entry to guarantee data integrity C2: like B2, but adding a single block-wide HMAC for data integrity """ import sys, os.path #URI:7jzbza6iwdsk5xbxsvdgjaugyrhetw64zpflp4gihmyh5krjblra====:a5qdejwbimu5b2wfke7xwexxlq======:gzeub5v42rjbgd7ccawnahu2evqd42lpdpzd447c6zkmdvjkpowq====:25:100:219889 # that's a printable representation of two 32-byte hashes (storage index, URI # extension block hash) and a 16-byte AES read-capability key, and some # share-count and size information URI_SIZE = 164 #pb://xextf3eap44o3wi27mf7ehiur6wvhzr6@207.7.153.180:56677,127.0.0.1:56677/zilcw5uz2yyyo=== # that's a FURL which points at the slot. Modes that need to add a # read-capability AES key will need more space. SLOTNAME_SIZE = 90 def slotsize(mode, numfiles, numdirs): # URI_sizes is the total space taken up by the target (dict keys) strings # for all of the targets that are files, instead of directories target_sizes_for_files = numfiles * URI_SIZE slotname_size = SLOTNAME_SIZE if mode in ("B1", "B2", "C1", "C2"): slotname_size += 16 # slotname_sizes is the total space taken up by the target strings for # all the targets that are directories, instead of files. These are # bigger when the read+write-cap slotname is larger than the store-cap, # which happens as soon as we seek to prevent the slot's host from # reading or corrupting it. target_sizes_for_subdirs = numdirs * slotname_size # now how much overhead is there for each entry? 
per_slot, per_entry = 0, 0 if mode == "B1": per_entry = 16+12+12 elif mode == "C1": per_entry = 16+12+12 + 32+32 elif mode == "B2": per_slot = 12 elif mode == "C2": per_slot = 12+32 num_entries = numfiles + numdirs total = (target_sizes_for_files + target_sizes_for_subdirs + per_slot + per_entry * num_entries ) return total MODES = ("A", "B1", "B2", "C1", "C2") def scan(root): total = dict([(mode,0) for mode in MODES]) num_files = 0 num_dirs = 0 for absroot, dirs, files in os.walk(root): #print absroot #print " %d files" % len(files) #print " %d subdirs" % len(dirs) num_files += len(files) num_dirs += len(dirs) stringsize = len(''.join(files) + ''.join(dirs)) for mode in MODES: total[mode] += slotsize(mode, len(files), len(dirs)) + stringsize print "%d directories" % num_dirs print "%d files" % num_files for mode in sorted(total.keys()): print "%s: %d bytes" % (mode, total[mode]) if __name__ == '__main__': scan(sys.argv[1]) """ 260:warner@monolith% ./count_dirs.py ~ 70925 directories 457199 files A: 90042361 bytes B1: 112302121 bytes B2: 92027061 bytes C1: 146102057 bytes C2: 94293461 bytes """ tahoe-lafs-1.10.0/misc/simulators/hashbasedsig.py000066400000000000000000000334711221140116300217470ustar00rootroot00000000000000#!python # range of hash output lengths range_L_hash = [128] lg_M = 53 # lg(required number of signatures before losing security) limit_bytes = 480000 # limit on signature length limit_cost = 500 # limit on Mcycles_Sig + weight_ver*Mcycles_ver weight_ver = 1 # how important verification cost is relative to signature cost # (note: setting this too high will just exclude useful candidates) L_block = 512 # bitlength of hash input blocks L_pad = 64 # bitlength of hash padding overhead (for M-D hashes) L_label = 80 # bitlength of hash position label L_prf = 256 # bitlength of hash output when used as a PRF cycles_per_byte = 15.8 # cost of hash Mcycles_per_block = cycles_per_byte * L_block / (8 * 1000000.0) from math import floor, ceil, log, log1p, pow, e from sys import stderr from gc import collect def lg(x): return log(x, 2) def ln(x): return log(x, e) def ceil_log(x, B): return int(ceil(log(x, B))) def ceil_div(x, y): return int(ceil(float(x) / float(y))) def floor_div(x, y): return int(floor(float(x) / float(y))) # number of compression function evaluations to hash k hash-outputs # we assume that there is a label in each block def compressions(k): return ceil_div(k + L_pad, L_block - L_label) # sum of power series sum([pow(p, i) for i in range(n)]) def sum_powers(p, n): if p == 1: return n return (pow(p, n) - 1)/(p - 1) def make_candidate(B, K, K1, K2, q, T, T_min, L_hash, lg_N, sig_bytes, c_sign, c_ver, c_ver_pm): Mcycles_sign = c_sign * Mcycles_per_block Mcycles_ver = c_ver * Mcycles_per_block Mcycles_ver_pm = c_ver_pm * Mcycles_per_block cost = Mcycles_sign + weight_ver*Mcycles_ver if sig_bytes >= limit_bytes or cost > limit_cost: return [] return [{ 'B': B, 'K': K, 'K1': K1, 'K2': K2, 'q': q, 'T': T, 'T_min': T_min, 'L_hash': L_hash, 'lg_N': lg_N, 'sig_bytes': sig_bytes, 'c_sign': c_sign, 'Mcycles_sign': Mcycles_sign, 'c_ver': c_ver, 'c_ver_pm': c_ver_pm, 'Mcycles_ver': Mcycles_ver, 'Mcycles_ver_pm': Mcycles_ver_pm, 'cost': cost, }] # K1 = size of root Merkle tree # K = size of middle Merkle trees # K2 = size of leaf Merkle trees # q = number of revealed private keys per signed message # Winternitz with B < 4 is never optimal. For example, going from B=4 to B=2 halves the # chain depth, but that is cancelled out by doubling (roughly) the number of digits. 
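# Illustrative sketch only (this hypothetical helper is not used by the
# search below): it applies the same digit-count formula as calculate() to
# show the trade-off controlled by the Winternitz base B.
def winternitz_digits(B, L_hash=128):
    # number of base-B digits needed to sign an L_hash-bit message
    # representative (excluding the checksum digits)
    return ceil_div(L_hash - 1, lg(B))
# e.g. winternitz_digits(4) == 64 digits with chains of (roughly) depth
# B-1 == 3, while winternitz_digits(16) == 32 digits with chains of depth 15:
# a larger base gives fewer digits (hence a shorter signature) but deeper,
# more expensive hash chains.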
range_B = xrange(4, 33) M = pow(2, lg_M) def calculate(K, K1, K2, q_max, L_hash, trees): candidates = [] lg_K = lg(K) lg_K1 = lg(K1) lg_K2 = lg(K2) # We want the optimal combination of q and T. That takes too much time and memory # to search for directly, so we start by calculating the lowest possible value of T # for any q. Then for potential values of T, we calculate the smallest q such that we # will have at least L_hash bits of security against forgery using revealed private keys # (i.e. this method of forgery is no easier than finding a hash preimage), provided # that fewer than 2^lg_S_min messages are signed. # min height of certification tree (excluding root and bottom layer) T_min = ceil_div(lg_M - lg_K1, lg_K) last_q = None for T in xrange(T_min, T_min+21): # lg(total number of leaf private keys) lg_S = lg_K1 + lg_K*T lg_N = lg_S + lg_K2 # Suppose that m signatures have been made. The number of times X that a given bucket has # been chosen follows a binomial distribution B(m, p) where p = 1/S and S is the number of # buckets. I.e. Pr(X = x) = C(m, x) * p^x * (1-p)^(m-x). # # If an attacker picks a random seed and message that falls into a bucket that has been # chosen x times, then at most q*x private values in that bucket have been revealed, so # (ignoring the possibility of guessing private keys, which is negligable) the attacker's # success probability for a forgery using the revealed values is at most min(1, q*x / K2)^q. # # Let j = floor(K2/q). Conditioning on x, we have # # Pr(forgery) = sum_{x = 0..j}(Pr(X = x) * (q*x / K2)^q) + Pr(x > j) # = sum_{x = 1..j}(Pr(X = x) * (q*x / K2)^q) + Pr(x > j) # # We lose nothing by approximating (q*x / K2)^q as 1 for x > 4, i.e. ignoring the resistance # of the HORS scheme to forgery when a bucket has been chosen 5 or more times. # # Pr(forgery) < sum_{x = 1..4}(Pr(X = x) * (q*x / K2)^q) + Pr(x > 4) # # where Pr(x > 4) = 1 - sum_{x = 0..4}(Pr(X = x)) # # We use log arithmetic here because values very close to 1 cannot be represented accurately # in floating point, but their logarithms can (provided we use appropriate functions such as # log1p). lg_p = -lg_S lg_1_p = log1p(-pow(2, lg_p))/ln(2) # lg(1-p), computed accurately j = 5 lg_px = [lg_1_p * M]*j # We approximate lg(M-x) as lg(M) lg_px_step = lg_M + lg_p - lg_1_p for x in xrange(1, j): lg_px[x] = lg_px[x-1] - lg(x) + lg_px_step q = None # Find the minimum acceptable value of q. for q_cand in xrange(1, q_max+1): lg_q = lg(q_cand) lg_pforge = [lg_px[x] + (lg_q*x - lg_K2)*q_cand for x in xrange(1, j)] if max(lg_pforge) < -L_hash + lg(j) and lg_px[j-1] + 1.0 < -L_hash: #print "K = %d, K1 = %d, K2 = %d, L_hash = %d, lg_K2 = %.3f, q = %d, lg_pforge_1 = %.3f, lg_pforge_2 = %.3f, lg_pforge_3 = %.3f" \ # % (K, K1, K2, L_hash, lg_K2, q, lg_pforge_1, lg_pforge_2, lg_pforge_3) q = q_cand break if q is None or q == last_q: # if q hasn't decreased, this will be strictly worse than the previous candidate continue last_q = q # number of compressions to compute the Merkle hashes (h_M, c_M, _) = trees[K] (h_M1, c_M1, _) = trees[K1] (h_M2, c_M2, (dau, tri)) = trees[K2] # B = generalized Winternitz base for B in range_B: # n is the number of digits needed to sign the message representative and checksum. # The representation is base-B, except that we allow the most significant digit # to be up to 2B-1. 
n_L = ceil_div(L_hash-1, lg(B)) firstL_max = floor_div(pow(2, L_hash)-1, pow(B, n_L-1)) C_max = firstL_max + (n_L-1)*(B-1) n_C = ceil_log(ceil_div(C_max, 2), B) n = n_L + n_C firstC_max = floor_div(C_max, pow(B, n_C-1)) # Total depth of Winternitz hash chains. The chains for the most significant # digit of the message representative and of the checksum may be a different # length to those for the other digits. c_D = (n-2)*(B-1) + firstL_max + firstC_max # number of compressions to hash a Winternitz public key c_W = compressions(n*L_hash + L_label) # bitlength of a single Winternitz signature and authentication path L_MW = (n + h_M ) * L_hash L_MW1 = (n + h_M1) * L_hash # bitlength of the HORS signature and authentication paths # For all but one of the q authentication paths, one of the sibling elements in # another path is made redundant where they intersect. This cancels out the hash # that would otherwise be needed at the bottom of the path, making the total # length of the signature q*h_M2 + 1 hashes, rather than q*(h_M2 + 1). L_leaf = (q*h_M2 + 1) * L_hash # length of the overall GMSS+HORS signature and seeds sig_bytes = ceil_div(L_MW1 + T*L_MW + L_leaf + L_prf + ceil(lg_N), 8) c_MW = K *(c_D + c_W) + c_M + ceil_div(K *n*L_hash, L_prf) c_MW1 = K1*(c_D + c_W) + c_M1 + ceil_div(K1*n*L_hash, L_prf) # For simplicity, c_sign and c_ver don't take into account compressions saved # as a result of intersecting authentication paths in the HORS signature, so # are slight overestimates. c_sign = c_MW1 + T*c_MW + q*(c_M2 + 1) + ceil_div(K2*L_hash, L_prf) # *expected* number of compressions to verify a signature c_ver = c_D/2.0 + c_W + c_M1 + T*(c_D/2.0 + c_W + c_M) + q*(c_M2 + 1) c_ver_pm = (1 + T)*c_D/2.0 candidates += make_candidate(B, K, K1, K2, q, T, T_min, L_hash, lg_N, sig_bytes, c_sign, c_ver, c_ver_pm) return candidates def search(): for L_hash in range_L_hash: print >>stderr, "collecting... \r", collect() print >>stderr, "precomputing... \r", """ # d/dq (lg(q+1) + L_hash/q) = 1/(ln(2)*(q+1)) - L_hash/q^2 # Therefore lg(q+1) + L_hash/q is at a minimum when 1/(ln(2)*(q+1)) = L_hash/q^2. # Let alpha = L_hash*ln(2), then from the quadratic formula, the integer q that # minimizes lg(q+1) + L_hash/q is the floor or ceiling of (alpha + sqrt(alpha^2 - 4*alpha))/2. # (We don't want the other solution near 0.) alpha = floor(L_hash*ln(2)) # float q = floor((alpha + sqrt(alpha*(alpha-4)))/2) if lg(q+2) + L_hash/(q+1) < lg(q+1) + L_hash/q: q += 1 lg_S_margin = lg(q+1) + L_hash/q q_max = int(q) q = floor(L_hash*ln(2)) # float if lg(q+1) + L_hash/(q+1) < lg(q) + L_hash/q: q += 1 lg_S_margin = lg(q) + L_hash/q q_max = int(q) """ q_max = 4000 # find optimal Merkle tree shapes for this L_hash and each K trees = {} K_max = 50 c2 = compressions(2*L_hash + L_label) c3 = compressions(3*L_hash + L_label) for dau in xrange(0, 10): a = pow(2, dau) for tri in xrange(0, ceil_log(30-dau, 3)): x = int(a*pow(3, tri)) h = dau + 2*tri c_x = int(sum_powers(2, dau)*c2 + a*sum_powers(3, tri)*c3) for y in xrange(1, x+1): if tri > 0: # If the bottom level has arity 3, then for every 2 nodes by which the tree is # imperfect, we can save c3 compressions by pruning 3 leaves back to their parent. # If the tree is imperfect by an odd number of nodes, we can prune one extra leaf, # possibly saving a compression if c2 < c3. 
c_y = c_x - floor_div(x-y, 2)*c3 - ((x-y) % 2)*(c3-c2) else: # If the bottom level has arity 2, then for each node by which the tree is # imperfect, we can save c2 compressions by pruning 2 leaves back to their parent. c_y = c_x - (x-y)*c2 if y not in trees or (h, c_y, (dau, tri)) < trees[y]: trees[y] = (h, c_y, (dau, tri)) #for x in xrange(1, K_max+1): # print x, trees[x] candidates = [] progress = 0 fuzz = 0 complete = (K_max-1)*(2200-200)/100 for K in xrange(2, K_max+1): for K2 in xrange(200, 2200, 100): for K1 in xrange(max(2, K-fuzz), min(K_max, K+fuzz)+1): candidates += calculate(K, K1, K2, q_max, L_hash, trees) progress += 1 print >>stderr, "searching: %3d %% \r" % (100.0 * progress / complete,), print >>stderr, "filtering... \r", step = 2.0 bins = {} limit = floor_div(limit_cost, step) for bin in xrange(0, limit+2): bins[bin] = [] for c in candidates: bin = floor_div(c['cost'], step) bins[bin] += [c] del candidates # For each in a range of signing times, find the best candidate. best = [] for bin in xrange(0, limit): candidates = bins[bin] + bins[bin+1] + bins[bin+2] if len(candidates) > 0: best += [min(candidates, key=lambda c: c['sig_bytes'])] def format_candidate(candidate): return ("%(B)3d %(K)3d %(K1)3d %(K2)5d %(q)4d %(T)4d " "%(L_hash)4d %(lg_N)5.1f %(sig_bytes)7d " "%(c_sign)7d (%(Mcycles_sign)7.2f) " "%(c_ver)7d +/-%(c_ver_pm)5d (%(Mcycles_ver)5.2f +/-%(Mcycles_ver_pm)5.2f) " ) % candidate print >>stderr, " \r", if len(best) > 0: print " B K K1 K2 q T L_hash lg_N sig_bytes c_sign (Mcycles) c_ver ( Mcycles )" print "---- ---- ---- ------ ---- ---- ------ ------ --------- ------------------ --------------------------------" best.sort(key=lambda c: (c['sig_bytes'], c['cost'])) last_sign = None last_ver = None for c in best: if last_sign is None or c['c_sign'] < last_sign or c['c_ver'] < last_ver: print format_candidate(c) last_sign = c['c_sign'] last_ver = c['c_ver'] print else: print "No candidates found for L_hash = %d or higher." % (L_hash) return del bins del best print "Maximum signature size: %d bytes" % (limit_bytes,) print "Maximum (signing + %d*verification) cost: %.1f Mcycles" % (weight_ver, limit_cost) print "Hash parameters: %d-bit blocks with %d-bit padding and %d-bit labels, %.2f cycles per byte" \ % (L_block, L_pad, L_label, cycles_per_byte) print "PRF output size: %d bits" % (L_prf,) print "Security level given by L_hash is maintained for up to 2^%d signatures.\n" % (lg_M,) search() tahoe-lafs-1.10.0/misc/simulators/ringsim.py000066400000000000000000000177531221140116300207770ustar00rootroot00000000000000#! /usr/bin/python # used to discuss ticket #302: "stop permuting peerlist?" 
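# --- Editor's illustrative sketch (not part of ringsim.py) ---
# A minimal standalone illustration of the two selection orders that the Ring
# class below compares for a given storage index si: "permuted" re-ranks every
# server by hash(nodeid + si), while the flat ring starts at the first
# nodeid >= si and wraps around. The ids in the commented demo are made up; the
# real script derives them from a seed in much the same way.
from hashlib import md5 as _md5_demo

def _permuted_order(nodeids, si):
    return sorted(nodeids, key=lambda n: _md5_demo((n + si).encode()).digest())

def _ring_order(nodeids, si):
    ordered = sorted(nodeids)
    for i, n in enumerate(ordered):
        if n >= si:
            return ordered[i:] + ordered[:i]
    return list(ordered)

#ids = [_md5_demo(("node%d" % i).encode()).hexdigest() for i in range(5)]
#si = _md5_demo(b"example-si").hexdigest()
#print _permuted_order(ids, si)   # order depends on si
#print _ring_order(ids, si)       # contiguous arc of the sorted ring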
# import time import math from hashlib import md5 # sha1, sha256 myhash = md5 # md5: 1520 "uploads" per second # sha1: 1350 ups # sha256: 930 ups from itertools import count from twisted.python import usage def abbreviate_space(s, SI=True): if s is None: return "unknown" if SI: U = 1000.0 isuffix = "B" else: U = 1024.0 isuffix = "iB" def r(count, suffix): return "%.2f %s%s" % (count, suffix, isuffix) if s < 1024: # 1000-1023 get emitted as bytes, even in SI mode return "%d B" % s if s < U*U: return r(s/U, "k") if s < U*U*U: return r(s/(U*U), "M") if s < U*U*U*U: return r(s/(U*U*U), "G") if s < U*U*U*U*U: return r(s/(U*U*U*U), "T") return r(s/(U*U*U*U*U), "P") def make_up_a_file_size(seed): h = int(myhash(seed).hexdigest(),16) max=2**31 if 1: # exponential distribution e = 8 + (h % (31-8)) return 2 ** e # uniform distribution return h % max # avg 1GB sizes = [make_up_a_file_size(str(i)) for i in range(10000)] avg_filesize = sum(sizes)/len(sizes) print "average file size:", abbreviate_space(avg_filesize) SERVER_CAPACITY = 10**12 class Server: def __init__(self, nodeid, capacity): self.nodeid = nodeid self.used = 0 self.capacity = capacity self.numshares = 0 self.full_at_tick = None def upload(self, sharesize): if self.used + sharesize < self.capacity: self.used += sharesize self.numshares += 1 return True return False def __repr__(self): if self.full_at_tick is not None: return "<%s %s full at %d>" % (self.__class__.__name__, self.nodeid, self.full_at_tick) else: return "<%s %s>" % (self.__class__.__name__, self.nodeid) class Ring: SHOW_MINMAX = False def __init__(self, numservers, seed, permute): self.servers = [] for i in range(numservers): nodeid = myhash(str(seed)+str(i)).hexdigest() capacity = SERVER_CAPACITY s = Server(nodeid, capacity) self.servers.append(s) self.servers.sort(key=lambda s: s.nodeid) self.permute = permute #self.list_servers() def list_servers(self): for i in range(len(self.servers)): s = self.servers[i] next_s = self.servers[(i+1)%len(self.servers)] diff = "%032x" % (int(next_s.nodeid,16) - int(s.nodeid,16)) s.next_diff = diff prev_s = self.servers[(i-1)%len(self.servers)] diff = "%032x" % (int(s.nodeid,16) - int(prev_s.nodeid,16)) s.prev_diff = diff print s, s.prev_diff print "sorted by delta" for s in sorted(self.servers, key=lambda s:s.prev_diff): print s, s.prev_diff def servers_for_si(self, si): if self.permute: def sortkey(s): return myhash(s.nodeid+si).digest() return sorted(self.servers, key=sortkey) for i in range(len(self.servers)): if self.servers[i].nodeid >= si: return self.servers[i:] + self.servers[:i] return list(self.servers) def show_servers(self, picked): bits = [] for s in self.servers: if s in picked: bits.append("1") else: bits.append("0") #d = [s in picked and "1" or "0" for s in self.servers] return "".join(bits) def dump_usage(self, numfiles, avg_space_per_file): print "uploaded", numfiles # avg_space_per_file measures expected grid-wide ciphertext per file used = list(reversed(sorted([s.used for s in self.servers]))) # used is actual per-server ciphertext usedpf = [1.0*u/numfiles for u in used] # usedpf is actual per-server-per-file ciphertext #print "min/max usage: %s/%s" % (abbreviate_space(used[-1]), # abbreviate_space(used[0])) avg_usage_per_file = avg_space_per_file/len(self.servers) # avg_usage_per_file is expected per-server-per-file ciphertext spreadpf = usedpf[0] - usedpf[-1] average_usagepf = sum(usedpf) / len(usedpf) variance = sum([(u-average_usagepf)**2 for u in usedpf])/(len(usedpf)-1) std_deviation = math.sqrt(variance) 
sd_of_total = std_deviation / avg_usage_per_file print "min/max/(exp) usage-pf-ps %s/%s/(%s):" % ( abbreviate_space(usedpf[-1]), abbreviate_space(usedpf[0]), abbreviate_space(avg_usage_per_file) ), print "spread-pf: %s (%.2f%%)" % ( abbreviate_space(spreadpf), 100.0*spreadpf/avg_usage_per_file), #print "average_usage:", abbreviate_space(average_usagepf) print "stddev: %s (%.2f%%)" % (abbreviate_space(std_deviation), 100.0*sd_of_total) if self.SHOW_MINMAX: s2 = sorted(self.servers, key=lambda s: s.used) print "least:", s2[0].nodeid print "most:", s2[-1].nodeid class Options(usage.Options): optParameters = [ ("k", "k", 3, "required shares", int), ("N", "N", 10, "total shares", int), ("servers", None, 100, "number of servers", int), ("seed", None, None, "seed to use for creating ring"), ("fileseed", None, "blah", "seed to use for creating files"), ("permute", "p", 1, "1 to permute, 0 to use flat ring", int), ] def postOptions(self): assert self["seed"] def do_run(ring, opts): avg_space_per_file = avg_filesize * opts["N"] / opts["k"] fileseed = opts["fileseed"] all_servers_have_room = True no_files_have_wrapped = True for filenum in count(0): #used = list(reversed(sorted([s.used for s in ring.servers]))) #used = [s.used for s in ring.servers] #print used si = myhash(fileseed+str(filenum)).hexdigest() filesize = make_up_a_file_size(si) sharesize = filesize / opts["k"] if filenum%4000==0 and filenum > 1: ring.dump_usage(filenum, avg_space_per_file) servers = ring.servers_for_si(si) #print ring.show_servers(servers[:opts["N"]]) remaining_shares = opts["N"] index = 0 server_was_full = False file_was_wrapped = False remaining_servers = set(servers) while remaining_shares: if index >= len(servers): index = 0 file_was_wrapped = True s = servers[index] accepted = s.upload(sharesize) if not accepted: server_was_full = True remaining_servers.discard(s) if not remaining_servers: print "-- GRID IS FULL" ring.dump_usage(filenum, avg_space_per_file) return filenum index += 1 continue remaining_shares -= 1 index += 1 # file is done being uploaded if server_was_full and all_servers_have_room: all_servers_have_room = False print "-- FIRST SERVER FULL" ring.dump_usage(filenum, avg_space_per_file) if file_was_wrapped and no_files_have_wrapped: no_files_have_wrapped = False print "-- FIRST FILE WRAPPED" ring.dump_usage(filenum, avg_space_per_file) def do_ring(opts): total_capacity = opts["servers"]*SERVER_CAPACITY avg_space_per_file = avg_filesize * opts["N"] / opts["k"] avg_files = total_capacity / avg_space_per_file print "expected number of uploads:", avg_files if opts["permute"]: print " PERMUTED" else: print " LINEAR" seed = opts["seed"] ring = Ring(opts["servers"], seed, opts["permute"]) do_run(ring, opts) def run(opts): do_ring(opts) if __name__ == "__main__": opts = Options() opts.parseOptions() run(opts) tahoe-lafs-1.10.0/misc/simulators/simulate_load.py000066400000000000000000000114641221140116300221420ustar00rootroot00000000000000#!/usr/bin/env python # WARNING. There is a bug in this script so that it does not simulate the actual Tahoe Two server selection algorithm that it was intended to simulate. See http://allmydata.org/trac/tahoe-lafs/ticket/302 (stop permuting peerlist, use SI as offset into ring instead?) 
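# --- Editor's worked example (not part of this script) ---
# Both simulators rely on the same capacity arithmetic: a file of size F is
# erasure-coded into N shares of roughly F/k bytes each, i.e. about F*N/k bytes
# of grid-wide ciphertext, so a grid of `servers` nodes holding `capacity` bytes
# each can absorb roughly servers*capacity/(avg_filesize*N/k) uploads before it
# fills. The numbers in the commented demo are illustrative, not measurements.
def _expected_uploads(servers, capacity, avg_filesize, N, k):
    avg_space_per_file = avg_filesize * N / float(k)   # grid-wide ciphertext per file
    return servers * capacity / avg_space_per_file

#print _expected_uploads(100, 10**12, 2**30, 10, 3)   # roughly 2.8e4 uploads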
import random SERVER_CAPACITY = 10**12 class Server: def __init__(self): self.si = random.randrange(0, 2**31) self.used = 0 self.max = SERVER_CAPACITY self.full_at_tick = None def __repr__(self): if self.full_at_tick is not None: return "<%s %s full at %d>" % (self.__class__.__name__, self.si, self.full_at_tick) else: return "<%s %s>" % (self.__class__.__name__, self.si) SERVERS = 4 K = 3 N = 10 def make_up_a_file_size(): return (2 ** random.randrange(8, 31)) def go(permutedpeerlist): servers = [ Server() for x in range(SERVERS) ] servers.sort(cmp=lambda x,y: cmp(x.si, y.si)) doubled_up_shares = 0 tick = 0 fullservers = 0 while True: nextsharesize = make_up_a_file_size() / K if permutedpeerlist: random.shuffle(servers) else: # rotate a random number rot = random.randrange(0, len(servers)) servers = servers[rot:] + servers[:rot] i = 0 wrapped = False sharestoput = N while sharestoput: server = servers[i] if server.used + nextsharesize < server.max: server.used += nextsharesize sharestoput -= 1 if wrapped: doubled_up_shares += 1 else: if server.full_at_tick is None: server.full_at_tick = tick fullservers += 1 if fullservers == len(servers): # print "Couldn't place share -- all servers full. Stopping." return (servers, doubled_up_shares) i += 1 if i == len(servers): wrapped = True i = 0 tick += 1 def div_ceil(n, d): """ The smallest integer k such that k*d >= n. """ return (n/d) + (n%d != 0) DESIRED_COLUMNS = 70 START_FILES = 137000 STOP_FILES = 144000 def test(permutedpeerlist, iters): # The i'th element of the filledat list is how many servers got full when the i'th file was uploaded. filledat = [] for test in range(iters): (servers, doubled_up_shares) = go(permutedpeerlist) print "doubled_up_shares: ", doubled_up_shares for server in servers: fidx = server.full_at_tick filledat.extend([0]*(fidx-len(filledat)+1)) filledat[fidx] += 1 startfiles = 0 while filledat[startfiles] == 0: startfiles += 1 filespercolumn = div_ceil(len(filledat) - startfiles, (DESIRED_COLUMNS - 3)) # to make comparisons between runs line up: # startfiles = START_FILES # filespercolumn = div_ceil(STOP_FILES - startfiles, (DESIRED_COLUMNS - 3)) # The i'th element of the compressedfilledat list is how many servers got full when the filespercolumn files starting at startfiles + i were uploaded. compressedfilledat = [] idx = startfiles while idx < len(filledat): compressedfilledat.append(0) for i in range(filespercolumn): compressedfilledat[-1] += filledat[idx] idx += 1 if idx >= len(filledat): break # The i'th element of the fullat list is how many servers were full by the tick numbered startfiles + i * filespercolumn (on average). fullat = [0] * len(compressedfilledat) for idx, num in enumerate(compressedfilledat): for fidx in range(idx, len(fullat)): fullat[fidx] += num for idx in range(len(fullat)): fullat[idx] = fullat[idx] / float(iters) # Now print it out as an ascii art graph. 
import sys for serversfull in range(40, 0, -1): sys.stdout.write("%2d " % serversfull) for numfull in fullat: if int(numfull) == serversfull: sys.stdout.write("*") else: sys.stdout.write(" ") sys.stdout.write("\n") sys.stdout.write(" ^-- servers full\n") idx = 0 while idx < len(fullat): nextmark = "%d--^ " % (startfiles + idx * filespercolumn) sys.stdout.write(nextmark) idx += len(nextmark) sys.stdout.write("\nfiles uploaded --> \n") if __name__ == "__main__": import sys iters = 16 for arg in sys.argv: if arg.startswith("--iters="): iters = int(arg[8:]) if "--permute" in sys.argv: print "doing permuted peerlist, iterations: %d" % iters test(True, iters) else: print "doing simple ring, iterations: %d" % iters test(False, iters) tahoe-lafs-1.10.0/misc/simulators/simulator.py000066400000000000000000000220441221140116300213330ustar00rootroot00000000000000#! /usr/bin/env python import sha as shamodule import os, random from pkg_resources import require require('PyRRD') from pyrrd import graph from pyrrd.rrd import DataSource, RRD, RRA def sha(s): return shamodule.new(s).digest() def randomid(): return os.urandom(20) class Node: def __init__(self, nid, introducer, simulator): self.nid = nid self.introducer = introducer self.simulator = simulator self.shares = {} self.capacity = random.randrange(1000) self.utilization = 0 self.files = [] def permute_peers(self, fileid): permuted = [(sha(fileid+n.nid),n) for n in self.introducer.get_all_nodes()] permuted.sort() return permuted def publish_file(self, fileid, size, numshares=100): sharesize = 4 * size / numshares permuted = self.permute_peers(fileid) last_givento = None tried = 0 givento = [] while numshares and permuted: pid,node = permuted.pop(0) tried += 1 last_givento = pid if node.accept_share(fileid, sharesize): givento.append((pid,node)) numshares -= 1 if numshares: # couldn't push, should delete for pid,node in givento: node.delete_share(fileid) return False self.files.append((fileid, numshares)) self.introducer.please_preserve(fileid, size, tried, last_givento) return (True, tried) def accept_share(self, fileid, sharesize): if self.utilization < self.capacity: # we have room! yay! 
self.shares[fileid] = sharesize self.utilization += sharesize return True if self.decide(sharesize): # we don't, but we'll make room self.make_space(sharesize) self.shares[fileid] = sharesize self.utilization += sharesize return True else: # we're full, try elsewhere return False def decide(self, sharesize): if sharesize > self.capacity: return False return False return random.random() > 0.5 def make_space(self, sharesize): assert sharesize <= self.capacity while self.capacity - self.utilization < sharesize: victim = random.choice(self.shares.keys()) self.simulator.lost_data(self.shares[victim]) self.delete_share(victim) def delete_share(self, fileid): if fileid in self.shares: self.utilization -= self.shares[fileid] del self.shares[fileid] return True return False def retrieve_file(self): if not self.files: return fileid,numshares = random.choice(self.files) needed = numshares / 4 peers = [] for pid,node in self.permute_peers(fileid): if random.random() > self.simulator.P_NODEAVAIL: continue # node isn't available right now if node.has_share(fileid): peers.append(node) if len(peers) >= needed: return True return False def delete_file(self): if not self.files: return False which = random.choice(self.files) self.files.remove(which) fileid,numshares = which self.introducer.delete(fileid) return True class Introducer: def __init__(self, simulator): self.living_files = {} self.utilization = 0 # total size of all active files self.simulator = simulator self.simulator.stamp_utilization(self.utilization) def get_all_nodes(self): return self.all_nodes def please_preserve(self, fileid, size, tried, last_givento): self.living_files[fileid] = (size, tried, last_givento) self.utilization += size self.simulator.stamp_utilization(self.utilization) def please_delete(self, fileid): self.delete(fileid) def permute_peers(self, fileid): permuted = [(sha(fileid+n.nid),n) for n in self.get_all_nodes()] permuted.sort() return permuted def delete(self, fileid): permuted = self.permute_peers(fileid) size, tried, last_givento = self.living_files[fileid] pid = "" while tried and pid < last_givento: pid,node = permuted.pop(0) had_it = node.delete_share(fileid) if had_it: tried -= 1 self.utilization -= size self.simulator.stamp_utilization(self.utilization) del self.living_files[fileid] class Simulator: NUM_NODES = 1000 EVENTS = ["ADDFILE", "DELFILE", "ADDNODE", "DELNODE"] RATE_ADDFILE = 1.0 / 10 RATE_DELFILE = 1.0 / 20 RATE_ADDNODE = 1.0 / 3000 RATE_DELNODE = 1.0 / 4000 P_NODEAVAIL = 1.0 def __init__(self): self.time = 1164783600 # small numbers of seconds since the epoch confuse rrdtool self.prevstamptime = int(self.time) ds = DataSource(ds_name='utilizationds', ds_type='GAUGE', heartbeat=1) rra = RRA(cf='AVERAGE', xff=0.1, steps=1, rows=1200) self.rrd = RRD("/tmp/utilization.rrd", ds=[ds], rra=[rra], start=self.time) self.rrd.create() self.introducer = q = Introducer(self) self.all_nodes = [Node(randomid(), q, self) for i in range(self.NUM_NODES)] q.all_nodes = self.all_nodes self.next = [] self.schedule_events() self.verbose = False self.added_files = 0 self.added_data = 0 self.deleted_files = 0 self.published_files = [] self.failed_files = 0 self.lost_data_bytes = 0 # bytes deleted to make room for new shares def stamp_utilization(self, utilization): if int(self.time) > (self.prevstamptime+1): self.rrd.bufferValue(self.time, utilization) self.prevstamptime = int(self.time) def write_graph(self): self.rrd.update() self.rrd = None import gc gc.collect() def1 = graph.DataDefinition(vname="a", 
rrdfile='/tmp/utilization.rrd', ds_name='utilizationds') area1 = graph.Area(value="a", color="#990033", legend='utilizationlegend') g = graph.Graph('/tmp/utilization.png', imgformat='PNG', width=540, height=100, vertical_label='utilizationverticallabel', title='utilizationtitle', lower_limit=0) g.data.append(def1) g.data.append(area1) g.write() def add_file(self): size = random.randrange(1000) n = random.choice(self.all_nodes) if self.verbose: print "add_file(size=%d, from node %s)" % (size, n) fileid = randomid() able = n.publish_file(fileid, size) if able: able, tried = able self.added_files += 1 self.added_data += size self.published_files.append(tried) else: self.failed_files += 1 def lost_data(self, size): self.lost_data_bytes += size def delete_file(self): all_nodes = self.all_nodes[:] random.shuffle(all_nodes) for n in all_nodes: if n.delete_file(): self.deleted_files += 1 return print "no files to delete" def _add_event(self, etype): rate = getattr(self, "RATE_" + etype) next = self.time + random.expovariate(rate) self.next.append((next, etype)) self.next.sort() def schedule_events(self): types = set([e[1] for e in self.next]) for etype in self.EVENTS: if not etype in types: self._add_event(etype) def do_event(self): time, etype = self.next.pop(0) assert time > self.time # current_time = self.time self.time = time self._add_event(etype) if etype == "ADDFILE": self.add_file() elif etype == "DELFILE": self.delete_file() elif etype == "ADDNODE": pass #self.add_node() elif etype == "DELNODE": #self.del_node() pass # self.print_stats(current_time, etype) def print_stats_header(self): print "time: added failed lost avg_tried" def print_stats(self, time, etype): if not self.published_files: avg_tried = "NONE" else: avg_tried = sum(self.published_files) / len(self.published_files) print time, etype, self.added_data, self.failed_files, self.lost_data_bytes, avg_tried, len(self.introducer.living_files), self.introducer.utilization global s s = None def main(): # rrdtool.create("foo.rrd", # "--step 10", # "DS:files-added:DERIVE::0:1000", # "RRA:AVERAGE:1:1:1200", # ) global s s = Simulator() # s.print_stats_header() for i in range(1000): s.do_event() print "%d files added, %d files deleted" % (s.added_files, s.deleted_files) return s if __name__ == '__main__': main() tahoe-lafs-1.10.0/misc/simulators/sizes.py000066400000000000000000000172261221140116300204570ustar00rootroot00000000000000#! 
/usr/bin/env python import random, math, re from twisted.python import usage class Args(usage.Options): optParameters = [ ["mode", "m", "alpha", "validation scheme"], ["arity", "k", 2, "k (airty) for hash tree"], ] def opt_arity(self, option): self['arity'] = int(option) def parseArgs(self, *args): if len(args) > 0: self['mode'] = args[0] def charttest(): import gdchart sizes = [random.randrange(10, 20) for i in range(10)] x = gdchart.Line() x.width = 250 x.height = 250 x.xtitle = "sample" x.ytitle = "size" x.title = "Example Graph" #x.ext_color = [ "white", "yellow", "red", "blue", "green"] x.setData(sizes) #x.setLabels(["Mon", "Tue", "Wed", "Thu", "Fri"]) x.draw("simple.png") KiB=1024 MiB=1024*KiB GiB=1024*MiB TiB=1024*GiB PiB=1024*TiB class Sizes: def __init__(self, mode, file_size, arity=2): MAX_SEGSIZE = 128*KiB self.mode = mode self.file_size = file_size self.seg_size = seg_size = 1.0 * min(MAX_SEGSIZE, file_size) self.num_segs = num_segs = math.ceil(file_size / seg_size) self.num_blocks = num_blocks = num_segs self.num_shares = num_shares = 10 self.shares_needed = shares_needed = 3 self.block_size = block_size = seg_size / shares_needed self.share_size = share_size = block_size * num_blocks # none of this includes the share-level hash chain yet, since that is # only a function of the number of shares. All overhead numbers # assume that the share-level hash chain has already been sent, # including the root of the block-level hash tree. if mode == "alpha": # no hash tree at all self.block_arity = 0 self.block_tree_depth = 0 self.block_overhead = 0 self.bytes_until_some_data = 32 + share_size self.share_storage_overhead = 0 self.share_transmission_overhead = 0 elif mode == "beta": # k=num_blocks, d=1 # each block has a 32-byte hash self.block_arity = num_blocks self.block_tree_depth = 1 self.block_overhead = 32 # the share has a list of hashes, one for each block self.share_storage_overhead = (self.block_overhead * num_blocks) # we can get away with not sending the hash of the share that # we're sending in full, once self.share_transmission_overhead = self.share_storage_overhead - 32 # we must get the whole list (so it can be validated) before # any data can be validated self.bytes_until_some_data = (self.share_transmission_overhead + block_size) elif mode == "gamma": self.block_arity = k = arity d = math.ceil(math.log(num_blocks, k)) self.block_tree_depth = d num_leaves = k ** d # to make things easier, we make the pessimistic assumption that # we have to store hashes for all the empty places in the tree # (when the number of shares is not an exact exponent of k) self.block_overhead = 32 # the block hashes are organized into a k-ary tree, which # means storing (and eventually transmitting) more hashes. This # count includes all the low-level share hashes and the root. 
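            # (Editor's note, illustrative only.) Sanity check for the node-count formula
            # below: a perfect k-ary tree with num_leaves = k^d leaves has
            # (num_leaves*k - 1)/(k - 1) nodes in total, e.g. k=2 with 8 leaves gives
            # 15 nodes (8+4+2+1) and k=3 with 9 leaves gives 13 nodes (9+3+1).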
hash_nodes = (num_leaves*k - 1) / (k - 1) #print "hash_depth", d #print "num_leaves", num_leaves #print "hash_nodes", hash_nodes # the storage overhead is this self.share_storage_overhead = 32 * (hash_nodes - 1) # the transmission overhead is smaller: if we actually transmit # every block, we don't have to transmit 1/k of the # lowest-level block hashes, and we don't have to transmit the # root because it was already sent with the share-level hash tree self.share_transmission_overhead = 32 * (hash_nodes - 1 # the root - num_leaves / k) # we must get a full sibling hash chain before we can validate # any data sibling_length = d * (k-1) self.bytes_until_some_data = 32 * sibling_length + block_size else: raise ValueError("unknown mode '%s" % mode) self.storage_overhead = self.share_storage_overhead * num_shares self.storage_overhead_percentage = 100.0 * self.storage_overhead / file_size def dump(self): for k in ("mode", "file_size", "seg_size", "num_segs", "num_blocks", "num_shares", "shares_needed", "block_size", "share_size", "block_arity", "block_tree_depth", "block_overhead", "share_storage_overhead", "share_transmission_overhead", "storage_overhead", "storage_overhead_percentage", "bytes_until_some_data"): print k, getattr(self, k) def fmt(num, trim=False): if num < KiB: #s = str(num) + "#" s = "%.2f#" % num elif num < MiB: s = "%.2fk" % (num / KiB) elif num < GiB: s = "%.2fM" % (num / MiB) elif num < TiB: s = "%.2fG" % (num / GiB) elif num < PiB: s = "%.2fT" % (num / TiB) else: s = "big" if trim: s = re.sub(r'(\.0+)([kMGT#])', lambda m: m.group(2), s) else: s = re.sub(r'(\.0+)([kMGT#])', lambda m: (" "*len(m.group(1))+m.group(2)), s) if s.endswith("#"): s = s[:-1] + " " return s def text(): opts = Args() opts.parseOptions() mode = opts["mode"] arity = opts["arity"] # 0123456789012345678901234567890123456789012345678901234567890123456 print "mode=%s" % mode, " arity=%d" % arity print " storage storage" print "Size sharesize overhead overhead k d alacrity" print " (bytes) (%)" print "------- ------- -------- -------- ---- -- --------" #sizes = [2 ** i for i in range(7, 41)] radix = math.sqrt(10); expstep = 2 radix = 2; expstep = 2 #radix = 10; expstep = 1 maxexp = int(math.ceil(math.log(1e12, radix)))+2 sizes = [radix ** i for i in range(2,maxexp,expstep)] for file_size in sizes: s = Sizes(mode, file_size, arity) out = "" out += "%7s " % fmt(file_size, trim=True) out += "%7s " % fmt(s.share_size) out += "%8s" % fmt(s.storage_overhead) out += "%10.2f " % s.storage_overhead_percentage out += " %4d" % int(s.block_arity) out += " %2d" % int(s.block_tree_depth) out += " %8s" % fmt(s.bytes_until_some_data) print out def graph(): # doesn't work yet import Gnuplot opts = Args() opts.parseOptions() mode = opts["mode"] arity = opts["arity"] g = Gnuplot.Gnuplot(debug=1) g.title("overhead / alacrity tradeoffs") g.xlabel("file size") g.ylabel("stuff") sizes = [2 ** i for i in range(7, 32)] series = {"overhead": {}, "alacrity": {}} for file_size in sizes: s = Sizes(mode, file_size, arity) series["overhead"][file_size] = s.storage_overhead_percentage series["alacrity"][file_size] = s.bytes_until_some_data g.plot([ (fs, series["overhead"][fs]) for fs in sizes ]) raw_input("press return") if __name__ == '__main__': text() #graph() tahoe-lafs-1.10.0/misc/simulators/storage-overhead.py000066400000000000000000000057561221140116300225660ustar00rootroot00000000000000#!/usr/bin/env python import sys, math from allmydata import uri, storage from allmydata.immutable import upload from allmydata.interfaces import 
DEFAULT_MAX_SEGMENT_SIZE from allmydata.util import mathutil def roundup(size, blocksize=4096): return blocksize * mathutil.div_ceil(size, blocksize) class BigFakeString: def __init__(self, length): self.length = length self.fp = 0 def seek(self, offset, whence=0): if whence == 0: self.fp = offset elif whence == 1: self.fp += offset elif whence == 2: self.fp = self.length - offset def tell(self): return self.fp def calc(filesize, params=(3,7,10), segsize=DEFAULT_MAX_SEGMENT_SIZE): num_shares = params[2] if filesize <= upload.Uploader.URI_LIT_SIZE_THRESHOLD: urisize = len(uri.LiteralFileURI("A"*filesize).to_string()) sharesize = 0 sharespace = 0 else: u = upload.FileUploader(None) # XXX changed u.set_params(params) # unfortunately, Encoder doesn't currently lend itself to answering # this question without measuring a filesize, so we have to give it a # fake one data = BigFakeString(filesize) u.set_filehandle(data) u.set_encryption_key("a"*16) sharesize, blocksize = u.setup_encoder() # how much overhead? # 0x20 bytes of offsets # 0x04 bytes of extension length # 0x1ad bytes of extension (=429) # total is 465 bytes num_segments = mathutil.div_ceil(filesize, segsize) num_share_hashes = int(math.log(mathutil.next_power_of_k(num_shares, 2), 2)) + 1 sharesize = storage.allocated_size(sharesize, num_segments, num_share_hashes, 429) sharespace = num_shares * roundup(sharesize) urisize = len(uri.pack_uri(storage_index="a"*32, key="a"*16, uri_extension_hash="a"*32, needed_shares=params[0], total_shares=params[2], size=filesize)) return urisize, sharesize, sharespace def main(): filesize = int(sys.argv[1]) urisize, sharesize, sharespace = calc(filesize) print "urisize:", urisize print "sharesize: %10d" % sharesize print "sharespace: %10d" % sharespace print "desired expansion: %1.1f" % (1.0 * 10 / 3) print "effective expansion: %1.1f" % (1.0 * sharespace / filesize) def chart(): filesize = 2 while filesize < 2**20: urisize, sharesize, sharespace = calc(int(filesize)) expansion = 1.0 * sharespace / int(filesize) print "%d,%d,%d,%1.2f" % (int(filesize), urisize, sharespace, expansion) filesize = filesize * 2**0.5 if __name__ == '__main__': if sys.argv[1] == "chart": chart() else: main() tahoe-lafs-1.10.0/relnotes.txt000066400000000000000000000143401221140116300162010ustar00rootroot00000000000000ANNOUNCING Tahoe, the Least-Authority File System, v1.10 The Tahoe-LAFS team is pleased to announce the immediate availability of version 1.10.0 of Tahoe-LAFS, an extremely reliable distributed storage system. Get it here: https://tahoe-lafs.org/source/tahoe-lafs/trunk/docs/quickstart.rst Tahoe-LAFS is the first distributed storage system to offer "provider-independent security" — meaning that not even the operators of your storage servers can read or alter your data without your consent. Here is the one-page explanation of its unique security and fault-tolerance properties: https://tahoe-lafs.org/source/tahoe-lafs/trunk/docs/about.rst The previous stable release of Tahoe-LAFS was v1.9.2, released on July 3, 2012. v1.10.0 is a feature release which adds a new Introducer protocol, improves the appearance of the web-based user interface, improves grid security by making introducer FURLs unguessable, and fixes many bugs. See the NEWS file [1] for details. WHAT IS IT GOOD FOR? 
With Tahoe-LAFS, you distribute your filesystem across multiple servers, and even if some of the servers fail or are taken over by an attacker, the entire filesystem continues to work correctly, and continues to preserve your privacy and security. You can easily share specific files and directories with other people. In addition to the core storage system itself, volunteers have built other projects on top of Tahoe-LAFS and have integrated Tahoe-LAFS with existing systems, including Windows, JavaScript, iPhone, Android, Hadoop, Flume, Django, Puppet, bzr, mercurial, perforce, duplicity, TiddlyWiki, and more. See the Related Projects page on the wiki [3]. We believe that strong cryptography, Free and Open Source Software, erasure coding, and principled engineering practices make Tahoe-LAFS safer than RAID, removable drive, tape, on-line backup or cloud storage. This software is developed under test-driven development, and there are no known bugs or security flaws which would compromise confidentiality or data integrity under recommended use. (For all important issues that we are currently aware of please see the known_issues.rst file [2].) COMPATIBILITY This release should be compatible with the version 1 series of Tahoe-LAFS. Clients from this release can write files and directories in the format used by clients of all versions back to v1.0 (which was released March 25, 2008). Clients from this release can read files and directories produced by clients of all versions since v1.0. Servers from this release can serve clients of all versions back to v1.0 and clients from this release can use servers of all versions back to v1.0. Except for the new optional MDMF format, we have not made any intentional compatibility changes. However we do not yet have the test infrastructure to continuously verify that all new versions are interoperable with previous versions. We intend to build such an infrastructure in the future. The new Introducer protocol added in v1.10 is backwards compatible with older clients and introducer servers, however some features will be unavailable when an older node is involved. Please see docs/nodekeys.rst [14] for details. This is the eighteenth release in the version 1 series. This series of Tahoe-LAFS will be actively supported and maintained for the foreseeable future, and future versions of Tahoe-LAFS will retain the ability to read and write files compatible with this series. LICENCE You may use this package under the GNU General Public License, version 2 or, at your option, any later version. See the file "COPYING.GPL" [4] for the terms of the GNU General Public License, version 2. You may use this package under the Transitive Grace Period Public Licence, version 1 or, at your option, any later version. (The Transitive Grace Period Public Licence has requirements similar to the GPL except that it allows you to delay for up to twelve months after you redistribute a derived work before releasing the source code of your derived work.) See the file "COPYING.TGPPL.rst" [5] for the terms of the Transitive Grace Period Public Licence, version 1. (You may choose to use this package under the terms of either licence, at your option.) INSTALLATION Tahoe-LAFS works on Linux, Mac OS X, Windows, Solaris, *BSD, and probably most other systems. Start with "docs/quickstart.rst" [6]. HACKING AND COMMUNITY Please join us on the mailing list [7]. 
Patches are gratefully accepted -- the RoadMap page [8] shows the next improvements that we plan to make and CREDITS [9] lists the names of people who've contributed to the project. The Dev page [10] contains resources for hackers. SPONSORSHIP Atlas Networks has contributed several hosted servers for performance testing. Thank you to Atlas Networks [11] for their generous and public-spirited support. And a special thanks to Least Authority Enterprises [12], which employs several Tahoe-LAFS developers, for their continued support. HACK TAHOE-LAFS! If you can find a security flaw in Tahoe-LAFS which is serious enough that we feel compelled to warn our users and issue a fix, then we will award you with a customized t-shirts with your exploit printed on it and add you to the "Hack Tahoe-LAFS Hall Of Fame" [13]. ACKNOWLEDGEMENTS This is the twelfth release of Tahoe-LAFS to be created solely as a labor of love by volunteers. Thank you very much to the team of "hackers in the public interest" who make Tahoe-LAFS possible. Brian Warner on behalf of the Tahoe-LAFS team May 1, 2013 San Francisco, California, USA [1] https://tahoe-lafs.org/trac/tahoe-lafs/browser/NEWS.rst [2] https://tahoe-lafs.org/trac/tahoe-lafs/browser/docs/known_issues.rst [3] https://tahoe-lafs.org/trac/tahoe-lafs/wiki/RelatedProjects [4] https://tahoe-lafs.org/trac/tahoe-lafs/browser/COPYING.GPL [5] https://tahoe-lafs.org/trac/tahoe-lafs/browser/COPYING.TGPPL.rst [6] https://tahoe-lafs.org/trac/tahoe-lafs/browser/docs/quickstart.rst [7] https://tahoe-lafs.org/cgi-bin/mailman/listinfo/tahoe-dev [8] https://tahoe-lafs.org/trac/tahoe-lafs/roadmap [9] https://tahoe-lafs.org/trac/tahoe-lafs/browser/CREDITS [10] https://tahoe-lafs.org/trac/tahoe-lafs/wiki/Dev [11] http://atlasnetworks.us/ [12] https://leastauthority.com/ [13] https://tahoe-lafs.org/hacktahoelafs/ [14] https://tahoe-lafs.org/trac/tahoe-lafs/browser/docs/nodekeys.rst tahoe-lafs-1.10.0/setup.cfg000066400000000000000000000011351221140116300154240ustar00rootroot00000000000000[easy_install] zip_ok = False find_links = misc/dependencies tahoe-deps ../tahoe-deps https://tahoe-lafs.org/source/tahoe-lafs/deps/tahoe-lafs-dep-sdists/ https://tahoe-lafs.org/source/tahoe-lafs/deps/tahoe-lafs-dep-eggs/ [aliases] build = update_version develop --prefix=support make_executable build test = update_version develop --prefix=support make_executable build trial sdist = update_version sdist install = update_version install bdist_egg = update_version bdist_egg trial = update_version trial sdist_dsc = update_version sdist_dsc [egg_info] tag_build = tag_date = 0 tag_svn_revision = 0 tahoe-lafs-1.10.0/setup.py000066400000000000000000000432241221140116300153220ustar00rootroot00000000000000#! /usr/bin/env python # -*- coding: utf-8 -*- import sys; assert sys.version_info < (3,), ur"Tahoe-LAFS does not run under Python 3. Please use a version of Python between 2.6 and 2.7.x inclusive." # Tahoe-LAFS -- secure, distributed storage grid # # Copyright © 2006-2012 The Tahoe-LAFS Software Foundation # # This file is part of Tahoe-LAFS. # # See the docs/about.rst file for licensing information. 
import glob, os, stat, subprocess, re ##### sys.path management def pylibdir(prefixdir): pyver = "python%d.%d" % (sys.version_info[:2]) if sys.platform == "win32": return os.path.join(prefixdir, "Lib", "site-packages") else: return os.path.join(prefixdir, "lib", pyver, "site-packages") basedir = os.path.dirname(os.path.abspath(__file__)) supportlib = pylibdir(os.path.join(basedir, "support")) # locate our version number def read_version_py(infname): try: verstrline = open(infname, "rt").read() except EnvironmentError: return None else: VSRE = r"^verstr = ['\"]([^'\"]*)['\"]" mo = re.search(VSRE, verstrline, re.M) if mo: return mo.group(1) version = read_version_py("src/allmydata/_version.py") APPNAME='allmydata-tahoe' APPNAMEFILE = os.path.join('src', 'allmydata', '_appname.py') APPNAMEFILESTR = "__appname__ = '%s'" % (APPNAME,) try: curappnamefilestr = open(APPNAMEFILE, 'rU').read() except EnvironmentError: # No file, or unreadable or something, okay then let's try to write one. open(APPNAMEFILE, "w").write(APPNAMEFILESTR) else: if curappnamefilestr.strip() != APPNAMEFILESTR: print("Error -- this setup.py file is configured with the 'application name' to be '%s', but there is already a file in place in '%s' which contains the contents '%s'. If the file is wrong, please remove it and setup.py will regenerate it and write '%s' into it." % (APPNAME, APPNAMEFILE, curappnamefilestr, APPNAMEFILESTR)) sys.exit(-1) # setuptools/zetuptoolz looks in __main__.__requires__ for a list of # requirements. When running "python setup.py test", __main__ is # setup.py, so we put the list here so that the requirements will be # available for tests: # Tahoe's dependencies are managed by the find_links= entry in setup.cfg and # the _auto_deps.install_requires list, which is used in the call to setup() # below. 
adglobals = {} execfile('src/allmydata/_auto_deps.py', adglobals) install_requires = adglobals['install_requires'] if len(sys.argv) > 1 and sys.argv[1] == '--fakedependency': del sys.argv[1] install_requires += ["fakedependency >= 1.0.0"] __requires__ = install_requires[:] egg = os.path.realpath(glob.glob('setuptools-*.egg')[0]) sys.path.insert(0, egg) import setuptools; setuptools.bootstrap_install_from = egg from setuptools import setup from setuptools.command import sdist from setuptools import Command trove_classifiers=[ "Development Status :: 5 - Production/Stable", "Environment :: Console", "Environment :: Web Environment", "License :: OSI Approved :: GNU General Public License (GPL)", "License :: DFSG approved", "License :: Other/Proprietary License", "Intended Audience :: Developers", "Intended Audience :: End Users/Desktop", "Intended Audience :: System Administrators", "Operating System :: Microsoft", "Operating System :: Microsoft :: Windows", "Operating System :: Microsoft :: Windows :: Windows NT/2000", "Operating System :: Unix", "Operating System :: POSIX :: Linux", "Operating System :: POSIX", "Operating System :: MacOS :: MacOS X", "Operating System :: OS Independent", "Natural Language :: English", "Programming Language :: C", "Programming Language :: Python", "Programming Language :: Python :: 2", "Programming Language :: Python :: 2.4", "Programming Language :: Python :: 2.5", "Programming Language :: Python :: 2.6", "Programming Language :: Python :: 2.7", "Topic :: Utilities", "Topic :: System :: Systems Administration", "Topic :: System :: Filesystems", "Topic :: System :: Distributed Computing", "Topic :: Software Development :: Libraries", "Topic :: Communications :: Usenet News", "Topic :: System :: Archiving :: Backup", "Topic :: System :: Archiving :: Mirroring", "Topic :: System :: Archiving", ] setup_requires = [] # Nevow imports itself when building, which causes Twisted and zope.interface # to be imported. We need to make sure that the versions of Twisted and # zope.interface used at build time satisfy Nevow's requirements. If not # then there are two problems: # - prior to Nevow v0.9.33, Nevow didn't declare its dependency on Twisted # in a way that enabled setuptools to satisfy that requirement at # build time. # - some versions of zope.interface, e.g. v3.6.4, are incompatible with # Nevow, and we need to avoid those both at build and run-time. # # This only matters when compatible versions of Twisted and zope.interface # are not already installed. Retire this hack when # https://bugs.launchpad.net/nevow/+bug/812537 has been fixed. setup_requires += [req for req in install_requires if req.startswith('Twisted') or req.startswith('zope.interface')] # trialcoverage is required if you want the "trial" unit test runner to have a # "--reporter=bwverbose-coverage" option which produces code-coverage results. # The required version is 0.3.3, because that is the latest version that only # depends on a version of pycoverage for which binary packages are available. if "--reporter=bwverbose-coverage" in sys.argv: setup_requires.append('trialcoverage >= 0.3.3') # stdeb is required to produce Debian files with the "sdist_dsc" command. if "sdist_dsc" in sys.argv: setup_requires.append('stdeb >= 0.3') # We no longer have any requirements specific to tests. tests_require=[] class Trial(Command): description = "run trial (use 'bin%stahoe debug trial' for the full set of trial options)" % (os.sep,) # This is just a subset of the most useful options, for compatibility. 
user_options = [ ("no-rterrors", None, "Don't print out tracebacks as they occur."), ("rterrors", "e", "Print out tracebacks as they occur (default, so ignored)."), ("until-failure", "u", "Repeat a test (specified by -s) until it fails."), ("reporter=", None, "The reporter to use for this test run."), ("suite=", "s", "Specify the test suite."), ("quiet", None, "Don't display version numbers and paths of Tahoe dependencies."), ] def initialize_options(self): self.rterrors = False self.no_rterrors = False self.until_failure = False self.reporter = None self.suite = "allmydata" self.quiet = False def finalize_options(self): pass def run(self): args = [sys.executable, os.path.join('bin', 'tahoe')] if not self.quiet: args.append('--version-and-path') args += ['debug', 'trial'] if self.rterrors and self.no_rterrors: raise AssertionError("--rterrors and --no-rterrors conflict.") if not self.no_rterrors: args.append('--rterrors') if self.until_failure: args.append('--until-failure') if self.reporter: args.append('--reporter=' + self.reporter) if self.suite: args.append(self.suite) rc = subprocess.call(args) sys.exit(rc) class MakeExecutable(Command): description = "make the 'bin%stahoe' scripts" % (os.sep,) user_options = [] def initialize_options(self): pass def finalize_options(self): pass def run(self): bin_tahoe_template = os.path.join("bin", "tahoe-script.template") # tahoe.pyscript is really only necessary for Windows, but we also # create it on Unix for consistency. script_names = ["tahoe.pyscript", "tahoe"] # Create the tahoe script file under the 'bin' directory. This # file is exactly the same as the 'tahoe-script.template' script # except that the shebang line is rewritten to use our sys.executable # for the interpreter. f = open(bin_tahoe_template, "rU") script_lines = f.readlines() f.close() script_lines[0] = '#!%s\n' % (sys.executable,) for script_name in script_names: tahoe_script = os.path.join("bin", script_name) try: os.remove(tahoe_script) except Exception: if os.path.exists(tahoe_script): raise f = open(tahoe_script, "wb") for line in script_lines: f.write(line) f.close() # chmod +x unix_script = os.path.join("bin", "tahoe") old_mode = stat.S_IMODE(os.stat(unix_script)[stat.ST_MODE]) new_mode = old_mode | (stat.S_IXUSR | stat.S_IRUSR | stat.S_IXGRP | stat.S_IRGRP | stat.S_IXOTH | stat.S_IROTH ) os.chmod(unix_script, new_mode) old_tahoe_exe = os.path.join("bin", "tahoe.exe") try: os.remove(old_tahoe_exe) except Exception: if os.path.exists(old_tahoe_exe): raise GIT_VERSION_BODY = ''' # This _version.py is generated from git metadata by the tahoe setup.py. __pkgname__ = "%(pkgname)s" real_version = "%(version)s" full_version = "%(full)s" verstr = "%(normalized)s" __version__ = verstr ''' def run_command(args, cwd=None, verbose=False): try: # remember shell=False, so use git.cmd on windows, not just git p = subprocess.Popen(args, stdout=subprocess.PIPE, cwd=cwd) except EnvironmentError as e: # if this gives a SyntaxError, note that Tahoe-LAFS requires Python 2.6+ if verbose: print("unable to run %s" % args[0]) print(e) return None stdout = p.communicate()[0].strip() if p.returncode != 0: if verbose: print("unable to run %s (error)" % args[0]) return None return stdout def versions_from_git(tag_prefix, verbose=False): # this runs 'git' from the directory that contains this file. 
That either # means someone ran a setup.py command (and this code is in # versioneer.py, thus the containing directory is the root of the source # tree), or someone ran a project-specific entry point (and this code is # in _version.py, thus the containing directory is somewhere deeper in # the source tree). This only gets called if the git-archive 'subst' # variables were *not* expanded, and _version.py hasn't already been # rewritten with a short version string, meaning we're inside a checked # out source tree. # versions_from_git (as copied from python-versioneer) returns strings # like "1.9.0-25-gb73aba9-dirty", which means we're in a tree with # uncommited changes (-dirty), the latest checkin is revision b73aba9, # the most recent tag was 1.9.0, and b73aba9 has 25 commits that weren't # in 1.9.0 . The narrow-minded NormalizedVersion parser that takes our # output (meant to enable sorting of version strings) refuses most of # that. Tahoe uses a function named suggest_normalized_version() that can # handle "1.9.0.post25", so dumb down our output to match. try: source_dir = os.path.dirname(os.path.abspath(__file__)) except NameError: # some py2exe/bbfreeze/non-CPython implementations don't do __file__ return {} # not always correct GIT = "git" if sys.platform == "win32": GIT = "git.cmd" stdout = run_command([GIT, "describe", "--tags", "--dirty", "--always"], cwd=source_dir) if stdout is None: return {} if not stdout.startswith(tag_prefix): if verbose: print("tag '%s' doesn't start with prefix '%s'" % (stdout, tag_prefix)) return {} version = stdout[len(tag_prefix):] pieces = version.split("-") if len(pieces) == 1: normalized_version = pieces[0] else: normalized_version = "%s.post%s" % (pieces[0], pieces[1]) stdout = run_command([GIT, "rev-parse", "HEAD"], cwd=source_dir) if stdout is None: return {} full = stdout.strip() if version.endswith("-dirty"): full += "-dirty" normalized_version += ".dev0" return {"version": version, "normalized": normalized_version, "full": full} # setup.cfg has an [aliases] section which runs "update_version" before many # commands (like "build" and "sdist") that need to know our package version # ahead of time. If you add different commands (or if we forgot some), you # may need to add it to setup.cfg and configure it to run update_version # before your command. class UpdateVersion(Command): description = "update _version.py from revision-control metadata" user_options = [] def initialize_options(self): pass def finalize_options(self): pass def run(self): if os.path.isdir(os.path.join(basedir, ".git")): verstr = self.try_from_git() else: print("no version-control data found, leaving _version.py alone") return if verstr: self.distribution.metadata.version = verstr def try_from_git(self): versions = versions_from_git("allmydata-tahoe-", verbose=True) if versions: fn = 'src/allmydata/_version.py' f = open(fn, "wb") f.write(GIT_VERSION_BODY % { "pkgname": self.distribution.get_name(), "version": versions["version"], "normalized": versions["normalized"], "full": versions["full"] }) f.close() print("git-version: wrote '%s' into '%s'" % (versions["version"], fn)) return versions.get("normalized", None) class MySdist(sdist.sdist): """ A hook in the sdist command so that we can determine whether this the tarball should be 'SUMO' or not, i.e. whether or not to include the external dependency tarballs. Note that we always include misc/dependencies/* in the tarball; --sumo controls whether tahoe-deps/* is included as well. 
""" user_options = sdist.sdist.user_options + \ [('sumo', 's', "create a 'sumo' sdist which includes the contents of tahoe-deps/*"), ] boolean_options = ['sumo'] def initialize_options(self): sdist.sdist.initialize_options(self) self.sumo = False def make_distribution(self): # add our extra files to the list just before building the # tarball/zipfile. We override make_distribution() instead of run() # because setuptools.command.sdist.run() does not lend itself to # easy/robust subclassing (the code we need to add goes right smack # in the middle of a 12-line method). If this were the distutils # version, we'd override get_file_list(). if self.sumo: # If '--sumo' was specified, include tahoe-deps/* in the sdist. # We assume that the user has fetched the tahoe-deps.tar.gz # tarball and unpacked it already. self.filelist.extend([os.path.join("tahoe-deps", fn) for fn in os.listdir("tahoe-deps")]) # In addition, we want the tarball/zipfile to have -SUMO in the # name, and the unpacked directory to have -SUMO too. The easiest # way to do this is to patch self.distribution and override the # get_fullname() method. (an alternative is to modify # self.distribution.metadata.version, but that also affects the # contents of PKG-INFO). fullname = self.distribution.get_fullname() def get_fullname(): return fullname + "-SUMO" self.distribution.get_fullname = get_fullname try: old_mask = os.umask(int("022", 8)) return sdist.sdist.make_distribution(self) finally: os.umask(old_mask) setup_args = {} if version: setup_args["version"] = version setup(name=APPNAME, description='secure, decentralized, fault-tolerant filesystem', long_description=open('README.txt', 'rU').read(), author='the Tahoe-LAFS project', author_email='tahoe-dev@tahoe-lafs.org', url='https://tahoe-lafs.org/', license='GNU GPL', # see README.txt -- there is an alternative licence cmdclass={"trial": Trial, "make_executable": MakeExecutable, "update_version": UpdateVersion, "sdist": MySdist, }, package_dir = {'':'src'}, packages=['allmydata', 'allmydata.frontends', 'allmydata.immutable', 'allmydata.immutable.downloader', 'allmydata.introducer', 'allmydata.mutable', 'allmydata.scripts', 'allmydata.storage', 'allmydata.test', 'allmydata.util', 'allmydata.web', 'allmydata.web.static', 'allmydata.web.static.css', 'allmydata.windows', 'buildtest'], classifiers=trove_classifiers, test_suite="allmydata.test", install_requires=install_requires, tests_require=tests_require, package_data={"allmydata.web": ["*.xhtml"], "allmydata.web.static": ["*.js", "*.png", "*.css"], "allmydata.web.static.css": ["*.css"], }, setup_requires=setup_requires, entry_points = { 'console_scripts': [ 'tahoe = allmydata.scripts.runner:run' ] }, zip_safe=False, # We prefer unzipped for easier access. **setup_args ) tahoe-lafs-1.10.0/setuptools-0.6c16dev4.egg/000077500000000000000000000000001221140116300201635ustar00rootroot00000000000000tahoe-lafs-1.10.0/setuptools-0.6c16dev4.egg/EGG-INFO/000077500000000000000000000000001221140116300213165ustar00rootroot00000000000000tahoe-lafs-1.10.0/setuptools-0.6c16dev4.egg/EGG-INFO/PKG-INFO000066400000000000000000000120761221140116300224210ustar00rootroot00000000000000Metadata-Version: 1.0 Name: setuptools Version: 0.6c16dev4 Summary: Download, build, install, upgrade, and uninstall Python packages -- easily! (zetuptoolz fork) Home-page: http://pypi.python.org/pypi/setuptools Author: Phillip J. 
Eby Author-email: distutils-sig@python.org License: PSF or ZPL Description: ====================== This is not Setuptools ====================== This is the ``zetuptoolz`` fork of setuptools, which is used to install `Tahoe-LAFS`_. It has a `darcs source repository`_ and `issue tracker`_. For a list of differences between this fork and setuptools, see zetuptoolz.txt. Note that, to avoid interfering with any setuptools installation, zetuptoolz does not install a script called ``easy_install``. There is an ``easy_install_z`` script, but that is intended only for developers to test differences between setuptools and zetuptoolz. .. _Tahoe-LAFS: http://tahoe-lafs.org/ .. _darcs source repository: http://tahoe-lafs.org/source/zetuptoolz/trunk .. _issue tracker: http://tahoe-lafs.org/trac/zetuptoolz -------------------------------- Using Setuptools and EasyInstall -------------------------------- Here are some of the available manuals, tutorials, and other resources for learning about Setuptools, Python Eggs, and EasyInstall: * `The EasyInstall user's guide and reference manual`_ * `The setuptools Developer's Guide`_ * `The pkg_resources API reference`_ * `Package Compatibility Notes`_ (user-maintained) * `The Internal Structure of Python Eggs`_ Questions, comments, and bug reports should be directed to the `distutils-sig mailing list`_. If you have written (or know of) any tutorials, documentation, plug-ins, or other resources for setuptools users, please let us know about them there, so this reference list can be updated. If you have working, *tested* patches to correct problems or add features, you may submit them to the `setuptools bug tracker`_. .. _setuptools bug tracker: http://bugs.python.org/setuptools/ .. _Package Compatibility Notes: http://peak.telecommunity.com/DevCenter/PackageNotes .. _The Internal Structure of Python Eggs: http://peak.telecommunity.com/DevCenter/EggFormats .. _The setuptools Developer's Guide: http://peak.telecommunity.com/DevCenter/setuptools .. _The pkg_resources API reference: http://peak.telecommunity.com/DevCenter/PkgResources .. _The EasyInstall user's guide and reference manual: http://peak.telecommunity.com/DevCenter/EasyInstall .. _distutils-sig mailing list: http://mail.python.org/pipermail/distutils-sig/ ------- Credits ------- * The original design for the ``.egg`` format and the ``pkg_resources`` API was co-created by Phillip Eby and Bob Ippolito. Bob also implemented the first version of ``pkg_resources``, and supplied the OS X operating system version compatibility algorithm. * Ian Bicking implemented many early "creature comfort" features of easy_install, including support for downloading via Sourceforge and Subversion repositories. Ian's comments on the Web-SIG about WSGI application deployment also inspired the concept of "entry points" in eggs, and he has given talks at PyCon and elsewhere to inform and educate the community about eggs and setuptools. * Jim Fulton contributed time and effort to build automated tests of various aspects of ``easy_install``, and supplied the doctests for the command-line ``.exe`` wrappers on Windows. * Phillip J. Eby is the principal author and maintainer of setuptools, and first proposed the idea of an importable binary distribution format for Python application plug-ins. * Significant parts of the implementation of setuptools were funded by the Open Source Applications Foundation, to provide a plug-in infrastructure for the Chandler PIM application. 
In addition, many OSAF staffers (such as Mike "Code Bear" Taylor) contributed their time and stress as guinea pigs for the use of eggs and setuptools, even before eggs were "cool". (Thanks, guys!) .. _files: Keywords: CPAN PyPI distutils eggs package management Platform: UNKNOWN Classifier: Development Status :: 3 - Alpha Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: Python Software Foundation License Classifier: License :: OSI Approved :: Zope Public License Classifier: Operating System :: OS Independent Classifier: Programming Language :: Python Classifier: Topic :: Software Development :: Libraries :: Python Modules Classifier: Topic :: System :: Archiving :: Packaging Classifier: Topic :: System :: Systems Administration Classifier: Topic :: Utilities tahoe-lafs-1.10.0/setuptools-0.6c16dev4.egg/EGG-INFO/SOURCES.txt000066400000000000000000000023451221140116300232060ustar00rootroot00000000000000README.txt easy_install.py pkg_resources.py setup.cfg setup.py setuptools/__init__.py setuptools/archive_util.py setuptools/depends.py setuptools/dist.py setuptools/extension.py setuptools/package_index.py setuptools/sandbox.py setuptools/site-patch.py setuptools.egg-info/PKG-INFO setuptools.egg-info/SOURCES.txt setuptools.egg-info/dependency_links.txt setuptools.egg-info/entry_points.txt setuptools.egg-info/top_level.txt setuptools.egg-info/zip-safe setuptools/command/__init__.py setuptools/command/alias.py setuptools/command/bdist_egg.py setuptools/command/bdist_rpm.py setuptools/command/bdist_wininst.py setuptools/command/build_ext.py setuptools/command/build_py.py setuptools/command/develop.py setuptools/command/easy_install.py setuptools/command/egg_info.py setuptools/command/install.py setuptools/command/install_egg_info.py setuptools/command/install_lib.py setuptools/command/install_scripts.py setuptools/command/register.py setuptools/command/rotate.py setuptools/command/saveopts.py setuptools/command/scriptsetup.py setuptools/command/sdist.py setuptools/command/setopt.py setuptools/command/test.py setuptools/command/upload.py setuptools/tests/__init__.py setuptools/tests/test_packageindex.py setuptools/tests/test_resources.pytahoe-lafs-1.10.0/setuptools-0.6c16dev4.egg/EGG-INFO/dependency_links.txt000066400000000000000000000000011221140116300253640ustar00rootroot00000000000000 tahoe-lafs-1.10.0/setuptools-0.6c16dev4.egg/EGG-INFO/entry_points.txt000066400000000000000000000050641221140116300246210ustar00rootroot00000000000000[distutils.commands] bdist_rpm = setuptools.command.bdist_rpm:bdist_rpm rotate = setuptools.command.rotate:rotate develop = setuptools.command.develop:develop setopt = setuptools.command.setopt:setopt build_py = setuptools.command.build_py:build_py scriptsetup = setuptools.command.scriptsetup:scriptsetup saveopts = setuptools.command.saveopts:saveopts egg_info = setuptools.command.egg_info:egg_info register = setuptools.command.register:register install_egg_info = setuptools.command.install_egg_info:install_egg_info alias = setuptools.command.alias:alias easy_install = setuptools.command.easy_install:easy_install install_scripts = setuptools.command.install_scripts:install_scripts bdist_wininst = setuptools.command.bdist_wininst:bdist_wininst bdist_egg = setuptools.command.bdist_egg:bdist_egg install = setuptools.command.install:install test = setuptools.command.test:test install_lib = setuptools.command.install_lib:install_lib build_ext = setuptools.command.build_ext:build_ext sdist = setuptools.command.sdist:sdist [egg_info.writers] 
dependency_links.txt = setuptools.command.egg_info:overwrite_arg
requires.txt = setuptools.command.egg_info:write_requirements
PKG-INFO = setuptools.command.egg_info:write_pkg_info
eager_resources.txt = setuptools.command.egg_info:overwrite_arg
top_level.txt = setuptools.command.egg_info:write_toplevel_names
namespace_packages.txt = setuptools.command.egg_info:overwrite_arg
entry_points.txt = setuptools.command.egg_info:write_entries
depends.txt = setuptools.command.egg_info:warn_depends_obsolete

[console_scripts]
easy_install_z-2.6 = setuptools.command.easy_install:main
easy_install_z = setuptools.command.easy_install:main

[setuptools.file_finders]
svn_cvs = setuptools.command.sdist:_default_revctrl

[distutils.setup_keywords]
dependency_links = setuptools.dist:assert_string_list
entry_points = setuptools.dist:check_entry_points
extras_require = setuptools.dist:check_extras
test_runner = setuptools.dist:check_importable
package_data = setuptools.dist:check_package_data
install_requires = setuptools.dist:check_requirements
include_package_data = setuptools.dist:assert_bool
exclude_package_data = setuptools.dist:check_package_data
namespace_packages = setuptools.dist:check_nsp
test_suite = setuptools.dist:check_test_suite
eager_resources = setuptools.dist:assert_string_list
zip_safe = setuptools.dist:assert_bool
test_loader = setuptools.dist:check_importable
packages = setuptools.dist:check_packages
tests_require = setuptools.dist:check_requirements

[setuptools.installation]
eggsecutable = setuptools.command.easy_install:bootstrap

tahoe-lafs-1.10.0/setuptools-0.6c16dev4.egg/EGG-INFO/top_level.txt
easy_install
pkg_resources
setuptools

tahoe-lafs-1.10.0/setuptools-0.6c16dev4.egg/EGG-INFO/zip-safe

tahoe-lafs-1.10.0/setuptools-0.6c16dev4.egg/easy_install.py
"""Run the EasyInstall command"""

if __name__ == '__main__':
    from setuptools.command.easy_install import main
    main()

tahoe-lafs-1.10.0/setuptools-0.6c16dev4.egg/pkg_resources.py
"""Package resource API
--------------------

A resource is a logical file contained within a package, or a logical
subdirectory thereof.  The package resource API expects resource names
to have their path parts separated with ``/``, *not* whatever the local
path separator is.  Do not use os.path operations to manipulate resource
names being passed into the API.

The package resource API is designed to work with normal filesystem packages,
.egg files, and unpacked .egg files.  It can also work in a limited way with
.zip files and with custom PEP 302 loaders that support the ``get_data()``
method.
""" import sys, os, zipimport, time, re, imp try: frozenset except NameError: from sets import ImmutableSet as frozenset # capture these to bypass sandboxing from os import utime, rename, unlink, mkdir from os import open as os_open from os.path import isdir, split def _bypass_ensure_directory(name, mode=0777): # Sandbox-bypassing version of ensure_directory() dirname, filename = split(name) if dirname and filename and not isdir(dirname): _bypass_ensure_directory(dirname) mkdir(dirname, mode) _state_vars = {} def _declare_state(vartype, **kw): g = globals() for name, val in kw.iteritems(): g[name] = val _state_vars[name] = vartype def __getstate__(): state = {} g = globals() for k, v in _state_vars.iteritems(): state[k] = g['_sget_'+v](g[k]) return state def __setstate__(state): g = globals() for k, v in state.iteritems(): g['_sset_'+_state_vars[k]](k, g[k], v) return state def _sget_dict(val): return val.copy() def _sset_dict(key, ob, state): ob.clear() ob.update(state) def _sget_object(val): return val.__getstate__() def _sset_object(key, ob, state): ob.__setstate__(state) _sget_none = _sset_none = lambda *args: None def get_supported_platform(): """Return this platform's maximum compatible version. distutils.util.get_platform() normally reports the minimum version of Mac OS X that would be required to *use* extensions produced by distutils. But what we want when checking compatibility is to know the version of Mac OS X that we are *running*. To allow usage of packages that explicitly require a newer version of Mac OS X, we must also know the current version of the OS. If this condition occurs for any other platform with a version in its platform strings, this function should be extended accordingly. """ plat = get_build_platform(); m = macosVersionString.match(plat) if m is not None and sys.platform == "darwin": try: plat = 'macosx-%s-%s' % ('.'.join(_macosx_vers()[:2]), m.group(3)) except ValueError: pass # not Mac OS X return plat __all__ = [ # Basic resource access and distribution/entry point discovery 'require', 'run_script', 'get_provider', 'get_distribution', 'load_entry_point', 'get_entry_map', 'get_entry_info', 'iter_entry_points', 'resource_string', 'resource_stream', 'resource_filename', 'resource_listdir', 'resource_exists', 'resource_isdir', # Environmental control 'declare_namespace', 'working_set', 'add_activation_listener', 'find_distributions', 'set_extraction_path', 'cleanup_resources', 'get_default_cache', # Primary implementation classes 'Environment', 'WorkingSet', 'ResourceManager', 'Distribution', 'Requirement', 'EntryPoint', # Exceptions 'ResolutionError','VersionConflict','DistributionNotFound','UnknownExtra', 'ExtractionError', # Parsing functions and string utilities 'parse_requirements', 'parse_version', 'safe_name', 'safe_version', 'get_platform', 'compatible_platforms', 'yield_lines', 'split_sections', 'safe_extra', 'to_filename', # filesystem utilities 'ensure_directory', 'normalize_path', # Distribution "precedence" constants 'EGG_DIST', 'BINARY_DIST', 'SOURCE_DIST', 'CHECKOUT_DIST', 'DEVELOP_DIST', # "Provider" interfaces, implementations, and registration/lookup APIs 'IMetadataProvider', 'IResourceProvider', 'FileMetadata', 'PathMetadata', 'EggMetadata', 'EmptyProvider', 'empty_provider', 'NullProvider', 'EggProvider', 'DefaultProvider', 'ZipProvider', 'register_finder', 'register_namespace_handler', 'register_loader_type', 'fixup_namespace_packages', 'get_importer', # Deprecated/backward compatibility only 'run_main', 'AvailableDistributions', ] class 
ResolutionError(Exception):
    """Abstract base for dependency resolution errors"""
    def __repr__(self):
        return self.__class__.__name__+repr(self.args)

class VersionConflict(ResolutionError):
    """An already-installed version conflicts with the requested version"""

class DistributionNotFound(ResolutionError):
    """A requested distribution was not found"""

class UnknownExtra(ResolutionError):
    """Distribution doesn't have an "extra feature" of the given name"""

_provider_factories = {}
PY_MAJOR = sys.version[:3]
EGG_DIST = 3
BINARY_DIST = 2
SOURCE_DIST = 1
CHECKOUT_DIST = 0
DEVELOP_DIST = -1

def register_loader_type(loader_type, provider_factory):
    """Register `provider_factory` to make providers for `loader_type`

    `loader_type` is the type or class of a PEP 302 ``module.__loader__``,
    and `provider_factory` is a function that, passed a *module* object,
    returns an ``IResourceProvider`` for that module.
    """
    _provider_factories[loader_type] = provider_factory

def get_provider(moduleOrReq):
    """Return an IResourceProvider for the named module or requirement"""
    if isinstance(moduleOrReq,Requirement):
        return working_set.find(moduleOrReq) or require(str(moduleOrReq))[0]
    try:
        module = sys.modules[moduleOrReq]
    except KeyError:
        __import__(moduleOrReq)
        module = sys.modules[moduleOrReq]
    loader = getattr(module, '__loader__', None)
    return _find_adapter(_provider_factories, loader)(module)

def _macosx_vers(_cache=[]):
    if not _cache:
        from platform import mac_ver
        _cache.append(mac_ver()[0].split('.'))
    return _cache[0]

def _macosx_arch(machine):
    return {'PowerPC':'ppc', 'Power_Macintosh':'ppc'}.get(machine,machine)

def get_build_platform():
    """Return this platform's string for platform-specific distributions

    XXX Currently this is the same as ``distutils.util.get_platform()``, but
    it needs some hacks for Linux and Mac OS X.
    """
    from distutils.util import get_platform
    plat = get_platform()
    if sys.platform == "darwin" and not plat.startswith('macosx-'):
        try:
            version = _macosx_vers()
            machine = os.uname()[4].replace(" ", "_")
            return "macosx-%d.%d-%s" % (int(version[0]), int(version[1]),
                _macosx_arch(machine))
        except ValueError:
            # if someone is running a non-Mac darwin system, this will fall
            # through to the default implementation
            pass
    return plat

macosVersionString = re.compile(r"macosx-(\d+)\.(\d+)-(.*)")
darwinVersionString = re.compile(r"darwin-(\d+)\.(\d+)\.(\d+)-(.*)")
get_platform = get_build_platform # XXX backward compat

def compatible_platforms(provided,required):
    """Can code for the `provided` platform run on the `required` platform?

    Returns true if either platform is ``None``, or the platforms are equal.

    XXX Needs compatibility checks for Linux and other unixy OSes.
    """
    if provided is None or required is None or provided==required:
        return True     # easy case

    # Mac OS X special cases
    reqMac = macosVersionString.match(required)
    if reqMac:
        provMac = macosVersionString.match(provided)

        # is this a Mac package?
        if not provMac:
            # this is backwards compatibility for packages built before
            # setuptools 0.6. All packages built after this point will
            # use the new macosx designation.
provDarwin = darwinVersionString.match(provided) if provDarwin: dversion = int(provDarwin.group(1)) macosversion = "%s.%s" % (reqMac.group(1), reqMac.group(2)) if dversion == 7 and macosversion >= "10.3" or \ dversion == 8 and macosversion >= "10.4": #import warnings #warnings.warn("Mac eggs should be rebuilt to " # "use the macosx designation instead of darwin.", # category=DeprecationWarning) return True return False # egg isn't macosx or legacy darwin # are they the same major version and machine type? if provMac.group(1) != reqMac.group(1) or \ provMac.group(3) != reqMac.group(3): return False # is the required OS major update >= the provided one? if int(provMac.group(2)) > int(reqMac.group(2)): return False return True # XXX Linux and other platforms' special cases should go here return False def run_script(dist_spec, script_name): """Locate distribution `dist_spec` and run its `script_name` script""" ns = sys._getframe(1).f_globals name = ns['__name__'] ns.clear() ns['__name__'] = name require(dist_spec)[0].run_script(script_name, ns) run_main = run_script # backward compatibility def get_distribution(dist): """Return a current distribution object for a Requirement or string""" if isinstance(dist,basestring): dist = Requirement.parse(dist) if isinstance(dist,Requirement): dist = get_provider(dist) if not isinstance(dist,Distribution): raise TypeError("Expected string, Requirement, or Distribution", dist) return dist def load_entry_point(dist, group, name): """Return `name` entry point of `group` for `dist` or raise ImportError""" return get_distribution(dist).load_entry_point(group, name) def get_entry_map(dist, group=None): """Return the entry point map for `group`, or the full entry map""" return get_distribution(dist).get_entry_map(group) def get_entry_info(dist, group, name): """Return the EntryPoint object for `group`+`name`, or ``None``""" return get_distribution(dist).get_entry_info(group, name) class IMetadataProvider: def has_metadata(name): """Does the package's distribution contain the named metadata?""" def get_metadata(name): """The named metadata resource as a string""" def get_metadata_lines(name): """Yield named metadata resource as list of non-blank non-comment lines Leading and trailing whitespace is stripped from each line, and lines with ``#`` as the first non-blank character are omitted.""" def metadata_isdir(name): """Is the named metadata a directory? (like ``os.path.isdir()``)""" def metadata_listdir(name): """List of metadata names in the directory (like ``os.listdir()``)""" def run_script(script_name, namespace): """Execute the named script in the supplied namespace dictionary""" class IResourceProvider(IMetadataProvider): """An object that provides access to package resources""" def get_resource_filename(manager, resource_name): """Return a true filesystem path for `resource_name` `manager` must be an ``IResourceManager``""" def get_resource_stream(manager, resource_name): """Return a readable file-like object for `resource_name` `manager` must be an ``IResourceManager``""" def get_resource_string(manager, resource_name): """Return a string containing the contents of `resource_name` `manager` must be an ``IResourceManager``""" def has_resource(resource_name): """Does the package contain the named resource?""" def resource_isdir(resource_name): """Is the named resource a directory? 
(like ``os.path.isdir()``)""" def resource_listdir(resource_name): """List of resource names in the directory (like ``os.listdir()``)""" class WorkingSet(object): """A collection of active distributions on sys.path (or a similar list)""" def __init__(self, entries=None): """Create working set from list of path entries (default=sys.path)""" self.entries = [] self.entry_keys = {} self.by_key = {} self.callbacks = [] if entries is None: entries = sys.path for entry in entries: self.add_entry(entry) def add_entry(self, entry): """Add a path item to ``.entries``, finding any distributions on it ``find_distributions(entry, True)`` is used to find distributions corresponding to the path entry, and they are added. `entry` is always appended to ``.entries``, even if it is already present. (This is because ``sys.path`` can contain the same value more than once, and the ``.entries`` of the ``sys.path`` WorkingSet should always equal ``sys.path``.) """ self.entry_keys.setdefault(entry, []) self.entries.append(entry) for dist in find_distributions(entry, True): self.add(dist, entry, False) def __contains__(self,dist): """True if `dist` is the active distribution for its project""" return self.by_key.get(dist.key) == dist def find(self, req): """Find a distribution matching requirement `req` If there is an active distribution for the requested project, this returns it as long as it meets the version requirement specified by `req`. But, if there is an active distribution for the project and it does *not* meet the `req` requirement, ``VersionConflict`` is raised. If there is no active distribution for the requested project, ``None`` is returned. """ dist = self.by_key.get(req.key) if dist is not None and dist not in req: raise VersionConflict(dist,req) # XXX add more info else: return dist def iter_entry_points(self, group, name=None): """Yield entry point objects from `group` matching `name` If `name` is None, yields all entry points in `group` from all distributions in the working set, otherwise only ones matching both `group` and `name` are yielded (in distribution order). """ for dist in self: entries = dist.get_entry_map(group) if name is None: for ep in entries.values(): yield ep elif name in entries: yield entries[name] def run_script(self, requires, script_name): """Locate distribution for `requires` and run `script_name` script""" ns = sys._getframe(1).f_globals name = ns['__name__'] ns.clear() ns['__name__'] = name self.require(requires)[0].run_script(script_name, ns) def __iter__(self): """Yield distributions for non-duplicate projects in the working set The yield order is the order in which the items' path entries were added to the working set. """ seen = {} for item in self.entries: for key in self.entry_keys[item]: if key not in seen: seen[key]=1 yield self.by_key[key] def add(self, dist, entry=None, insert=True): """Add `dist` to working set, associated with `entry` If `entry` is unspecified, it defaults to the ``.location`` of `dist`. On exit from this routine, `entry` is added to the end of the working set's ``.entries`` (if it wasn't already present). `dist` is only added to the working set if it's for a project that doesn't already have a distribution in the set. If it's added, any callbacks registered with the ``subscribe()`` method will be called. 
""" if insert: dist.insert_on(self.entries, entry) if entry is None: entry = dist.location keys = self.entry_keys.setdefault(entry,[]) keys2 = self.entry_keys.setdefault(dist.location,[]) if dist.key in self.by_key: return # ignore hidden distros # If we have a __requires__ then we can already tell if this # dist is unsatisfactory, in which case we won't add it. if __requires__ is not None: for thisreqstr in __requires__: try: for thisreq in parse_requirements(thisreqstr): if thisreq.key == dist.key: if dist not in thisreq: return except ValueError, e: e.args = tuple(e.args + ({'thisreqstr': thisreqstr},)) raise self.by_key[dist.key] = dist if dist.key not in keys: keys.append(dist.key) if dist.key not in keys2: keys2.append(dist.key) self._added_new(dist) def resolve(self, requirements, env=None, installer=None): """List all distributions needed to (recursively) meet `requirements` `requirements` must be a sequence of ``Requirement`` objects. `env`, if supplied, should be an ``Environment`` instance. If not supplied, it defaults to all distributions available within any entry or distribution in the working set. `installer`, if supplied, will be invoked with each requirement that cannot be met by an already-installed distribution; it should return a ``Distribution`` or ``None``. """ requirements = list(requirements)[::-1] # set up the stack processed = {} # set of processed requirements best = {} # key -> dist to_activate = [] while requirements: req = requirements.pop(0) # process dependencies breadth-first if req in processed: # Ignore cyclic or redundant dependencies continue dist = best.get(req.key) if dist is None: # Find the best distribution and add it to the map dist = self.by_key.get(req.key) if dist is None: if env is None: env = Environment(self.entries) dist = best[req.key] = env.best_match(req, self, installer) if dist is None: raise DistributionNotFound(req) # XXX put more info here to_activate.append(dist) if dist not in req: # Oops, the "best" so far conflicts with a dependency raise VersionConflict(dist,req) # XXX put more info here requirements.extend(dist.requires(req.extras)[::-1]) processed[req] = True return to_activate # return list of distros to activate def find_plugins(self, plugin_env, full_env=None, installer=None, fallback=True ): """Find all activatable distributions in `plugin_env` Example usage:: distributions, errors = working_set.find_plugins( Environment(plugin_dirlist) ) map(working_set.add, distributions) # add plugins+libs to sys.path print "Couldn't load", errors # display errors The `plugin_env` should be an ``Environment`` instance that contains only distributions that are in the project's "plugin directory" or directories. The `full_env`, if supplied, should be an ``Environment`` contains all currently-available distributions. If `full_env` is not supplied, one is created automatically from the ``WorkingSet`` this method is called on, which will typically mean that every directory on ``sys.path`` will be scanned for distributions. `installer` is a standard installer callback as used by the ``resolve()`` method. The `fallback` flag indicates whether we should attempt to resolve older versions of a plugin if the newest version cannot be resolved. This method returns a 2-tuple: (`distributions`, `error_info`), where `distributions` is a list of the distributions found in `plugin_env` that were loadable, along with any other distributions that are needed to resolve their dependencies. 
`error_info` is a dictionary mapping unloadable plugin distributions to an exception instance describing the error that occurred. Usually this will be a ``DistributionNotFound`` or ``VersionConflict`` instance. """ plugin_projects = list(plugin_env) plugin_projects.sort() # scan project names in alphabetic order error_info = {} distributions = {} if full_env is None: env = Environment(self.entries) env += plugin_env else: env = full_env + plugin_env shadow_set = self.__class__([]) map(shadow_set.add, self) # put all our entries in shadow_set for project_name in plugin_projects: for dist in plugin_env[project_name]: req = [dist.as_requirement()] try: resolvees = shadow_set.resolve(req, env, installer) except ResolutionError,v: error_info[dist] = v # save error info if fallback: continue # try the next older version of project else: break # give up on this project, keep going else: map(shadow_set.add, resolvees) distributions.update(dict.fromkeys(resolvees)) # success, no need to try any more versions of this project break distributions = list(distributions) distributions.sort() return distributions, error_info def require(self, *requirements): """Ensure that distributions matching `requirements` are activated `requirements` must be a string or a (possibly-nested) sequence thereof, specifying the distributions and versions required. The return value is a sequence of the distributions that needed to be activated to fulfill the requirements; all relevant distributions are included, even if they were already activated in this working set. """ needed = self.resolve(parse_requirements(requirements)) for dist in needed: self.add(dist) return needed def subscribe(self, callback): """Invoke `callback` for all distributions (including existing ones)""" if callback in self.callbacks: return self.callbacks.append(callback) for dist in self: callback(dist) def _added_new(self, dist): for callback in self.callbacks: callback(dist) def __getstate__(self): return ( self.entries[:], self.entry_keys.copy(), self.by_key.copy(), self.callbacks[:] ) def __setstate__(self, (entries, keys, by_key, callbacks)): self.entries = entries[:] self.entry_keys = keys.copy() self.by_key = by_key.copy() self.callbacks = callbacks[:] class Environment(object): """Searchable snapshot of distributions on a search path""" def __init__(self, search_path=None, platform=get_supported_platform(), python=PY_MAJOR): """Snapshot distributions available on a search path Any distributions found on `search_path` are added to the environment. `search_path` should be a sequence of ``sys.path`` items. If not supplied, ``sys.path`` is used. `platform` is an optional string specifying the name of the platform that platform-specific distributions must be compatible with. If unspecified, it defaults to the current platform. `python` is an optional string naming the desired version of Python (e.g. ``'2.4'``); it defaults to the current version. You may explicitly set `platform` (and/or `python`) to ``None`` if you wish to map *all* distributions, not just those compatible with the running platform or Python version. """ self._distmap = {} self._cache = {} self.platform = platform self.python = python self.scan(search_path) def can_add(self, dist): """Is distribution `dist` acceptable for this environment? The distribution must match the platform and python version requirements specified when this environment was created, or False is returned. 
""" return (self.python is None or dist.py_version is None or dist.py_version==self.python) \ and compatible_platforms(dist.platform,self.platform) def remove(self, dist): """Remove `dist` from the environment""" self._distmap[dist.key].remove(dist) def scan(self, search_path=None): """Scan `search_path` for distributions usable in this environment Any distributions found are added to the environment. `search_path` should be a sequence of ``sys.path`` items. If not supplied, ``sys.path`` is used. Only distributions conforming to the platform/python version defined at initialization are added. """ if search_path is None: search_path = sys.path for item in search_path: for dist in find_distributions(item): self.add(dist) def __getitem__(self,project_name): """Return a newest-to-oldest list of distributions for `project_name` """ try: return self._cache[project_name] except KeyError: project_name = project_name.lower() if project_name not in self._distmap: return [] if project_name not in self._cache: dists = self._cache[project_name] = self._distmap[project_name] _sort_dists(dists) return self._cache[project_name] def add(self,dist): """Add `dist` if we ``can_add()`` it and it isn't already added""" if self.can_add(dist) and dist.has_version(): dists = self._distmap.setdefault(dist.key,[]) if dist not in dists: dists.append(dist) if dist.key in self._cache: _sort_dists(self._cache[dist.key]) def best_match(self, req, working_set, installer=None): """Find distribution best matching `req` and usable on `working_set` This calls the ``find(req)`` method of the `working_set` to see if a suitable distribution is already active. (This may raise ``VersionConflict`` if an unsuitable version of the project is already active in the specified `working_set`.) If a suitable distribution isn't active, this method returns the newest platform-dependent distribution in the environment that meets the ``Requirement`` in `req`. If no suitable platform-dependent distribution is found, then the newest platform-independent distribution that meets the requirement is returned. (A platform- dependent distribution will typically have code compiled or specialized for that platform.) Otherwise, if `installer` is supplied, then the result of calling the environment's ``obtain(req, installer)`` method will be returned. """ dist = working_set.find(req) if dist is not None: return dist # first try to find a platform-dependent dist for dist in self[req.key]: if dist in req and dist.platform is not None: return dist # then try any other dist for dist in self[req.key]: if dist in req: return dist return self.obtain(req, installer) # try and download/install def obtain(self, requirement, installer=None): """Obtain a distribution matching `requirement` (e.g. via download) Obtain a distro that matches requirement (e.g. via download). In the base ``Environment`` class, this routine just returns ``installer(requirement)``, unless `installer` is None, in which case None is returned instead. 
This method is a hook that allows subclasses to attempt other ways of obtaining a distribution before falling back to the `installer` argument.""" if installer is not None: return installer(requirement) def __iter__(self): """Yield the unique project names of the available distributions""" for key in self._distmap.keys(): if self[key]: yield key def __iadd__(self, other): """In-place addition of a distribution or environment""" if isinstance(other,Distribution): self.add(other) elif isinstance(other,Environment): for project in other: for dist in other[project]: self.add(dist) else: raise TypeError("Can't add %r to environment" % (other,)) return self def __add__(self, other): """Add an environment or distribution to an environment""" new = self.__class__([], platform=None, python=None) for env in self, other: new += env return new AvailableDistributions = Environment # XXX backward compatibility class ExtractionError(RuntimeError): """An error occurred extracting a resource The following attributes are available from instances of this exception: manager The resource manager that raised this exception cache_path The base directory for resource extraction original_error The exception instance that caused extraction to fail """ class ResourceManager: """Manage resource extraction and packages""" extraction_path = None def __init__(self): self.cached_files = {} def resource_exists(self, package_or_requirement, resource_name): """Does the named resource exist?""" return get_provider(package_or_requirement).has_resource(resource_name) def resource_isdir(self, package_or_requirement, resource_name): """Is the named resource an existing directory?""" return get_provider(package_or_requirement).resource_isdir( resource_name ) def resource_filename(self, package_or_requirement, resource_name): """Return a true filesystem path for specified resource""" return get_provider(package_or_requirement).get_resource_filename( self, resource_name ) def resource_stream(self, package_or_requirement, resource_name): """Return a readable file-like object for specified resource""" return get_provider(package_or_requirement).get_resource_stream( self, resource_name ) def resource_string(self, package_or_requirement, resource_name): """Return specified resource as a string""" return get_provider(package_or_requirement).get_resource_string( self, resource_name ) def resource_listdir(self, package_or_requirement, resource_name): """List the contents of the named resource directory""" return get_provider(package_or_requirement).resource_listdir( resource_name ) def extraction_error(self): """Give an error message for problems extracting file(s)""" old_exc = sys.exc_info()[1] cache_path = self.extraction_path or get_default_cache() err = ExtractionError("""Can't extract file(s) to egg cache The following error occurred while trying to extract file(s) to the Python egg cache: %s The Python egg cache directory is currently set to: %s Perhaps your account does not have write access to this directory? You can change the cache directory by setting the PYTHON_EGG_CACHE environment variable to point to an accessible directory. """ % (old_exc, cache_path) ) err.manager = self err.cache_path = cache_path err.original_error = old_exc raise err def get_cache_path(self, archive_name, names=()): """Return absolute location in cache for `archive_name` and `names` The parent directory of the resulting path will be created if it does not already exist. 
`archive_name` should be the base filename of the enclosing egg (which may not be the name of the enclosing zipfile!), including its ".egg" extension. `names`, if provided, should be a sequence of path name parts "under" the egg's extraction location. This method should only be called by resource providers that need to obtain an extraction location, and only for names they intend to extract, as it tracks the generated names for possible cleanup later. """ extract_path = self.extraction_path or get_default_cache() target_path = os.path.join(extract_path, archive_name+'-tmp', *names) try: _bypass_ensure_directory(target_path) except: self.extraction_error() self.cached_files[target_path] = 1 return target_path def postprocess(self, tempname, filename): """Perform any platform-specific postprocessing of `tempname` This is where Mac header rewrites should be done; other platforms don't have anything special they should do. Resource providers should call this method ONLY after successfully extracting a compressed resource. They must NOT call it on resources that are already in the filesystem. `tempname` is the current (temporary) name of the file, and `filename` is the name it will be renamed to by the caller after this routine returns. """ if os.name == 'posix': # Make the resource executable mode = ((os.stat(tempname).st_mode) | 0555) & 07777 os.chmod(tempname, mode) def set_extraction_path(self, path): """Set the base path where resources will be extracted to, if needed. If you do not call this routine before any extractions take place, the path defaults to the return value of ``get_default_cache()``. (Which is based on the ``PYTHON_EGG_CACHE`` environment variable, with various platform-specific fallbacks. See that routine's documentation for more details.) Resources are extracted to subdirectories of this path based upon information given by the ``IResourceProvider``. You may set this to a temporary directory, but then you must call ``cleanup_resources()`` to delete the extracted files when done. There is no guarantee that ``cleanup_resources()`` will be able to remove all extracted files. (Note: you may not change the extraction path for a given resource manager once resources have been extracted, unless you first call ``cleanup_resources()``.) """ if self.cached_files: raise ValueError( "Can't change extraction path, files already extracted" ) self.extraction_path = path def cleanup_resources(self, force=False): """ Delete all extracted resource files and directories, returning a list of the file and directory names that could not be successfully removed. This function does not have any concurrency protection, so it should generally only be called when the extraction path is a temporary directory exclusive to a single process. This method is not automatically called; you must call it explicitly or register it as an ``atexit`` function if you wish to ensure cleanup of a temporary directory used for extractions. """ # XXX def get_default_cache(): """Determine the default cache location This returns the ``PYTHON_EGG_CACHE`` environment variable, if set. Otherwise, on Windows, it returns a "Python-Eggs" subdirectory of the "Application Data" directory. On all other systems, it's "~/.python-eggs". """ try: return os.environ['PYTHON_EGG_CACHE'] except KeyError: pass if os.name!='nt': return os.path.expanduser('~/.python-eggs') app_data = 'Application Data' # XXX this may be locale-specific! 
app_homes = [ (('APPDATA',), None), # best option, should be locale-safe (('USERPROFILE',), app_data), (('HOMEDRIVE','HOMEPATH'), app_data), (('HOMEPATH',), app_data), (('HOME',), None), (('WINDIR',), app_data), # 95/98/ME ] for keys, subdir in app_homes: dirname = '' for key in keys: if key in os.environ: dirname = os.path.join(dirname, os.environ[key]) else: break else: if subdir: dirname = os.path.join(dirname,subdir) return os.path.join(dirname, 'Python-Eggs') else: raise RuntimeError( "Please set the PYTHON_EGG_CACHE enviroment variable" ) def safe_name(name): """Convert an arbitrary string to a standard distribution name Any runs of non-alphanumeric/. characters are replaced with a single '-'. """ return re.sub('[^A-Za-z0-9.]+', '-', name) def safe_version(version): """Convert an arbitrary string to a standard version string Spaces become dots, and all other non-alphanumeric characters become dashes, with runs of multiple dashes condensed to a single dash. """ version = version.replace(' ','.') return re.sub('[^A-Za-z0-9.]+', '-', version) def safe_extra(extra): """Convert an arbitrary string to a standard 'extra' name Any runs of non-alphanumeric characters are replaced with a single '_', and the result is always lowercased. """ return re.sub('[^A-Za-z0-9.]+', '_', extra).lower() def to_filename(name): """Convert a project or version name to its filename-escaped form Any '-' characters are currently replaced with '_'. """ return name.replace('-','_') class NullProvider: """Try to implement resources and metadata for arbitrary PEP 302 loaders""" egg_name = None egg_info = None loader = None def __init__(self, module): self.loader = getattr(module, '__loader__', None) self.module_path = os.path.dirname(getattr(module, '__file__', '')) def get_resource_filename(self, manager, resource_name): return self._fn(self.module_path, resource_name) def get_resource_stream(self, manager, resource_name): return StringIO(self.get_resource_string(manager, resource_name)) def get_resource_string(self, manager, resource_name): return self._get(self._fn(self.module_path, resource_name)) def has_resource(self, resource_name): return self._has(self._fn(self.module_path, resource_name)) def has_metadata(self, name): return self.egg_info and self._has(self._fn(self.egg_info,name)) def get_metadata(self, name): if not self.egg_info: return "" return self._get(self._fn(self.egg_info,name)) def get_metadata_lines(self, name): return yield_lines(self.get_metadata(name)) def resource_isdir(self,resource_name): return self._isdir(self._fn(self.module_path, resource_name)) def metadata_isdir(self,name): return self.egg_info and self._isdir(self._fn(self.egg_info,name)) def resource_listdir(self,resource_name): return self._listdir(self._fn(self.module_path,resource_name)) def metadata_listdir(self,name): if self.egg_info: return self._listdir(self._fn(self.egg_info,name)) return [] def run_script(self,script_name,namespace): script = 'scripts/'+script_name if not self.has_metadata(script): raise ResolutionError("No script named %r" % script_name) script_text = self.get_metadata(script).replace('\r\n','\n') script_text = script_text.replace('\r','\n') script_filename = self._fn(self.egg_info,script) namespace['__file__'] = script_filename if os.path.exists(script_filename): execfile(script_filename, namespace, namespace) else: from linecache import cache cache[script_filename] = ( len(script_text), 0, script_text.split('\n'), script_filename ) script_code = compile(script_text,script_filename,'exec') exec 
script_code in namespace, namespace def _has(self, path): raise NotImplementedError( "Can't perform this operation for unregistered loader type" ) def _isdir(self, path): raise NotImplementedError( "Can't perform this operation for unregistered loader type" ) def _listdir(self, path): raise NotImplementedError( "Can't perform this operation for unregistered loader type" ) def _fn(self, base, resource_name): if resource_name: return os.path.join(base, *resource_name.split('/')) return base def _get(self, path): if hasattr(self.loader, 'get_data'): return self.loader.get_data(path) raise NotImplementedError( "Can't perform this operation for loaders without 'get_data()'" ) register_loader_type(object, NullProvider) class EggProvider(NullProvider): """Provider based on a virtual filesystem""" def __init__(self,module): NullProvider.__init__(self,module) self._setup_prefix() def _setup_prefix(self): # we assume here that our metadata may be nested inside a "basket" # of multiple eggs; that's why we use module_path instead of .archive path = self.module_path old = None while path!=old: if path.lower().endswith('.egg'): self.egg_name = os.path.basename(path) self.egg_info = os.path.join(path, 'EGG-INFO') self.egg_root = path break old = path path, base = os.path.split(path) class DefaultProvider(EggProvider): """Provides access to package resources in the filesystem""" def _has(self, path): return os.path.exists(path) def _isdir(self,path): return os.path.isdir(path) def _listdir(self,path): return os.listdir(path) def get_resource_stream(self, manager, resource_name): return open(self._fn(self.module_path, resource_name), 'rb') def _get(self, path): stream = open(path, 'rb') try: return stream.read() finally: stream.close() register_loader_type(type(None), DefaultProvider) class EmptyProvider(NullProvider): """Provider that returns nothing for all requests""" _isdir = _has = lambda self,path: False _get = lambda self,path: '' _listdir = lambda self,path: [] module_path = None def __init__(self): pass empty_provider = EmptyProvider() class ZipProvider(EggProvider): """Resource support for zips and eggs""" eagers = None def __init__(self, module): EggProvider.__init__(self,module) self.zipinfo = zipimport._zip_directory_cache[self.loader.archive] self.zip_pre = self.loader.archive+os.sep def _zipinfo_name(self, fspath): # Convert a virtual filename (full path to file) into a zipfile subpath # usable with the zipimport directory cache for our target archive if fspath.startswith(self.zip_pre): return fspath[len(self.zip_pre):] raise AssertionError( "%s is not a subpath of %s" % (fspath,self.zip_pre) ) def _parts(self,zip_path): # Convert a zipfile subpath into an egg-relative path part list fspath = self.zip_pre+zip_path # pseudo-fs path if fspath.startswith(self.egg_root+os.sep): return fspath[len(self.egg_root)+1:].split(os.sep) raise AssertionError( "%s is not a subpath of %s" % (fspath,self.egg_root) ) def get_resource_filename(self, manager, resource_name): if not self.egg_name: raise NotImplementedError( "resource_filename() only supported for .egg, not .zip" ) # no need to lock for extraction, since we use temp names zip_path = self._resource_to_zip(resource_name) eagers = self._get_eager_resources() if '/'.join(self._parts(zip_path)) in eagers: for name in eagers: self._extract_resource(manager, self._eager_to_zip(name)) return self._extract_resource(manager, zip_path) def _extract_resource(self, manager, zip_path): if zip_path in self._index(): for name in self._index()[zip_path]: last = 
self._extract_resource( manager, os.path.join(zip_path, name) ) return os.path.dirname(last) # return the extracted directory name zip_stat = self.zipinfo[zip_path] t,d,size = zip_stat[5], zip_stat[6], zip_stat[3] date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F, # ymd (t&0xFFFF)>>11, (t>>5)&0x3F, (t&0x1F) * 2, 0, 0, -1 # hms, etc. ) timestamp = time.mktime(date_time) try: real_path = manager.get_cache_path( self.egg_name, self._parts(zip_path) ) if os.path.isfile(real_path): stat = os.stat(real_path) if stat.st_size==size and stat.st_mtime==timestamp: # size and stamp match, don't bother extracting return real_path outf, tmpnam = _mkstemp(".$extract", dir=os.path.dirname(real_path)) os.write(outf, self.loader.get_data(zip_path)) os.close(outf) utime(tmpnam, (timestamp,timestamp)) manager.postprocess(tmpnam, real_path) try: rename(tmpnam, real_path) except os.error: if os.path.isfile(real_path): stat = os.stat(real_path) if stat.st_size==size and stat.st_mtime==timestamp: # size and stamp match, somebody did it just ahead of # us, so we're done return real_path elif os.name=='nt': # Windows, del old file and retry unlink(real_path) rename(tmpnam, real_path) return real_path raise except os.error: manager.extraction_error() # report a user-friendly error return real_path def _get_eager_resources(self): if self.eagers is None: eagers = [] for name in ('native_libs.txt', 'eager_resources.txt'): if self.has_metadata(name): eagers.extend(self.get_metadata_lines(name)) self.eagers = eagers return self.eagers def _index(self): try: return self._dirindex except AttributeError: ind = {} for path in self.zipinfo: parts = path.split(os.sep) while parts: parent = os.sep.join(parts[:-1]) if parent in ind: ind[parent].append(parts[-1]) break else: ind[parent] = [parts.pop()] self._dirindex = ind return ind def _has(self, fspath): zip_path = self._zipinfo_name(fspath) return zip_path in self.zipinfo or zip_path in self._index() def _isdir(self,fspath): return self._zipinfo_name(fspath) in self._index() def _listdir(self,fspath): return list(self._index().get(self._zipinfo_name(fspath), ())) def _eager_to_zip(self,resource_name): return self._zipinfo_name(self._fn(self.egg_root,resource_name)) def _resource_to_zip(self,resource_name): return self._zipinfo_name(self._fn(self.module_path,resource_name)) register_loader_type(zipimport.zipimporter, ZipProvider) class FileMetadata(EmptyProvider): """Metadata handler for standalone PKG-INFO files Usage:: metadata = FileMetadata("/path/to/PKG-INFO") This provider rejects all data and metadata requests except for PKG-INFO, which is treated as existing, and will be the contents of the file at the provided location. 
""" def __init__(self,path): self.path = path def has_metadata(self,name): return name=='PKG-INFO' def get_metadata(self,name): if name=='PKG-INFO': return open(self.path,'rU').read() raise KeyError("No metadata except PKG-INFO is available") def get_metadata_lines(self,name): return yield_lines(self.get_metadata(name)) class PathMetadata(DefaultProvider): """Metadata provider for egg directories Usage:: # Development eggs: egg_info = "/path/to/PackageName.egg-info" base_dir = os.path.dirname(egg_info) metadata = PathMetadata(base_dir, egg_info) dist_name = os.path.splitext(os.path.basename(egg_info))[0] dist = Distribution(basedir,project_name=dist_name,metadata=metadata) # Unpacked egg directories: egg_path = "/path/to/PackageName-ver-pyver-etc.egg" metadata = PathMetadata(egg_path, os.path.join(egg_path,'EGG-INFO')) dist = Distribution.from_filename(egg_path, metadata=metadata) """ def __init__(self, path, egg_info): self.module_path = path self.egg_info = egg_info class EggMetadata(ZipProvider): """Metadata provider for .egg files""" def __init__(self, importer): """Create a metadata provider from a zipimporter""" self.zipinfo = zipimport._zip_directory_cache[importer.archive] self.zip_pre = importer.archive+os.sep self.loader = importer if importer.prefix: self.module_path = os.path.join(importer.archive, importer.prefix) else: self.module_path = importer.archive self._setup_prefix() class ImpWrapper: """PEP 302 Importer that wraps Python's "normal" import algorithm""" def __init__(self, path=None): self.path = path def find_module(self, fullname, path=None): subname = fullname.split(".")[-1] if subname != fullname and self.path is None: return None if self.path is None: path = None else: path = [self.path] try: file, filename, etc = imp.find_module(subname, path) except ImportError: return None return ImpLoader(file, filename, etc) class ImpLoader: """PEP 302 Loader that wraps Python's "normal" import algorithm""" def __init__(self, file, filename, etc): self.file = file self.filename = filename self.etc = etc def load_module(self, fullname): try: mod = imp.load_module(fullname, self.file, self.filename, self.etc) finally: if self.file: self.file.close() # Note: we don't set __loader__ because we want the module to look # normal; i.e. this is just a wrapper for standard import machinery return mod def get_importer(path_item): """Retrieve a PEP 302 "importer" for the given path item If there is no importer, this returns a wrapper around the builtin import machinery. The returned importer is only cached if it was created by a path hook. """ try: importer = sys.path_importer_cache[path_item] except KeyError: for hook in sys.path_hooks: try: importer = hook(path_item) except ImportError: pass else: break else: importer = None sys.path_importer_cache.setdefault(path_item,importer) if importer is None: try: importer = ImpWrapper(path_item) except ImportError: pass return importer _declare_state('dict', _distribution_finders = {}) def register_finder(importer_type, distribution_finder): """Register `distribution_finder` to find distributions in sys.path items `importer_type` is the type or class of a PEP 302 "Importer" (sys.path item handler), and `distribution_finder` is a callable that, passed a path item and the importer instance, yields ``Distribution`` instances found on that path item. 
See ``pkg_resources.find_on_path`` for an example.""" _distribution_finders[importer_type] = distribution_finder def find_distributions(path_item, only=False): """Yield distributions accessible via `path_item`""" importer = get_importer(path_item) finder = _find_adapter(_distribution_finders, importer) return finder(importer, path_item, only) def find_in_zip(importer, path_item, only=False): metadata = EggMetadata(importer) if metadata.has_metadata('PKG-INFO'): yield Distribution.from_filename(path_item, metadata=metadata) if only: return # don't yield nested distros for subitem in metadata.resource_listdir('/'): if subitem.endswith('.egg'): subpath = os.path.join(path_item, subitem) for dist in find_in_zip(zipimport.zipimporter(subpath), subpath): yield dist register_finder(zipimport.zipimporter, find_in_zip) def StringIO(*args, **kw): """Thunk to load the real StringIO on demand""" global StringIO try: from cStringIO import StringIO except ImportError: from StringIO import StringIO return StringIO(*args,**kw) def find_nothing(importer, path_item, only=False): return () register_finder(object,find_nothing) def find_on_path(importer, path_item, only=False): """Yield distributions accessible on a sys.path directory""" path_item = _normalize_cached(path_item) if os.path.isdir(path_item) and os.access(path_item, os.R_OK): if path_item.lower().endswith('.egg'): # unpacked egg yield Distribution.from_filename( path_item, metadata=PathMetadata( path_item, os.path.join(path_item,'EGG-INFO') ) ) else: # scan for .egg and .egg-info in directory for entry in os.listdir(path_item): lower = entry.lower() if lower.endswith('.egg-info'): fullpath = os.path.join(path_item, entry) if os.path.isdir(fullpath): # egg-info directory, allow getting metadata metadata = PathMetadata(path_item, fullpath) else: metadata = FileMetadata(fullpath) yield Distribution.from_location( path_item,entry,metadata,precedence=DEVELOP_DIST ) elif not only and lower.endswith('.egg'): for dist in find_distributions(os.path.join(path_item, entry)): yield dist elif not only and lower.endswith('.egg-link'): for line in file(os.path.join(path_item, entry)): if not line.strip(): continue for item in find_distributions(os.path.join(path_item,line.rstrip())): yield item break register_finder(ImpWrapper, find_on_path) _declare_state('dict', _namespace_handlers = {}) _declare_state('dict', _namespace_packages = {}) def register_namespace_handler(importer_type, namespace_handler): """Register `namespace_handler` to declare namespace packages `importer_type` is the type or class of a PEP 302 "Importer" (sys.path item handler), and `namespace_handler` is a callable like this:: def namespace_handler(importer,path_entry,moduleName,module): # return a path_entry to use for child packages Namespace handlers are only called if the importer object has already agreed that it can handle the relevant path item, and they should only return a subpath if the module __path__ does not already contain an equivalent subpath. For an example namespace handler, see ``pkg_resources.file_ns_handler``. 
""" _namespace_handlers[importer_type] = namespace_handler def _handle_ns(packageName, path_item): """Ensure that named package includes a subpath of path_item (if needed)""" importer = get_importer(path_item) if importer is None: return None loader = importer.find_module(packageName) if loader is None: return None module = sys.modules.get(packageName) if module is None: module = sys.modules[packageName] = imp.new_module(packageName) module.__path__ = []; _set_parent_ns(packageName) elif not hasattr(module,'__path__'): raise TypeError("Not a package:", packageName) handler = _find_adapter(_namespace_handlers, importer) subpath = handler(importer,path_item,packageName,module) if subpath is not None: path = module.__path__; path.append(subpath) loader.load_module(packageName); module.__path__ = path return subpath def declare_namespace(packageName): """Declare that package 'packageName' is a namespace package""" imp.acquire_lock() try: if packageName in _namespace_packages: return path, parent = sys.path, None if '.' in packageName: parent = '.'.join(packageName.split('.')[:-1]) declare_namespace(parent) __import__(parent) try: path = sys.modules[parent].__path__ except AttributeError: raise TypeError("Not a package:", parent) # Track what packages are namespaces, so when new path items are added, # they can be updated _namespace_packages.setdefault(parent,[]).append(packageName) _namespace_packages.setdefault(packageName,[]) for path_item in path: # Ensure all the parent's path items are reflected in the child, # if they apply _handle_ns(packageName, path_item) finally: imp.release_lock() def fixup_namespace_packages(path_item, parent=None): """Ensure that previously-declared namespace packages include path_item""" imp.acquire_lock() try: for package in _namespace_packages.get(parent,()): subpath = _handle_ns(package, path_item) if subpath: fixup_namespace_packages(subpath,package) finally: imp.release_lock() def file_ns_handler(importer, path_item, packageName, module): """Compute an ns-package subpath for a filesystem or zipfile importer""" subpath = os.path.join(path_item, packageName.split('.')[-1]) normalized = _normalize_cached(subpath) for item in module.__path__: if _normalize_cached(item)==normalized: break else: # Only return the path if it's not already there return subpath register_namespace_handler(ImpWrapper,file_ns_handler) register_namespace_handler(zipimport.zipimporter,file_ns_handler) def null_ns_handler(importer, path_item, packageName, module): return None register_namespace_handler(object,null_ns_handler) def normalize_path(filename): """Normalize a file/dir name for comparison purposes""" return os.path.normcase(os.path.realpath(filename)) def _normalize_cached(filename,_cache={}): try: return _cache[filename] except KeyError: _cache[filename] = result = normalize_path(filename) return result def _set_parent_ns(packageName): parts = packageName.split('.') name = parts.pop() if parts: parent = '.'.join(parts) setattr(sys.modules[parent], name, sys.modules[packageName]) def yield_lines(strs): """Yield non-empty/non-comment lines of a ``basestring`` or sequence""" if isinstance(strs,basestring): for s in strs.splitlines(): s = s.strip() if s and not s.startswith('#'): # skip blank lines/comments yield s else: for ss in strs: for s in yield_lines(ss): yield s LINE_END = re.compile(r"\s*(#.*)?$").match # whitespace and comment CONTINUE = re.compile(r"\s*\\\s*(#.*)?$").match # line continuation DISTRO = re.compile(r"\s*((\w|[-.])+)").match # Distribution or extra VERSION = 
re.compile(r"\s*(<=?|>=?|==|!=)\s*((\w|[-.])+)").match # ver. info COMMA = re.compile(r"\s*,").match # comma between items OBRACKET = re.compile(r"\s*\[").match CBRACKET = re.compile(r"\s*\]").match MODULE = re.compile(r"\w+(\.\w+)*$").match EGG_NAME = re.compile( r"(?P[^-]+)" r"( -(?P[^-]+) (-py(?P[^-]+) (-(?P.+))? )? )?", re.VERBOSE | re.IGNORECASE ).match component_re = re.compile(r'(\d+ | [a-z]+ | \.| -)', re.VERBOSE) replace = {'pre':'c', 'preview':'c','-':'final-','rc':'c','dev':'@'}.get def _parse_version_parts(s): for part in component_re.split(s): part = replace(part,part) if not part or part=='.': continue if part[:1] in '0123456789': yield part.zfill(8) # pad for numeric comparison else: yield '*'+part yield '*final' # ensure that alpha/beta/candidate are before final def parse_version(s): """Convert a version string to a chronologically-sortable key This is a rough cross between distutils' StrictVersion and LooseVersion; if you give it versions that would work with StrictVersion, then it behaves the same; otherwise it acts like a slightly-smarter LooseVersion. It is *possible* to create pathological version coding schemes that will fool this parser, but they should be very rare in practice. The returned value will be a tuple of strings. Numeric portions of the version are padded to 8 digits so they will compare numerically, but without relying on how numbers compare relative to strings. Dots are dropped, but dashes are retained. Trailing zeros between alpha segments or dashes are suppressed, so that e.g. "2.4.0" is considered the same as "2.4". Alphanumeric parts are lower-cased. The algorithm assumes that strings like "-" and any alpha string that alphabetically follows "final" represents a "patch level". So, "2.4-1" is assumed to be a branch or patch of "2.4", and therefore "2.4.1" is considered newer than "2.4-1", which in turn is newer than "2.4". Strings like "a", "b", "c", "alpha", "beta", "candidate" and so on (that come before "final" alphabetically) are assumed to be pre-release versions, so that the version "2.4" is considered newer than "2.4a1". Finally, to handle miscellaneous cases, the strings "pre", "preview", and "rc" are treated as if they were "c", i.e. as though they were release candidates, and therefore are not as new as a version string that does not contain them, and "dev" is replaced with an '@' so that it sorts lower than than any other pre-release tag. 
""" parts = [] for part in _parse_version_parts(s.lower()): if part.startswith('*'): if part<'*final': # remove '-' before a prerelease tag while parts and parts[-1]=='*final-': parts.pop() # remove trailing zeros from each series of numeric parts while parts and parts[-1]=='00000000': parts.pop() parts.append(part) return tuple(parts) class EntryPoint(object): """Object representing an advertised importable object""" def __init__(self, name, module_name, attrs=(), extras=(), dist=None): if not MODULE(module_name): raise ValueError("Invalid module name", module_name) self.name = name self.module_name = module_name self.attrs = tuple(attrs) self.extras = Requirement.parse(("x[%s]" % ','.join(extras))).extras self.dist = dist def __str__(self): s = "%s = %s" % (self.name, self.module_name) if self.attrs: s += ':' + '.'.join(self.attrs) if self.extras: s += ' [%s]' % ','.join(self.extras) return s def __repr__(self): return "EntryPoint.parse(%r)" % str(self) def load(self, require=True, env=None, installer=None): if require: self.require(env, installer) entry = __import__(self.module_name, globals(),globals(), ['__name__']) for attr in self.attrs: try: entry = getattr(entry,attr) except AttributeError: raise ImportError("%r has no %r attribute" % (entry,attr)) return entry def require(self, env=None, installer=None): if self.extras and not self.dist: raise UnknownExtra("Can't require() without a distribution", self) map(working_set.add, working_set.resolve(self.dist.requires(self.extras),env,installer)) #@classmethod def parse(cls, src, dist=None): """Parse a single entry point from string `src` Entry point syntax follows the form:: name = some.module:some.attr [extra1,extra2] The entry name and module name are required, but the ``:attrs`` and ``[extras]`` parts are optional """ try: attrs = extras = () name,value = src.split('=',1) if '[' in value: value,extras = value.split('[',1) req = Requirement.parse("x["+extras) if req.specs: raise ValueError extras = req.extras if ':' in value: value,attrs = value.split(':',1) if not MODULE(attrs.rstrip()): raise ValueError attrs = attrs.rstrip().split('.') except ValueError: raise ValueError( "EntryPoint must be in 'name=module:attrs [extras]' format", src ) else: return cls(name.strip(), value.strip(), attrs, extras, dist) parse = classmethod(parse) #@classmethod def parse_group(cls, group, lines, dist=None): """Parse an entry point group""" if not MODULE(group): raise ValueError("Invalid group name", group) this = {} for line in yield_lines(lines): ep = cls.parse(line, dist) if ep.name in this: raise ValueError("Duplicate entry point", group, ep.name) this[ep.name]=ep return this parse_group = classmethod(parse_group) #@classmethod def parse_map(cls, data, dist=None): """Parse a map of entry point groups""" if isinstance(data,dict): data = data.items() else: data = split_sections(data) maps = {} for group, lines in data: if group is None: if not lines: continue raise ValueError("Entry points must be listed in groups") group = group.strip() if group in maps: raise ValueError("Duplicate group name", group) maps[group] = cls.parse_group(group, lines, dist) return maps parse_map = classmethod(parse_map) class Distribution(object): """Wrap an actual or potential sys.path entry w/metadata""" def __init__(self, location=None, metadata=None, project_name=None, version=None, py_version=PY_MAJOR, platform=None, precedence = EGG_DIST ): self.project_name = safe_name(project_name or 'Unknown') if version is not None: self._version = safe_version(version) 
self.py_version = py_version self.platform = platform self.location = location self.precedence = precedence self._provider = metadata or empty_provider #@classmethod def from_location(cls,location,basename,metadata=None,**kw): project_name, version, py_version, platform = [None]*4 basename, ext = os.path.splitext(basename) if ext.lower() in (".egg",".egg-info"): match = EGG_NAME(basename) if match: project_name, version, py_version, platform = match.group( 'name','ver','pyver','plat' ) return cls( location, metadata, project_name=project_name, version=version, py_version=py_version, platform=platform, **kw ) from_location = classmethod(from_location) hashcmp = property( lambda self: ( getattr(self,'parsed_version',()), self.precedence, self.key, -len(self.location or ''), self.location, self.py_version, self.platform ) ) def __cmp__(self, other): return cmp(self.hashcmp, other) def __hash__(self): return hash(self.hashcmp) # These properties have to be lazy so that we don't have to load any # metadata until/unless it's actually needed. (i.e., some distributions # may not know their name or version without loading PKG-INFO) #@property def key(self): try: return self._key except AttributeError: self._key = key = self.project_name.lower() return key key = property(key) #@property def parsed_version(self): try: return self._parsed_version except AttributeError: self._parsed_version = pv = parse_version(self.version) return pv parsed_version = property(parsed_version) #@property def version(self): try: return self._version except AttributeError: for line in self._get_metadata('PKG-INFO'): if line.lower().startswith('version:'): self._version = safe_version(line.split(':',1)[1].strip()) return self._version else: raise ValueError( "Missing 'Version:' header and/or PKG-INFO file", self ) version = property(version) #@property def _dep_map(self): try: return self.__dep_map except AttributeError: dm = self.__dep_map = {None: []} for name in 'requires.txt', 'depends.txt': for extra,reqs in split_sections(self._get_metadata(name)): if extra: extra = safe_extra(extra) dm.setdefault(extra,[]).extend(parse_requirements(reqs)) return dm _dep_map = property(_dep_map) def requires(self,extras=()): """List of Requirements needed for this distro if `extras` are used""" dm = self._dep_map deps = [] deps.extend(dm.get(None,())) for ext in extras: try: deps.extend(dm[safe_extra(ext)]) except KeyError: raise UnknownExtra( "%s has no such extra feature %r" % (self, ext) ) return deps def _get_metadata(self,name): if self.has_metadata(name): for line in self.get_metadata_lines(name): yield line def activate(self,path=None): """Ensure distribution is importable on `path` (default=sys.path)""" if path is None: path = sys.path self.insert_on(path) if path is sys.path: fixup_namespace_packages(self.location) for pkg in self._get_metadata('namespace_packages.txt'): if pkg in sys.modules: declare_namespace(pkg) def egg_name(self): """Return what this distribution's standard .egg filename should be""" filename = "%s-%s-py%s" % ( to_filename(self.project_name), to_filename(self.version), self.py_version or PY_MAJOR ) if self.platform: filename += '-'+self.platform return filename def __repr__(self): if self.location: return "%s (%s)" % (self,self.location) else: return str(self) def __str__(self): try: version = getattr(self,'version',None) except ValueError: version = None version = version or "[unknown version]" return "%s %s" % (self.project_name,version) def __getattr__(self,attr): """Delegate all unrecognized public 
attributes to .metadata provider""" if attr.startswith('_'): raise AttributeError,attr return getattr(self._provider, attr) #@classmethod def from_filename(cls,filename,metadata=None, **kw): return cls.from_location( _normalize_cached(filename), os.path.basename(filename), metadata, **kw ) from_filename = classmethod(from_filename) def as_requirement(self): """Return a ``Requirement`` that matches this distribution exactly""" return Requirement.parse('%s==%s' % (self.project_name, self.version)) def load_entry_point(self, group, name): """Return the `name` entry point of `group` or raise ImportError""" ep = self.get_entry_info(group,name) if ep is None: raise ImportError("Entry point %r not found" % ((group,name),)) return ep.load() def get_entry_map(self, group=None): """Return the entry point map for `group`, or the full entry map""" try: ep_map = self._ep_map except AttributeError: ep_map = self._ep_map = EntryPoint.parse_map( self._get_metadata('entry_points.txt'), self ) if group is not None: return ep_map.get(group,{}) return ep_map def get_entry_info(self, group, name): """Return the EntryPoint object for `group`+`name`, or ``None``""" return self.get_entry_map(group).get(name) def insert_on(self, path, loc = None): """Insert self.location in path before its nearest parent directory""" loc = loc or self.location if not loc: return nloc = _normalize_cached(loc) bdir = os.path.dirname(nloc) npath= [(p and _normalize_cached(p) or p) for p in path] bp = None for p, item in enumerate(npath): if item==nloc: break elif item==bdir and self.precedence==EGG_DIST: # if it's an .egg, give it precedence over its directory if path is sys.path: self.check_version_conflict() path.insert(p, loc) npath.insert(p, nloc) break else: if path is sys.path: self.check_version_conflict() path.append(loc) return # p is the spot where we found or inserted loc; now remove duplicates while 1: try: np = npath.index(nloc, p+1) except ValueError: break else: del npath[np], path[np] p = np # ha! 
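# --- Illustrative sketch (editor's example; not from the original source) ---
# A minimal demonstration of the helpers defined above: parse_version
# ordering, EntryPoint.parse, and Distribution.from_location. The project
# names, versions, extras and paths used here are hypothetical.
assert parse_version("2.4a1") < parse_version("2.4") < parse_version("2.4.1")
assert parse_version("2.4.0") == parse_version("2.4")   # trailing zeros are dropped

ep = EntryPoint.parse("my-cmd = some.module:some_func [extra1]")
# ep.name == 'my-cmd'; ep.module_name == 'some.module'
# ep.attrs == ('some_func',); ep.extras == ('extra1',)

dist = Distribution.from_location('/plugins/Foo-1.2-py2.6.egg', 'Foo-1.2-py2.6.egg')
# EGG_NAME parses the basename: project_name 'Foo', version '1.2', py_version '2.6'
# dist.egg_name()            -> 'Foo-1.2-py2.6'
# str(dist.as_requirement()) -> 'Foo==1.2'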
return def check_version_conflict(self): if self.key=='setuptools': return # ignore the inevitable setuptools self-conflicts :( nsp = dict.fromkeys(self._get_metadata('namespace_packages.txt')) loc = normalize_path(self.location) for modname in self._get_metadata('top_level.txt'): if (modname not in sys.modules or modname in nsp or modname in _namespace_packages ): continue fn = getattr(sys.modules[modname], '__file__', None) if fn and (normalize_path(fn).startswith(loc) or fn.startswith(loc)): continue issue_warning( "Module %s was already imported from %s, but %s is being added" " to sys.path" % (modname, fn, self.location), ) def has_version(self): try: self.version except ValueError: issue_warning("Unbuilt egg for "+repr(self)) return False return True def clone(self,**kw): """Copy this distribution, substituting in any changed keyword args""" for attr in ( 'project_name', 'version', 'py_version', 'platform', 'location', 'precedence' ): kw.setdefault(attr, getattr(self,attr,None)) kw.setdefault('metadata', self._provider) return self.__class__(**kw) #@property def extras(self): return [dep for dep in self._dep_map if dep] extras = property(extras) def issue_warning(*args,**kw): level = 1 g = globals() try: # find the first stack frame that is *not* code in # the pkg_resources module, to use for the warning while sys._getframe(level).f_globals is g: level += 1 except ValueError: pass from warnings import warn warn(stacklevel = level+1, *args, **kw) def parse_requirements(strs): """Yield ``Requirement`` objects for each specification in `strs` `strs` must be an instance of ``basestring``, or a (possibly-nested) iterable thereof. """ # create a steppable iterator, so we can handle \-continuations lines = iter(yield_lines(strs)) def scan_list(ITEM,TERMINATOR,line,p,groups,item_name): items = [] while not TERMINATOR(line,p): if CONTINUE(line,p): try: line = lines.next(); p = 0 except StopIteration: raise ValueError( "\\ must not appear on the last nonblank line" ) match = ITEM(line,p) if not match: raise ValueError("Expected "+item_name+" in",line,"at",line[p:]) items.append(match.group(*groups)) p = match.end() match = COMMA(line,p) if match: p = match.end() # skip the comma elif not TERMINATOR(line,p): raise ValueError( "Expected ',' or end-of-list in",line,"at",line[p:] ) match = TERMINATOR(line,p) if match: p = match.end() # skip the terminator, if any return line, p, items for line in lines: match = DISTRO(line) if not match: raise ValueError("Missing distribution spec", line) project_name = match.group(1) p = match.end() extras = [] match = OBRACKET(line,p) if match: p = match.end() line, p, extras = scan_list( DISTRO, CBRACKET, line, p, (1,), "'extra' name" ) line, p, specs = scan_list(VERSION,LINE_END,line,p,(1,2),"version spec") specs = [(op,safe_version(val)) for op,val in specs] yield Requirement(project_name, specs, extras) def _sort_dists(dists): tmp = [(dist.hashcmp,dist) for dist in dists] tmp.sort() dists[::-1] = [d for hc,d in tmp] class Requirement: def __init__(self, project_name, specs, extras): """DO NOT CALL THIS UNDOCUMENTED METHOD; use Requirement.parse()!""" self.unsafe_name, project_name = project_name, safe_name(project_name) self.project_name, self.key = project_name, project_name.lower() index = [(parse_version(v),state_machine[op],op,v) for op,v in specs] index.sort() self.specs = [(op,ver) for parsed,trans,op,ver in index] self.index, self.extras = index, tuple(map(safe_extra,extras)) self.hashCmp = ( self.key, tuple([(op,parsed) for parsed,trans,op,ver in 
index]), frozenset(self.extras) ) self.__hash = hash(self.hashCmp) def __str__(self): specs = ','.join([''.join(s) for s in self.specs]) extras = ','.join(self.extras) if extras: extras = '[%s]' % extras return '%s%s%s' % (self.project_name, extras, specs) def __eq__(self,other): return isinstance(other,Requirement) and self.hashCmp==other.hashCmp def __contains__(self,item): if isinstance(item,Distribution): if item.key != self.key: return False if self.index: item = item.parsed_version # only get if we need it elif isinstance(item,basestring): item = parse_version(item) last = None for parsed,trans,op,ver in self.index: action = trans[cmp(item,parsed)] if action=='F': return False elif action=='T': return True elif action=='+': last = True elif action=='-' or last is None: last = False if last is None: last = True # no rules encountered return last def __hash__(self): return self.__hash def __repr__(self): return "Requirement.parse(%r)" % str(self) #@staticmethod def parse(s): reqs = list(parse_requirements(s)) if reqs: if len(reqs)==1: return reqs[0] raise ValueError("Expected only one requirement", s) raise ValueError("No requirements found", s) parse = staticmethod(parse) state_machine = { # =>< '<' : '--T', '<=': 'T-T', '>' : 'F+F', '>=': 'T+F', '==': 'T..', '!=': 'F++', } def _get_mro(cls): """Get an mro for a type or classic class""" if not isinstance(cls,type): class cls(cls,object): pass return cls.__mro__[1:] return cls.__mro__ def _find_adapter(registry, ob): """Return an adapter factory for `ob` from `registry`""" for t in _get_mro(getattr(ob, '__class__', type(ob))): if t in registry: return registry[t] def ensure_directory(path): """Ensure that the parent directory of `path` exists""" dirname = os.path.dirname(path) if not os.path.isdir(dirname): os.makedirs(dirname) def split_sections(s): """Split a string or iterable thereof into (section,content) pairs Each ``section`` is a stripped version of the section header ("[section]") and each ``content`` is a list of stripped lines excluding blank lines and comment-only lines. If there are any such lines before the first section header, they're returned in a first ``section`` of ``None``. """ section = None content = [] for line in yield_lines(s): if line.startswith("["): if line.endswith("]"): if section or content: yield section, content section = line[1:-1].strip() content = [] else: raise ValueError("Invalid section heading", line) else: content.append(line) # wrap up last segment yield section, content def _mkstemp(*args,**kw): from tempfile import mkstemp old_open = os.open try: os.open = os_open # temporarily bypass sandboxing return mkstemp(*args,**kw) finally: os.open = old_open # and then put it back # Set up global resource manager (deliberately not state-saved) _manager = ResourceManager() def _initialize(g): for name in dir(_manager): if not name.startswith('_'): g[name] = getattr(_manager, name) _initialize(globals()) # Prepare the master working set and make the ``require()`` API available __requires__ = None _declare_state('object', working_set = WorkingSet()) try: # Does the main program list any requirements? 
from __main__ import __requires__ except ImportError: pass # No: just use the default working set based on sys.path else: # Yes: ensure the requirements are met, by prefixing sys.path if necessary try: working_set.require(__requires__) except (VersionConflict, DistributionNotFound): # try it without defaults already on sys.path working_set = WorkingSet([]) # by starting with an empty path try: for dist in working_set.resolve( parse_requirements(__requires__), Environment() ): working_set.add(dist) except DistributionNotFound: pass for entry in sys.path: # add any missing entries from sys.path if entry not in working_set.entries: working_set.add_entry(entry) sys.path[:] = working_set.entries # then copy back to sys.path require = working_set.require iter_entry_points = working_set.iter_entry_points add_activation_listener = working_set.subscribe run_script = working_set.run_script run_main = run_script # backward compatibility # Activate all distributions already on sys.path, and ensure that # all distributions added to the working set in the future (e.g. by # calling ``require()``) will get activated as well. add_activation_listener(lambda dist: dist.activate()) working_set.entries=[]; map(working_set.add_entry,sys.path) # match order tahoe-lafs-1.10.0/setuptools-0.6c16dev4.egg/setuptools/000077500000000000000000000000001221140116300224045ustar00rootroot00000000000000tahoe-lafs-1.10.0/setuptools-0.6c16dev4.egg/setuptools/__init__.py000066400000000000000000000060571221140116300245250ustar00rootroot00000000000000"""Extensions to the 'distutils' for large or complex distributions""" from setuptools.extension import Extension, Library from setuptools.dist import Distribution, Feature, _get_unpatched import distutils.core, setuptools.command from setuptools.depends import Require from distutils.core import Command as _Command from distutils.util import convert_path import os.path import os import sys __version__ = '0.6c16dev4' __all__ = [ 'setup', 'Distribution', 'Feature', 'Command', 'Extension', 'Require', 'find_packages' ] bootstrap_install_from = None def find_packages(where='.', exclude=()): """Return a list all Python packages found within directory 'where' 'where' should be supplied as a "cross-platform" (i.e. URL-style) path; it will be converted to the appropriate local path syntax. 'exclude' is a sequence of package names to exclude; '*' can be used as a wildcard in the names, such that 'foo.*' will exclude all subpackages of 'foo' (but not 'foo' itself). """ out = [] stack=[(convert_path(where), '')] while stack: where,prefix = stack.pop(0) for name in os.listdir(where): fn = os.path.join(where,name) if ('.' 
not in name and os.path.isdir(fn) and os.path.isfile(os.path.join(fn,'__init__.py')) ): out.append(prefix+name); stack.append((fn,prefix+name+'.')) for pat in list(exclude)+['ez_setup']: from fnmatch import fnmatchcase out = [item for item in out if not fnmatchcase(item,pat)] return out setup = distutils.core.setup _Command = _get_unpatched(_Command) class Command(_Command): __doc__ = _Command.__doc__ command_consumes_arguments = False def __init__(self, dist, **kw): # Add support for keyword arguments _Command.__init__(self,dist) for k,v in kw.items(): setattr(self,k,v) def reinitialize_command(self, command, reinit_subcommands=0, **kw): cmd = _Command.reinitialize_command(self, command, reinit_subcommands) for k,v in kw.items(): setattr(cmd,k,v) # update command with keywords return cmd import distutils.core distutils.core.Command = Command # we can't patch distutils.cmd, alas def findall(dir = os.curdir): """Find all files under 'dir' and return the list of full filenames (relative to 'dir'). """ all_files = [] for base, dirs, files in os.walk(dir): if base==os.curdir or base.startswith(os.curdir+os.sep): base = base[2:] if base: files = [os.path.join(base, f) for f in files] all_files.extend(filter(os.path.isfile, files)) return all_files import distutils.filelist distutils.filelist.findall = findall # fix findall bug in distutils. # sys.dont_write_bytecode was introduced in Python 2.6. if ((hasattr(sys, "dont_write_bytecode") and sys.dont_write_bytecode) or (not hasattr(sys, "dont_write_bytecode") and os.environ.get("PYTHONDONTWRITEBYTECODE"))): _dont_write_bytecode = True else: _dont_write_bytecode = False tahoe-lafs-1.10.0/setuptools-0.6c16dev4.egg/setuptools/archive_util.py000066400000000000000000000134211221140116300254350ustar00rootroot00000000000000"""Utilities for extracting common archive formats""" __all__ = [ "unpack_archive", "unpack_zipfile", "unpack_tarfile", "default_filter", "UnrecognizedFormat", "extraction_drivers", "unpack_directory", ] import zipfile, tarfile, os, shutil from pkg_resources import ensure_directory from distutils.errors import DistutilsError class UnrecognizedFormat(DistutilsError): """Couldn't recognize the archive type""" def default_filter(src,dst): """The default progress/filter callback; returns True for all files""" return dst def unpack_archive(filename, extract_dir, progress_filter=default_filter, drivers=None ): """Unpack `filename` to `extract_dir`, or raise ``UnrecognizedFormat`` `progress_filter` is a function taking two arguments: a source path internal to the archive ('/'-separated), and a filesystem path where it will be extracted. The callback must return the desired extract path (which may be the same as the one passed in), or else ``None`` to skip that file or directory. The callback can thus be used to report on the progress of the extraction, as well as to filter the items extracted or alter their extraction paths. `drivers`, if supplied, must be a non-empty sequence of functions with the same signature as this function (minus the `drivers` argument), that raise ``UnrecognizedFormat`` if they do not support extracting the designated archive type. The `drivers` are tried in sequence until one is found that does not raise an error, or until all are exhausted (in which case ``UnrecognizedFormat`` is raised). If you do not supply a sequence of drivers, the module's ``extraction_drivers`` constant will be used, which means that ``unpack_zipfile`` and ``unpack_tarfile`` will be tried, in that order. 
""" for driver in drivers or extraction_drivers: try: driver(filename, extract_dir, progress_filter) except UnrecognizedFormat: continue else: return else: raise UnrecognizedFormat( "Not a recognized archive type: %s" % filename ) def unpack_directory(filename, extract_dir, progress_filter=default_filter): """"Unpack" a directory, using the same interface as for archives Raises ``UnrecognizedFormat`` if `filename` is not a directory """ if not os.path.isdir(filename): raise UnrecognizedFormat("%s is not a directory" % (filename,)) paths = {filename:('',extract_dir)} for base, dirs, files in os.walk(filename): src,dst = paths[base] for d in dirs: paths[os.path.join(base,d)] = src+d+'/', os.path.join(dst,d) for f in files: name = src+f target = os.path.join(dst,f) target = progress_filter(src+f, target) if not target: continue # skip non-files ensure_directory(target) f = os.path.join(base,f) shutil.copyfile(f, target) shutil.copystat(f, target) def unpack_zipfile(filename, extract_dir, progress_filter=default_filter): """Unpack zip `filename` to `extract_dir` Raises ``UnrecognizedFormat`` if `filename` is not a zipfile (as determined by ``zipfile.is_zipfile()``). See ``unpack_archive()`` for an explanation of the `progress_filter` argument. """ if not zipfile.is_zipfile(filename): raise UnrecognizedFormat("%s is not a zip file" % (filename,)) z = zipfile.ZipFile(filename) try: for info in z.infolist(): name = info.filename # don't extract absolute paths or ones with .. in them if name.startswith('/') or '..' in name: continue target = os.path.join(extract_dir, *name.split('/')) target = progress_filter(name, target) if not target: continue if name.endswith('/'): # directory ensure_directory(target) else: # file ensure_directory(target) data = z.read(info.filename) f = open(target,'wb') try: f.write(data) finally: f.close() del data finally: z.close() def unpack_tarfile(filename, extract_dir, progress_filter=default_filter): """Unpack tar/tar.gz/tar.bz2 `filename` to `extract_dir` Raises ``UnrecognizedFormat`` if `filename` is not a tarfile (as determined by ``tarfile.open()``). See ``unpack_archive()`` for an explanation of the `progress_filter` argument. """ try: tarobj = tarfile.open(filename) except tarfile.TarError: raise UnrecognizedFormat( "%s is not a compressed or uncompressed tar file" % (filename,) ) try: tarobj.chown = lambda *args: None # don't do any chowning! for member in tarobj: if member.isfile() or member.isdir(): name = member.name # don't extract absolute paths or ones with .. in them if not name.startswith('/') and '..' 
not in name: dst = os.path.join(extract_dir, *name.split('/')) dst = progress_filter(name, dst) if dst: if dst.endswith(os.sep): dst = dst[:-1] try: tarobj._extract_member(member,dst) # XXX Ugh except tarfile.ExtractError: pass # chown/chmod/mkfifo/mknode/makedev failed return True finally: tarobj.close() extraction_drivers = unpack_directory, unpack_zipfile, unpack_tarfile tahoe-lafs-1.10.0/setuptools-0.6c16dev4.egg/setuptools/command/000077500000000000000000000000001221140116300240225ustar00rootroot00000000000000tahoe-lafs-1.10.0/setuptools-0.6c16dev4.egg/setuptools/command/__init__.py000066400000000000000000000012021221140116300261260ustar00rootroot00000000000000__all__ = [ 'alias', 'bdist_egg', 'bdist_rpm', 'build_ext', 'build_py', 'develop', 'easy_install', 'egg_info', 'install', 'install_lib', 'rotate', 'saveopts', 'sdist', 'setopt', 'test', 'upload', 'install_egg_info', 'install_scripts', 'register', 'bdist_wininst', 'scriptsetup', ] import sys if sys.version>='2.5': # In Python 2.5 and above, distutils includes its own upload command __all__.remove('upload') from distutils.command.bdist import bdist if 'egg' not in bdist.format_commands: bdist.format_command['egg'] = ('bdist_egg', "Python .egg file") bdist.format_commands.append('egg') del bdist, sys tahoe-lafs-1.10.0/setuptools-0.6c16dev4.egg/setuptools/command/alias.py000066400000000000000000000046211221140116300254700ustar00rootroot00000000000000import distutils, os from setuptools import Command from distutils.util import convert_path from distutils import log from distutils.errors import * from setuptools.command.setopt import edit_config, option_base, config_file def shquote(arg): """Quote an argument for later parsing by shlex.split()""" for c in '"', "'", "\\", "#": if c in arg: return repr(arg) if arg.split()!=[arg]: return repr(arg) return arg class alias(option_base): """Define a shortcut that invokes one or more commands""" description = "define a shortcut to invoke one or more commands" command_consumes_arguments = True user_options = [ ('remove', 'r', 'remove (unset) the alias'), ] + option_base.user_options boolean_options = option_base.boolean_options + ['remove'] def initialize_options(self): option_base.initialize_options(self) self.args = None self.remove = None def finalize_options(self): option_base.finalize_options(self) if self.remove and len(self.args)!=1: raise DistutilsOptionError( "Must specify exactly one argument (the alias name) when " "using --remove" ) def run(self): aliases = self.distribution.get_option_dict('aliases') if not self.args: print "Command Aliases" print "---------------" for alias in aliases: print "setup.py alias", format_alias(alias, aliases) return elif len(self.args)==1: alias, = self.args if self.remove: command = None elif alias in aliases: print "setup.py alias", format_alias(alias, aliases) return else: print "No alias definition found for %r" % alias return else: alias = self.args[0] command = ' '.join(map(shquote,self.args[1:])) edit_config(self.filename, {'aliases': {alias:command}}, self.dry_run) def format_alias(name, aliases): source, command = aliases[name] if source == config_file('global'): source = '--global-config ' elif source == config_file('user'): source = '--user-config ' elif source == config_file('local'): source = '' else: source = '--filename=%r' % source return source+name+' '+command 
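# --- Illustrative sketch (editor's example; not from the original source) ---
# Example of the progress_filter contract described in unpack_archive()'s
# docstring above: return None to skip a member, or return the (possibly
# altered) destination path to extract it. The archive filename and target
# directory below are hypothetical.
def skip_docs_filter(src, dst):
    if src.endswith('.txt'):
        return None               # skip documentation files
    print("extracting %s" % src)  # report progress
    return dst

unpack_archive('Example-1.0-py2.6.egg', '/tmp/unpacked',
               progress_filter=skip_docs_filter)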
tahoe-lafs-1.10.0/setuptools-0.6c16dev4.egg/setuptools/command/bdist_egg.py000066400000000000000000000431251221140116300263300ustar00rootroot00000000000000"""setuptools.command.bdist_egg Build .egg distributions""" # This module should be kept compatible with Python 2.3 import sys, os, marshal from setuptools import Command from distutils.dir_util import remove_tree, mkpath from distutils.sysconfig import get_python_version, get_python_lib from distutils import log from distutils.errors import DistutilsSetupError from pkg_resources import get_build_platform, Distribution, ensure_directory from pkg_resources import EntryPoint from types import CodeType from setuptools.extension import Library def strip_module(filename): if '.' in filename: filename = os.path.splitext(filename)[0] if filename.endswith('module'): filename = filename[:-6] return filename def write_stub(resource, pyfile): f = open(pyfile,'w') f.write('\n'.join([ "def __bootstrap__():", " global __bootstrap__, __loader__, __file__", " import sys, pkg_resources, imp", " __file__ = pkg_resources.resource_filename(__name__,%r)" % resource, " __loader__ = None; del __bootstrap__, __loader__", " imp.load_dynamic(__name__,__file__)", "__bootstrap__()", "" # terminal \n ])) f.close() # stub __init__.py for packages distributed without one NS_PKG_STUB = '__import__("pkg_resources").declare_namespace(__name__)' class bdist_egg(Command): description = "create an \"egg\" distribution" user_options = [ ('bdist-dir=', 'b', "temporary directory for creating the distribution"), ('plat-name=', 'p', "platform name to embed in generated filenames " "(default: %s)" % get_build_platform()), ('exclude-source-files', None, "remove all .py files from the generated egg"), ('keep-temp', 'k', "keep the pseudo-installation tree around after " + "creating the distribution archive"), ('dist-dir=', 'd', "directory to put final built distributions in"), ('skip-build', None, "skip rebuilding everything (for testing/debugging)"), ] boolean_options = [ 'keep-temp', 'skip-build', 'exclude-source-files' ] def initialize_options (self): self.bdist_dir = None self.plat_name = None self.keep_temp = 0 self.dist_dir = None self.skip_build = 0 self.egg_output = None self.exclude_source_files = None def finalize_options(self): ei_cmd = self.ei_cmd = self.get_finalized_command("egg_info") self.egg_info = ei_cmd.egg_info if self.bdist_dir is None: bdist_base = self.get_finalized_command('bdist').bdist_base self.bdist_dir = os.path.join(bdist_base, 'egg') if self.plat_name is None: self.plat_name = get_build_platform() self.set_undefined_options('bdist',('dist_dir', 'dist_dir')) if self.egg_output is None: # Compute filename of the output egg basename = Distribution( None, None, ei_cmd.egg_name, ei_cmd.egg_version, get_python_version(), self.distribution.has_ext_modules() and self.plat_name ).egg_name() self.egg_output = os.path.join(self.dist_dir, basename+'.egg') def do_install_data(self): # Hack for packages that install data to install's --install-lib self.get_finalized_command('install').install_lib = self.bdist_dir site_packages = os.path.normcase(os.path.realpath(get_python_lib())) old, self.distribution.data_files = self.distribution.data_files,[] for item in old: if isinstance(item,tuple) and len(item)==2: if os.path.isabs(item[0]): realpath = os.path.realpath(item[0]) normalized = os.path.normcase(realpath) if normalized==site_packages or normalized.startswith( site_packages+os.sep ): item = realpath[len(site_packages)+1:], item[1] # XXX else: raise ??? 
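# --- Illustrative sketch (editor's example; not from the original source) ---
# How finalize_options() above derives the output egg filename, shown as a
# stand-alone snippet; the project name and version are hypothetical.
from distutils.sysconfig import get_python_version
from pkg_resources import Distribution, get_build_platform
basename = Distribution(
    None, None, 'example_pkg', '1.0',
    get_python_version(), get_build_platform()
).egg_name()
# e.g. 'example_pkg-1.0-py2.6-linux-x86_64'; bdist_egg then appends '.egg'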
self.distribution.data_files.append(item) try: log.info("installing package data to %s" % self.bdist_dir) self.call_command('install_data', force=0, root=None) finally: self.distribution.data_files = old def get_outputs(self): return [self.egg_output] def call_command(self,cmdname,**kw): """Invoke reinitialized command `cmdname` with keyword args""" for dirname in INSTALL_DIRECTORY_ATTRS: kw.setdefault(dirname,self.bdist_dir) kw.setdefault('skip_build',self.skip_build) kw.setdefault('dry_run', self.dry_run) cmd = self.reinitialize_command(cmdname, **kw) self.run_command(cmdname) return cmd def run(self): # Generate metadata first self.run_command("egg_info") # We run install_lib before install_data, because some data hacks # pull their data path from the install_lib command. log.info("installing library code to %s" % self.bdist_dir) instcmd = self.get_finalized_command('install') old_root = instcmd.root; instcmd.root = None if self.distribution.has_c_libraries() and not self.skip_build: self.run_command('build_clib') cmd = self.call_command('install_lib', warn_dir=0) instcmd.root = old_root all_outputs, ext_outputs = self.get_ext_outputs() self.stubs = [] to_compile = [] for (p,ext_name) in enumerate(ext_outputs): filename,ext = os.path.splitext(ext_name) pyfile = os.path.join(self.bdist_dir, strip_module(filename)+'.py') self.stubs.append(pyfile) log.info("creating stub loader for %s" % ext_name) if not self.dry_run: write_stub(os.path.basename(ext_name), pyfile) to_compile.append(pyfile) ext_outputs[p] = ext_name.replace(os.sep,'/') to_compile.extend(self.make_init_files()) if to_compile: cmd.byte_compile(to_compile) if self.distribution.data_files: self.do_install_data() # Make the EGG-INFO directory archive_root = self.bdist_dir egg_info = os.path.join(archive_root,'EGG-INFO') self.mkpath(egg_info) if self.distribution.scripts: script_dir = os.path.join(egg_info, 'scripts') log.info("installing scripts to %s" % script_dir) self.call_command('install_scripts',install_dir=script_dir,no_ep=1) self.copy_metadata_to(egg_info) native_libs = os.path.join(egg_info, "native_libs.txt") if all_outputs: log.info("writing %s" % native_libs) if not self.dry_run: ensure_directory(native_libs) libs_file = open(native_libs, 'wt') libs_file.write('\n'.join(all_outputs)) libs_file.write('\n') libs_file.close() elif os.path.isfile(native_libs): log.info("removing %s" % native_libs) if not self.dry_run: os.unlink(native_libs) write_safety_flag( os.path.join(archive_root,'EGG-INFO'), self.zip_safe() ) if os.path.exists(os.path.join(self.egg_info,'depends.txt')): log.warn( "WARNING: 'depends.txt' will not be used by setuptools 0.6!\n" "Use the install_requires/extras_require setup() args instead." 
) if self.exclude_source_files: self.zap_pyfiles() # Make the archive make_zipfile(self.egg_output, archive_root, verbose=self.verbose, dry_run=self.dry_run, mode=self.gen_header()) if not self.keep_temp: remove_tree(self.bdist_dir, dry_run=self.dry_run) # Add to 'Distribution.dist_files' so that the "upload" command works getattr(self.distribution,'dist_files',[]).append( ('bdist_egg',get_python_version(),self.egg_output)) def zap_pyfiles(self): log.info("Removing .py files from temporary directory") for base,dirs,files in walk_egg(self.bdist_dir): for name in files: if name.endswith('.py'): path = os.path.join(base,name) log.debug("Deleting %s", path) os.unlink(path) def zip_safe(self): safe = getattr(self.distribution,'zip_safe',None) if safe is not None: return safe log.warn("zip_safe flag not set; analyzing archive contents...") return analyze_egg(self.bdist_dir, self.stubs) def make_init_files(self): """Create missing package __init__ files""" init_files = [] for base,dirs,files in walk_egg(self.bdist_dir): if base==self.bdist_dir: # don't put an __init__ in the root continue for name in files: if name.endswith('.py'): if '__init__.py' not in files: pkg = base[len(self.bdist_dir)+1:].replace(os.sep,'.') if self.distribution.has_contents_for(pkg): log.warn("Creating missing __init__.py for %s",pkg) filename = os.path.join(base,'__init__.py') if not self.dry_run: f = open(filename,'w'); f.write(NS_PKG_STUB) f.close() init_files.append(filename) break else: # not a package, don't traverse to subdirectories dirs[:] = [] return init_files def gen_header(self): epm = EntryPoint.parse_map(self.distribution.entry_points or '') ep = epm.get('setuptools.installation',{}).get('eggsecutable') if ep is None: return 'w' # not an eggsecutable, do it the usual way. 
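# --- Illustrative sketch (editor's example; not from the original source) ---
# gen_header() above looks for an 'eggsecutable' entry point in the
# 'setuptools.installation' group; a project declares one in setup.py roughly
# like this (the project and callable names are hypothetical). Note that the
# entry point must name a callable and may not declare extras.
from setuptools import setup
setup(
    name='example',
    version='1.0',
    entry_points={
        'setuptools.installation': [
            'eggsecutable = example.main:run',
        ],
    },
)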
if not ep.attrs or ep.extras: raise DistutilsSetupError( "eggsecutable entry point (%r) cannot have 'extras' " "or refer to a module" % (ep,) ) pyver = sys.version[:3] pkg = ep.module_name full = '.'.join(ep.attrs) base = ep.attrs[0] basename = os.path.basename(self.egg_output) header = ( "#!/bin/sh\n" 'if [ `basename $0` = "%(basename)s" ]\n' 'then exec python%(pyver)s -c "' "import sys, os; sys.path.insert(0, os.path.abspath('$0')); " "from %(pkg)s import %(base)s; sys.exit(%(full)s())" '" "$@"\n' 'else\n' ' echo $0 is not the correct name for this egg file.\n' ' echo Please rename it back to %(basename)s and try again.\n' ' exec false\n' 'fi\n' ) % locals() if not self.dry_run: mkpath(os.path.dirname(self.egg_output), dry_run=self.dry_run) f = open(self.egg_output, 'w') f.write(header) f.close() return 'a' def copy_metadata_to(self, target_dir): prefix = os.path.join(self.egg_info,'') for path in self.ei_cmd.filelist.files: if path.startswith(prefix): target = os.path.join(target_dir, path[len(prefix):]) ensure_directory(target) self.copy_file(path, target) def get_ext_outputs(self): """Get a list of relative paths to C extensions in the output distro""" all_outputs = [] ext_outputs = [] paths = {self.bdist_dir:''} for base, dirs, files in os.walk(self.bdist_dir): for filename in files: if os.path.splitext(filename)[1].lower() in NATIVE_EXTENSIONS: all_outputs.append(paths[base]+filename) for filename in dirs: paths[os.path.join(base,filename)] = paths[base]+filename+'/' if self.distribution.has_ext_modules(): build_cmd = self.get_finalized_command('build_ext') for ext in build_cmd.extensions: if isinstance(ext,Library): continue fullname = build_cmd.get_ext_fullname(ext.name) filename = build_cmd.get_ext_filename(fullname) if not os.path.basename(filename).startswith('dl-'): if os.path.exists(os.path.join(self.bdist_dir,filename)): ext_outputs.append(filename) return all_outputs, ext_outputs NATIVE_EXTENSIONS = dict.fromkeys('.dll .so .dylib .pyd'.split()) def walk_egg(egg_dir): """Walk an unpacked egg's contents, skipping the metadata directory""" walker = os.walk(egg_dir) base,dirs,files = walker.next() if 'EGG-INFO' in dirs: dirs.remove('EGG-INFO') yield base,dirs,files for bdf in walker: yield bdf def analyze_egg(egg_dir, stubs): # check for existing flag in EGG-INFO for flag,fn in safety_flags.items(): if os.path.exists(os.path.join(egg_dir,'EGG-INFO',fn)): return flag if not can_scan(): return False safe = True for base, dirs, files in walk_egg(egg_dir): for name in files: if name.endswith('.py') or name.endswith('.pyw'): continue elif name.endswith('.pyc') or name.endswith('.pyo'): # always scan, even if we already know we're not safe safe = scan_module(egg_dir, base, name, stubs) and safe return safe def write_safety_flag(egg_dir, safe): # Write or remove zip safety flag file(s) for flag,fn in safety_flags.items(): fn = os.path.join(egg_dir, fn) if os.path.exists(fn): if safe is None or bool(safe)!=flag: os.unlink(fn) elif safe is not None and bool(safe)==flag: f=open(fn,'wb'); f.write('\n'); f.close() safety_flags = { True: 'zip-safe', False: 'not-zip-safe', } def scan_module(egg_dir, base, name, stubs): """Check whether module possibly uses unsafe-for-zipfile stuff""" filename = os.path.join(base,name) if filename[:-1] in stubs: return True # Extension module pkg = base[len(egg_dir)+1:].replace(os.sep,'.') module = pkg+(pkg and '.' 
or '')+os.path.splitext(name)[0] f = open(filename,'rb'); f.read(8) # skip magic & date code = marshal.load(f); f.close() safe = True symbols = dict.fromkeys(iter_symbols(code)) for bad in ['__file__', '__path__']: if bad in symbols: log.warn("%s: module references %s", module, bad) safe = False if 'inspect' in symbols: for bad in [ 'getsource', 'getabsfile', 'getsourcefile', 'getfile' 'getsourcelines', 'findsource', 'getcomments', 'getframeinfo', 'getinnerframes', 'getouterframes', 'stack', 'trace' ]: if bad in symbols: log.warn("%s: module MAY be using inspect.%s", module, bad) safe = False if '__name__' in symbols and '__main__' in symbols and '.' not in module: if sys.version[:3]=="2.4": # -m works w/zipfiles in 2.5 log.warn("%s: top-level module may be 'python -m' script", module) safe = False return safe def iter_symbols(code): """Yield names and strings used by `code` and its nested code objects""" for name in code.co_names: yield name for const in code.co_consts: if isinstance(const,basestring): yield const elif isinstance(const,CodeType): for name in iter_symbols(const): yield name def can_scan(): if not sys.platform.startswith('java') and sys.platform != 'cli': # CPython, PyPy, etc. return True log.warn("Unable to analyze compiled code on this platform.") log.warn("Please ask the author to include a 'zip_safe'" " setting (either True or False) in the package's setup.py") # Attribute names of options for commands that might need to be convinced to # install to the egg build directory INSTALL_DIRECTORY_ATTRS = [ 'install_lib', 'install_dir', 'install_data', 'install_base' ] def make_zipfile(zip_filename, base_dir, verbose=0, dry_run=0, compress=None, mode='w' ): """Create a zip file from all the files under 'base_dir'. The output zip file will be named 'base_dir' + ".zip". Uses either the "zipfile" Python module (if available) or the InfoZIP "zip" utility (if installed and found on the default search path). If neither tool is available, raises DistutilsExecError. Returns the name of the output zip file. """ import zipfile mkpath(os.path.dirname(zip_filename), dry_run=dry_run) log.info("creating '%s' and adding '%s' to it", zip_filename, base_dir) def visit(z, dirname, names): for name in names: path = os.path.normpath(os.path.join(dirname, name)) if os.path.isfile(path): p = path[len(base_dir)+1:] if not dry_run: z.write(path, p) log.debug("adding '%s'" % p) if compress is None: compress = (sys.version>="2.4") # avoid 2.3 zipimport bug when 64 bits compression = [zipfile.ZIP_STORED, zipfile.ZIP_DEFLATED][bool(compress)] if not dry_run: z = zipfile.ZipFile(zip_filename, mode, compression=compression) os.path.walk(base_dir, visit, z) z.close() else: os.path.walk(base_dir, visit, None) return zip_filename # tahoe-lafs-1.10.0/setuptools-0.6c16dev4.egg/setuptools/command/bdist_rpm.py000066400000000000000000000037511221140116300263650ustar00rootroot00000000000000# This is just a kludge so that bdist_rpm doesn't guess wrong about the # distribution name and version, if the egg_info command is going to alter # them, another kludge to allow you to build old-style non-egg RPMs, and # finally, a kludge to track .rpm files for uploading when run on Python <2.5. 
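# --- Illustrative sketch (editor's example; not from the original source) ---
# Why analyze_egg()/scan_module() above treat __file__ references as unsafe:
# code like the first line breaks when the package is imported from a zipped
# .egg, because the resource is not a real file on disk; pkg_resources offers
# a zip-safe alternative. The resource name 'defaults.cfg' is hypothetical.
import os
cfg = open(os.path.join(os.path.dirname(__file__), 'defaults.cfg')).read()  # not zip-safe
from pkg_resources import resource_string
cfg = resource_string(__name__, 'defaults.cfg')                             # zip-safe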
from distutils.command.bdist_rpm import bdist_rpm as _bdist_rpm import sys, os class bdist_rpm(_bdist_rpm): def initialize_options(self): _bdist_rpm.initialize_options(self) self.no_egg = None if sys.version<"2.5": # Track for uploading any .rpm file(s) moved to self.dist_dir def move_file(self, src, dst, level=1): _bdist_rpm.move_file(self, src, dst, level) if dst==self.dist_dir and src.endswith('.rpm'): getattr(self.distribution,'dist_files',[]).append( ('bdist_rpm', src.endswith('.src.rpm') and 'any' or sys.version[:3], os.path.join(dst, os.path.basename(src))) ) def run(self): self.run_command('egg_info') # ensure distro name is up-to-date _bdist_rpm.run(self) def _make_spec_file(self): version = self.distribution.get_version() rpmversion = version.replace('-','_') spec = _bdist_rpm._make_spec_file(self) line23 = '%define version '+version line24 = '%define version '+rpmversion spec = [ line.replace( "Source0: %{name}-%{version}.tar", "Source0: %{name}-%{unmangled_version}.tar" ).replace( "setup.py install ", "setup.py install --single-version-externally-managed " ).replace( "%setup", "%setup -n %{name}-%{unmangled_version}" ).replace(line23,line24) for line in spec ] spec.insert(spec.index(line24)+1, "%define unmangled_version "+version) return spec tahoe-lafs-1.10.0/setuptools-0.6c16dev4.egg/setuptools/command/bdist_wininst.py000066400000000000000000000043531221140116300272610ustar00rootroot00000000000000from distutils.command.bdist_wininst import bdist_wininst as _bdist_wininst import os, sys class bdist_wininst(_bdist_wininst): _good_upload = _bad_upload = None def create_exe(self, arcname, fullname, bitmap=None): _bdist_wininst.create_exe(self, arcname, fullname, bitmap) installer_name = self.get_installer_filename(fullname) if self.target_version: pyversion = self.target_version # fix 2.5+ bdist_wininst ignoring --target-version spec self._bad_upload = ('bdist_wininst', 'any', installer_name) else: pyversion = 'any' self._good_upload = ('bdist_wininst', pyversion, installer_name) def _fix_upload_names(self): good, bad = self._good_upload, self._bad_upload dist_files = getattr(self.distribution, 'dist_files', []) if bad in dist_files: dist_files.remove(bad) if good not in dist_files: dist_files.append(good) def reinitialize_command (self, command, reinit_subcommands=0): cmd = self.distribution.reinitialize_command( command, reinit_subcommands) if command in ('install', 'install_lib'): cmd.install_lib = None # work around distutils bug return cmd def run(self): self._is_running = True try: _bdist_wininst.run(self) self._fix_upload_names() finally: self._is_running = False if not hasattr(_bdist_wininst, 'get_installer_filename'): def get_installer_filename(self, fullname): # Factored out to allow overriding in subclasses if self.target_version: # if we create an installer for a specific python version, # it's better to include this in the name installer_name = os.path.join(self.dist_dir, "%s.win32-py%s.exe" % (fullname, self.target_version)) else: installer_name = os.path.join(self.dist_dir, "%s.win32.exe" % fullname) return installer_name # get_installer_filename() tahoe-lafs-1.10.0/setuptools-0.6c16dev4.egg/setuptools/command/build_ext.py000066400000000000000000000263761221140116300263710ustar00rootroot00000000000000from distutils.command.build_ext import build_ext as _du_build_ext try: # Attempt to use Pyrex for building extensions, if available from Pyrex.Distutils.build_ext import build_ext as _build_ext except ImportError: _build_ext = _du_build_ext import os, sys from 
distutils.file_util import copy_file from setuptools.extension import Library from distutils.ccompiler import new_compiler from distutils.sysconfig import customize_compiler, get_config_var get_config_var("LDSHARED") # make sure _config_vars is initialized from distutils.sysconfig import _config_vars from distutils import log from distutils.errors import * have_rtld = False use_stubs = False libtype = 'shared' if sys.platform == "darwin": use_stubs = True elif os.name != 'nt': try: from dl import RTLD_NOW have_rtld = True use_stubs = True except ImportError: pass def if_dl(s): if have_rtld: return s return '' class build_ext(_build_ext): def run(self): """Build extensions in build directory, then copy if --inplace""" old_inplace, self.inplace = self.inplace, 0 _build_ext.run(self) self.inplace = old_inplace if old_inplace: self.copy_extensions_to_source() def copy_extensions_to_source(self): build_py = self.get_finalized_command('build_py') for ext in self.extensions: fullname = self.get_ext_fullname(ext.name) filename = self.get_ext_filename(fullname) modpath = fullname.split('.') package = '.'.join(modpath[:-1]) package_dir = build_py.get_package_dir(package) dest_filename = os.path.join(package_dir,os.path.basename(filename)) src_filename = os.path.join(self.build_lib,filename) # Always copy, even if source is older than destination, to ensure # that the right extensions for the current Python/platform are # used. copy_file( src_filename, dest_filename, verbose=self.verbose, dry_run=self.dry_run ) if ext._needs_stub: self.write_stub(package_dir or os.curdir, ext, True) if _build_ext is not _du_build_ext and not hasattr(_build_ext,'pyrex_sources'): # Workaround for problems using some Pyrex versions w/SWIG and/or 2.4 def swig_sources(self, sources, *otherargs): # first do any Pyrex processing sources = _build_ext.swig_sources(self, sources) or sources # Then do any actual SWIG stuff on the remainder return _du_build_ext.swig_sources(self, sources, *otherargs) def get_ext_filename(self, fullname): filename = _build_ext.get_ext_filename(self,fullname) if fullname in self.ext_map: ext = self.ext_map[fullname] if isinstance(ext,Library): fn, ext = os.path.splitext(filename) return self.shlib_compiler.library_filename(fn,libtype) elif use_stubs and ext._links_to_dynamic: d,fn = os.path.split(filename) return os.path.join(d,'dl-'+fn) return filename def initialize_options(self): _build_ext.initialize_options(self) self.shlib_compiler = None self.shlibs = [] self.ext_map = {} def finalize_options(self): _build_ext.finalize_options(self) self.extensions = self.extensions or [] self.check_extensions_list(self.extensions) self.shlibs = [ext for ext in self.extensions if isinstance(ext,Library)] if self.shlibs: self.setup_shlib_compiler() for ext in self.extensions: ext._full_name = self.get_ext_fullname(ext.name) for ext in self.extensions: fullname = ext._full_name self.ext_map[fullname] = ext ltd = ext._links_to_dynamic = \ self.shlibs and self.links_to_dynamic(ext) or False ext._needs_stub = ltd and use_stubs and not isinstance(ext,Library) filename = ext._file_name = self.get_ext_filename(fullname) libdir = os.path.dirname(os.path.join(self.build_lib,filename)) if ltd and libdir not in ext.library_dirs: ext.library_dirs.append(libdir) if ltd and use_stubs and os.curdir not in ext.runtime_library_dirs: ext.runtime_library_dirs.append(os.curdir) def setup_shlib_compiler(self): compiler = self.shlib_compiler = new_compiler( compiler=self.compiler, dry_run=self.dry_run, force=self.force ) if 
sys.platform == "darwin": tmp = _config_vars.copy() try: # XXX Help! I don't have any idea whether these are right... _config_vars['LDSHARED'] = "gcc -Wl,-x -dynamiclib -undefined dynamic_lookup" _config_vars['CCSHARED'] = " -dynamiclib" _config_vars['SO'] = ".dylib" customize_compiler(compiler) finally: _config_vars.clear() _config_vars.update(tmp) else: customize_compiler(compiler) if self.include_dirs is not None: compiler.set_include_dirs(self.include_dirs) if self.define is not None: # 'define' option is a list of (name,value) tuples for (name,value) in self.define: compiler.define_macro(name, value) if self.undef is not None: for macro in self.undef: compiler.undefine_macro(macro) if self.libraries is not None: compiler.set_libraries(self.libraries) if self.library_dirs is not None: compiler.set_library_dirs(self.library_dirs) if self.rpath is not None: compiler.set_runtime_library_dirs(self.rpath) if self.link_objects is not None: compiler.set_link_objects(self.link_objects) # hack so distutils' build_extension() builds a library instead compiler.link_shared_object = link_shared_object.__get__(compiler) def get_export_symbols(self, ext): if isinstance(ext,Library): return ext.export_symbols return _build_ext.get_export_symbols(self,ext) def build_extension(self, ext): _compiler = self.compiler try: if isinstance(ext,Library): self.compiler = self.shlib_compiler _build_ext.build_extension(self,ext) if ext._needs_stub: self.write_stub( self.get_finalized_command('build_py').build_lib, ext ) finally: self.compiler = _compiler def links_to_dynamic(self, ext): """Return true if 'ext' links to a dynamic lib in the same package""" # XXX this should check to ensure the lib is actually being built # XXX as dynamic, and not just using a locally-found version or a # XXX static-compiled version libnames = dict.fromkeys([lib._full_name for lib in self.shlibs]) pkg = '.'.join(ext._full_name.split('.')[:-1]+['']) for libname in ext.libraries: if pkg+libname in libnames: return True return False def get_outputs(self): outputs = _build_ext.get_outputs(self) optimize = self.get_finalized_command('build_py').optimize for ext in self.extensions: if ext._needs_stub: base = os.path.join(self.build_lib, *ext._full_name.split('.')) outputs.append(base+'.py') outputs.append(base+'.pyc') if optimize: outputs.append(base+'.pyo') return outputs def write_stub(self, output_dir, ext, compile=False): log.info("writing stub loader for %s to %s",ext._full_name, output_dir) stub_file = os.path.join(output_dir, *ext._full_name.split('.'))+'.py' if compile and os.path.exists(stub_file): raise DistutilsError(stub_file+" already exists! 
Please delete.") if not self.dry_run: f = open(stub_file,'w') f.write('\n'.join([ "def __bootstrap__():", " global __bootstrap__, __file__, __loader__", " import sys, os, pkg_resources, imp"+if_dl(", dl"), " __file__ = pkg_resources.resource_filename(__name__,%r)" % os.path.basename(ext._file_name), " del __bootstrap__", " if '__loader__' in globals():", " del __loader__", if_dl(" old_flags = sys.getdlopenflags()"), " old_dir = os.getcwd()", " try:", " os.chdir(os.path.dirname(__file__))", if_dl(" sys.setdlopenflags(dl.RTLD_NOW)"), " imp.load_dynamic(__name__,__file__)", " finally:", if_dl(" sys.setdlopenflags(old_flags)"), " os.chdir(old_dir)", "__bootstrap__()", "" # terminal \n ])) f.close() if compile: from distutils.util import byte_compile byte_compile([stub_file], optimize=0, force=True, dry_run=self.dry_run) optimize = self.get_finalized_command('install_lib').optimize if optimize > 0: byte_compile([stub_file], optimize=optimize, force=True, dry_run=self.dry_run) if os.path.exists(stub_file) and not self.dry_run: os.unlink(stub_file) if use_stubs or os.name=='nt': # Build shared libraries # def link_shared_object(self, objects, output_libname, output_dir=None, libraries=None, library_dirs=None, runtime_library_dirs=None, export_symbols=None, debug=0, extra_preargs=None, extra_postargs=None, build_temp=None, target_lang=None ): self.link( self.SHARED_LIBRARY, objects, output_libname, output_dir, libraries, library_dirs, runtime_library_dirs, export_symbols, debug, extra_preargs, extra_postargs, build_temp, target_lang ) else: # Build static libraries everywhere else libtype = 'static' def link_shared_object(self, objects, output_libname, output_dir=None, libraries=None, library_dirs=None, runtime_library_dirs=None, export_symbols=None, debug=0, extra_preargs=None, extra_postargs=None, build_temp=None, target_lang=None ): # XXX we need to either disallow these attrs on Library instances, # or warn/abort here if set, or something... #libraries=None, library_dirs=None, runtime_library_dirs=None, #export_symbols=None, extra_preargs=None, extra_postargs=None, #build_temp=None assert output_dir is None # distutils build_ext doesn't pass this output_dir,filename = os.path.split(output_libname) basename, ext = os.path.splitext(filename) if self.library_filename("x").startswith('lib'): # strip 'lib' prefix; this is kludgy if some platform uses # a different prefix basename = basename[3:] self.create_static_lib( objects, basename, output_dir, debug, target_lang ) tahoe-lafs-1.10.0/setuptools-0.6c16dev4.egg/setuptools/command/build_py.py000066400000000000000000000164461221140116300262160ustar00rootroot00000000000000import os.path, sys, fnmatch from distutils.command.build_py import build_py as _build_py from distutils.util import convert_path from glob import glob class build_py(_build_py): """Enhanced 'build_py' command that includes data files with packages The data files are specified via a 'package_data' argument to 'setup()'. See 'setuptools.dist.Distribution' for more details. Also, this version of the 'build_py' command allows you to specify both 'py_modules' and 'packages' in the same setup operation. 
""" def finalize_options(self): _build_py.finalize_options(self) self.package_data = self.distribution.package_data self.exclude_package_data = self.distribution.exclude_package_data or {} if 'data_files' in self.__dict__: del self.__dict__['data_files'] def run(self): self.old_run() if sys.platform == "win32": from setuptools.command.scriptsetup import do_scriptsetup do_scriptsetup() def old_run(self): """Build modules, packages, and copy data files to build directory""" if not self.py_modules and not self.packages: return if self.py_modules: self.build_modules() if self.packages: self.build_packages() self.build_package_data() # Only compile actual .py files, using our base class' idea of what our # output files are. self.byte_compile(_build_py.get_outputs(self, include_bytecode=0)) def __getattr__(self,attr): if attr=='data_files': # lazily compute data files self.data_files = files = self._get_data_files(); return files return _build_py.__getattr__(self,attr) def _get_data_files(self): """Generate list of '(package,src_dir,build_dir,filenames)' tuples""" self.analyze_manifest() data = [] for package in self.packages or (): # Locate package source directory src_dir = self.get_package_dir(package) # Compute package build directory build_dir = os.path.join(*([self.build_lib] + package.split('.'))) # Length of path to strip from found files plen = len(src_dir)+1 # Strip directory from globbed filenames filenames = [ file[plen:] for file in self.find_data_files(package, src_dir) ] data.append( (package, src_dir, build_dir, filenames) ) return data def find_data_files(self, package, src_dir): """Return filenames for package's data files in 'src_dir'""" globs = (self.package_data.get('', []) + self.package_data.get(package, [])) files = self.manifest_files.get(package, [])[:] for pattern in globs: # Each pattern has to be converted to a platform-specific path files.extend(glob(os.path.join(src_dir, convert_path(pattern)))) return self.exclude_data_files(package, src_dir, files) def build_package_data(self): """Copy data files into build directory""" lastdir = None for package, src_dir, build_dir, filenames in self.data_files: for filename in filenames: target = os.path.join(build_dir, filename) self.mkpath(os.path.dirname(target)) self.copy_file(os.path.join(src_dir, filename), target) def analyze_manifest(self): self.manifest_files = mf = {} if not self.distribution.include_package_data: return src_dirs = {} for package in self.packages or (): # Locate package source directory src_dirs[assert_relative(self.get_package_dir(package))] = package self.run_command('egg_info') ei_cmd = self.get_finalized_command('egg_info') for path in ei_cmd.filelist.files: d,f = os.path.split(assert_relative(path)) prev = None oldf = f while d and d!=prev and d not in src_dirs: prev = d d, df = os.path.split(d) f = os.path.join(df, f) if d in src_dirs: if path.endswith('.py') and f==oldf: continue # it's a module, not data mf.setdefault(src_dirs[d],[]).append(path) def get_data_files(self): pass # kludge 2.4 for lazy computation if sys.version<"2.4": # Python 2.4 already has this code def get_outputs(self, include_bytecode=1): """Return complete list of files copied to the build directory This includes both '.py' files and data files, as well as '.pyc' and '.pyo' files if 'include_bytecode' is true. (This method is needed for the 'install_lib' command to do its job properly, and to generate a correct installation manifest.) 
""" return _build_py.get_outputs(self, include_bytecode) + [ os.path.join(build_dir, filename) for package, src_dir, build_dir,filenames in self.data_files for filename in filenames ] def check_package(self, package, package_dir): """Check namespace packages' __init__ for declare_namespace""" try: return self.packages_checked[package] except KeyError: pass init_py = _build_py.check_package(self, package, package_dir) self.packages_checked[package] = init_py if not init_py or not self.distribution.namespace_packages: return init_py for pkg in self.distribution.namespace_packages: if pkg==package or pkg.startswith(package+'.'): break else: return init_py f = open(init_py,'rU') if 'declare_namespace' not in f.read(): from distutils.errors import DistutilsError raise DistutilsError( "Namespace package problem: %s is a namespace package, but its\n" "__init__.py does not call declare_namespace()! Please fix it.\n" '(See the setuptools manual under "Namespace Packages" for ' "details.)\n" % (package,) ) f.close() return init_py def initialize_options(self): self.packages_checked={} _build_py.initialize_options(self) def exclude_data_files(self, package, src_dir, files): """Filter filenames for package's data files in 'src_dir'""" globs = (self.exclude_package_data.get('', []) + self.exclude_package_data.get(package, [])) bad = [] for pattern in globs: bad.extend( fnmatch.filter( files, os.path.join(src_dir, convert_path(pattern)) ) ) bad = dict.fromkeys(bad) seen = {} return [ f for f in files if f not in bad and f not in seen and seen.setdefault(f,1) # ditch dupes ] def assert_relative(path): if not os.path.isabs(path): return path from distutils.errors import DistutilsSetupError raise DistutilsSetupError( """Error: setup script specifies an absolute path: %s setup() arguments must *always* be /-separated paths relative to the setup.py directory, *never* absolute paths. """ % path ) tahoe-lafs-1.10.0/setuptools-0.6c16dev4.egg/setuptools/command/develop.py000066400000000000000000000125461221140116300260420ustar00rootroot00000000000000from setuptools.command.easy_install import easy_install from distutils.util import convert_path from pkg_resources import Distribution, PathMetadata, normalize_path from distutils import log from distutils.errors import * import sys, os, setuptools, glob class develop(easy_install): """Set up package for development""" description = "install package in 'development mode'" user_options = easy_install.user_options + [ ("uninstall", "u", "Uninstall this source package"), ("egg-path=", None, "Set the path to be used in the .egg-link file"), ] boolean_options = easy_install.boolean_options + ['uninstall'] command_consumes_arguments = False # override base def run(self): self.old_run() if sys.platform == "win32": from setuptools.command.scriptsetup import do_scriptsetup do_scriptsetup() def old_run(self): if self.uninstall: self.multi_version = True self.uninstall_link() else: self.install_for_development() self.warn_deprecated_options() def initialize_options(self): self.uninstall = None self.egg_path = None easy_install.initialize_options(self) self.setup_path = None self.always_copy_from = '.' 
# always copy eggs installed in curdir def finalize_options(self): ei = self.get_finalized_command("egg_info") if ei.broken_egg_info: raise DistutilsError( "Please rename %r to %r before using 'develop'" % (ei.egg_info, ei.broken_egg_info) ) self.args = [ei.egg_name] easy_install.finalize_options(self) # pick up setup-dir .egg files only: no .egg-info self.package_index.scan(glob.glob('*.egg')) self.egg_link = os.path.join(self.install_dir, ei.egg_name+'.egg-link') self.egg_base = ei.egg_base if self.egg_path is None: self.egg_path = os.path.abspath(ei.egg_base) target = normalize_path(self.egg_base) if normalize_path(os.path.join(self.install_dir, self.egg_path)) != target: raise DistutilsOptionError( "--egg-path must be a relative path from the install" " directory to "+target ) # Make a distribution for the package's source self.dist = Distribution( target, PathMetadata(target, os.path.abspath(ei.egg_info)), project_name = ei.egg_name ) p = self.egg_base.replace(os.sep,'/') if p!= os.curdir: p = '../' * (p.count('/')+1) self.setup_path = p p = normalize_path(os.path.join(self.install_dir, self.egg_path, p)) if p != normalize_path(os.curdir): raise DistutilsOptionError( "Can't get a consistent path to setup script from" " installation directory", p, normalize_path(os.curdir)) def install_for_development(self): # Ensure metadata is up-to-date self.run_command('egg_info') # Build extensions in-place self.reinitialize_command('build_ext', inplace=1) self.run_command('build_ext') self.install_site_py() # ensure that target dir is site-safe if setuptools.bootstrap_install_from: self.easy_install(setuptools.bootstrap_install_from) setuptools.bootstrap_install_from = None # create an .egg-link in the installation dir, pointing to our egg log.info("Creating %s (link to %s)", self.egg_link, self.egg_base) if not self.dry_run: f = open(self.egg_link,"w") f.write(self.egg_path + "\n" + self.setup_path) f.close() # postprocess the installed distro, fixing up .pth, installing scripts, # and handling requirements self.process_distribution(None, self.dist, not self.no_deps) def uninstall_link(self): if os.path.exists(self.egg_link): log.info("Removing %s (link to %s)", self.egg_link, self.egg_base) contents = [line.rstrip() for line in file(self.egg_link)] if contents not in ([self.egg_path], [self.egg_path, self.setup_path]): log.warn("Link points to %s: uninstall aborted", contents) return if not self.dry_run: os.unlink(self.egg_link) if not self.dry_run: self.update_pth(self.dist) # remove any .pth link to us if self.distribution.scripts: # XXX should also check for entry point scripts! log.warn("Note: you must uninstall or replace scripts manually!") def install_egg_scripts(self, dist): if dist is not self.dist: # Installing a dependency, so fall back to normal behavior return easy_install.install_egg_scripts(self,dist) # create wrapper scripts in the script dir, pointing to dist.scripts # new-style... 
self.install_wrapper_scripts(dist) # ...and old-style for script_name in self.distribution.scripts or []: script_path = os.path.abspath(convert_path(script_name)) script_name = os.path.basename(script_path) f = open(script_path,'rU') script_text = f.read() f.close() self.install_script(dist, script_name, script_text, script_path) tahoe-lafs-1.10.0/setuptools-0.6c16dev4.egg/setuptools/command/easy_install.py000066400000000000000000002005031221140116300270630ustar00rootroot00000000000000#!python """\ Easy Install ------------ A tool for doing automatic download/extract/build of distutils-based Python packages. For detailed documentation, see the accompanying EasyInstall.txt file, or visit the `EasyInstall home page`__. __ http://peak.telecommunity.com/DevCenter/EasyInstall """ import sys, os.path, zipimport, shutil, tempfile, zipfile, re, stat, random from glob import glob from setuptools import Command, _dont_write_bytecode from setuptools import __version__ as setuptools_version from setuptools.sandbox import run_setup from distutils import log, dir_util from distutils.sysconfig import get_python_lib from distutils.errors import DistutilsArgError, DistutilsOptionError, \ DistutilsError from setuptools.archive_util import unpack_archive from setuptools.package_index import PackageIndex, parse_bdist_wininst from setuptools.package_index import URL_SCHEME from setuptools.command import bdist_egg, egg_info from pkg_resources import * sys_executable = os.path.normpath(sys.executable) __all__ = [ 'samefile', 'easy_install', 'PthDistributions', 'extract_wininst_cfg', 'main', 'get_exe_prefixes', ] def samefile(p1,p2): if hasattr(os.path,'samefile') and ( os.path.exists(p1) and os.path.exists(p2) ): return os.path.samefile(p1,p2) return ( os.path.normpath(os.path.normcase(p1)) == os.path.normpath(os.path.normcase(p2)) ) class easy_install(Command): """Manage a download/build/install process""" description = "Find/get/install Python packages" command_consumes_arguments = True user_options = [ ('prefix=', None, "installation prefix"), ("zip-ok", "z", "install package as a zipfile"), ("multi-version", "m", "make apps have to require() a version"), ("upgrade", "U", "force upgrade (searches PyPI for latest versions)"), ("install-dir=", "d", "install package to DIR"), ("script-dir=", "s", "install scripts to DIR"), ("exclude-scripts", "x", "Don't install scripts"), ("always-copy", "a", "Copy all needed packages to install dir"), ("index-url=", "i", "base URL of Python Package Index"), ("find-links=", "f", "additional URL(s) to search for packages"), ("delete-conflicting", "D", "no longer needed; don't use this"), ("ignore-conflicts-at-my-risk", None, "no longer needed; don't use this"), ("build-directory=", "b", "download/extract/build in DIR; keep the results"), ('optimize=', 'O', "also compile with optimization: -O1 for \"python -O\", " "-O2 for \"python -OO\", and -O0 to disable [default: -O0]"), ('record=', None, "filename in which to record list of installed files"), ('always-unzip', 'Z', "don't install as a zipfile, no matter what"), ('site-dirs=','S',"list of directories where .pth files work"), ('editable', 'e', "Install specified packages in editable form"), ('no-deps', 'N', "don't install dependencies"), ('allow-hosts=', 'H', "pattern(s) that hostnames must match"), ('local-snapshots-ok', 'l', "allow building eggs from local checkouts"), ] boolean_options = [ 'zip-ok', 'multi-version', 'exclude-scripts', 'upgrade', 'always-copy', 'delete-conflicting', 'ignore-conflicts-at-my-risk', 'editable', 
'no-deps', 'local-snapshots-ok', ] negative_opt = {'always-unzip': 'zip-ok'} create_index = PackageIndex def initialize_options(self): self.zip_ok = self.local_snapshots_ok = None self.install_dir = self.script_dir = self.exclude_scripts = None self.index_url = None self.find_links = None self.build_directory = None self.args = None self.optimize = self.record = None self.upgrade = self.always_copy = self.multi_version = None self.editable = self.no_deps = self.allow_hosts = None self.root = self.prefix = self.no_report = None # Options not specifiable via command line self.package_index = None self.pth_file = self.always_copy_from = None self.delete_conflicting = None self.ignore_conflicts_at_my_risk = None self.site_dirs = None self.installed_projects = {} self.sitepy_installed = False # Always read easy_install options, even if we are subclassed, or have # an independent instance created. This ensures that defaults will # always come from the standard configuration file(s)' "easy_install" # section, even if this is a "develop" or "install" command, or some # other embedding. self._dry_run = None self.verbose = self.distribution.verbose self.distribution._set_command_options( self, self.distribution.get_option_dict('easy_install') ) def delete_blockers(self, blockers): for filename in blockers: if os.path.exists(filename) or os.path.islink(filename): log.info("Deleting %s", filename) if not self.dry_run: if os.path.isdir(filename) and not os.path.islink(filename): rmtree(filename) else: os.unlink(filename) def finalize_options(self): self._expand('install_dir','script_dir','build_directory','site_dirs') # If a non-default installation directory was specified, default the # script directory to match it. if self.script_dir is None: self.script_dir = self.install_dir # Let install_dir get set by install_lib command, which in turn # gets its info from the install command, and takes into account # --prefix and --home and all that other crud. 
self.set_undefined_options('install_lib', ('install_dir','install_dir') ) # Likewise, set default script_dir from 'install_scripts.install_dir' self.set_undefined_options('install_scripts', ('install_dir', 'script_dir') ) # default --record from the install command self.set_undefined_options('install', ('record', 'record')) normpath = map(normalize_path, sys.path) self.all_site_dirs = get_site_dirs() if self.site_dirs is not None: site_dirs = [ os.path.expanduser(s.strip()) for s in self.site_dirs.split(',') ] for d in site_dirs: if not os.path.isdir(d): log.warn("%s (in --site-dirs) does not exist", d) elif normalize_path(d) not in normpath: raise DistutilsOptionError( d+" (in --site-dirs) is not on sys.path" ) else: self.all_site_dirs.append(normalize_path(d)) if not self.editable: self.check_site_dir() self.index_url = self.index_url or "http://pypi.python.org/simple" self.shadow_path = self.all_site_dirs[:] for path_item in self.install_dir, normalize_path(self.script_dir): if path_item not in self.shadow_path: self.shadow_path.insert(0, path_item) if self.allow_hosts is not None: hosts = [s.strip() for s in self.allow_hosts.split(',')] else: hosts = ['*'] if self.package_index is None: self.package_index = self.create_index( self.index_url, search_path = self.shadow_path+sys.path, hosts=hosts, ) self.local_index = Environment(self.shadow_path+sys.path) if self.find_links is not None: if isinstance(self.find_links, basestring): self.find_links = self.find_links.split() else: self.find_links = [] if self.local_snapshots_ok: self.package_index.scan_egg_links(self.shadow_path+sys.path) self.package_index.add_find_links(self.find_links) self.set_undefined_options('install_lib', ('optimize','optimize')) if not isinstance(self.optimize,int): try: self.optimize = int(self.optimize) if not (0 <= self.optimize <= 2): raise ValueError except ValueError: raise DistutilsOptionError("--optimize must be 0, 1, or 2") if self.delete_conflicting and self.ignore_conflicts_at_my_risk: raise DistutilsOptionError( "Can't use both --delete-conflicting and " "--ignore-conflicts-at-my-risk at the same time" ) if self.editable and not self.build_directory: raise DistutilsArgError( "Must specify a build directory (-b) when using --editable" ) if not self.args: raise DistutilsArgError( "No urls, filenames, or requirements specified (see --help)") self.outputs = [] def run(self): if self.verbose!=self.distribution.verbose: log.set_verbosity(self.verbose) try: for spec in self.args: self.easy_install(spec, not self.no_deps) if self.record: outputs = self.outputs if self.root: # strip any package prefix root_len = len(self.root) for counter in xrange(len(outputs)): outputs[counter] = outputs[counter][root_len:] from distutils import file_util self.execute( file_util.write_file, (self.record, outputs), "writing list of installed files to '%s'" % self.record ) self.warn_deprecated_options() finally: log.set_verbosity(self.distribution.verbose) def pseudo_tempname(self): """Return a pseudo-tempname base in the install directory. This code is intentionally naive; if a malicious party can write to the target directory you're already in deep doodoo. """ try: pid = os.getpid() except: pid = random.randint(0,sys.maxint) return os.path.join(self.install_dir, "test-easy-install-%s" % pid) def warn_deprecated_options(self): if self.delete_conflicting or self.ignore_conflicts_at_my_risk: log.warn( "Note: The -D, --delete-conflicting and" " --ignore-conflicts-at-my-risk no longer have any purpose" " and should not be used." 
) def check_site_dir(self): """Verify that self.install_dir is .pth-capable dir, if needed""" instdir = normalize_path(self.install_dir) pth_file = os.path.join(instdir,'easy-install.pth') # mkdir it if necessary try: os.makedirs(instdir) except OSError: # Oh well -- hopefully this error simply means that it is already there. # If not the subsequent write test will identify the problem. pass # add it to site dirs self.all_site_dirs.append(instdir) # Is it a configured, PYTHONPATH, implicit, or explicit site dir? is_site_dir = instdir in self.all_site_dirs if not is_site_dir and not self.multi_version: # No? Then directly test whether it does .pth file processing is_site_dir = self.check_pth_processing() else: # make sure we can write to target dir testfile = self.pseudo_tempname()+'.write-test' test_exists = os.path.exists(testfile) try: if test_exists: os.unlink(testfile) open(testfile,'w').close() os.unlink(testfile) except (OSError,IOError): self.cant_write_to_target() if not is_site_dir and not self.multi_version: # Can't install non-multi to non-site dir log.warn(self.no_default_version_msg()) if is_site_dir: if self.pth_file is None: self.pth_file = PthDistributions(pth_file, self.all_site_dirs) else: self.pth_file = None if self.multi_version and not os.path.exists(pth_file): self.sitepy_installed = True # don't need site.py in this case self.pth_file = None # and don't create a .pth file self.install_dir = instdir def cant_write_to_target(self): msg = """can't create or remove files in install directory The following error occurred while trying to add or remove files in the installation directory: %s The installation directory you specified (via --install-dir, --prefix, or the distutils default setting) was: %s """ % (sys.exc_info()[1], self.install_dir,) if not os.path.exists(self.install_dir): msg += """ This directory does not currently exist. Please create it and try again, or choose a different installation directory (using the -d or --install-dir option). """ else: msg += """ Perhaps your account does not have write access to this directory? If the installation directory is a system-owned directory, you may need to sign in as the administrator or "root" account. If you do not have administrative access to this machine, you may wish to choose a different installation directory, preferably one that is listed in your PYTHONPATH environment variable. For information on other options, you may wish to consult the documentation at: http://peak.telecommunity.com/EasyInstall.html Please make the appropriate changes for your system and try again. """ raise DistutilsError(msg) def check_pth_processing(self): """Empirically verify whether .pth files are supported in inst. 
dir""" instdir = self.install_dir log.info("Checking .pth file support in %s", instdir) pth_file = self.pseudo_tempname()+".pth" ok_file = pth_file+'.ok' ok_exists = os.path.exists(ok_file) try: if ok_exists: os.unlink(ok_file) f = open(pth_file,'w') except (OSError,IOError): self.cant_write_to_target() else: try: f.write("import os;open(%r,'w').write('OK')\n" % (ok_file,)) f.close(); f=None executable = sys.executable if os.name=='nt': dirname,basename = os.path.split(executable) alt = os.path.join(dirname,'pythonw.exe') if basename.lower()=='python.exe' and os.path.exists(alt): # use pythonw.exe to avoid opening a console window executable = alt from distutils.spawn import spawn spawn([executable,'-E','-c','pass'],0) if os.path.exists(ok_file): log.info( "TEST PASSED: %s appears to support .pth files", instdir ) return True finally: if f: f.close() if os.path.exists(ok_file): os.unlink(ok_file) if os.path.exists(pth_file): os.unlink(pth_file) if not self.multi_version: log.warn("TEST FAILED: %s does NOT support .pth files", instdir) return False def install_egg_scripts(self, dist): """Write all the scripts for `dist`, unless scripts are excluded""" if not self.exclude_scripts and dist.metadata_isdir('scripts'): for script_name in dist.metadata_listdir('scripts'): self.install_script( dist, script_name, dist.get_metadata('scripts/'+script_name) ) self.install_wrapper_scripts(dist) def add_output(self, path): if os.path.isdir(path): for base, dirs, files in os.walk(path): for filename in files: self.outputs.append(os.path.join(base,filename)) else: self.outputs.append(path) def not_editable(self, spec): if self.editable: raise DistutilsArgError( "Invalid argument %r: you can't use filenames or URLs " "with --editable (except via the --find-links option)." 
% (spec,) ) def check_editable(self,spec): if not self.editable: return if os.path.exists(os.path.join(self.build_directory, spec.key)): raise DistutilsArgError( "%r already exists in %s; can't do a checkout there" % (spec.key, self.build_directory) ) def easy_install(self, spec, deps=False): tmpdir = tempfile.mkdtemp(prefix="easy_install-") download = None if not self.editable: self.install_site_py() try: if not isinstance(spec,Requirement): if URL_SCHEME(spec): # It's a url, download it to tmpdir and process self.not_editable(spec) download = self.package_index.download(spec, tmpdir) return self.install_item(None, download, tmpdir, deps, True) elif os.path.exists(spec): # Existing file or directory, just process it directly self.not_editable(spec) return self.install_item(None, spec, tmpdir, deps, True) else: spec = parse_requirement_arg(spec) self.check_editable(spec) dist = self.package_index.fetch_distribution( spec, tmpdir, self.upgrade, self.editable, not self.always_copy, self.local_index ) if dist is None: msg = "Could not find suitable distribution for %r" % spec if self.always_copy: msg+=" (--always-copy skips system and development eggs)" raise DistutilsError(msg) elif dist.precedence==DEVELOP_DIST: # .egg-info dists don't need installing, just process deps self.process_distribution(spec, dist, deps, "Using") return dist else: return self.install_item(spec, dist.location, tmpdir, deps) finally: if os.path.exists(tmpdir): rmtree(tmpdir) def install_item(self, spec, download, tmpdir, deps, install_needed=False): # Installation is also needed if file in tmpdir or is not an egg install_needed = install_needed or self.always_copy install_needed = install_needed or os.path.dirname(download) == tmpdir install_needed = install_needed or not download.endswith('.egg') install_needed = install_needed or ( self.always_copy_from is not None and os.path.dirname(normalize_path(download)) == normalize_path(self.always_copy_from) ) if spec and not install_needed: # at this point, we know it's a local .egg, we just don't know if # it's already installed. 
for dist in self.local_index[spec.project_name]: if dist.location==download: break else: install_needed = True # it's not in the local index log.info("Processing %s", os.path.basename(download)) if install_needed: dists = self.install_eggs(spec, download, tmpdir) for dist in dists: self.process_distribution(spec, dist, deps) else: dists = [self.check_conflicts(self.egg_distribution(download))] self.process_distribution(spec, dists[0], deps, "Using") if spec is not None: for dist in dists: if dist in spec: return dist def process_distribution(self, requirement, dist, deps=True, *info): self.update_pth(dist) self.package_index.add(dist) self.local_index.add(dist) self.install_egg_scripts(dist) self.installed_projects[dist.key] = dist log.info(self.installation_report(requirement, dist, *info)) if dist.has_metadata('dependency_links.txt'): self.package_index.add_find_links( dist.get_metadata_lines('dependency_links.txt') ) if not deps and not self.always_copy: return elif requirement is not None and dist.key != requirement.key: log.warn("Skipping dependencies for %s", dist) return # XXX this is not the distribution we were looking for elif requirement is None or dist not in requirement: # if we wound up with a different version, resolve what we've got distreq = dist.as_requirement() requirement = requirement or distreq requirement = Requirement( distreq.project_name, distreq.specs, requirement.extras ) log.info("Processing dependencies for %s", requirement) try: distros = WorkingSet([]).resolve( [requirement], self.local_index, self.easy_install ) except DistributionNotFound, e: raise DistutilsError( "Could not find required distribution %s" % e.args ) except VersionConflict, e: raise DistutilsError( "Installed distribution %s conflicts with requirement %s" % e.args ) if self.always_copy or self.always_copy_from: # Force all the relevant distros to be copied or activated for dist in distros: if dist.key not in self.installed_projects: self.easy_install(dist.as_requirement()) log.info("Finished processing dependencies for %s", requirement) def should_unzip(self, dist): if self.zip_ok is not None: return not self.zip_ok if dist.has_metadata('not-zip-safe'): return True if not dist.has_metadata('zip-safe'): return True return False def maybe_move(self, spec, dist_filename, setup_base): dst = os.path.join(self.build_directory, spec.key) if os.path.exists(dst): log.warn( "%r already exists in %s; build directory %s will not be kept", spec.key, self.build_directory, setup_base ) return setup_base if os.path.isdir(dist_filename): setup_base = dist_filename else: if os.path.dirname(dist_filename)==setup_base: os.unlink(dist_filename) # get it out of the tmp dir contents = os.listdir(setup_base) if len(contents)==1: dist_filename = os.path.join(setup_base,contents[0]) if os.path.isdir(dist_filename): # if the only thing there is a directory, move it instead setup_base = dist_filename ensure_directory(dst); shutil.move(setup_base, dst) return dst def install_wrapper_scripts(self, dist): if not self.exclude_scripts: for args in get_script_args(dist, script_dir=self.script_dir): self.write_script(*args) def install_script(self, dist, script_name, script_text, dev_path=None): """Generate a legacy script wrapper and install it""" spec = str(dist.as_requirement()) is_script = is_python_script(script_text, script_name) requires = [spec] + [str(r) for r in dist.requires()] if is_script and dev_path: script_text = get_script_header(script_text) + ( "# EASY-INSTALL-DEV-SCRIPT: %(spec)r,%(script_name)r\n" 
"__requires__ = %(requires)r\n" "from pkg_resources import require; require(%(spec)r)\n" "del require\n" "__file__ = %(dev_path)r\n" "execfile(__file__)\n" ) % locals() elif is_script: script_text = get_script_header(script_text) + ( "# EASY-INSTALL-SCRIPT: %(spec)r,%(script_name)r\n" "__requires__ = %(requires)r\n" "import pkg_resources\n" "pkg_resources.run_script(%(spec)r, %(script_name)r)\n" ) % locals() self.write_script(script_name, script_text, 'b') def write_script(self, script_name, contents, mode="t", blockers=()): """Write an executable file to the scripts directory""" self.delete_blockers( # clean up old .py/.pyw w/o a script [os.path.join(self.script_dir,x) for x in blockers]) log.info("Installing %s script to %s", script_name, self.script_dir) target = os.path.join(self.script_dir, script_name) self.add_output(target) if not self.dry_run: ensure_directory(target) f = open(target,"w"+mode) f.write(contents) f.close() chmod(target,0755) def install_eggs(self, spec, dist_filename, tmpdir): # .egg dirs or files are already built, so just return them if dist_filename.lower().endswith('.egg'): return [self.install_egg(dist_filename, tmpdir)] elif dist_filename.lower().endswith('.exe'): return [self.install_exe(dist_filename, tmpdir)] # Anything else, try to extract and build setup_base = tmpdir if os.path.isfile(dist_filename) and not dist_filename.endswith('.py'): unpack_archive(dist_filename, tmpdir, self.unpack_progress) elif os.path.isdir(dist_filename): setup_base = os.path.abspath(dist_filename) if (setup_base.startswith(tmpdir) # something we downloaded and self.build_directory and spec is not None ): setup_base = self.maybe_move(spec, dist_filename, setup_base) # Find the setup.py file setup_script = os.path.join(setup_base, 'setup.py') if not os.path.exists(setup_script): setups = glob(os.path.join(setup_base, '*', 'setup.py')) if not setups: raise DistutilsError( "Couldn't find a setup script in %s" % os.path.abspath(dist_filename) ) if len(setups)>1: raise DistutilsError( "Multiple setup scripts in %s" % os.path.abspath(dist_filename) ) setup_script = setups[0] # Now run it, and return the result if self.editable: log.info(self.report_editable(spec, setup_script)) return [] else: return self.build_and_install(setup_script, setup_base) def egg_distribution(self, egg_path): if os.path.isdir(egg_path): metadata = PathMetadata(egg_path,os.path.join(egg_path,'EGG-INFO')) else: metadata = EggMetadata(zipimport.zipimporter(egg_path)) return Distribution.from_filename(egg_path,metadata=metadata) def install_egg(self, egg_path, tmpdir): destination = os.path.join(self.install_dir,os.path.basename(egg_path)) destination = os.path.abspath(destination) if not self.dry_run: ensure_directory(destination) dist = self.egg_distribution(egg_path) self.check_conflicts(dist) if not samefile(egg_path, destination): if os.path.isdir(destination) and not os.path.islink(destination): dir_util.remove_tree(destination, dry_run=self.dry_run) elif os.path.exists(destination): self.execute(os.unlink,(destination,),"Removing "+destination) uncache_zipdir(destination) if os.path.isdir(egg_path): if egg_path.startswith(tmpdir): f,m = shutil.move, "Moving" else: f,m = shutil.copytree, "Copying" elif self.should_unzip(dist): self.mkpath(destination) f,m = self.unpack_and_compile, "Extracting" elif egg_path.startswith(tmpdir): f,m = shutil.move, "Moving" else: f,m = shutil.copy2, "Copying" self.execute(f, (egg_path, destination), (m+" %s to %s") % (os.path.basename(egg_path),os.path.dirname(destination))) 
self.add_output(destination) return self.egg_distribution(destination) def install_exe(self, dist_filename, tmpdir): # See if it's valid, get data cfg = extract_wininst_cfg(dist_filename) if cfg is None: raise DistutilsError( "%s is not a valid distutils Windows .exe" % dist_filename ) # Create a dummy distribution object until we build the real distro dist = Distribution(None, project_name=cfg.get('metadata','name'), version=cfg.get('metadata','version'), platform="win32" ) # Convert the .exe to an unpacked egg egg_path = dist.location = os.path.join(tmpdir, dist.egg_name()+'.egg') egg_tmp = egg_path+'.tmp' egg_info = os.path.join(egg_tmp, 'EGG-INFO') pkg_inf = os.path.join(egg_info, 'PKG-INFO') ensure_directory(pkg_inf) # make sure EGG-INFO dir exists dist._provider = PathMetadata(egg_tmp, egg_info) # XXX self.exe_to_egg(dist_filename, egg_tmp) # Write EGG-INFO/PKG-INFO if not os.path.exists(pkg_inf): f = open(pkg_inf,'w') f.write('Metadata-Version: 1.0\n') for k,v in cfg.items('metadata'): if k!='target_version': f.write('%s: %s\n' % (k.replace('_','-').title(), v)) f.close() script_dir = os.path.join(egg_info,'scripts') self.delete_blockers( # delete entry-point scripts to avoid duping [os.path.join(script_dir,args[0]) for args in get_script_args(dist)] ) # Build .egg file from tmpdir bdist_egg.make_zipfile( egg_path, egg_tmp, verbose=self.verbose, dry_run=self.dry_run ) # install the .egg return self.install_egg(egg_path, tmpdir) def exe_to_egg(self, dist_filename, egg_tmp): """Extract a bdist_wininst to the directories an egg would use""" # Check for .pth file and set up prefix translations prefixes = get_exe_prefixes(dist_filename) to_compile = [] native_libs = [] top_level = {} def process(src,dst): s = src.lower() for old,new in prefixes: if s.startswith(old): src = new+src[len(old):] parts = src.split('/') dst = os.path.join(egg_tmp, *parts) dl = dst.lower() if dl.endswith('.pyd') or dl.endswith('.dll'): parts[-1] = bdist_egg.strip_module(parts[-1]) top_level[os.path.splitext(parts[0])[0]] = 1 native_libs.append(src) elif dl.endswith('.py') and old!='SCRIPTS/': top_level[os.path.splitext(parts[0])[0]] = 1 to_compile.append(dst) return dst if not src.endswith('.pth'): log.warn("WARNING: can't process %s", src) return None # extract, tracking .pyd/.dll->native_libs and .py -> to_compile unpack_archive(dist_filename, egg_tmp, process) stubs = [] for res in native_libs: if res.lower().endswith('.pyd'): # create stubs for .pyd's parts = res.split('/') resource = parts[-1] parts[-1] = bdist_egg.strip_module(parts[-1])+'.py' pyfile = os.path.join(egg_tmp, *parts) to_compile.append(pyfile); stubs.append(pyfile) bdist_egg.write_stub(resource, pyfile) self.byte_compile(to_compile) # compile .py's bdist_egg.write_safety_flag(os.path.join(egg_tmp,'EGG-INFO'), bdist_egg.analyze_egg(egg_tmp, stubs)) # write zip-safety flag for name in 'top_level','native_libs': if locals()[name]: txt = os.path.join(egg_tmp, 'EGG-INFO', name+'.txt') if not os.path.exists(txt): open(txt,'w').write('\n'.join(locals()[name])+'\n') def check_conflicts(self, dist): """Verify that there are no conflicting "old-style" packages""" return dist # XXX temporarily disable until new strategy is stable from imp import find_module, get_suffixes from glob import glob blockers = [] names = dict.fromkeys(dist._get_metadata('top_level.txt')) # XXX private attr exts = {'.pyc':1, '.pyo':1} # get_suffixes() might leave one out for ext,mode,typ in get_suffixes(): exts[ext] = 1 for path,files in 
expand_paths([self.install_dir]+self.all_site_dirs): for filename in files: base,ext = os.path.splitext(filename) if base in names: if not ext: # no extension, check for package try: f, filename, descr = find_module(base, [path]) except ImportError: continue else: if f: f.close() if filename not in blockers: blockers.append(filename) elif ext in exts and base!='site': # XXX ugh blockers.append(os.path.join(path,filename)) if blockers: self.found_conflicts(dist, blockers) return dist def found_conflicts(self, dist, blockers): if self.delete_conflicting: log.warn("Attempting to delete conflicting packages:") return self.delete_blockers(blockers) msg = """\ ------------------------------------------------------------------------- CONFLICT WARNING: The following modules or packages have the same names as modules or packages being installed, and will be *before* the installed packages in Python's search path. You MUST remove all of the relevant files and directories before you will be able to use the package(s) you are installing: %s """ % '\n '.join(blockers) if self.ignore_conflicts_at_my_risk: msg += """\ (Note: you can run EasyInstall on '%s' with the --delete-conflicting option to attempt deletion of the above files and/or directories.) """ % dist.project_name else: msg += """\ Note: you can attempt this installation again with EasyInstall, and use either the --delete-conflicting (-D) option or the --ignore-conflicts-at-my-risk option, to either delete the above files and directories, or to ignore the conflicts, respectively. Note that if you ignore the conflicts, the installed package(s) may not work. """ msg += """\ ------------------------------------------------------------------------- """ sys.stderr.write(msg) sys.stderr.flush() if not self.ignore_conflicts_at_my_risk: raise DistutilsError("Installation aborted due to conflicts") def installation_report(self, req, dist, what="Installed"): """Helpful installation message for display to package users""" msg = "\n%(what)s %(eggloc)s%(extras)s" if self.multi_version and not self.no_report: msg += """ Because this distribution was installed --multi-version, before you can import modules from this package in an application, you will need to 'import pkg_resources' and then use a 'require()' call similar to one of these examples, in order to select the desired version: pkg_resources.require("%(name)s") # latest installed version pkg_resources.require("%(name)s==%(version)s") # this exact version pkg_resources.require("%(name)s>=%(version)s") # this version or higher """ if self.install_dir not in map(normalize_path,sys.path): msg += """ Note also that the installation directory must be on sys.path at runtime for this to work. (e.g. by being the application's script directory, by being on PYTHONPATH, or by being added to sys.path by your code.) """ eggloc = dist.location name = dist.project_name version = dist.version extras = '' # TODO: self.report_extras(req, dist) return msg % locals() def report_editable(self, spec, setup_script): dirname = os.path.dirname(setup_script) python = sys.executable return """\nExtracted editable version of %(spec)s to %(dirname)s If it uses setuptools in its setup script, you can activate it in "development" mode by going to that directory and running:: %(python)s setup.py develop See the setuptools documentation for the "develop" command for more info. 
""" % locals() def run_setup(self, setup_script, setup_base, args): sys.modules.setdefault('distutils.command.bdist_egg', bdist_egg) sys.modules.setdefault('distutils.command.egg_info', egg_info) args = list(args) if self.verbose>2: v = 'v' * (self.verbose - 1) args.insert(0,'-'+v) elif self.verbose<2: args.insert(0,'-q') if self.dry_run: args.insert(0,'-n') log.info( "Running %s %s", setup_script[len(setup_base)+1:], ' '.join(args) ) try: run_setup(setup_script, args) except SystemExit, v: raise DistutilsError("Setup script exited with %s" % (v.args[0],)) def build_and_install(self, setup_script, setup_base): args = ['bdist_egg', '--dist-dir'] dist_dir = tempfile.mkdtemp( prefix='egg-dist-tmp-', dir=os.path.dirname(setup_script) ) try: args.append(dist_dir) self.run_setup(setup_script, setup_base, args) all_eggs = Environment([dist_dir]) eggs = [] for key in all_eggs: for dist in all_eggs[key]: eggs.append(self.install_egg(dist.location, setup_base)) if not eggs and not self.dry_run: log.warn("No eggs found in %s (setup script problem?)", dist_dir) return eggs finally: rmtree(dist_dir) log.set_verbosity(self.verbose) # restore our log verbosity def update_pth(self,dist): if self.pth_file is None: return for d in self.pth_file[dist.key]: # drop old entries if self.multi_version or d.location != dist.location: log.info("Removing %s from easy-install.pth file", d) self.pth_file.remove(d) if d.location in self.shadow_path: self.shadow_path.remove(d.location) if not self.multi_version: if dist.location in self.pth_file.paths: log.info( "%s is already the active version in easy-install.pth", dist ) else: log.info("Adding %s to easy-install.pth file", dist) self.pth_file.add(dist) # add new entry if dist.location not in self.shadow_path: self.shadow_path.append(dist.location) if not self.dry_run: self.pth_file.save() if dist.key=='setuptools': # Ensure that setuptools itself never becomes unavailable! # XXX should this check for latest version? filename = os.path.join(self.install_dir,'setuptools.pth') if os.path.islink(filename): os.unlink(filename) f = open(filename, 'wt') f.write(self.pth_file.make_relative(dist.location)+'\n') f.close() def unpack_progress(self, src, dst): # Progress filter for unpacking log.debug("Unpacking %s to %s", src, dst) return dst # only unpack-and-compile skips files for dry run def unpack_and_compile(self, egg_path, destination): to_compile = []; to_chmod = [] def pf(src,dst): if dst.endswith('.py') and not src.startswith('EGG-INFO/'): to_compile.append(dst) elif dst.endswith('.dll') or dst.endswith('.so'): to_chmod.append(dst) self.unpack_progress(src,dst) return not self.dry_run and dst or None unpack_archive(egg_path, destination, pf) self.byte_compile(to_compile) if not self.dry_run: for f in to_chmod: mode = ((os.stat(f)[stat.ST_MODE]) | 0555) & 07755 chmod(f, mode) def byte_compile(self, to_compile): if _dont_write_bytecode: self.warn('byte-compiling is disabled, skipping.') return from distutils.util import byte_compile try: # try to make the byte compile messages quieter log.set_verbosity(self.verbose - 1) byte_compile(to_compile, optimize=0, force=1, dry_run=self.dry_run) if self.optimize: byte_compile( to_compile, optimize=self.optimize, force=1, dry_run=self.dry_run ) finally: log.set_verbosity(self.verbose) # restore original verbosity def no_default_version_msg(self): return """bad install directory or PYTHONPATH You are attempting to install a package to a directory that is not on PYTHONPATH and which Python does not read ".pth" files from. 
The installation directory you specified (via --install-dir, --prefix, or the distutils default setting) was: %s and your PYTHONPATH environment variable currently contains: %r Here are some of your options for correcting the problem: * You can choose a different installation directory, i.e., one that is on PYTHONPATH or supports .pth files * You can add the installation directory to the PYTHONPATH environment variable. (It must then also be on PYTHONPATH whenever you run Python and want to use the package(s) you are installing.) * You can set up the installation directory to support ".pth" files by using one of the approaches described here: http://peak.telecommunity.com/EasyInstall.html#custom-installation-locations Proceeding to install. Please remember that unless you make one of these changes you will not be able to run the installed code. """ % ( self.install_dir, os.environ.get('PYTHONPATH','') ) def install_site_py(self): """Make sure there's a site.py in the target dir, if needed""" if self.sitepy_installed: return # already did it, or don't need to sitepy = os.path.join(self.install_dir, "site.py") source = resource_string("setuptools", "site-patch.py") current = "" if os.path.exists(sitepy): log.debug("Checking existing site.py in %s", self.install_dir) current = open(sitepy,'rb').read() if not current.startswith('def __boot():'): print ("\n" "***********************************************************************\n" "Warning: %s is not a\n" "setuptools-generated site.py. It will not be overwritten.\n" "***********************************************************************\n" ) % (sitepy,) self.sitepy_installed = True return if current != source: log.info("Creating %s", sitepy) if not self.dry_run: ensure_directory(sitepy) f = open(sitepy,'wb') f.write(source) f.close() self.byte_compile([sitepy]) self.sitepy_installed = True INSTALL_SCHEMES = dict( posix = dict( install_dir = '$base/lib/python$py_version_short/site-packages', script_dir = '$base/bin', ), ) DEFAULT_SCHEME = dict( install_dir = '$base/Lib/site-packages', script_dir = '$base/Scripts', ) def _expand(self, *attrs): config_vars = self.get_finalized_command('install').config_vars if self.prefix: # Set default install_dir/scripts from --prefix config_vars = config_vars.copy() config_vars['base'] = self.prefix scheme = self.INSTALL_SCHEMES.get(os.name,self.DEFAULT_SCHEME) for attr,val in scheme.items(): if getattr(self,attr,None) is None: setattr(self,attr,val) from distutils.util import subst_vars for attr in attrs: val = getattr(self, attr) if val is not None: val = subst_vars(val, config_vars) if os.name == 'posix': val = os.path.expanduser(val) setattr(self, attr, val) def get_site_dirs(): # return a list of 'site' dirs sitedirs = filter(None,os.environ.get('PYTHONPATH','').split(os.pathsep)) prefixes = [sys.prefix] if sys.exec_prefix != sys.prefix: prefixes.append(sys.exec_prefix) for prefix in prefixes: if prefix: if sys.platform in ('os2emx', 'riscos'): sitedirs.append(os.path.join(prefix, "Lib", "site-packages")) elif os.sep == '/': sitedirs.extend([os.path.join(prefix, "lib", "python" + sys.version[:3], "site-packages"), os.path.join(prefix, "lib", "site-python")]) else: sitedirs.extend( [prefix, os.path.join(prefix, "lib", "site-packages")] ) if sys.platform == 'darwin': # for framework builds *only* we add the standard Apple # locations. 
Currently only per-user, but /Library and # /Network/Library could be added too if 'Python.framework' in prefix: home = os.environ.get('HOME') if home: sitedirs.append( os.path.join(home, 'Library', 'Python', sys.version[:3], 'site-packages')) for plat_specific in (0,1): site_lib = get_python_lib(plat_specific) if site_lib not in sitedirs: sitedirs.append(site_lib) sitedirs = map(normalize_path, sitedirs) return sitedirs def expand_paths(inputs): """Yield sys.path directories that might contain "old-style" packages""" seen = {} for dirname in inputs: dirname = normalize_path(dirname) if dirname in seen: continue seen[dirname] = 1 if not os.path.isdir(dirname): continue files = os.listdir(dirname) yield dirname, files for name in files: if not name.endswith('.pth'): # We only care about the .pth files continue if name in ('easy-install.pth','setuptools.pth'): # Ignore .pth files that we control continue # Read the .pth file f = open(os.path.join(dirname,name)) lines = list(yield_lines(f)) f.close() # Yield existing non-dupe, non-import directory lines from it for line in lines: if not line.startswith("import"): line = normalize_path(line.rstrip()) if line not in seen: seen[line] = 1 if not os.path.isdir(line): continue yield line, os.listdir(line) def extract_wininst_cfg(dist_filename): """Extract configuration data from a bdist_wininst .exe Returns a ConfigParser.RawConfigParser, or None """ f = open(dist_filename,'rb') try: endrec = zipfile._EndRecData(f) if endrec is None: return None prepended = (endrec[9] - endrec[5]) - endrec[6] if prepended < 12: # no wininst data here return None f.seek(prepended-12) import struct, StringIO, ConfigParser tag, cfglen, bmlen = struct.unpack("<iii",f.read(12)) if tag not in (0x1234567A, 0x1234567B): return None # not a valid tag f.seek(prepended-(12+cfglen+bmlen)) cfg = ConfigParser.RawConfigParser({'version':'','target_version':''}) try: cfg.readfp(StringIO.StringIO(f.read(cfglen).split(chr(0),1)[0])) except ConfigParser.Error: return None if not cfg.has_section('metadata') or not cfg.has_section('Setup'): return None return cfg finally: f.close() def get_exe_prefixes(exe_filename): """Get exe->egg path translations for a given .exe file""" prefixes = [ ('PURELIB/', ''), ('PLATLIB/pywin32_system32', ''), ('PLATLIB/', ''), ('SCRIPTS/', 'EGG-INFO/scripts/') ] z = zipfile.ZipFile(exe_filename) try: for info in z.infolist(): name = info.filename parts = name.split('/') if len(parts)==3 and parts[2]=='PKG-INFO': if parts[1].endswith('.egg-info'): prefixes.insert(0,('/'.join(parts[:2]), 'EGG-INFO/')) break if len(parts)!=2 or not name.endswith('.pth'): continue if name.endswith('-nspkg.pth'): continue if parts[0].upper() in ('PURELIB','PLATLIB'): for pth in yield_lines(z.read(name)): pth = pth.strip().replace('\\','/') if not pth.startswith('import'): prefixes.append((('%s/%s/' % (parts[0],pth)), '')) finally: z.close() prefixes = [(x.lower(),y) for x, y in prefixes] prefixes.sort(); prefixes.reverse() return prefixes def parse_requirement_arg(spec): try: return Requirement.parse(spec) except ValueError: raise DistutilsError( "Not a URL, existing file, or requirement spec: %r" % (spec,) ) class PthDistributions(Environment): """A .pth file with Distribution paths in it""" dirty = False def __init__(self, filename, sitedirs=()): self.filename = filename; self.sitedirs=map(normalize_path, sitedirs) self.basedir = normalize_path(os.path.dirname(self.filename)) self._load(); Environment.__init__(self, [], None, None) for path in yield_lines(self.paths): map(self.add, find_distributions(path, True)) def _load(self): self.paths = [] saw_import = False seen = dict.fromkeys(self.sitedirs) if os.path.isfile(self.filename): for line in open(self.filename,'rt'): if line.startswith('import'): saw_import = True continue path = line.rstrip() self.paths.append(path) if not path.strip() or path.strip().startswith('#'): continue # skip non-existent paths, in case somebody deleted a package # manually, and duplicate paths as
well path = self.paths[-1] = normalize_path( os.path.join(self.basedir,path) ) if not os.path.exists(path) or path in seen: self.paths.pop() # skip it self.dirty = True # we cleaned up, so we're dirty now :) continue seen[path] = 1 if self.paths and not saw_import: self.dirty = True # ensure anything we touch has import wrappers while self.paths and not self.paths[-1].strip(): self.paths.pop() def save(self): """Write changed .pth file back to disk""" if not self.dirty: return data = '\n'.join(map(self.make_relative,self.paths)) if data: log.debug("Saving %s", self.filename) data = ( "import sys; sys.__plen = len(sys.path)\n" "%s\n" "import sys; new=sys.path[sys.__plen:];" " del sys.path[sys.__plen:];" " p=getattr(sys,'__egginsert',len(os.environ.get('PYTHONPATH','').split(os.pathsep))); sys.path[p:p]=new;" " sys.__egginsert = p+len(new)\n" ) % data if os.path.islink(self.filename): os.unlink(self.filename) f = open(self.filename,'wb') f.write(data); f.close() elif os.path.exists(self.filename): log.debug("Deleting empty %s", self.filename) os.unlink(self.filename) self.dirty = False def add(self,dist): """Add `dist` to the distribution map""" if dist.location not in self.paths and dist.location not in self.sitedirs: self.paths.append(dist.location); self.dirty = True Environment.add(self,dist) def remove(self,dist): """Remove `dist` from the distribution map""" while dist.location in self.paths: self.paths.remove(dist.location); self.dirty = True Environment.remove(self,dist) def make_relative(self,path): npath, last = os.path.split(normalize_path(path)) baselen = len(self.basedir) parts = [last] sep = os.altsep=='/' and '/' or os.sep while len(npath)>=baselen: if npath==self.basedir: parts.append(os.curdir) parts.reverse() return sep.join(parts) npath, last = os.path.split(npath) parts.append(last) else: return path def get_script_header(script_text, executable=sys_executable, wininst=False): """Create a #! line, getting options (if any) from script_text""" from distutils.command.build_scripts import first_line_re first = (script_text+'\n').splitlines()[0] match = first_line_re.match(first) options = '' if match: options = match.group(1) or '' if options: options = ' '+options if wininst: executable = "python.exe" else: executable = nt_quote_arg(executable) hdr = "#!%(executable)s%(options)s\n" % locals() if unicode(hdr,'ascii','ignore').encode('ascii') != hdr: # Non-ascii path to sys.executable, use -x to prevent warnings if options: if options.strip().startswith('-'): options = ' -x'+options.strip()[1:] # else: punt, we can't do it, let the warning happen anyway else: options = ' -x' executable = fix_jython_executable(executable, options) hdr = "#!%(executable)s%(options)s\n" % locals() return hdr def auto_chmod(func, arg, exc): if func is os.remove and os.name=='nt': chmod(arg, stat.S_IWRITE) return func(arg) exc = sys.exc_info() raise exc[0], (exc[1][0], exc[1][1] + (" %s %s" % (func,arg))) def uncache_zipdir(path): """Ensure that the importer caches dont have stale info for `path`""" from zipimport import _zip_directory_cache as zdc _uncache(path, zdc) _uncache(path, sys.path_importer_cache) def _uncache(path, cache): if path in cache: del cache[path] else: path = normalize_path(path) for p in cache: if normalize_path(p)==path: del cache[p] return def is_python(text, filename=''): "Is this string a valid Python script?" 
try: compile(text, filename, 'exec') except (SyntaxError, TypeError): return False else: return True def is_sh(executable): """Determine if the specified executable is a .sh (contains a #! line)""" try: fp = open(executable) magic = fp.read(2) fp.close() except (OSError,IOError): return executable return magic == '#!' def nt_quote_arg(arg): """Quote a command line argument according to Windows parsing rules""" result = [] needquote = False nb = 0 needquote = (" " in arg) or ("\t" in arg) if needquote: result.append('"') for c in arg: if c == '\\': nb += 1 elif c == '"': # double preceding backslashes, then add a \" result.append('\\' * (nb*2) + '\\"') nb = 0 else: if nb: result.append('\\' * nb) nb = 0 result.append(c) if nb: result.append('\\' * nb) if needquote: result.append('\\' * nb) # double the trailing backslashes result.append('"') return ''.join(result) def is_python_script(script_text, filename): """Is this text, as a whole, a Python script? (as opposed to shell/bat/etc. """ if filename.endswith('.py') or filename.endswith('.pyw'): return True # extension says it's Python if is_python(script_text, filename): return True # it's syntactically valid Python if script_text.startswith('#!'): # It begins with a '#!' line, so check if 'python' is in it somewhere return 'python' in script_text.splitlines()[0].lower() return False # Not any Python I can recognize try: from os import chmod as _chmod except ImportError: # Jython compatibility def _chmod(*args): pass def chmod(path, mode): log.debug("changing mode of %s to %o", path, mode) try: _chmod(path, mode) except os.error, e: log.debug("chmod failed: %s", e) def fix_jython_executable(executable, options): if sys.platform.startswith('java') and is_sh(executable): # Workaround Jython's sys.executable being a .sh (an invalid # shebang line interpreter) if options: # Can't apply the workaround, leave it broken log.warn("WARNING: Unable to adapt shebang line for Jython," " the following script is NOT executable\n" " see http://bugs.jython.org/issue1112 for" " more information.") else: return '/usr/bin/env %s' % executable return executable def get_script_args(dist, executable=sys_executable, wininst=False, script_dir=None): """Yield write_script() argument tuples for a distribution's entrypoints""" spec = str(dist.as_requirement()) requires = [spec] + [str(r) for r in dist.requires()] header = get_script_header("", executable, wininst) generated_by = "# generated by zetuptoolz %s" % (setuptools_version,) for group in 'console_scripts', 'gui_scripts': for name, ep in dist.get_entry_map(group).items(): script_head, script_tail = (( "# EASY-INSTALL-ENTRY-SCRIPT: %(spec)r,%(group)r,%(name)r\n" "%(generated_by)s\n" "__requires__ = %(requires)r\n" "import sys\n" "from pkg_resources import load_entry_point\n" "\n" ) % locals(), ( "sys.exit(\n" " load_entry_point(%(spec)r, %(group)r, %(name)r)()\n" ")\n" ) % locals()) if wininst or sys.platform == "win32": # On Windows/wininst, add a .py[w] extension. Delete any existing # -script.py[w], .exe, and .exe.manifest. 
if group=='gui_scripts': ext = '.pyw' old = ['','.pyw','-script.pyw','.exe','.exe.manifest'] which_python = 'pythonw.exe' new_header = re.sub('(?i)python.exe', which_python, header) else: ext = '.pyscript' old = ['','.pyscript','.py','.pyc','.pyo','-script.py','.exe','.exe.manifest'] which_python = 'python.exe' new_header = re.sub('(?i)pythonw.exe', which_python, header) len_ext = len(ext) script_head += ( "# If this script doesn't work for you, make sure that the %(ext)s\n" "# extension is included in the PATHEXT environment variable, and is\n" "# associated with %(which_python)s in the registry.\n" "\n" "if sys.argv[0].endswith(%(ext)r):\n" " sys.argv[0] = sys.argv[0][:-%(len_ext)r]\n" "\n" ) % locals() if os.path.exists(new_header[2:-1]) or sys.platform != 'win32': hdr = new_header else: hdr = header yield (name+ext, hdr + script_head + script_tail, 't', [name+x for x in old]) # Also write a shell script that runs the .pyscript, for cygwin. # # We can't use a Python script, because the Python interpreter that we want # to use is the native Windows one, which won't understand a cygwin path. # Windows paths written with forward slashes are universally understood # (by native Python, cygwin Python, and bash), so we'll use 'cygpath -m' to # get the directory from which the script was run in that form. This makes # the cygwin script and .pyscript position-independent, provided they are # in the same directory. def quote_path(s): return "\\'".join("'" + p.replace('\\', '/') + "'" for p in s.split("'")) pyscript = quote_path("/"+name+ext) python_path = quote_path(sys.executable) shell_script_text = ( '#!/bin/sh\n' '%(generated_by)s\n' '\n' 'ScriptDir=`cygpath -m "$0/.."`\n' '%(python_path)s "${ScriptDir}"%(pyscript)s "$@"\n' ) % locals() yield (name, shell_script_text, 'b') else: # On other platforms, we assume the right thing to do is to # just write the stub with no extension. yield (name, header + script_head + script_tail) def rmtree(path, ignore_errors=False, onerror=auto_chmod): """Recursively delete a directory tree. This code is taken from the Python 2.4 version of 'shutil', because the 2.3 version doesn't really work right. """ if ignore_errors: def onerror(*args): pass elif onerror is None: def onerror(*args): raise names = [] try: names = os.listdir(path) except os.error, err: onerror(os.listdir, path, sys.exc_info()) for name in names: fullname = os.path.join(path, name) try: mode = os.lstat(fullname).st_mode except os.error: mode = 0 if stat.S_ISDIR(mode): rmtree(fullname, ignore_errors, onerror) else: try: os.remove(fullname) except os.error, err: onerror(os.remove, fullname, sys.exc_info()) try: os.rmdir(path) except os.error: onerror(os.rmdir, path, sys.exc_info()) def bootstrap(): # This function is called when setuptools*.egg is run using /bin/sh import setuptools; argv0 = os.path.dirname(setuptools.__path__[0]) sys.argv[0] = argv0; sys.argv.append(argv0); main() def main(argv=None, **kw): from setuptools import setup from setuptools.dist import Distribution import distutils.core USAGE = """\ usage: %(script)s [options] requirement_or_url ... 
or: %(script)s --help """ def gen_usage (script_name): script = os.path.basename(script_name) return USAGE % vars() def with_ei_usage(f): old_gen_usage = distutils.core.gen_usage try: distutils.core.gen_usage = gen_usage return f() finally: distutils.core.gen_usage = old_gen_usage class DistributionWithoutHelpCommands(Distribution): common_usage = "" def _show_help(self,*args,**kw): with_ei_usage(lambda: Distribution._show_help(self,*args,**kw)) if argv is None: argv = sys.argv[1:] with_ei_usage(lambda: setup( script_args = ['-q','easy_install', '-v']+argv, script_name = sys.argv[0] or 'easy_install', distclass=DistributionWithoutHelpCommands, **kw ) ) tahoe-lafs-1.10.0/setuptools-0.6c16dev4.egg/setuptools/command/egg_info.py000066400000000000000000000335751221140116300261660ustar00rootroot00000000000000"""setuptools.command.egg_info Create a distribution's .egg-info directory and contents""" # This module should be kept compatible with Python 2.3 import os, re from setuptools import Command from distutils.errors import * from distutils import log from setuptools.command.sdist import sdist from distutils.util import convert_path from distutils.filelist import FileList from pkg_resources import parse_requirements, safe_name, parse_version, \ safe_version, yield_lines, EntryPoint, iter_entry_points, to_filename from sdist import walk_revctrl class egg_info(Command): description = "create a distribution's .egg-info directory" user_options = [ ('egg-base=', 'e', "directory containing .egg-info directories" " (default: top of the source tree)"), ('tag-svn-revision', 'r', "Add subversion revision ID to version number"), ('tag-date', 'd', "Add date stamp (e.g. 20050528) to version number"), ('tag-build=', 'b', "Specify explicit tag to add to version number"), ('no-svn-revision', 'R', "Don't add subversion revision ID [default]"), ('no-date', 'D', "Don't include date stamp [default]"), ] boolean_options = ['tag-date', 'tag-svn-revision'] negative_opt = {'no-svn-revision': 'tag-svn-revision', 'no-date': 'tag-date'} def initialize_options(self): self.egg_name = None self.egg_version = None self.egg_base = None self.egg_info = None self.tag_build = None self.tag_svn_revision = 0 self.tag_date = 0 self.broken_egg_info = False self.vtags = None def save_version_info(self, filename): from setopt import edit_config edit_config( filename, {'egg_info': {'tag_svn_revision':0, 'tag_date': 0, 'tag_build': self.tags()} } ) def finalize_options (self): self.egg_name = safe_name(self.distribution.get_name()) self.vtags = self.tags() self.egg_version = self.tagged_version() try: list( parse_requirements('%s==%s' % (self.egg_name,self.egg_version)) ) except ValueError: raise DistutilsOptionError( "Invalid distribution name or version syntax: %s-%s" % (self.egg_name,self.egg_version) ) if self.egg_base is None: dirs = self.distribution.package_dir self.egg_base = (dirs or {}).get('',os.curdir) self.ensure_dirname('egg_base') self.egg_info = to_filename(self.egg_name)+'.egg-info' if self.egg_base != os.curdir: self.egg_info = os.path.join(self.egg_base, self.egg_info) if '-' in self.egg_name: self.check_broken_egg_info() # Set package version for the benefit of dumber commands # (e.g. sdist, bdist_wininst, etc.) 
# self.distribution.metadata.version = self.egg_version # If we bootstrapped around the lack of a PKG-INFO, as might be the # case in a fresh checkout, make sure that any special tags get added # to the version info # pd = self.distribution._patched_dist if pd is not None and pd.key==self.egg_name.lower(): pd._version = self.egg_version pd._parsed_version = parse_version(self.egg_version) self.distribution._patched_dist = None def write_or_delete_file(self, what, filename, data, force=False): """Write `data` to `filename` or delete if empty If `data` is non-empty, this routine is the same as ``write_file()``. If `data` is empty but not ``None``, this is the same as calling ``delete_file(filename)`. If `data` is ``None``, then this is a no-op unless `filename` exists, in which case a warning is issued about the orphaned file (if `force` is false), or deleted (if `force` is true). """ if data: self.write_file(what, filename, data) elif os.path.exists(filename): if data is None and not force: log.warn( "%s not set in setup(), but %s exists", what, filename ) return else: self.delete_file(filename) def write_file(self, what, filename, data): """Write `data` to `filename` (if not a dry run) after announcing it `what` is used in a log message to identify what is being written to the file. """ log.info("writing %s to %s", what, filename) if not self.dry_run: f = open(filename, 'wb') f.write(data) f.close() def delete_file(self, filename): """Delete `filename` (if not a dry run) after announcing it""" log.info("deleting %s", filename) if not self.dry_run: os.unlink(filename) def tagged_version(self): return safe_version(self.distribution.get_version() + self.vtags) def run(self): self.mkpath(self.egg_info) installer = self.distribution.fetch_build_egg for ep in iter_entry_points('egg_info.writers'): writer = ep.load(installer=installer) writer(self, ep.name, os.path.join(self.egg_info,ep.name)) # Get rid of native_libs.txt if it was put there by older bdist_egg nl = os.path.join(self.egg_info, "native_libs.txt") if os.path.exists(nl): self.delete_file(nl) self.find_sources() def tags(self): version = '' if self.tag_build: version+=self.tag_build if self.tag_svn_revision and ( os.path.exists('.svn') or os.path.exists('PKG-INFO') ): version += '-r%s' % self.get_svn_revision() if self.tag_date: import time; version += time.strftime("-%Y%m%d") return version def get_svn_revision(self): revision = 0 urlre = re.compile('url="([^"]+)"') revre = re.compile('committed-rev="(\d+)"') for base,dirs,files in os.walk(os.curdir): if '.svn' not in dirs: dirs[:] = [] continue # no sense walking uncontrolled subdirs dirs.remove('.svn') f = open(os.path.join(base,'.svn','entries')) data = f.read() f.close() if data.startswith('<?xml'): dirurl = urlre.search(data).group(1) # get repository URL localrev = max([int(m.group(1)) for m in revre.finditer(data)]+[0]) else: try: svnver = int(data.splitlines()[0]) except: svnver=-1 if svnver<8: log.warn("unrecognized .svn/entries format; skipping %s", base) dirs[:] = [] continue data = map(str.splitlines,data.split('\n\x0c\n')) del data[0][0] # get rid of the '8' dirurl = data[0][3] localrev = max([int(d[9]) for d in data if len(d)>9 and d[9]]+[0]) if base==os.curdir: base_url = dirurl+'/' # save the root url elif not dirurl.startswith(base_url): dirs[:] = [] continue # not part of the same svn tree, skip it revision = max(revision, localrev) return str(revision or get_pkg_info_revision()) def find_sources(self): """Generate SOURCES.txt manifest file""" manifest_filename = os.path.join(self.egg_info,"SOURCES.txt") mm = manifest_maker(self.distribution) mm.manifest = manifest_filename mm.run() self.filelist = mm.filelist def check_broken_egg_info(self): bei = self.egg_name+'.egg-info' if self.egg_base != os.curdir: bei = os.path.join(self.egg_base, bei) if os.path.exists(bei): log.warn( "-"*78+'\n' "Note: Your current .egg-info directory has a '-' in its name;" '\nthis will not work correctly with "setup.py develop".\n\n'
'Please rename %s to %s to correct this problem.\n'+'-'*78, bei, self.egg_info ) self.broken_egg_info = self.egg_info self.egg_info = bei # make it work for now class FileList(FileList): """File list that accepts only existing, platform-independent paths""" def append(self, item): if item.endswith('\r'): # Fix older sdists built on Windows item = item[:-1] path = convert_path(item) if os.path.exists(path): self.files.append(path) class manifest_maker(sdist): template = "MANIFEST.in" def initialize_options (self): self.use_defaults = 1 self.prune = 1 self.manifest_only = 1 self.force_manifest = 1 def finalize_options(self): pass def run(self): self.filelist = FileList() if not os.path.exists(self.manifest): self.write_manifest() # it must exist so it'll get in the list self.filelist.findall() self.add_defaults() if os.path.exists(self.template): self.read_template() self.prune_file_list() self.filelist.sort() self.filelist.remove_duplicates() self.write_manifest() def write_manifest (self): """Write the file list in 'self.filelist' (presumably as filled in by 'add_defaults()' and 'read_template()') to the manifest file named by 'self.manifest'. """ files = self.filelist.files if os.sep!='/': files = [f.replace(os.sep,'/') for f in files] self.execute(write_file, (self.manifest, files), "writing manifest file '%s'" % self.manifest) def warn(self, msg): # suppress missing-file warnings from sdist if not msg.startswith("standard file not found:"): sdist.warn(self, msg) def add_defaults(self): sdist.add_defaults(self) self.filelist.append(self.template) self.filelist.append(self.manifest) rcfiles = list(walk_revctrl()) if rcfiles: self.filelist.extend(rcfiles) elif os.path.exists(self.manifest): self.read_manifest() ei_cmd = self.get_finalized_command('egg_info') self.filelist.include_pattern("*", prefix=ei_cmd.egg_info) def prune_file_list (self): build = self.get_finalized_command('build') base_dir = self.distribution.get_fullname() self.filelist.exclude_pattern(None, prefix=build.build_base) self.filelist.exclude_pattern(None, prefix=base_dir) sep = re.escape(os.sep) self.filelist.exclude_pattern(sep+r'(RCS|CVS|\.svn)'+sep, is_regex=1) def write_file (filename, contents): """Create a file with the specified name and write 'contents' (a sequence of strings without line terminators) to it. """ f = open(filename, "wb") # always write POSIX-style manifest f.write("\n".join(contents)) f.close() def write_pkg_info(cmd, basename, filename): log.info("writing %s", filename) if not cmd.dry_run: metadata = cmd.distribution.metadata metadata.version, oldver = cmd.egg_version, metadata.version metadata.name, oldname = cmd.egg_name, metadata.name try: # write unescaped data to PKG-INFO, so older pkg_resources # can still parse it metadata.write_pkg_info(cmd.egg_info) finally: metadata.name, metadata.version = oldname, oldver safe = getattr(cmd.distribution,'zip_safe',None) import bdist_egg; bdist_egg.write_safety_flag(cmd.egg_info, safe) def warn_depends_obsolete(cmd, basename, filename): if os.path.exists(filename): log.warn( "WARNING: 'depends.txt' is not used by setuptools 0.6!\n" "Use the install_requires/extras_require setup() args instead." 
) def write_requirements(cmd, basename, filename): dist = cmd.distribution data = ['\n'.join(yield_lines(dist.install_requires or ()))] for extra,reqs in (dist.extras_require or {}).items(): data.append('\n\n[%s]\n%s' % (extra, '\n'.join(yield_lines(reqs)))) cmd.write_or_delete_file("requirements", filename, ''.join(data)) def write_toplevel_names(cmd, basename, filename): pkgs = dict.fromkeys( [k.split('.',1)[0] for k in cmd.distribution.iter_distribution_names() ] ) cmd.write_file("top-level names", filename, '\n'.join(pkgs)+'\n') def overwrite_arg(cmd, basename, filename): write_arg(cmd, basename, filename, True) def write_arg(cmd, basename, filename, force=False): argname = os.path.splitext(basename)[0] value = getattr(cmd.distribution, argname, None) if value is not None: value = '\n'.join(value)+'\n' cmd.write_or_delete_file(argname, filename, value, force) def write_entries(cmd, basename, filename): ep = cmd.distribution.entry_points if isinstance(ep,basestring) or ep is None: data = ep elif ep is not None: data = [] for section, contents in ep.items(): if not isinstance(contents,basestring): contents = EntryPoint.parse_group(section, contents) contents = '\n'.join(map(str,contents.values())) data.append('[%s]\n%s\n\n' % (section,contents)) data = ''.join(data) cmd.write_or_delete_file('entry points', filename, data, True) def get_pkg_info_revision(): # See if we can get a -r### off of PKG-INFO, in case this is an sdist of # a subversion revision # if os.path.exists('PKG-INFO'): f = open('PKG-INFO','rU') for line in f: match = re.match(r"Version:.*-r(\d+)\s*$", line) if match: return int(match.group(1)) return 0 # tahoe-lafs-1.10.0/setuptools-0.6c16dev4.egg/setuptools/command/install.py000066400000000000000000000101001221140116300260320ustar00rootroot00000000000000import setuptools, sys, glob from distutils.command.install import install as _install from distutils.errors import DistutilsArgError class install(_install): """Use easy_install to install the package, w/dependencies""" user_options = _install.user_options + [ ('old-and-unmanageable', None, "Try not to use this!"), ('single-version-externally-managed', None, "used by system package builders to create 'flat' eggs"), ] boolean_options = _install.boolean_options + [ 'old-and-unmanageable', 'single-version-externally-managed', ] new_commands = [ ('install_egg_info', lambda self: True), ('install_scripts', lambda self: True), ] _nc = dict(new_commands) sub_commands = [ cmd for cmd in _install.sub_commands if cmd[0] not in _nc ] + new_commands def initialize_options(self): _install.initialize_options(self) self.old_and_unmanageable = None self.single_version_externally_managed = None self.no_compile = None # make DISTUTILS_DEBUG work right! 
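# Illustrative note, not part of the upstream setuptools file: the two flags
# initialized above select the classic distutils code path in run()/old_run()
# further below instead of the egg-based install. A typical system-packaging
# invocation, using a hypothetical record file name, might look like:
#
#     python setup.py install --single-version-externally-managed \
#         --record=installed-files.txt
#
# finalize_options() below rejects --single-version-externally-managed when
# neither --record nor --root is supplied.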
def finalize_options(self): _install.finalize_options(self) if self.root: self.single_version_externally_managed = True elif self.single_version_externally_managed: if not self.root and not self.record: raise DistutilsArgError( "You must specify --record or --root when building system" " packages" ) def handle_extra_path(self): if self.root or self.single_version_externally_managed: # explicit backward-compatibility mode, allow extra_path to work return _install.handle_extra_path(self) # Ignore extra_path when installing an egg (or being run by another # command without --root or --single-version-externally-managed self.path_file = None self.extra_dirs = '' def run(self): self.old_run() if sys.platform == "win32": from setuptools.command.scriptsetup import do_scriptsetup do_scriptsetup() def old_run(self): # Explicit request for old-style install? Just do it if self.old_and_unmanageable or self.single_version_externally_managed: return _install.run(self) # Attempt to detect whether we were called from setup() or by another # command. If we were called by setup(), our caller will be the # 'run_command' method in 'distutils.dist', and *its* caller will be # the 'run_commands' method. If we were called any other way, our # immediate caller *might* be 'run_command', but it won't have been # called by 'run_commands'. This is slightly kludgy, but seems to # work. # caller = sys._getframe(2) caller_module = caller.f_globals.get('__name__','') caller_name = caller.f_code.co_name if caller_module != 'distutils.dist' or caller_name!='run_commands': # We weren't called from the command line or setup(), so we # should run in backward-compatibility mode to support bdist_* # commands. _install.run(self) else: self.do_egg_install() def do_egg_install(self): easy_install = self.distribution.get_command_class('easy_install') cmd = easy_install( self.distribution, args="x", root=self.root, record=self.record, ) cmd.ensure_finalized() # finalize before bdist_egg munges install cmd cmd.always_copy_from = '.' 
# make sure local-dir eggs get installed # pick up setup-dir .egg files only: no .egg-info cmd.package_index.scan(glob.glob('*.egg')) self.run_command('bdist_egg') args = [self.distribution.get_command_obj('bdist_egg').egg_output] if setuptools.bootstrap_install_from: # Bootstrap self-installation of setuptools args.insert(0, setuptools.bootstrap_install_from) cmd.args = args cmd.run() setuptools.bootstrap_install_from = None # tahoe-lafs-1.10.0/setuptools-0.6c16dev4.egg/setuptools/command/install_egg_info.py000066400000000000000000000072141221140116300277030ustar00rootroot00000000000000from setuptools import Command from setuptools.archive_util import unpack_archive from distutils import log, dir_util import os, shutil, pkg_resources class install_egg_info(Command): """Install an .egg-info directory for the package""" description = "Install an .egg-info directory for the package" user_options = [ ('install-dir=', 'd', "directory to install to"), ] def initialize_options(self): self.install_dir = None def finalize_options(self): self.set_undefined_options('install_lib',('install_dir','install_dir')) ei_cmd = self.get_finalized_command("egg_info") basename = pkg_resources.Distribution( None, None, ei_cmd.egg_name, ei_cmd.egg_version ).egg_name()+'.egg-info' self.source = ei_cmd.egg_info self.target = os.path.join(self.install_dir, basename) self.outputs = [self.target] def run(self): self.run_command('egg_info') target = self.target if os.path.isdir(self.target) and not os.path.islink(self.target): dir_util.remove_tree(self.target, dry_run=self.dry_run) elif os.path.exists(self.target): self.execute(os.unlink,(self.target,),"Removing "+self.target) if not self.dry_run: pkg_resources.ensure_directory(self.target) self.execute(self.copytree, (), "Copying %s to %s" % (self.source, self.target) ) self.install_namespaces() def get_outputs(self): return self.outputs def copytree(self): # Copy the .egg-info tree to site-packages def skimmer(src,dst): # filter out source-control directories; note that 'src' is always # a '/'-separated path, regardless of platform. 'dst' is a # platform-specific path. for skip in '.svn/','CVS/': if src.startswith(skip) or '/'+skip in src: return None self.outputs.append(dst) log.debug("Copying %s to %s", src, dst) return dst unpack_archive(self.source, self.target, skimmer) def install_namespaces(self): nsp = self._get_all_ns_packages() if not nsp: return filename,ext = os.path.splitext(self.target) filename += '-nspkg.pth'; self.outputs.append(filename) log.info("Installing %s",filename) if not self.dry_run: f = open(filename,'wb') for pkg in nsp: pth = tuple(pkg.split('.')) trailer = '\n' if '.' 
in pkg: trailer = ( "; m and setattr(sys.modules[%r], %r, m)\n" % ('.'.join(pth[:-1]), pth[-1]) ) f.write( "import sys,new,os; " "p = os.path.join(sys._getframe(1).f_locals['sitedir'], " "*%(pth)r); " "ie = os.path.exists(os.path.join(p,'__init__.py')); " "m = not ie and " "sys.modules.setdefault(%(pkg)r,new.module(%(pkg)r)); " "mp = (m or []) and m.__dict__.setdefault('__path__',[]); " "(p not in mp) and mp.append(p)%(trailer)s" % locals() ) f.close() def _get_all_ns_packages(self): nsp = {} for pkg in self.distribution.namespace_packages or []: pkg = pkg.split('.') while pkg: nsp['.'.join(pkg)] = 1 pkg.pop() nsp=list(nsp) nsp.sort() # set up shorter names first return nsp tahoe-lafs-1.10.0/setuptools-0.6c16dev4.egg/setuptools/command/install_lib.py000066400000000000000000000046601221140116300266760ustar00rootroot00000000000000from distutils.command.install_lib import install_lib as _install_lib import os class install_lib(_install_lib): """Don't add compiled flags to filenames of non-Python files""" def _bytecode_filenames (self, py_filenames): bytecode_files = [] for py_file in py_filenames: if not py_file.endswith('.py'): continue if self.compile: bytecode_files.append(py_file + "c") if self.optimize > 0: bytecode_files.append(py_file + "o") return bytecode_files def run(self): self.build() outfiles = self.install() if outfiles is not None: # always compile, in case we have any extension stubs to deal with self.byte_compile(outfiles) def get_exclusions(self): exclude = {} nsp = self.distribution.namespace_packages if (nsp and self.get_finalized_command('install') .single_version_externally_managed ): for pkg in nsp: parts = pkg.split('.') while parts: pkgdir = os.path.join(self.install_dir, *parts) for f in '__init__.py', '__init__.pyc', '__init__.pyo': exclude[os.path.join(pkgdir,f)] = 1 parts.pop() return exclude def copy_tree( self, infile, outfile, preserve_mode=1, preserve_times=1, preserve_symlinks=0, level=1 ): assert preserve_mode and preserve_times and not preserve_symlinks exclude = self.get_exclusions() if not exclude: return _install_lib.copy_tree(self, infile, outfile) # Exclude namespace package __init__.py* files from the output from setuptools.archive_util import unpack_directory from distutils import log outfiles = [] def pf(src, dst): if dst in exclude: log.warn("Skipping installation of %s (namespace package)",dst) return False log.info("copying %s -> %s", src, os.path.dirname(dst)) outfiles.append(dst) return dst unpack_directory(infile, outfile, pf) return outfiles def get_outputs(self): outputs = _install_lib.get_outputs(self) exclude = self.get_exclusions() if exclude: return [f for f in outputs if f not in exclude] return outputs tahoe-lafs-1.10.0/setuptools-0.6c16dev4.egg/setuptools/command/install_scripts.py000066400000000000000000000035651221140116300276220ustar00rootroot00000000000000from distutils.command.install_scripts import install_scripts \ as _install_scripts from easy_install import get_script_args, sys_executable, chmod from pkg_resources import Distribution, PathMetadata, ensure_directory import os from distutils import log class install_scripts(_install_scripts): """Do normal script install, plus any egg_info wrapper scripts""" def initialize_options(self): _install_scripts.initialize_options(self) self.no_ep = False def run(self): self.run_command("egg_info") if self.distribution.scripts: _install_scripts.run(self) # run first to set up self.outfiles else: self.outfiles = [] if self.no_ep: # don't install entry point scripts into .egg file! 
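# Illustrative note, not part of the upstream setuptools file: with --no-ep
# the command returns at this point, so the entry-point wrapper scripts that
# would otherwise be generated below from the project's egg-info metadata are
# skipped and only the plain distutils scripts installed above remain.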
return ei_cmd = self.get_finalized_command("egg_info") dist = Distribution( ei_cmd.egg_base, PathMetadata(ei_cmd.egg_base, ei_cmd.egg_info), ei_cmd.egg_name, ei_cmd.egg_version, ) bs_cmd = self.get_finalized_command('build_scripts') executable = getattr(bs_cmd,'executable',sys_executable) is_wininst = getattr( self.get_finalized_command("bdist_wininst"), '_is_running', False ) for args in get_script_args(dist, executable, is_wininst): self.write_script(*args) def write_script(self, script_name, contents, mode="t", *ignored): """Write an executable file to the scripts directory""" log.info("Installing %s script to %s", script_name, self.install_dir) target = os.path.join(self.install_dir, script_name) self.outfiles.append(target) if not self.dry_run: ensure_directory(target) f = open(target,"w"+mode) f.write(contents) f.close() chmod(target,0755) tahoe-lafs-1.10.0/setuptools-0.6c16dev4.egg/setuptools/command/register.py000066400000000000000000000004251221140116300262210ustar00rootroot00000000000000from distutils.command.register import register as _register class register(_register): __doc__ = _register.__doc__ def run(self): # Make sure that we are using valid current name/version info self.run_command('egg_info') _register.run(self) tahoe-lafs-1.10.0/setuptools-0.6c16dev4.egg/setuptools/command/rotate.py000066400000000000000000000037011221140116300256730ustar00rootroot00000000000000import distutils, os from setuptools import Command from distutils.util import convert_path from distutils import log from distutils.errors import * class rotate(Command): """Delete older distributions""" description = "delete older distributions, keeping N newest files" user_options = [ ('match=', 'm', "patterns to match (required)"), ('dist-dir=', 'd', "directory where the distributions are"), ('keep=', 'k', "number of matching distributions to keep"), ] boolean_options = [] def initialize_options(self): self.match = None self.dist_dir = None self.keep = None def finalize_options(self): if self.match is None: raise DistutilsOptionError( "Must specify one or more (comma-separated) match patterns " "(e.g. '.zip' or '.egg')" ) if self.keep is None: raise DistutilsOptionError("Must specify number of files to keep") try: self.keep = int(self.keep) except ValueError: raise DistutilsOptionError("--keep must be an integer") if isinstance(self.match, basestring): self.match = [ convert_path(p.strip()) for p in self.match.split(',') ] self.set_undefined_options('bdist',('dist_dir', 'dist_dir')) def run(self): self.run_command("egg_info") from glob import glob for pattern in self.match: pattern = self.distribution.get_name()+'*'+pattern files = glob(os.path.join(self.dist_dir,pattern)) files = [(os.path.getmtime(f),f) for f in files] files.sort() files.reverse() log.info("%d file(s) matching %s", len(files), pattern) files = files[self.keep:] for (t,f) in files: log.info("Deleting %s", f) if not self.dry_run: os.unlink(f) tahoe-lafs-1.10.0/setuptools-0.6c16dev4.egg/setuptools/command/saveopts.py000066400000000000000000000013431221140116300262410ustar00rootroot00000000000000import distutils, os from setuptools import Command from setuptools.command.setopt import edit_config, option_base class saveopts(option_base): """Save command-line options to a file""" description = "save supplied options to setup.cfg or other config file" def run(self): dist = self.distribution commands = dist.command_options.keys() settings = {} for cmd in commands: if cmd=='saveopts': continue # don't save our own options! 
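# Descriptive note, not part of the upstream setuptools file: only options
# whose recorded source is the command line are collected here; values that
# already came from a configuration file are left alone, so edit_config()
# below persists just the freshly supplied settings.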
for opt,(src,val) in dist.get_option_dict(cmd).items(): if src=="command line": settings.setdefault(cmd,{})[opt] = val edit_config(self.filename, settings, self.dry_run) tahoe-lafs-1.10.0/setuptools-0.6c16dev4.egg/setuptools/command/scriptsetup.py000066400000000000000000000311651221140116300267670ustar00rootroot00000000000000from distutils.errors import DistutilsSetupError from setuptools import Command import sys class scriptsetup(Command): action = (sys.platform == "win32" and "set up .pyscript association and PATHEXT variable to run scripts" or "this does nothing on non-Windows platforms") user_options = [ ('allusers', 'a', 'make changes for all users of this Windows installation (requires Administrator privileges)'), ] boolean_options = ['allusers'] def initialize_options(self): self.allusers = False def finalize_options(self): pass def run(self): if sys.platform != "win32": print "\n'scriptsetup' isn't needed on non-Windows platforms." else: do_scriptsetup(self.allusers) def do_scriptsetup(allusers=False): print "\nSetting up environment to run scripts for %s..." % (allusers and "all users" or "the current user") from _winreg import HKEY_CURRENT_USER, HKEY_LOCAL_MACHINE, HKEY_CLASSES_ROOT, \ REG_SZ, REG_EXPAND_SZ, KEY_QUERY_VALUE, KEY_SET_VALUE, \ OpenKey, CreateKey, QueryValueEx, SetValueEx, FlushKey, CloseKey USER_ENV = "Environment" try: user_env = OpenKey(HKEY_CURRENT_USER, USER_ENV, 0, KEY_QUERY_VALUE) except WindowsError, e: raise DistutilsSetupError("I could not read the user environment from the registry.\n%r" % (e,)) SYSTEM_ENV = "SYSTEM\\CurrentControlSet\\Control\\Session Manager\\Environment" try: system_env = OpenKey(HKEY_LOCAL_MACHINE, SYSTEM_ENV, 0, KEY_QUERY_VALUE) except WindowsError, e: raise DistutilsSetupError("I could not read the system environment from the registry.\n%r" % (e,)) # HKEY_CLASSES_ROOT is a merged view that would only confuse us. # USER_CLASSES = "SOFTWARE\\Classes" try: user_classes = OpenKey(HKEY_CURRENT_USER, USER_CLASSES, 0, KEY_QUERY_VALUE) except WindowsError, e: raise DistutilsSetupError("I could not read the user filetype associations from the registry.\n%r" % (e,)) SYSTEM_CLASSES = "SOFTWARE\\Classes" try: system_classes = OpenKey(HKEY_LOCAL_MACHINE, SYSTEM_CLASSES, 0, KEY_QUERY_VALUE) except WindowsError, e: raise DistutilsSetupError("I could not read the system filetype associations from the registry.\n%r" % (e,)) def query(key, subkey, what): try: (value, type) = QueryValueEx(key, subkey) except WindowsError, e: if e.winerror == 2: # not found return None raise DistutilsSetupError("I could not read %s from the registry.\n%r" % (what, e)) # It does not matter that we don't expand environment strings, in fact it's better not to. if type != REG_SZ and type != REG_EXPAND_SZ: raise DistutilsSetupError("I expected the registry entry for %s to have a string type (REG_SZ or REG_EXPAND_SZ), " "and was flummoxed by it having type code %r." 
% (what, type)) return (value, type) def open_and_query(key, path, subkey, what): try: read_key = OpenKey(key, path, 0, KEY_QUERY_VALUE) except WindowsError, e: if e.winerror == 2: # not found return None raise DistutilsSetupError("I could not read %s from the registry because I could not open " "the parent key.\n%r" % (what, e)) try: return query(read_key, subkey, what) finally: CloseKey(read_key) def update(key_name_path, subkey, desired_value, desired_type, goal, what): (key, name, path) = key_name_path (old_value, old_type) = open_and_query(key, path, subkey, what) or (None, None) if (old_value, old_type) == (desired_value, desired_type): print "Already done: %s." % (goal,) return False try: update_key = OpenKey(key, path, 0, KEY_SET_VALUE|KEY_QUERY_VALUE) except WindowsError, e: if e.winerror != 2: raise DistutilsSetupError("I tried to %s, but was not successful because I could not open " "the registry key %s\\%s for writing.\n%r" % (goal, name, path, e)) try: update_key = CreateKey(key, path) except WindowsError, e: raise DistutilsSetupError("I tried to %s, but was not successful because the registry key %s\\%s " "did not exist, and I was unable to create it.\n%r" % (goal, name, path, e)) (new_value, new_type) = (None, None) try: SetValueEx(update_key, subkey, 0, desired_type, desired_value) except WindowsError, e: raise DistutilsSetupError("I tried to %s, but was not able to set the subkey %r under %s\\%s to be %r.\n%r" % (goal, subkey, name, path, desired_value)) else: (new_value, new_type) = query(update_key, subkey, what) or (None, None) finally: FlushKey(update_key) CloseKey(update_key) if (new_value, new_type) != (desired_value, desired_type): raise DistutilsSetupError("I tried to %s by setting the subkey %r under %s\\%s to be %r, " "and the call to SetValueEx succeeded, but the value ended up as " "%r instead (it was previously %r). Maybe the update was unexpectedly virtualized?" % (goal, subkey, name, path, desired_value, new_value, old_value)) print "Done: %s." % (goal,) return True # Maintenance hazard: 'add_to_environment' and 'associate' use very similar, but not identical logic. def add_to_environment(varname, addition, change_allusers): changed = False what = "the %s environment variable %s" % (change_allusers and "system" or "user", varname) goal = "add %s to %s" % (addition, what) system_valueandtype = query(system_env, varname, "the system environment variable %s" % (varname,)) user_valueandtype = query(user_env, varname, "the user environment variable %s" % (varname,)) if change_allusers: (value, type) = system_valueandtype or (u'', REG_SZ) key_name_path = (HKEY_LOCAL_MACHINE, "HKEY_LOCAL_MACHINE", SYSTEM_ENV) else: (value, type) = user_valueandtype or system_valueandtype or (u'', REG_SZ) key_name_path = (HKEY_CURRENT_USER, "HKEY_CURRENT_USER", USER_ENV) if addition.lower() in value.lower().split(u';'): print "Already done: %s." % (goal,) else: changed |= update(key_name_path, varname, value + u';' + addition, type, goal, what) if change_allusers: # Also change any overriding environment entry for the current user. 
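# Descriptive note, not part of the upstream setuptools file: after the
# system-wide variable has been extended, a per-user variable of the same name
# would still shadow it, so the block below also appends the addition to the
# user-level entry unless that entry already contains it or refers back to the
# system value via %VARNAME%.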
(user_value, user_type) = user_valueandtype or (u'', REG_SZ) split_value = user_value.lower().split(u';') if not (addition.lower() in split_value or u'%'+varname.lower()+u'%' in split_value): now_what = "the overriding user environment variable %s" % (varname,) changed |= update((HKEY_CURRENT_USER, "HKEY_CURRENT_USER", USER_ENV), varname, user_value + u';' + addition, user_type, "add %s to %s" % (addition, now_what), now_what) return changed def associate(ext, target, change_allusers): changed = False what = "the %s association for %s" % (change_allusers and "system" or "user", ext) goal = "associate the filetype %s with %s for %s" % (ext, target, change_allusers and "all users" or "the current user") try: if change_allusers: target_key = OpenKey(HKEY_LOCAL_MACHINE, "%s\\%s" % (SYSTEM_CLASSES, target), 0, KEY_QUERY_VALUE) else: target_key = OpenKey(HKEY_CLASSES_ROOT, target, 0, KEY_QUERY_VALUE) except WindowsError, e: raise DistutilsSetupError("I was going to %s, but that won't work because the %s class does not exist in the registry, " "as far as I can tell.\n%r" % (goal, target, e)) CloseKey(target_key) system_key_name_path = (HKEY_LOCAL_MACHINE, "HKEY_LOCAL_MACHINE", "%s\\%s" % (SYSTEM_CLASSES, ext)) user_key_name_path = (HKEY_CURRENT_USER, "HKEY_CURRENT_USER", "%s\\%s" % (USER_CLASSES, ext)) system_valueandtype = open_and_query(system_classes, ext, "", "the system association for %s" % (ext,)) user_valueandtype = open_and_query(user_classes, ext, "", "the user association for %s" % (ext,)) if change_allusers: (value, type) = system_valueandtype or (u'', REG_SZ) key_name_path = system_key_name_path else: (value, type) = user_valueandtype or system_valueandtype or (u'', REG_SZ) key_name_path = user_key_name_path if value == target: print "Already done: %s." % (goal,) else: changed |= update(key_name_path, "", unicode(target), REG_SZ, goal, what) if change_allusers: # Also change any overriding association for the current user. (user_value, user_type) = user_valueandtype or (u'', REG_SZ) if user_value != target: changed |= update(user_key_name_path, "", unicode(target), REG_SZ, "associate the filetype %s with %s for the current user " \ "(because the system association is overridden)" % (ext, target), "the overriding user association for %s" % (ext,)) return changed def broadcast_settingchange(change_allusers): print "Broadcasting that the environment has changed, please wait..." 
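# Descriptive note, not part of the upstream setuptools file: the ctypes call
# below sends WM_SETTINGCHANGE to HWND_BROADCAST (with the string
# "Environment" as lParam) so that running programs such as Explorer re-read
# the changed registry environment; SMTO_ABORTIFHUNG and the 5000 ms timeout
# keep the broadcast from blocking on unresponsive windows.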
# # # LRESULT WINAPI SendMessageTimeoutW(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam, # UINT fuFlags, UINT uTimeout, PDWORD_PTR lpdwResult); try: from ctypes import WINFUNCTYPE, POINTER, windll, addressof, c_wchar_p from ctypes.wintypes import LONG, HWND, UINT, WPARAM, LPARAM, DWORD SendMessageTimeout = WINFUNCTYPE(POINTER(LONG), HWND, UINT, WPARAM, LPARAM, UINT, UINT, POINTER(POINTER(DWORD))) \ (("SendMessageTimeoutW", windll.user32)) HWND_BROADCAST = 0xFFFF WM_SETTINGCHANGE = 0x001A SMTO_ABORTIFHUNG = 0x0002 SendMessageTimeout(HWND_BROADCAST, WM_SETTINGCHANGE, change_allusers and 1 or 0, addressof(c_wchar_p(u"Environment")), SMTO_ABORTIFHUNG, 5000, None); except Exception, e: print "Warning: %r" % (e,) changed_assoc = associate(".pyscript", "Python.File", allusers) changed_env = False try: changed_env |= add_to_environment("PATHEXT", ".pyscript", allusers) changed_env |= add_to_environment("PATHEXT", ".pyw", allusers) finally: CloseKey(user_env) CloseKey(system_env) if changed_assoc or changed_env: broadcast_settingchange(allusers) if changed_env: # whether logout is needed seems to randomly differ between installations # of XP, but it is not needed in Vista or later. try: import platform, re need_logout = not re.search(r'^[6-9]|([1-9][0-9]+)\.', platform.version()) except Exception, e: e # hush pyflakes need_logout = True if need_logout: print """ *********************************************************************** Changes have been made to the persistent environment, but they may not take effect in this Windows session. Running installed Python scripts from a Command Prompt may only work after you have logged out and back in again, or rebooted. *********************************************************************** """ else: print """ *********************************************************************** Changes have been made to the persistent environment, but not in this Command Prompt. Running installed Python scripts will only work from new Command Prompts opened from now on. 
*********************************************************************** """ tahoe-lafs-1.10.0/setuptools-0.6c16dev4.egg/setuptools/command/sdist.py000066400000000000000000000162371221140116300255330ustar00rootroot00000000000000from distutils.command.sdist import sdist as _sdist from distutils.util import convert_path from distutils import log from glob import glob import os, re, sys, pkg_resources entities = [ ("<","<"), (">", ">"), (""", '"'), ("'", "'"), ("&", "&") ] def unescape(data): for old,new in entities: data = data.replace(old,new) return data def re_finder(pattern, postproc=None): def find(dirname, filename): f = open(filename,'rU') data = f.read() f.close() for match in pattern.finditer(data): path = match.group(1) if postproc: path = postproc(path) yield joinpath(dirname,path) return find def joinpath(prefix,suffix): if not prefix: return suffix return os.path.join(prefix,suffix) def walk_revctrl(dirname=''): """Find all files under revision control""" for ep in pkg_resources.iter_entry_points('setuptools.file_finders'): for item in ep.load()(dirname): yield item def _default_revctrl(dirname=''): for path, finder in finders: path = joinpath(dirname,path) if os.path.isfile(path): for path in finder(dirname,path): if os.path.isfile(path): yield path elif os.path.isdir(path): for item in _default_revctrl(path): yield item def externals_finder(dirname, filename): """Find any 'svn:externals' directories""" found = False f = open(filename,'rb') for line in iter(f.readline, ''): # can't use direct iter! parts = line.split() if len(parts)==2: kind,length = parts data = f.read(int(length)) if kind=='K' and data=='svn:externals': found = True elif kind=='V' and found: f.close() break else: f.close() return for line in data.splitlines(): parts = line.split() if parts: yield joinpath(dirname, parts[0]) entries_pattern = re.compile(r'name="([^"]+)"(?![^>]+deleted="true")', re.I) def entries_finder(dirname, filename): f = open(filename,'rU') data = f.read() f.close() if data.startswith('=6 and record[5]=="delete": continue # skip deleted yield joinpath(dirname, record[0]) finders = [ (convert_path('CVS/Entries'), re_finder(re.compile(r"^\w?/([^/]+)/", re.M))), (convert_path('.svn/entries'), entries_finder), (convert_path('.svn/dir-props'), externals_finder), (convert_path('.svn/dir-prop-base'), externals_finder), # svn 1.4 ] class sdist(_sdist): """Smart sdist that finds anything supported by revision control""" user_options = [ ('formats=', None, "formats for source distribution (comma-separated list)"), ('keep-temp', 'k', "keep the distribution tree around after creating " + "archive file(s)"), ('dist-dir=', 'd', "directory to put the source distribution archive(s) in " "[default: dist]"), ] negative_opt = {} def run(self): self.run_command('egg_info') ei_cmd = self.get_finalized_command('egg_info') self.filelist = ei_cmd.filelist self.filelist.append(os.path.join(ei_cmd.egg_info,'SOURCES.txt')) self.check_readme() self.check_metadata() self.make_distribution() dist_files = getattr(self.distribution,'dist_files',[]) for file in self.archive_files: data = ('sdist', '', file) if data not in dist_files: dist_files.append(data) def read_template(self): try: _sdist.read_template(self) except: # grody hack to close the template file (MANIFEST.in) # this prevents easy_install's attempt at deleting the file from # dying and thus masking the real error sys.exc_info()[2].tb_next.tb_frame.f_locals['template'].close() raise # Cribbed from old distutils code, to work around new distutils code # that 
tries to do some of the same stuff as we do, in a way that makes # us loop. def add_defaults (self): standards = [('README', 'README.txt'), self.distribution.script_name] for fn in standards: if type(fn) is tuple: alts = fn got_it = 0 for fn in alts: if os.path.exists(fn): got_it = 1 self.filelist.append(fn) break if not got_it: self.warn("standard file not found: should have one of " + ', '.join(alts)) else: if os.path.exists(fn): self.filelist.append(fn) else: self.warn("standard file '%s' not found" % fn) optional = ['test/test*.py', 'setup.cfg'] for pattern in optional: files = filter(os.path.isfile, glob(pattern)) if files: self.filelist.extend(files) if self.distribution.has_pure_modules(): build_py = self.get_finalized_command('build_py') self.filelist.extend(build_py.get_source_files()) if self.distribution.has_ext_modules(): build_ext = self.get_finalized_command('build_ext') self.filelist.extend(build_ext.get_source_files()) if self.distribution.has_c_libraries(): build_clib = self.get_finalized_command('build_clib') self.filelist.extend(build_clib.get_source_files()) if self.distribution.has_scripts(): build_scripts = self.get_finalized_command('build_scripts') self.filelist.extend(build_scripts.get_source_files()) def check_readme(self): alts = ("README", "README.txt") for f in alts: if os.path.exists(f): return else: self.warn( "standard file not found: should have one of " +', '.join(alts) ) def make_release_tree(self, base_dir, files): _sdist.make_release_tree(self, base_dir, files) # Save any egg_info command line options used to create this sdist dest = os.path.join(base_dir, 'setup.cfg') if hasattr(os,'link') and os.path.exists(dest): # unlink and re-copy, since it might be hard-linked, and # we don't want to change the source version os.unlink(dest) self.copy_file('setup.cfg', dest) self.get_finalized_command('egg_info').save_version_info(dest) # tahoe-lafs-1.10.0/setuptools-0.6c16dev4.egg/setuptools/command/setopt.py000066400000000000000000000116521221140116300257170ustar00rootroot00000000000000import distutils, os from setuptools import Command from distutils.util import convert_path from distutils import log from distutils.errors import * __all__ = ['config_file', 'edit_config', 'option_base', 'setopt'] def config_file(kind="local"): """Get the filename of the distutils, local, global, or per-user config `kind` must be one of "local", "global", or "user" """ if kind=='local': return 'setup.cfg' if kind=='global': return os.path.join( os.path.dirname(distutils.__file__),'distutils.cfg' ) if kind=='user': dot = os.name=='posix' and '.' or '' return os.path.expanduser(convert_path("~/%spydistutils.cfg" % dot)) raise ValueError( "config_file() type must be 'local', 'global', or 'user'", kind ) def edit_config(filename, settings, dry_run=False): """Edit a configuration file to include `settings` `settings` is a dictionary of dictionaries or ``None`` values, keyed by command/section name. A ``None`` value means to delete the entire section, while a dictionary lists settings to be changed or deleted in that section. A setting of ``None`` means to delete that setting. 
""" from ConfigParser import RawConfigParser log.debug("Reading configuration from %s", filename) opts = RawConfigParser() opts.read([filename]) for section, options in settings.items(): if options is None: log.info("Deleting section [%s] from %s", section, filename) opts.remove_section(section) else: if not opts.has_section(section): log.debug("Adding new section [%s] to %s", section, filename) opts.add_section(section) for option,value in options.items(): if value is None: log.debug("Deleting %s.%s from %s", section, option, filename ) opts.remove_option(section,option) if not opts.options(section): log.info("Deleting empty [%s] section from %s", section, filename) opts.remove_section(section) else: log.debug( "Setting %s.%s to %r in %s", section, option, value, filename ) opts.set(section,option,value) log.info("Writing %s", filename) if not dry_run: f = open(filename,'w'); opts.write(f); f.close() class option_base(Command): """Abstract base class for commands that mess with config files""" user_options = [ ('global-config', 'g', "save options to the site-wide distutils.cfg file"), ('user-config', 'u', "save options to the current user's pydistutils.cfg file"), ('filename=', 'f', "configuration file to use (default=setup.cfg)"), ] boolean_options = [ 'global-config', 'user-config', ] def initialize_options(self): self.global_config = None self.user_config = None self.filename = None def finalize_options(self): filenames = [] if self.global_config: filenames.append(config_file('global')) if self.user_config: filenames.append(config_file('user')) if self.filename is not None: filenames.append(self.filename) if not filenames: filenames.append(config_file('local')) if len(filenames)>1: raise DistutilsOptionError( "Must specify only one configuration file option", filenames ) self.filename, = filenames class setopt(option_base): """Save command-line options to a file""" description = "set an option in setup.cfg or another config file" user_options = [ ('command=', 'c', 'command to set an option for'), ('option=', 'o', 'option to set'), ('set-value=', 's', 'value of the option'), ('remove', 'r', 'remove (unset) the value'), ] + option_base.user_options boolean_options = option_base.boolean_options + ['remove'] def initialize_options(self): option_base.initialize_options(self) self.command = None self.option = None self.set_value = None self.remove = None def finalize_options(self): option_base.finalize_options(self) if self.command is None or self.option is None: raise DistutilsOptionError("Must specify --command *and* --option") if self.set_value is None and not self.remove: raise DistutilsOptionError("Must specify --set-value or --remove") def run(self): edit_config( self.filename, { self.command: {self.option.replace('-','_'):self.set_value} }, self.dry_run ) tahoe-lafs-1.10.0/setuptools-0.6c16dev4.egg/setuptools/command/test.py000066400000000000000000000113751221140116300253620ustar00rootroot00000000000000from setuptools import Command from distutils.errors import DistutilsOptionError import sys from pkg_resources import * from unittest import TestLoader, main class ScanningLoader(TestLoader): def loadTestsFromModule(self, module): """Return a suite of all tests cases contained in the given module If the module is a package, load tests from all the modules in it. If the module has an ``additional_tests`` function, call it and add the return value to the tests. 
""" tests = [] if module.__name__!='setuptools.tests.doctest': # ugh tests.append(TestLoader.loadTestsFromModule(self,module)) if hasattr(module, "additional_tests"): tests.append(module.additional_tests()) if hasattr(module, '__path__'): for file in resource_listdir(module.__name__, ''): if file.endswith('.py') and file!='__init__.py': submodule = module.__name__+'.'+file[:-3] else: if resource_exists( module.__name__, file+'/__init__.py' ): submodule = module.__name__+'.'+file else: continue tests.append(self.loadTestsFromName(submodule)) if len(tests)!=1: return self.suiteClass(tests) else: return tests[0] # don't create a nested suite for only one return class test(Command): """Command to run unit tests after in-place build""" description = "run unit tests after in-place build" user_options = [ ('test-module=','m', "Run 'test_suite' in specified module"), ('test-suite=','s', "Test suite to run (e.g. 'some_module.test_suite')"), ('test-runner=','r', "Test runner to use"), ] def initialize_options(self): self.test_runner = None self.test_suite = None self.test_module = None self.test_loader = None def finalize_options(self): if self.test_suite is None: if self.test_module is None: self.test_suite = self.distribution.test_suite else: self.test_suite = self.test_module+".test_suite" elif self.test_module: raise DistutilsOptionError( "You may specify a module or a suite, but not both" ) self.test_args = [self.test_suite] if self.verbose: self.test_args.insert(0,'--verbose') if self.test_loader is None: self.test_loader = getattr(self.distribution,'test_loader',None) if self.test_loader is None: self.test_loader = "setuptools.command.test:ScanningLoader" if self.test_runner is None: self.test_runner = getattr(self.distribution,'test_runner',None) def with_project_on_sys_path(self, func): # Ensure metadata is up-to-date self.run_command('egg_info') # Build extensions in-place self.reinitialize_command('build_ext', inplace=1) self.run_command('build_ext') ei_cmd = self.get_finalized_command("egg_info") old_path = sys.path[:] old_modules = sys.modules.copy() try: sys.path.insert(0, normalize_path(ei_cmd.egg_base)) working_set.__init__() add_activation_listener(lambda dist: dist.activate()) require('%s==%s' % (ei_cmd.egg_name, ei_cmd.egg_version)) func() finally: sys.path[:] = old_path sys.modules.clear() sys.modules.update(old_modules) working_set.__init__() def run(self): if self.distribution.install_requires: self.distribution.fetch_build_eggs(self.distribution.install_requires) if self.distribution.tests_require: self.distribution.fetch_build_eggs(self.distribution.tests_require) if self.test_suite: cmd = ' '.join(self.test_args) if self.dry_run: self.announce('skipping "unittest %s" (dry run)' % cmd) else: self.announce('running "unittest %s"' % cmd) self.with_project_on_sys_path(self.run_tests) def run_tests(self): import unittest loader_ep = EntryPoint.parse("x="+self.test_loader) loader_class = loader_ep.load(require=False) kw = {} if self.test_runner is not None: runner_ep = EntryPoint.parse("x="+self.test_runner) runner_class = runner_ep.load(require=False) kw['testRunner'] = runner_class() unittest.main( None, None, [unittest.__file__]+self.test_args, testLoader = loader_class(), **kw ) tahoe-lafs-1.10.0/setuptools-0.6c16dev4.egg/setuptools/command/upload.py000066400000000000000000000147711221140116300256720ustar00rootroot00000000000000"""distutils.command.upload Implements the Distutils 'upload' subcommand (upload package to PyPI).""" from distutils.errors import * from 
distutils.core import Command from distutils.spawn import spawn from distutils import log try: from hashlib import md5 except ImportError: from md5 import md5 import os import socket import platform import ConfigParser import httplib import base64 import urlparse import cStringIO as StringIO class upload(Command): description = "upload binary package to PyPI" DEFAULT_REPOSITORY = 'http://pypi.python.org/pypi' user_options = [ ('repository=', 'r', "url of repository [default: %s]" % DEFAULT_REPOSITORY), ('show-response', None, 'display full response text from server'), ('sign', 's', 'sign files to upload using gpg'), ('identity=', 'i', 'GPG identity used to sign files'), ] boolean_options = ['show-response', 'sign'] def initialize_options(self): self.username = '' self.password = '' self.repository = '' self.show_response = 0 self.sign = False self.identity = None def finalize_options(self): if self.identity and not self.sign: raise DistutilsOptionError( "Must use --sign for --identity to have meaning" ) if os.environ.has_key('HOME'): rc = os.path.join(os.environ['HOME'], '.pypirc') if os.path.exists(rc): self.announce('Using PyPI login from %s' % rc) config = ConfigParser.ConfigParser({ 'username':'', 'password':'', 'repository':''}) config.read(rc) if not self.repository: self.repository = config.get('server-login', 'repository') if not self.username: self.username = config.get('server-login', 'username') if not self.password: self.password = config.get('server-login', 'password') if not self.repository: self.repository = self.DEFAULT_REPOSITORY def run(self): if not self.distribution.dist_files: raise DistutilsOptionError("No dist file created in earlier command") for command, pyversion, filename in self.distribution.dist_files: self.upload_file(command, pyversion, filename) def upload_file(self, command, pyversion, filename): # Sign if requested if self.sign: gpg_args = ["gpg", "--detach-sign", "-a", filename] if self.identity: gpg_args[2:2] = ["--local-user", self.identity] spawn(gpg_args, dry_run=self.dry_run) # Fill in the data content = open(filename,'rb').read() basename = os.path.basename(filename) comment = '' if command=='bdist_egg' and self.distribution.has_ext_modules(): comment = "built on %s" % platform.platform(terse=1) data = { ':action':'file_upload', 'protcol_version':'1', 'name':self.distribution.get_name(), 'version':self.distribution.get_version(), 'content':(basename,content), 'filetype':command, 'pyversion':pyversion, 'md5_digest':md5(content).hexdigest(), } if command == 'bdist_rpm': dist, version, id = platform.dist() if dist: comment = 'built for %s %s' % (dist, version) elif command == 'bdist_dumb': comment = 'built for %s' % platform.platform(terse=1) data['comment'] = comment if self.sign: data['gpg_signature'] = (os.path.basename(filename) + ".asc", open(filename+".asc").read()) # set up the authentication auth = "Basic " + base64.encodestring(self.username + ":" + self.password).strip() # Build up the MIME payload for the POST data boundary = '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254' sep_boundary = '\n--' + boundary end_boundary = sep_boundary + '--' body = StringIO.StringIO() for key, value in data.items(): # handle multiple entries for the same name if type(value) != type([]): value = [value] for value in value: if type(value) is tuple: fn = ';filename="%s"' % value[0] value = value[1] else: fn = "" value = str(value) body.write(sep_boundary) body.write('\nContent-Disposition: form-data; name="%s"'%key) body.write(fn) body.write("\n\n") 
body.write(value) if value and value[-1] == '\r': body.write('\n') # write an extra newline (lurve Macs) body.write(end_boundary) body.write("\n") body = body.getvalue() self.announce("Submitting %s to %s" % (filename, self.repository), log.INFO) # build the Request # We can't use urllib2 since we need to send the Basic # auth right with the first request schema, netloc, url, params, query, fragments = \ urlparse.urlparse(self.repository) assert not params and not query and not fragments if schema == 'http': http = httplib.HTTPConnection(netloc) elif schema == 'https': http = httplib.HTTPSConnection(netloc) else: raise AssertionError, "unsupported schema "+schema data = '' loglevel = log.INFO try: http.connect() http.putrequest("POST", url) http.putheader('Content-type', 'multipart/form-data; boundary=%s'%boundary) http.putheader('Content-length', str(len(body))) http.putheader('Authorization', auth) http.endheaders() http.send(body) except socket.error, e: self.announce(str(e), log.ERROR) return r = http.getresponse() if r.status == 200: self.announce('Server response (%s): %s' % (r.status, r.reason), log.INFO) else: self.announce('Upload failed (%s): %s' % (r.status, r.reason), log.ERROR) if self.show_response: print '-'*75, r.read(), '-'*75 tahoe-lafs-1.10.0/setuptools-0.6c16dev4.egg/setuptools/depends.py000066400000000000000000000141011221140116300243750ustar00rootroot00000000000000from __future__ import generators import sys, imp, marshal from imp import PKG_DIRECTORY, PY_COMPILED, PY_SOURCE, PY_FROZEN from distutils.version import StrictVersion, LooseVersion __all__ = [ 'Require', 'find_module', 'get_module_constant', 'extract_constant' ] class Require: """A prerequisite to building or installing a distribution""" def __init__(self,name,requested_version,module,homepage='', attribute=None,format=None ): if format is None and requested_version is not None: format = StrictVersion if format is not None: requested_version = format(requested_version) if attribute is None: attribute = '__version__' self.__dict__.update(locals()) del self.self def full_name(self): """Return full package/distribution name, w/version""" if self.requested_version is not None: return '%s-%s' % (self.name,self.requested_version) return self.name def version_ok(self,version): """Is 'version' sufficiently up-to-date?""" return self.attribute is None or self.format is None or \ str(version)!="unknown" and version >= self.requested_version def get_version(self, paths=None, default="unknown"): """Get version number of installed module, 'None', or 'default' Search 'paths' for module. If not found, return 'None'. If found, return the extracted version attribute, or 'default' if no version attribute was specified, or the value cannot be determined without importing the module. The version is formatted according to the requirement's version format (if any), unless it is 'None' or the supplied 'default'. 
""" if self.attribute is None: try: f,p,i = find_module(self.module,paths) if f: f.close() return default except ImportError: return None v = get_module_constant(self.module,self.attribute,default,paths) if v is not None and v is not default and self.format is not None: return self.format(v) return v def is_present(self,paths=None): """Return true if dependency is present on 'paths'""" return self.get_version(paths) is not None def is_current(self,paths=None): """Return true if dependency is present and up-to-date on 'paths'""" version = self.get_version(paths) if version is None: return False return self.version_ok(version) def _iter_code(code): """Yield '(op,arg)' pair for each operation in code object 'code'""" from array import array from dis import HAVE_ARGUMENT, EXTENDED_ARG bytes = array('b',code.co_code) eof = len(code.co_code) ptr = 0 extended_arg = 0 while ptr=HAVE_ARGUMENT: arg = bytes[ptr+1] + bytes[ptr+2]*256 + extended_arg ptr += 3 if op==EXTENDED_ARG: extended_arg = arg * 65536L continue else: arg = None ptr += 1 yield op,arg def find_module(module, paths=None): """Just like 'imp.find_module()', but with package support""" parts = module.split('.') while parts: part = parts.pop(0) f, path, (suffix,mode,kind) = info = imp.find_module(part, paths) if kind==PKG_DIRECTORY: parts = parts or ['__init__'] paths = [path] elif parts: raise ImportError("Can't find %r in %s" % (parts,module)) return info def get_module_constant(module, symbol, default=-1, paths=None): """Find 'module' by searching 'paths', and extract 'symbol' Return 'None' if 'module' does not exist on 'paths', or it does not define 'symbol'. If the module defines 'symbol' as a constant, return the constant. Otherwise, return 'default'.""" try: f, path, (suffix,mode,kind) = find_module(module,paths) except ImportError: # Module doesn't exist return None try: if kind==PY_COMPILED: f.read(8) # skip magic & date code = marshal.load(f) elif kind==PY_FROZEN: code = imp.get_frozen_object(module) elif kind==PY_SOURCE: code = compile(f.read(), path, 'exec') else: # Not something we can parse; we'll have to import it. :( if module not in sys.modules: imp.load_module(module,f,path,(suffix,mode,kind)) return getattr(sys.modules[module],symbol,None) finally: if f: f.close() return extract_constant(code,symbol,default) def extract_constant(code,symbol,default=-1): """Extract the constant value of 'symbol' from 'code' If the name 'symbol' is bound to a constant value by the Python code object 'code', return that value. If 'symbol' is bound to an expression, return 'default'. Otherwise, return 'None'. Return value is based on the first assignment to 'symbol'. 'symbol' must be a global, or at least a non-"fast" local in the code block. That is, only 'STORE_NAME' and 'STORE_GLOBAL' opcodes are checked, and 'symbol' must be present in 'code.co_names'. """ if symbol not in code.co_names: # name's not there, can't possibly be an assigment return None name_idx = list(code.co_names).index(symbol) STORE_NAME = 90 STORE_GLOBAL = 97 LOAD_CONST = 100 const = default for op, arg in _iter_code(code): if op==LOAD_CONST: const = code.co_consts[arg] elif arg==name_idx and (op==STORE_NAME or op==STORE_GLOBAL): return const else: const = default if sys.platform.startswith('java') or sys.platform == 'cli': # XXX it'd be better to test assertions about bytecode instead... 
del extract_constant, get_module_constant __all__.remove('extract_constant') __all__.remove('get_module_constant') tahoe-lafs-1.10.0/setuptools-0.6c16dev4.egg/setuptools/dist.py000066400000000000000000000726641221140116300237400ustar00rootroot00000000000000__all__ = ['Distribution'] from distutils.core import Distribution as _Distribution from setuptools.depends import Require from setuptools.command.install import install from setuptools.command.sdist import sdist from setuptools.command.install_lib import install_lib from distutils.errors import DistutilsOptionError, DistutilsPlatformError from distutils.errors import DistutilsSetupError import setuptools, pkg_resources, distutils.core, distutils.dist, distutils.cmd import os, distutils.log, re def _get_unpatched(cls): """Protect against re-patching the distutils if reloaded Also ensures that no other distutils extension monkeypatched the distutils first. """ while cls.__module__.startswith('setuptools'): cls, = cls.__bases__ if not cls.__module__.startswith('distutils'): raise AssertionError( "distutils has already been patched by %r" % cls ) return cls _Distribution = _get_unpatched(_Distribution) sequence = tuple, list def check_importable(dist, attr, value): try: ep = pkg_resources.EntryPoint.parse('x='+value) assert not ep.extras except (TypeError,ValueError,AttributeError,AssertionError): raise DistutilsSetupError( "%r must be importable 'module:attrs' string (got %r)" % (attr,value) ) def assert_string_list(dist, attr, value): """Verify that value is a string list or None""" try: assert ''.join(value)!=value except (TypeError,ValueError,AttributeError,AssertionError): raise DistutilsSetupError( "%r must be a list of strings (got %r)" % (attr,value) ) def check_nsp(dist, attr, value): """Verify that namespace packages are valid""" assert_string_list(dist,attr,value) for nsp in value: if not dist.has_contents_for(nsp): raise DistutilsSetupError( "Distribution contains no modules or packages for " + "namespace package %r" % nsp ) if '.' in nsp: parent = '.'.join(nsp.split('.')[:-1]) if parent not in value: distutils.log.warn( "WARNING: %r is declared as a package namespace, but %r" " is not: please correct this in setup.py", nsp, parent ) def check_extras(dist, attr, value): """Verify that extras_require mapping is valid""" try: for k,v in value.items(): list(pkg_resources.parse_requirements(v)) except (TypeError,ValueError,AttributeError): raise DistutilsSetupError( "'extras_require' must be a dictionary whose values are " "strings or lists of strings containing valid project/version " "requirement specifiers." 
) def assert_bool(dist, attr, value): """Verify that value is True, False, 0, or 1""" if bool(value) != value: raise DistutilsSetupError( "%r must be a boolean value (got %r)" % (attr,value) ) def check_requirements(dist, attr, value): """Verify that install_requires is a valid requirements list""" try: list(pkg_resources.parse_requirements(value)) except (TypeError,ValueError): raise DistutilsSetupError( "%r must be a string or list of strings " "containing valid project/version requirement specifiers" % (attr,) ) def check_entry_points(dist, attr, value): """Verify that entry_points map is parseable""" try: pkg_resources.EntryPoint.parse_map(value) except ValueError, e: raise DistutilsSetupError(e) def check_test_suite(dist, attr, value): if not isinstance(value,basestring): raise DistutilsSetupError("test_suite must be a string") def check_package_data(dist, attr, value): """Verify that value is a dictionary of package names to glob lists""" if isinstance(value,dict): for k,v in value.items(): if not isinstance(k,str): break try: iter(v) except TypeError: break else: return raise DistutilsSetupError( attr+" must be a dictionary mapping package names to lists of " "wildcard patterns" ) def check_packages(dist, attr, value): for pkgname in value: if not re.match(r'\w+(\.\w+)*', pkgname): distutils.log.warn( "WARNING: %r not a valid package name; please use only" ".-separated package names in setup.py", pkgname ) class Distribution(_Distribution): """Distribution with support for features, tests, and package data This is an enhanced version of 'distutils.dist.Distribution' that effectively adds the following new optional keyword arguments to 'setup()': 'install_requires' -- a string or sequence of strings specifying project versions that the distribution requires when installed, in the format used by 'pkg_resources.require()'. They will be installed automatically when the package is installed. If you wish to use packages that are not available in PyPI, or want to give your users an alternate download location, you can add a 'find_links' option to the '[easy_install]' section of your project's 'setup.cfg' file, and then setuptools will scan the listed web pages for links that satisfy the requirements. 'extras_require' -- a dictionary mapping names of optional "extras" to the additional requirement(s) that using those extras incurs. For example, this:: extras_require = dict(reST = ["docutils>=0.3", "reSTedit"]) indicates that the distribution can optionally provide an extra capability called "reST", but it can only be used if docutils and reSTedit are installed. If the user installs your package using EasyInstall and requests one of your extras, the corresponding additional requirements will be installed if needed. 'features' -- a dictionary mapping option names to 'setuptools.Feature' objects. Features are a portion of the distribution that can be included or excluded based on user options, inter-feature dependencies, and availability on the current system. Excluded features are omitted from all setup commands, including source and binary distributions, so you can create multiple distributions from the same source tree. Feature names should be valid Python identifiers, except that they may contain the '-' (minus) sign. Features can be included or excluded via the command line options '--with-X' and '--without-X', where 'X' is the name of the feature. 
Whether a feature is included by default, and whether you are allowed to control this from the command line, is determined by the Feature object. See the 'Feature' class for more information. 'test_suite' -- the name of a test suite to run for the 'test' command. If the user runs 'python setup.py test', the package will be installed, and the named test suite will be run. The format is the same as would be used on a 'unittest.py' command line. That is, it is the dotted name of an object to import and call to generate a test suite. 'package_data' -- a dictionary mapping package names to lists of filenames or globs to use to find data files contained in the named packages. If the dictionary has filenames or globs listed under '""' (the empty string), those names will be searched for in every package, in addition to any names for the specific package. Data files found using these names/globs will be installed along with the package, in the same location as the package. Note that globs are allowed to reference the contents of non-package subdirectories, as long as you use '/' as a path separator. (Globs are automatically converted to platform-specific paths at runtime.) In addition to these new keywords, this class also has several new methods for manipulating the distribution's contents. For example, the 'include()' and 'exclude()' methods can be thought of as in-place add and subtract commands that add or remove packages, modules, extensions, and so on from the distribution. They are used by the feature subsystem to configure the distribution for the included and excluded features. """ _patched_dist = None def patch_missing_pkg_info(self, attrs): # Fake up a replacement for the data that would normally come from # PKG-INFO, but which might not yet be built if this is a fresh # checkout. 
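# A minimal illustrative setup() call using the keywords documented in the
# class docstring above; 'MyProject', 'mypkg', and the file patterns are
# placeholders, not names defined anywhere in this distribution:
#
#     from setuptools import setup
#     setup(
#         name='MyProject',
#         version='1.0',
#         packages=['mypkg'],
#         install_requires=['docutils>=0.3'],
#         extras_require={'reST': ['docutils>=0.3', 'reSTedit']},
#         package_data={'mypkg': ['data/*.dat']},
#         test_suite='mypkg.tests.test_suite',
#     )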
# if not attrs or 'name' not in attrs or 'version' not in attrs: return key = pkg_resources.safe_name(str(attrs['name'])).lower() dist = pkg_resources.working_set.by_key.get(key) if dist is not None and not dist.has_metadata('PKG-INFO'): dist._version = pkg_resources.safe_version(str(attrs['version'])) self._patched_dist = dist def __init__ (self, attrs=None): have_package_data = hasattr(self, "package_data") if not have_package_data: self.package_data = {} self.require_features = [] self.features = {} self.dist_files = [] self.patch_missing_pkg_info(attrs) # Make sure we have any eggs needed to interpret 'attrs' if attrs is not None: self.dependency_links = attrs.pop('dependency_links', []) assert_string_list(self,'dependency_links',self.dependency_links) if attrs and 'setup_requires' in attrs: self.fetch_build_eggs(attrs.pop('setup_requires')) for ep in pkg_resources.iter_entry_points('distutils.setup_keywords'): if not hasattr(self,ep.name): setattr(self,ep.name,None) _Distribution.__init__(self,attrs) if isinstance(self.metadata.version, (int,long,float)): # Some people apparently take "version number" too literally :) self.metadata.version = str(self.metadata.version) def parse_command_line(self): """Process features after parsing command line options""" result = _Distribution.parse_command_line(self) if self.features: self._finalize_features() return result def _feature_attrname(self,name): """Convert feature name to corresponding option attribute name""" return 'with_'+name.replace('-','_') def fetch_build_eggs(self, requires): """Resolve pre-setup requirements""" from pkg_resources import working_set, parse_requirements for dist in working_set.resolve( parse_requirements(requires), installer=self.fetch_build_egg ): working_set.add(dist) def finalize_options(self): _Distribution.finalize_options(self) if self.features: self._set_global_opts_from_features() for ep in pkg_resources.iter_entry_points('distutils.setup_keywords'): value = getattr(self,ep.name,None) if value is not None: ep.require(installer=self.fetch_build_egg) ep.load()(self, ep.name, value) def fetch_build_egg(self, req): """Fetch an egg needed for building""" try: cmd = self._egg_fetcher except AttributeError: from setuptools.command.easy_install import easy_install dist = self.__class__({'script_args':['easy_install']}) dist.parse_config_files() opts = dist.get_option_dict('easy_install') keep = ( 'find_links', 'site_dirs', 'index_url', 'optimize', 'site_dirs', 'allow_hosts' ) for key in opts.keys(): if key not in keep: del opts[key] # don't use any other settings if self.dependency_links: links = self.dependency_links[:] if 'find_links' in opts: links = opts['find_links'][1].split() + links opts['find_links'] = ('setup', links) cmd = easy_install( dist, args=["x"], install_dir=os.curdir, exclude_scripts=True, always_copy=False, build_directory=None, editable=False, upgrade=False, multi_version=True, no_report = True ) cmd.ensure_finalized() self._egg_fetcher = cmd return cmd.easy_install(req) def _set_global_opts_from_features(self): """Add --with-X/--without-X options based on optional features""" go = [] no = self.negative_opt.copy() for name,feature in self.features.items(): self._set_feature(name,None) feature.validate(self) if feature.optional: descr = feature.description incdef = ' (default)' excdef='' if not feature.include_by_default(): excdef, incdef = incdef, excdef go.append(('with-'+name, None, 'include '+descr+incdef)) go.append(('without-'+name, None, 'exclude '+descr+excdef)) no['without-'+name] = 
'with-'+name self.global_options = self.feature_options = go + self.global_options self.negative_opt = self.feature_negopt = no def _finalize_features(self): """Add/remove features and resolve dependencies between them""" # First, flag all the enabled items (and thus their dependencies) for name,feature in self.features.items(): enabled = self.feature_is_included(name) if enabled or (enabled is None and feature.include_by_default()): feature.include_in(self) self._set_feature(name,1) # Then disable the rest, so that off-by-default features don't # get flagged as errors when they're required by an enabled feature for name,feature in self.features.items(): if not self.feature_is_included(name): feature.exclude_from(self) self._set_feature(name,0) def get_command_class(self, command): """Pluggable version of get_command_class()""" if command in self.cmdclass: return self.cmdclass[command] for ep in pkg_resources.iter_entry_points('distutils.commands',command): ep.require(installer=self.fetch_build_egg) self.cmdclass[command] = cmdclass = ep.load() return cmdclass else: return _Distribution.get_command_class(self, command) def print_commands(self): for ep in pkg_resources.iter_entry_points('distutils.commands'): if ep.name not in self.cmdclass: try: cmdclass = ep.load(False) # don't require extras, we're not running self.cmdclass[ep.name] = cmdclass except ImportError: pass # see https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1405 return _Distribution.print_commands(self) def _set_feature(self,name,status): """Set feature's inclusion status""" setattr(self,self._feature_attrname(name),status) def feature_is_included(self,name): """Return 1 if feature is included, 0 if excluded, 'None' if unknown""" return getattr(self,self._feature_attrname(name)) def include_feature(self,name): """Request inclusion of feature named 'name'""" if self.feature_is_included(name)==0: descr = self.features[name].description raise DistutilsOptionError( descr + " is required, but was excluded or is not available" ) self.features[name].include_in(self) self._set_feature(name,1) def include(self,**attrs): """Add items to distribution that are named in keyword arguments For example, 'dist.exclude(py_modules=["x"])' would add 'x' to the distribution's 'py_modules' attribute, if it was not already there. Currently, this method only supports inclusion for attributes that are lists or tuples. If you need to add support for adding to other attributes in this or a subclass, you can add an '_include_X' method, where 'X' is the name of the attribute. The method will be called with the value passed to 'include()'. So, 'dist.include(foo={"bar":"baz"})' will try to call 'dist._include_foo({"bar":"baz"})', which can then handle whatever special inclusion logic is needed. """ for k,v in attrs.items(): include = getattr(self, '_include_'+k, None) if include: include(v) else: self._include_misc(k,v) def exclude_package(self,package): """Remove packages, modules, and extensions in named package""" pfx = package+'.' if self.packages: self.packages = [ p for p in self.packages if p!=package and not p.startswith(pfx) ] if self.py_modules: self.py_modules = [ p for p in self.py_modules if p!=package and not p.startswith(pfx) ] if self.ext_modules: self.ext_modules = [ p for p in self.ext_modules if p.name!=package and not p.name.startswith(pfx) ] def has_contents_for(self,package): """Return true if 'exclude_package(package)' would do something""" pfx = package+'.' 
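# Illustrative values only: with packages == ['a', 'a.b', 'x'],
# has_contents_for('a') is true (both 'a' and 'a.b' match the 'a.' prefix),
# and exclude_package('a') would leave just ['x'].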
for p in self.iter_distribution_names(): if p==package or p.startswith(pfx): return True def _exclude_misc(self,name,value): """Handle 'exclude()' for list/tuple attrs without a special handler""" if not isinstance(value,sequence): raise DistutilsSetupError( "%s: setting must be a list or tuple (%r)" % (name, value) ) try: old = getattr(self,name) except AttributeError: raise DistutilsSetupError( "%s: No such distribution setting" % name ) if old is not None and not isinstance(old,sequence): raise DistutilsSetupError( name+": this setting cannot be changed via include/exclude" ) elif old: setattr(self,name,[item for item in old if item not in value]) def _include_misc(self,name,value): """Handle 'include()' for list/tuple attrs without a special handler""" if not isinstance(value,sequence): raise DistutilsSetupError( "%s: setting must be a list (%r)" % (name, value) ) try: old = getattr(self,name) except AttributeError: raise DistutilsSetupError( "%s: No such distribution setting" % name ) if old is None: setattr(self,name,value) elif not isinstance(old,sequence): raise DistutilsSetupError( name+": this setting cannot be changed via include/exclude" ) else: setattr(self,name,old+[item for item in value if item not in old]) def exclude(self,**attrs): """Remove items from distribution that are named in keyword arguments For example, 'dist.exclude(py_modules=["x"])' would remove 'x' from the distribution's 'py_modules' attribute. Excluding packages uses the 'exclude_package()' method, so all of the package's contained packages, modules, and extensions are also excluded. Currently, this method only supports exclusion from attributes that are lists or tuples. If you need to add support for excluding from other attributes in this or a subclass, you can add an '_exclude_X' method, where 'X' is the name of the attribute. The method will be called with the value passed to 'exclude()'. So, 'dist.exclude(foo={"bar":"baz"})' will try to call 'dist._exclude_foo({"bar":"baz"})', which can then handle whatever special exclusion logic is needed. """ for k,v in attrs.items(): exclude = getattr(self, '_exclude_'+k, None) if exclude: exclude(v) else: self._exclude_misc(k,v) def _exclude_packages(self,packages): if not isinstance(packages,sequence): raise DistutilsSetupError( "packages: setting must be a list or tuple (%r)" % (packages,) ) map(self.exclude_package, packages) def _parse_command_opts(self, parser, args): # Remove --with-X/--without-X options when processing command args self.global_options = self.__class__.global_options self.negative_opt = self.__class__.negative_opt # First, expand any aliases command = args[0] aliases = self.get_option_dict('aliases') while command in aliases: src,alias = aliases[command] del aliases[command] # ensure each alias can expand only once! import shlex args[:1] = shlex.split(alias,True) command = args[0] nargs = _Distribution._parse_command_opts(self, parser, args) # Handle commands that want to consume all remaining arguments cmd_class = self.get_command_class(command) if getattr(cmd_class,'command_consumes_arguments',None): self.get_option_dict(command)['args'] = ("command line", nargs) if nargs is not None: return [] return nargs def get_cmdline_options(self): """Return a '{cmd: {opt:val}}' map of all command-line options Option names are all long, but do not include the leading '--', and contain dashes rather than underscores. If the option doesn't take an argument (e.g. '--quiet'), the 'val' is 'None'. 
Note that options provided by config files are intentionally excluded. """ d = {} for cmd,opts in self.command_options.items(): for opt,(src,val) in opts.items(): if src != "command line": continue opt = opt.replace('_','-') if val==0: cmdobj = self.get_command_obj(cmd) neg_opt = self.negative_opt.copy() neg_opt.update(getattr(cmdobj,'negative_opt',{})) for neg,pos in neg_opt.items(): if pos==opt: opt=neg val=None break else: raise AssertionError("Shouldn't be able to get here") elif val==1: val = None d.setdefault(cmd,{})[opt] = val return d def iter_distribution_names(self): """Yield all packages, modules, and extension names in distribution""" for pkg in self.packages or (): yield pkg for module in self.py_modules or (): yield module for ext in self.ext_modules or (): if isinstance(ext,tuple): name, buildinfo = ext else: name = ext.name if name.endswith('module'): name = name[:-6] yield name # Install it throughout the distutils for module in distutils.dist, distutils.core, distutils.cmd: module.Distribution = Distribution class Feature: """A subset of the distribution that can be excluded if unneeded/wanted Features are created using these keyword arguments: 'description' -- a short, human readable description of the feature, to be used in error messages, and option help messages. 'standard' -- if true, the feature is included by default if it is available on the current system. Otherwise, the feature is only included if requested via a command line '--with-X' option, or if another included feature requires it. The default setting is 'False'. 'available' -- if true, the feature is available for installation on the current system. The default setting is 'True'. 'optional' -- if true, the feature's inclusion can be controlled from the command line, using the '--with-X' or '--without-X' options. If false, the feature's inclusion status is determined automatically, based on 'availabile', 'standard', and whether any other feature requires it. The default setting is 'True'. 'require_features' -- a string or sequence of strings naming features that should also be included if this feature is included. Defaults to empty list. May also contain 'Require' objects that should be added/removed from the distribution. 'remove' -- a string or list of strings naming packages to be removed from the distribution if this feature is *not* included. If the feature *is* included, this argument is ignored. This argument exists to support removing features that "crosscut" a distribution, such as defining a 'tests' feature that removes all the 'tests' subpackages provided by other features. The default for this argument is an empty list. (Note: the named package(s) or modules must exist in the base distribution when the 'setup()' function is initially called.) other keywords -- any other keyword arguments are saved, and passed to the distribution's 'include()' and 'exclude()' methods when the feature is included or excluded, respectively. So, for example, you could pass 'packages=["a","b"]' to cause packages 'a' and 'b' to be added or removed from the distribution as appropriate. A feature must include at least one 'requires', 'remove', or other keyword argument. Otherwise, it can't affect the distribution in any way. Note also that you can subclass 'Feature' to create your own specialized feature types that modify the distribution in other ways when included or excluded. See the docstrings for the various methods here for more detail. 
Aside from the methods, the only feature attributes that distributions look at are 'description' and 'optional'. """ def __init__(self, description, standard=False, available=True, optional=True, require_features=(), remove=(), **extras ): self.description = description self.standard = standard self.available = available self.optional = optional if isinstance(require_features,(str,Require)): require_features = require_features, self.require_features = [ r for r in require_features if isinstance(r,str) ] er = [r for r in require_features if not isinstance(r,str)] if er: extras['require_features'] = er if isinstance(remove,str): remove = remove, self.remove = remove self.extras = extras if not remove and not require_features and not extras: raise DistutilsSetupError( "Feature %s: must define 'require_features', 'remove', or at least one" " of 'packages', 'py_modules', etc." ) def include_by_default(self): """Should this feature be included by default?""" return self.available and self.standard def include_in(self,dist): """Ensure feature and its requirements are included in distribution You may override this in a subclass to perform additional operations on the distribution. Note that this method may be called more than once per feature, and so should be idempotent. """ if not self.available: raise DistutilsPlatformError( self.description+" is required," "but is not available on this platform" ) dist.include(**self.extras) for f in self.require_features: dist.include_feature(f) def exclude_from(self,dist): """Ensure feature is excluded from distribution You may override this in a subclass to perform additional operations on the distribution. This method will be called at most once per feature, and only after all included features have been asked to include themselves. """ dist.exclude(**self.extras) if self.remove: for item in self.remove: dist.exclude_package(item) def validate(self,dist): """Verify that feature makes sense in context of distribution This method is called by the distribution just before it parses its command line. It checks to ensure that the 'remove' attribute, if any, contains only valid package/module names that are present in the base distribution when 'setup()' is called. You may override it in a subclass to perform any other required validation of the feature against a target distribution. 
""" for item in self.remove: if not dist.has_contents_for(item): raise DistutilsSetupError( "%s wants to be able to remove %s, but the distribution" " doesn't contain any packages or modules under %s" % (self.description, item, item) ) tahoe-lafs-1.10.0/setuptools-0.6c16dev4.egg/setuptools/extension.py000066400000000000000000000020771221140116300250000ustar00rootroot00000000000000from distutils.core import Extension as _Extension from dist import _get_unpatched _Extension = _get_unpatched(_Extension) try: from Pyrex.Distutils.build_ext import build_ext except ImportError: have_pyrex = False else: have_pyrex = True class Extension(_Extension): """Extension that uses '.c' files in place of '.pyx' files""" if not have_pyrex: # convert .pyx extensions to .c def __init__(self,*args,**kw): _Extension.__init__(self,*args,**kw) sources = [] for s in self.sources: if s.endswith('.pyx'): sources.append(s[:-3]+'c') else: sources.append(s) self.sources = sources class Library(Extension): """Just like a regular Extension, but built as a library instead""" import sys, distutils.core, distutils.extension distutils.core.Extension = Extension distutils.extension.Extension = Extension if 'distutils.command.build_ext' in sys.modules: sys.modules['distutils.command.build_ext'].Extension = Extension tahoe-lafs-1.10.0/setuptools-0.6c16dev4.egg/setuptools/package_index.py000066400000000000000000000712221221140116300255440ustar00rootroot00000000000000"""PyPI and direct package downloading""" import sys, os.path, re, urlparse, urllib2, shutil, random, socket, cStringIO import httplib, urllib from pkg_resources import * from distutils import log from distutils.errors import DistutilsError try: from hashlib import md5 except ImportError: from md5 import md5 from fnmatch import translate EGG_FRAGMENT = re.compile(r'^egg=([-A-Za-z0-9_.]+)$') HREF = re.compile("""href\\s*=\\s*['"]?([^'"> ]+)""", re.I) # this is here to fix emacs' cruddy broken syntax highlighting PYPI_MD5 = re.compile( '([^<]+)\n\s+\\(md5\\)' ) URL_SCHEME = re.compile('([-+.a-z0-9]{2,}):',re.I).match EXTENSIONS = ".tar.gz .tar.bz2 .tar .zip .tgz".split() def is_local(url_or_fname): """ Return True if url_or_fname is a "file:" url or if it is a schemaless thing (which is presumably a filename). """ mo = URL_SCHEME(url_or_fname) return not (mo and mo.group(1).lower()!='file') def url_or_fname_to_fname(url_or_fname): """ Assert that is_local(url_or_fname) then if it is a "file:" url, parse it and run url2pathname on it, else just return it. 
""" assert is_local(url_or_fname) mo = URL_SCHEME(url_or_fname) if mo: return urllib2.url2pathname(urlparse.urlparse(url)[2]) else: return url_or_fname __all__ = [ 'PackageIndex', 'distros_for_url', 'parse_bdist_wininst', 'interpret_distro_name', ] def parse_bdist_wininst(name): """Return (base,pyversion) or (None,None) for possible .exe name""" lower = name.lower() base, py_ver = None, None if lower.endswith('.exe'): if lower.endswith('.win32.exe'): base = name[:-10] elif lower.startswith('.win32-py',-16): py_ver = name[-7:-4] base = name[:-16] return base,py_ver def egg_info_for_url(url): scheme, server, path, parameters, query, fragment = urlparse.urlparse(url) base = urllib2.unquote(path.split('/')[-1]) if server=='sourceforge.net' and base=='download': # XXX Yuck base = urllib2.unquote(path.split('/')[-2]) if '#' in base: base, fragment = base.split('#',1) return base,fragment def distros_for_url(url, metadata=None): """Yield egg or source distribution objects that might be found at a URL""" base, fragment = egg_info_for_url(url) for dist in distros_for_location(url, base, metadata): yield dist if fragment: match = EGG_FRAGMENT.match(fragment) if match: for dist in interpret_distro_name( url, match.group(1), metadata, precedence = CHECKOUT_DIST ): yield dist def distros_for_location(location, basename, metadata=None): """Yield egg or source distribution objects based on basename""" if basename.endswith('.egg.zip'): basename = basename[:-4] # strip the .zip if basename.endswith('.egg') and '-' in basename: # only one, unambiguous interpretation return [Distribution.from_location(location, basename, metadata)] if basename.endswith('.exe'): win_base, py_ver = parse_bdist_wininst(basename) if win_base is not None: return interpret_distro_name( location, win_base, metadata, py_ver, BINARY_DIST, "win32" ) # Try source distro extensions (.zip, .tgz, etc.) # for ext in EXTENSIONS: if basename.endswith(ext): basename = basename[:-len(ext)] return interpret_distro_name(location, basename, metadata) return [] # no extension matched def distros_for_filename(filename, metadata=None): """Yield possible egg or source distribution objects based on a filename""" return distros_for_location( normalize_path(filename), os.path.basename(filename), metadata ) def interpret_distro_name(location, basename, metadata, py_version=None, precedence=SOURCE_DIST, platform=None ): """Generate alternative interpretations of a source distro name Note: if `location` is a filesystem filename, you should call ``pkg_resources.normalize_path()`` on it before passing it to this routine! """ # Generate alternative interpretations of a source distro name # Because some packages are ambiguous as to name/versions split # e.g. "adns-python-1.1.0", "egenix-mx-commercial", etc. # So, we generate each possible interepretation (e.g. "adns, python-1.1.0" # "adns-python, 1.1.0", and "adns-python-1.1.0, no version"). In practice, # the spurious interpretations should be ignored, because in the event # there's also an "adns" package, the spurious "python-1.1.0" version will # compare lower than any numeric version number, and is therefore unlikely # to match a request for it. It's still a potential problem, though, and # in the long run PyPI and the distutils should go for "safe" names and # versions in distribution archive names (sdist and bdist). 
parts = basename.split('-') if not py_version: for i,p in enumerate(parts[2:]): if len(p)==5 and p.startswith('py2.'): return # It's a bdist_dumb, not an sdist -- bail out for p in range(1,len(parts)+1): yield Distribution( location, metadata, '-'.join(parts[:p]), '-'.join(parts[p:]), py_version=py_version, precedence = precedence, platform = platform ) REL = re.compile("""<([^>]*\srel\s*=\s*['"]?([^'">]+)[^>]*)>""", re.I) # this line is here to fix emacs' cruddy broken syntax highlighting def find_external_links(url, page): """Find rel="homepage" and rel="download" links in `page`, yielding URLs""" for match in REL.finditer(page): tag, rel = match.groups() rels = map(str.strip, rel.lower().split(',')) if 'homepage' in rels or 'download' in rels: for match in HREF.finditer(tag): yield urlparse.urljoin(url, htmldecode(match.group(1))) for tag in ("Home Page", "Download URL"): pos = page.find(tag) if pos!=-1: match = HREF.search(page,pos) if match: yield urlparse.urljoin(url, htmldecode(match.group(1))) user_agent = "Python-urllib/%s setuptools/%s" % ( urllib2.__version__, require('setuptools')[0].version ) class PackageIndex(Environment): """A distribution index that scans web pages for download URLs""" def __init__(self, index_url="http://pypi.python.org/simple", hosts=('*',), *args, **kw ): Environment.__init__(self,*args,**kw) self.index_url = index_url + "/"[:not index_url.endswith('/')] self.scanned_urls = {} self.fetched_urls = {} self.package_pages = {} self.allows = re.compile('|'.join(map(translate,hosts))).match self.to_scan = [] def process_url(self, url, retrieve=False): """Evaluate a URL as a possible download, and maybe retrieve it""" if url in self.scanned_urls and not retrieve: return self.scanned_urls[url] = True if not URL_SCHEME(url): self.process_filename(url) return else: dists = list(distros_for_url(url)) if dists: if not self.url_ok(url): return self.debug("Found link: %s", url) if dists or not retrieve or url in self.fetched_urls: map(self.add, dists) return # don't need the actual page if not self.url_ok(url): self.fetched_urls[url] = True return self.info("Reading %s", url) self.fetched_urls[url] = True # prevent multiple fetch attempts f = self.open_url(url, "Download error: %s -- Some packages may not be found!") if f is None: return self.fetched_urls[f.url] = True if 'html' not in f.headers.get('content-type', '').lower(): f.close() # not html, we can't process it return base = f.url # handle redirects page = f.read() f.close() if url.startswith(self.index_url) and getattr(f,'code',None)!=404: page = self.process_index(url, page) for match in HREF.finditer(page): link = urlparse.urljoin(base, htmldecode(match.group(1))) self.process_url(link) def process_filename(self, fn, nested=False): # process filenames or directories if not os.path.exists(fn): self.warn("Not found: %s", fn) return if os.path.isdir(fn) and not nested: path = os.path.realpath(fn) for item in os.listdir(path): self.process_filename(os.path.join(path,item), True) dists = distros_for_filename(fn) if dists: self.debug("Found: %s", fn) map(self.add, dists) def url_ok(self, url, fatal=False): s = URL_SCHEME(url) if (s and s.group(1).lower()=='file') or self.allows(urlparse.urlparse(url)[1]): return True msg = "\nLink to % s ***BLOCKED*** by --allow-hosts\n" if fatal: raise DistutilsError(msg % url) else: self.warn(msg, url) def scan_egg_links(self, search_path): for item in search_path: if os.path.isdir(item): for entry in os.listdir(item): if entry.endswith('.egg-link'): self.scan_egg_link(item, 
entry) def scan_egg_link(self, path, entry): lines = filter(None, map(str.strip, file(os.path.join(path, entry)))) if len(lines)==2: for dist in find_distributions(os.path.join(path, lines[0])): dist.location = os.path.join(path, *lines) dist.precedence = SOURCE_DIST self.add(dist) def process_index(self,url,page): """Process the contents of a PyPI page""" def scan(link): # Process a URL to see if it's for a package page if link.startswith(self.index_url): parts = map( urllib2.unquote, link[len(self.index_url):].split('/') ) if len(parts)==2 and '#' not in parts[1]: # it's a package page, sanitize and index it pkg = safe_name(parts[0]) ver = safe_version(parts[1]) self.package_pages.setdefault(pkg.lower(),{})[link] = True return to_filename(pkg), to_filename(ver) return None, None # process an index page into the package-page index for match in HREF.finditer(page): scan( urlparse.urljoin(url, htmldecode(match.group(1))) ) pkg, ver = scan(url) # ensure this page is in the page index if pkg: # process individual package page for new_url in find_external_links(url, page): # Process the found URL base, frag = egg_info_for_url(new_url) if base.endswith('.py') and not frag: if ver: new_url+='#egg=%s-%s' % (pkg,ver) else: self.need_version_info(url) self.scan_url(new_url) return PYPI_MD5.sub( lambda m: '%s' % m.group(1,3,2), page ) else: return "" # no sense double-scanning non-package pages def need_version_info(self, url): self.scan_all( "Page at %s links to .py file(s) without version info; an index " "scan is required.", url ) def scan_all(self, msg=None, *args): if self.index_url not in self.fetched_urls: if msg: self.warn(msg,*args) self.info( "Scanning index of all packages (this may take a while)" ) self.scan_url(self.index_url) def find_packages(self, requirement): self.scan_url(self.index_url + requirement.unsafe_name+'/') if not self.package_pages.get(requirement.key): # Fall back to safe version of the name self.scan_url(self.index_url + requirement.project_name+'/') if not self.package_pages.get(requirement.key): # We couldn't find the target package, so search the index page too self.not_found_in_index(requirement) for url in list(self.package_pages.get(requirement.key,())): # scan each page that might be related to the desired package self.scan_url(url) def obtain(self, requirement, installer=None): self.prescan(); self.find_packages(requirement) for dist in self[requirement.key]: if dist in requirement: return dist self.debug("%s does not match %s", requirement, dist) return super(PackageIndex, self).obtain(requirement,installer) def check_md5(self, cs, info, filename, tfp): if re.match('md5=[0-9a-f]{32}$', info): self.debug("Validating md5 checksum for %s", filename) if cs.hexdigest()!=info[4:]: tfp.close() os.unlink(filename) raise DistutilsError( "MD5 validation failed for "+os.path.basename(filename)+ "; possible download problem?" ) def add_find_links(self, urls): """Add `urls` to the list that will be prescanned for searches""" for url in urls: if ( self.to_scan is None # if we have already "gone online" or not URL_SCHEME(url) # or it's a local file/directory or url.startswith('file:') or list(distros_for_url(url)) # or a direct package link ): # then go ahead and process it now self.scan_url(url) else: # otherwise, defer retrieval till later self.to_scan.append(url) def prescan(self): """Scan urls scheduled for prescanning (e.g. 
--find-links)""" if self.to_scan: map(self.scan_url, self.to_scan) self.to_scan = None # from now on, go ahead and process immediately def not_found_in_index(self, requirement): if self[requirement.key]: # we've seen at least one distro meth, msg = self.info, "Couldn't retrieve index page for %r" else: # no distros seen for this name, might be misspelled meth, msg = (self.warn, "Couldn't find index page for %r (maybe misspelled?)") meth(msg, requirement.unsafe_name) self.scan_all() def download(self, spec, tmpdir): """Locate and/or download `spec` to `tmpdir`, returning a local path `spec` may be a ``Requirement`` object, or a string containing a URL, an existing local filename, or a project/version requirement spec (i.e. the string form of a ``Requirement`` object). If it is the URL of a .py file with an unambiguous ``#egg=name-version`` tag (i.e., one that escapes ``-`` as ``_`` throughout), a trivial ``setup.py`` is automatically created alongside the downloaded file. If `spec` is a ``Requirement`` object or a string containing a project/version requirement spec, this method returns the location of a matching distribution (possibly after downloading it to `tmpdir`). If `spec` is a locally existing file or directory name, it is simply returned unchanged. If `spec` is a URL, it is downloaded to a subpath of `tmpdir`, and the local filename is returned. Various errors may be raised if a problem occurs during downloading. """ if not isinstance(spec,Requirement): scheme = URL_SCHEME(spec) if scheme: # It's a url, download it to tmpdir found = self._download_url(scheme.group(1), spec, tmpdir) base, fragment = egg_info_for_url(spec) if base.endswith('.py'): found = self.gen_setup(found,fragment,tmpdir) return found elif os.path.exists(spec): # Existing file or directory, just return it return spec else: try: spec = Requirement.parse(spec) except ValueError: raise DistutilsError( "Not a URL, existing file, or requirement spec: %r" % (spec,) ) return getattr(self.fetch_distribution(spec, tmpdir),'location',None) def fetch_distribution(self, requirement, tmpdir, force_scan=False, source=False, develop_ok=False, local_index=None, ): """Obtain a distribution suitable for fulfilling `requirement` `requirement` must be a ``pkg_resources.Requirement`` instance. If necessary, or if the `force_scan` flag is set, the requirement is searched for in the (online) package index as well as the locally installed packages. If a distribution matching `requirement` is found, the returned distribution's ``location`` is the value you would have gotten from calling the ``download()`` method with the matching distribution's URL or filename. If no matching distribution is found, ``None`` is returned. If the `source` flag is set, only source distributions and source checkout links will be considered. Unless the `develop_ok` flag is set, development and system eggs (i.e., those using the ``.egg-info`` format) will be ignored. 
""" # process a Requirement self.info("Searching for %s", requirement) skipped = {} dist = None def find(env, req): # Find a matching distribution; may be called more than once # first try to find a local dist for allow_remote in (False, True): # then try to find a platform-dependent dist for allow_platform_independent in (False, True): for dist in env[req.key]: if dist.precedence==DEVELOP_DIST and not develop_ok: if dist not in skipped: self.warn("Skipping development or system egg: %s",dist) skipped[dist] = 1 continue if ((is_local(dist.location) or allow_remote) and (dist in req) and ((allow_platform_independent or dist.platform is not None) and (dist.precedence<=SOURCE_DIST or not source))): return dist if force_scan: self.prescan() self.find_packages(requirement) dist = find(self, requirement) if local_index is not None: dist = dist or find(local_index, requirement) if dist is None and self.to_scan is not None: self.prescan() dist = find(self, requirement) if dist is None and not force_scan: self.find_packages(requirement) dist = find(self, requirement) if dist is None: self.warn( "No local packages or download links found for %s%s", (source and "a source distribution of " or ""), requirement, ) else: self.info("Best match: %s", dist) return dist.clone(location=self.download(dist.location, tmpdir)) def fetch(self, requirement, tmpdir, force_scan=False, source=False): """Obtain a file suitable for fulfilling `requirement` DEPRECATED; use the ``fetch_distribution()`` method now instead. For backward compatibility, this routine is identical but returns the ``location`` of the downloaded distribution instead of a distribution object. """ dist = self.fetch_distribution(requirement,tmpdir,force_scan,source) if dist is not None: return dist.location return None def gen_setup(self, filename, fragment, tmpdir): match = EGG_FRAGMENT.match(fragment) dists = match and [d for d in interpret_distro_name(filename, match.group(1), None) if d.version ] or [] if len(dists)==1: # unambiguous ``#egg`` fragment basename = os.path.basename(filename) # Make sure the file has been downloaded to the temp dir. if os.path.dirname(filename) != tmpdir: dst = os.path.join(tmpdir, basename) from setuptools.command.easy_install import samefile if not samefile(filename, dst): shutil.copy2(filename, dst) filename=dst file = open(os.path.join(tmpdir, 'setup.py'), 'w') file.write( "from setuptools import setup\n" "setup(name=%r, version=%r, py_modules=[%r])\n" % ( dists[0].project_name, dists[0].version, os.path.splitext(basename)[0] ) ) file.close() return filename elif match: raise DistutilsError( "Can't unambiguously interpret project/version identifier %r; " "any dashes in the name or version should be escaped using " "underscores. %r" % (fragment,dists) ) else: raise DistutilsError( "Can't process plain .py files without an '#egg=name-version'" " suffix to enable automatic setup script generation." 
) dl_blocksize = 8192 def _download_to(self, url, filename): self.info("Downloading %s", url) # Download the file fp, tfp, info = None, None, None try: if '#' in url: url, info = url.split('#', 1) fp = self.open_url(url) if isinstance(fp, urllib2.HTTPError): raise DistutilsError( "Can't download %s: %s %s" % (url, fp.code,fp.msg) ) cs = md5() headers = fp.info() blocknum = 0 bs = self.dl_blocksize size = -1 if "content-length" in headers: size = int(headers["Content-Length"]) self.reporthook(url, filename, blocknum, bs, size) tfp = open(filename,'wb') while True: block = fp.read(bs) if block: cs.update(block) tfp.write(block) blocknum += 1 self.reporthook(url, filename, blocknum, bs, size) else: break if info: self.check_md5(cs, info, filename, tfp) return headers finally: if fp: fp.close() if tfp: tfp.close() def reporthook(self, url, filename, blocknum, blksize, size): pass # no-op def open_url(self, url, warning=None): if url.startswith('file:'): return local_open(url) try: return open_with_auth(url) except urllib2.HTTPError, v: return v except urllib2.URLError, v: reason = v.reason except httplib.HTTPException, v: reason = "%s: %s" % (v.__doc__ or v.__class__.__name__, v) if warning: self.warn(warning, reason) else: raise DistutilsError("Download error for %s: %s" % (url, reason)) def _download_url(self, scheme, url, tmpdir): # Determine download filename # name, fragment = egg_info_for_url(url) if name: while '..' in name: name = name.replace('..','.').replace('\\','_') else: name = "__downloaded__" # default if URL has no path contents if name.endswith('.egg.zip'): name = name[:-4] # strip the extra .zip before download filename = os.path.join(tmpdir,name) # Download the file # if scheme=='svn' or scheme.startswith('svn+'): return self._download_svn(url, filename) elif scheme=='file': return urllib2.url2pathname(urlparse.urlparse(url)[2]) else: self.url_ok(url, True) # raises error if not allowed return self._attempt_download(url, filename) def scan_url(self, url): self.process_url(url, True) def _attempt_download(self, url, filename): headers = self._download_to(url, filename) if 'html' in headers.get('content-type','').lower(): return self._download_html(url, headers, filename) else: return filename def _download_html(self, url, headers, filename): file = open(filename) for line in file: if line.strip(): # Check for a subversion index page if re.search(r'([^- ]+ - )?Revision \d+:', line): # it's a subversion index page: file.close() os.unlink(filename) return self._download_svn(url, filename) break # not an index page file.close() os.unlink(filename) raise DistutilsError("Unexpected HTML page found at "+url) def _download_svn(self, url, filename): url = url.split('#',1)[0] # remove any fragment for svn's sake self.info("Doing subversion checkout from %s to %s", url, filename) os.system("svn checkout -q %s %s" % (url, filename)) return filename def debug(self, msg, *args): log.debug(msg, *args) def info(self, msg, *args): log.info(msg, *args) def warn(self, msg, *args): log.warn(msg, *args) # This pattern matches a character entity reference (a decimal numeric # references, a hexadecimal numeric reference, or a named reference). 
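# A few illustrative inputs and outputs for the htmldecode() helper defined
# below, one per kind of reference matched by this pattern:
#     htmldecode('&#65;')   -> 'A'    (decimal numeric reference)
#     htmldecode('&#x41;')  -> 'A'    (hexadecimal numeric reference)
#     htmldecode('&amp;')   -> '&'    (named reference)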
entity_sub = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?').sub def uchr(c): if not isinstance(c, int): return c if c>255: return unichr(c) return chr(c) def decode_entity(match): what = match.group(1) if what.startswith('#x'): what = int(what[2:], 16) elif what.startswith('#'): what = int(what[1:]) else: from htmlentitydefs import name2codepoint what = name2codepoint.get(what, match.group(0)) return uchr(what) def htmldecode(text): """Decode HTML entities in the given text.""" return entity_sub(decode_entity, text) def open_with_auth(url): """Open a urllib2 request, handling HTTP authentication""" scheme, netloc, path, params, query, frag = urlparse.urlparse(url) if scheme in ('http', 'https'): auth, host = urllib.splituser(netloc) else: auth = None if auth: auth = "Basic " + urllib2.unquote(auth).encode('base64').strip() new_url = urlparse.urlunparse((scheme,host,path,params,query,frag)) request = urllib2.Request(new_url) request.add_header("Authorization", auth) else: request = urllib2.Request(url) request.add_header('User-Agent', user_agent) fp = urllib2.urlopen(request) if auth: # Put authentication info back into request URL if same host, # so that links found on the page will work s2, h2, path2, param2, query2, frag2 = urlparse.urlparse(fp.url) if s2==scheme and h2==host: fp.url = urlparse.urlunparse((s2,netloc,path2,param2,query2,frag2)) return fp def fix_sf_url(url): return url # backward compatibility def local_open(url): """Read a local path, with special support for directories""" scheme, server, path, param, query, frag = urlparse.urlparse(url) filename = urllib2.url2pathname(path) if os.path.isfile(filename): return urllib2.urlopen(url) elif path.endswith('/') and os.path.isdir(filename): files = [] for f in os.listdir(filename): if f=='index.html': body = open(os.path.join(filename,f),'rb').read() break elif os.path.isdir(os.path.join(filename,f)): f+='/' files.append("<a href=%r>%s</a>" % (f,f)) else: body = ("<html><head><title>%s" % url) + \ "%s" % '\n'.join(files) status, message = 200, "OK" else: status, message, body = 404, "Path not found", "Not found" return urllib2.HTTPError(url, status, message, {'content-type':'text/html'}, cStringIO.StringIO(body)) # this line is a kludge to keep the trailing blank lines for pje's editor tahoe-lafs-1.10.0/setuptools-0.6c16dev4.egg/setuptools/sandbox.py000066400000000000000000000201401221140116300244110ustar00rootroot00000000000000import os, sys, __builtin__, tempfile, operator, pkg_resources _os = sys.modules[os.name] _open = open _file = file from distutils.errors import DistutilsError from pkg_resources import working_set __all__ = [ "AbstractSandbox", "DirectorySandbox", "SandboxViolation", "run_setup", ] def run_setup(setup_script, args): """Run a distutils setup script, sandboxed in its directory""" old_dir = os.getcwd() save_argv = sys.argv[:] save_path = sys.path[:] setup_dir = os.path.abspath(os.path.dirname(setup_script)) temp_dir = os.path.join(setup_dir,'temp') if not os.path.isdir(temp_dir): os.makedirs(temp_dir) save_tmp = tempfile.tempdir save_modules = sys.modules.copy() pr_state = pkg_resources.__getstate__() try: tempfile.tempdir = temp_dir; os.chdir(setup_dir) try: sys.argv[:] = [setup_script]+list(args) sys.path.insert(0, setup_dir) # reset to include setup dir, w/clean callback list working_set.__init__() working_set.callbacks.append(lambda dist:dist.activate()) DirectorySandbox(setup_dir).run( lambda: execfile( "setup.py", {'__file__':setup_script, '__name__':'__main__'} ) ) except SystemExit, v: if v.args 
and v.args[0]: raise # Normal exit, just return finally: pkg_resources.__setstate__(pr_state) sys.modules.update(save_modules) for key in list(sys.modules): if key not in save_modules: del sys.modules[key] os.chdir(old_dir) sys.path[:] = save_path sys.argv[:] = save_argv tempfile.tempdir = save_tmp class AbstractSandbox: """Wrap 'os' module and 'open()' builtin for virtualizing setup scripts""" _active = False def __init__(self): self._attrs = [ name for name in dir(_os) if not name.startswith('_') and hasattr(self,name) ] def _copy(self, source): for name in self._attrs: setattr(os, name, getattr(source,name)) def run(self, func): """Run 'func' under os sandboxing""" try: self._copy(self) __builtin__.file = self._file __builtin__.open = self._open self._active = True return func() finally: self._active = False __builtin__.open = _open __builtin__.file = _file self._copy(_os) def _mk_dual_path_wrapper(name): original = getattr(_os,name) def wrap(self,src,dst,*args,**kw): if self._active: src,dst = self._remap_pair(name,src,dst,*args,**kw) return original(src,dst,*args,**kw) return wrap for name in ["rename", "link", "symlink"]: if hasattr(_os,name): locals()[name] = _mk_dual_path_wrapper(name) def _mk_single_path_wrapper(name, original=None): original = original or getattr(_os,name) def wrap(self,path,*args,**kw): if self._active: path = self._remap_input(name,path,*args,**kw) return original(path,*args,**kw) return wrap _open = _mk_single_path_wrapper('open', _open) _file = _mk_single_path_wrapper('file', _file) for name in [ "stat", "listdir", "chdir", "open", "chmod", "chown", "mkdir", "remove", "unlink", "rmdir", "utime", "lchown", "chroot", "lstat", "startfile", "mkfifo", "mknod", "pathconf", "access" ]: if hasattr(_os,name): locals()[name] = _mk_single_path_wrapper(name) def _mk_single_with_return(name): original = getattr(_os,name) def wrap(self,path,*args,**kw): if self._active: path = self._remap_input(name,path,*args,**kw) return self._remap_output(name, original(path,*args,**kw)) return original(path,*args,**kw) return wrap for name in ['readlink', 'tempnam']: if hasattr(_os,name): locals()[name] = _mk_single_with_return(name) def _mk_query(name): original = getattr(_os,name) def wrap(self,*args,**kw): retval = original(*args,**kw) if self._active: return self._remap_output(name, retval) return retval return wrap for name in ['getcwd', 'tmpnam']: if hasattr(_os,name): locals()[name] = _mk_query(name) def _validate_path(self,path): """Called to remap or validate any path, whether input or output""" return path def _remap_input(self,operation,path,*args,**kw): """Called for path inputs""" return self._validate_path(path) def _remap_output(self,operation,path): """Called for path outputs""" return self._validate_path(path) def _remap_pair(self,operation,src,dst,*args,**kw): """Called for path pairs like rename, link, and symlink operations""" return ( self._remap_input(operation+'-from',src,*args,**kw), self._remap_input(operation+'-to',dst,*args,**kw) ) class DirectorySandbox(AbstractSandbox): """Restrict operations to a single subdirectory - pseudo-chroot""" write_ops = dict.fromkeys([ "open", "chmod", "chown", "mkdir", "remove", "unlink", "rmdir", "utime", "lchown", "chroot", "mkfifo", "mknod", "tempnam", ]) def __init__(self,sandbox): self._sandbox = os.path.normcase(os.path.realpath(sandbox)) self._prefix = os.path.join(self._sandbox,'') AbstractSandbox.__init__(self) def _violation(self, operation, *args, **kw): raise SandboxViolation(operation, args, kw) def _open(self, 
path, mode='r', *args, **kw): if mode not in ('r', 'rt', 'rb', 'rU', 'U') and not self._ok(path): self._violation("open", path, mode, *args, **kw) return _open(path,mode,*args,**kw) def tmpnam(self): self._violation("tmpnam") def _ok(self,path): if hasattr(_os,'devnull') and path==_os.devnull: return True active = self._active try: self._active = False realpath = os.path.normcase(os.path.realpath(path)) if realpath==self._sandbox or realpath.startswith(self._prefix): return True finally: self._active = active def _remap_input(self,operation,path,*args,**kw): """Called for path inputs""" if operation in self.write_ops and not self._ok(path): self._violation(operation, os.path.realpath(path), *args, **kw) return path def _remap_pair(self,operation,src,dst,*args,**kw): """Called for path pairs like rename, link, and symlink operations""" if not self._ok(src) or not self._ok(dst): self._violation(operation, src, dst, *args, **kw) return (src,dst) def _file(self, path, mode='r', *args, **kw): if mode not in ('r', 'rt', 'rb', 'rU', 'U') and not self._ok(path): self._violation("file", path, mode, *args, **kw) return _file(path,mode,*args,**kw) def open(self, file, flags, mode=0777): """Called for low-level os.open()""" if flags & WRITE_FLAGS and not self._ok(file): self._violation("os.open", file, flags, mode) return _os.open(file,flags,mode) WRITE_FLAGS = reduce( operator.or_, [getattr(_os, a, 0) for a in "O_WRONLY O_RDWR O_APPEND O_CREAT O_TRUNC O_TEMPORARY".split()] ) class SandboxViolation(DistutilsError): """A setup script attempted to modify the filesystem outside the sandbox""" def __str__(self): return """SandboxViolation: %s%r %s The package setup script has attempted to modify files on your system that are not within the EasyInstall build area, and has been aborted. This package cannot be safely installed by EasyInstall, and may not support alternate installation locations even if you run its setup script by hand. Please inform the package's author and the EasyInstall maintainers to find out if a fix or workaround is available.""" % self.args # tahoe-lafs-1.10.0/setuptools-0.6c16dev4.egg/setuptools/site-patch.py000066400000000000000000000044271221140116300250260ustar00rootroot00000000000000def __boot(): import sys, imp, os, os.path PYTHONPATH = os.environ.get('PYTHONPATH') if PYTHONPATH is None or (sys.platform=='win32' and not PYTHONPATH): PYTHONPATH = [] else: PYTHONPATH = PYTHONPATH.split(os.pathsep) pic = getattr(sys,'path_importer_cache',{}) stdpath = sys.path[len(PYTHONPATH):] mydir = os.path.dirname(__file__) #print "searching",stdpath,sys.path for item in stdpath: if item==mydir or not item: continue # skip if current dir. 
on Windows, or my own directory importer = pic.get(item) if importer is not None: loader = importer.find_module('site') if loader is not None: # This should actually reload the current module loader.load_module('site') break else: try: stream, path, descr = imp.find_module('site',[item]) except ImportError: continue if stream is None: continue try: # This should actually reload the current module imp.load_module('site',stream,path,descr) finally: stream.close() break else: raise ImportError("Couldn't find the real 'site' module") #print "loaded", __file__ known_paths = dict([(makepath(item)[1],1) for item in sys.path]) # 2.2 comp oldpos = getattr(sys,'__egginsert',0) # save old insertion position sys.__egginsert = 0 # and reset the current one for item in PYTHONPATH: addsitedir(item) sys.__egginsert += oldpos # restore effective old position d,nd = makepath(stdpath[0]) insert_at = None new_path = [] for item in sys.path: p,np = makepath(item) if np==nd and insert_at is None: # We've hit the first 'system' path entry, so added entries go here insert_at = len(new_path) if np in known_paths or insert_at is None: new_path.append(item) else: # new path after the insert point, back-insert it new_path.insert(insert_at, item) insert_at += 1 sys.path[:] = new_path if __name__=='site': __boot() del __boot tahoe-lafs-1.10.0/setuptools-0.6c16dev4.egg/setuptools/tests/000077500000000000000000000000001221140116300235465ustar00rootroot00000000000000tahoe-lafs-1.10.0/setuptools-0.6c16dev4.egg/setuptools/tests/__init__.py000066400000000000000000000301171221140116300256610ustar00rootroot00000000000000"""Tests for the 'setuptools' package""" from unittest import TestSuite, TestCase, makeSuite, defaultTestLoader import distutils.core, distutils.cmd from distutils.errors import DistutilsOptionError, DistutilsPlatformError from distutils.errors import DistutilsSetupError import setuptools, setuptools.dist from setuptools import Feature from distutils.core import Extension extract_constant, get_module_constant = None, None from setuptools.depends import * from distutils.version import StrictVersion, LooseVersion from distutils.util import convert_path import sys, os.path def additional_tests(): import doctest, unittest suite = unittest.TestSuite(( doctest.DocFileSuite('api_tests.txt', optionflags=doctest.ELLIPSIS, package=__name__, ), )) if sys.platform == 'win32': suite.addTest(doctest.DocFileSuite('win_script_wrapper.txt')) return suite def makeSetup(**args): """Return distribution from 'setup(**args)', without executing commands""" distutils.core._setup_stop_after = "commandline" # Don't let system command line leak into tests! 
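# Example (illustrative): makeSetup(packages=['a'], py_modules=['x']) returns
# a setuptools.dist.Distribution configured as if by setup(), but command
# execution stops at the "commandline" stage set just above, so nothing is
# actually built or installed during the tests.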
args.setdefault('script_args',['install']) try: return setuptools.setup(**args) finally: distutils.core_setup_stop_after = None class DependsTests(TestCase): def testExtractConst(self): if not extract_constant: return # skip on non-bytecode platforms def f1(): global x,y,z x = "test" y = z # unrecognized name self.assertEqual(extract_constant(f1.func_code,'q', -1), None) # constant assigned self.assertEqual(extract_constant(f1.func_code,'x', -1), "test") # expression assigned self.assertEqual(extract_constant(f1.func_code,'y', -1), -1) # recognized name, not assigned self.assertEqual(extract_constant(f1.func_code,'z', -1), None) def testFindModule(self): self.assertRaises(ImportError, find_module, 'no-such.-thing') self.assertRaises(ImportError, find_module, 'setuptools.non-existent') f,p,i = find_module('setuptools.tests'); f.close() def testModuleExtract(self): if not get_module_constant: return # skip on non-bytecode platforms from distutils import __version__ self.assertEqual( get_module_constant('distutils','__version__'), __version__ ) self.assertEqual( get_module_constant('sys','version'), sys.version ) self.assertEqual( get_module_constant('setuptools.tests','__doc__'),__doc__ ) def testRequire(self): if not extract_constant: return # skip on non-bytecode platforms req = Require('Distutils','1.0.3','distutils') self.assertEqual(req.name, 'Distutils') self.assertEqual(req.module, 'distutils') self.assertEqual(req.requested_version, '1.0.3') self.assertEqual(req.attribute, '__version__') self.assertEqual(req.full_name(), 'Distutils-1.0.3') from distutils import __version__ self.assertEqual(req.get_version(), __version__) self.failUnless(req.version_ok('1.0.9')) self.failIf(req.version_ok('0.9.1')) self.failIf(req.version_ok('unknown')) self.failUnless(req.is_present()) self.failUnless(req.is_current()) req = Require('Distutils 3000','03000','distutils',format=LooseVersion) self.failUnless(req.is_present()) self.failIf(req.is_current()) self.failIf(req.version_ok('unknown')) req = Require('Do-what-I-mean','1.0','d-w-i-m') self.failIf(req.is_present()) self.failIf(req.is_current()) req = Require('Tests', None, 'tests', homepage="http://example.com") self.assertEqual(req.format, None) self.assertEqual(req.attribute, None) self.assertEqual(req.requested_version, None) self.assertEqual(req.full_name(), 'Tests') self.assertEqual(req.homepage, 'http://example.com') paths = [os.path.dirname(p) for p in __path__] self.failUnless(req.is_present(paths)) self.failUnless(req.is_current(paths)) class DistroTests(TestCase): def setUp(self): self.e1 = Extension('bar.ext',['bar.c']) self.e2 = Extension('c.y', ['y.c']) self.dist = makeSetup( packages=['a', 'a.b', 'a.b.c', 'b', 'c'], py_modules=['b.d','x'], ext_modules = (self.e1, self.e2), package_dir = {}, ) def testDistroType(self): self.failUnless(isinstance(self.dist,setuptools.dist.Distribution)) def testExcludePackage(self): self.dist.exclude_package('a') self.assertEqual(self.dist.packages, ['b','c']) self.dist.exclude_package('b') self.assertEqual(self.dist.packages, ['c']) self.assertEqual(self.dist.py_modules, ['x']) self.assertEqual(self.dist.ext_modules, [self.e1, self.e2]) self.dist.exclude_package('c') self.assertEqual(self.dist.packages, []) self.assertEqual(self.dist.py_modules, ['x']) self.assertEqual(self.dist.ext_modules, [self.e1]) # test removals from unspecified options makeSetup().exclude_package('x') def testIncludeExclude(self): # remove an extension self.dist.exclude(ext_modules=[self.e1]) 
self.assertEqual(self.dist.ext_modules, [self.e2]) # add it back in self.dist.include(ext_modules=[self.e1]) self.assertEqual(self.dist.ext_modules, [self.e2, self.e1]) # should not add duplicate self.dist.include(ext_modules=[self.e1]) self.assertEqual(self.dist.ext_modules, [self.e2, self.e1]) def testExcludePackages(self): self.dist.exclude(packages=['c','b','a']) self.assertEqual(self.dist.packages, []) self.assertEqual(self.dist.py_modules, ['x']) self.assertEqual(self.dist.ext_modules, [self.e1]) def testEmpty(self): dist = makeSetup() dist.include(packages=['a'], py_modules=['b'], ext_modules=[self.e2]) dist = makeSetup() dist.exclude(packages=['a'], py_modules=['b'], ext_modules=[self.e2]) def testContents(self): self.failUnless(self.dist.has_contents_for('a')) self.dist.exclude_package('a') self.failIf(self.dist.has_contents_for('a')) self.failUnless(self.dist.has_contents_for('b')) self.dist.exclude_package('b') self.failIf(self.dist.has_contents_for('b')) self.failUnless(self.dist.has_contents_for('c')) self.dist.exclude_package('c') self.failIf(self.dist.has_contents_for('c')) def testInvalidIncludeExclude(self): self.assertRaises(DistutilsSetupError, self.dist.include, nonexistent_option='x' ) self.assertRaises(DistutilsSetupError, self.dist.exclude, nonexistent_option='x' ) self.assertRaises(DistutilsSetupError, self.dist.include, packages={'x':'y'} ) self.assertRaises(DistutilsSetupError, self.dist.exclude, packages={'x':'y'} ) self.assertRaises(DistutilsSetupError, self.dist.include, ext_modules={'x':'y'} ) self.assertRaises(DistutilsSetupError, self.dist.exclude, ext_modules={'x':'y'} ) self.assertRaises(DistutilsSetupError, self.dist.include, package_dir=['q'] ) self.assertRaises(DistutilsSetupError, self.dist.exclude, package_dir=['q'] ) class FeatureTests(TestCase): def setUp(self): self.req = Require('Distutils','1.0.3','distutils') self.dist = makeSetup( features={ 'foo': Feature("foo",standard=True,require_features=['baz',self.req]), 'bar': Feature("bar", standard=True, packages=['pkg.bar'], py_modules=['bar_et'], remove=['bar.ext'], ), 'baz': Feature( "baz", optional=False, packages=['pkg.baz'], scripts = ['scripts/baz_it'], libraries=[('libfoo','foo/foofoo.c')] ), 'dwim': Feature("DWIM", available=False, remove='bazish'), }, script_args=['--without-bar', 'install'], packages = ['pkg.bar', 'pkg.foo'], py_modules = ['bar_et', 'bazish'], ext_modules = [Extension('bar.ext',['bar.c'])] ) def testDefaults(self): self.failIf( Feature( "test",standard=True,remove='x',available=False ).include_by_default() ) self.failUnless( Feature("test",standard=True,remove='x').include_by_default() ) # Feature must have either kwargs, removes, or require_features self.assertRaises(DistutilsSetupError, Feature, "test") def testAvailability(self): self.assertRaises( DistutilsPlatformError, self.dist.features['dwim'].include_in, self.dist ) def testFeatureOptions(self): dist = self.dist self.failUnless( ('with-dwim',None,'include DWIM') in dist.feature_options ) self.failUnless( ('without-dwim',None,'exclude DWIM (default)') in dist.feature_options ) self.failUnless( ('with-bar',None,'include bar (default)') in dist.feature_options ) self.failUnless( ('without-bar',None,'exclude bar') in dist.feature_options ) self.assertEqual(dist.feature_negopt['without-foo'],'with-foo') self.assertEqual(dist.feature_negopt['without-bar'],'with-bar') self.assertEqual(dist.feature_negopt['without-dwim'],'with-dwim') self.failIf('without-baz' in dist.feature_negopt) def testUseFeatures(self): dist = 
self.dist self.assertEqual(dist.with_foo,1) self.assertEqual(dist.with_bar,0) self.assertEqual(dist.with_baz,1) self.failIf('bar_et' in dist.py_modules) self.failIf('pkg.bar' in dist.packages) self.failUnless('pkg.baz' in dist.packages) self.failUnless('scripts/baz_it' in dist.scripts) self.failUnless(('libfoo','foo/foofoo.c') in dist.libraries) self.assertEqual(dist.ext_modules,[]) self.assertEqual(dist.require_features, [self.req]) # If we ask for bar, it should fail because we explicitly disabled # it on the command line self.assertRaises(DistutilsOptionError, dist.include_feature, 'bar') def testFeatureWithInvalidRemove(self): self.assertRaises( SystemExit, makeSetup, features = {'x':Feature('x', remove='y')} ) class TestCommandTests(TestCase): def testTestIsCommand(self): test_cmd = makeSetup().get_command_obj('test') self.failUnless(isinstance(test_cmd, distutils.cmd.Command)) def testLongOptSuiteWNoDefault(self): ts1 = makeSetup(script_args=['test','--test-suite=foo.tests.suite']) ts1 = ts1.get_command_obj('test') ts1.ensure_finalized() self.assertEqual(ts1.test_suite, 'foo.tests.suite') def testDefaultSuite(self): ts2 = makeSetup(test_suite='bar.tests.suite').get_command_obj('test') ts2.ensure_finalized() self.assertEqual(ts2.test_suite, 'bar.tests.suite') def testDefaultWModuleOnCmdLine(self): ts3 = makeSetup( test_suite='bar.tests', script_args=['test','-m','foo.tests'] ).get_command_obj('test') ts3.ensure_finalized() self.assertEqual(ts3.test_module, 'foo.tests') self.assertEqual(ts3.test_suite, 'foo.tests.test_suite') def testConflictingOptions(self): ts4 = makeSetup( script_args=['test','-m','bar.tests', '-s','foo.tests.suite'] ).get_command_obj('test') self.assertRaises(DistutilsOptionError, ts4.ensure_finalized) def testNoSuite(self): ts5 = makeSetup().get_command_obj('test') ts5.ensure_finalized() self.assertEqual(ts5.test_suite, None) tahoe-lafs-1.10.0/setuptools-0.6c16dev4.egg/setuptools/tests/test_packageindex.py000066400000000000000000000013671221140116300276110ustar00rootroot00000000000000"""Package Index Tests """ # More would be better! 
import os, shutil, tempfile, unittest, urllib2
import pkg_resources
import setuptools.package_index

class TestPackageIndex(unittest.TestCase):

    def test_bad_urls(self):
        index = setuptools.package_index.PackageIndex()
        url = 'http://127.0.0.1/nonesuch/test_package_index'
        try:
            v = index.open_url(url)
        except Exception, v:
            self.assert_(url in str(v))
        else:
            self.assert_(isinstance(v,urllib2.HTTPError))

    def test_url_ok(self):
        index = setuptools.package_index.PackageIndex(
            hosts=('www.example.com',)
        )
        url = 'file:///tmp/test_package_index'
        self.assert_(index.url_ok(url, True))
tahoe-lafs-1.10.0/setuptools-0.6c16dev4.egg/setuptools/tests/test_resources.py
#!/usr/bin/python
# -*- coding: utf-8 -*-
# NOTE: the shebang and encoding lines are for ScriptHeaderTests; do not remove
from unittest import TestCase, makeSuite; from pkg_resources import *
from setuptools.command.easy_install import get_script_header, is_sh
import os, pkg_resources, sys, StringIO
try: frozenset
except NameError:
    from sets import ImmutableSet as frozenset

class Metadata(EmptyProvider):
    """Mock object to return metadata as if from an on-disk distribution"""

    def __init__(self,*pairs):
        self.metadata = dict(pairs)

    def has_metadata(self,name):
        return name in self.metadata

    def get_metadata(self,name):
        return self.metadata[name]

    def get_metadata_lines(self,name):
        return yield_lines(self.get_metadata(name))

class DistroTests(TestCase):

    def testCollection(self):
        # empty path should produce no distributions
        ad = Environment([], platform=None, python=None)
        self.assertEqual(list(ad), [])
        self.assertEqual(ad['FooPkg'],[])
        ad.add(Distribution.from_filename("FooPkg-1.3_1.egg"))
        ad.add(Distribution.from_filename("FooPkg-1.4-py2.4-win32.egg"))
        ad.add(Distribution.from_filename("FooPkg-1.2-py2.4.egg"))

        # Name is in there now
        self.failUnless(ad['FooPkg'])
        # But only 1 package
        self.assertEqual(list(ad), ['foopkg'])

        # Distributions sort by version
        self.assertEqual(
            [dist.version for dist in ad['FooPkg']], ['1.4','1.3-1','1.2']
        )
        # Removing a distribution leaves sequence alone
        ad.remove(ad['FooPkg'][1])
        self.assertEqual(
            [dist.version for dist in ad['FooPkg']], ['1.4','1.2']
        )
        # And inserting adds them in order
        ad.add(Distribution.from_filename("FooPkg-1.9.egg"))
        self.assertEqual(
            [dist.version for dist in ad['FooPkg']], ['1.9','1.4','1.2']
        )

        ws = WorkingSet([])
        foo12 = Distribution.from_filename("FooPkg-1.2-py2.4.egg")
        foo14 = Distribution.from_filename("FooPkg-1.4-py2.4-win32.egg")
        req, = parse_requirements("FooPkg>=1.3")

        # Nominal case: no distros on path, should yield all applicable
        self.assertEqual(ad.best_match(req,ws).version, '1.9')
        # If a matching distro is already installed, should return only that
        ws.add(foo14); self.assertEqual(ad.best_match(req,ws).version, '1.4')

        # If the first matching distro is unsuitable, it's a version conflict
        ws = WorkingSet([]); ws.add(foo12); ws.add(foo14)
        self.assertRaises(VersionConflict, ad.best_match, req, ws)

        # If more than one match on the path, the first one takes precedence
        ws = WorkingSet([]); ws.add(foo14); ws.add(foo12); ws.add(foo14);
        self.assertEqual(ad.best_match(req,ws).version, '1.4')

    def checkFooPkg(self,d):
        self.assertEqual(d.project_name, "FooPkg")
        self.assertEqual(d.key, "foopkg")
        self.assertEqual(d.version, "1.3-1")
        self.assertEqual(d.py_version, "2.4")
        self.assertEqual(d.platform, "win32")
        self.assertEqual(d.parsed_version, parse_version("1.3-1"))

    def testDistroBasics(self):
        d = Distribution(
            "/some/path",
project_name="FooPkg",version="1.3-1",py_version="2.4",platform="win32" ) self.checkFooPkg(d) d = Distribution("/some/path") self.assertEqual(d.py_version, sys.version[:3]) self.assertEqual(d.platform, None) def testDistroParse(self): d = Distribution.from_filename("FooPkg-1.3_1-py2.4-win32.egg") self.checkFooPkg(d) d = Distribution.from_filename("FooPkg-1.3_1-py2.4-win32.egg-info") self.checkFooPkg(d) def testDistroMetadata(self): d = Distribution( "/some/path", project_name="FooPkg", py_version="2.4", platform="win32", metadata = Metadata( ('PKG-INFO',"Metadata-Version: 1.0\nVersion: 1.3-1\n") ) ) self.checkFooPkg(d) def distRequires(self, txt): return Distribution("/foo", metadata=Metadata(('depends.txt', txt))) def checkRequires(self, dist, txt, extras=()): self.assertEqual( list(dist.requires(extras)), list(parse_requirements(txt)) ) def testDistroDependsSimple(self): for v in "Twisted>=1.5", "Twisted>=1.5\nZConfig>=2.0": self.checkRequires(self.distRequires(v), v) def testResolve(self): ad = Environment([]); ws = WorkingSet([]) # Resolving no requirements -> nothing to install self.assertEqual( list(ws.resolve([],ad)), [] ) # Request something not in the collection -> DistributionNotFound self.assertRaises( DistributionNotFound, ws.resolve, parse_requirements("Foo"), ad ) Foo = Distribution.from_filename( "/foo_dir/Foo-1.2.egg", metadata=Metadata(('depends.txt', "[bar]\nBaz>=2.0")) ) ad.add(Foo); ad.add(Distribution.from_filename("Foo-0.9.egg")) # Request thing(s) that are available -> list to activate for i in range(3): targets = list(ws.resolve(parse_requirements("Foo"), ad)) self.assertEqual(targets, [Foo]) map(ws.add,targets) self.assertRaises(VersionConflict, ws.resolve, parse_requirements("Foo==0.9"), ad) ws = WorkingSet([]) # reset # Request an extra that causes an unresolved dependency for "Baz" self.assertRaises( DistributionNotFound, ws.resolve,parse_requirements("Foo[bar]"), ad ) Baz = Distribution.from_filename( "/foo_dir/Baz-2.1.egg", metadata=Metadata(('depends.txt', "Foo")) ) ad.add(Baz) # Activation list now includes resolved dependency self.assertEqual( list(ws.resolve(parse_requirements("Foo[bar]"), ad)), [Foo,Baz] ) # Requests for conflicting versions produce VersionConflict self.assertRaises( VersionConflict, ws.resolve, parse_requirements("Foo==1.2\nFoo!=1.2"), ad ) def testDistroDependsOptions(self): d = self.distRequires(""" Twisted>=1.5 [docgen] ZConfig>=2.0 docutils>=0.3 [fastcgi] fcgiapp>=0.1""") self.checkRequires(d,"Twisted>=1.5") self.checkRequires( d,"Twisted>=1.5 ZConfig>=2.0 docutils>=0.3".split(), ["docgen"] ) self.checkRequires( d,"Twisted>=1.5 fcgiapp>=0.1".split(), ["fastcgi"] ) self.checkRequires( d,"Twisted>=1.5 ZConfig>=2.0 docutils>=0.3 fcgiapp>=0.1".split(), ["docgen","fastcgi"] ) self.checkRequires( d,"Twisted>=1.5 fcgiapp>=0.1 ZConfig>=2.0 docutils>=0.3".split(), ["fastcgi", "docgen"] ) self.assertRaises(UnknownExtra, d.requires, ["foo"]) class EntryPointTests(TestCase): def assertfields(self, ep): self.assertEqual(ep.name,"foo") self.assertEqual(ep.module_name,"setuptools.tests.test_resources") self.assertEqual(ep.attrs, ("EntryPointTests",)) self.assertEqual(ep.extras, ("x",)) self.failUnless(ep.load() is EntryPointTests) self.assertEqual( str(ep), "foo = setuptools.tests.test_resources:EntryPointTests [x]" ) def setUp(self): self.dist = Distribution.from_filename( "FooPkg-1.2-py2.4.egg", metadata=Metadata(('requires.txt','[x]'))) def testBasics(self): ep = EntryPoint( "foo", "setuptools.tests.test_resources", ["EntryPointTests"], ["x"], 
self.dist ) self.assertfields(ep) def testParse(self): s = "foo = setuptools.tests.test_resources:EntryPointTests [x]" ep = EntryPoint.parse(s, self.dist) self.assertfields(ep) ep = EntryPoint.parse("bar baz= spammity[PING]") self.assertEqual(ep.name,"bar baz") self.assertEqual(ep.module_name,"spammity") self.assertEqual(ep.attrs, ()) self.assertEqual(ep.extras, ("ping",)) ep = EntryPoint.parse(" fizzly = wocka:foo") self.assertEqual(ep.name,"fizzly") self.assertEqual(ep.module_name,"wocka") self.assertEqual(ep.attrs, ("foo",)) self.assertEqual(ep.extras, ()) def testRejects(self): for ep in [ "foo", "x=1=2", "x=a:b:c", "q=x/na", "fez=pish:tush-z", "x=f[a]>2", ]: try: EntryPoint.parse(ep) except ValueError: pass else: raise AssertionError("Should've been bad", ep) def checkSubMap(self, m): self.assertEqual(len(m), len(self.submap_expect)) for key, ep in self.submap_expect.iteritems(): self.assertEqual(repr(m.get(key)), repr(ep)) submap_expect = dict( feature1=EntryPoint('feature1', 'somemodule', ['somefunction']), feature2=EntryPoint('feature2', 'another.module', ['SomeClass'], ['extra1','extra2']), feature3=EntryPoint('feature3', 'this.module', extras=['something']) ) submap_str = """ # define features for blah blah feature1 = somemodule:somefunction feature2 = another.module:SomeClass [extra1,extra2] feature3 = this.module [something] """ def testParseList(self): self.checkSubMap(EntryPoint.parse_group("xyz", self.submap_str)) self.assertRaises(ValueError, EntryPoint.parse_group, "x a", "foo=bar") self.assertRaises(ValueError, EntryPoint.parse_group, "x", ["foo=baz", "foo=bar"]) def testParseMap(self): m = EntryPoint.parse_map({'xyz':self.submap_str}) self.checkSubMap(m['xyz']) self.assertEqual(m.keys(),['xyz']) m = EntryPoint.parse_map("[xyz]\n"+self.submap_str) self.checkSubMap(m['xyz']) self.assertEqual(m.keys(),['xyz']) self.assertRaises(ValueError, EntryPoint.parse_map, ["[xyz]", "[xyz]"]) self.assertRaises(ValueError, EntryPoint.parse_map, self.submap_str) class RequirementsTests(TestCase): def testBasics(self): r = Requirement.parse("Twisted>=1.2") self.assertEqual(str(r),"Twisted>=1.2") self.assertEqual(repr(r),"Requirement.parse('Twisted>=1.2')") self.assertEqual(r, Requirement("Twisted", [('>=','1.2')], ())) self.assertEqual(r, Requirement("twisTed", [('>=','1.2')], ())) self.assertNotEqual(r, Requirement("Twisted", [('>=','2.0')], ())) self.assertNotEqual(r, Requirement("Zope", [('>=','1.2')], ())) self.assertNotEqual(r, Requirement("Zope", [('>=','3.0')], ())) self.assertNotEqual(r, Requirement.parse("Twisted[extras]>=1.2")) def testOrdering(self): r1 = Requirement("Twisted", [('==','1.2c1'),('>=','1.2')], ()) r2 = Requirement("Twisted", [('>=','1.2'),('==','1.2c1')], ()) self.assertEqual(r1,r2) self.assertEqual(str(r1),str(r2)) self.assertEqual(str(r2),"Twisted==1.2c1,>=1.2") def testBasicContains(self): r = Requirement("Twisted", [('>=','1.2')], ()) foo_dist = Distribution.from_filename("FooPkg-1.3_1.egg") twist11 = Distribution.from_filename("Twisted-1.1.egg") twist12 = Distribution.from_filename("Twisted-1.2.egg") self.failUnless(parse_version('1.2') in r) self.failUnless(parse_version('1.1') not in r) self.failUnless('1.2' in r) self.failUnless('1.1' not in r) self.failUnless(foo_dist not in r) self.failUnless(twist11 not in r) self.failUnless(twist12 in r) def testAdvancedContains(self): r, = parse_requirements("Foo>=1.2,<=1.3,==1.9,>2.0,!=2.5,<3.0,==4.5") for v in ('1.2','1.2.2','1.3','1.9','2.0.1','2.3','2.6','3.0c1','4.5'): self.failUnless(v in r, (v,r)) for v in 
('1.2c1','1.3.1','1.5','1.9.1','2.0','2.5','3.0','4.0'): self.failUnless(v not in r, (v,r)) def testOptionsAndHashing(self): r1 = Requirement.parse("Twisted[foo,bar]>=1.2") r2 = Requirement.parse("Twisted[bar,FOO]>=1.2") r3 = Requirement.parse("Twisted[BAR,FOO]>=1.2.0") self.assertEqual(r1,r2) self.assertEqual(r1,r3) self.assertEqual(r1.extras, ("foo","bar")) self.assertEqual(r2.extras, ("bar","foo")) # extras are normalized self.assertEqual(hash(r1), hash(r2)) self.assertEqual( hash(r1), hash(("twisted", ((">=",parse_version("1.2")),), frozenset(["foo","bar"]))) ) def testVersionEquality(self): r1 = Requirement.parse("setuptools==0.3a2") r2 = Requirement.parse("setuptools!=0.3a4") d = Distribution.from_filename self.failIf(d("setuptools-0.3a4.egg") in r1) self.failIf(d("setuptools-0.3a1.egg") in r1) self.failIf(d("setuptools-0.3a4.egg") in r2) self.failUnless(d("setuptools-0.3a2.egg") in r1) self.failUnless(d("setuptools-0.3a2.egg") in r2) self.failUnless(d("setuptools-0.3a3.egg") in r2) self.failUnless(d("setuptools-0.3a5.egg") in r2) class ParseTests(TestCase): def testEmptyParse(self): self.assertEqual(list(parse_requirements('')), []) def testYielding(self): for inp,out in [ ([], []), ('x',['x']), ([[]],[]), (' x\n y', ['x','y']), (['x\n\n','y'], ['x','y']), ]: self.assertEqual(list(pkg_resources.yield_lines(inp)),out) def testSplitting(self): self.assertEqual( list( pkg_resources.split_sections(""" x [Y] z a [b ] # foo c [ d] [q] v """ ) ), [(None,["x"]), ("Y",["z","a"]), ("b",["c"]), ("d",[]), ("q",["v"])] ) self.assertRaises(ValueError,list,pkg_resources.split_sections("[foo")) def testSafeName(self): self.assertEqual(safe_name("adns-python"), "adns-python") self.assertEqual(safe_name("WSGI Utils"), "WSGI-Utils") self.assertEqual(safe_name("WSGI Utils"), "WSGI-Utils") self.assertEqual(safe_name("Money$$$Maker"), "Money-Maker") self.assertNotEqual(safe_name("peak.web"), "peak-web") def testSafeVersion(self): self.assertEqual(safe_version("1.2-1"), "1.2-1") self.assertEqual(safe_version("1.2 alpha"), "1.2.alpha") self.assertEqual(safe_version("2.3.4 20050521"), "2.3.4.20050521") self.assertEqual(safe_version("Money$$$Maker"), "Money-Maker") self.assertEqual(safe_version("peak.web"), "peak.web") def testSimpleRequirements(self): self.assertEqual( list(parse_requirements('Twis-Ted>=1.2-1')), [Requirement('Twis-Ted',[('>=','1.2-1')], ())] ) self.assertEqual( list(parse_requirements('Twisted >=1.2, \ # more\n<2.0')), [Requirement('Twisted',[('>=','1.2'),('<','2.0')], ())] ) self.assertEqual( Requirement.parse("FooBar==1.99a3"), Requirement("FooBar", [('==','1.99a3')], ()) ) self.assertRaises(ValueError,Requirement.parse,">=2.3") self.assertRaises(ValueError,Requirement.parse,"x\\") self.assertRaises(ValueError,Requirement.parse,"x==2 q") self.assertRaises(ValueError,Requirement.parse,"X==1\nY==2") self.assertRaises(ValueError,Requirement.parse,"#") def testVersionEquality(self): def c(s1,s2): p1, p2 = parse_version(s1),parse_version(s2) self.assertEqual(p1,p2, (s1,s2,p1,p2)) c('1.2-rc1', '1.2rc1') c('0.4', '0.4.0') c('0.4.0.0', '0.4.0') c('0.4.0-0', '0.4-0') c('0pl1', '0.0pl1') c('0pre1', '0.0c1') c('0.0.0preview1', '0c1') c('0.0c1', '0-rc1') c('1.2a1', '1.2.a.1'); c('1.2...a', '1.2a') def testVersionOrdering(self): def c(s1,s2): p1, p2 = parse_version(s1),parse_version(s2) self.failUnless(p1 "easy_install will install a package that is already there" "be more like distutils with regard to --prefix=" "respect the PYTHONPATH" (Note: this patch does not work as intended when site.py has 
been modified. This will be fixed in a future version.)

    "python setup.py --help-commands raises exception due to conflict with distribute"

 * The following patch to setuptools introduced bugs, and has been reverted
   in zetuptoolz:

   $ svn log -r 45514
   ------------------------------------------------------------------------
   r45514 | phillip.eby | 2006-04-18 04:03:16 +0100 (Tue, 18 Apr 2006) | 9 lines

   Backport pkgutil, pydoc, and doctest from the 2.5 trunk to setuptools
   0.7 trunk.  (Sideport?)  Setuptools 0.7 will install these in place of
   the 2.3/2.4 versions (at least of pydoc and doctest) to let them work
   properly with eggs.  pkg_resources now depends on the 2.5 pkgutil, which
   is included here as _pkgutil, to work around the fact that some system
   packagers will install setuptools without overriding the stdlib modules.
   But users who install their own setuptools will get them, and the system
   packaged people probably don't need them.
   ------------------------------------------------------------------------

 * If unpatched setuptools decides that it needs to change an existing site.py
   file that appears not to have been written by it (because the file does not
   start with "def __boot():"), it aborts the installation. zetuptoolz leaves
   the file alone and outputs a warning, but continues with the installation.

 * The scripts written by zetuptoolz have the following extra line:

     # generated by zetuptoolz

   after the header.

 * Windows-specific changes (native Python):

   Python distributions may have command-line or GUI scripts. On Windows,
   setuptools creates an executable wrapper to run each script. zetuptoolz
   uses a different approach that does not require an .exe wrapper. It writes
   approximately the same script file that is used on other platforms, but
   with a .pyscript extension. It also writes a shell-script wrapper (without
   any extension) that is only used when the command is run from a Cygwin
   shell.

   Some of the advantages of this approach are:

    * Unicode arguments are preserved (although the program will need to use
      some Windows-specific code to get at them in current versions of Python);
    * it works correctly on 64-bit Windows;
    * the zetuptoolz distribution need not contain either any binary
      executables, or any C code that needs to be compiled.

   See setuptools\tests\win_script_wrapper.txt for further details.

   Installing or building any distribution on Windows will automatically
   associate .pyscript with the native Python interpreter for the current
   user. It will also add .pyscript and .pyw to the PATHEXT variable for the
   current user, which is needed to allow scripts to be run without typing
   any extension.

   There is an additional setup.py command that can be used to perform these
   steps separately (which isn't normally needed, but might be useful for
   debugging):

     python setup.py scriptsetup

   Adding the --allusers option, i.e.

     python setup.py scriptsetup --allusers

   will make the .pyscript association and changes to the PATHEXT variable
   for all users of this Windows installation, except those that have it
   overridden in their per-user environment. In this case setup.py must be
   run with Administrator privileges, e.g. from a Command Prompt whose
   shortcut has been set to run as Administrator.
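   For illustration only, the sketch below shows the kind of per-user registry
   changes described above, using the standard-library _winreg module. It is
   NOT the actual scriptsetup implementation (see setup.py and
   win_script_wrapper.txt for that): the "Python.Script" progid and the helper
   function names are invented for this example, and the real command also
   handles the --allusers (machine-wide) case and error reporting.

     # Illustrative sketch only -- not the zetuptoolz scriptsetup code.
     # Assumes a native CPython at sys.executable on Windows.
     import sys
     import _winreg as winreg   # named "winreg" on Python 3

     def associate_pyscript_for_current_user():
         # Map .pyscript to a (hypothetical) "Python.Script" progid under
         # HKEY_CURRENT_USER\Software\Classes, so it affects this user only.
         classes = winreg.CreateKey(winreg.HKEY_CURRENT_USER, r"Software\Classes")
         winreg.SetValue(classes, ".pyscript", winreg.REG_SZ, "Python.Script")
         command = '"%s" "%%1" %%*' % (sys.executable,)
         winreg.SetValue(classes, r"Python.Script\shell\open\command",
                         winreg.REG_SZ, command)

     def add_pyscript_to_pathext_for_current_user():
         # Append .PYSCRIPT and .PYW to the per-user PATHEXT override in
         # HKEY_CURRENT_USER\Environment, creating the value if missing.
         env = winreg.CreateKey(winreg.HKEY_CURRENT_USER, "Environment")
         try:
             pathext = winreg.QueryValueEx(env, "PATHEXT")[0]
         except WindowsError:
             pathext = "%PATHEXT%"
         exts = [e for e in pathext.split(";") if e]
         for new_ext in (".PYSCRIPT", ".PYW"):
             if new_ext.upper() not in [e.upper() for e in exts]:
                 exts.append(new_ext)
         winreg.SetValueEx(env, "PATHEXT", 0, winreg.REG_EXPAND_SZ, ";".join(exts))

     if __name__ == "__main__":
         associate_pyscript_for_current_user()
         add_pyscript_to_pathext_for_current_user()

   Note that a newly opened Command Prompt only sees the updated per-user
   PATHEXT after the environment change has been broadcast to running
   processes (or after logging out and back in).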
tahoe-lafs-1.10.0/src/000077500000000000000000000000001221140116300143725ustar00rootroot00000000000000tahoe-lafs-1.10.0/src/allmydata/000077500000000000000000000000001221140116300163425ustar00rootroot00000000000000tahoe-lafs-1.10.0/src/allmydata/__init__.py000066400000000000000000000421021221140116300204520ustar00rootroot00000000000000""" Decentralized storage grid. community web site: U{https://tahoe-lafs.org/} """ class PackagingError(EnvironmentError): """ Raised when there is an error in packaging of Tahoe-LAFS or its dependencies which makes it impossible to proceed safely. """ pass __version__ = "unknown" try: from allmydata._version import __version__ except ImportError: # We're running in a tree that hasn't run "./setup.py darcsver", and didn't # come with a _version.py, so we don't know what our version is. This should # not happen very often. pass __appname__ = "unknown" try: from allmydata._appname import __appname__ except ImportError: # We're running in a tree that hasn't run "./setup.py". This shouldn't happen. pass # __full_version__ is the one that you ought to use when identifying yourself in the # "application" part of the Tahoe versioning scheme: # https://tahoe-lafs.org/trac/tahoe-lafs/wiki/Versioning __full_version__ = __appname__ + '/' + str(__version__) import os, platform, re, subprocess, sys, traceback _distributor_id_cmdline_re = re.compile("(?:Distributor ID:)\s*(.*)", re.I) _release_cmdline_re = re.compile("(?:Release:)\s*(.*)", re.I) _distributor_id_file_re = re.compile("(?:DISTRIB_ID\s*=)\s*(.*)", re.I) _release_file_re = re.compile("(?:DISTRIB_RELEASE\s*=)\s*(.*)", re.I) global _distname,_version _distname = None _version = None def get_linux_distro(): """ Tries to determine the name of the Linux OS distribution name. First, try to parse a file named "/etc/lsb-release". If it exists, and contains the "DISTRIB_ID=" line and the "DISTRIB_RELEASE=" line, then return the strings parsed from that file. If that doesn't work, then invoke platform.dist(). If that doesn't work, then try to execute "lsb_release", as standardized in 2001: http://refspecs.freestandards.org/LSB_1.0.0/gLSB/lsbrelease.html The current version of the standard is here: http://refspecs.freestandards.org/LSB_3.2.0/LSB-Core-generic/LSB-Core-generic/lsbrelease.html that lsb_release emitted, as strings. Returns a tuple (distname,version). Distname is what LSB calls a "distributor id", e.g. "Ubuntu". Version is what LSB calls a "release", e.g. "8.04". 
A version of this has been submitted to python as a patch for the standard library module "platform": http://bugs.python.org/issue3937 """ global _distname,_version if _distname and _version: return (_distname, _version) try: etclsbrel = open("/etc/lsb-release", "rU") for line in etclsbrel: m = _distributor_id_file_re.search(line) if m: _distname = m.group(1).strip() if _distname and _version: return (_distname, _version) m = _release_file_re.search(line) if m: _version = m.group(1).strip() if _distname and _version: return (_distname, _version) except EnvironmentError: pass (_distname, _version) = platform.dist()[:2] if _distname and _version: return (_distname, _version) try: p = subprocess.Popen(["lsb_release", "--all"], stdout=subprocess.PIPE, stderr=subprocess.PIPE) rc = p.wait() if rc == 0: for line in p.stdout.readlines(): m = _distributor_id_cmdline_re.search(line) if m: _distname = m.group(1).strip() if _distname and _version: return (_distname, _version) m = _release_cmdline_re.search(p.stdout.read()) if m: _version = m.group(1).strip() if _distname and _version: return (_distname, _version) except EnvironmentError: pass if os.path.exists("/etc/arch-release"): return ("Arch_Linux", "") return (_distname,_version) def get_platform(): # Our version of platform.platform(), telling us both less and more than the # Python Standard Library's version does. # We omit details such as the Linux kernel version number, but we add a # more detailed and correct rendition of the Linux distribution and # distribution-version. if "linux" in platform.system().lower(): return platform.system()+"-"+"_".join(get_linux_distro())+"-"+platform.machine()+"-"+"_".join([x for x in platform.architecture() if x]) else: return platform.platform() from allmydata.util import verlib def normalized_version(verstr, what=None): try: return verlib.NormalizedVersion(verlib.suggest_normalized_version(verstr)) except (StandardError, verlib.IrrationalVersionError): cls, value, trace = sys.exc_info() raise PackagingError, ("could not parse %s due to %s: %s" % (what or repr(verstr), cls.__name__, value)), trace def get_package_versions_and_locations(): import warnings from _auto_deps import package_imports, global_deprecation_messages, deprecation_messages, \ user_warning_messages, runtime_warning_messages, warning_imports def package_dir(srcfile): return os.path.dirname(os.path.dirname(os.path.normcase(os.path.realpath(srcfile)))) # pkg_resources.require returns the distribution that pkg_resources attempted to put # on sys.path, which can differ from the one that we actually import due to #1258, # or any other bug that causes sys.path to be set up incorrectly. Therefore we # must import the packages in order to check their versions and paths. # This is to suppress various DeprecationWarnings, UserWarnings, and RuntimeWarnings # (listed in _auto_deps.py). for msg in global_deprecation_messages + deprecation_messages: warnings.filterwarnings("ignore", category=DeprecationWarning, message=msg, append=True) for msg in user_warning_messages: warnings.filterwarnings("ignore", category=UserWarning, message=msg, append=True) for msg in runtime_warning_messages: warnings.filterwarnings("ignore", category=RuntimeWarning, message=msg, append=True) try: for modulename in warning_imports: try: __import__(modulename) except ImportError: pass finally: # Leave suppressions for global_deprecation_messages active. 
for ign in runtime_warning_messages + user_warning_messages + deprecation_messages: warnings.filters.pop() packages = [] def get_version(module, attr): return str(getattr(module, attr, 'unknown')) for pkgname, modulename in [(__appname__, 'allmydata')] + package_imports: if modulename: try: __import__(modulename) module = sys.modules[modulename] except ImportError: etype, emsg, etrace = sys.exc_info() trace_info = (etype, str(emsg), ([None] + traceback.extract_tb(etrace))[-1]) packages.append( (pkgname, (None, None, trace_info)) ) else: comment = None if pkgname == 'setuptools' and hasattr(module, '_distribute'): # distribute does not report its version in any module variables comment = 'distribute' packages.append( (pkgname, (get_version(module, '__version__'), package_dir(module.__file__), comment)) ) elif pkgname == 'python': packages.append( (pkgname, (platform.python_version(), sys.executable, None)) ) elif pkgname == 'platform': packages.append( (pkgname, (get_platform(), None, None)) ) return packages def check_requirement(req, vers_and_locs): # TODO: check [] options # We support only disjunctions of <=, >=, and == reqlist = req.split(',') name = reqlist[0].split('<=')[0].split('>=')[0].split('==')[0].strip(' ').split('[')[0] if name not in vers_and_locs: raise PackagingError("no version info for %s" % (name,)) if req.strip(' ') == name: return (actual, location, comment) = vers_and_locs[name] if actual is None: # comment is (type, message, (filename, line number, function name, text)) for the original ImportError raise ImportError("for requirement %r: %s" % (req, comment)) if actual == 'unknown': return actualver = normalized_version(actual, what="actual version %r of %s from %r" % (actual, name, location)) for r in reqlist: s = r.split('<=') if len(s) == 2: required = s[1].strip(' ') if actualver <= normalized_version(required, what="required maximum version %r in %r" % (required, req)): return # maximum requirement met else: s = r.split('>=') if len(s) == 2: required = s[1].strip(' ') if actualver >= normalized_version(required, what="required minimum version %r in %r" % (required, req)): return # minimum requirement met else: s = r.split('==') if len(s) == 2: required = s[1].strip(' ') if actualver == normalized_version(required, what="required exact version %r in %r" % (required, req)): return # exact requirement met else: raise PackagingError("no version info or could not understand requirement %r" % (req,)) msg = ("We require %s, but could only find version %s.\n" % (req, actual)) if location and location != 'unknown': msg += "The version we found is from %r.\n" % (location,) msg += ("To resolve this problem, uninstall that version, either using your\n" "operating system's package manager or by moving aside the directory.") raise PackagingError(msg) _vers_and_locs_list = get_package_versions_and_locations() def cross_check_pkg_resources_versus_import(): """This function returns a list of errors due to any failed cross-checks.""" import pkg_resources from _auto_deps import install_requires pkg_resources_vers_and_locs = dict([(p.project_name.lower(), (str(p.version), p.location)) for p in pkg_resources.require(install_requires)]) return cross_check(pkg_resources_vers_and_locs, _vers_and_locs_list) def cross_check(pkg_resources_vers_and_locs, imported_vers_and_locs_list): """This function returns a list of errors due to any failed cross-checks.""" errors = [] not_pkg_resourceable = set(['python', 'platform', __appname__.lower()]) not_import_versionable = set(['zope.interface', 
'mock', 'pyasn1']) ignorable = set(['argparse', 'pyutil', 'zbase32', 'distribute', 'twisted-web', 'twisted-core', 'twisted-conch']) for name, (imp_ver, imp_loc, imp_comment) in imported_vers_and_locs_list: name = name.lower() if name not in not_pkg_resourceable: if name not in pkg_resources_vers_and_locs: if name == "setuptools" and "distribute" in pkg_resources_vers_and_locs: pr_ver, pr_loc = pkg_resources_vers_and_locs["distribute"] if not (os.path.normpath(os.path.realpath(pr_loc)) == os.path.normpath(os.path.realpath(imp_loc)) and imp_comment == "distribute"): errors.append("Warning: dependency 'setuptools' found to be version %r of 'distribute' from %r " "by pkg_resources, but 'import setuptools' gave version %r [%s] from %r. " "A version mismatch is expected, but a location mismatch is not." % (pr_ver, pr_loc, imp_ver, imp_comment or 'probably *not* distribute', imp_loc)) else: errors.append("Warning: dependency %r (version %r imported from %r) was not found by pkg_resources." % (name, imp_ver, imp_loc)) continue pr_ver, pr_loc = pkg_resources_vers_and_locs[name] if imp_ver is None and imp_loc is None: errors.append("Warning: dependency %r could not be imported. pkg_resources thought it should be possible " "to import version %r from %r.\nThe exception trace was %r." % (name, pr_ver, pr_loc, imp_comment)) continue try: pr_normver = normalized_version(pr_ver) except Exception, e: errors.append("Warning: version number %r found for dependency %r by pkg_resources could not be parsed. " "The version found by import was %r from %r. " "pkg_resources thought it should be found at %r. " "The exception was %s: %s" % (pr_ver, name, imp_ver, imp_loc, pr_loc, e.__class__.__name__, e)) else: if imp_ver == 'unknown': if name not in not_import_versionable: errors.append("Warning: unexpectedly could not find a version number for dependency %r imported from %r. " "pkg_resources thought it should be version %r at %r." % (name, imp_loc, pr_ver, pr_loc)) else: try: imp_normver = normalized_version(imp_ver) except Exception, e: errors.append("Warning: version number %r found for dependency %r (imported from %r) could not be parsed. " "pkg_resources thought it should be version %r at %r. " "The exception was %s: %s" % (imp_ver, name, imp_loc, pr_ver, pr_loc, e.__class__.__name__, e)) else: if pr_ver == 'unknown' or (pr_normver != imp_normver): if not os.path.normpath(os.path.realpath(pr_loc)) == os.path.normpath(os.path.realpath(imp_loc)): errors.append("Warning: dependency %r found to have version number %r (normalized to %r, from %r) " "by pkg_resources, but version %r (normalized to %r, from %r) by import." % (name, pr_ver, str(pr_normver), pr_loc, imp_ver, str(imp_normver), imp_loc)) imported_packages = set([p.lower() for (p, _) in imported_vers_and_locs_list]) for pr_name, (pr_ver, pr_loc) in pkg_resources_vers_and_locs.iteritems(): if pr_name not in imported_packages and pr_name not in ignorable: errors.append("Warning: dependency %r (version %r) found by pkg_resources not found by import." 
% (pr_name, pr_ver)) return errors def get_error_string(errors, debug=False): from allmydata._auto_deps import install_requires msg = "\n%s\n" % ("\n".join(errors),) if debug: msg += ("\n" "For debugging purposes, the PYTHONPATH was\n" " %r\n" "install_requires was\n" " %r\n" "sys.path after importing pkg_resources was\n" " %s\n" % (os.environ.get('PYTHONPATH'), install_requires, (os.pathsep+"\n ").join(sys.path)) ) return msg def check_all_requirements(): """This function returns a list of errors due to any failed checks.""" from allmydata._auto_deps import install_requires errors = [] # We require at least 2.6 on all platforms. # (On Python 3, we'll have failed long before this point.) if sys.version_info < (2, 6): try: version_string = ".".join(map(str, sys.version_info)) except Exception: version_string = repr(sys.version_info) errors.append("Tahoe-LAFS currently requires Python v2.6 or greater (but less than v3), not %s" % (version_string,)) vers_and_locs = dict(_vers_and_locs_list) for requirement in install_requires: try: check_requirement(requirement, vers_and_locs) except (ImportError, PackagingError), e: errors.append("%s: %s" % (e.__class__.__name__, e)) if errors: raise PackagingError(get_error_string(errors, debug=True)) check_all_requirements() def get_package_versions(): return dict([(k, v) for k, (v, l, c) in _vers_and_locs_list]) def get_package_locations(): return dict([(k, l) for k, (v, l, c) in _vers_and_locs_list]) def get_package_versions_string(show_paths=False, debug=False): res = [] for p, (v, loc, comment) in _vers_and_locs_list: info = str(p) + ": " + str(v) if comment: info = info + " [%s]" % str(comment) if show_paths: info = info + " (%s)" % str(loc) res.append(info) output = "\n".join(res) + "\n" if not hasattr(sys, 'frozen'): errors = cross_check_pkg_resources_versus_import() if errors: output += get_error_string(errors, debug=debug) return output tahoe-lafs-1.10.0/src/allmydata/_appname.py000066400000000000000000000000371221140116300204740ustar00rootroot00000000000000__appname__ = 'allmydata-tahoe'tahoe-lafs-1.10.0/src/allmydata/_auto_deps.py000066400000000000000000000124661221140116300210470ustar00rootroot00000000000000# Note: please minimize imports in this file. In particular, do not import # any module from Tahoe-LAFS or its dependencies, and do not import any # modules at all at global level. That includes setuptools and pkg_resources. # It is ok to import modules from the Python Standard Library if they are # always available, or the import is protected by try...except ImportError. install_requires = [ # we require newer versions of setuptools (actually # zetuptoolz) to build, but can handle older versions to run "setuptools >= 0.6c6", "zfec >= 1.1.0", # Feisty has simplejson 1.4 "simplejson >= 1.4", # zope.interface >= 3.6.0 is required for Twisted >= 12.1.0. # zope.interface 3.6.3 and 3.6.4 are incompatible with Nevow (#1435). "zope.interface == 3.6.0, == 3.6.1, == 3.6.2, >= 3.6.5", # * On Windows we need at least Twisted 9.0 to avoid an indirect # dependency on pywin32. # * On Linux we need at least Twisted 10.1.0 for inotify support used by # the drop-upload frontend. # * We also need Twisted 10.1 for the FTP frontend in order for Twisted's # FTP server to support asynchronous close. 
# * When the cloud backend lands, it will depend on Twisted 10.2.0 which # includes the fix to https://twistedmatrix.com/trac/ticket/411 # * The SFTP frontend depends on Twisted 11.0.0 to fix the SSH server # rekeying bug http://twistedmatrix.com/trac/ticket/4395 # "Twisted >= 11.0.0", # * foolscap < 0.5.1 had a performance bug which spent O(N**2) CPU for # transferring large mutable files of size N. # * foolscap < 0.6 is incompatible with Twisted 10.2.0. # * foolscap 0.6.1 quiets a DeprecationWarning. # * foolscap < 0.6.3 is incompatible with Twisted-11.1.0 and newer. Since # current Twisted is 12.0, any build which needs twisted will grab a # version that requires foolscap>=0.6.3 # * pyOpenSSL is required by foolscap for it (foolscap) to provide secure # connections. Foolscap doesn't reliably declare this dependency in a # machine-readable way, so we need to declare a dependency on pyOpenSSL # ourselves. Tahoe-LAFS doesn't *really* depend directly on pyOpenSSL, # so if something changes in the relationship between foolscap and # pyOpenSSL, such as foolscap requiring a specific version of # pyOpenSSL, or foolscap switching from pyOpenSSL to a different crypto # library, we need to update this declaration here. # "foolscap >= 0.6.3", "pyOpenSSL", "Nevow >= 0.6.0", # Needed for SFTP. pyasn1 is needed by twisted.conch in Twisted >= 9.0. # pycrypto 2.2 doesn't work due to https://bugs.launchpad.net/pycrypto/+bug/620253 # pycrypto 2.4 doesn't work due to https://bugs.launchpad.net/pycrypto/+bug/881130 "pycrypto == 2.1.0, == 2.3, >= 2.4.1", "pyasn1 >= 0.0.8a", # http://www.voidspace.org.uk/python/mock/ , 0.8.0 provides "call" "mock >= 0.8.0", # pycryptopp-0.6.0 includes ed25519 "pycryptopp >= 0.6.0", # Will be needed to test web apps, but not yet. See #1001. #"windmill >= 1.3", ] # Includes some indirect dependencies, but does not include allmydata. # These are in the order they should be listed by --version, etc. package_imports = [ # package name module name ('foolscap', 'foolscap'), ('pycryptopp', 'pycryptopp'), ('zfec', 'zfec'), ('Twisted', 'twisted'), ('Nevow', 'nevow'), ('zope.interface', 'zope.interface'), ('python', None), ('platform', None), ('pyOpenSSL', 'OpenSSL'), ('simplejson', 'simplejson'), ('pycrypto', 'Crypto'), ('pyasn1', 'pyasn1'), ('mock', 'mock'), ] def require_more(): import sys # Don't try to get the version number of setuptools in frozen builds, because # that triggers 'site' processing that causes failures. Note that frozen # builds still (unfortunately) import pkg_resources in .tac files, so the # entry for setuptools in install_requires above isn't conditional. if not hasattr(sys, 'frozen'): package_imports.append(('setuptools', 'setuptools')) require_more() # These are suppressed globally: global_deprecation_messages = [ "BaseException.message has been deprecated as of Python 2.6", "twisted.internet.interfaces.IFinishableConsumer was deprecated in Twisted 11.1.0: Please use IConsumer (and IConsumer.unregisterProducer) instead.", ] # These are suppressed while importing dependencies: deprecation_messages = [ "the sha module is deprecated; use the hashlib module instead", "object.__new__\(\) takes no parameters", "The popen2 module is deprecated. Use the subprocess module.", "the md5 module is deprecated; use hashlib instead", "twisted.web.error.NoResource is deprecated since Twisted 9.0. 
See twisted.web.resource.NoResource.", "the sets module is deprecated", ] user_warning_messages = [ "Hashing uninitialized InterfaceClass instance", "Reliable disconnection notification requires pywin32 215 or later", ] runtime_warning_messages = [ "Not using mpz_powm_sec. You should rebuild using libgmp >= 5 to avoid timing attack vulnerability.", ] warning_imports = [ 'nevow', 'twisted.persisted.sob', 'twisted.python.filepath', 'Crypto.Hash.SHA', ] tahoe-lafs-1.10.0/src/allmydata/_version.py000066400000000000000000000003441221140116300205410ustar00rootroot00000000000000 # This _version.py is generated from git metadata by the tahoe setup.py. __pkgname__ = "allmydata-tahoe" real_version = "1.10.0" full_version = "f9af0633d8da426cbcaed3ff05ab6d7128148bb0" verstr = "1.10.0" __version__ = verstr tahoe-lafs-1.10.0/src/allmydata/blacklist.py000066400000000000000000000113441221140116300206670ustar00rootroot00000000000000 import os from zope.interface import implements from twisted.internet import defer from twisted.python import log as twisted_log from allmydata.interfaces import IFileNode, IFilesystemNode from allmydata.util import base32 from allmydata.util.encodingutil import quote_output class FileProhibited(Exception): """This client has been configured to prohibit access to this object.""" def __init__(self, reason): Exception.__init__(self, "Access Prohibited: %s" % quote_output(reason, encoding='utf-8', quotemarks=False)) self.reason = reason class Blacklist: def __init__(self, blacklist_fn): self.blacklist_fn = blacklist_fn self.last_mtime = None self.entries = {} self.read_blacklist() # sets .last_mtime and .entries def read_blacklist(self): try: current_mtime = os.stat(self.blacklist_fn).st_mtime except EnvironmentError: # unreadable blacklist file means no blacklist self.entries.clear() return try: if self.last_mtime is None or current_mtime > self.last_mtime: self.entries.clear() for line in open(self.blacklist_fn, "r").readlines(): line = line.strip() if not line or line.startswith("#"): continue si_s, reason = line.split(None, 1) si = base32.a2b(si_s) # must be valid base32 self.entries[si] = reason self.last_mtime = current_mtime except Exception, e: twisted_log.err(e, "unparseable blacklist file") raise def check_storageindex(self, si): self.read_blacklist() reason = self.entries.get(si, None) if reason is not None: # log this to logs/twistd.log, since web logs go there too twisted_log.msg("blacklist prohibited access to SI %s: %s" % (base32.b2a(si), reason)) return reason class ProhibitedNode: implements(IFileNode) def __init__(self, wrapped_node, reason): assert IFilesystemNode.providedBy(wrapped_node), wrapped_node self.wrapped_node = wrapped_node self.reason = reason def get_cap(self): return self.wrapped_node.get_cap() def get_readcap(self): return self.wrapped_node.get_readcap() def is_readonly(self): return self.wrapped_node.is_readonly() def is_mutable(self): return self.wrapped_node.is_mutable() def is_unknown(self): return self.wrapped_node.is_unknown() def is_allowed_in_immutable_directory(self): return self.wrapped_node.is_allowed_in_immutable_directory() def is_alleged_immutable(self): return self.wrapped_node.is_alleged_immutable() def raise_error(self): # We don't raise an exception here because that would prevent the node from being listed. 
pass def get_uri(self): return self.wrapped_node.get_uri() def get_write_uri(self): return self.wrapped_node.get_write_uri() def get_readonly_uri(self): return self.wrapped_node.get_readonly_uri() def get_storage_index(self): return self.wrapped_node.get_storage_index() def get_verify_cap(self): return self.wrapped_node.get_verify_cap() def get_repair_cap(self): return self.wrapped_node.get_repair_cap() def get_size(self): return None def get_current_size(self): return defer.succeed(None) def get_size_of_best_version(self): return defer.succeed(None) def check(self, monitor, verify, add_lease): return defer.succeed(None) def check_and_repair(self, monitor, verify, add_lease): return defer.succeed(None) def get_version(self): return None # Omitting any of these methods would fail safe; they are just to ensure correct error reporting. def get_best_readable_version(self): raise FileProhibited(self.reason) def download_best_version(self): raise FileProhibited(self.reason) def get_best_mutable_version(self): raise FileProhibited(self.reason) def overwrite(self, new_contents): raise FileProhibited(self.reason) def modify(self, modifier_cb): raise FileProhibited(self.reason) def get_servermap(self, mode): raise FileProhibited(self.reason) def download_version(self, servermap, version): raise FileProhibited(self.reason) def upload(self, new_contents, servermap): raise FileProhibited(self.reason) def get_writekey(self): raise FileProhibited(self.reason) def read(self, consumer, offset=0, size=None): raise FileProhibited(self.reason) tahoe-lafs-1.10.0/src/allmydata/check_results.py000066400000000000000000000266331221140116300215640ustar00rootroot00000000000000 from zope.interface import implements from allmydata.interfaces import ICheckResults, ICheckAndRepairResults, \ IDeepCheckResults, IDeepCheckAndRepairResults, IURI, IDisplayableServer from allmydata.util import base32 class CheckResults: implements(ICheckResults) def __init__(self, uri, storage_index, healthy, recoverable, needs_rebalancing, count_shares_needed, count_shares_expected, count_shares_good, count_good_share_hosts, count_recoverable_versions, count_unrecoverable_versions, servers_responding, sharemap, count_wrong_shares, list_corrupt_shares, count_corrupt_shares, list_incompatible_shares, count_incompatible_shares, summary, report, share_problems, servermap): assert IURI.providedBy(uri), uri self._uri = uri self._storage_index = storage_index self._summary = "" self._healthy = bool(healthy) if self._healthy: assert recoverable if not summary: summary = "healthy" else: if not summary: summary = "not healthy" self._recoverable = recoverable if not self._recoverable: assert not self._healthy self._needs_rebalancing_p = bool(needs_rebalancing) self._count_shares_needed = count_shares_needed self._count_shares_expected = count_shares_expected self._count_shares_good = count_shares_good self._count_good_share_hosts = count_good_share_hosts self._count_recoverable_versions = count_recoverable_versions self._count_unrecoverable_versions = count_unrecoverable_versions for server in servers_responding: assert IDisplayableServer.providedBy(server), server self._servers_responding = servers_responding for shnum, servers in sharemap.items(): for server in servers: assert IDisplayableServer.providedBy(server), server self._sharemap = sharemap self._count_wrong_shares = count_wrong_shares for (server, SI, shnum) in list_corrupt_shares: assert IDisplayableServer.providedBy(server), server self._list_corrupt_shares = list_corrupt_shares 
self._count_corrupt_shares = count_corrupt_shares for (server, SI, shnum) in list_incompatible_shares: assert IDisplayableServer.providedBy(server), server self._list_incompatible_shares = list_incompatible_shares self._count_incompatible_shares = count_incompatible_shares assert isinstance(summary, str) # should be a single string self._summary = summary assert not isinstance(report, str) # should be list of strings self._report = report if servermap: from allmydata.mutable.servermap import ServerMap assert isinstance(servermap, ServerMap), servermap self._servermap = servermap # mutable only self._share_problems = share_problems def get_storage_index(self): return self._storage_index def get_storage_index_string(self): return base32.b2a(self._storage_index) def get_uri(self): return self._uri def is_healthy(self): return self._healthy def is_recoverable(self): return self._recoverable def needs_rebalancing(self): return self._needs_rebalancing_p def get_encoding_needed(self): return self._count_shares_needed def get_encoding_expected(self): return self._count_shares_expected def get_share_counter_good(self): return self._count_shares_good def get_share_counter_wrong(self): return self._count_wrong_shares def get_corrupt_shares(self): return self._list_corrupt_shares def get_incompatible_shares(self): return self._list_incompatible_shares def get_servers_responding(self): return self._servers_responding def get_host_counter_good_shares(self): return self._count_good_share_hosts def get_version_counter_recoverable(self): return self._count_recoverable_versions def get_version_counter_unrecoverable(self): return self._count_unrecoverable_versions def get_sharemap(self): return self._sharemap def as_dict(self): sharemap = {} for shnum, servers in self._sharemap.items(): sharemap[shnum] = sorted([s.get_serverid() for s in servers]) responding = [s.get_serverid() for s in self._servers_responding] corrupt = [(s.get_serverid(), SI, shnum) for (s, SI, shnum) in self._list_corrupt_shares] incompatible = [(s.get_serverid(), SI, shnum) for (s, SI, shnum) in self._list_incompatible_shares] d = {"count-shares-needed": self._count_shares_needed, "count-shares-expected": self._count_shares_expected, "count-shares-good": self._count_shares_good, "count-good-share-hosts": self._count_good_share_hosts, "count-recoverable-versions": self._count_recoverable_versions, "count-unrecoverable-versions": self._count_unrecoverable_versions, "servers-responding": responding, "sharemap": sharemap, "count-wrong-shares": self._count_wrong_shares, "list-corrupt-shares": corrupt, "count-corrupt-shares": self._count_corrupt_shares, "list-incompatible-shares": incompatible, "count-incompatible-shares": self._count_incompatible_shares, } return d def get_summary(self): return self._summary def get_report(self): return self._report def get_share_problems(self): return self._share_problems def get_servermap(self): return self._servermap class CheckAndRepairResults: implements(ICheckAndRepairResults) def __init__(self, storage_index): self.storage_index = storage_index self.repair_attempted = False def get_storage_index(self): return self.storage_index def get_storage_index_string(self): return base32.b2a(self.storage_index) def get_repair_attempted(self): return self.repair_attempted def get_repair_successful(self): if not self.repair_attempted: return False return self.repair_successful def get_pre_repair_results(self): return self.pre_repair_results def get_post_repair_results(self): return self.post_repair_results class 
DeepResultsBase: def __init__(self, root_storage_index): self.root_storage_index = root_storage_index if root_storage_index is None: self.root_storage_index_s = "" # is this correct? else: self.root_storage_index_s = base32.b2a(root_storage_index) self.objects_checked = 0 self.objects_healthy = 0 self.objects_unhealthy = 0 self.objects_unrecoverable = 0 self.corrupt_shares = [] self.all_results = {} self.all_results_by_storage_index = {} self.stats = {} def update_stats(self, new_stats): self.stats.update(new_stats) def get_root_storage_index_string(self): return self.root_storage_index_s def get_corrupt_shares(self): return self.corrupt_shares def get_all_results(self): return self.all_results def get_results_for_storage_index(self, storage_index): return self.all_results_by_storage_index[storage_index] def get_stats(self): return self.stats class DeepCheckResults(DeepResultsBase): implements(IDeepCheckResults) def add_check(self, r, path): if not r: return # non-distributed object, i.e. LIT file r = ICheckResults(r) assert isinstance(path, (list, tuple)) self.objects_checked += 1 if r.is_healthy(): self.objects_healthy += 1 else: self.objects_unhealthy += 1 if not r.is_recoverable(): self.objects_unrecoverable += 1 self.all_results[tuple(path)] = r self.all_results_by_storage_index[r.get_storage_index()] = r self.corrupt_shares.extend(r.get_corrupt_shares()) def get_counters(self): return {"count-objects-checked": self.objects_checked, "count-objects-healthy": self.objects_healthy, "count-objects-unhealthy": self.objects_unhealthy, "count-objects-unrecoverable": self.objects_unrecoverable, "count-corrupt-shares": len(self.corrupt_shares), } class DeepCheckAndRepairResults(DeepResultsBase): implements(IDeepCheckAndRepairResults) def __init__(self, root_storage_index): DeepResultsBase.__init__(self, root_storage_index) self.objects_healthy_post_repair = 0 self.objects_unhealthy_post_repair = 0 self.objects_unrecoverable_post_repair = 0 self.repairs_attempted = 0 self.repairs_successful = 0 self.repairs_unsuccessful = 0 self.corrupt_shares_post_repair = [] def add_check_and_repair(self, r, path): if not r: return # non-distributed object, i.e. 
LIT file r = ICheckAndRepairResults(r) assert isinstance(path, (list, tuple)) pre_repair = r.get_pre_repair_results() post_repair = r.get_post_repair_results() self.objects_checked += 1 if pre_repair.is_healthy(): self.objects_healthy += 1 else: self.objects_unhealthy += 1 if not pre_repair.is_recoverable(): self.objects_unrecoverable += 1 self.corrupt_shares.extend(pre_repair.get_corrupt_shares()) if r.get_repair_attempted(): self.repairs_attempted += 1 if r.get_repair_successful(): self.repairs_successful += 1 else: self.repairs_unsuccessful += 1 if post_repair.is_healthy(): self.objects_healthy_post_repair += 1 else: self.objects_unhealthy_post_repair += 1 if not post_repair.is_recoverable(): self.objects_unrecoverable_post_repair += 1 self.all_results[tuple(path)] = r self.all_results_by_storage_index[r.get_storage_index()] = r self.corrupt_shares_post_repair.extend(post_repair.get_corrupt_shares()) def get_counters(self): return {"count-objects-checked": self.objects_checked, "count-objects-healthy-pre-repair": self.objects_healthy, "count-objects-unhealthy-pre-repair": self.objects_unhealthy, "count-objects-unrecoverable-pre-repair": self.objects_unrecoverable, "count-objects-healthy-post-repair": self.objects_healthy_post_repair, "count-objects-unhealthy-post-repair": self.objects_unhealthy_post_repair, "count-objects-unrecoverable-post-repair": self.objects_unrecoverable_post_repair, "count-repairs-attempted": self.repairs_attempted, "count-repairs-successful": self.repairs_successful, "count-repairs-unsuccessful": self.repairs_unsuccessful, "count-corrupt-shares-pre-repair": len(self.corrupt_shares), "count-corrupt-shares-post-repair": len(self.corrupt_shares_post_repair), } def get_remaining_corrupt_shares(self): return self.corrupt_shares_post_repair tahoe-lafs-1.10.0/src/allmydata/client.py000066400000000000000000000577631221140116300202140ustar00rootroot00000000000000import os, stat, time, weakref from allmydata import node from zope.interface import implements from twisted.internet import reactor, defer from twisted.application import service from twisted.application.internet import TimerService from pycryptopp.publickey import rsa import allmydata from allmydata.storage.server import StorageServer from allmydata import storage_client from allmydata.immutable.upload import Uploader from allmydata.immutable.offloaded import Helper from allmydata.control import ControlServer from allmydata.introducer.client import IntroducerClient from allmydata.util import hashutil, base32, pollmixin, log, keyutil, idlib from allmydata.util.encodingutil import get_filesystem_encoding from allmydata.util.abbreviate import parse_abbreviated_size from allmydata.util.time_format import parse_duration, parse_date from allmydata.stats import StatsProvider from allmydata.history import History from allmydata.interfaces import IStatsProducer, SDMF_VERSION, MDMF_VERSION from allmydata.nodemaker import NodeMaker from allmydata.blacklist import Blacklist from allmydata.node import OldConfigOptionError KiB=1024 MiB=1024*KiB GiB=1024*MiB TiB=1024*GiB PiB=1024*TiB def _make_secret(): return base32.b2a(os.urandom(hashutil.CRYPTO_VAL_SIZE)) + "\n" class SecretHolder: def __init__(self, lease_secret, convergence_secret): self._lease_secret = lease_secret self._convergence_secret = convergence_secret def get_renewal_secret(self): return hashutil.my_renewal_secret_hash(self._lease_secret) def get_cancel_secret(self): return hashutil.my_cancel_secret_hash(self._lease_secret) def get_convergence_secret(self): 
return self._convergence_secret class KeyGenerator: """I create RSA keys for mutable files. Each call to generate() returns a single keypair. The keysize is specified first by the keysize= argument to generate(), then with a default set by set_default_keysize(), then with a built-in default of 2048 bits.""" def __init__(self): self._remote = None self.default_keysize = 2048 def set_remote_generator(self, keygen): self._remote = keygen def set_default_keysize(self, keysize): """Call this to override the size of the RSA keys created for new mutable files which don't otherwise specify a size. This will affect all subsequent calls to generate() without a keysize= argument. The default size is 2048 bits. Test cases should call this method once during setup, to cause me to create smaller keys, so the unit tests run faster.""" self.default_keysize = keysize def generate(self, keysize=None): """I return a Deferred that fires with a (verifyingkey, signingkey) pair. I accept a keysize in bits (2048 bit keys are standard, smaller keys are used for testing). If you do not provide a keysize, I will use my default, which is set by a call to set_default_keysize(). If set_default_keysize() has never been called, I will create 2048 bit keys.""" keysize = keysize or self.default_keysize if self._remote: d = self._remote.callRemote('get_rsa_key_pair', keysize) def make_key_objs((verifying_key, signing_key)): v = rsa.create_verifying_key_from_string(verifying_key) s = rsa.create_signing_key_from_string(signing_key) return v, s d.addCallback(make_key_objs) return d else: # RSA key generation for a 2048 bit key takes between 0.8 and 3.2 # secs signer = rsa.generate(keysize) verifier = signer.get_verifying_key() return defer.succeed( (verifier, signer) ) class Terminator(service.Service): def __init__(self): self._clients = weakref.WeakKeyDictionary() def register(self, c): self._clients[c] = None def stopService(self): for c in self._clients: c.stop() return service.Service.stopService(self) class Client(node.Node, pollmixin.PollMixin): implements(IStatsProducer) PORTNUMFILE = "client.port" STOREDIR = 'storage' NODETYPE = "client" SUICIDE_PREVENTION_HOTLINE_FILE = "suicide_prevention_hotline" # This means that if a storage server treats me as though I were a # 1.0.0 storage client, it will work as they expect. OLDEST_SUPPORTED_VERSION = "1.0.0" # this is a tuple of (needed, desired, total, max_segment_size). 'needed' # is the number of shares required to reconstruct a file. 'desired' means # that we will abort an upload unless we can allocate space for at least # this many. 'total' is the total number of shares created by encoding. # If everybody has room then this is is how many we will upload. 
DEFAULT_ENCODING_PARAMETERS = {"k": 3, "happy": 7, "n": 10, "max_segment_size": 128*KiB, } def __init__(self, basedir="."): node.Node.__init__(self, basedir) self.started_timestamp = time.time() self.logSource="Client" self.DEFAULT_ENCODING_PARAMETERS = self.DEFAULT_ENCODING_PARAMETERS.copy() self.init_introducer_client() self.init_stats_provider() self.init_secrets() self.init_node_key() self.init_storage() self.init_control() self.helper = None if self.get_config("helper", "enabled", False, boolean=True): self.init_helper() self._key_generator = KeyGenerator() key_gen_furl = self.get_config("client", "key_generator.furl", None) if key_gen_furl: self.init_key_gen(key_gen_furl) self.init_client() # ControlServer and Helper are attached after Tub startup self.init_ftp_server() self.init_sftp_server() self.init_drop_uploader() hotline_file = os.path.join(self.basedir, self.SUICIDE_PREVENTION_HOTLINE_FILE) if os.path.exists(hotline_file): age = time.time() - os.stat(hotline_file)[stat.ST_MTIME] self.log("hotline file noticed (%ds old), starting timer" % age) hotline = TimerService(1.0, self._check_hotline, hotline_file) hotline.setServiceParent(self) # this needs to happen last, so it can use getServiceNamed() to # acquire references to StorageServer and other web-statusable things webport = self.get_config("node", "web.port", None) if webport: self.init_web(webport) # strports string def _sequencer(self): seqnum_s = self.get_config_from_file("announcement-seqnum") if not seqnum_s: seqnum_s = "0" seqnum = int(seqnum_s.strip()) seqnum += 1 # increment self.write_config("announcement-seqnum", "%d\n" % seqnum) nonce = _make_secret().strip() return seqnum, nonce def init_introducer_client(self): self.introducer_furl = self.get_config("client", "introducer.furl") ic = IntroducerClient(self.tub, self.introducer_furl, self.nickname, str(allmydata.__full_version__), str(self.OLDEST_SUPPORTED_VERSION), self.get_app_versions(), self._sequencer) self.introducer_client = ic # hold off on starting the IntroducerClient until our tub has been # started, so we'll have a useful address on our RemoteReference, so # that the introducer's status page will show us. d = self.when_tub_ready() def _start_introducer_client(res): ic.setServiceParent(self) d.addCallback(_start_introducer_client) d.addErrback(log.err, facility="tahoe.init", level=log.BAD, umid="URyI5w") def init_stats_provider(self): gatherer_furl = self.get_config("client", "stats_gatherer.furl", None) self.stats_provider = StatsProvider(self, gatherer_furl) self.add_service(self.stats_provider) self.stats_provider.register_producer(self) def get_stats(self): return { 'node.uptime': time.time() - self.started_timestamp } def init_secrets(self): lease_s = self.get_or_create_private_config("secret", _make_secret) lease_secret = base32.a2b(lease_s) convergence_s = self.get_or_create_private_config('convergence', _make_secret) self.convergence = base32.a2b(convergence_s) self._secret_holder = SecretHolder(lease_secret, self.convergence) def init_node_key(self): # we only create the key once. 
On all subsequent runs, we re-use the # existing key def _make_key(): sk_vs,vk_vs = keyutil.make_keypair() return sk_vs+"\n" sk_vs = self.get_or_create_private_config("node.privkey", _make_key) sk,vk_vs = keyutil.parse_privkey(sk_vs.strip()) self.write_config("node.pubkey", vk_vs+"\n") self._node_key = sk def get_long_nodeid(self): # this matches what IServer.get_longname() says about us elsewhere vk_bytes = self._node_key.get_verifying_key_bytes() return "v0-"+base32.b2a(vk_bytes) def get_long_tubid(self): return idlib.nodeid_b2a(self.nodeid) def _init_permutation_seed(self, ss): seed = self.get_config_from_file("permutation-seed") if not seed: have_shares = ss.have_shares() if have_shares: # if the server has shares but not a recorded # permutation-seed, then it has been around since pre-#466 # days, and the clients who uploaded those shares used our # TubID as a permutation-seed. We should keep using that same # seed to keep the shares in the same place in the permuted # ring, so those clients don't have to perform excessive # searches. seed = base32.b2a(self.nodeid) else: # otherwise, we're free to use the more natural seed of our # pubkey-based serverid vk_bytes = self._node_key.get_verifying_key_bytes() seed = base32.b2a(vk_bytes) self.write_config("permutation-seed", seed+"\n") return seed.strip() def init_storage(self): # should we run a storage server (and publish it for others to use)? if not self.get_config("storage", "enabled", True, boolean=True): return readonly = self.get_config("storage", "readonly", False, boolean=True) storedir = os.path.join(self.basedir, self.STOREDIR) data = self.get_config("storage", "reserved_space", None) try: reserved = parse_abbreviated_size(data) except ValueError: log.msg("[storage]reserved_space= contains unparseable value %s" % data) raise if reserved is None: reserved = 0 discard = self.get_config("storage", "debug_discard", False, boolean=True) expire = self.get_config("storage", "expire.enabled", False, boolean=True) if expire: mode = self.get_config("storage", "expire.mode") # require a mode else: mode = self.get_config("storage", "expire.mode", "age") o_l_d = self.get_config("storage", "expire.override_lease_duration", None) if o_l_d is not None: o_l_d = parse_duration(o_l_d) cutoff_date = None if mode == "cutoff-date": cutoff_date = self.get_config("storage", "expire.cutoff_date") cutoff_date = parse_date(cutoff_date) sharetypes = [] if self.get_config("storage", "expire.immutable", True, boolean=True): sharetypes.append("immutable") if self.get_config("storage", "expire.mutable", True, boolean=True): sharetypes.append("mutable") expiration_sharetypes = tuple(sharetypes) ss = StorageServer(storedir, self.nodeid, reserved_space=reserved, discard_storage=discard, readonly_storage=readonly, stats_provider=self.stats_provider, expiration_enabled=expire, expiration_mode=mode, expiration_override_lease_duration=o_l_d, expiration_cutoff_date=cutoff_date, expiration_sharetypes=expiration_sharetypes) self.add_service(ss) d = self.when_tub_ready() # we can't do registerReference until the Tub is ready def _publish(res): furl_file = os.path.join(self.basedir, "private", "storage.furl").encode(get_filesystem_encoding()) furl = self.tub.registerReference(ss, furlFile=furl_file) ann = {"anonymous-storage-FURL": furl, "permutation-seed-base32": self._init_permutation_seed(ss), } self.introducer_client.publish("storage", ann, self._node_key) d.addCallback(_publish) d.addErrback(log.err, facility="tahoe.init", level=log.BAD, umid="aLGBKw") def 
init_client(self): helper_furl = self.get_config("client", "helper.furl", None) if helper_furl in ("None", ""): helper_furl = None DEP = self.DEFAULT_ENCODING_PARAMETERS DEP["k"] = int(self.get_config("client", "shares.needed", DEP["k"])) DEP["n"] = int(self.get_config("client", "shares.total", DEP["n"])) DEP["happy"] = int(self.get_config("client", "shares.happy", DEP["happy"])) self.init_client_storage_broker() self.history = History(self.stats_provider) self.terminator = Terminator() self.terminator.setServiceParent(self) self.add_service(Uploader(helper_furl, self.stats_provider, self.history)) self.init_blacklist() self.init_nodemaker() def init_client_storage_broker(self): # create a StorageFarmBroker object, for use by Uploader/Downloader # (and everybody else who wants to use storage servers) sb = storage_client.StorageFarmBroker(self.tub, permute_peers=True) self.storage_broker = sb # load static server specifications from tahoe.cfg, if any. # Not quite ready yet. #if self.config.has_section("client-server-selection"): # server_params = {} # maps serverid to dict of parameters # for (name, value) in self.config.items("client-server-selection"): # pieces = name.split(".") # if pieces[0] == "server": # serverid = pieces[1] # if serverid not in server_params: # server_params[serverid] = {} # server_params[serverid][pieces[2]] = value # for serverid, params in server_params.items(): # server_type = params.pop("type") # if server_type == "tahoe-foolscap": # s = storage_client.NativeStorageClient(*params) # else: # msg = ("unrecognized server type '%s' in " # "tahoe.cfg [client-server-selection]server.%s.type" # % (server_type, serverid)) # raise storage_client.UnknownServerTypeError(msg) # sb.add_server(s.serverid, s) # check to see if we're supposed to use the introducer too if self.get_config("client-server-selection", "use_introducer", default=True, boolean=True): sb.use_introducer(self.introducer_client) def get_storage_broker(self): return self.storage_broker def init_blacklist(self): fn = os.path.join(self.basedir, "access.blacklist") self.blacklist = Blacklist(fn) def init_nodemaker(self): default = self.get_config("client", "mutable.format", default="SDMF") if default.upper() == "MDMF": self.mutable_file_default = MDMF_VERSION else: self.mutable_file_default = SDMF_VERSION self.nodemaker = NodeMaker(self.storage_broker, self._secret_holder, self.get_history(), self.getServiceNamed("uploader"), self.terminator, self.get_encoding_parameters(), self.mutable_file_default, self._key_generator, self.blacklist) def get_history(self): return self.history def init_control(self): d = self.when_tub_ready() def _publish(res): c = ControlServer() c.setServiceParent(self) control_url = self.tub.registerReference(c) self.write_private_config("control.furl", control_url + "\n") d.addCallback(_publish) d.addErrback(log.err, facility="tahoe.init", level=log.BAD, umid="d3tNXA") def init_helper(self): d = self.when_tub_ready() def _publish(self): self.helper = Helper(os.path.join(self.basedir, "helper"), self.storage_broker, self._secret_holder, self.stats_provider, self.history) # TODO: this is confusing. BASEDIR/private/helper.furl is created # by the helper. BASEDIR/helper.furl is consumed by the client # who wants to use the helper. I like having the filename be the # same, since that makes 'cp' work smoothly, but the difference # between config inputs and generated outputs is hard to see. 
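# --- Illustrative aside, not part of the original source. ---
# init_control(), init_helper() and init_key_gen() all follow the same shape:
# wait for the Tub to come up, register (or connect to) a Referenceable, and
# record the resulting FURL under private/. A condensed, hypothetical version
# of that pattern; MyService and "my_service.furl" are placeholders, not real
# tahoe components. 'log' would be allmydata.util.log as imported above.
import os
from foolscap.api import Referenceable

class MyService(Referenceable):
    pass  # remote_* methods would go here

def init_my_service(node):
    d = node.when_tub_ready()
    def _register(ign):
        svc = MyService()
        furl_file = os.path.join(node.basedir, "private", "my_service.furl")
        return node.tub.registerReference(svc, furlFile=furl_file)
    d.addCallback(_register)
    return d
# --- end aside ---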
helper_furlfile = os.path.join(self.basedir, "private", "helper.furl").encode(get_filesystem_encoding()) self.tub.registerReference(self.helper, furlFile=helper_furlfile) d.addCallback(_publish) d.addErrback(log.err, facility="tahoe.init", level=log.BAD, umid="K0mW5w") def init_key_gen(self, key_gen_furl): d = self.when_tub_ready() def _subscribe(self): self.tub.connectTo(key_gen_furl, self._got_key_generator) d.addCallback(_subscribe) d.addErrback(log.err, facility="tahoe.init", level=log.BAD, umid="z9DMzw") def _got_key_generator(self, key_generator): self._key_generator.set_remote_generator(key_generator) key_generator.notifyOnDisconnect(self._lost_key_generator) def _lost_key_generator(self): self._key_generator.set_remote_generator(None) def set_default_mutable_keysize(self, keysize): self._key_generator.set_default_keysize(keysize) def init_web(self, webport): self.log("init_web(webport=%s)", args=(webport,)) from allmydata.webish import WebishServer nodeurl_path = os.path.join(self.basedir, "node.url") staticdir = self.get_config("node", "web.static", "public_html") staticdir = os.path.expanduser(staticdir) ws = WebishServer(self, webport, nodeurl_path, staticdir) self.add_service(ws) def init_ftp_server(self): if self.get_config("ftpd", "enabled", False, boolean=True): accountfile = self.get_config("ftpd", "accounts.file", None) accounturl = self.get_config("ftpd", "accounts.url", None) ftp_portstr = self.get_config("ftpd", "port", "8021") from allmydata.frontends import ftpd s = ftpd.FTPServer(self, accountfile, accounturl, ftp_portstr) s.setServiceParent(self) def init_sftp_server(self): if self.get_config("sftpd", "enabled", False, boolean=True): accountfile = self.get_config("sftpd", "accounts.file", None) accounturl = self.get_config("sftpd", "accounts.url", None) sftp_portstr = self.get_config("sftpd", "port", "8022") pubkey_file = self.get_config("sftpd", "host_pubkey_file") privkey_file = self.get_config("sftpd", "host_privkey_file") from allmydata.frontends import sftpd s = sftpd.SFTPServer(self, accountfile, accounturl, sftp_portstr, pubkey_file, privkey_file) s.setServiceParent(self) def init_drop_uploader(self): if self.get_config("drop_upload", "enabled", False, boolean=True): if self.get_config("drop_upload", "upload.dircap", None): raise OldConfigOptionError("The [drop_upload]upload.dircap option is no longer supported; please " "put the cap in a 'private/drop_upload_dircap' file, and delete this option.") upload_dircap = self.get_or_create_private_config("drop_upload_dircap") local_dir_utf8 = self.get_config("drop_upload", "local.directory") try: from allmydata.frontends import drop_upload s = drop_upload.DropUploader(self, upload_dircap, local_dir_utf8) s.setServiceParent(self) s.startService() except Exception, e: self.log("couldn't start drop-uploader: %r", args=(e,)) def _check_hotline(self, hotline_file): if os.path.exists(hotline_file): mtime = os.stat(hotline_file)[stat.ST_MTIME] if mtime > time.time() - 120.0: return else: self.log("hotline file too old, shutting down") else: self.log("hotline file missing, shutting down") reactor.stop() def get_encoding_parameters(self): return self.DEFAULT_ENCODING_PARAMETERS def connected_to_introducer(self): if self.introducer_client: return self.introducer_client.connected_to_introducer() return False def get_renewal_secret(self): # this will go away return self._secret_holder.get_renewal_secret() def get_cancel_secret(self): return self._secret_holder.get_cancel_secret() def debug_wait_for_client_connections(self, 
num_clients): """Return a Deferred that fires (with None) when we have connections to the given number of peers. Useful for tests that set up a temporary test network and need to know when it is safe to proceed with an upload or download.""" def _check(): return len(self.storage_broker.get_connected_servers()) >= num_clients d = self.poll(_check, 0.5) d.addCallback(lambda res: None) return d # these four methods are the primitives for creating filenodes and # dirnodes. The first takes a URI and produces a filenode or (new-style) # dirnode. The other three create brand-new filenodes/dirnodes. def create_node_from_uri(self, write_uri, read_uri=None, deep_immutable=False, name=""): # This returns synchronously. # Note that it does *not* validate the write_uri and read_uri; instead we # may get an opaque node if there were any problems. return self.nodemaker.create_from_cap(write_uri, read_uri, deep_immutable=deep_immutable, name=name) def create_dirnode(self, initial_children={}, version=None): d = self.nodemaker.create_new_mutable_directory(initial_children, version=version) return d def create_immutable_dirnode(self, children, convergence=None): return self.nodemaker.create_immutable_directory(children, convergence) def create_mutable_file(self, contents=None, keysize=None, version=None): return self.nodemaker.create_mutable_file(contents, keysize, version=version) def upload(self, uploadable): uploader = self.getServiceNamed("uploader") return uploader.upload(uploadable) tahoe-lafs-1.10.0/src/allmydata/codec.py000066400000000000000000000055351221140116300200010ustar00rootroot00000000000000# -*- test-case-name: allmydata.test.test_encode_share -*- from zope.interface import implements from twisted.internet import defer from allmydata.util import mathutil from allmydata.util.assertutil import precondition from allmydata.interfaces import ICodecEncoder, ICodecDecoder import zfec class CRSEncoder(object): implements(ICodecEncoder) ENCODER_TYPE = "crs" def set_params(self, data_size, required_shares, max_shares): assert required_shares <= max_shares self.data_size = data_size self.required_shares = required_shares self.max_shares = max_shares self.share_size = mathutil.div_ceil(data_size, required_shares) self.last_share_padding = mathutil.pad_size(self.share_size, required_shares) self.encoder = zfec.Encoder(required_shares, max_shares) def get_encoder_type(self): return self.ENCODER_TYPE def get_params(self): return (self.data_size, self.required_shares, self.max_shares) def get_serialized_params(self): return "%d-%d-%d" % (self.data_size, self.required_shares, self.max_shares) def get_block_size(self): return self.share_size def encode(self, inshares, desired_share_ids=None): precondition(desired_share_ids is None or len(desired_share_ids) <= self.max_shares, desired_share_ids, self.max_shares) if desired_share_ids is None: desired_share_ids = range(self.max_shares) for inshare in inshares: assert len(inshare) == self.share_size, (len(inshare), self.share_size, self.data_size, self.required_shares) shares = self.encoder.encode(inshares, desired_share_ids) return defer.succeed((shares, desired_share_ids)) class CRSDecoder(object): implements(ICodecDecoder) def set_params(self, data_size, required_shares, max_shares): self.data_size = data_size self.required_shares = required_shares self.max_shares = max_shares self.chunk_size = self.required_shares self.num_chunks = mathutil.div_ceil(self.data_size, self.chunk_size) self.share_size = self.num_chunks self.decoder = 
zfec.Decoder(self.required_shares, self.max_shares) def get_needed_shares(self): return self.required_shares def decode(self, some_shares, their_shareids): precondition(len(some_shares) == len(their_shareids), len(some_shares), len(their_shareids)) precondition(len(some_shares) == self.required_shares, len(some_shares), self.required_shares) data = self.decoder.decode(some_shares, [int(s) for s in their_shareids]) return defer.succeed(data) def parse_params(serializedparams): pieces = serializedparams.split("-") return int(pieces[0]), int(pieces[1]), int(pieces[2]) tahoe-lafs-1.10.0/src/allmydata/control.py000066400000000000000000000204251221140116300203770ustar00rootroot00000000000000 import os, time from zope.interface import implements from twisted.application import service from twisted.internet import defer from twisted.internet.interfaces import IConsumer from foolscap.api import Referenceable from allmydata.interfaces import RIControlClient, IFileNode from allmydata.util import fileutil, mathutil from allmydata.immutable import upload from allmydata.mutable.publish import MutableData from twisted.python import log def get_memory_usage(): # this is obviously linux-specific stat_names = ("VmPeak", "VmSize", #"VmHWM", "VmData") stats = {} try: for line in open("/proc/self/status", "r").readlines(): name, right = line.split(":",2) if name in stat_names: assert right.endswith(" kB\n") right = right[:-4] stats[name] = int(right) * 1024 except: # Probably not on (a compatible version of) Linux stats['VmSize'] = 0 stats['VmPeak'] = 0 return stats def log_memory_usage(where=""): stats = get_memory_usage() log.msg("VmSize: %9d VmPeak: %9d %s" % (stats["VmSize"], stats["VmPeak"], where)) class FileWritingConsumer: implements(IConsumer) def __init__(self, filename): self.done = False self.f = open(filename, "wb") def registerProducer(self, p, streaming): if streaming: p.resumeProducing() else: while not self.done: p.resumeProducing() def write(self, data): self.f.write(data) def unregisterProducer(self): self.done = True self.f.close() class ControlServer(Referenceable, service.Service): implements(RIControlClient) def remote_wait_for_client_connections(self, num_clients): return self.parent.debug_wait_for_client_connections(num_clients) def remote_upload_from_file_to_uri(self, filename, convergence): uploader = self.parent.getServiceNamed("uploader") u = upload.FileName(filename, convergence=convergence) d = uploader.upload(u) d.addCallback(lambda results: results.get_uri()) return d def remote_download_from_uri_to_file(self, uri, filename): filenode = self.parent.create_node_from_uri(uri, name=filename) if not IFileNode.providedBy(filenode): raise AssertionError("The URI does not reference a file.") c = FileWritingConsumer(filename) d = filenode.read(c) d.addCallback(lambda res: filename) return d def remote_speed_test(self, count, size, mutable): assert size > 8 log.msg("speed_test: count=%d, size=%d, mutable=%s" % (count, size, mutable)) st = SpeedTest(self.parent, count, size, mutable) return st.run() def remote_get_memory_usage(self): return get_memory_usage() def remote_measure_peer_response_time(self): # I'd like to average together several pings, but I don't want this # phase to take more than 10 seconds. Expect worst-case latency to be # 300ms. 
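# --- Illustrative aside, not part of the original source. ---
# The arithmetic used just below, worked through once: with a 10-second
# budget and a worst-case latency of 0.3s per ping, the number of ping
# rounds per server is ceil(10 / (num_servers * 0.3)). For example, with 5
# connected servers that is ceil(10 / 1.5) = 7 rounds, i.e. 35 pings and a
# worst case of roughly 35 * 0.3 = 10.5 seconds. _ping_rounds is a
# hypothetical helper name; mathutil is allmydata.util.mathutil as imported above.
def _ping_rounds(num_servers, budget=10.0, worst_case_latency=0.3):
    return int(mathutil.div_ceil(budget, num_servers * worst_case_latency))
# --- end aside ---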
results = {} sb = self.parent.get_storage_broker() everyone = sb.get_connected_servers() num_pings = int(mathutil.div_ceil(10, (len(everyone) * 0.3))) everyone = list(everyone) * num_pings d = self._do_one_ping(None, everyone, results) return d def _do_one_ping(self, res, everyone_left, results): if not everyone_left: return results server = everyone_left.pop(0) server_name = server.get_longname() connection = server.get_rref() start = time.time() d = connection.callRemote("get_buckets", "\x00"*16) def _done(ignored): stop = time.time() elapsed = stop - start if server_name in results: results[server_name].append(elapsed) else: results[server_name] = [elapsed] d.addCallback(_done) d.addCallback(self._do_one_ping, everyone_left, results) def _average(res): averaged = {} for server_name,times in results.iteritems(): averaged[server_name] = sum(times) / len(times) return averaged d.addCallback(_average) return d class SpeedTest: def __init__(self, parent, count, size, mutable): self.parent = parent self.count = count self.size = size self.mutable_mode = mutable self.uris = {} self.basedir = os.path.join(self.parent.basedir, "_speed_test_data") def run(self): self.create_data() d = self.do_upload() d.addCallback(lambda res: self.do_download()) d.addBoth(self.do_cleanup) d.addCallback(lambda res: (self.upload_time, self.download_time)) return d def create_data(self): fileutil.make_dirs(self.basedir) for i in range(self.count): s = self.size fn = os.path.join(self.basedir, str(i)) if os.path.exists(fn): os.unlink(fn) f = open(fn, "w") f.write(os.urandom(8)) s -= 8 while s > 0: chunk = min(s, 4096) f.write("\x00" * chunk) s -= chunk f.close() def do_upload(self): d = defer.succeed(None) def _create_slot(res): d1 = self.parent.create_mutable_file("") def _created(n): self._n = n d1.addCallback(_created) return d1 if self.mutable_mode == "upload": d.addCallback(_create_slot) def _start(res): self._start = time.time() d.addCallback(_start) def _record_uri(uri, i): self.uris[i] = uri def _upload_one_file(ignored, i): if i >= self.count: return fn = os.path.join(self.basedir, str(i)) if self.mutable_mode == "create": data = open(fn,"rb").read() d1 = self.parent.create_mutable_file(data) d1.addCallback(lambda n: n.get_uri()) elif self.mutable_mode == "upload": data = open(fn,"rb").read() d1 = self._n.overwrite(MutableData(data)) d1.addCallback(lambda res: self._n.get_uri()) else: up = upload.FileName(fn, convergence=None) d1 = self.parent.upload(up) d1.addCallback(lambda results: results.get_uri()) d1.addCallback(_record_uri, i) d1.addCallback(_upload_one_file, i+1) return d1 d.addCallback(_upload_one_file, 0) def _upload_done(ignored): stop = time.time() self.upload_time = stop - self._start d.addCallback(_upload_done) return d def do_download(self): start = time.time() d = defer.succeed(None) def _download_one_file(ignored, i): if i >= self.count: return n = self.parent.create_node_from_uri(self.uris[i]) if not IFileNode.providedBy(n): raise AssertionError("The URI does not reference a file.") if n.is_mutable(): d1 = n.download_best_version() else: d1 = n.read(DiscardingConsumer()) d1.addCallback(_download_one_file, i+1) return d1 d.addCallback(_download_one_file, 0) def _download_done(ignored): stop = time.time() self.download_time = stop - start d.addCallback(_download_done) return d def do_cleanup(self, res): for i in range(self.count): fn = os.path.join(self.basedir, str(i)) os.unlink(fn) return res class DiscardingConsumer: implements(IConsumer) def __init__(self): self.done = False def 
registerProducer(self, p, streaming): if streaming: p.resumeProducing() else: while not self.done: p.resumeProducing() def write(self, data): pass def unregisterProducer(self): self.done = True tahoe-lafs-1.10.0/src/allmydata/debugshell.py000066400000000000000000000002161221140116300210310ustar00rootroot00000000000000 # 'app' is overwritten by manhole when the connection is established. We set # it to None now to keep pyflakes from complaining. app = None tahoe-lafs-1.10.0/src/allmydata/dirnode.py000066400000000000000000001163341221140116300203500ustar00rootroot00000000000000 import time, math, unicodedata from zope.interface import implements from twisted.internet import defer from foolscap.api import fireEventually import simplejson from allmydata.mutable.common import NotWriteableError from allmydata.mutable.filenode import MutableFileNode from allmydata.unknown import UnknownNode, strip_prefix_for_ro from allmydata.interfaces import IFilesystemNode, IDirectoryNode, IFileNode, \ IImmutableFileNode, IMutableFileNode, \ ExistingChildError, NoSuchChildError, ICheckable, IDeepCheckable, \ MustBeDeepImmutableError, CapConstraintError, ChildOfWrongTypeError from allmydata.check_results import DeepCheckResults, \ DeepCheckAndRepairResults from allmydata.monitor import Monitor from allmydata.util import hashutil, mathutil, base32, log from allmydata.util.encodingutil import quote_output from allmydata.util.assertutil import precondition from allmydata.util.netstring import netstring, split_netstring from allmydata.util.consumer import download_to_data from allmydata.uri import LiteralFileURI, from_string, wrap_dirnode_cap from pycryptopp.cipher.aes import AES from allmydata.util.dictutil import AuxValueDict def update_metadata(metadata, new_metadata, now): """Updates 'metadata' in-place with the information in 'new_metadata'. Timestamps are set according to the time 'now'.""" if metadata is None: metadata = {} old_ctime = None if 'ctime' in metadata: old_ctime = metadata['ctime'] if new_metadata is not None: # Overwrite all metadata. newmd = new_metadata.copy() # Except 'tahoe'. if 'tahoe' in newmd: del newmd['tahoe'] if 'tahoe' in metadata: newmd['tahoe'] = metadata['tahoe'] metadata = newmd # update timestamps sysmd = metadata.get('tahoe', {}) if 'linkcrtime' not in sysmd: # In Tahoe < 1.4.0 we used the word 'ctime' to mean what Tahoe >= 1.4.0 # calls 'linkcrtime'. This field is only used if it was in the old metadata, # and 'tahoe:linkcrtime' was not. if old_ctime is not None: sysmd['linkcrtime'] = old_ctime else: sysmd['linkcrtime'] = now sysmd['linkmotime'] = now metadata['tahoe'] = sysmd return metadata # 'x' at the end of a variable name indicates that it holds a Unicode string that may not # be NFC-normalized. def normalize(namex): return unicodedata.normalize('NFC', namex) # TODO: {Deleter,MetadataSetter,Adder}.modify all start by unpacking the # contents and end by repacking them. It might be better to apply them to # the unpacked contents. 
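# --- Illustrative aside, not part of the original source. ---
# The modifier classes below (Deleter, MetadataSetter, Adder) all share the
# shape that the TODO above refers to: MutableFileNode.modify() calls
# modify(old_contents, servermap, first_time); the modifier unpacks the
# serialized children, mutates the dict, and repacks it. A hypothetical
# "Renamer" written in the same style (not part of tahoe-lafs) makes the
# pattern explicit:
class Renamer:
    def __init__(self, node, old_namex, new_namex):
        self.node = node
        self.old_name = normalize(old_namex)
        self.new_name = normalize(new_namex)
    def modify(self, old_contents, servermap, first_time):
        children = self.node._unpack_contents(old_contents)
        if self.old_name not in children:
            raise NoSuchChildError(self.old_name)
        children[self.new_name] = children.pop(self.old_name)
        return self.node._pack_contents(children)
# A DirectoryNode method would then run it as:
#   d = self._node.modify(Renamer(self, u"old", u"new").modify)
# --- end aside ---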
class Deleter: def __init__(self, node, namex, must_exist=True, must_be_directory=False, must_be_file=False): self.node = node self.name = normalize(namex) self.must_exist = must_exist self.must_be_directory = must_be_directory self.must_be_file = must_be_file def modify(self, old_contents, servermap, first_time): children = self.node._unpack_contents(old_contents) if self.name not in children: if first_time and self.must_exist: raise NoSuchChildError(self.name) self.old_child = None return None self.old_child, metadata = children[self.name] # Unknown children can be removed regardless of must_be_directory or must_be_file. if self.must_be_directory and IFileNode.providedBy(self.old_child): raise ChildOfWrongTypeError("delete required a directory, not a file") if self.must_be_file and IDirectoryNode.providedBy(self.old_child): raise ChildOfWrongTypeError("delete required a file, not a directory") del children[self.name] new_contents = self.node._pack_contents(children) return new_contents class MetadataSetter: def __init__(self, node, namex, metadata, create_readonly_node=None): self.node = node self.name = normalize(namex) self.metadata = metadata self.create_readonly_node = create_readonly_node def modify(self, old_contents, servermap, first_time): children = self.node._unpack_contents(old_contents) name = self.name if name not in children: raise NoSuchChildError(name) now = time.time() child = children[name][0] metadata = update_metadata(children[name][1].copy(), self.metadata, now) if self.create_readonly_node and metadata.get('no-write', False): child = self.create_readonly_node(child, name) children[name] = (child, metadata) new_contents = self.node._pack_contents(children) return new_contents class Adder: def __init__(self, node, entries=None, overwrite=True, create_readonly_node=None): self.node = node if entries is None: entries = {} precondition(isinstance(entries, dict), entries) precondition(overwrite in (True, False, "only-files"), overwrite) # keys of 'entries' may not be normalized. self.entries = entries self.overwrite = overwrite self.create_readonly_node = create_readonly_node def set_node(self, namex, node, metadata): precondition(IFilesystemNode.providedBy(node), node) self.entries[namex] = (node, metadata) def modify(self, old_contents, servermap, first_time): children = self.node._unpack_contents(old_contents) now = time.time() for (namex, (child, new_metadata)) in self.entries.iteritems(): name = normalize(namex) precondition(IFilesystemNode.providedBy(child), child) # Strictly speaking this is redundant because we would raise the # error again in _pack_normalized_children. 
child.raise_error() metadata = None if name in children: if not self.overwrite: raise ExistingChildError("child %s already exists" % quote_output(name, encoding='utf-8')) if self.overwrite == "only-files" and IDirectoryNode.providedBy(children[name][0]): raise ExistingChildError("child %s already exists as a directory" % quote_output(name, encoding='utf-8')) metadata = children[name][1].copy() metadata = update_metadata(metadata, new_metadata, now) if self.create_readonly_node and metadata.get('no-write', False): child = self.create_readonly_node(child, name) children[name] = (child, metadata) new_contents = self.node._pack_contents(children) return new_contents def _encrypt_rw_uri(writekey, rw_uri): precondition(isinstance(rw_uri, str), rw_uri) precondition(isinstance(writekey, str), writekey) salt = hashutil.mutable_rwcap_salt_hash(rw_uri) key = hashutil.mutable_rwcap_key_hash(salt, writekey) cryptor = AES(key) crypttext = cryptor.process(rw_uri) mac = hashutil.hmac(key, salt + crypttext) assert len(mac) == 32 return salt + crypttext + mac # The MAC is not checked by readers in Tahoe >= 1.3.0, but we still # produce it for the sake of older readers. def pack_children(childrenx, writekey, deep_immutable=False): # initial_children must have metadata (i.e. {} instead of None) children = {} for (namex, (node, metadata)) in childrenx.iteritems(): precondition(isinstance(metadata, dict), "directory creation requires metadata to be a dict, not None", metadata) children[normalize(namex)] = (node, metadata) return _pack_normalized_children(children, writekey=writekey, deep_immutable=deep_immutable) ZERO_LEN_NETSTR=netstring('') def _pack_normalized_children(children, writekey, deep_immutable=False): """Take a dict that maps: children[unicode_nfc_name] = (IFileSystemNode, metadata_dict) and pack it into a single string, for use as the contents of the backing file. This is the same format as is returned by _unpack_contents. I also accept an AuxValueDict, in which case I'll use the auxilliary cached data as the pre-packed entry, which is faster than re-packing everything each time. If writekey is provided then I will superencrypt the child's writecap with writekey. If deep_immutable is True, I will require that all my children are deeply immutable, and will raise a MustBeDeepImmutableError if not. 
""" precondition((writekey is None) or isinstance(writekey, str), writekey) has_aux = isinstance(children, AuxValueDict) entries = [] for name in sorted(children.keys()): assert isinstance(name, unicode) entry = None (child, metadata) = children[name] child.raise_error() if deep_immutable and not child.is_allowed_in_immutable_directory(): raise MustBeDeepImmutableError("child %s is not allowed in an immutable directory" % quote_output(name, encoding='utf-8'), name) if has_aux: entry = children.get_aux(name) if not entry: assert IFilesystemNode.providedBy(child), (name,child) assert isinstance(metadata, dict) rw_uri = child.get_write_uri() if rw_uri is None: rw_uri = "" assert isinstance(rw_uri, str), rw_uri # should be prevented by MustBeDeepImmutableError check above assert not (rw_uri and deep_immutable) ro_uri = child.get_readonly_uri() if ro_uri is None: ro_uri = "" assert isinstance(ro_uri, str), ro_uri if writekey is not None: writecap = netstring(_encrypt_rw_uri(writekey, rw_uri)) else: writecap = ZERO_LEN_NETSTR entry = "".join([netstring(name.encode("utf-8")), netstring(strip_prefix_for_ro(ro_uri, deep_immutable)), writecap, netstring(simplejson.dumps(metadata))]) entries.append(netstring(entry)) return "".join(entries) class DirectoryNode: implements(IDirectoryNode, ICheckable, IDeepCheckable) filenode_class = MutableFileNode def __init__(self, filenode, nodemaker, uploader): assert IFileNode.providedBy(filenode), filenode assert not IDirectoryNode.providedBy(filenode), filenode self._node = filenode filenode_cap = filenode.get_cap() self._uri = wrap_dirnode_cap(filenode_cap) self._nodemaker = nodemaker self._uploader = uploader def __repr__(self): return "<%s %s-%s %s>" % (self.__class__.__name__, self.is_readonly() and "RO" or "RW", self.is_mutable() and "MUT" or "IMM", hasattr(self, '_uri') and self._uri.abbrev()) def get_size(self): """Return the size of our backing mutable file, in bytes, if we've fetched it. Otherwise return None. This returns synchronously.""" return self._node.get_size() def get_current_size(self): """Calculate the size of our backing mutable file, in bytes. Returns a Deferred that fires with the result.""" return self._node.get_current_size() def _read(self): if self._node.is_mutable(): # use the IMutableFileNode API. d = self._node.download_best_version() else: d = download_to_data(self._node) d.addCallback(self._unpack_contents) return d def _decrypt_rwcapdata(self, encwrcap): salt = encwrcap[:16] crypttext = encwrcap[16:-32] key = hashutil.mutable_rwcap_key_hash(salt, self._node.get_writekey()) cryptor = AES(key) plaintext = cryptor.process(crypttext) return plaintext def _create_and_validate_node(self, rw_uri, ro_uri, name): # name is just for error reporting node = self._nodemaker.create_from_cap(rw_uri, ro_uri, deep_immutable=not self.is_mutable(), name=name) node.raise_error() return node def _create_readonly_node(self, node, name): # name is just for error reporting if not node.is_unknown() and node.is_readonly(): return node return self._create_and_validate_node(None, node.get_readonly_uri(), name=name) def _unpack_contents(self, data): # the directory is serialized as a list of netstrings, one per child. # Each child is serialized as a list of four netstrings: (name, ro_uri, # rwcapdata, metadata), in which the name, ro_uri, metadata are in # cleartext. The 'name' is UTF-8 encoded, and should be normalized to NFC. 
# The rwcapdata is formatted as: # pack("16ss32s", iv, AES(H(writekey+iv), plaintext_rw_uri), mac) assert isinstance(data, str), (repr(data), type(data)) # an empty directory is serialized as an empty string if data == "": return AuxValueDict() writeable = not self.is_readonly() mutable = self.is_mutable() children = AuxValueDict() position = 0 while position < len(data): entries, position = split_netstring(data, 1, position) entry = entries[0] (namex_utf8, ro_uri, rwcapdata, metadata_s), subpos = split_netstring(entry, 4) if not mutable and len(rwcapdata) > 0: raise ValueError("the rwcapdata field of a dirnode in an immutable directory was not empty") # A name containing characters that are unassigned in one version of Unicode might # not be normalized wrt a later version. See the note in section 'Normalization Stability' # at . # Therefore we normalize names going both in and out of directories. name = normalize(namex_utf8.decode("utf-8")) rw_uri = "" if writeable: rw_uri = self._decrypt_rwcapdata(rwcapdata) # Since the encryption uses CTR mode, it currently leaks the length of the # plaintext rw_uri -- and therefore whether it is present, i.e. whether the # dirnode is writeable (ticket #925). By stripping trailing spaces in # Tahoe >= 1.6.0, we may make it easier for future versions to plug this leak. # ro_uri is treated in the same way for consistency. # rw_uri and ro_uri will be either None or a non-empty string. rw_uri = rw_uri.rstrip(' ') or None ro_uri = ro_uri.rstrip(' ') or None try: child = self._create_and_validate_node(rw_uri, ro_uri, name) if mutable or child.is_allowed_in_immutable_directory(): metadata = simplejson.loads(metadata_s) assert isinstance(metadata, dict) children[name] = (child, metadata) children.set_with_aux(name, (child, metadata), auxilliary=entry) else: log.msg(format="mutable cap for child %(name)s unpacked from an immutable directory", name=quote_output(name, encoding='utf-8'), facility="tahoe.webish", level=log.UNUSUAL) except CapConstraintError, e: log.msg(format="unmet constraint on cap for child %(name)s unpacked from a directory:\n" "%(message)s", message=e.args[0], name=quote_output(name, encoding='utf-8'), facility="tahoe.webish", level=log.UNUSUAL) return children def _pack_contents(self, children): # expects children in the same format as _unpack_contents returns return _pack_normalized_children(children, self._node.get_writekey()) def is_readonly(self): return self._node.is_readonly() def is_mutable(self): return self._node.is_mutable() def is_unknown(self): return False def is_allowed_in_immutable_directory(self): return not self._node.is_mutable() def raise_error(self): pass def get_uri(self): return self._uri.to_string() def get_write_uri(self): if self.is_readonly(): return None return self._uri.to_string() def get_readonly_uri(self): return self._uri.get_readonly().to_string() def get_cap(self): return self._uri def get_readcap(self): return self._uri.get_readonly() def get_verify_cap(self): return self._uri.get_verify_cap() def get_repair_cap(self): if self._node.is_readonly(): return None # readonly (mutable) dirnodes are not yet repairable return self._uri def get_storage_index(self): return self._uri.get_storage_index() def check(self, monitor, verify=False, add_lease=False): """Perform a file check. 
See IChecker.check for details.""" return self._node.check(monitor, verify, add_lease) def check_and_repair(self, monitor, verify=False, add_lease=False): return self._node.check_and_repair(monitor, verify, add_lease) def list(self): """I return a Deferred that fires with a dictionary mapping child name to a tuple of (IFilesystemNode, metadata).""" return self._read() def has_child(self, namex): """I return a Deferred that fires with a boolean, True if there exists a child of the given name, False if not.""" name = normalize(namex) d = self._read() d.addCallback(lambda children: children.has_key(name)) return d def _get(self, children, name): child = children.get(name) if child is None: raise NoSuchChildError(name) return child[0] def _get_with_metadata(self, children, name): child = children.get(name) if child is None: raise NoSuchChildError(name) return child def get(self, namex): """I return a Deferred that fires with the named child node, which is an IFilesystemNode.""" name = normalize(namex) d = self._read() d.addCallback(self._get, name) return d def get_child_and_metadata(self, namex): """I return a Deferred that fires with the (node, metadata) pair for the named child. The node is an IFilesystemNode, and the metadata is a dictionary.""" name = normalize(namex) d = self._read() d.addCallback(self._get_with_metadata, name) return d def get_metadata_for(self, namex): name = normalize(namex) d = self._read() d.addCallback(lambda children: children[name][1]) return d def set_metadata_for(self, namex, metadata): name = normalize(namex) if self.is_readonly(): return defer.fail(NotWriteableError()) assert isinstance(metadata, dict) s = MetadataSetter(self, name, metadata, create_readonly_node=self._create_readonly_node) d = self._node.modify(s.modify) d.addCallback(lambda res: self) return d def get_child_at_path(self, pathx): """Transform a child path into an IFilesystemNode. I perform a recursive series of 'get' operations to find the named descendant node. I return a Deferred that fires with the node, or errbacks with IndexError if the node could not be found. The path can be either a single string (slash-separated) or a list of path-name elements. """ d = self.get_child_and_metadata_at_path(pathx) d.addCallback(lambda (node, metadata): node) return d def get_child_and_metadata_at_path(self, pathx): """Transform a child path into an IFilesystemNode and a metadata dictionary from the last edge that was traversed. """ if not pathx: return defer.succeed((self, {})) if isinstance(pathx, (list, tuple)): pass else: pathx = pathx.split("/") for p in pathx: assert isinstance(p, unicode), p childnamex = pathx[0] remaining_pathx = pathx[1:] if remaining_pathx: d = self.get(childnamex) d.addCallback(lambda node: node.get_child_and_metadata_at_path(remaining_pathx)) return d d = self.get_child_and_metadata(childnamex) return d def set_uri(self, namex, writecap, readcap, metadata=None, overwrite=True): precondition(isinstance(writecap, (str,type(None))), writecap) precondition(isinstance(readcap, (str,type(None))), readcap) # We now allow packing unknown nodes, provided they are valid # for this type of directory. 
child_node = self._create_and_validate_node(writecap, readcap, namex) d = self.set_node(namex, child_node, metadata, overwrite) d.addCallback(lambda res: child_node) return d def set_children(self, entries, overwrite=True): # this takes URIs a = Adder(self, overwrite=overwrite, create_readonly_node=self._create_readonly_node) for (namex, e) in entries.iteritems(): assert isinstance(namex, unicode), namex if len(e) == 2: writecap, readcap = e metadata = None else: assert len(e) == 3 writecap, readcap, metadata = e precondition(isinstance(writecap, (str,type(None))), writecap) precondition(isinstance(readcap, (str,type(None))), readcap) # We now allow packing unknown nodes, provided they are valid # for this type of directory. child_node = self._create_and_validate_node(writecap, readcap, namex) a.set_node(namex, child_node, metadata) d = self._node.modify(a.modify) d.addCallback(lambda ign: self) return d def set_node(self, namex, child, metadata=None, overwrite=True): """I add a child at the specific name. I return a Deferred that fires when the operation finishes. This Deferred will fire with the child node that was just added. I will replace any existing child of the same name. If this directory node is read-only, the Deferred will errback with a NotWriteableError.""" precondition(IFilesystemNode.providedBy(child), child) if self.is_readonly(): return defer.fail(NotWriteableError()) assert IFilesystemNode.providedBy(child), child a = Adder(self, overwrite=overwrite, create_readonly_node=self._create_readonly_node) a.set_node(namex, child, metadata) d = self._node.modify(a.modify) d.addCallback(lambda res: child) return d def set_nodes(self, entries, overwrite=True): precondition(isinstance(entries, dict), entries) if self.is_readonly(): return defer.fail(NotWriteableError()) a = Adder(self, entries, overwrite=overwrite, create_readonly_node=self._create_readonly_node) d = self._node.modify(a.modify) d.addCallback(lambda res: self) return d def add_file(self, namex, uploadable, metadata=None, overwrite=True): """I upload a file (using the given IUploadable), then attach the resulting FileNode to the directory at the given name. I return a Deferred that fires (with the IFileNode of the uploaded file) when the operation completes.""" name = normalize(namex) if self.is_readonly(): return defer.fail(NotWriteableError()) d = self._uploader.upload(uploadable) d.addCallback(lambda results: self._create_and_validate_node(results.get_uri(), None, name)) d.addCallback(lambda node: self.set_node(name, node, metadata, overwrite)) return d def delete(self, namex, must_exist=True, must_be_directory=False, must_be_file=False): """I remove the child at the specific name. I return a Deferred that fires (with the node just removed) when the operation finishes.""" if self.is_readonly(): return defer.fail(NotWriteableError()) deleter = Deleter(self, namex, must_exist=must_exist, must_be_directory=must_be_directory, must_be_file=must_be_file) d = self._node.modify(deleter.modify) d.addCallback(lambda res: deleter.old_child) return d # XXX: Too many arguments? Worthwhile to break into mutable/immutable? 
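# --- Illustrative aside, not part of the original source. ---
# How the child-manipulation API in this class chains together in practice.
# 'dirnode' is assumed to be a writeable DirectoryNode, 'uploadable' an
# IUploadable, and the child names are placeholders; every call returns a
# Deferred, so the steps are sequenced with addCallback.
def _demo_populate(dirnode, uploadable):
    d = dirnode.create_subdirectory(u"backups")
    d.addCallback(lambda subdir: subdir.add_file(u"notes.txt", uploadable))
    d.addCallback(lambda filenode: dirnode.set_uri(u"latest", None,
                                                   filenode.get_readonly_uri()))
    d.addCallback(lambda ign: dirnode.delete(u"stale", must_exist=False))
    return d
# --- end aside ---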
def create_subdirectory(self, namex, initial_children={}, overwrite=True, mutable=True, mutable_version=None, metadata=None): name = normalize(namex) if self.is_readonly(): return defer.fail(NotWriteableError()) if mutable: if mutable_version: d = self._nodemaker.create_new_mutable_directory(initial_children, version=mutable_version) else: d = self._nodemaker.create_new_mutable_directory(initial_children) else: # mutable version doesn't make sense for immmutable directories. assert mutable_version is None d = self._nodemaker.create_immutable_directory(initial_children) def _created(child): entries = {name: (child, metadata)} a = Adder(self, entries, overwrite=overwrite, create_readonly_node=self._create_readonly_node) d = self._node.modify(a.modify) d.addCallback(lambda res: child) return d d.addCallback(_created) return d def move_child_to(self, current_child_namex, new_parent, new_child_namex=None, overwrite=True): """ I take one of my child links and move it to a new parent. The child link is referenced by name. In the new parent, the child link will live at 'new_child_namex', which defaults to 'current_child_namex'. I return a Deferred that fires when the operation finishes. 'new_child_namex' and 'current_child_namex' need not be normalized. The overwrite parameter may be True (overwrite any existing child), False (error if the new child link already exists), or "only-files" (error if the new child link exists and points to a directory). """ if self.is_readonly() or new_parent.is_readonly(): return defer.fail(NotWriteableError()) current_child_name = normalize(current_child_namex) if new_child_namex is None: new_child_name = current_child_name else: new_child_name = normalize(new_child_namex) from_uri = self.get_write_uri() if new_parent.get_write_uri() == from_uri and new_child_name == current_child_name: # needed for correctness, otherwise we would delete the child return defer.succeed("redundant rename/relink") d = self.get_child_and_metadata(current_child_name) def _got_child( (child, metadata) ): return new_parent.set_node(new_child_name, child, metadata, overwrite=overwrite) d.addCallback(_got_child) d.addCallback(lambda child: self.delete(current_child_name)) return d def deep_traverse(self, walker): """Perform a recursive walk, using this dirnode as a root, notifying the 'walker' instance of everything I encounter. I call walker.enter_directory(parent, children) once for each dirnode I visit, immediately after retrieving the list of children. I pass in the parent dirnode and the dict of childname->(childnode,metadata). This function should *not* traverse the children: I will do that. enter_directory() is most useful for the deep-stats number that counts how large a directory is. I call walker.add_node(node, path) for each node (both files and directories) I can reach. Most work should be done here. I avoid loops by keeping track of verifier-caps and refusing to call walker.add_node() or traverse a node that I've seen before. This means that any file or directory will only be given to the walker once. If files or directories are referenced multiple times by a directory structure, this may appear to under-count or miss some of them. I return a Monitor which can be used to wait for the operation to finish, learn about its progress, or cancel the operation. """ # this is just a tree-walker, except that following each edge # requires a Deferred. 
We used to use a ConcurrencyLimiter to limit # fanout to 10 simultaneous operations, but the memory load of the # queued operations was excessive (in one case, with 330k dirnodes, # it caused the process to run into the 3.0GB-ish per-process 32bit # linux memory limit, and crashed). So we use a single big Deferred # chain, and do a strict depth-first traversal, one node at a time. # This can be slower, because we aren't pipelining directory reads, # but it brought the memory footprint down by roughly 50%. monitor = Monitor() walker.set_monitor(monitor) found = set([self.get_verify_cap()]) d = self._deep_traverse_dirnode(self, [], walker, monitor, found) d.addCallback(lambda ignored: walker.finish()) d.addBoth(monitor.finish) d.addErrback(lambda f: None) return monitor def _deep_traverse_dirnode(self, node, path, walker, monitor, found): # process this directory, then walk its children monitor.raise_if_cancelled() d = defer.maybeDeferred(walker.add_node, node, path) d.addCallback(lambda ignored: node.list()) d.addCallback(self._deep_traverse_dirnode_children, node, path, walker, monitor, found) return d def _deep_traverse_dirnode_children(self, children, parent, path, walker, monitor, found): monitor.raise_if_cancelled() d = defer.maybeDeferred(walker.enter_directory, parent, children) # we process file-like children first, so we can drop their FileNode # objects as quickly as possible. Tests suggest that a FileNode (held # in the client's nodecache) consumes about 2440 bytes. dirnodes (not # in the nodecache) seem to consume about 2000 bytes. dirkids = [] filekids = [] for name, (child, metadata) in sorted(children.iteritems()): childpath = path + [name] if isinstance(child, UnknownNode): walker.add_node(child, childpath) continue verifier = child.get_verify_cap() # allow LIT files (for which verifier==None) to be processed if (verifier is not None) and (verifier in found): continue found.add(verifier) if IDirectoryNode.providedBy(child): dirkids.append( (child, childpath) ) else: filekids.append( (child, childpath) ) for i, (child, childpath) in enumerate(filekids): d.addCallback(lambda ignored, child=child, childpath=childpath: walker.add_node(child, childpath)) # to work around the Deferred tail-recursion problem # (specifically the defer.succeed flavor) requires us to avoid # doing more than 158 LIT files in a row. We insert a turn break # once every 100 files (LIT or CHK) to preserve some stack space # for other code. This is a different expression of the same # Twisted problem as in #237. 
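# --- Illustrative aside, not part of the original source. ---
# The same turn-breaking trick in isolation: when many callbacks in a long
# Deferred chain would otherwise fire synchronously (e.g. LIT files, whose
# contents live entirely in the cap), inserting fireEventually() every N
# steps returns control to the reactor and keeps the Python stack shallow.
# _chain_with_turn_breaks is a hypothetical helper, not tahoe code; defer and
# fireEventually are imported at the top of this module.
def _chain_with_turn_breaks(items, process, every=100):
    d = defer.succeed(None)
    for i, item in enumerate(items):
        d.addCallback(lambda ign, item=item: process(item))
        if i % every == every - 1:
            d.addCallback(lambda ign: fireEventually())
    return d
# --- end aside ---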
if i % 100 == 99: d.addCallback(lambda ignored: fireEventually()) for (child, childpath) in dirkids: d.addCallback(lambda ignored, child=child, childpath=childpath: self._deep_traverse_dirnode(child, childpath, walker, monitor, found)) return d def build_manifest(self): """Return a Monitor, with a ['status'] that will be a list of (path, cap) tuples, for all nodes (directories and files) reachable from this one.""" walker = ManifestWalker(self) return self.deep_traverse(walker) def start_deep_stats(self): # Since deep_traverse tracks verifier caps, we avoid double-counting # children for which we've got both a write-cap and a read-cap return self.deep_traverse(DeepStats(self)) def start_deep_check(self, verify=False, add_lease=False): return self.deep_traverse(DeepChecker(self, verify, repair=False, add_lease=add_lease)) def start_deep_check_and_repair(self, verify=False, add_lease=False): return self.deep_traverse(DeepChecker(self, verify, repair=True, add_lease=add_lease)) class DeepStats: def __init__(self, origin): self.origin = origin self.stats = {} for k in ["count-immutable-files", "count-mutable-files", "count-literal-files", "count-files", "count-directories", "count-unknown", "size-immutable-files", #"size-mutable-files", "size-literal-files", "size-directories", "largest-directory", "largest-directory-children", "largest-immutable-file", #"largest-mutable-file", ]: self.stats[k] = 0 self.histograms = {} for k in ["size-files-histogram"]: self.histograms[k] = {} # maps (min,max) to count self.buckets = [ (0,0), (1,3)] self.root = math.sqrt(10) def set_monitor(self, monitor): self.monitor = monitor monitor.origin_si = self.origin.get_storage_index() monitor.set_status(self.get_results()) def add_node(self, node, childpath): if isinstance(node, UnknownNode): self.add("count-unknown") elif IDirectoryNode.providedBy(node): self.add("count-directories") elif IMutableFileNode.providedBy(node): self.add("count-files") self.add("count-mutable-files") # TODO: update the servermap, compute a size, add it to # size-mutable-files, max it into "largest-mutable-file" elif IImmutableFileNode.providedBy(node): # CHK and LIT self.add("count-files") size = node.get_size() self.histogram("size-files-histogram", size) theuri = from_string(node.get_uri()) if isinstance(theuri, LiteralFileURI): self.add("count-literal-files") self.add("size-literal-files", size) else: self.add("count-immutable-files") self.add("size-immutable-files", size) self.max("largest-immutable-file", size) def enter_directory(self, parent, children): dirsize_bytes = parent.get_size() if dirsize_bytes is not None: self.add("size-directories", dirsize_bytes) self.max("largest-directory", dirsize_bytes) dirsize_children = len(children) self.max("largest-directory-children", dirsize_children) def add(self, key, value=1): self.stats[key] += value def max(self, key, value): self.stats[key] = max(self.stats[key], value) def which_bucket(self, size): # return (min,max) such that min <= size <= max # values are from the set (0,0), (1,3), (4,10), (11,31), (32,100), # (101,316), (317, 1000), etc: two per decade assert size >= 0 i = 0 while True: if i >= len(self.buckets): # extend the list new_lower = self.buckets[i-1][1]+1 new_upper = int(mathutil.next_power_of_k(new_lower, self.root)) self.buckets.append( (new_lower, new_upper) ) maybe = self.buckets[i] if maybe[0] <= size <= maybe[1]: return maybe i += 1 def histogram(self, key, size): bucket = self.which_bucket(size) h = self.histograms[key] if bucket not in h: h[bucket] = 0 
h[bucket] += 1 def get_results(self): stats = self.stats.copy() for key in self.histograms: h = self.histograms[key] out = [ (bucket[0], bucket[1], h[bucket]) for bucket in h ] out.sort() stats[key] = out return stats def finish(self): return self.get_results() class ManifestWalker(DeepStats): def __init__(self, origin): DeepStats.__init__(self, origin) self.manifest = [] self.storage_index_strings = set() self.verifycaps = set() def add_node(self, node, path): self.manifest.append( (tuple(path), node.get_uri()) ) si = node.get_storage_index() if si: self.storage_index_strings.add(base32.b2a(si)) v = node.get_verify_cap() if v: self.verifycaps.add(v.to_string()) return DeepStats.add_node(self, node, path) def get_results(self): stats = DeepStats.get_results(self) return {"manifest": self.manifest, "verifycaps": self.verifycaps, "storage-index": self.storage_index_strings, "stats": stats, } class DeepChecker: def __init__(self, root, verify, repair, add_lease): root_si = root.get_storage_index() if root_si: root_si_base32 = base32.b2a(root_si) else: root_si_base32 = "" self._lp = log.msg(format="deep-check starting (%(si)s)," " verify=%(verify)s, repair=%(repair)s", si=root_si_base32, verify=verify, repair=repair) self._verify = verify self._repair = repair self._add_lease = add_lease if repair: self._results = DeepCheckAndRepairResults(root_si) else: self._results = DeepCheckResults(root_si) self._stats = DeepStats(root) def set_monitor(self, monitor): self.monitor = monitor monitor.set_status(self._results) def add_node(self, node, childpath): if self._repair: d = node.check_and_repair(self.monitor, self._verify, self._add_lease) d.addCallback(self._results.add_check_and_repair, childpath) else: d = node.check(self.monitor, self._verify, self._add_lease) d.addCallback(self._results.add_check, childpath) d.addCallback(lambda ignored: self._stats.add_node(node, childpath)) return d def enter_directory(self, parent, children): return self._stats.enter_directory(parent, children) def finish(self): log.msg("deep-check done", parent=self._lp) self._results.update_stats(self._stats.get_results()) return self._results # use client.create_dirnode() to make one of these tahoe-lafs-1.10.0/src/allmydata/frontends/000077500000000000000000000000001221140116300203445ustar00rootroot00000000000000tahoe-lafs-1.10.0/src/allmydata/frontends/__init__.py000066400000000000000000000000001221140116300224430ustar00rootroot00000000000000tahoe-lafs-1.10.0/src/allmydata/frontends/auth.py000066400000000000000000000075331221140116300216670ustar00rootroot00000000000000import os from zope.interface import implements from twisted.web.client import getPage from twisted.internet import defer from twisted.cred import error, checkers, credentials from allmydata.util import base32 class NeedRootcapLookupScheme(Exception): """Accountname+Password-based access schemes require some kind of mechanism to translate name+passwd pairs into a rootcap, either a file of name/passwd/rootcap tuples, or a server to do the translation.""" class FTPAvatarID: def __init__(self, username, rootcap): self.username = username self.rootcap = rootcap class AccountFileChecker: implements(checkers.ICredentialsChecker) credentialInterfaces = (credentials.IUsernamePassword, credentials.IUsernameHashedPassword) def __init__(self, client, accountfile): self.client = client self.passwords = {} self.pubkeys = {} self.rootcaps = {} for line in open(os.path.expanduser(accountfile), "r"): line = line.strip() if line.startswith("#") or not line: continue name, 
passwd, rest = line.split(None, 2) if passwd in ("ssh-dss", "ssh-rsa"): bits = rest.split() keystring = " ".join(bits[-1]) rootcap = bits[-1] self.pubkeys[name] = keystring else: self.passwords[name] = passwd rootcap = rest self.rootcaps[name] = rootcap def _cbPasswordMatch(self, matched, username): if matched: return FTPAvatarID(username, self.rootcaps[username]) raise error.UnauthorizedLogin def requestAvatarId(self, credentials): if credentials.username in self.passwords: d = defer.maybeDeferred(credentials.checkPassword, self.passwords[credentials.username]) d.addCallback(self._cbPasswordMatch, str(credentials.username)) return d return defer.fail(error.UnauthorizedLogin()) class AccountURLChecker: implements(checkers.ICredentialsChecker) credentialInterfaces = (credentials.IUsernamePassword,) def __init__(self, client, auth_url): self.client = client self.auth_url = auth_url def _cbPasswordMatch(self, rootcap, username): return FTPAvatarID(username, rootcap) def post_form(self, username, password): sepbase = base32.b2a(os.urandom(4)) sep = "--" + sepbase form = [] form.append(sep) fields = {"action": "authenticate", "email": username, "passwd": password, } for name, value in fields.iteritems(): form.append('Content-Disposition: form-data; name="%s"' % name) form.append('') assert isinstance(value, str) form.append(value) form.append(sep) form[-1] += "--" body = "\r\n".join(form) + "\r\n" headers = {"content-type": "multipart/form-data; boundary=%s" % sepbase, } return getPage(self.auth_url, method="POST", postdata=body, headers=headers, followRedirect=True, timeout=30) def _parse_response(self, res): rootcap = res.strip() if rootcap == "0": raise error.UnauthorizedLogin return rootcap def requestAvatarId(self, credentials): # construct a POST to the login form. While this could theoretically # be done with something like the stdlib 'email' package, I can't # figure out how, so we just slam together a form manually. d = self.post_form(credentials.username, credentials.password) d.addCallback(self._parse_response) d.addCallback(self._cbPasswordMatch, str(credentials.username)) return d tahoe-lafs-1.10.0/src/allmydata/frontends/drop_upload.py000066400000000000000000000125151221140116300232320ustar00rootroot00000000000000 import os, sys from twisted.internet import defer from twisted.python.filepath import FilePath from twisted.application import service from foolscap.api import eventually from allmydata.interfaces import IDirectoryNode from allmydata.util.encodingutil import quote_output, get_filesystem_encoding from allmydata.immutable.upload import FileName class DropUploader(service.MultiService): name = 'drop-upload' def __init__(self, client, upload_dircap, local_dir_utf8, inotify=None): service.MultiService.__init__(self) try: local_dir_u = os.path.expanduser(local_dir_utf8.decode('utf-8')) if sys.platform == "win32": local_dir = local_dir_u else: local_dir = local_dir_u.encode(get_filesystem_encoding()) except (UnicodeEncodeError, UnicodeDecodeError): raise AssertionError("The '[drop_upload] local.directory' parameter %s was not valid UTF-8 or " "could not be represented in the filesystem encoding." 
% quote_output(local_dir_utf8)) self._client = client self._stats_provider = client.stats_provider self._convergence = client.convergence self._local_path = FilePath(local_dir) if inotify is None: from twisted.internet import inotify self._inotify = inotify if not self._local_path.exists(): raise AssertionError("The '[drop_upload] local.directory' parameter was %s but there is no directory at that location." % quote_output(local_dir_u)) if not self._local_path.isdir(): raise AssertionError("The '[drop_upload] local.directory' parameter was %s but the thing at that location is not a directory." % quote_output(local_dir_u)) # TODO: allow a path rather than a cap URI. self._parent = self._client.create_node_from_uri(upload_dircap) if not IDirectoryNode.providedBy(self._parent): raise AssertionError("The '[drop_upload] upload.dircap' parameter does not refer to a directory.") if self._parent.is_unknown() or self._parent.is_readonly(): raise AssertionError("The '[drop_upload] upload.dircap' parameter is not a writecap to a directory.") self._uploaded_callback = lambda ign: None self._notifier = inotify.INotify() # We don't watch for IN_CREATE, because that would cause us to read and upload a # possibly-incomplete file before the application has closed it. There should always # be an IN_CLOSE_WRITE after an IN_CREATE (I think). # TODO: what about IN_MOVE_SELF or IN_UNMOUNT? mask = inotify.IN_CLOSE_WRITE | inotify.IN_MOVED_TO | inotify.IN_ONLYDIR self._notifier.watch(self._local_path, mask=mask, callbacks=[self._notify]) def startService(self): service.MultiService.startService(self) d = self._notifier.startReading() self._stats_provider.count('drop_upload.dirs_monitored', 1) return d def _notify(self, opaque, path, events_mask): self._log("inotify event %r, %r, %r\n" % (opaque, path, ', '.join(self._inotify.humanReadableMask(events_mask)))) self._stats_provider.count('drop_upload.files_queued', 1) eventually(self._process, opaque, path, events_mask) def _process(self, opaque, path, events_mask): d = defer.succeed(None) # FIXME: if this already exists as a mutable file, we replace the directory entry, # but we should probably modify the file (as the SFTP frontend does). 
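# --- Editor's illustrative sketch (not part of the upstream module). ---------
# The constructor above wires twisted.internet.inotify with the mask
# IN_CLOSE_WRITE | IN_MOVED_TO | IN_ONLYDIR, so only completed writes and files
# moved into the watched directory trigger an upload.  The helper below is a
# minimal, self-contained version of that wiring; the function name, the demo
# directory path, and the "collect events instead of uploading" behaviour are
# assumptions made for illustration only.
def _example_watch_directory(dirpath="/tmp/drop-upload-demo"):
    """Watch dirpath (Linux only) and record close-after-write / moved-in events."""
    from twisted.internet import inotify
    from twisted.python.filepath import FilePath

    events = []
    def on_event(opaque, path, events_mask):
        # humanReadableMask turns the bitmask into names such as ['close_write'].
        names = ", ".join(inotify.humanReadableMask(events_mask))
        events.append((path.path, names))   # the real frontend queues an upload here

    notifier = inotify.INotify()
    notifier.startReading()
    notifier.watch(FilePath(dirpath),
                   mask=inotify.IN_CLOSE_WRITE | inotify.IN_MOVED_TO | inotify.IN_ONLYDIR,
                   callbacks=[on_event])
    return notifier, events
# ------------------------------------------------------------------------------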
def _add_file(ign): name = path.basename() # on Windows the name is already Unicode if not isinstance(name, unicode): name = name.decode(get_filesystem_encoding()) u = FileName(path.path, self._convergence) return self._parent.add_file(name, u) d.addCallback(_add_file) def _succeeded(ign): self._stats_provider.count('drop_upload.files_queued', -1) self._stats_provider.count('drop_upload.files_uploaded', 1) def _failed(f): self._stats_provider.count('drop_upload.files_queued', -1) if path.exists(): self._log("drop-upload: %r failed to upload due to %r" % (path.path, f)) self._stats_provider.count('drop_upload.files_failed', 1) return f else: self._log("drop-upload: notified file %r disappeared " "(this is normal for temporary files): %r" % (path.path, f)) self._stats_provider.count('drop_upload.files_disappeared', 1) return None d.addCallbacks(_succeeded, _failed) d.addBoth(self._uploaded_callback) return d def set_uploaded_callback(self, callback): """This sets a function that will be called after a file has been uploaded.""" self._uploaded_callback = callback def finish(self, for_tests=False): self._notifier.stopReading() self._stats_provider.count('drop_upload.dirs_monitored', -1) if for_tests and hasattr(self._notifier, 'wait_until_stopped'): return self._notifier.wait_until_stopped() else: return defer.succeed(None) def _log(self, msg): self._client.log(msg) #open("events", "ab+").write(msg) tahoe-lafs-1.10.0/src/allmydata/frontends/ftpd.py000066400000000000000000000255531221140116300216650ustar00rootroot00000000000000 from zope.interface import implements from twisted.application import service, strports from twisted.internet import defer from twisted.internet.interfaces import IConsumer from twisted.cred import portal from twisted.protocols import ftp from allmydata.interfaces import IDirectoryNode, ExistingChildError, \ NoSuchChildError from allmydata.immutable.upload import FileHandle from allmydata.util.fileutil import EncryptedTemporaryFile class ReadFile: implements(ftp.IReadFile) def __init__(self, node): self.node = node def send(self, consumer): d = self.node.read(consumer) return d # when consumed class FileWriter: implements(IConsumer) def registerProducer(self, producer, streaming): if not streaming: raise NotImplementedError("Non-streaming producer not supported.") # we write the data to a temporary file, since Tahoe can't do # streaming upload yet. self.f = EncryptedTemporaryFile() return None def unregisterProducer(self): # the upload actually happens in WriteFile.close() pass def write(self, data): self.f.write(data) class WriteFile: implements(ftp.IWriteFile) def __init__(self, parent, childname, convergence): self.parent = parent self.childname = childname self.convergence = convergence def receive(self): self.c = FileWriter() return defer.succeed(self.c) def close(self): u = FileHandle(self.c.f, self.convergence) d = self.parent.add_file(self.childname, u) return d class NoParentError(Exception): pass class Handler: implements(ftp.IFTPShell) def __init__(self, client, rootnode, username, convergence): self.client = client self.root = rootnode self.username = username self.convergence = convergence def makeDirectory(self, path): d = self._get_root(path) d.addCallback(lambda (root,path): self._get_or_create_directories(root, path)) return d def _get_or_create_directories(self, node, path): if not IDirectoryNode.providedBy(node): # unfortunately it is too late to provide the name of the # blocking directory in the error message. 
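# --- Editor's illustrative sketch (not part of the upstream module). ---------
# _get_or_create_directories above relies on the Failure.trap() idiom: the
# errback re-raises anything other than NoSuchChildError and otherwise creates
# the missing child.  Below is a self-contained toy of that pattern; the
# exception class, lookup table, and names are hypothetical.
def _example_trap_and_recover(name):
    from twisted.internet import defer

    class MissingChild(Exception):
        pass

    existing = {"docs": "dircap-for-docs"}

    def lookup(childname):
        if childname in existing:
            return defer.succeed(existing[childname])
        return defer.fail(MissingChild(childname))

    def maybe_create(f, childname):
        f.trap(MissingChild)          # re-raises any failure that is not MissingChild
        return "created %s" % (childname,)

    d = lookup(name)
    d.addErrback(maybe_create, name)  # only runs if the lookup failed
    return d
# ------------------------------------------------------------------------------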
raise ftp.FileExistsError("cannot create directory because there " "is a file in the way") if not path: return defer.succeed(node) d = node.get(path[0]) def _maybe_create(f): f.trap(NoSuchChildError) return node.create_subdirectory(path[0]) d.addErrback(_maybe_create) d.addCallback(self._get_or_create_directories, path[1:]) return d def _get_parent(self, path): # fire with (parentnode, childname) path = [unicode(p) for p in path] if not path: raise NoParentError childname = path[-1] d = self._get_root(path) def _got_root((root, path)): if not path: raise NoParentError return root.get_child_at_path(path[:-1]) d.addCallback(_got_root) def _got_parent(parent): return (parent, childname) d.addCallback(_got_parent) return d def _remove_thing(self, path, must_be_directory=False, must_be_file=False): d = defer.maybeDeferred(self._get_parent, path) def _convert_error(f): f.trap(NoParentError) raise ftp.PermissionDeniedError("cannot delete root directory") d.addErrback(_convert_error) def _got_parent( (parent, childname) ): d = parent.get(childname) def _got_child(child): if must_be_directory and not IDirectoryNode.providedBy(child): raise ftp.IsNotADirectoryError("rmdir called on a file") if must_be_file and IDirectoryNode.providedBy(child): raise ftp.IsADirectoryError("rmfile called on a directory") return parent.delete(childname) d.addCallback(_got_child) d.addErrback(self._convert_error) return d d.addCallback(_got_parent) return d def removeDirectory(self, path): return self._remove_thing(path, must_be_directory=True) def removeFile(self, path): return self._remove_thing(path, must_be_file=True) def rename(self, fromPath, toPath): # the target directory must already exist d = self._get_parent(fromPath) def _got_from_parent( (fromparent, childname) ): d = self._get_parent(toPath) d.addCallback(lambda (toparent, tochildname): fromparent.move_child_to(childname, toparent, tochildname, overwrite=False)) return d d.addCallback(_got_from_parent) d.addErrback(self._convert_error) return d def access(self, path): # we allow access to everything that exists. We are required to raise # an error for paths that don't exist: FTP clients (at least ncftp) # uses this to decide whether to mkdir or not. d = self._get_node_and_metadata_for_path(path) d.addErrback(self._convert_error) d.addCallback(lambda res: None) return d def _convert_error(self, f): if f.check(NoSuchChildError): childname = f.value.args[0].encode("utf-8") msg = "'%s' doesn't exist" % childname raise ftp.FileNotFoundError(msg) if f.check(ExistingChildError): msg = f.value.args[0].encode("utf-8") raise ftp.FileExistsError(msg) return f def _get_root(self, path): # return (root, remaining_path) path = [unicode(p) for p in path] if path and path[0] == "uri": d = defer.maybeDeferred(self.client.create_node_from_uri, str(path[1])) d.addCallback(lambda root: (root, path[2:])) else: d = defer.succeed((self.root,path)) return d def _get_node_and_metadata_for_path(self, path): d = self._get_root(path) def _got_root((root,path)): if path: return root.get_child_and_metadata_at_path(path) else: return (root,{}) d.addCallback(_got_root) return d def _populate_row(self, keys, (childnode, metadata)): values = [] isdir = bool(IDirectoryNode.providedBy(childnode)) for key in keys: if key == "size": if isdir: value = 0 else: value = childnode.get_size() or 0 elif key == "directory": value = isdir elif key == "permissions": value = 0600 elif key == "hardlinks": value = 1 elif key == "modified": # follow sftpd convention (i.e. 
linkmotime in preference to mtime) if "linkmotime" in metadata.get("tahoe", {}): value = metadata["tahoe"]["linkmotime"] else: value = metadata.get("mtime", 0) elif key == "owner": value = self.username elif key == "group": value = self.username else: value = "??" values.append(value) return values def stat(self, path, keys=()): # for files only, I think d = self._get_node_and_metadata_for_path(path) def _render((node,metadata)): assert not IDirectoryNode.providedBy(node) return self._populate_row(keys, (node,metadata)) d.addCallback(_render) d.addErrback(self._convert_error) return d def list(self, path, keys=()): # the interface claims that path is a list of unicodes, but in # practice it is not d = self._get_node_and_metadata_for_path(path) def _list((node, metadata)): if IDirectoryNode.providedBy(node): return node.list() return { path[-1]: (node, metadata) } # need last-edge metadata d.addCallback(_list) def _render(children): results = [] for (name, childnode) in children.iteritems(): # the interface claims that the result should have a unicode # object as the name, but it fails unless you give it a # bytestring results.append( (name.encode("utf-8"), self._populate_row(keys, childnode) ) ) return results d.addCallback(_render) d.addErrback(self._convert_error) return d def openForReading(self, path): d = self._get_node_and_metadata_for_path(path) d.addCallback(lambda (node,metadata): ReadFile(node)) d.addErrback(self._convert_error) return d def openForWriting(self, path): path = [unicode(p) for p in path] if not path: raise ftp.PermissionDeniedError("cannot STOR to root directory") childname = path[-1] d = self._get_root(path) def _got_root((root, path)): if not path: raise ftp.PermissionDeniedError("cannot STOR to root directory") return root.get_child_at_path(path[:-1]) d.addCallback(_got_root) def _got_parent(parent): return WriteFile(parent, childname, self.convergence) d.addCallback(_got_parent) return d from allmydata.frontends.auth import AccountURLChecker, AccountFileChecker, NeedRootcapLookupScheme class Dispatcher: implements(portal.IRealm) def __init__(self, client): self.client = client def requestAvatar(self, avatarID, mind, interface): assert interface == ftp.IFTPShell rootnode = self.client.create_node_from_uri(avatarID.rootcap) convergence = self.client.convergence s = Handler(self.client, rootnode, avatarID.username, convergence) def logout(): pass return (interface, s, None) class FTPServer(service.MultiService): def __init__(self, client, accountfile, accounturl, ftp_portstr): service.MultiService.__init__(self) r = Dispatcher(client) p = portal.Portal(r) if accountfile: c = AccountFileChecker(self, accountfile) p.registerChecker(c) if accounturl: c = AccountURLChecker(self, accounturl) p.registerChecker(c) if not accountfile and not accounturl: # we could leave this anonymous, with just the /uri/CAP form raise NeedRootcapLookupScheme("must provide some translation") f = ftp.FTPFactory(p) s = strports.service(ftp_portstr, f) s.setServiceParent(self) tahoe-lafs-1.10.0/src/allmydata/frontends/sftpd.py000066400000000000000000002617041221140116300220500ustar00rootroot00000000000000 import heapq, traceback, array, stat, struct from types import NoneType from stat import S_IFREG, S_IFDIR from time import time, strftime, localtime from zope.interface import implements from twisted.python import components from twisted.application import service, strports from twisted.conch.ssh import factory, keys, session from twisted.conch.ssh.filetransfer import FileTransferServer, 
SFTPError, \ FX_NO_SUCH_FILE, FX_OP_UNSUPPORTED, FX_PERMISSION_DENIED, FX_EOF, \ FX_BAD_MESSAGE, FX_FAILURE, FX_OK from twisted.conch.ssh.filetransfer import FXF_READ, FXF_WRITE, FXF_APPEND, \ FXF_CREAT, FXF_TRUNC, FXF_EXCL from twisted.conch.interfaces import ISFTPServer, ISFTPFile, IConchUser, ISession from twisted.conch.avatar import ConchUser from twisted.conch.openssh_compat import primes from twisted.cred import portal from twisted.internet.error import ProcessDone, ProcessTerminated from twisted.python.failure import Failure from twisted.internet.interfaces import ITransport from twisted.internet import defer from twisted.internet.interfaces import IConsumer from foolscap.api import eventually from allmydata.util import deferredutil from allmydata.util.assertutil import _assert, precondition from allmydata.util.consumer import download_to_data from allmydata.interfaces import IFileNode, IDirectoryNode, ExistingChildError, \ NoSuchChildError, ChildOfWrongTypeError from allmydata.mutable.common import NotWriteableError from allmydata.mutable.publish import MutableFileHandle from allmydata.immutable.upload import FileHandle from allmydata.dirnode import update_metadata from allmydata.util.fileutil import EncryptedTemporaryFile noisy = True use_foolscap_logging = True from allmydata.util.log import NOISY, OPERATIONAL, WEIRD, \ msg as _msg, err as _err, PrefixingLogMixin as _PrefixingLogMixin if use_foolscap_logging: (logmsg, logerr, PrefixingLogMixin) = (_msg, _err, _PrefixingLogMixin) else: # pragma: no cover def logmsg(s, level=None): print s def logerr(s, level=None): print s class PrefixingLogMixin: def __init__(self, facility=None, prefix=''): self.prefix = prefix def log(self, s, level=None): print "%r %s" % (self.prefix, s) def eventually_callback(d): return lambda res: eventually(d.callback, res) def eventually_errback(d): return lambda err: eventually(d.errback, err) def _utf8(x): if isinstance(x, unicode): return x.encode('utf-8') if isinstance(x, str): return x return repr(x) def _to_sftp_time(t): """SFTP times are unsigned 32-bit integers representing UTC seconds (ignoring leap seconds) since the Unix epoch, January 1 1970 00:00 UTC. A Tahoe time is the corresponding float.""" return long(t) & 0xFFFFFFFFL def _convert_error(res, request): """If res is not a Failure, return it, otherwise reraise the appropriate SFTPError.""" if not isinstance(res, Failure): logged_res = res if isinstance(res, str): logged_res = "" % (len(res),) logmsg("SUCCESS %r %r" % (request, logged_res,), level=OPERATIONAL) return res err = res logmsg("RAISE %r %r" % (request, err.value), level=OPERATIONAL) try: if noisy: logmsg(traceback.format_exc(err.value), level=NOISY) except Exception: # pragma: no cover pass # The message argument to SFTPError must not reveal information that # might compromise anonymity, if we are running over an anonymous network. if err.check(SFTPError): # original raiser of SFTPError has responsibility to ensure anonymity raise err if err.check(NoSuchChildError): childname = _utf8(err.value.args[0]) raise SFTPError(FX_NO_SUCH_FILE, childname) if err.check(NotWriteableError) or err.check(ChildOfWrongTypeError): msg = _utf8(err.value.args[0]) raise SFTPError(FX_PERMISSION_DENIED, msg) if err.check(ExistingChildError): # Versions of SFTP after v3 (which is what twisted.conch implements) # define a specific error code for this case: FX_FILE_ALREADY_EXISTS. # However v3 doesn't; instead, other servers such as sshd return # FX_FAILURE. 
The gvfs SFTP backend, for example, depends on this # to translate the error to the equivalent of POSIX EEXIST, which is # necessary for some picky programs (such as gedit). msg = _utf8(err.value.args[0]) raise SFTPError(FX_FAILURE, msg) if err.check(NotImplementedError): raise SFTPError(FX_OP_UNSUPPORTED, _utf8(err.value)) if err.check(EOFError): raise SFTPError(FX_EOF, "end of file reached") if err.check(defer.FirstError): _convert_error(err.value.subFailure, request) # We assume that the error message is not anonymity-sensitive. raise SFTPError(FX_FAILURE, _utf8(err.value)) def _repr_flags(flags): return "|".join([f for f in [(flags & FXF_READ) and "FXF_READ" or None, (flags & FXF_WRITE) and "FXF_WRITE" or None, (flags & FXF_APPEND) and "FXF_APPEND" or None, (flags & FXF_CREAT) and "FXF_CREAT" or None, (flags & FXF_TRUNC) and "FXF_TRUNC" or None, (flags & FXF_EXCL) and "FXF_EXCL" or None, ] if f]) def _lsLine(name, attrs): st_uid = "tahoe" st_gid = "tahoe" st_mtime = attrs.get("mtime", 0) st_mode = attrs["permissions"] # Some clients won't tolerate '?' in the size field (#1337). st_size = attrs.get("size", 0) # We don't know how many links there really are to this object. st_nlink = 1 # Based on . # We previously could not call the version in Twisted because we needed the change # (released in Twisted v8.2). # Since we now depend on Twisted v10.1, consider calling Twisted's version. mode = st_mode perms = array.array('c', '-'*10) ft = stat.S_IFMT(mode) if stat.S_ISDIR(ft): perms[0] = 'd' elif stat.S_ISREG(ft): perms[0] = '-' else: perms[0] = '?' # user if mode&stat.S_IRUSR: perms[1] = 'r' if mode&stat.S_IWUSR: perms[2] = 'w' if mode&stat.S_IXUSR: perms[3] = 'x' # group if mode&stat.S_IRGRP: perms[4] = 'r' if mode&stat.S_IWGRP: perms[5] = 'w' if mode&stat.S_IXGRP: perms[6] = 'x' # other if mode&stat.S_IROTH: perms[7] = 'r' if mode&stat.S_IWOTH: perms[8] = 'w' if mode&stat.S_IXOTH: perms[9] = 'x' # suid/sgid never set l = perms.tostring() l += str(st_nlink).rjust(5) + ' ' un = str(st_uid) l += un.ljust(9) gr = str(st_gid) l += gr.ljust(9) sz = str(st_size) l += sz.rjust(8) l += ' ' day = 60 * 60 * 24 sixmo = day * 7 * 26 now = time() if st_mtime + sixmo < now or st_mtime > now + day: # mtime is more than 6 months ago, or more than one day in the future l += strftime("%b %d %Y ", localtime(st_mtime)) else: l += strftime("%b %d %H:%M ", localtime(st_mtime)) l += name return l def _no_write(parent_readonly, child, metadata=None): """Whether child should be listed as having read-only permissions in parent.""" if child.is_unknown(): return True elif child.is_mutable(): return child.is_readonly() elif parent_readonly or IDirectoryNode.providedBy(child): return True else: return metadata is not None and metadata.get('no-write', False) def _populate_attrs(childnode, metadata, size=None): attrs = {} # The permissions must have the S_IFDIR (040000) or S_IFREG (0100000) # bits, otherwise the client may refuse to open a directory. # Also, sshfs run as a non-root user requires files and directories # to be world-readable/writeable. # It is important that we never set the executable bits on files. # # Directories and unknown nodes have no size, and SFTP doesn't # require us to make one up. # # childnode might be None, meaning that the file doesn't exist yet, # but we're going to write it later. if childnode and childnode.is_unknown(): perms = 0 elif childnode and IDirectoryNode.providedBy(childnode): perms = S_IFDIR | 0777 else: # For files, omit the size if we don't immediately know it. 
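# --- Editor's illustrative sketch (not part of the upstream module). ---------
# _lsLine above renders an ls(1)-style listing line from a mode word using the
# stat module.  The helper below reproduces just the 10-character permission
# field in a compact, self-contained form (the function name and the example
# mode values are assumptions for illustration).
def _example_permission_string(mode):
    """Return e.g. 'drwxr-xr-x' for mode 0o40755, '-rw-r--r--' for 0o100644."""
    import stat
    perms = list('-' * 10)
    ft = stat.S_IFMT(mode)
    if stat.S_ISDIR(ft):
        perms[0] = 'd'
    elif not stat.S_ISREG(ft):
        perms[0] = '?'                 # same fallback as _lsLine above
    bits = (stat.S_IRUSR, stat.S_IWUSR, stat.S_IXUSR,
            stat.S_IRGRP, stat.S_IWGRP, stat.S_IXGRP,
            stat.S_IROTH, stat.S_IWOTH, stat.S_IXOTH)
    for i, bit in enumerate(bits):
        if mode & bit:
            perms[i + 1] = "rwx"[i % 3]
    return "".join(perms)
# ------------------------------------------------------------------------------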
if childnode and size is None: size = childnode.get_size() if size is not None: _assert(isinstance(size, (int, long)) and not isinstance(size, bool), size=size) attrs['size'] = size perms = S_IFREG | 0666 if metadata: if metadata.get('no-write', False): perms &= S_IFDIR | S_IFREG | 0555 # clear 'w' bits # See webapi.txt for what these times mean. # We would prefer to omit atime, but SFTP version 3 can only # accept mtime if atime is also set. if 'linkmotime' in metadata.get('tahoe', {}): attrs['ctime'] = attrs['mtime'] = attrs['atime'] = _to_sftp_time(metadata['tahoe']['linkmotime']) elif 'mtime' in metadata: attrs['ctime'] = attrs['mtime'] = attrs['atime'] = _to_sftp_time(metadata['mtime']) if 'linkcrtime' in metadata.get('tahoe', {}): attrs['createtime'] = _to_sftp_time(metadata['tahoe']['linkcrtime']) attrs['permissions'] = perms # twisted.conch.ssh.filetransfer only implements SFTP version 3, # which doesn't include SSH_FILEXFER_ATTR_FLAGS. return attrs def _attrs_to_metadata(attrs): metadata = {} for key in attrs: if key == "mtime" or key == "ctime" or key == "createtime": metadata[key] = long(attrs[key]) elif key.startswith("ext_"): metadata[key] = str(attrs[key]) perms = attrs.get('permissions', stat.S_IWUSR) if not (perms & stat.S_IWUSR): metadata['no-write'] = True return metadata def _direntry_for(filenode_or_parent, childname, filenode=None): precondition(isinstance(childname, (unicode, NoneType)), childname=childname) if childname is None: filenode_or_parent = filenode if filenode_or_parent: rw_uri = filenode_or_parent.get_write_uri() if rw_uri and childname: return rw_uri + "/" + childname.encode('utf-8') else: return rw_uri return None class OverwriteableFileConsumer(PrefixingLogMixin): implements(IConsumer) """I act both as a consumer for the download of the original file contents, and as a wrapper for a temporary file that records the downloaded data and any overwrites. I use a priority queue to keep track of which regions of the file have been overwritten but not yet downloaded, so that the download does not clobber overwritten data. I use another priority queue to record milestones at which to make callbacks indicating that a given number of bytes have been downloaded. The temporary file reflects the contents of the file that I represent, except that: - regions that have neither been downloaded nor overwritten, if present, contain garbage. - the temporary file may be shorter than the represented file (it is never longer). The latter's current size is stored in self.current_size. This abstraction is mostly independent of SFTP. 
Consider moving it, if it is found useful for other frontends.""" def __init__(self, download_size, tempfile_maker): PrefixingLogMixin.__init__(self, facility="tahoe.sftp") if noisy: self.log(".__init__(%r, %r)" % (download_size, tempfile_maker), level=NOISY) self.download_size = download_size self.current_size = download_size self.f = tempfile_maker() self.downloaded = 0 self.milestones = [] # empty heap of (offset, d) self.overwrites = [] # empty heap of (start, end) self.is_closed = False self.done = defer.Deferred() self.done_status = None # None -> not complete, Failure -> download failed, str -> download succeeded self.producer = None def get_file(self): return self.f def get_current_size(self): return self.current_size def set_current_size(self, size): if noisy: self.log(".set_current_size(%r), current_size = %r, downloaded = %r" % (size, self.current_size, self.downloaded), level=NOISY) if size < self.current_size or size < self.downloaded: self.f.truncate(size) if size > self.current_size: self.overwrite(self.current_size, "\x00" * (size - self.current_size)) self.current_size = size # make the invariant self.download_size <= self.current_size be true again if size < self.download_size: self.download_size = size if self.downloaded >= self.download_size: self.download_done("size changed") def registerProducer(self, p, streaming): if noisy: self.log(".registerProducer(%r, streaming=%r)" % (p, streaming), level=NOISY) if self.producer is not None: raise RuntimeError("producer is already registered") self.producer = p if streaming: # call resumeProducing once to start things off p.resumeProducing() else: def _iterate(): if self.done_status is None: p.resumeProducing() eventually(_iterate) _iterate() def write(self, data): if noisy: self.log(".write()" % (len(data),), level=NOISY) if self.is_closed: return if self.downloaded >= self.download_size: return next_downloaded = self.downloaded + len(data) if next_downloaded > self.download_size: data = data[:(self.download_size - self.downloaded)] while len(self.overwrites) > 0: (start, end) = self.overwrites[0] if start >= next_downloaded: # This and all remaining overwrites are after the data we just downloaded. break if start > self.downloaded: # The data we just downloaded has been partially overwritten. # Write the prefix of it that precedes the overwritten region. self.f.seek(self.downloaded) self.f.write(data[:(start - self.downloaded)]) # This merges consecutive overwrites if possible, which allows us to detect the # case where the download can be stopped early because the remaining region # to download has already been fully overwritten. heapq.heappop(self.overwrites) while len(self.overwrites) > 0: (start1, end1) = self.overwrites[0] if start1 > end: break end = end1 heapq.heappop(self.overwrites) if end >= next_downloaded: # This overwrite extends past the downloaded data, so there is no # more data to consider on this call. 
heapq.heappush(self.overwrites, (next_downloaded, end)) self._update_downloaded(next_downloaded) return elif end >= self.downloaded: data = data[(end - self.downloaded):] self._update_downloaded(end) self.f.seek(self.downloaded) self.f.write(data) self._update_downloaded(next_downloaded) def _update_downloaded(self, new_downloaded): self.downloaded = new_downloaded milestone = new_downloaded if len(self.overwrites) > 0: (start, end) = self.overwrites[0] if start <= new_downloaded and end > milestone: milestone = end while len(self.milestones) > 0: (next, d) = self.milestones[0] if next > milestone: return if noisy: self.log("MILESTONE %r %r" % (next, d), level=NOISY) heapq.heappop(self.milestones) eventually_callback(d)("reached") if milestone >= self.download_size: self.download_done("reached download size") def overwrite(self, offset, data): if noisy: self.log(".overwrite(%r, )" % (offset, len(data)), level=NOISY) if self.is_closed: self.log("overwrite called on a closed OverwriteableFileConsumer", level=WEIRD) raise SFTPError(FX_BAD_MESSAGE, "cannot write to a closed file handle") if offset > self.current_size: # Normally writing at an offset beyond the current end-of-file # would leave a hole that appears filled with zeroes. However, an # EncryptedTemporaryFile doesn't behave like that (if there is a # hole in the file on disk, the zeroes that are read back will be # XORed with the keystream). So we must explicitly write zeroes in # the gap between the current EOF and the offset. self.f.seek(self.current_size) self.f.write("\x00" * (offset - self.current_size)) start = self.current_size else: self.f.seek(offset) start = offset self.f.write(data) end = offset + len(data) self.current_size = max(self.current_size, end) if end > self.downloaded: heapq.heappush(self.overwrites, (start, end)) def read(self, offset, length): """When the data has been read, callback the Deferred that we return with this data. Otherwise errback the Deferred that we return. The caller must perform no more overwrites until the Deferred has fired.""" if noisy: self.log(".read(%r, %r), current_size = %r" % (offset, length, self.current_size), level=NOISY) if self.is_closed: self.log("read called on a closed OverwriteableFileConsumer", level=WEIRD) raise SFTPError(FX_BAD_MESSAGE, "cannot read from a closed file handle") # Note that the overwrite method is synchronous. When a write request is processed # (e.g. a writeChunk request on the async queue of GeneralSFTPFile), overwrite will # be called and will update self.current_size if necessary before returning. Therefore, # self.current_size will be up-to-date for a subsequent call to this read method, and # so it is correct to do the check for a read past the end-of-file here. if offset >= self.current_size: def _eof(): raise EOFError("read past end of file") return defer.execute(_eof) if offset + length > self.current_size: length = self.current_size - offset if noisy: self.log("truncating read to %r bytes" % (length,), level=NOISY) needed = min(offset + length, self.download_size) # If we fail to reach the needed number of bytes, the read request will fail. d = self.when_reached_or_failed(needed) def _reached_in_read(res): # It is not necessarily the case that self.downloaded >= needed, because # the file might have been truncated (thus truncating the download) and # then extended. 
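# --- Editor's illustrative sketch (not part of the upstream module). ---------
# write() above keeps self.overwrites as a heap of (start, end) pairs and
# merges consecutive overwritten regions so it can tell when the rest of the
# download has already been overwritten.  The toy below shows only the
# interval-merging step on a plain heap, without the download cursor or
# milestone bookkeeping; the function name and sample data are assumptions.
def _example_merge_intervals(intervals):
    """Merge overlapping or touching (start, end) pairs, smallest start first."""
    import heapq
    heap = list(intervals)
    heapq.heapify(heap)
    merged = []
    while heap:
        start, end = heapq.heappop(heap)
        while heap and heap[0][0] <= end:     # next region starts inside this one
            end = max(end, heapq.heappop(heap)[1])
        merged.append((start, end))
    return merged
# e.g. _example_merge_intervals([(40, 50), (10, 20), (15, 30)]) == [(10, 30), (40, 50)]
# ------------------------------------------------------------------------------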
_assert(self.current_size >= offset + length, current_size=self.current_size, offset=offset, length=length) if noisy: self.log("_reached_in_read(%r), self.f = %r" % (res, self.f,), level=NOISY) self.f.seek(offset) return self.f.read(length) d.addCallback(_reached_in_read) return d def when_reached_or_failed(self, index): if noisy: self.log(".when_reached_or_failed(%r)" % (index,), level=NOISY) def _reached(res): if noisy: self.log("reached %r with result %r" % (index, res), level=NOISY) return res if self.done_status is not None: return defer.execute(_reached, self.done_status) if index <= self.downloaded: # already reached successfully if noisy: self.log("already reached %r successfully" % (index,), level=NOISY) return defer.succeed("already reached successfully") d = defer.Deferred() d.addCallback(_reached) heapq.heappush(self.milestones, (index, d)) return d def when_done(self): d = defer.Deferred() self.done.addCallback(lambda ign: eventually_callback(d)(self.done_status)) return d def download_done(self, res): _assert(isinstance(res, (str, Failure)), res=res) # Only the first call to download_done counts, but we log subsequent calls # (multiple calls are normal). if self.done_status is not None: self.log("IGNORING extra call to download_done with result %r; previous result was %r" % (res, self.done_status), level=OPERATIONAL) return self.log("DONE with result %r" % (res,), level=OPERATIONAL) # We avoid errbacking self.done so that we are not left with an 'Unhandled error in Deferred' # in case when_done() is never called. Instead we stash the failure in self.done_status, # from where the callback added in when_done() can retrieve it. self.done_status = res eventually_callback(self.done)(None) while len(self.milestones) > 0: (next, d) = self.milestones[0] if noisy: self.log("MILESTONE FINISH %r %r %r" % (next, d, res), level=NOISY) heapq.heappop(self.milestones) # The callback means that the milestone has been reached if # it is ever going to be. Note that the file may have been # truncated to before the milestone. eventually_callback(d)(res) def close(self): if not self.is_closed: self.is_closed = True try: self.f.close() except Exception, e: self.log("suppressed %r from close of temporary file %r" % (e, self.f), level=WEIRD) self.download_done("closed") return self.done_status def unregisterProducer(self): # This will happen just before our client calls download_done, which will tell # us the outcome of the download; we don't know the outcome at this point. self.producer = None self.log("producer unregistered", level=NOISY) SIZE_THRESHOLD = 1000 class ShortReadOnlySFTPFile(PrefixingLogMixin): implements(ISFTPFile) """I represent a file handle to a particular file on an SFTP connection. I am used only for short immutable files opened in read-only mode. When I am created, the file contents start to be downloaded to memory. 
self.async is used to delay read requests until the download has finished.""" def __init__(self, userpath, filenode, metadata): PrefixingLogMixin.__init__(self, facility="tahoe.sftp", prefix=userpath) if noisy: self.log(".__init__(%r, %r, %r)" % (userpath, filenode, metadata), level=NOISY) precondition(isinstance(userpath, str) and IFileNode.providedBy(filenode), userpath=userpath, filenode=filenode) self.filenode = filenode self.metadata = metadata self.async = download_to_data(filenode) self.closed = False def readChunk(self, offset, length): request = ".readChunk(%r, %r)" % (offset, length) self.log(request, level=OPERATIONAL) if self.closed: def _closed(): raise SFTPError(FX_BAD_MESSAGE, "cannot read from a closed file handle") return defer.execute(_closed) d = defer.Deferred() def _read(data): if noisy: self.log("_read() in readChunk(%r, %r)" % (len(data), offset, length), level=NOISY) # "In response to this request, the server will read as many bytes as it # can from the file (up to 'len'), and return them in a SSH_FXP_DATA # message. If an error occurs or EOF is encountered before reading any # data, the server will respond with SSH_FXP_STATUS. For normal disk # files, it is guaranteed that this will read the specified number of # bytes, or up to end of file." # # i.e. we respond with an EOF error iff offset is already at EOF. if offset >= len(data): eventually_errback(d)(Failure(SFTPError(FX_EOF, "read at or past end of file"))) else: eventually_callback(d)(data[offset:offset+length]) # truncated if offset+length > len(data) return data self.async.addCallbacks(_read, eventually_errback(d)) d.addBoth(_convert_error, request) return d def writeChunk(self, offset, data): self.log(".writeChunk(%r, ) denied" % (offset, len(data)), level=OPERATIONAL) def _denied(): raise SFTPError(FX_PERMISSION_DENIED, "file handle was not opened for writing") return defer.execute(_denied) def close(self): self.log(".close()", level=OPERATIONAL) self.closed = True return defer.succeed(None) def getAttrs(self): request = ".getAttrs()" self.log(request, level=OPERATIONAL) if self.closed: def _closed(): raise SFTPError(FX_BAD_MESSAGE, "cannot get attributes for a closed file handle") return defer.execute(_closed) d = defer.execute(_populate_attrs, self.filenode, self.metadata) d.addBoth(_convert_error, request) return d def setAttrs(self, attrs): self.log(".setAttrs(%r) denied" % (attrs,), level=OPERATIONAL) def _denied(): raise SFTPError(FX_PERMISSION_DENIED, "file handle was not opened for writing") return defer.execute(_denied) class GeneralSFTPFile(PrefixingLogMixin): implements(ISFTPFile) """I represent a file handle to a particular file on an SFTP connection. I wrap an instance of OverwriteableFileConsumer, which is responsible for storing the file contents. In order to allow write requests to be satisfied immediately, there is effectively a FIFO queue between requests made to this file handle, and requests to my OverwriteableFileConsumer. This queue is implemented by the callback chain of self.async. 
When first constructed, I am in an 'unopened' state that causes most operations to be delayed until 'open' is called.""" def __init__(self, userpath, flags, close_notify, convergence): PrefixingLogMixin.__init__(self, facility="tahoe.sftp", prefix=userpath) if noisy: self.log(".__init__(%r, %r = %r, %r, )" % (userpath, flags, _repr_flags(flags), close_notify), level=NOISY) precondition(isinstance(userpath, str), userpath=userpath) self.userpath = userpath self.flags = flags self.close_notify = close_notify self.convergence = convergence self.async = defer.Deferred() # Creating or truncating the file is a change, but if FXF_EXCL is set, a zero-length file has already been created. self.has_changed = (flags & (FXF_CREAT | FXF_TRUNC)) and not (flags & FXF_EXCL) self.closed = False self.abandoned = False self.parent = None self.childname = None self.filenode = None self.metadata = None # self.consumer should only be relied on in callbacks for self.async, since it might # not be set before then. self.consumer = None def open(self, parent=None, childname=None, filenode=None, metadata=None): self.log(".open(parent=%r, childname=%r, filenode=%r, metadata=%r)" % (parent, childname, filenode, metadata), level=OPERATIONAL) precondition(isinstance(childname, (unicode, NoneType)), childname=childname) precondition(filenode is None or IFileNode.providedBy(filenode), filenode=filenode) precondition(not self.closed, sftpfile=self) # If the file has been renamed, the new (parent, childname) takes precedence. if self.parent is None: self.parent = parent if self.childname is None: self.childname = childname self.filenode = filenode self.metadata = metadata tempfile_maker = EncryptedTemporaryFile if (self.flags & FXF_TRUNC) or not filenode: # We're either truncating or creating the file, so we don't need the old contents. self.consumer = OverwriteableFileConsumer(0, tempfile_maker) self.consumer.download_done("download not needed") else: self.async.addCallback(lambda ignored: filenode.get_best_readable_version()) def _read(version): if noisy: self.log("_read", level=NOISY) download_size = version.get_size() _assert(download_size is not None) self.consumer = OverwriteableFileConsumer(download_size, tempfile_maker) d = version.read(self.consumer, 0, None) def _finished(res): if not isinstance(res, Failure): res = "download finished" self.consumer.download_done(res) d.addBoth(_finished) # It is correct to drop d here. self.async.addCallback(_read) eventually_callback(self.async)(None) if noisy: self.log("open done", level=NOISY) return self def get_userpath(self): return self.userpath def get_direntry(self): return _direntry_for(self.parent, self.childname) def rename(self, new_userpath, new_parent, new_childname): self.log(".rename(%r, %r, %r)" % (new_userpath, new_parent, new_childname), level=OPERATIONAL) precondition(isinstance(new_userpath, str) and isinstance(new_childname, unicode), new_userpath=new_userpath, new_childname=new_childname) self.userpath = new_userpath self.parent = new_parent self.childname = new_childname def abandon(self): self.log(".abandon()", level=OPERATIONAL) self.abandoned = True def sync(self, ign=None): # The ign argument allows some_file.sync to be used as a callback. 
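# --- Editor's illustrative sketch (not part of the upstream module). ---------
# The class docstring above describes self.async as a FIFO queue: each request
# is appended to the callback chain of a single Deferred, and nothing runs
# until 'open' fires the chain.  The toy below shows that queueing behaviour in
# isolation; the real code fires the chain via eventually_callback() and the
# queued operations may themselves return Deferreds.  Names are hypothetical.
def _example_request_queue():
    from twisted.internet import defer

    order = []
    queue = defer.Deferred()          # unfired: callbacks just queue up

    queue.addCallback(lambda ign: order.append("download started"))
    queue.addCallback(lambda ign: order.append("writeChunk applied"))
    queue.addCallback(lambda ign: order.append("close committed"))

    queue.callback(None)              # 'open' completes; queued ops run in order
    return order                      # ["download started", "writeChunk applied", "close committed"]
# ------------------------------------------------------------------------------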
self.log(".sync()", level=OPERATIONAL) d = defer.Deferred() self.async.addBoth(eventually_callback(d)) def _done(res): if noisy: self.log("_done(%r) in .sync()" % (res,), level=NOISY) return res d.addBoth(_done) return d def readChunk(self, offset, length): request = ".readChunk(%r, %r)" % (offset, length) self.log(request, level=OPERATIONAL) if not (self.flags & FXF_READ): def _denied(): raise SFTPError(FX_PERMISSION_DENIED, "file handle was not opened for reading") return defer.execute(_denied) if self.closed: def _closed(): raise SFTPError(FX_BAD_MESSAGE, "cannot read from a closed file handle") return defer.execute(_closed) d = defer.Deferred() def _read(ign): if noisy: self.log("_read in readChunk(%r, %r)" % (offset, length), level=NOISY) d2 = self.consumer.read(offset, length) d2.addBoth(eventually_callback(d)) # It is correct to drop d2 here. return None self.async.addCallbacks(_read, eventually_errback(d)) d.addBoth(_convert_error, request) return d def writeChunk(self, offset, data): self.log(".writeChunk(%r, )" % (offset, len(data)), level=OPERATIONAL) if not (self.flags & FXF_WRITE): def _denied(): raise SFTPError(FX_PERMISSION_DENIED, "file handle was not opened for writing") return defer.execute(_denied) if self.closed: def _closed(): raise SFTPError(FX_BAD_MESSAGE, "cannot write to a closed file handle") return defer.execute(_closed) self.has_changed = True # Note that we return without waiting for the write to occur. Reads and # close wait for prior writes, and will fail if any prior operation failed. # This is ok because SFTP makes no guarantee that the write completes # before the request does. In fact it explicitly allows write errors to be # delayed until close: # "One should note that on some server platforms even a close can fail. # This can happen e.g. if the server operating system caches writes, # and an error occurs while flushing cached writes during the close." def _write(ign): if noisy: self.log("_write in .writeChunk(%r, ), current_size = %r" % (offset, len(data), self.consumer.get_current_size()), level=NOISY) # FXF_APPEND means that we should always write at the current end of file. write_offset = offset if self.flags & FXF_APPEND: write_offset = self.consumer.get_current_size() self.consumer.overwrite(write_offset, data) if noisy: self.log("overwrite done", level=NOISY) return None self.async.addCallback(_write) # don't addErrback to self.async, just allow subsequent async ops to fail. return defer.succeed(None) def _do_close(self, res, d=None): if noisy: self.log("_do_close(%r)" % (res,), level=NOISY) status = None if self.consumer: status = self.consumer.close() # We must close_notify before re-firing self.async. if self.close_notify: self.close_notify(self.userpath, self.parent, self.childname, self) if not isinstance(res, Failure) and isinstance(status, Failure): res = status if d: eventually_callback(d)(res) elif isinstance(res, Failure): self.log("suppressing %r" % (res,), level=OPERATIONAL) def close(self): request = ".close()" self.log(request, level=OPERATIONAL) if self.closed: return defer.succeed(None) # This means that close has been called, not that the close has succeeded. self.closed = True if not (self.flags & (FXF_WRITE | FXF_CREAT)): # We never fail a close of a handle opened only for reading, even if the file # failed to download. (We could not do so deterministically, because it would # depend on whether we reached the point of failure before abandoning the # download.) 
Any reads that depended on file content that could not be downloaded # will have failed. It is important that we don't close the consumer until # previous read operations have completed. self.async.addBoth(self._do_close) return defer.succeed(None) # We must capture the abandoned, parent, and childname variables synchronously # at the close call. This is needed by the correctness arguments in the comments # for _abandon_any_heisenfiles and _rename_heisenfiles. # Note that the file must have been opened before it can be closed. abandoned = self.abandoned parent = self.parent childname = self.childname # has_changed is set when writeChunk is called, not when the write occurs, so # it is correct to optimize out the commit if it is False at the close call. has_changed = self.has_changed def _commit(ign): d2 = self.consumer.when_done() if self.filenode and self.filenode.is_mutable(): self.log("update mutable file %r childname=%r metadata=%r" % (self.filenode, childname, self.metadata), level=OPERATIONAL) if self.metadata.get('no-write', False) and not self.filenode.is_readonly(): _assert(parent and childname, parent=parent, childname=childname, metadata=self.metadata) d2.addCallback(lambda ign: parent.set_metadata_for(childname, self.metadata)) d2.addCallback(lambda ign: self.filenode.overwrite(MutableFileHandle(self.consumer.get_file()))) else: def _add_file(ign): self.log("_add_file childname=%r" % (childname,), level=OPERATIONAL) u = FileHandle(self.consumer.get_file(), self.convergence) return parent.add_file(childname, u, metadata=self.metadata) d2.addCallback(_add_file) return d2 # If the file has been abandoned, we don't want the close operation to get "stuck", # even if self.async fails to re-fire. Completing the close independently of self.async # in that case should ensure that dropping an ssh connection is sufficient to abandon # any heisenfiles that were not explicitly closed in that connection. if abandoned or not has_changed: d = defer.succeed(None) self.async.addBoth(self._do_close) else: d = defer.Deferred() self.async.addCallback(_commit) self.async.addBoth(self._do_close, d) d.addBoth(_convert_error, request) return d def getAttrs(self): request = ".getAttrs()" self.log(request, level=OPERATIONAL) if self.closed: def _closed(): raise SFTPError(FX_BAD_MESSAGE, "cannot get attributes for a closed file handle") return defer.execute(_closed) # Optimization for read-only handles, when we already know the metadata. if not (self.flags & (FXF_WRITE | FXF_CREAT)) and self.metadata and self.filenode and not self.filenode.is_mutable(): return defer.succeed(_populate_attrs(self.filenode, self.metadata)) d = defer.Deferred() def _get(ign): if noisy: self.log("_get(%r) in %r, filenode = %r, metadata = %r" % (ign, request, self.filenode, self.metadata), level=NOISY) # self.filenode might be None, but that's ok. 
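# --- Editor's illustrative sketch (not part of the upstream module). ---------
# close() above attaches _do_close with addBoth so the handle is always torn
# down, whether the queued commit succeeded or failed, and only then surfaces
# any error to the caller.  The toy below isolates that addBoth-as-finally
# pattern; the names and log strings are hypothetical.
def _example_always_cleanup(commit_succeeds):
    from twisted.internet import defer

    log = []

    def commit(ign):
        if not commit_succeeds:
            raise RuntimeError("upload failed")
        log.append("committed")

    def cleanup(res):
        log.append("closed")          # runs on success and on failure alike
        return res                    # pass the result (or Failure) through

    d = defer.succeed(None)
    d.addCallback(commit)
    d.addBoth(cleanup)                # analogous to self.async.addBoth(self._do_close, d)
    d.addErrback(lambda f: log.append("error: " + f.getErrorMessage()))
    return log    # ["committed", "closed"] or ["closed", "error: upload failed"]
# ------------------------------------------------------------------------------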
attrs = _populate_attrs(self.filenode, self.metadata, size=self.consumer.get_current_size()) eventually_callback(d)(attrs) return None self.async.addCallbacks(_get, eventually_errback(d)) d.addBoth(_convert_error, request) return d def setAttrs(self, attrs, only_if_at=None): request = ".setAttrs(%r, only_if_at=%r)" % (attrs, only_if_at) self.log(request, level=OPERATIONAL) if not (self.flags & FXF_WRITE): def _denied(): raise SFTPError(FX_PERMISSION_DENIED, "file handle was not opened for writing") return defer.execute(_denied) if self.closed: def _closed(): raise SFTPError(FX_BAD_MESSAGE, "cannot set attributes for a closed file handle") return defer.execute(_closed) size = attrs.get("size", None) if size is not None and (not isinstance(size, (int, long)) or size < 0): def _bad(): raise SFTPError(FX_BAD_MESSAGE, "new size is not a valid nonnegative integer") return defer.execute(_bad) d = defer.Deferred() def _set(ign): if noisy: self.log("_set(%r) in %r" % (ign, request), level=NOISY) current_direntry = _direntry_for(self.parent, self.childname, self.filenode) if only_if_at and only_if_at != current_direntry: if noisy: self.log("not setting attributes: current_direntry=%r in %r" % (current_direntry, request), level=NOISY) return None now = time() self.metadata = update_metadata(self.metadata, _attrs_to_metadata(attrs), now) if size is not None: # TODO: should we refuse to truncate a file opened with FXF_APPEND? # self.consumer.set_current_size(size) eventually_callback(d)(None) return None self.async.addCallbacks(_set, eventually_errback(d)) d.addBoth(_convert_error, request) return d class StoppableList: def __init__(self, items): self.items = items def __iter__(self): for i in self.items: yield i def close(self): pass class Reason: def __init__(self, value): self.value = value # A "heisenfile" is a file that has been opened with write flags # (FXF_WRITE and/or FXF_CREAT) and not yet close-notified. # 'all_heisenfiles' maps from a direntry string to a list of # GeneralSFTPFile. # # A direntry string is parent_write_uri + "/" + childname_utf8 for # an immutable file, or file_write_uri for a mutable file. # Updates to this dict are single-threaded. 
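# --- Editor's illustrative sketch (not part of the upstream module). ---------
# The comment above defines the direntry-string convention that keys
# all_heisenfiles: a mutable file is keyed by its own write URI, while an
# immutable file is keyed by its parent's write URI plus "/" plus the UTF-8
# child name (see _direntry_for earlier in this file).  A minimal restatement,
# with placeholder URIs and a hypothetical helper name:
def _example_direntry(parent_write_uri, childname, file_write_uri=None):
    if file_write_uri is not None:
        return file_write_uri                                  # mutable file
    return parent_write_uri + "/" + childname.encode("utf-8")  # immutable file
# e.g. _example_direntry("URI:DIR2:...", u"photo.jpg") -> "URI:DIR2:.../photo.jpg"
# ------------------------------------------------------------------------------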
all_heisenfiles = {} def _reload(): global all_heisenfiles all_heisenfiles = {} class SFTPUserHandler(ConchUser, PrefixingLogMixin): implements(ISFTPServer) def __init__(self, client, rootnode, username): ConchUser.__init__(self) PrefixingLogMixin.__init__(self, facility="tahoe.sftp", prefix=username) if noisy: self.log(".__init__(%r, %r, %r)" % (client, rootnode, username), level=NOISY) self.channelLookup["session"] = session.SSHSession self.subsystemLookup["sftp"] = FileTransferServer self._client = client self._root = rootnode self._username = username self._convergence = client.convergence # maps from UTF-8 paths for this user, to files written and still open self._heisenfiles = {} def gotVersion(self, otherVersion, extData): self.log(".gotVersion(%r, %r)" % (otherVersion, extData), level=OPERATIONAL) # advertise the same extensions as the OpenSSH SFTP server # return {'posix-rename@openssh.com': '1', 'statvfs@openssh.com': '2', 'fstatvfs@openssh.com': '2', } def logout(self): self.log(".logout()", level=OPERATIONAL) for files in self._heisenfiles.itervalues(): for f in files: f.abandon() def _add_heisenfile_by_path(self, file): self.log("._add_heisenfile_by_path(%r)" % (file,), level=OPERATIONAL) userpath = file.get_userpath() if userpath in self._heisenfiles: self._heisenfiles[userpath] += [file] else: self._heisenfiles[userpath] = [file] def _add_heisenfile_by_direntry(self, file): self.log("._add_heisenfile_by_direntry(%r)" % (file,), level=OPERATIONAL) direntry = file.get_direntry() if direntry: if direntry in all_heisenfiles: all_heisenfiles[direntry] += [file] else: all_heisenfiles[direntry] = [file] def _abandon_any_heisenfiles(self, userpath, direntry): request = "._abandon_any_heisenfiles(%r, %r)" % (userpath, direntry) self.log(request, level=OPERATIONAL) precondition(isinstance(userpath, str), userpath=userpath) # First we synchronously mark all heisenfiles matching the userpath or direntry # as abandoned, and remove them from the two heisenfile dicts. Then we .sync() # each file that we abandoned. # # For each file, the call to .abandon() occurs: # * before the file is closed, in which case it will never be committed # (uploaded+linked or published); or # * after it is closed but before it has been close_notified, in which case the # .sync() ensures that it has been committed (successfully or not) before we # return. # # This avoids a race that might otherwise cause the file to be committed after # the remove operation has completed. # # We return a Deferred that fires with True if any files were abandoned (this # does not mean that they were not committed; it is used to determine whether # a NoSuchChildError from the attempt to delete the file should be suppressed). 
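# --- Editor's illustrative sketch (not part of the upstream module). ---------
# _abandon_any_heisenfiles above first marks every matching open handle as
# abandoned (synchronously), then waits for each handle's in-flight commit via
# .sync(), and finally reports whether anything was abandoned.  The toy below
# mirrors that sequencing with fake handles; all names are hypothetical.
def _example_abandon_and_sync():
    from twisted.internet import defer

    class FakeHeisenfile(object):
        def __init__(self):
            self.abandoned = False
        def abandon(self):
            self.abandoned = True
        def sync(self, ign=None):
            return defer.succeed(None)    # a real sync waits for the pending commit

    files = [FakeHeisenfile(), FakeHeisenfile()]
    for f in files:
        f.abandon()                       # step 1: synchronous, before any waiting

    d = defer.succeed(None)
    for f in files:
        d.addBoth(f.sync)                 # step 2: wait for each, even after errors
    d.addBoth(lambda ign: len(files) > 0) # step 3: True iff anything was abandoned
    return d
# ------------------------------------------------------------------------------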
files = [] if direntry in all_heisenfiles: files = all_heisenfiles[direntry] del all_heisenfiles[direntry] if userpath in self._heisenfiles: files += self._heisenfiles[userpath] del self._heisenfiles[userpath] if noisy: self.log("files = %r in %r" % (files, request), level=NOISY) for f in files: f.abandon() d = defer.succeed(None) for f in files: d.addBoth(f.sync) def _done(ign): self.log("done %r" % (request,), level=OPERATIONAL) return len(files) > 0 d.addBoth(_done) return d def _rename_heisenfiles(self, from_userpath, from_parent, from_childname, to_userpath, to_parent, to_childname, overwrite=True): request = ("._rename_heisenfiles(%r, %r, %r, %r, %r, %r, overwrite=%r)" % (from_userpath, from_parent, from_childname, to_userpath, to_parent, to_childname, overwrite)) self.log(request, level=OPERATIONAL) precondition((isinstance(from_userpath, str) and isinstance(from_childname, unicode) and isinstance(to_userpath, str) and isinstance(to_childname, unicode)), from_userpath=from_userpath, from_childname=from_childname, to_userpath=to_userpath, to_childname=to_childname) if noisy: self.log("all_heisenfiles = %r\nself._heisenfiles = %r" % (all_heisenfiles, self._heisenfiles), level=NOISY) # First we synchronously rename all heisenfiles matching the userpath or direntry. # Then we .sync() each file that we renamed. # # For each file, the call to .rename occurs: # * before the file is closed, in which case it will be committed at the # new direntry; or # * after it is closed but before it has been close_notified, in which case the # .sync() ensures that it has been committed (successfully or not) before we # return. # # This avoids a race that might otherwise cause the file to be committed at the # old name after the rename operation has completed. # # Note that if overwrite is False, the caller should already have checked # whether a real direntry exists at the destination. It is possible that another # direntry (heisen or real) comes to exist at the destination after that check, # but in that case it is correct for the rename to succeed (and for the commit # of the heisenfile at the destination to possibly clobber the other entry, since # that can happen anyway when we have concurrent write handles to the same direntry). # # We return a Deferred that fires with True if any files were renamed (this # does not mean that they were not committed; it is used to determine whether # a NoSuchChildError from the rename attempt should be suppressed). If overwrite # is False and there were already heisenfiles at the destination userpath or # direntry, we return a Deferred that fails with SFTPError(FX_PERMISSION_DENIED). 
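# --- Editor's illustrative sketch (not part of the upstream module). ---------
# When overwrite=False and a heisenfile already exists at the destination, the
# code below fails early with defer.execute(_existing): the check runs
# immediately, but the raised exception is delivered as an already-errbacked
# Deferred, so callers see the same Deferred-based interface for early and
# late failures.  A standalone toy (the exception class and message are
# hypothetical; the real code raises SFTPError(FX_PERMISSION_DENIED)):
def _example_fail_as_deferred(to_userpath):
    from twisted.internet import defer

    class DestinationExists(Exception):
        pass

    def _existing():
        raise DestinationExists("cannot rename to existing path " + to_userpath)

    return defer.execute(_existing)       # a Deferred that has already errbacked
# ------------------------------------------------------------------------------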
from_direntry = _direntry_for(from_parent, from_childname) to_direntry = _direntry_for(to_parent, to_childname) if noisy: self.log("from_direntry = %r, to_direntry = %r, len(all_heisenfiles) = %r, len(self._heisenfiles) = %r in %r" % (from_direntry, to_direntry, len(all_heisenfiles), len(self._heisenfiles), request), level=NOISY) if not overwrite and (to_userpath in self._heisenfiles or to_direntry in all_heisenfiles): def _existing(): raise SFTPError(FX_PERMISSION_DENIED, "cannot rename to existing path " + to_userpath) if noisy: self.log("existing", level=NOISY) return defer.execute(_existing) from_files = [] if from_direntry in all_heisenfiles: from_files = all_heisenfiles[from_direntry] del all_heisenfiles[from_direntry] if from_userpath in self._heisenfiles: from_files += self._heisenfiles[from_userpath] del self._heisenfiles[from_userpath] if noisy: self.log("from_files = %r in %r" % (from_files, request), level=NOISY) for f in from_files: f.rename(to_userpath, to_parent, to_childname) self._add_heisenfile_by_path(f) self._add_heisenfile_by_direntry(f) d = defer.succeed(None) for f in from_files: d.addBoth(f.sync) def _done(ign): if noisy: self.log("done: len(all_heisenfiles) = %r, len(self._heisenfiles) = %r in %r" % (len(all_heisenfiles), len(self._heisenfiles), request), level=NOISY) return len(from_files) > 0 d.addBoth(_done) return d def _update_attrs_for_heisenfiles(self, userpath, direntry, attrs): request = "._update_attrs_for_heisenfiles(%r, %r, %r)" % (userpath, direntry, attrs) self.log(request, level=OPERATIONAL) _assert(isinstance(userpath, str) and isinstance(direntry, str), userpath=userpath, direntry=direntry) files = [] if direntry in all_heisenfiles: files = all_heisenfiles[direntry] if userpath in self._heisenfiles: files += self._heisenfiles[userpath] if noisy: self.log("files = %r in %r" % (files, request), level=NOISY) # We set the metadata for all heisenfiles at this path or direntry. # Since a direntry includes a write URI, we must have authority to # change the metadata of heisenfiles found in the all_heisenfiles dict. # However that's not necessarily the case for heisenfiles found by # path. Therefore we tell the setAttrs method of each file to only # perform the update if the file is at the correct direntry. d = defer.succeed(None) for f in files: d.addBoth(f.setAttrs, attrs, only_if_at=direntry) def _done(ign): self.log("done %r" % (request,), level=OPERATIONAL) # TODO: this should not return True if only_if_at caused all files to be skipped. 
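# --- Editor's illustrative sketch (not part of the upstream module). ---------
# _update_attrs_for_heisenfiles above may find handles both by direntry (which
# proves write authority) and by userpath (which does not), so each handle's
# setAttrs is told to apply the update only_if_at the expected direntry.  The
# toy below shows just that guard; names and values are hypothetical.
def _example_only_if_at_guard(current_direntry, attrs, only_if_at=None):
    if only_if_at is not None and only_if_at != current_direntry:
        return None                       # found by path only: leave it untouched
    return dict(attrs)                    # authorized: apply the metadata update
# _example_only_if_at_guard("URI:X/photo", {"mtime": 1}, only_if_at="URI:Y/photo") -> None
# _example_only_if_at_guard("URI:X/photo", {"mtime": 1}, only_if_at="URI:X/photo") -> {'mtime': 1}
# ------------------------------------------------------------------------------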
return len(files) > 0 d.addBoth(_done) return d def _sync_heisenfiles(self, userpath, direntry, ignore=None): request = "._sync_heisenfiles(%r, %r, ignore=%r)" % (userpath, direntry, ignore) self.log(request, level=OPERATIONAL) _assert(isinstance(userpath, str) and isinstance(direntry, (str, NoneType)), userpath=userpath, direntry=direntry) files = [] if direntry in all_heisenfiles: files = all_heisenfiles[direntry] if userpath in self._heisenfiles: files += self._heisenfiles[userpath] if noisy: self.log("files = %r in %r" % (files, request), level=NOISY) d = defer.succeed(None) for f in files: if f is not ignore: d.addBoth(f.sync) def _done(ign): self.log("done %r" % (request,), level=OPERATIONAL) return None d.addBoth(_done) return d def _remove_heisenfile(self, userpath, parent, childname, file_to_remove): if noisy: self.log("._remove_heisenfile(%r, %r, %r, %r)" % (userpath, parent, childname, file_to_remove), level=NOISY) _assert(isinstance(userpath, str) and isinstance(childname, (unicode, NoneType)), userpath=userpath, childname=childname) direntry = _direntry_for(parent, childname) if direntry in all_heisenfiles: all_old_files = all_heisenfiles[direntry] all_new_files = [f for f in all_old_files if f is not file_to_remove] if len(all_new_files) > 0: all_heisenfiles[direntry] = all_new_files else: del all_heisenfiles[direntry] if userpath in self._heisenfiles: old_files = self._heisenfiles[userpath] new_files = [f for f in old_files if f is not file_to_remove] if len(new_files) > 0: self._heisenfiles[userpath] = new_files else: del self._heisenfiles[userpath] if noisy: self.log("all_heisenfiles = %r\nself._heisenfiles = %r" % (all_heisenfiles, self._heisenfiles), level=NOISY) def _make_file(self, existing_file, userpath, flags, parent=None, childname=None, filenode=None, metadata=None): if noisy: self.log("._make_file(%r, %r, %r = %r, parent=%r, childname=%r, filenode=%r, metadata=%r)" % (existing_file, userpath, flags, _repr_flags(flags), parent, childname, filenode, metadata), level=NOISY) _assert((isinstance(userpath, str) and isinstance(childname, (unicode, NoneType)) and (metadata is None or 'no-write' in metadata)), userpath=userpath, childname=childname, metadata=metadata) writing = (flags & (FXF_WRITE | FXF_CREAT)) != 0 direntry = _direntry_for(parent, childname, filenode) d = self._sync_heisenfiles(userpath, direntry, ignore=existing_file) if not writing and (flags & FXF_READ) and filenode and not filenode.is_mutable() and filenode.get_size() <= SIZE_THRESHOLD: d.addCallback(lambda ign: ShortReadOnlySFTPFile(userpath, filenode, metadata)) else: close_notify = None if writing: close_notify = self._remove_heisenfile d.addCallback(lambda ign: existing_file or GeneralSFTPFile(userpath, flags, close_notify, self._convergence)) def _got_file(file): file.open(parent=parent, childname=childname, filenode=filenode, metadata=metadata) if writing: self._add_heisenfile_by_direntry(file) return file d.addCallback(_got_file) return d def openFile(self, pathstring, flags, attrs, delay=None): request = ".openFile(%r, %r = %r, %r, delay=%r)" % (pathstring, flags, _repr_flags(flags), attrs, delay) self.log(request, level=OPERATIONAL) # This is used for both reading and writing. # First exclude invalid combinations of flags, and empty paths. 
if not (flags & (FXF_READ | FXF_WRITE)): def _bad_readwrite(): raise SFTPError(FX_BAD_MESSAGE, "invalid file open flags: at least one of FXF_READ and FXF_WRITE must be set") return defer.execute(_bad_readwrite) if (flags & FXF_EXCL) and not (flags & FXF_CREAT): def _bad_exclcreat(): raise SFTPError(FX_BAD_MESSAGE, "invalid file open flags: FXF_EXCL cannot be set without FXF_CREAT") return defer.execute(_bad_exclcreat) path = self._path_from_string(pathstring) if not path: def _emptypath(): raise SFTPError(FX_NO_SUCH_FILE, "path cannot be empty") return defer.execute(_emptypath) # The combination of flags is potentially valid. # To work around clients that have race condition bugs, a getAttr, rename, or # remove request following an 'open' request with FXF_WRITE or FXF_CREAT flags, # should succeed even if the 'open' request has not yet completed. So we now # synchronously add a file object into the self._heisenfiles dict, indexed # by its UTF-8 userpath. (We can't yet add it to the all_heisenfiles dict, # because we don't yet have a user-independent path for the file.) The file # object does not know its filenode, parent, or childname at this point. userpath = self._path_to_utf8(path) if flags & (FXF_WRITE | FXF_CREAT): file = GeneralSFTPFile(userpath, flags, self._remove_heisenfile, self._convergence) self._add_heisenfile_by_path(file) else: # We haven't decided which file implementation to use yet. file = None desired_metadata = _attrs_to_metadata(attrs) # Now there are two major cases: # # 1. The path is specified as /uri/FILECAP, with no parent directory. # If the FILECAP is mutable and writeable, then we can open it in write-only # or read/write mode (non-exclusively), otherwise we can only open it in # read-only mode. The open should succeed immediately as long as FILECAP is # a valid known filecap that grants the required permission. # # 2. The path is specified relative to a parent. We find the parent dirnode and # get the child's URI and metadata if it exists. There are four subcases: # a. the child does not exist: FXF_CREAT must be set, and we must be able # to write to the parent directory. # b. the child exists but is not a valid known filecap: fail # c. the child is mutable: if we are trying to open it write-only or # read/write, then we must be able to write to the file. # d. the child is immutable: if we are trying to open it write-only or # read/write, then we must be able to write to the parent directory. # # To reduce latency, open normally succeeds as soon as these conditions are # met, even though there might be a failure in downloading the existing file # or uploading a new one. However, there is an exception: if a file has been # written, then closed, and is now being reopened, then we have to delay the # open until the previous upload/publish has completed. This is necessary # because sshfs does not wait for the result of an FXF_CLOSE message before # reporting to the client that a file has been closed. It applies both to # mutable files, and to directory entries linked to an immutable file. # # Note that the permission checks below are for more precise error reporting on # the open call; later operations would fail even if we did not make these checks. d = delay or defer.succeed(None) d.addCallback(lambda ign: self._get_root(path)) def _got_root( (root, path) ): if root.is_unknown(): raise SFTPError(FX_PERMISSION_DENIED, "cannot open an unknown cap (or child of an unknown object). 
" "Upgrading the gateway to a later Tahoe-LAFS version may help") if not path: # case 1 if noisy: self.log("case 1: root = %r, path[:-1] = %r" % (root, path[:-1]), level=NOISY) if not IFileNode.providedBy(root): raise SFTPError(FX_PERMISSION_DENIED, "cannot open a directory cap") if (flags & FXF_WRITE) and root.is_readonly(): raise SFTPError(FX_PERMISSION_DENIED, "cannot write to a non-writeable filecap without a parent directory") if flags & FXF_EXCL: raise SFTPError(FX_FAILURE, "cannot create a file exclusively when it already exists") # The file does not need to be added to all_heisenfiles, because it is not # associated with a directory entry that needs to be updated. metadata = update_metadata(None, desired_metadata, time()) # We have to decide what to pass for the 'parent_readonly' argument to _no_write, # given that we don't actually have a parent. This only affects the permissions # reported by a getAttrs on this file handle in the case of an immutable file. # We choose 'parent_readonly=True' since that will cause the permissions to be # reported as r--r--r--, which is appropriate because an immutable file can't be # written via this path. metadata['no-write'] = _no_write(True, root) return self._make_file(file, userpath, flags, filenode=root, metadata=metadata) else: # case 2 childname = path[-1] if noisy: self.log("case 2: root = %r, childname = %r, desired_metadata = %r, path[:-1] = %r" % (root, childname, desired_metadata, path[:-1]), level=NOISY) d2 = root.get_child_at_path(path[:-1]) def _got_parent(parent): if noisy: self.log("_got_parent(%r)" % (parent,), level=NOISY) if parent.is_unknown(): raise SFTPError(FX_PERMISSION_DENIED, "cannot open a child of an unknown object. " "Upgrading the gateway to a later Tahoe-LAFS version may help") parent_readonly = parent.is_readonly() d3 = defer.succeed(None) if flags & FXF_EXCL: # FXF_EXCL means that the link to the file (not the file itself) must # be created atomically wrt updates by this storage client. # That is, we need to create the link before returning success to the # SFTP open request (and not just on close, as would normally be the # case). We make the link initially point to a zero-length LIT file, # which is consistent with what might happen on a POSIX filesystem. if parent_readonly: raise SFTPError(FX_FAILURE, "cannot create a file exclusively when the parent directory is read-only") # 'overwrite=False' ensures failure if the link already exists. # FIXME: should use a single call to set_uri and return (child, metadata) (#1035) zero_length_lit = "URI:LIT:" if noisy: self.log("%r.set_uri(%r, None, readcap=%r, overwrite=False)" % (parent, zero_length_lit, childname), level=NOISY) d3.addCallback(lambda ign: parent.set_uri(childname, None, readcap=zero_length_lit, metadata=desired_metadata, overwrite=False)) def _seturi_done(child): if noisy: self.log("%r.get_metadata_for(%r)" % (parent, childname), level=NOISY) d4 = parent.get_metadata_for(childname) d4.addCallback(lambda metadata: (child, metadata)) return d4 d3.addCallback(_seturi_done) else: if noisy: self.log("%r.get_child_and_metadata(%r)" % (parent, childname), level=NOISY) d3.addCallback(lambda ign: parent.get_child_and_metadata(childname)) def _got_child( (filenode, current_metadata) ): if noisy: self.log("_got_child( (%r, %r) )" % (filenode, current_metadata), level=NOISY) metadata = update_metadata(current_metadata, desired_metadata, time()) # Ignore the permissions of the desired_metadata in an open call. The permissions # can only be set by setAttrs. 
metadata['no-write'] = _no_write(parent_readonly, filenode, current_metadata) if filenode.is_unknown(): raise SFTPError(FX_PERMISSION_DENIED, "cannot open an unknown cap. Upgrading the gateway " "to a later Tahoe-LAFS version may help") if not IFileNode.providedBy(filenode): raise SFTPError(FX_PERMISSION_DENIED, "cannot open a directory as if it were a file") if (flags & FXF_WRITE) and metadata['no-write']: raise SFTPError(FX_PERMISSION_DENIED, "cannot open a non-writeable file for writing") return self._make_file(file, userpath, flags, parent=parent, childname=childname, filenode=filenode, metadata=metadata) def _no_child(f): if noisy: self.log("_no_child(%r)" % (f,), level=NOISY) f.trap(NoSuchChildError) if not (flags & FXF_CREAT): raise SFTPError(FX_NO_SUCH_FILE, "the file does not exist, and was not opened with the creation (CREAT) flag") if parent_readonly: raise SFTPError(FX_PERMISSION_DENIED, "cannot create a file when the parent directory is read-only") return self._make_file(file, userpath, flags, parent=parent, childname=childname) d3.addCallbacks(_got_child, _no_child) return d3 d2.addCallback(_got_parent) return d2 d.addCallback(_got_root) def _remove_on_error(err): if file: self._remove_heisenfile(userpath, None, None, file) return err d.addErrback(_remove_on_error) d.addBoth(_convert_error, request) return d def renameFile(self, from_pathstring, to_pathstring, overwrite=False): request = ".renameFile(%r, %r)" % (from_pathstring, to_pathstring) self.log(request, level=OPERATIONAL) from_path = self._path_from_string(from_pathstring) to_path = self._path_from_string(to_pathstring) from_userpath = self._path_to_utf8(from_path) to_userpath = self._path_to_utf8(to_path) # the target directory must already exist d = deferredutil.gatherResults([self._get_parent_or_node(from_path), self._get_parent_or_node(to_path)]) def _got( (from_pair, to_pair) ): if noisy: self.log("_got( (%r, %r) ) in .renameFile(%r, %r, overwrite=%r)" % (from_pair, to_pair, from_pathstring, to_pathstring, overwrite), level=NOISY) (from_parent, from_childname) = from_pair (to_parent, to_childname) = to_pair if from_childname is None: raise SFTPError(FX_NO_SUCH_FILE, "cannot rename a source object specified by URI") if to_childname is None: raise SFTPError(FX_NO_SUCH_FILE, "cannot rename to a destination specified by URI") # # "It is an error if there already exists a file with the name specified # by newpath." # OpenSSH's SFTP server returns FX_PERMISSION_DENIED for this error. # # For the standard SSH_FXP_RENAME operation, overwrite=False. # We also support the posix-rename@openssh.com extension, which uses overwrite=True. d2 = defer.succeed(None) if not overwrite: d2.addCallback(lambda ign: to_parent.get(to_childname)) def _expect_fail(res): if not isinstance(res, Failure): raise SFTPError(FX_PERMISSION_DENIED, "cannot rename to existing path " + to_userpath) # It is OK if we fail for errors other than NoSuchChildError, since that probably # indicates some problem accessing the destination directory. res.trap(NoSuchChildError) d2.addBoth(_expect_fail) # If there are heisenfiles to be written at the 'from' direntry, then ensure # they will now be written at the 'to' direntry instead. 
d2.addCallback(lambda ign: self._rename_heisenfiles(from_userpath, from_parent, from_childname, to_userpath, to_parent, to_childname, overwrite=overwrite)) def _move(renamed): # FIXME: use move_child_to_path to avoid possible data loss due to #943 #d3 = from_parent.move_child_to_path(from_childname, to_root, to_path, overwrite=overwrite) d3 = from_parent.move_child_to(from_childname, to_parent, to_childname, overwrite=overwrite) def _check(err): if noisy: self.log("_check(%r) in .renameFile(%r, %r, overwrite=%r)" % (err, from_pathstring, to_pathstring, overwrite), level=NOISY) if not isinstance(err, Failure) or (renamed and err.check(NoSuchChildError)): return None if not overwrite and err.check(ExistingChildError): raise SFTPError(FX_PERMISSION_DENIED, "cannot rename to existing path " + to_userpath) return err d3.addBoth(_check) return d3 d2.addCallback(_move) return d2 d.addCallback(_got) d.addBoth(_convert_error, request) return d def makeDirectory(self, pathstring, attrs): request = ".makeDirectory(%r, %r)" % (pathstring, attrs) self.log(request, level=OPERATIONAL) path = self._path_from_string(pathstring) metadata = _attrs_to_metadata(attrs) if 'no-write' in metadata: def _denied(): raise SFTPError(FX_PERMISSION_DENIED, "cannot create a directory that is initially read-only") return defer.execute(_denied) d = self._get_root(path) d.addCallback(lambda (root, path): self._get_or_create_directories(root, path, metadata)) d.addBoth(_convert_error, request) return d def _get_or_create_directories(self, node, path, metadata): if not IDirectoryNode.providedBy(node): # TODO: provide the name of the blocking file in the error message. def _blocked(): raise SFTPError(FX_FAILURE, "cannot create directory because there " "is a file in the way") # close enough return defer.execute(_blocked) if not path: return defer.succeed(node) d = node.get(path[0]) def _maybe_create(f): f.trap(NoSuchChildError) return node.create_subdirectory(path[0]) d.addErrback(_maybe_create) d.addCallback(self._get_or_create_directories, path[1:], metadata) return d def removeFile(self, pathstring): request = ".removeFile(%r)" % (pathstring,) self.log(request, level=OPERATIONAL) path = self._path_from_string(pathstring) d = self._remove_object(path, must_be_file=True) d.addBoth(_convert_error, request) return d def removeDirectory(self, pathstring): request = ".removeDirectory(%r)" % (pathstring,) self.log(request, level=OPERATIONAL) path = self._path_from_string(pathstring) d = self._remove_object(path, must_be_directory=True) d.addBoth(_convert_error, request) return d def _remove_object(self, path, must_be_directory=False, must_be_file=False): userpath = self._path_to_utf8(path) d = self._get_parent_or_node(path) def _got_parent( (parent, childname) ): if childname is None: raise SFTPError(FX_NO_SUCH_FILE, "cannot remove an object specified by URI") direntry = _direntry_for(parent, childname) d2 = defer.succeed(False) if not must_be_directory: d2.addCallback(lambda ign: self._abandon_any_heisenfiles(userpath, direntry)) d2.addCallback(lambda abandoned: parent.delete(childname, must_exist=not abandoned, must_be_directory=must_be_directory, must_be_file=must_be_file)) return d2 d.addCallback(_got_parent) return d def openDirectory(self, pathstring): request = ".openDirectory(%r)" % (pathstring,) self.log(request, level=OPERATIONAL) path = self._path_from_string(pathstring) d = self._get_parent_or_node(path) def _got_parent_or_node( (parent_or_node, childname) ): if noisy: self.log("_got_parent_or_node( (%r, %r) ) in 
openDirectory(%r)" % (parent_or_node, childname, pathstring), level=NOISY) if childname is None: return parent_or_node else: return parent_or_node.get(childname) d.addCallback(_got_parent_or_node) def _list(dirnode): if dirnode.is_unknown(): raise SFTPError(FX_PERMISSION_DENIED, "cannot list an unknown cap as a directory. Upgrading the gateway " "to a later Tahoe-LAFS version may help") if not IDirectoryNode.providedBy(dirnode): raise SFTPError(FX_PERMISSION_DENIED, "cannot list a file as if it were a directory") d2 = dirnode.list() def _render(children): parent_readonly = dirnode.is_readonly() results = [] for filename, (child, metadata) in children.iteritems(): # The file size may be cached or absent. metadata['no-write'] = _no_write(parent_readonly, child, metadata) attrs = _populate_attrs(child, metadata) filename_utf8 = filename.encode('utf-8') longname = _lsLine(filename_utf8, attrs) results.append( (filename_utf8, longname, attrs) ) return StoppableList(results) d2.addCallback(_render) return d2 d.addCallback(_list) d.addBoth(_convert_error, request) return d def getAttrs(self, pathstring, followLinks): request = ".getAttrs(%r, followLinks=%r)" % (pathstring, followLinks) self.log(request, level=OPERATIONAL) # When asked about a specific file, report its current size. # TODO: the modification time for a mutable file should be # reported as the update time of the best version. But that # information isn't currently stored in mutable shares, I think. path = self._path_from_string(pathstring) userpath = self._path_to_utf8(path) d = self._get_parent_or_node(path) def _got_parent_or_node( (parent_or_node, childname) ): if noisy: self.log("_got_parent_or_node( (%r, %r) )" % (parent_or_node, childname), level=NOISY) # Some clients will incorrectly try to get the attributes # of a file immediately after opening it, before it has been put # into the all_heisenfiles table. This is a race condition bug in # the client, but we handle it anyway by calling .sync() on all # files matching either the path or the direntry. 
direntry = _direntry_for(parent_or_node, childname) d2 = self._sync_heisenfiles(userpath, direntry) if childname is None: node = parent_or_node d2.addCallback(lambda ign: node.get_current_size()) d2.addCallback(lambda size: _populate_attrs(node, {'no-write': node.is_unknown() or node.is_readonly()}, size=size)) else: parent = parent_or_node d2.addCallback(lambda ign: parent.get_child_and_metadata_at_path([childname])) def _got( (child, metadata) ): if noisy: self.log("_got( (%r, %r) )" % (child, metadata), level=NOISY) _assert(IDirectoryNode.providedBy(parent), parent=parent) metadata['no-write'] = _no_write(parent.is_readonly(), child, metadata) d3 = child.get_current_size() d3.addCallback(lambda size: _populate_attrs(child, metadata, size=size)) return d3 def _nosuch(err): if noisy: self.log("_nosuch(%r)" % (err,), level=NOISY) err.trap(NoSuchChildError) if noisy: self.log("checking open files:\nself._heisenfiles = %r\nall_heisenfiles = %r\ndirentry=%r" % (self._heisenfiles, all_heisenfiles, direntry), level=NOISY) if direntry in all_heisenfiles: files = all_heisenfiles[direntry] if len(files) == 0: # pragma: no cover return err # use the heisenfile that was most recently opened return files[-1].getAttrs() return err d2.addCallbacks(_got, _nosuch) return d2 d.addCallback(_got_parent_or_node) d.addBoth(_convert_error, request) return d def setAttrs(self, pathstring, attrs): request = ".setAttrs(%r, %r)" % (pathstring, attrs) self.log(request, level=OPERATIONAL) if "size" in attrs: # this would require us to download and re-upload the truncated/extended # file contents def _unsupported(): raise SFTPError(FX_OP_UNSUPPORTED, "setAttrs wth size attribute unsupported") return defer.execute(_unsupported) path = self._path_from_string(pathstring) userpath = self._path_to_utf8(path) d = self._get_parent_or_node(path) def _got_parent_or_node( (parent_or_node, childname) ): if noisy: self.log("_got_parent_or_node( (%r, %r) )" % (parent_or_node, childname), level=NOISY) direntry = _direntry_for(parent_or_node, childname) d2 = self._update_attrs_for_heisenfiles(userpath, direntry, attrs) def _update(updated_heisenfiles): if childname is None: if updated_heisenfiles: return None raise SFTPError(FX_NO_SUCH_FILE, userpath) else: desired_metadata = _attrs_to_metadata(attrs) if noisy: self.log("desired_metadata = %r" % (desired_metadata,), level=NOISY) d3 = parent_or_node.set_metadata_for(childname, desired_metadata) def _nosuch(err): if updated_heisenfiles: err.trap(NoSuchChildError) else: return err d3.addErrback(_nosuch) return d3 d2.addCallback(_update) d2.addCallback(lambda ign: None) return d2 d.addCallback(_got_parent_or_node) d.addBoth(_convert_error, request) return d def readLink(self, pathstring): self.log(".readLink(%r)" % (pathstring,), level=OPERATIONAL) def _unsupported(): raise SFTPError(FX_OP_UNSUPPORTED, "readLink") return defer.execute(_unsupported) def makeLink(self, linkPathstring, targetPathstring): self.log(".makeLink(%r, %r)" % (linkPathstring, targetPathstring), level=OPERATIONAL) # If this is implemented, note the reversal of arguments described in point 7 of # . 
def _unsupported(): raise SFTPError(FX_OP_UNSUPPORTED, "makeLink") return defer.execute(_unsupported) def extendedRequest(self, extensionName, extensionData): self.log(".extendedRequest(%r, )" % (extensionName, len(extensionData)), level=OPERATIONAL) # We implement the three main OpenSSH SFTP extensions; see # if extensionName == 'posix-rename@openssh.com': def _bad(): raise SFTPError(FX_BAD_MESSAGE, "could not parse posix-rename@openssh.com request") if 4 > len(extensionData): return defer.execute(_bad) (fromPathLen,) = struct.unpack('>L', extensionData[0:4]) if 8 + fromPathLen > len(extensionData): return defer.execute(_bad) (toPathLen,) = struct.unpack('>L', extensionData[(4 + fromPathLen):(8 + fromPathLen)]) if 8 + fromPathLen + toPathLen != len(extensionData): return defer.execute(_bad) fromPathstring = extensionData[4:(4 + fromPathLen)] toPathstring = extensionData[(8 + fromPathLen):] d = self.renameFile(fromPathstring, toPathstring, overwrite=True) # Twisted conch assumes that the response from an extended request is either # an error, or an FXP_EXTENDED_REPLY. But it happens to do the right thing # (respond with an FXP_STATUS message) if we return a Failure with code FX_OK. def _succeeded(ign): raise SFTPError(FX_OK, "request succeeded") d.addCallback(_succeeded) return d if extensionName == 'statvfs@openssh.com' or extensionName == 'fstatvfs@openssh.com': # f_bsize and f_frsize should be the same to avoid a bug in 'df' return defer.succeed(struct.pack('>11Q', 1024, # uint64 f_bsize /* file system block size */ 1024, # uint64 f_frsize /* fundamental fs block size */ 628318530, # uint64 f_blocks /* number of blocks (unit f_frsize) */ 314159265, # uint64 f_bfree /* free blocks in file system */ 314159265, # uint64 f_bavail /* free blocks for non-root */ 200000000, # uint64 f_files /* total file inodes */ 100000000, # uint64 f_ffree /* free file inodes */ 100000000, # uint64 f_favail /* free file inodes for non-root */ 0x1AF5, # uint64 f_fsid /* file system id */ 2, # uint64 f_flag /* bit mask = ST_NOSUID; not ST_RDONLY */ 65535, # uint64 f_namemax /* maximum filename length */ )) def _unsupported(): raise SFTPError(FX_OP_UNSUPPORTED, "unsupported %r request " % (extensionName, len(extensionData))) return defer.execute(_unsupported) def realPath(self, pathstring): self.log(".realPath(%r)" % (pathstring,), level=OPERATIONAL) return self._path_to_utf8(self._path_from_string(pathstring)) def _path_to_utf8(self, path): return (u"/" + u"/".join(path)).encode('utf-8') def _path_from_string(self, pathstring): if noisy: self.log("CONVERT %r" % (pathstring,), level=NOISY) _assert(isinstance(pathstring, str), pathstring=pathstring) # The home directory is the root directory. pathstring = pathstring.strip("/") if pathstring == "" or pathstring == ".": path_utf8 = [] else: path_utf8 = pathstring.split("/") # # "Servers SHOULD interpret a path name component ".." as referring to # the parent directory, and "." as referring to the current directory." path = [] for p_utf8 in path_utf8: if p_utf8 == "..": # ignore excess .. 
components at the root if len(path) > 0: path = path[:-1] elif p_utf8 != ".": try: p = p_utf8.decode('utf-8', 'strict') except UnicodeError: raise SFTPError(FX_NO_SUCH_FILE, "path could not be decoded as UTF-8") path.append(p) if noisy: self.log(" PATH %r" % (path,), level=NOISY) return path def _get_root(self, path): # return Deferred (root, remaining_path) d = defer.succeed(None) if path and path[0] == u"uri": d.addCallback(lambda ign: self._client.create_node_from_uri(path[1].encode('utf-8'))) d.addCallback(lambda root: (root, path[2:])) else: d.addCallback(lambda ign: (self._root, path)) return d def _get_parent_or_node(self, path): # return Deferred (parent, childname) or (node, None) d = self._get_root(path) def _got_root( (root, remaining_path) ): if not remaining_path: return (root, None) else: d2 = root.get_child_at_path(remaining_path[:-1]) d2.addCallback(lambda parent: (parent, remaining_path[-1])) return d2 d.addCallback(_got_root) return d class FakeTransport: implements(ITransport) def write(self, data): logmsg("FakeTransport.write()" % (len(data),), level=NOISY) def writeSequence(self, data): logmsg("FakeTransport.writeSequence(...)", level=NOISY) def loseConnection(self): logmsg("FakeTransport.loseConnection()", level=NOISY) # getPeer and getHost can just raise errors, since we don't know what to return class ShellSession(PrefixingLogMixin): implements(ISession) def __init__(self, userHandler): PrefixingLogMixin.__init__(self, facility="tahoe.sftp") if noisy: self.log(".__init__(%r)" % (userHandler), level=NOISY) def getPty(self, terminal, windowSize, attrs): self.log(".getPty(%r, %r, %r)" % (terminal, windowSize, attrs), level=OPERATIONAL) def openShell(self, protocol): self.log(".openShell(%r)" % (protocol,), level=OPERATIONAL) if hasattr(protocol, 'transport') and protocol.transport is None: protocol.transport = FakeTransport() # work around Twisted bug return self._unsupported(protocol) def execCommand(self, protocol, cmd): self.log(".execCommand(%r, %r)" % (protocol, cmd), level=OPERATIONAL) if hasattr(protocol, 'transport') and protocol.transport is None: protocol.transport = FakeTransport() # work around Twisted bug d = defer.succeed(None) if cmd == "df -P -k /": d.addCallback(lambda ign: protocol.write( "Filesystem 1024-blocks Used Available Capacity Mounted on\r\n" "tahoe 628318530 314159265 314159265 50% /\r\n")) d.addCallback(lambda ign: protocol.processEnded(Reason(ProcessDone(None)))) else: d.addCallback(lambda ign: self._unsupported(protocol)) return d def _unsupported(self, protocol): d = defer.succeed(None) d.addCallback(lambda ign: protocol.errReceived( "This server supports only the SFTP protocol. It does not support SCP,\r\n" "interactive shell sessions, or commands other than one needed by sshfs.\r\n")) d.addCallback(lambda ign: protocol.processEnded(Reason(ProcessTerminated(exitCode=1)))) return d def windowChanged(self, newWindowSize): self.log(".windowChanged(%r)" % (newWindowSize,), level=OPERATIONAL) def eofReceived(self): self.log(".eofReceived()", level=OPERATIONAL) def closed(self): self.log(".closed()", level=OPERATIONAL) # If you have an SFTPUserHandler and want something that provides ISession, you get # ShellSession(userHandler). # We use adaptation because this must be a different object to the SFTPUserHandler. 
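# Illustrative sketch (hypothetical client-side helper, not used by this
# server): the posix-rename@openssh.com payload parsed in extendedRequest()
# above is two SSH strings, each a big-endian uint32 length followed by that
# many bytes; the length checks there require the data to be exactly
# 8 + len(from) + len(to) bytes.
import struct

def _sketch_pack_posix_rename(from_pathstring, to_pathstring):
    def _ssh_string(s):
        return struct.pack('>L', len(s)) + s
    return _ssh_string(from_pathstring) + _ssh_string(to_pathstring)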
components.registerAdapter(ShellSession, SFTPUserHandler, ISession) from allmydata.frontends.auth import AccountURLChecker, AccountFileChecker, NeedRootcapLookupScheme class Dispatcher: implements(portal.IRealm) def __init__(self, client): self._client = client def requestAvatar(self, avatarID, mind, interface): _assert(interface == IConchUser, interface=interface) rootnode = self._client.create_node_from_uri(avatarID.rootcap) handler = SFTPUserHandler(self._client, rootnode, avatarID.username) return (interface, handler, handler.logout) class SFTPServer(service.MultiService): def __init__(self, client, accountfile, accounturl, sftp_portstr, pubkey_file, privkey_file): service.MultiService.__init__(self) r = Dispatcher(client) p = portal.Portal(r) if accountfile: c = AccountFileChecker(self, accountfile) p.registerChecker(c) if accounturl: c = AccountURLChecker(self, accounturl) p.registerChecker(c) if not accountfile and not accounturl: # we could leave this anonymous, with just the /uri/CAP form raise NeedRootcapLookupScheme("must provide an account file or URL") pubkey = keys.Key.fromFile(pubkey_file) privkey = keys.Key.fromFile(privkey_file) class SSHFactory(factory.SSHFactory): publicKeys = {pubkey.sshType(): pubkey} privateKeys = {privkey.sshType(): privkey} def getPrimes(self): try: # if present, this enables diffie-hellman-group-exchange return primes.parseModuliFile("/etc/ssh/moduli") except IOError: return None f = SSHFactory() f.portal = p s = strports.service(sftp_portstr, f) s.setServiceParent(self) tahoe-lafs-1.10.0/src/allmydata/hashtree.py000066400000000000000000000456151221140116300205320ustar00rootroot00000000000000# -*- test-case-name: allmydata.test.test_hashtree -*- from allmydata.util import mathutil # from the pyutil library """ Read and write chunks from files. Version 1.0.0. A file is divided into blocks, each of which has size L{BLOCK_SIZE} (except for the last block, which may be smaller). Blocks are encoded into chunks. One publishes the hash of the entire file. Clients who want to download the file first obtain the hash, then the clients can receive chunks in any order. Cryptographic hashing is used to verify each received chunk before writing to disk. Thus it is impossible to download corrupt data if one has the correct file hash. One obtains the hash of a complete file via L{CompleteChunkFile.file_hash}. One can read chunks from a complete file by the sequence operations of C{len()} and subscripting on a L{CompleteChunkFile} object. One can open an empty or partially downloaded file with L{PartialChunkFile}, and read and write chunks to this file. A chunk will fail to write if its contents and index are not consistent with the overall file hash passed to L{PartialChunkFile} when the partial chunk file was first created. The chunks have an overhead of less than 4% for files of size less than C{10**20} bytes. Benchmarks: - On a 3 GHz Pentium 3, it took 3.4 minutes to first make a L{CompleteChunkFile} object for a 4 GB file. Up to 10 MB of memory was used as the constructor ran. A metafile filename was passed to the constructor, and so the hash information was written to the metafile. The object used a negligible amount of memory after the constructor was finished. - Creation of L{CompleteChunkFile} objects in future runs of the program took negligible time, since the hash information was already stored in the metafile. @var BLOCK_SIZE: Size of a block. See L{BlockFile}. @var MAX_CHUNK_SIZE: Upper bound on the size of a chunk. See L{CompleteChunkFile}. 
free (adj.): unencumbered; not under the control of others Written by Connelly Barnes in 2005 and released into the public domain with no warranty of any kind, either expressed or implied. It probably won't make your computer catch on fire, or eat your children, but it might. Use at your own risk. """ from allmydata.util import base32 from allmydata.util.hashutil import tagged_hash, tagged_pair_hash __version__ = '1.0.0-allmydata' BLOCK_SIZE = 65536 MAX_CHUNK_SIZE = BLOCK_SIZE + 4096 def roundup_pow2(x): """ Round integer C{x} up to the nearest power of 2. """ ans = 1 while ans < x: ans *= 2 return ans class CompleteBinaryTreeMixin: """ Adds convenience methods to a complete binary tree. Assumes the total number of elements in the binary tree may be accessed via C{__len__}, and that each element can be retrieved using list subscripting. Tree is indexed like so:: 0 / \ 1 2 / \ / \ 3 4 5 6 / \ / \ / \ / \ 7 8 9 10 11 12 13 14 """ def parent(self, i): """ Index of the parent of C{i}. """ if i < 1 or (hasattr(self, '__len__') and i >= len(self)): raise IndexError('index out of range: ' + repr(i)) return (i - 1) // 2 def lchild(self, i): """ Index of the left child of C{i}. """ ans = 2 * i + 1 if i < 0 or (hasattr(self, '__len__') and ans >= len(self)): raise IndexError('index out of range: ' + repr(i)) return ans def rchild(self, i): """ Index of right child of C{i}. """ ans = 2 * i + 2 if i < 0 or (hasattr(self, '__len__') and ans >= len(self)): raise IndexError('index out of range: ' + repr(i)) return ans def sibling(self, i): """ Index of sibling of C{i}. """ parent = self.parent(i) if self.lchild(parent) == i: return self.rchild(parent) else: return self.lchild(parent) def needed_for(self, i): """ Return a list of node indices that are necessary for the hash chain. """ if i < 0 or i >= len(self): raise IndexError('index out of range: 0 >= %s < %s' % (i, len(self))) needed = [] here = i while here != 0: needed.append(self.sibling(here)) here = self.parent(here) return needed def depth_first(self, i=0): yield i, 0 try: for child,childdepth in self.depth_first(self.lchild(i)): yield child, childdepth+1 except IndexError: pass try: for child,childdepth in self.depth_first(self.rchild(i)): yield child, childdepth+1 except IndexError: pass def dump(self): lines = [] for i,depth in self.depth_first(): lines.append("%s%3d: %s" % (" "*depth, i, base32.b2a_or_none(self[i]))) return "\n".join(lines) + "\n" def get_leaf_index(self, leafnum): return self.first_leaf_num + leafnum def get_leaf(self, leafnum): return self[self.first_leaf_num + leafnum] def depth_of(i): """Return the depth or level of the given node. Level 0 contains node 0 Level 1 contains nodes 1 and 2. Level 2 contains nodes 3,4,5,6.""" return mathutil.log_floor(i+1, 2) def empty_leaf_hash(i): return tagged_hash('Merkle tree empty leaf', "%d" % i) def pair_hash(a, b): return tagged_pair_hash('Merkle tree internal node', a, b) class HashTree(CompleteBinaryTreeMixin, list): """ Compute Merkle hashes at any node in a complete binary tree. Tree is indexed like so:: 0 / \ 1 2 / \ / \ 3 4 5 6 / \ / \ / \ / \ 7 8 9 10 11 12 13 14 <- List passed to constructor. """ def __init__(self, L): """ Create complete binary tree from list of hash strings. The list is augmented by hashes so its length is a power of 2, and then this is used as the bottom row of the hash tree. The augmenting is done so that if the augmented element is at index C{i}, then its value is C{hash(tagged_hash('Merkle tree empty leaf', '%d'%i))}. """ # Augment the list. 
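# Worked example (illustrative helper, not used by this module): building a
# HashTree from 6 leaf hashes pads the bottom row to 8 leaves, so the flat
# list has 15 nodes; the real leaves sit at indices 7..12 and indices 13..14
# hold empty_leaf_hash(6) and empty_leaf_hash(7). Validating leaf 0 needs its
# sibling chain {8, 4, 2}; the root (index 0) is deliberately excluded.
def _sketch_hashtree_example():
    leaves = [tagged_hash('example tag', 'block%d' % i) for i in range(6)]
    t = HashTree(leaves)
    assert len(t) == 15
    assert t.get_leaf_index(0) == 7
    assert t.needed_hashes(0) == set([8, 4, 2])
    return t[0]   # the Merkle root hash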
start = len(L) end = roundup_pow2(len(L)) self.first_leaf_num = end - 1 L = L + [None] * (end - start) for i in range(start, end): L[i] = empty_leaf_hash(i) # Form each row of the tree. rows = [L] while len(rows[-1]) != 1: last = rows[-1] rows += [[pair_hash(last[2*i], last[2*i+1]) for i in xrange(len(last)//2)]] # Flatten the list of rows into a single list. rows.reverse() self[:] = sum(rows, []) def needed_hashes(self, leafnum, include_leaf=False): """Which hashes will someone need to validate a given data block? I am used to answer a question: supposing you have the data block that is used to form leaf hash N, and you want to validate that it, which hashes would you need? I accept a leaf number and return a set of 'hash index' values, which are integers from 0 to len(self). In the 'hash index' number space, hash[0] is the root hash, while hash[len(self)-1] is the last leaf hash. This method can be used to find out which hashes you should request from some untrusted source (usually the same source that provides the data block), so you can minimize storage or transmission overhead. It can also be used to determine which hashes you should send to a remote data store so that it will be able to provide validatable data in the future. I will not include '0' (the root hash) in the result, since the root is generally stored somewhere that is more trusted than the source of the remaining hashes. I will include the leaf hash itself only if you ask me to, by passing include_leaf=True. """ needed = set(self.needed_for(self.first_leaf_num + leafnum)) if include_leaf: needed.add(self.first_leaf_num + leafnum) return needed class NotEnoughHashesError(Exception): pass class BadHashError(Exception): pass class IncompleteHashTree(CompleteBinaryTreeMixin, list): """I am a hash tree which may or may not be complete. I can be used to validate inbound data from some untrustworthy provider who has a subset of leaves and a sufficient subset of internal nodes. Initially I am completely unpopulated. Over time, I will become filled with hashes, just enough to validate particular leaf nodes. If you desire to validate leaf number N, first find out which hashes I need by calling needed_hashes(N). This will return a list of node numbers (which will nominally be the sibling chain between the given leaf and the root, but if I already have some of those nodes, needed_hashes(N) will only return a subset). Obtain these hashes from the data provider, then tell me about them with set_hash(i, HASH). Once I have enough hashes, you can tell me the hash of the leaf with set_leaf_hash(N, HASH), and I will either return None or raise BadHashError. The first hash to be set will probably be 0 (the root hash), since this is the one that will come from someone more trustworthy than the data provider. """ def __init__(self, num_leaves): L = [None] * num_leaves start = len(L) end = roundup_pow2(len(L)) self.first_leaf_num = end - 1 L = L + [None] * (end - start) rows = [L] while len(rows[-1]) != 1: last = rows[-1] rows += [[None for i in xrange(len(last)//2)]] # Flatten the list of rows into a single list. rows.reverse() self[:] = sum(rows, []) def needed_hashes(self, leafnum, include_leaf=False): """Which new hashes do I need to validate a given data block? I am much like HashTree.needed_hashes(), except that I don't include hashes that I already know about. When needed_hashes() is called on an empty IncompleteHashTree, it will return the same set as a HashTree of the same size. 
But later, once hashes have been added with set_hashes(), I will ask for fewer hashes, since some of the necessary ones have already been set. """ maybe_needed = set(self.needed_for(self.first_leaf_num + leafnum)) if include_leaf: maybe_needed.add(self.first_leaf_num + leafnum) return set([i for i in maybe_needed if self[i] is None]) def _name_hash(self, i): name = "[%d of %d]" % (i, len(self)) if i >= self.first_leaf_num: leafnum = i - self.first_leaf_num numleaves = len(self) - self.first_leaf_num name += " (leaf [%d] of %d)" % (leafnum, numleaves) return name def set_hashes(self, hashes={}, leaves={}): """Add a bunch of hashes to the tree. I will validate these to the best of my ability. If I already have a copy of any of the new hashes, the new values must equal the existing ones, or I will raise BadHashError. If adding a hash allows me to compute a parent hash, those parent hashes must match or I will raise BadHashError. If I raise BadHashError, I will forget about all the hashes that you tried to add, leaving my state exactly the same as before I was called. If I return successfully, I will remember all those hashes. I insist upon being able to validate all of the hashes that were given to me. If I cannot do this because I'm missing some hashes, I will raise NotEnoughHashesError (and forget about all the hashes that you tried to add). Note that this means that the root hash must either be included in 'hashes', or it must have been provided at some point in the past. 'leaves' is a dictionary uses 'leaf index' values, which range from 0 (the left-most leaf) to num_leaves-1 (the right-most leaf), and form the base of the tree. 'hashes' uses 'hash_index' values, which range from 0 (the root of the tree) to 2*num_leaves-2 (the right-most leaf). leaf[i] is the same as hash[num_leaves-1+i]. The best way to use me is to start by obtaining the root hash from some 'good' channel and populate me with it: iht = IncompleteHashTree(numleaves) roothash = trusted_channel.get_roothash() iht.set_hashes(hashes={0: roothash}) Then use the 'bad' channel to obtain data block 0 and the corresponding hash chain (a dict with the same hashes that needed_hashes(0) tells you, e.g. {0:h0, 2:h2, 4:h4, 8:h8} when len(L)=8). Hash the data block to create leaf0, then feed everything into set_hashes() and see if it raises an exception or not:: otherhashes = untrusted_channel.get_hashes() # otherhashes.keys() should == iht.needed_hashes(leaves=[0]) datablock0 = untrusted_channel.get_data(0) leaf0 = HASH(datablock0) # HASH() is probably hashutil.tagged_hash(tag, datablock0) iht.set_hashes(otherhashes, leaves={0: leaf0}) If the set_hashes() call doesn't raise an exception, the data block was valid. If it raises BadHashError, then either the data block was corrupted or one of the received hashes was corrupted. If it raises NotEnoughHashesError, then the otherhashes dictionary was incomplete. 
""" assert isinstance(hashes, dict) for h in hashes.values(): assert isinstance(h, str) assert isinstance(leaves, dict) for h in leaves.values(): assert isinstance(h, str) new_hashes = hashes.copy() for leafnum,leafhash in leaves.iteritems(): hashnum = self.first_leaf_num + leafnum if hashnum in new_hashes: if new_hashes[hashnum] != leafhash: raise BadHashError("got conflicting hashes in my " "arguments: leaves[%d] != hashes[%d]" % (leafnum, hashnum)) new_hashes[hashnum] = leafhash remove_upon_failure = set() # we'll remove these if the check fails # visualize this method in the following way: # A: start with the empty or partially-populated tree as shown in # the HashTree docstring # B: add all of our input hashes to the tree, filling in some of the # holes. Don't overwrite anything, but new values must equal the # existing ones. Mark everything that was added with a red dot # (meaning "not yet validated") # C: start with the lowest/deepest level. Pick any red-dotted node, # hash it with its sibling to compute the parent hash. Add the # parent to the tree just like in step B (if the parent already # exists, the values must be equal; if not, add our computed # value with a red dot). If we have no sibling, throw # NotEnoughHashesError, since we won't be able to validate this # node. Remove the red dot. If there was a red dot on our # sibling, remove it too. # D: finish all red-dotted nodes in one level before moving up to # the next. # E: if we hit NotEnoughHashesError or BadHashError before getting # to the root, discard every hash we've added. try: num_levels = depth_of(len(self)-1) # hashes_to_check[level] is set(index). This holds the "red dots" # described above hashes_to_check = [set() for level in range(num_levels+1)] # first we provisionally add all hashes to the tree, comparing # any duplicates for i,h in new_hashes.iteritems(): if self[i]: if self[i] != h: raise BadHashError("new hash %s does not match " "existing hash %s at %s" % (base32.b2a(h), base32.b2a(self[i]), self._name_hash(i))) else: level = depth_of(i) hashes_to_check[level].add(i) self[i] = h remove_upon_failure.add(i) for level in reversed(range(len(hashes_to_check))): this_level = hashes_to_check[level] while this_level: i = this_level.pop() if i == 0: # The root has no sibling. How lonely. You can't # really *check* the root; you either accept it # because the caller told you what it is by including # it in hashes, or you accept it because you # calculated it from its two children. You probably # want to set the root (from a trusted source) before # adding any children from an untrusted source. continue siblingnum = self.sibling(i) if self[siblingnum] is None: # without a sibling, we can't compute a parent, and # we can't verify this node raise NotEnoughHashesError("unable to validate [%d]"%i) parentnum = self.parent(i) # make sure we know right from left leftnum, rightnum = sorted([i, siblingnum]) new_parent_hash = pair_hash(self[leftnum], self[rightnum]) if self[parentnum]: if self[parentnum] != new_parent_hash: raise BadHashError("h([%d]+[%d]) != h[%d]" % (leftnum, rightnum, parentnum)) else: self[parentnum] = new_parent_hash remove_upon_failure.add(parentnum) parent_level = depth_of(parentnum) assert parent_level == level-1 hashes_to_check[parent_level].add(parentnum) # our sibling is now as valid as this node this_level.discard(siblingnum) # we're done! 
except (BadHashError, NotEnoughHashesError): for i in remove_upon_failure: self[i] = None raise tahoe-lafs-1.10.0/src/allmydata/history.py000066400000000000000000000074321221140116300204230ustar00rootroot00000000000000 import weakref class History: """Keep track of recent operations, for a status display.""" name = "history" MAX_DOWNLOAD_STATUSES = 10 MAX_UPLOAD_STATUSES = 10 MAX_MAPUPDATE_STATUSES = 20 MAX_PUBLISH_STATUSES = 20 MAX_RETRIEVE_STATUSES = 20 def __init__(self, stats_provider=None): self.stats_provider = stats_provider self.all_downloads_statuses = weakref.WeakKeyDictionary() self.recent_download_statuses = [] self.all_upload_statuses = weakref.WeakKeyDictionary() self.recent_upload_statuses = [] self.all_mapupdate_status = weakref.WeakKeyDictionary() self.recent_mapupdate_status = [] self.all_publish_status = weakref.WeakKeyDictionary() self.recent_publish_status = [] self.all_retrieve_status = weakref.WeakKeyDictionary() self.recent_retrieve_status = [] self.all_helper_upload_statuses = weakref.WeakKeyDictionary() self.recent_helper_upload_statuses = [] def add_download(self, download_status): self.all_downloads_statuses[download_status] = None self.recent_download_statuses.append(download_status) while len(self.recent_download_statuses) > self.MAX_DOWNLOAD_STATUSES: self.recent_download_statuses.pop(0) def list_all_download_statuses(self): for ds in self.all_downloads_statuses: yield ds def add_upload(self, upload_status): self.all_upload_statuses[upload_status] = None self.recent_upload_statuses.append(upload_status) while len(self.recent_upload_statuses) > self.MAX_UPLOAD_STATUSES: self.recent_upload_statuses.pop(0) def list_all_upload_statuses(self): for us in self.all_upload_statuses: yield us def notify_mapupdate(self, p): self.all_mapupdate_status[p] = None self.recent_mapupdate_status.append(p) while len(self.recent_mapupdate_status) > self.MAX_MAPUPDATE_STATUSES: self.recent_mapupdate_status.pop(0) def notify_publish(self, p, size): self.all_publish_status[p] = None self.recent_publish_status.append(p) if self.stats_provider: self.stats_provider.count('mutable.files_published', 1) # We must be told bytes_published as an argument, since the # publish_status does not yet know how much data it will be asked # to send. When we move to MDMF we'll need to find a better way # to handle this. 
self.stats_provider.count('mutable.bytes_published', size) while len(self.recent_publish_status) > self.MAX_PUBLISH_STATUSES: self.recent_publish_status.pop(0) def notify_retrieve(self, r): self.all_retrieve_status[r] = None self.recent_retrieve_status.append(r) if self.stats_provider: self.stats_provider.count('mutable.files_retrieved', 1) self.stats_provider.count('mutable.bytes_retrieved', r.get_size()) while len(self.recent_retrieve_status) > self.MAX_RETRIEVE_STATUSES: self.recent_retrieve_status.pop(0) def list_all_mapupdate_statuses(self): for s in self.all_mapupdate_status: yield s def list_all_publish_statuses(self): for s in self.all_publish_status: yield s def list_all_retrieve_statuses(self): for s in self.all_retrieve_status: yield s def notify_helper_upload(self, s): self.all_helper_upload_statuses[s] = None self.recent_helper_upload_statuses.append(s) while len(self.recent_helper_upload_statuses) > self.MAX_UPLOAD_STATUSES: self.recent_helper_upload_statuses.pop(0) def list_all_helper_statuses(self): for s in self.all_helper_upload_statuses: yield s tahoe-lafs-1.10.0/src/allmydata/immutable/000077500000000000000000000000001221140116300203215ustar00rootroot00000000000000tahoe-lafs-1.10.0/src/allmydata/immutable/__init__.py000066400000000000000000000000001221140116300224200ustar00rootroot00000000000000tahoe-lafs-1.10.0/src/allmydata/immutable/checker.py000066400000000000000000001136441221140116300223100ustar00rootroot00000000000000from zope.interface import implements from twisted.internet import defer from foolscap.api import DeadReferenceError, RemoteException from allmydata import hashtree, codec, uri from allmydata.interfaces import IValidatedThingProxy, IVerifierURI from allmydata.hashtree import IncompleteHashTree from allmydata.check_results import CheckResults from allmydata.uri import CHKFileVerifierURI from allmydata.util.assertutil import precondition from allmydata.util import base32, deferredutil, dictutil, log, mathutil from allmydata.util.hashutil import file_renewal_secret_hash, \ file_cancel_secret_hash, bucket_renewal_secret_hash, \ bucket_cancel_secret_hash, uri_extension_hash, CRYPTO_VAL_SIZE, \ block_hash from allmydata.immutable import layout class IntegrityCheckReject(Exception): pass class BadURIExtension(IntegrityCheckReject): pass class BadURIExtensionHashValue(IntegrityCheckReject): pass class BadOrMissingHash(IntegrityCheckReject): pass class UnsupportedErasureCodec(BadURIExtension): pass class ValidatedExtendedURIProxy: implements(IValidatedThingProxy) """ I am a front-end for a remote UEB (using a local ReadBucketProxy), responsible for retrieving and validating the elements from the UEB.""" def __init__(self, readbucketproxy, verifycap, fetch_failures=None): # fetch_failures is for debugging -- see test_encode.py self._fetch_failures = fetch_failures self._readbucketproxy = readbucketproxy precondition(IVerifierURI.providedBy(verifycap), verifycap) self._verifycap = verifycap # required self.segment_size = None self.crypttext_root_hash = None self.share_root_hash = None # computed self.block_size = None self.share_size = None self.num_segments = None self.tail_data_size = None self.tail_segment_size = None # optional self.crypttext_hash = None def __str__(self): return "<%s %s>" % (self.__class__.__name__, self._verifycap.to_string()) def _check_integrity(self, data): h = uri_extension_hash(data) if h != self._verifycap.uri_extension_hash: msg = ("The copy of uri_extension we received from %s was bad: wanted %s, got %s" % (self._readbucketproxy, 
base32.b2a(self._verifycap.uri_extension_hash), base32.b2a(h))) if self._fetch_failures is not None: self._fetch_failures["uri_extension"] += 1 raise BadURIExtensionHashValue(msg) else: return data def _parse_and_validate(self, data): self.share_size = mathutil.div_ceil(self._verifycap.size, self._verifycap.needed_shares) d = uri.unpack_extension(data) # There are several kinds of things that can be found in a UEB. # First, things that we really need to learn from the UEB in order to # do this download. Next: things which are optional but not redundant # -- if they are present in the UEB they will get used. Next, things # that are optional and redundant. These things are required to be # consistent: they don't have to be in the UEB, but if they are in # the UEB then they will be checked for consistency with the # already-known facts, and if they are inconsistent then an exception # will be raised. These things aren't actually used -- they are just # tested for consistency and ignored. Finally: things which are # deprecated -- they ought not be in the UEB at all, and if they are # present then a warning will be logged but they are otherwise # ignored. # First, things that we really need to learn from the UEB: # segment_size, crypttext_root_hash, and share_root_hash. self.segment_size = d['segment_size'] self.block_size = mathutil.div_ceil(self.segment_size, self._verifycap.needed_shares) self.num_segments = mathutil.div_ceil(self._verifycap.size, self.segment_size) self.tail_data_size = self._verifycap.size % self.segment_size if not self.tail_data_size: self.tail_data_size = self.segment_size # padding for erasure code self.tail_segment_size = mathutil.next_multiple(self.tail_data_size, self._verifycap.needed_shares) # Ciphertext hash tree root is mandatory, so that there is at most # one ciphertext that matches this read-cap or verify-cap. The # integrity check on the shares is not sufficient to prevent the # original encoder from creating some shares of file A and other # shares of file B. 
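# Worked example (illustrative numbers only, not from any real file): how the
# sizes computed above in _parse_and_validate() relate to each other, for a
# hypothetical verifycap with size=1000000 bytes, segment_size=131072 and
# needed_shares=3. mathutil is imported at the top of this module.
def _sketch_ueb_size_math():
    size, segment_size, k = 1000000, 131072, 3
    block_size = mathutil.div_ceil(segment_size, k)               # 43691
    num_segments = mathutil.div_ceil(size, segment_size)          # 8
    tail_data_size = size % segment_size                          # 82496 (nonzero, so no substitution)
    tail_segment_size = mathutil.next_multiple(tail_data_size, k) # 82497, padded for the erasure code
    return (block_size, num_segments, tail_data_size, tail_segment_size)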
self.crypttext_root_hash = d['crypttext_root_hash'] self.share_root_hash = d['share_root_hash'] # Next: things that are optional and not redundant: crypttext_hash if d.has_key('crypttext_hash'): self.crypttext_hash = d['crypttext_hash'] if len(self.crypttext_hash) != CRYPTO_VAL_SIZE: raise BadURIExtension('crypttext_hash is required to be hashutil.CRYPTO_VAL_SIZE bytes, not %s bytes' % (len(self.crypttext_hash),)) # Next: things that are optional, redundant, and required to be # consistent: codec_name, codec_params, tail_codec_params, # num_segments, size, needed_shares, total_shares if d.has_key('codec_name'): if d['codec_name'] != "crs": raise UnsupportedErasureCodec(d['codec_name']) if d.has_key('codec_params'): ucpss, ucpns, ucpts = codec.parse_params(d['codec_params']) if ucpss != self.segment_size: raise BadURIExtension("inconsistent erasure code params: " "ucpss: %s != self.segment_size: %s" % (ucpss, self.segment_size)) if ucpns != self._verifycap.needed_shares: raise BadURIExtension("inconsistent erasure code params: ucpns: %s != " "self._verifycap.needed_shares: %s" % (ucpns, self._verifycap.needed_shares)) if ucpts != self._verifycap.total_shares: raise BadURIExtension("inconsistent erasure code params: ucpts: %s != " "self._verifycap.total_shares: %s" % (ucpts, self._verifycap.total_shares)) if d.has_key('tail_codec_params'): utcpss, utcpns, utcpts = codec.parse_params(d['tail_codec_params']) if utcpss != self.tail_segment_size: raise BadURIExtension("inconsistent erasure code params: utcpss: %s != " "self.tail_segment_size: %s, self._verifycap.size: %s, " "self.segment_size: %s, self._verifycap.needed_shares: %s" % (utcpss, self.tail_segment_size, self._verifycap.size, self.segment_size, self._verifycap.needed_shares)) if utcpns != self._verifycap.needed_shares: raise BadURIExtension("inconsistent erasure code params: utcpns: %s != " "self._verifycap.needed_shares: %s" % (utcpns, self._verifycap.needed_shares)) if utcpts != self._verifycap.total_shares: raise BadURIExtension("inconsistent erasure code params: utcpts: %s != " "self._verifycap.total_shares: %s" % (utcpts, self._verifycap.total_shares)) if d.has_key('num_segments'): if d['num_segments'] != self.num_segments: raise BadURIExtension("inconsistent num_segments: size: %s, " "segment_size: %s, computed_num_segments: %s, " "ueb_num_segments: %s" % (self._verifycap.size, self.segment_size, self.num_segments, d['num_segments'])) if d.has_key('size'): if d['size'] != self._verifycap.size: raise BadURIExtension("inconsistent size: URI size: %s, UEB size: %s" % (self._verifycap.size, d['size'])) if d.has_key('needed_shares'): if d['needed_shares'] != self._verifycap.needed_shares: raise BadURIExtension("inconsistent needed shares: URI needed shares: %s, UEB " "needed shares: %s" % (self._verifycap.total_shares, d['needed_shares'])) if d.has_key('total_shares'): if d['total_shares'] != self._verifycap.total_shares: raise BadURIExtension("inconsistent total shares: URI total shares: %s, UEB " "total shares: %s" % (self._verifycap.total_shares, d['total_shares'])) # Finally, things that are deprecated and ignored: plaintext_hash, # plaintext_root_hash if d.get('plaintext_hash'): log.msg("Found plaintext_hash in UEB. This field is deprecated for security reasons " "and is no longer used. Ignoring. %s" % (self,)) if d.get('plaintext_root_hash'): log.msg("Found plaintext_root_hash in UEB. This field is deprecated for security " "reasons and is no longer used. Ignoring. 
%s" % (self,)) return self def start(self): """Fetch the UEB from bucket, compare its hash to the hash from verifycap, then parse it. Returns a deferred which is called back with self once the fetch is successful, or is erred back if it fails.""" d = self._readbucketproxy.get_uri_extension() d.addCallback(self._check_integrity) d.addCallback(self._parse_and_validate) return d class ValidatedReadBucketProxy(log.PrefixingLogMixin): """I am a front-end for a remote storage bucket, responsible for retrieving and validating data from that bucket. My get_block() method is used by BlockDownloaders. """ def __init__(self, sharenum, bucket, share_hash_tree, num_blocks, block_size, share_size): """ share_hash_tree is required to have already been initialized with the root hash (the number-0 hash), using the share_root_hash from the UEB""" precondition(share_hash_tree[0] is not None, share_hash_tree) prefix = "%d-%s-%s" % (sharenum, bucket, base32.b2a_l(share_hash_tree[0][:8], 60)) log.PrefixingLogMixin.__init__(self, facility="tahoe.immutable.download", prefix=prefix) self.sharenum = sharenum self.bucket = bucket self.share_hash_tree = share_hash_tree self.num_blocks = num_blocks self.block_size = block_size self.share_size = share_size self.block_hash_tree = hashtree.IncompleteHashTree(self.num_blocks) def get_all_sharehashes(self): """Retrieve and validate all the share-hash-tree nodes that are included in this share, regardless of whether we need them to validate the share or not. Each share contains a minimal Merkle tree chain, but there is lots of overlap, so usually we'll be using hashes from other shares and not reading every single hash from this share. The Verifier uses this function to read and validate every single hash from this share. Call this (and wait for the Deferred it returns to fire) before calling get_block() for the first time: this lets us check that the share share contains enough hashes to validate its own data, and avoids downloading any share hash twice. I return a Deferred which errbacks upon failure, probably with BadOrMissingHash.""" d = self.bucket.get_share_hashes() def _got_share_hashes(sh): sharehashes = dict(sh) try: self.share_hash_tree.set_hashes(sharehashes) except IndexError, le: raise BadOrMissingHash(le) except (hashtree.BadHashError, hashtree.NotEnoughHashesError), le: raise BadOrMissingHash(le) d.addCallback(_got_share_hashes) return d def get_all_blockhashes(self): """Retrieve and validate all the block-hash-tree nodes that are included in this share. Each share contains a full Merkle tree, but we usually only fetch the minimal subset necessary for any particular block. This function fetches everything at once. The Verifier uses this function to validate the block hash tree. Call this (and wait for the Deferred it returns to fire) after calling get_all_sharehashes() and before calling get_block() for the first time: this lets us check that the share contains all block hashes and avoids downloading them multiple times. I return a Deferred which errbacks upon failure, probably with BadOrMissingHash. 
""" # get_block_hashes(anything) currently always returns everything needed = list(range(len(self.block_hash_tree))) d = self.bucket.get_block_hashes(needed) def _got_block_hashes(blockhashes): if len(blockhashes) < len(self.block_hash_tree): raise BadOrMissingHash() bh = dict(enumerate(blockhashes)) try: self.block_hash_tree.set_hashes(bh) except IndexError, le: raise BadOrMissingHash(le) except (hashtree.BadHashError, hashtree.NotEnoughHashesError), le: raise BadOrMissingHash(le) d.addCallback(_got_block_hashes) return d def get_all_crypttext_hashes(self, crypttext_hash_tree): """Retrieve and validate all the crypttext-hash-tree nodes that are in this share. Normally we don't look at these at all: the download process fetches them incrementally as needed to validate each segment of ciphertext. But this is a convenient place to give the Verifier a function to validate all of these at once. Call this with a new hashtree object for each share, initialized with the crypttext hash tree root. I return a Deferred which errbacks upon failure, probably with BadOrMissingHash. """ # get_crypttext_hashes() always returns everything d = self.bucket.get_crypttext_hashes() def _got_crypttext_hashes(hashes): if len(hashes) < len(crypttext_hash_tree): raise BadOrMissingHash() ct_hashes = dict(enumerate(hashes)) try: crypttext_hash_tree.set_hashes(ct_hashes) except IndexError, le: raise BadOrMissingHash(le) except (hashtree.BadHashError, hashtree.NotEnoughHashesError), le: raise BadOrMissingHash(le) d.addCallback(_got_crypttext_hashes) return d def get_block(self, blocknum): # the first time we use this bucket, we need to fetch enough elements # of the share hash tree to validate it from our share hash up to the # hashroot. if self.share_hash_tree.needed_hashes(self.sharenum): d1 = self.bucket.get_share_hashes() else: d1 = defer.succeed([]) # We might need to grab some elements of our block hash tree, to # validate the requested block up to the share hash. blockhashesneeded = self.block_hash_tree.needed_hashes(blocknum, include_leaf=True) # We don't need the root of the block hash tree, as that comes in the # share tree. blockhashesneeded.discard(0) d2 = self.bucket.get_block_hashes(blockhashesneeded) if blocknum < self.num_blocks-1: thisblocksize = self.block_size else: thisblocksize = self.share_size % self.block_size if thisblocksize == 0: thisblocksize = self.block_size d3 = self.bucket.get_block_data(blocknum, self.block_size, thisblocksize) dl = deferredutil.gatherResults([d1, d2, d3]) dl.addCallback(self._got_data, blocknum) return dl def _got_data(self, results, blocknum): precondition(blocknum < self.num_blocks, self, blocknum, self.num_blocks) sharehashes, blockhashes, blockdata = results try: sharehashes = dict(sharehashes) except ValueError, le: le.args = tuple(le.args + (sharehashes,)) raise blockhashes = dict(enumerate(blockhashes)) candidate_share_hash = None # in case we log it in the except block below blockhash = None # in case we log it in the except block below try: if self.share_hash_tree.needed_hashes(self.sharenum): # This will raise exception if the values being passed do not # match the root node of self.share_hash_tree. try: self.share_hash_tree.set_hashes(sharehashes) except IndexError, le: # Weird -- sharehashes contained index numbers outside of # the range that fit into this hash tree. raise BadOrMissingHash(le) # To validate a block we need the root of the block hash tree, # which is also one of the leafs of the share hash tree, and is # called "the share hash". 
if not self.block_hash_tree[0]: # empty -- no root node yet # Get the share hash from the share hash tree. share_hash = self.share_hash_tree.get_leaf(self.sharenum) if not share_hash: # No root node in block_hash_tree and also the share hash # wasn't sent by the server. raise hashtree.NotEnoughHashesError self.block_hash_tree.set_hashes({0: share_hash}) if self.block_hash_tree.needed_hashes(blocknum): self.block_hash_tree.set_hashes(blockhashes) blockhash = block_hash(blockdata) self.block_hash_tree.set_hashes(leaves={blocknum: blockhash}) #self.log("checking block_hash(shareid=%d, blocknum=%d) len=%d " # "%r .. %r: %s" % # (self.sharenum, blocknum, len(blockdata), # blockdata[:50], blockdata[-50:], base32.b2a(blockhash))) except (hashtree.BadHashError, hashtree.NotEnoughHashesError), le: # log.WEIRD: indicates undetected disk/network error, or more # likely a programming error self.log("hash failure in block=%d, shnum=%d on %s" % (blocknum, self.sharenum, self.bucket)) if self.block_hash_tree.needed_hashes(blocknum): self.log(""" failure occurred when checking the block_hash_tree. This suggests that either the block data was bad, or that the block hashes we received along with it were bad.""") else: self.log(""" the failure probably occurred when checking the share_hash_tree, which suggests that the share hashes we received from the remote peer were bad.""") self.log(" have candidate_share_hash: %s" % bool(candidate_share_hash)) self.log(" block length: %d" % len(blockdata)) self.log(" block hash: %s" % base32.b2a_or_none(blockhash)) if len(blockdata) < 100: self.log(" block data: %r" % (blockdata,)) else: self.log(" block data start/end: %r .. %r" % (blockdata[:50], blockdata[-50:])) self.log(" share hash tree:\n" + self.share_hash_tree.dump()) self.log(" block hash tree:\n" + self.block_hash_tree.dump()) lines = [] for i,h in sorted(sharehashes.items()): lines.append("%3d: %s" % (i, base32.b2a_or_none(h))) self.log(" sharehashes:\n" + "\n".join(lines) + "\n") lines = [] for i,h in blockhashes.items(): lines.append("%3d: %s" % (i, base32.b2a_or_none(h))) log.msg(" blockhashes:\n" + "\n".join(lines) + "\n") raise BadOrMissingHash(le) # If we made it here, the block is good. If the hash trees didn't # like what they saw, they would have raised a BadHashError, causing # our caller to see a Failure and thus ignore this block (as well as # dropping this bucket). return blockdata class Checker(log.PrefixingLogMixin): """I query all servers to see if M uniquely-numbered shares are available. If the verify flag was passed to my constructor, then for each share I download every data block and all metadata from each server and perform a cryptographic integrity check on all of it. If not, I just ask each server 'Which shares do you have?' and believe its answer. In either case, I wait until I have gotten responses from all servers. This fact -- that I wait -- means that an ill-behaved server which fails to answer my questions will make me wait indefinitely. If it is ill-behaved in a way that triggers the underlying foolscap timeouts, then I will wait only as long as those foolscap timeouts, but if it is ill-behaved in a way which placates the foolscap timeouts but still doesn't answer my question then I will wait indefinitely. Before I send any new request to a server, I always ask the 'monitor' object that was passed into my constructor whether this task has been cancelled (by invoking its raise_if_cancelled() method). 
""" def __init__(self, verifycap, servers, verify, add_lease, secret_holder, monitor): assert precondition(isinstance(verifycap, CHKFileVerifierURI), verifycap, type(verifycap)) prefix = "%s" % base32.b2a_l(verifycap.get_storage_index()[:8], 60) log.PrefixingLogMixin.__init__(self, facility="tahoe.immutable.checker", prefix=prefix) self._verifycap = verifycap self._monitor = monitor self._servers = servers self._verify = verify # bool: verify what the servers claim, or not? self._add_lease = add_lease frs = file_renewal_secret_hash(secret_holder.get_renewal_secret(), self._verifycap.get_storage_index()) self.file_renewal_secret = frs fcs = file_cancel_secret_hash(secret_holder.get_cancel_secret(), self._verifycap.get_storage_index()) self.file_cancel_secret = fcs def _get_renewal_secret(self, seed): return bucket_renewal_secret_hash(self.file_renewal_secret, seed) def _get_cancel_secret(self, seed): return bucket_cancel_secret_hash(self.file_cancel_secret, seed) def _get_buckets(self, s, storageindex): """Return a deferred that eventually fires with ({sharenum: bucket}, serverid, success). In case the server is disconnected or returns a Failure then it fires with ({}, serverid, False) (A server disconnecting or returning a Failure when we ask it for buckets is the same, for our purposes, as a server that says it has none, except that we want to track and report whether or not each server responded.)""" rref = s.get_rref() lease_seed = s.get_lease_seed() if self._add_lease: renew_secret = self._get_renewal_secret(lease_seed) cancel_secret = self._get_cancel_secret(lease_seed) d2 = rref.callRemote("add_lease", storageindex, renew_secret, cancel_secret) d2.addErrback(self._add_lease_failed, s.get_name(), storageindex) d = rref.callRemote("get_buckets", storageindex) def _wrap_results(res): return (res, True) def _trap_errs(f): level = log.WEIRD if f.check(DeadReferenceError): level = log.UNUSUAL self.log("failure from server on 'get_buckets' the REMOTE failure was:", facility="tahoe.immutable.checker", failure=f, level=level, umid="AX7wZQ") return ({}, False) d.addCallbacks(_wrap_results, _trap_errs) return d def _add_lease_failed(self, f, server_name, storage_index): # Older versions of Tahoe didn't handle the add-lease message very # well: <=1.1.0 throws a NameError because it doesn't implement # remote_add_lease(), 1.2.0/1.3.0 throw IndexError on unknown buckets # (which is most of them, since we send add-lease to everybody, # before we know whether or not they have any shares for us), and # 1.2.0 throws KeyError even on known buckets due to an internal bug # in the latency-measuring code. # we want to ignore the known-harmless errors and log the others. In # particular we want to log any local errors caused by coding # problems. if f.check(DeadReferenceError): return if f.check(RemoteException): if f.value.failure.check(KeyError, IndexError, NameError): # this may ignore a bit too much, but that only hurts us # during debugging return self.log(format="error in add_lease from [%(name)s]: %(f_value)s", name=server_name, f_value=str(f.value), failure=f, level=log.WEIRD, umid="atbAxw") return # local errors are cause for alarm log.err(f, format="local error in add_lease to [%(name)s]: %(f_value)s", name=server_name, f_value=str(f.value), level=log.WEIRD, umid="hEGuQg") def _download_and_verify(self, server, sharenum, bucket): """Start an attempt to download and verify every block in this bucket and return a deferred that will eventually fire once the attempt completes. 
If you download and verify every block then fire with (True, sharenum, None), else if the share data couldn't be parsed because it was of an unknown version number fire with (False, sharenum, 'incompatible'), else if any of the blocks were invalid, fire with (False, sharenum, 'corrupt'), else if the server disconnected (False, sharenum, 'disconnect'), else if the server returned a Failure during the process fire with (False, sharenum, 'failure'). If there is an internal error such as an uncaught exception in this code, then the deferred will errback, but if there is a remote error such as the server failing or the returned data being incorrect then it will not errback -- it will fire normally with the indicated results.""" vcap = self._verifycap b = layout.ReadBucketProxy(bucket, server, vcap.get_storage_index()) veup = ValidatedExtendedURIProxy(b, vcap) d = veup.start() def _got_ueb(vup): share_hash_tree = IncompleteHashTree(vcap.total_shares) share_hash_tree.set_hashes({0: vup.share_root_hash}) vrbp = ValidatedReadBucketProxy(sharenum, b, share_hash_tree, vup.num_segments, vup.block_size, vup.share_size) # note: normal download doesn't use get_all_sharehashes(), # because it gets more data than necessary. We've discussed the # security properties of having verification and download look # identical (so the server couldn't, say, provide good responses # for one and not the other), but I think that full verification # is more important than defending against inconsistent server # behavior. Besides, they can't pass the verifier without storing # all the data, so there's not so much to be gained by behaving # inconsistently. d = vrbp.get_all_sharehashes() # we fill share_hash_tree before fetching any blocks, so the # block fetches won't send redundant share-hash-tree requests, to # speed things up. Then we fetch+validate all the blockhashes. d.addCallback(lambda ign: vrbp.get_all_blockhashes()) cht = IncompleteHashTree(vup.num_segments) cht.set_hashes({0: vup.crypttext_root_hash}) d.addCallback(lambda ign: vrbp.get_all_crypttext_hashes(cht)) d.addCallback(lambda ign: vrbp) return d d.addCallback(_got_ueb) def _discard_result(r): assert isinstance(r, str), r # to free up the RAM return None def _get_blocks(vrbp): def _get_block(ign, blocknum): db = vrbp.get_block(blocknum) db.addCallback(_discard_result) return db dbs = defer.succeed(None) for blocknum in range(veup.num_segments): dbs.addCallback(_get_block, blocknum) # The Deferred we return will fire after every block of this # share has been downloaded and verified successfully, or else it # will errback as soon as the first error is observed. return dbs d.addCallback(_get_blocks) # if none of those errbacked, the blocks (and the hashes above them) # are good def _all_good(ign): return (True, sharenum, None) d.addCallback(_all_good) # but if anything fails, we'll land here def _errb(f): # We didn't succeed at fetching and verifying all the blocks of # this share. Handle each reason for failure differently. 
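# --- Editor's illustrative sketch (not part of the original Tahoe-LAFS source). ---
# _get_blocks above serializes the per-block downloads by chaining callbacks
# onto one Deferred: each callback returns the next block's Deferred, so block
# N+1 is not requested until block N has been fetched and verified, and the
# chain errbacks at the first failure.  'fetch_block' is a hypothetical
# stand-in for vrbp.get_block().

from twisted.internet import defer

def fetch_block(blocknum):
    # stand-in: pretend the block was fetched and verified
    return defer.succeed("block-%d-data" % blocknum)

def verify_all_blocks(num_blocks):
    def _one(ign, blocknum):
        d = fetch_block(blocknum)
        d.addCallback(lambda data: None)   # discard the data to free RAM
        return d
    dbs = defer.succeed(None)
    for blocknum in range(num_blocks):
        dbs.addCallback(_one, blocknum)
    return dbs   # fires once every block verified, errbacks on the first error

# d = verify_all_blocks(3)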
if f.check(DeadReferenceError): return (False, sharenum, 'disconnect') elif f.check(RemoteException): return (False, sharenum, 'failure') elif f.check(layout.ShareVersionIncompatible): return (False, sharenum, 'incompatible') elif f.check(layout.LayoutInvalid, layout.RidiculouslyLargeURIExtensionBlock, BadOrMissingHash, BadURIExtensionHashValue): return (False, sharenum, 'corrupt') # if it wasn't one of those reasons, re-raise the error return f d.addErrback(_errb) return d def _verify_server_shares(self, s): """ Return a deferred which eventually fires with a tuple of (set(sharenum), server, set(corruptsharenum), set(incompatiblesharenum), success) showing all the shares verified to be served by this server, and all the corrupt shares served by the server, and all the incompatible shares served by the server. In case the server is disconnected or returns a Failure then it fires with the last element False. A server disconnecting or returning a failure when we ask it for shares is the same, for our purposes, as a server that says it has none or offers invalid ones, except that we want to track and report the server's behavior. Similarly, the presence of corrupt shares is mainly of use for diagnostics -- you can typically treat it as just like being no share at all by just observing its absence from the verified shares dict and ignoring its presence in the corrupt shares dict. The 'success' argument means whether the server responded to *any* queries during this process, so if it responded to some queries and then disconnected and ceased responding, or returned a failure, it is still marked with the True flag for 'success'. """ d = self._get_buckets(s, self._verifycap.get_storage_index()) def _got_buckets(result): bucketdict, success = result shareverds = [] for (sharenum, bucket) in bucketdict.items(): d = self._download_and_verify(s, sharenum, bucket) shareverds.append(d) dl = deferredutil.gatherResults(shareverds) def collect(results): verified = set() corrupt = set() incompatible = set() for succ, sharenum, whynot in results: if succ: verified.add(sharenum) else: if whynot == 'corrupt': corrupt.add(sharenum) elif whynot == 'incompatible': incompatible.add(sharenum) return (verified, s, corrupt, incompatible, success) dl.addCallback(collect) return dl def _err(f): f.trap(RemoteException, DeadReferenceError) return (set(), s, set(), set(), False) d.addCallbacks(_got_buckets, _err) return d def _check_server_shares(self, s): """Return a deferred which eventually fires with a tuple of (set(sharenum), server, set(), set(), responded) showing all the shares claimed to be served by this server. 
In case the server is disconnected then it fires with (set(), server, set(), set(), False) (a server disconnecting when we ask it for buckets is the same, for our purposes, as a server that says it has none, except that we want to track and report whether or not each server responded.)""" def _curry_empty_corrupted(res): buckets, responded = res return (set(buckets), s, set(), set(), responded) d = self._get_buckets(s, self._verifycap.get_storage_index()) d.addCallback(_curry_empty_corrupted) return d def _format_results(self, results): SI = self._verifycap.get_storage_index() verifiedshares = dictutil.DictOfSets() # {sharenum: set(server)} servers = {} # {server: set(sharenums)} corruptshare_locators = [] # (server, storageindex, sharenum) incompatibleshare_locators = [] # (server, storageindex, sharenum) servers_responding = set() # server for verified, server, corrupt, incompatible, responded in results: servers.setdefault(server, set()).update(verified) for sharenum in verified: verifiedshares.setdefault(sharenum, set()).add(server) for sharenum in corrupt: corruptshare_locators.append((server, SI, sharenum)) for sharenum in incompatible: incompatibleshare_locators.append((server, SI, sharenum)) if responded: servers_responding.add(server) good_share_hosts = len([s for s in servers.keys() if servers[s]]) assert len(verifiedshares) <= self._verifycap.total_shares, (verifiedshares.keys(), self._verifycap.total_shares) if len(verifiedshares) == self._verifycap.total_shares: healthy = True summary = "Healthy" else: healthy = False summary = ("Not Healthy: %d shares (enc %d-of-%d)" % (len(verifiedshares), self._verifycap.needed_shares, self._verifycap.total_shares)) if len(verifiedshares) >= self._verifycap.needed_shares: recoverable = 1 unrecoverable = 0 else: recoverable = 0 unrecoverable = 1 # The file needs rebalancing if the set of servers that have at least # one share is less than the number of uniquely-numbered shares # available. # TODO: this may be wrong, see ticket #1115 comment:27 and ticket #1784. 
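# --- Editor's illustrative sketch (not part of the original Tahoe-LAFS source). ---
# A condensed rendering of the health classification _format_results computes
# above (and of the needs_rebalancing flag computed just below), with
# hypothetical numbers for a 3-of-10 encoding.

def _classify(num_distinct_shares, good_share_hosts, k=3, N=10):
    healthy = (num_distinct_shares == N)
    recoverable = (num_distinct_shares >= k)
    # see tickets #1115 / #1784: this rebalancing heuristic is suspect
    needs_rebalancing = good_share_hosts < num_distinct_shares
    if healthy:
        summary = "Healthy"
    else:
        summary = ("Not Healthy: %d shares (enc %d-of-%d)"
                   % (num_distinct_shares, k, N))
    return (healthy, recoverable, needs_rebalancing, summary)

# _classify(10, 10) -> (True, True, False, "Healthy")
# _classify(7, 4)   -> (False, True, True, "Not Healthy: 7 shares (enc 3-of-10)")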
needs_rebalancing = bool(good_share_hosts < len(verifiedshares)) cr = CheckResults(self._verifycap, SI, healthy=healthy, recoverable=bool(recoverable), needs_rebalancing=needs_rebalancing, count_shares_needed=self._verifycap.needed_shares, count_shares_expected=self._verifycap.total_shares, count_shares_good=len(verifiedshares), count_good_share_hosts=good_share_hosts, count_recoverable_versions=recoverable, count_unrecoverable_versions=unrecoverable, servers_responding=list(servers_responding), sharemap=verifiedshares, count_wrong_shares=0, # no such thing, for immutable list_corrupt_shares=corruptshare_locators, count_corrupt_shares=len(corruptshare_locators), list_incompatible_shares=incompatibleshare_locators, count_incompatible_shares=len(incompatibleshare_locators), summary=summary, report=[], share_problems=[], servermap=None) return cr def start(self): ds = [] if self._verify: for s in self._servers: ds.append(self._verify_server_shares(s)) else: for s in self._servers: ds.append(self._check_server_shares(s)) return deferredutil.gatherResults(ds).addCallback(self._format_results) tahoe-lafs-1.10.0/src/allmydata/immutable/downloader/000077500000000000000000000000001221140116300224575ustar00rootroot00000000000000tahoe-lafs-1.10.0/src/allmydata/immutable/downloader/__init__.py000066400000000000000000000000001221140116300245560ustar00rootroot00000000000000tahoe-lafs-1.10.0/src/allmydata/immutable/downloader/common.py000066400000000000000000000004511221140116300243210ustar00rootroot00000000000000 (AVAILABLE, PENDING, OVERDUE, COMPLETE, CORRUPT, DEAD, BADSEGNUM) = \ ("AVAILABLE", "PENDING", "OVERDUE", "COMPLETE", "CORRUPT", "DEAD", "BADSEGNUM") class BadSegmentNumberError(Exception): pass class WrongSegmentError(Exception): pass class BadCiphertextHashError(Exception): pass tahoe-lafs-1.10.0/src/allmydata/immutable/downloader/fetcher.py000066400000000000000000000272271221140116300244630ustar00rootroot00000000000000 from twisted.python.failure import Failure from foolscap.api import eventually from allmydata.interfaces import NotEnoughSharesError, NoSharesError from allmydata.util import log from allmydata.util.dictutil import DictOfSets from common import OVERDUE, COMPLETE, CORRUPT, DEAD, BADSEGNUM, \ BadSegmentNumberError class SegmentFetcher: """I am responsible for acquiring blocks for a single segment. I will use the Share instances passed to my add_shares() method to locate, retrieve, and validate those blocks. I expect my parent node to call my no_more_shares() method when there are no more shares available. I will call my parent's want_more_shares() method when I want more: I expect to see at least one call to add_shares or no_more_shares afterwards. When I have enough validated blocks, I will call my parent's process_blocks() method with a dictionary that maps shnum to blockdata. If I am unable to provide enough blocks, I will call my parent's fetch_failed() method with (self, f). After either of these events, I will shut down and do no further work. My parent can also call my stop() method to have me shut down early.""" def __init__(self, node, segnum, k, logparent): self._node = node # _Node self.segnum = segnum self._k = k self._shares = [] # unused Share instances, sorted by "goodness" # (RTT), then shnum. This is populated when DYHB # responses arrive, or (for later segments) at # startup. We remove shares from it when we call # sh.get_block() on them. 
self._shares_from_server = DictOfSets() # maps server to set of # Shares on that server for # which we have outstanding # get_block() calls. self._max_shares_per_server = 1 # how many Shares we're allowed to # pull from each server. This starts # at 1 and grows if we don't have # sufficient diversity. self._active_share_map = {} # maps shnum to outstanding (and not # OVERDUE) Share that provides it. self._overdue_share_map = DictOfSets() # shares in the OVERDUE state self._lp = logparent self._share_observers = {} # maps Share to EventStreamObserver for # active ones self._blocks = {} # maps shnum to validated block data self._no_more_shares = False self._last_failure = None self._running = True def stop(self): log.msg("SegmentFetcher(%s).stop" % self._node._si_prefix, level=log.NOISY, parent=self._lp, umid="LWyqpg") self._cancel_all_requests() self._running = False # help GC ??? XXX del self._shares, self._shares_from_server, self._active_share_map del self._share_observers # called by our parent _Node def add_shares(self, shares): # called when ShareFinder locates a new share, and when a non-initial # segment fetch is started and we already know about shares from the # previous segment self._shares.extend(shares) self._shares.sort(key=lambda s: (s._dyhb_rtt, s._shnum) ) eventually(self.loop) def no_more_shares(self): # ShareFinder tells us it's reached the end of its list self._no_more_shares = True eventually(self.loop) # internal methods def loop(self): try: # if any exception occurs here, kill the download self._do_loop() except BaseException: self._node.fetch_failed(self, Failure()) raise def _do_loop(self): k = self._k if not self._running: return numsegs, authoritative = self._node.get_num_segments() if authoritative and self.segnum >= numsegs: # oops, we were asking for a segment number beyond the end of the # file. This is an error. self.stop() e = BadSegmentNumberError("segnum=%d, numsegs=%d" % (self.segnum, self._node.num_segments)) f = Failure(e) self._node.fetch_failed(self, f) return #print "LOOP", self._blocks.keys(), "active:", self._active_share_map, "overdue:", self._overdue_share_map, "unused:", self._shares # Should we sent out more requests? while len(set(self._blocks.keys()) | set(self._active_share_map.keys()) ) < k: # we don't have data or active requests for enough shares. Are # there any unused shares we can start using? (sent_something, want_more_diversity) = self._find_and_use_share() if sent_something: # great. loop back around in case we need to send more. continue if want_more_diversity: # we could have sent something if we'd been allowed to pull # more shares per server. Increase the limit and try again. self._max_shares_per_server += 1 log.msg("SegmentFetcher(%s) increasing diversity limit to %d" % (self._node._si_prefix, self._max_shares_per_server), level=log.NOISY, umid="xY2pBA") # Also ask for more shares, in the hopes of achieving better # diversity for the next segment. self._ask_for_more_shares() continue # we need more shares than the ones in self._shares to make # progress self._ask_for_more_shares() if self._no_more_shares: # But there are no more shares to be had. If we're going to # succeed, it will be with the shares we've already seen. # Will they be enough? if len(set(self._blocks.keys()) | set(self._active_share_map.keys()) | set(self._overdue_share_map.keys()) ) < k: # nope. bail. self._no_shares_error() # this calls self.stop() return # our outstanding or overdue requests may yet work. # more shares may be coming. Wait until then. 
return # are we done? if len(set(self._blocks.keys())) >= k: # yay! self.stop() self._node.process_blocks(self.segnum, self._blocks) return def _no_shares_error(self): if not (self._shares or self._active_share_map or self._overdue_share_map or self._blocks): format = ("no shares (need %(k)d)." " Last failure: %(last_failure)s") args = { "k": self._k, "last_failure": self._last_failure } error = NoSharesError else: format = ("ran out of shares: complete=%(complete)s" " pending=%(pending)s overdue=%(overdue)s" " unused=%(unused)s need %(k)d." " Last failure: %(last_failure)s") def join(shnums): return ",".join(["sh%d" % shnum for shnum in sorted(shnums)]) pending_s = ",".join([str(sh) for sh in self._active_share_map.values()]) overdue = set() for shares in self._overdue_share_map.values(): overdue |= shares overdue_s = ",".join([str(sh) for sh in overdue]) args = {"complete": join(self._blocks.keys()), "pending": pending_s, "overdue": overdue_s, # 'unused' should be zero "unused": ",".join([str(sh) for sh in self._shares]), "k": self._k, "last_failure": self._last_failure, } error = NotEnoughSharesError log.msg(format=format, level=log.UNUSUAL, parent=self._lp, umid="1DsnTg", **args) e = error(format % args) f = Failure(e) self.stop() self._node.fetch_failed(self, f) def _find_and_use_share(self): sent_something = False want_more_diversity = False for sh in self._shares: # find one good share to fetch shnum = sh._shnum ; server = sh._server # XXX if shnum in self._blocks: continue # don't request data we already have if shnum in self._active_share_map: # note: OVERDUE shares are removed from _active_share_map # and added to _overdue_share_map instead. continue # don't send redundant requests sfs = self._shares_from_server if len(sfs.get(server,set())) >= self._max_shares_per_server: # don't pull too much from a single server want_more_diversity = True continue # ok, we can use this share self._shares.remove(sh) self._active_share_map[shnum] = sh self._shares_from_server.add(server, sh) self._start_share(sh, shnum) sent_something = True break return (sent_something, want_more_diversity) def _start_share(self, share, shnum): self._share_observers[share] = o = share.get_block(self.segnum) o.subscribe(self._block_request_activity, share=share, shnum=shnum) def _ask_for_more_shares(self): if not self._no_more_shares: self._node.want_more_shares() # that will trigger the ShareFinder to keep looking, and call our # add_shares() or no_more_shares() later. def _cancel_all_requests(self): for o in self._share_observers.values(): o.cancel() self._share_observers = {} def _block_request_activity(self, share, shnum, state, block=None, f=None): # called by Shares, in response to our s.send_request() calls. if not self._running: return log.msg("SegmentFetcher(%s)._block_request_activity: %s -> %s" % (self._node._si_prefix, repr(share), state), level=log.NOISY, parent=self._lp, umid="vilNWA") # COMPLETE, CORRUPT, DEAD, BADSEGNUM are terminal. Remove the share # from all our tracking lists. 
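# --- Editor's illustrative sketch (not part of the original Tahoe-LAFS source). ---
# A condensed rendering of the selection policy implemented by add_shares() and
# _find_and_use_share() above: candidate shares stay sorted by (DYHB round-trip
# time, share number), and at most max_shares_per_server shares may be in
# flight to any one server; when that cap is the only thing blocking progress,
# the caller raises the cap by one.  The triples below are hypothetical
# (rtt_seconds, shnum, server) values.

def pick_share(candidates, in_flight_per_server, max_shares_per_server):
    want_more_diversity = False
    for rtt, shnum, server in sorted(candidates):
        if in_flight_per_server.get(server, 0) >= max_shares_per_server:
            want_more_diversity = True
            continue
        return (rtt, shnum, server), want_more_diversity
    return None, want_more_diversity

_candidates = [(0.21, 4, "srvB"), (0.05, 0, "srvA"), (0.05, 7, "srvA")]
# with one request already outstanding to srvA and a cap of 1, sh0 and sh7 are
# skipped and sh4 on srvB is chosen:
# pick_share(_candidates, {"srvA": 1}, 1) -> ((0.21, 4, "srvB"), True)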
if state in (COMPLETE, CORRUPT, DEAD, BADSEGNUM): self._share_observers.pop(share, None) server = share._server # XXX self._shares_from_server.discard(server, share) if self._active_share_map.get(shnum) is share: del self._active_share_map[shnum] self._overdue_share_map.discard(shnum, share) if state is COMPLETE: # 'block' is fully validated and complete self._blocks[shnum] = block if state is OVERDUE: # no longer active, but still might complete del self._active_share_map[shnum] self._overdue_share_map.add(shnum, share) # OVERDUE is not terminal: it will eventually transition to # COMPLETE, CORRUPT, or DEAD. if state is DEAD: self._last_failure = f if state is BADSEGNUM: # our main loop will ask the DownloadNode each time for the # number of segments, so we'll deal with this in the top of # _do_loop pass eventually(self.loop) tahoe-lafs-1.10.0/src/allmydata/immutable/downloader/finder.py000066400000000000000000000220121221140116300242750ustar00rootroot00000000000000 import time now = time.time from foolscap.api import eventually from allmydata.util import base32, log from twisted.internet import reactor from share import Share, CommonShare def incidentally(res, f, *args, **kwargs): """Add me to a Deferred chain like this: d.addBoth(incidentally, func, arg) and I'll behave as if you'd added the following function: def _(res): func(arg) return res This is useful if you want to execute an expression when the Deferred fires, but don't care about its value. """ f(*args, **kwargs) return res class RequestToken: def __init__(self, server): self.server = server class ShareFinder: OVERDUE_TIMEOUT = 10.0 def __init__(self, storage_broker, verifycap, node, download_status, logparent=None, max_outstanding_requests=10): self.running = True # stopped by Share.stop, from Terminator self.verifycap = verifycap self._started = False self._storage_broker = storage_broker self.share_consumer = self.node = node self.max_outstanding_requests = max_outstanding_requests self._hungry = False self._commonshares = {} # shnum to CommonShare instance self.pending_requests = set() self.overdue_requests = set() # subset of pending_requests self.overdue_timers = {} self._storage_index = verifycap.storage_index self._si_prefix = base32.b2a_l(self._storage_index[:8], 60) self._node_logparent = logparent self._download_status = download_status self._lp = log.msg(format="ShareFinder[si=%(si)s] starting", si=self._si_prefix, level=log.NOISY, parent=logparent, umid="2xjj2A") def update_num_segments(self): (numsegs, authoritative) = self.node.get_num_segments() assert authoritative for cs in self._commonshares.values(): cs.set_authoritative_num_segments(numsegs) def start_finding_servers(self): # don't get servers until somebody uses us: creating the # ImmutableFileNode should not cause work to happen yet. 
Test case is # test_dirnode, which creates us with storage_broker=None if not self._started: si = self.verifycap.storage_index servers = self._storage_broker.get_servers_for_psi(si) self._servers = iter(servers) self._started = True def log(self, *args, **kwargs): if "parent" not in kwargs: kwargs["parent"] = self._lp return log.msg(*args, **kwargs) def stop(self): self.running = False while self.overdue_timers: req,t = self.overdue_timers.popitem() t.cancel() # called by our parent CiphertextDownloader def hungry(self): self.log(format="ShareFinder[si=%(si)s] hungry", si=self._si_prefix, level=log.NOISY, umid="NywYaQ") self.start_finding_servers() self._hungry = True eventually(self.loop) # internal methods def loop(self): pending_s = ",".join([rt.server.get_name() for rt in self.pending_requests]) # sort? self.log(format="ShareFinder loop: running=%(running)s" " hungry=%(hungry)s, pending=%(pending)s", running=self.running, hungry=self._hungry, pending=pending_s, level=log.NOISY, umid="kRtS4Q") if not self.running: return if not self._hungry: return non_overdue = self.pending_requests - self.overdue_requests if len(non_overdue) >= self.max_outstanding_requests: # cannot send more requests, must wait for some to retire return server = None try: if self._servers: server = self._servers.next() except StopIteration: self._servers = None if server: self.send_request(server) # we loop again to get parallel queries. The check above will # prevent us from looping forever. eventually(self.loop) return if self.pending_requests: # no server, but there are still requests in flight: maybe one of # them will make progress return self.log(format="ShareFinder.loop: no_more_shares, ever", level=log.UNUSUAL, umid="XjQlzg") # we've run out of servers (so we can't send any more requests), and # we have nothing in flight. No further progress can be made. They # are destined to remain hungry. 
eventually(self.share_consumer.no_more_shares) def send_request(self, server): req = RequestToken(server) self.pending_requests.add(req) lp = self.log(format="sending DYHB to [%(name)s]", name=server.get_name(), level=log.NOISY, umid="Io7pyg") time_sent = now() d_ev = self._download_status.add_dyhb_request(server, time_sent) # TODO: get the timer from a Server object, it knows best self.overdue_timers[req] = reactor.callLater(self.OVERDUE_TIMEOUT, self.overdue, req) d = server.get_rref().callRemote("get_buckets", self._storage_index) d.addBoth(incidentally, self._request_retired, req) d.addCallbacks(self._got_response, self._got_error, callbackArgs=(server, req, d_ev, time_sent, lp), errbackArgs=(server, req, d_ev, lp)) d.addErrback(log.err, format="error in send_request", level=log.WEIRD, parent=lp, umid="rpdV0w") d.addCallback(incidentally, eventually, self.loop) def _request_retired(self, req): self.pending_requests.discard(req) self.overdue_requests.discard(req) if req in self.overdue_timers: self.overdue_timers[req].cancel() del self.overdue_timers[req] def overdue(self, req): del self.overdue_timers[req] assert req in self.pending_requests # paranoia, should never be false self.overdue_requests.add(req) eventually(self.loop) def _got_response(self, buckets, server, req, d_ev, time_sent, lp): shnums = sorted([shnum for shnum in buckets]) time_received = now() d_ev.finished(shnums, time_received) dyhb_rtt = time_received - time_sent if not buckets: self.log(format="no shares from [%(name)s]", name=server.get_name(), level=log.NOISY, parent=lp, umid="U7d4JA") return shnums_s = ",".join([str(shnum) for shnum in shnums]) self.log(format="got shnums [%(shnums)s] from [%(name)s]", shnums=shnums_s, name=server.get_name(), level=log.NOISY, parent=lp, umid="0fcEZw") shares = [] for shnum, bucket in buckets.iteritems(): s = self._create_share(shnum, bucket, server, dyhb_rtt) shares.append(s) self._deliver_shares(shares) def _create_share(self, shnum, bucket, server, dyhb_rtt): if shnum in self._commonshares: cs = self._commonshares[shnum] else: numsegs, authoritative = self.node.get_num_segments() cs = CommonShare(numsegs, self._si_prefix, shnum, self._node_logparent) if authoritative: cs.set_authoritative_num_segments(numsegs) # Share._get_satisfaction is responsible for updating # CommonShare.set_numsegs after we know the UEB. Alternatives: # 1: d = self.node.get_num_segments() # d.addCallback(cs.got_numsegs) # the problem is that the OneShotObserverList I was using # inserts an eventual-send between _get_satisfaction's # _satisfy_UEB and _satisfy_block_hash_tree, and the # CommonShare didn't get the num_segs message before # being asked to set block hash values. To resolve this # would require an immediate ObserverList instead of # an eventual-send -based one # 2: break _get_satisfaction into Deferred-attached pieces. # Yuck. 
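# --- Editor's illustrative sketch (not part of the original Tahoe-LAFS source). ---
# The DYHB bookkeeping in send_request() / _request_retired() / overdue() above
# boils down to: arm a timer when the request is sent, cancel it if the request
# retires first, and mark the request overdue (while keeping it pending) if the
# timer fires first.  twisted.internet.task.Clock stands in for the real
# reactor so this toy version runs deterministically.

from twisted.internet import task

class _ToyFinder:
    OVERDUE_TIMEOUT = 10.0

    def __init__(self, clock):
        self._clock = clock
        self.pending = set()
        self.overdue = set()
        self._timers = {}

    def send(self, req):
        self.pending.add(req)
        self._timers[req] = self._clock.callLater(self.OVERDUE_TIMEOUT,
                                                  self._overdue, req)

    def retire(self, req):
        self.pending.discard(req)
        self.overdue.discard(req)
        if req in self._timers:
            self._timers.pop(req).cancel()

    def _overdue(self, req):
        del self._timers[req]
        self.overdue.add(req)   # still pending, but no longer counted against
                                # max_outstanding_requests

_clock = task.Clock()
_f = _ToyFinder(_clock)
_f.send("dyhb-to-srvA")
_clock.advance(11)   # the timer fires: the request is now overdue
# _f.overdue == set(["dyhb-to-srvA"]); _f.pending still contains it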
self._commonshares[shnum] = cs s = Share(bucket, server, self.verifycap, cs, self.node, self._download_status, shnum, dyhb_rtt, self._node_logparent) return s def _deliver_shares(self, shares): # they will call hungry() again if they want more self._hungry = False shares_s = ",".join([str(sh) for sh in shares]) self.log(format="delivering shares: %s" % shares_s, level=log.NOISY, umid="2n1qQw") eventually(self.share_consumer.got_shares, shares) def _got_error(self, f, server, req, d_ev, lp): d_ev.error(now()) self.log(format="got error from [%(name)s]", name=server.get_name(), failure=f, level=log.UNUSUAL, parent=lp, umid="zUKdCw") tahoe-lafs-1.10.0/src/allmydata/immutable/downloader/node.py000066400000000000000000000570601221140116300237660ustar00rootroot00000000000000 import time now = time.time from zope.interface import Interface from twisted.python.failure import Failure from twisted.internet import defer from foolscap.api import eventually from allmydata import uri from allmydata.codec import CRSDecoder from allmydata.util import base32, log, hashutil, mathutil, observer from allmydata.interfaces import DEFAULT_MAX_SEGMENT_SIZE from allmydata.hashtree import IncompleteHashTree, BadHashError, \ NotEnoughHashesError # local imports from finder import ShareFinder from fetcher import SegmentFetcher from segmentation import Segmentation from common import BadCiphertextHashError class IDownloadStatusHandlingConsumer(Interface): def set_download_status_read_event(read_ev): """Record the DownloadStatus 'read event', to be updated with the time it takes to decrypt each chunk of data.""" class Cancel: def __init__(self, f): self._f = f self.active = True def cancel(self): if self.active: self.active = False self._f(self) class DownloadNode: """Internal class which manages downloads and holds state. External callers use CiphertextFileNode instead.""" # Share._node points to me def __init__(self, verifycap, storage_broker, secret_holder, terminator, history, download_status): assert isinstance(verifycap, uri.CHKFileVerifierURI) self._verifycap = verifycap self._storage_broker = storage_broker self._si_prefix = base32.b2a_l(verifycap.storage_index[:8], 60) self.running = True if terminator: terminator.register(self) # calls self.stop() at stopService() # the rules are: # 1: Only send network requests if you're active (self.running is True) # 2: Use TimerService, not reactor.callLater # 3: You can do eventual-sends any time. # These rules should mean that once # stopService()+flushEventualQueue() fires, everything will be done. self._secret_holder = secret_holder self._history = history self._download_status = download_status k, N = self._verifycap.needed_shares, self._verifycap.total_shares self.share_hash_tree = IncompleteHashTree(N) # we guess the segment size, so Segmentation can pull non-initial # segments in a single roundtrip. 
This populates # .guessed_segment_size, .guessed_num_segments, and # .ciphertext_hash_tree (with a dummy, to let us guess which hashes # we'll need) self._build_guessed_tables(DEFAULT_MAX_SEGMENT_SIZE) # filled in when we parse a valid UEB self.have_UEB = False self.segment_size = None self.tail_segment_size = None self.tail_segment_padded = None self.num_segments = None self.block_size = None self.tail_block_size = None # things to track callers that want data # _segment_requests can have duplicates self._segment_requests = [] # (segnum, d, cancel_handle, seg_ev, lp) self._active_segment = None # a SegmentFetcher, with .segnum self._segsize_observers = observer.OneShotObserverList() # we create one top-level logparent for this _Node, and another one # for each read() call. Segmentation and get_segment() messages are # associated with the read() call, everything else is tied to the # _Node's log entry. lp = log.msg(format="Immutable.DownloadNode(%(si)s) created:" " size=%(size)d," " guessed_segsize=%(guessed_segsize)d," " guessed_numsegs=%(guessed_numsegs)d", si=self._si_prefix, size=verifycap.size, guessed_segsize=self.guessed_segment_size, guessed_numsegs=self.guessed_num_segments, level=log.OPERATIONAL, umid="uJ0zAQ") self._lp = lp self._sharefinder = ShareFinder(storage_broker, verifycap, self, self._download_status, lp) self._shares = set() def _build_guessed_tables(self, max_segment_size): size = min(self._verifycap.size, max_segment_size) s = mathutil.next_multiple(size, self._verifycap.needed_shares) self.guessed_segment_size = s r = self._calculate_sizes(self.guessed_segment_size) self.guessed_num_segments = r["num_segments"] # as with CommonShare, our ciphertext_hash_tree is a stub until we # get the real num_segments self.ciphertext_hash_tree = IncompleteHashTree(self.guessed_num_segments) self.ciphertext_hash_tree_leaves = self.guessed_num_segments def __repr__(self): return "ImmutableDownloadNode(%s)" % (self._si_prefix,) def stop(self): # called by the Terminator at shutdown, mostly for tests if self._active_segment: self._active_segment.stop() self._active_segment = None self._sharefinder.stop() # things called by outside callers, via CiphertextFileNode. get_segment() # may also be called by Segmentation. def read(self, consumer, offset, size): """I am the main entry point, from which FileNode.read() can get data. I feed the consumer with the desired range of ciphertext. I return a Deferred that fires (with the consumer) when the read is finished. Note that there is no notion of a 'file pointer': each call to read() uses an independent offset= value. 
""" # for concurrent operations: each gets its own Segmentation manager if size is None: size = self._verifycap.size # ignore overruns: clip size so offset+size does not go past EOF, and # so size is not negative (which indicates that offset >= EOF) size = max(0, min(size, self._verifycap.size-offset)) read_ev = self._download_status.add_read_event(offset, size, now()) if IDownloadStatusHandlingConsumer.providedBy(consumer): consumer.set_download_status_read_event(read_ev) consumer.set_download_status(self._download_status) lp = log.msg(format="imm Node(%(si)s).read(%(offset)d, %(size)d)", si=base32.b2a(self._verifycap.storage_index)[:8], offset=offset, size=size, level=log.OPERATIONAL, parent=self._lp, umid="l3j3Ww") if self._history: sp = self._history.stats_provider sp.count("downloader.files_downloaded", 1) # really read() calls sp.count("downloader.bytes_downloaded", size) if size == 0: read_ev.finished(now()) # no data, so no producer, so no register/unregisterProducer return defer.succeed(consumer) # for concurrent operations, each read() gets its own Segmentation # manager s = Segmentation(self, offset, size, consumer, read_ev, lp) # this raises an interesting question: what segments to fetch? if # offset=0, always fetch the first segment, and then allow # Segmentation to be responsible for pulling the subsequent ones if # the first wasn't large enough. If offset>0, we're going to need an # extra roundtrip to get the UEB (and therefore the segment size) # before we can figure out which segment to get. TODO: allow the # offset-table-guessing code (which starts by guessing the segsize) # to assist the offset>0 process. d = s.start() def _done(res): read_ev.finished(now()) return res d.addBoth(_done) return d def get_segment(self, segnum, logparent=None): """Begin downloading a segment. I return a tuple (d, c): 'd' is a Deferred that fires with (offset,data) when the desired segment is available, and c is an object on which c.cancel() can be called to disavow interest in the segment (after which 'd' will never fire). You probably need to know the segment size before calling this, unless you want the first few bytes of the file. If you ask for a segment number which turns out to be too large, the Deferred will errback with BadSegmentNumberError. The Deferred fires with the offset of the first byte of the data segment, so that you can call get_segment() before knowing the segment size, and still know which data you received. The Deferred can also errback with other fatal problems, such as NotEnoughSharesError, NoSharesError, or BadCiphertextHashError. """ lp = log.msg(format="imm Node(%(si)s).get_segment(%(segnum)d)", si=base32.b2a(self._verifycap.storage_index)[:8], segnum=segnum, level=log.OPERATIONAL, parent=logparent, umid="UKFjDQ") seg_ev = self._download_status.add_segment_request(segnum, now()) d = defer.Deferred() c = Cancel(self._cancel_request) self._segment_requests.append( (segnum, d, c, seg_ev, lp) ) self._start_new_segment() return (d, c) def get_segsize(self): """Return a Deferred that fires when we know the real segment size.""" if self.segment_size: return defer.succeed(self.segment_size) # TODO: this downloads (and discards) the first segment of the file. # We could make this more efficient by writing # fetcher.SegmentSizeFetcher, with the job of finding a single valid # share and extracting the UEB. We'd add Share.get_UEB() to request # just the UEB. 
(d,c) = self.get_segment(0) # this ensures that an error during get_segment() will errback the # caller, so Repair won't wait forever on completely missing files d.addCallback(lambda ign: self._segsize_observers.when_fired()) return d # things called by the Segmentation object used to transform # arbitrary-sized read() calls into quantized segment fetches def _start_new_segment(self): if self._active_segment is None and self._segment_requests: (segnum, d, c, seg_ev, lp) = self._segment_requests[0] k = self._verifycap.needed_shares log.msg(format="%(node)s._start_new_segment: segnum=%(segnum)d", node=repr(self), segnum=segnum, level=log.NOISY, parent=lp, umid="wAlnHQ") self._active_segment = fetcher = SegmentFetcher(self, segnum, k, lp) seg_ev.activate(now()) active_shares = [s for s in self._shares if s.is_alive()] fetcher.add_shares(active_shares) # this triggers the loop # called by our child ShareFinder def got_shares(self, shares): self._shares.update(shares) if self._active_segment: self._active_segment.add_shares(shares) def no_more_shares(self): self._no_more_shares = True if self._active_segment: self._active_segment.no_more_shares() # things called by our Share instances def validate_and_store_UEB(self, UEB_s): log.msg("validate_and_store_UEB", level=log.OPERATIONAL, parent=self._lp, umid="7sTrPw") h = hashutil.uri_extension_hash(UEB_s) if h != self._verifycap.uri_extension_hash: raise BadHashError self._parse_and_store_UEB(UEB_s) # sets self._stuff # TODO: a malformed (but authentic) UEB could throw an assertion in # _parse_and_store_UEB, and we should abandon the download. self.have_UEB = True # inform the ShareFinder about our correct number of segments. This # will update the block-hash-trees in all existing CommonShare # instances, and will populate new ones with the correct value. self._sharefinder.update_num_segments() def _parse_and_store_UEB(self, UEB_s): # Note: the UEB contains needed_shares and total_shares. These are # redundant and inferior (the filecap contains the authoritative # values). However, because it is possible to encode the same file in # multiple ways, and the encoders might choose (poorly) to use the # same key for both (therefore getting the same SI), we might # encounter shares for both types. The UEB hashes will be different, # however, and we'll disregard the "other" encoding's shares as # corrupted. # therefore, we ignore d['total_shares'] and d['needed_shares']. d = uri.unpack_extension(UEB_s) log.msg(format="UEB=%(ueb)s, vcap=%(vcap)s", ueb=repr(uri.unpack_extension_readable(UEB_s)), vcap=self._verifycap.to_string(), level=log.NOISY, parent=self._lp, umid="cVqZnA") k, N = self._verifycap.needed_shares, self._verifycap.total_shares self.segment_size = d['segment_size'] self._segsize_observers.fire(self.segment_size) r = self._calculate_sizes(self.segment_size) self.tail_segment_size = r["tail_segment_size"] self.tail_segment_padded = r["tail_segment_padded"] self.num_segments = r["num_segments"] self.block_size = r["block_size"] self.tail_block_size = r["tail_block_size"] log.msg("actual sizes: %s" % (r,), level=log.NOISY, parent=self._lp, umid="PY6P5Q") if (self.segment_size == self.guessed_segment_size and self.num_segments == self.guessed_num_segments): log.msg("my guess was right!", level=log.NOISY, parent=self._lp, umid="x340Ow") else: log.msg("my guess was wrong! 
Extra round trips for me.", level=log.NOISY, parent=self._lp, umid="tb7RJw") # zfec.Decode() instantiation is fast, but still, let's use the same # codec instance for all but the last segment. 3-of-10 takes 15us on # my laptop, 25-of-100 is 900us, 3-of-255 is 97us, 25-of-255 is # 2.5ms, worst-case 254-of-255 is 9.3ms self._codec = CRSDecoder() self._codec.set_params(self.segment_size, k, N) # Ciphertext hash tree root is mandatory, so that there is at most # one ciphertext that matches this read-cap or verify-cap. The # integrity check on the shares is not sufficient to prevent the # original encoder from creating some shares of file A and other # shares of file B. self.ciphertext_hash_tree was a guess before: # this is where we create it for real. self.ciphertext_hash_tree = IncompleteHashTree(self.num_segments) self.ciphertext_hash_tree_leaves = self.num_segments self.ciphertext_hash_tree.set_hashes({0: d['crypttext_root_hash']}) self.share_hash_tree.set_hashes({0: d['share_root_hash']}) # Our job is a fast download, not verification, so we ignore any # redundant fields. The Verifier uses a different code path which # does not ignore them. def _calculate_sizes(self, segment_size): # segments of ciphertext size = self._verifycap.size k = self._verifycap.needed_shares # this assert matches the one in encode.py:127 inside # Encoded._got_all_encoding_parameters, where the UEB is constructed assert segment_size % k == 0 # the last segment is usually short. We don't store a whole segsize, # but we do pad the segment up to a multiple of k, because the # encoder requires that. tail_segment_size = size % segment_size if tail_segment_size == 0: tail_segment_size = segment_size padded = mathutil.next_multiple(tail_segment_size, k) tail_segment_padded = padded num_segments = mathutil.div_ceil(size, segment_size) # each segment is turned into N blocks. All but the last are of size # block_size, and the last is of size tail_block_size block_size = segment_size / k tail_block_size = tail_segment_padded / k return { "tail_segment_size": tail_segment_size, "tail_segment_padded": tail_segment_padded, "num_segments": num_segments, "block_size": block_size, "tail_block_size": tail_block_size, } def process_share_hashes(self, share_hashes): for hashnum in share_hashes: if hashnum >= len(self.share_hash_tree): # "BadHashError" is normally for e.g. a corrupt block. We # sort of abuse it here to mean a badly numbered hash (which # indicates corruption in the number bytes, rather than in # the data bytes). 
raise BadHashError("hashnum %d doesn't fit in hashtree(%d)" % (hashnum, len(self.share_hash_tree))) self.share_hash_tree.set_hashes(share_hashes) def get_desired_ciphertext_hashes(self, segnum): if segnum < self.ciphertext_hash_tree_leaves: return self.ciphertext_hash_tree.needed_hashes(segnum, include_leaf=True) return [] def get_needed_ciphertext_hashes(self, segnum): cht = self.ciphertext_hash_tree return cht.needed_hashes(segnum, include_leaf=True) def process_ciphertext_hashes(self, hashes): assert self.num_segments is not None # this may raise BadHashError or NotEnoughHashesError self.ciphertext_hash_tree.set_hashes(hashes) # called by our child SegmentFetcher def want_more_shares(self): self._sharefinder.hungry() def fetch_failed(self, sf, f): assert sf is self._active_segment # deliver error upwards for (d,c,seg_ev) in self._extract_requests(sf.segnum): seg_ev.error(now()) eventually(self._deliver, d, c, f) self._active_segment = None self._start_new_segment() def process_blocks(self, segnum, blocks): start = now() d = defer.maybeDeferred(self._decode_blocks, segnum, blocks) d.addCallback(self._check_ciphertext_hash, segnum) def _deliver(result): log.msg(format="delivering segment(%(segnum)d)", segnum=segnum, level=log.OPERATIONAL, parent=self._lp, umid="j60Ojg") when = now() if isinstance(result, Failure): # this catches failures in decode or ciphertext hash for (d,c,seg_ev) in self._extract_requests(segnum): seg_ev.error(when) eventually(self._deliver, d, c, result) else: (offset, segment, decodetime) = result for (d,c,seg_ev) in self._extract_requests(segnum): # when we have two requests for the same segment, the # second one will not be "activated" before the data is # delivered, so to allow the status-reporting code to see # consistent behavior, we activate them all now. The # SegmentEvent will ignore duplicate activate() calls. # Note that this will result in an inaccurate "receive # speed" for the second request. 
seg_ev.activate(when) seg_ev.deliver(when, offset, len(segment), decodetime) eventually(self._deliver, d, c, result) self._download_status.add_misc_event("process_block", start, now()) self._active_segment = None self._start_new_segment() d.addBoth(_deliver) d.addErrback(log.err, "unhandled error during process_blocks", level=log.WEIRD, parent=self._lp, umid="MkEsCg") def _decode_blocks(self, segnum, blocks): start = now() tail = (segnum == self.num_segments-1) codec = self._codec block_size = self.block_size decoded_size = self.segment_size if tail: # account for the padding in the last segment codec = CRSDecoder() k, N = self._verifycap.needed_shares, self._verifycap.total_shares codec.set_params(self.tail_segment_padded, k, N) block_size = self.tail_block_size decoded_size = self.tail_segment_padded shares = [] shareids = [] for (shareid, share) in blocks.iteritems(): assert len(share) == block_size shareids.append(shareid) shares.append(share) del blocks d = codec.decode(shares, shareids) # segment del shares def _process(buffers): decodetime = now() - start segment = "".join(buffers) assert len(segment) == decoded_size del buffers if tail: segment = segment[:self.tail_segment_size] self._download_status.add_misc_event("decode", start, now()) return (segment, decodetime) d.addCallback(_process) return d def _check_ciphertext_hash(self, (segment, decodetime), segnum): start = now() assert self._active_segment.segnum == segnum assert self.segment_size is not None offset = segnum * self.segment_size h = hashutil.crypttext_segment_hash(segment) try: self.ciphertext_hash_tree.set_hashes(leaves={segnum: h}) self._download_status.add_misc_event("CThash", start, now()) return (offset, segment, decodetime) except (BadHashError, NotEnoughHashesError): format = ("hash failure in ciphertext_hash_tree:" " segnum=%(segnum)d, SI=%(si)s") log.msg(format=format, segnum=segnum, si=self._si_prefix, failure=Failure(), level=log.WEIRD, parent=self._lp, umid="MTwNnw") # this is especially weird, because we made it past the share # hash tree. It implies that we're using the wrong encoding, or # that the uploader deliberately constructed a bad UEB. msg = format % {"segnum": segnum, "si": self._si_prefix} raise BadCiphertextHashError(msg) def _deliver(self, d, c, result): # this method exists to handle cancel() that occurs between # _got_segment and _deliver if c.active: c.active = False # it is now too late to cancel d.callback(result) # might actually be an errback def _extract_requests(self, segnum): """Remove matching requests and return their (d,c) tuples so that the caller can retire them.""" retire = [(d,c,seg_ev) for (segnum0,d,c,seg_ev,lp) in self._segment_requests if segnum0 == segnum] self._segment_requests = [t for t in self._segment_requests if t[0] != segnum] return retire def _cancel_request(self, c): self._segment_requests = [t for t in self._segment_requests if t[2] != c] segnums = [segnum for (segnum,d,c,seg_ev,lp) in self._segment_requests] # self._active_segment might be None in rare circumstances, so make # sure we tolerate it if self._active_segment and self._active_segment.segnum not in segnums: self._active_segment.stop() self._active_segment = None self._start_new_segment() # called by ShareFinder to choose hashtree sizes in CommonShares, and by # SegmentFetcher to tell if it is still fetching a valid segnum. 
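# --- Editor's illustrative sketch (not part of the original Tahoe-LAFS source). ---
# The tail-segment arithmetic from _calculate_sizes() / _decode_blocks() above,
# in isolation: the last segment is padded up to a multiple of k so the erasure
# coder can split it into k equal blocks, decoded at the padded size, and then
# trimmed back down to the real tail size.  zfec is not invoked here; the
# numbers (k=3, ~128 KiB segments) are hypothetical.

def _tail_sizes(filesize, segment_size, k):
    tail = filesize % segment_size or segment_size
    padded = ((tail + k - 1) // k) * k       # next multiple of k
    return tail, padded, padded // k         # (real size, decoded size, block size)

_tail, _padded, _tail_block = _tail_sizes(700000, 131070, 3)
# _tail == 44650, _padded == 44652, _tail_block == 14884
# after decoding, the reassembled tail segment has len == _padded (44652);
# the downloader keeps only segment[:_tail] (44650 bytes), dropping the two
# padding bytes.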
    def get_num_segments(self):
        # returns (best_num_segments, authoritative)
        if self.num_segments is None:
            return (self.guessed_num_segments, False)
        return (self.num_segments, True)

tahoe-lafs-1.10.0/src/allmydata/immutable/downloader/segmentation.py

import time
now = time.time

from zope.interface import implements
from twisted.internet import defer
from twisted.internet.interfaces import IPushProducer
from foolscap.api import eventually

from allmydata.util import log
from allmydata.util.spans import overlap
from allmydata.interfaces import DownloadStopped

from common import BadSegmentNumberError, WrongSegmentError

class Segmentation:
    """I am responsible for a single offset+size read of the file. I handle
    segmentation: I figure out which segments are necessary, request them
    (from my CiphertextDownloader) in order, and trim the segments down to
    match the offset+size span. I use the Producer/Consumer interface to
    only request one segment at a time.
    """
    implements(IPushProducer)

    def __init__(self, node, offset, size, consumer, read_ev, logparent=None):
        self._node = node
        self._hungry = True
        self._active_segnum = None
        self._cancel_segment_request = None
        # these are updated as we deliver data. At any given time, we still
        # want to download file[offset:offset+size]
        self._offset = offset
        self._size = size
        assert offset+size <= node._verifycap.size
        self._consumer = consumer
        self._read_ev = read_ev
        self._start_pause = None
        self._lp = logparent

    def start(self):
        self._alive = True
        self._deferred = defer.Deferred()
        self._deferred.addBoth(self._done)
        self._consumer.registerProducer(self, True)
        self._maybe_fetch_next()
        return self._deferred

    def _done(self, res):
        self._consumer.unregisterProducer()
        return res

    def _maybe_fetch_next(self):
        if not self._alive or not self._hungry:
            return
        if self._active_segnum is not None:
            return
        self._fetch_next()

    def _fetch_next(self):
        if self._size == 0:
            # done!
            self._alive = False
            self._hungry = False
            self._deferred.callback(self._consumer)
            return
        n = self._node
        have_actual_segment_size = n.segment_size is not None
        guess_s = ""
        if not have_actual_segment_size:
            guess_s = "probably "
        segment_size = n.segment_size or n.guessed_segment_size
        if self._offset == 0:
            # great!
we want segment0 for sure wanted_segnum = 0 else: # this might be a guess wanted_segnum = self._offset // segment_size log.msg(format="_fetch_next(offset=%(offset)d) %(guess)swants segnum=%(segnum)d", offset=self._offset, guess=guess_s, segnum=wanted_segnum, level=log.NOISY, parent=self._lp, umid="5WfN0w") self._active_segnum = wanted_segnum d,c = n.get_segment(wanted_segnum, self._lp) self._cancel_segment_request = c d.addBoth(self._request_retired) d.addCallback(self._got_segment, wanted_segnum) if not have_actual_segment_size: # we can retry once d.addErrback(self._retry_bad_segment) d.addErrback(self._error) def _request_retired(self, res): self._active_segnum = None self._cancel_segment_request = None return res def _got_segment(self, (segment_start,segment,decodetime), wanted_segnum): self._cancel_segment_request = None # we got file[segment_start:segment_start+len(segment)] # we want file[self._offset:self._offset+self._size] log.msg(format="Segmentation got data:" " want [%(wantstart)d-%(wantend)d)," " given [%(segstart)d-%(segend)d), for segnum=%(segnum)d", wantstart=self._offset, wantend=self._offset+self._size, segstart=segment_start, segend=segment_start+len(segment), segnum=wanted_segnum, level=log.OPERATIONAL, parent=self._lp, umid="32dHcg") o = overlap(segment_start, len(segment), self._offset, self._size) # the overlap is file[o[0]:o[0]+o[1]] if not o or o[0] != self._offset: # we didn't get the first byte, so we can't use this segment log.msg("Segmentation handed wrong data:" " want [%d-%d), given [%d-%d), for segnum=%d," " for si=%s" % (self._offset, self._offset+self._size, segment_start, segment_start+len(segment), wanted_segnum, self._node._si_prefix), level=log.UNUSUAL, parent=self._lp, umid="STlIiA") # we may retry if the segnum we asked was based on a guess raise WrongSegmentError("I was given the wrong data.") offset_in_segment = self._offset - segment_start desired_data = segment[offset_in_segment:offset_in_segment+o[1]] self._offset += len(desired_data) self._size -= len(desired_data) self._consumer.write(desired_data) # the consumer might call our .pauseProducing() inside that write() # call, setting self._hungry=False self._read_ev.update(len(desired_data), 0, 0) # note: filenode.DecryptingConsumer is responsible for calling # _read_ev.update with how much decrypt_time was consumed self._maybe_fetch_next() def _retry_bad_segment(self, f): f.trap(WrongSegmentError, BadSegmentNumberError) # we guessed the segnum wrong: either one that doesn't overlap with # the start of our desired region, or one that's beyond the end of # the world. Now that we have the right information, we're allowed to # retry once. 
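# Illustrative sketch (standalone; this helper is hypothetical, not the real
# allmydata.util.spans.overlap): the trimming arithmetic _got_segment() above
# performs to map a delivered segment onto the caller's [offset, offset+size)
# span. The real code raises WrongSegmentError when the overlap does not
# start at the next byte it needs.
def _example_trim_segment(segment_start, segment, offset, size):
    seg_end = segment_start + len(segment)
    want_end = offset + size
    start = max(segment_start, offset)
    end = min(seg_end, want_end)
    if end <= start or start != offset:
        return None   # wrong segment: no usable overlap
    return segment[start - segment_start : end - segment_start]

# e.g. _example_trim_segment(1024, "x"*1024, 1500, 2000) returns the 548
# bytes of that segment that fall inside [1500, 3500)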
assert self._node.segment_size is not None return self._maybe_fetch_next() def _error(self, f): log.msg("Error in Segmentation", failure=f, level=log.WEIRD, parent=self._lp, umid="EYlXBg") self._alive = False self._hungry = False self._deferred.errback(f) def stopProducing(self): log.msg("asked to stopProducing", level=log.NOISY, parent=self._lp, umid="XIyL9w") self._hungry = False self._alive = False # cancel any outstanding segment request if self._cancel_segment_request: self._cancel_segment_request.cancel() self._cancel_segment_request = None e = DownloadStopped("our Consumer called stopProducing()") self._deferred.errback(e) def pauseProducing(self): self._hungry = False self._start_pause = now() def resumeProducing(self): self._hungry = True eventually(self._maybe_fetch_next) if self._start_pause is not None: paused = now() - self._start_pause self._read_ev.update(0, 0, paused) self._start_pause = None tahoe-lafs-1.10.0/src/allmydata/immutable/downloader/share.py000066400000000000000000001253511221140116300241420ustar00rootroot00000000000000 import struct import time now = time.time from twisted.python.failure import Failure from foolscap.api import eventually from allmydata.util import base32, log, hashutil, mathutil from allmydata.util.spans import Spans, DataSpans from allmydata.interfaces import HASH_SIZE from allmydata.hashtree import IncompleteHashTree, BadHashError, \ NotEnoughHashesError from allmydata.immutable.layout import make_write_bucket_proxy from allmydata.util.observer import EventStreamObserver from common import COMPLETE, CORRUPT, DEAD, BADSEGNUM class LayoutInvalid(Exception): pass class DataUnavailable(Exception): pass class Share: """I represent a single instance of a single share (e.g. I reference the shnum2 for share SI=abcde on server xy12t, not the one on server ab45q). I am associated with a CommonShare that remembers data that is held in common among e.g. SI=abcde/shnum2 across all servers. I am also associated with a CiphertextFileNode for e.g. SI=abcde (all shares, all servers). """ # this is a specific implementation of IShare for tahoe's native storage # servers. A different backend would use a different class. def __init__(self, rref, server, verifycap, commonshare, node, download_status, shnum, dyhb_rtt, logparent): self._rref = rref self._server = server self._node = node # holds share_hash_tree and UEB self.actual_segment_size = node.segment_size # might still be None # XXX change node.guessed_segment_size to # node.best_guess_segment_size(), which should give us the real ones # if known, else its guess. 
self._guess_offsets(verifycap, node.guessed_segment_size) self.actual_offsets = None self._UEB_length = None self._commonshare = commonshare # holds block_hash_tree self._download_status = download_status self._storage_index = verifycap.storage_index self._si_prefix = base32.b2a(verifycap.storage_index)[:8] self._shnum = shnum self._dyhb_rtt = dyhb_rtt # self._alive becomes False upon fatal corruption or server error self._alive = True self._loop_scheduled = False self._lp = log.msg(format="%(share)s created", share=repr(self), level=log.NOISY, parent=logparent, umid="P7hv2w") self._pending = Spans() # request sent but no response received yet self._received = DataSpans() # ACK response received, with data self._unavailable = Spans() # NAK response received, no data # any given byte of the share can be in one of four states: # in: _wanted, _requested, _received # FALSE FALSE FALSE : don't care about it at all # TRUE FALSE FALSE : want it, haven't yet asked for it # TRUE TRUE FALSE : request is in-flight # or didn't get it # FALSE TRUE TRUE : got it, haven't used it yet # FALSE TRUE FALSE : got it and used it # FALSE FALSE FALSE : block consumed, ready to ask again # # when we request data and get a NAK, we leave it in _requested # to remind ourself to not ask for it again. We don't explicitly # remove it from anything (maybe this should change). # # We retain the hashtrees in the Node, so we leave those spans in # _requested (and never ask for them again, as long as the Node is # alive). But we don't retain data blocks (too big), so when we # consume a data block, we remove it from _requested, so a later # download can re-fetch it. self._requested_blocks = [] # (segnum, set(observer2..)) v = server.get_version() ver = v["http://allmydata.org/tahoe/protocols/storage/v1"] self._overrun_ok = ver["tolerates-immutable-read-overrun"] # If _overrun_ok and we guess the offsets correctly, we can get # everything in one RTT. If _overrun_ok and we guess wrong, we might # need two RTT (but we could get lucky and do it in one). If overrun # is *not* ok (tahoe-1.3.0 or earlier), we need four RTT: 1=version, # 2=offset table, 3=UEB_length and everything else (hashes, block), # 4=UEB. self.had_corruption = False # for unit tests def __repr__(self): return "Share(sh%d-on-%s)" % (self._shnum, self._server.get_name()) def is_alive(self): # XXX: reconsider. If the share sees a single error, should it remain # dead for all time? Or should the next segment try again? This DEAD # state is stored elsewhere too (SegmentFetcher per-share states?) # and needs to be consistent. We clear _alive in self._fail(), which # is called upon a network error, or layout failure, or hash failure # in the UEB or a hash tree. We do not _fail() for a hash failure in # a block, but of course we still tell our callers about # state=CORRUPT so they'll find a different share. return self._alive def _guess_offsets(self, verifycap, guessed_segment_size): self.guessed_segment_size = guessed_segment_size size = verifycap.size k = verifycap.needed_shares N = verifycap.total_shares r = self._node._calculate_sizes(guessed_segment_size) # num_segments, block_size/tail_block_size # guessed_segment_size/tail_segment_size/tail_segment_padded share_size = mathutil.div_ceil(size, k) # share_size is the amount of block data that will be put into each # share, summed over all segments. It does not include hashes, the # UEB, or other overhead. 
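# Illustrative sketch (standalone, made-up parameters; the real code uses
# allmydata.util.mathutil.div_ceil and Node._calculate_sizes): the sizing
# arithmetic behind the share_size guess above. Each segment is erasure-coded
# into N blocks of ceil(segment_size/k) bytes, a share holds one block per
# segment, so its data portion is about ceil(file_size/k) bytes, plus hashes
# and the UEB, which this sketch ignores.
def _div_ceil(n, d):
    return (n + d - 1) // d

def _example_share_sizes(file_size=1000000, segment_size=131073, k=3):
    num_segments = _div_ceil(file_size, segment_size)
    block_size = _div_ceil(segment_size, k)
    tail_segment_size = file_size - (num_segments - 1) * segment_size
    tail_segment_padded = _div_ceil(tail_segment_size, k) * k
    return {"num_segments": num_segments,
            "block_size": block_size,
            "tail_block_size": tail_segment_padded // k,
            "share_data_size": _div_ceil(file_size, k)}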
# use the upload-side code to get this as accurate as possible ht = IncompleteHashTree(N) num_share_hashes = len(ht.needed_hashes(0, include_leaf=True)) wbp = make_write_bucket_proxy(None, None, share_size, r["block_size"], r["num_segments"], num_share_hashes, 0) self._fieldsize = wbp.fieldsize self._fieldstruct = wbp.fieldstruct self.guessed_offsets = wbp._offsets # called by our client, the SegmentFetcher def get_block(self, segnum): """Add a block number to the list of requests. This will eventually result in a fetch of the data necessary to validate the block, then the block itself. The fetch order is generally first-come-first-served, but requests may be answered out-of-order if data becomes available sooner. I return an EventStreamObserver, which has two uses. The first is to call o.subscribe(), which gives me a place to send state changes and eventually the data block. The second is o.cancel(), which removes the request (if it is still active). I will distribute the following events through my EventStreamObserver: - state=OVERDUE: ?? I believe I should have had an answer by now. You may want to ask another share instead. - state=BADSEGNUM: the segnum you asked for is too large. I must fetch a valid UEB before I can determine this, so the notification is asynchronous - state=COMPLETE, block=data: here is a valid block - state=CORRUPT: this share contains corrupted data - state=DEAD, f=Failure: the server reported an error, this share is unusable """ log.msg("%s.get_block(%d)" % (repr(self), segnum), level=log.NOISY, parent=self._lp, umid="RTo9MQ") assert segnum >= 0 o = EventStreamObserver() o.set_canceler(self, "_cancel_block_request") for i,(segnum0,observers) in enumerate(self._requested_blocks): if segnum0 == segnum: observers.add(o) break else: self._requested_blocks.append( (segnum, set([o])) ) self.schedule_loop() return o def _cancel_block_request(self, o): new_requests = [] for e in self._requested_blocks: (segnum0, observers) = e observers.discard(o) if observers: new_requests.append(e) self._requested_blocks = new_requests # internal methods def _active_segnum_and_observers(self): if self._requested_blocks: # we only retrieve information for one segment at a time, to # minimize alacrity (first come, first served) return self._requested_blocks[0] return None, [] def schedule_loop(self): if self._loop_scheduled: return self._loop_scheduled = True eventually(self.loop) def loop(self): self._loop_scheduled = False if not self._alive: return try: # if any exceptions occur here, kill the download log.msg("%s.loop, reqs=[%s], pending=%s, received=%s," " unavailable=%s" % (repr(self), ",".join([str(req[0]) for req in self._requested_blocks]), self._pending.dump(), self._received.dump(), self._unavailable.dump() ), level=log.NOISY, parent=self._lp, umid="BaL1zw") self._do_loop() # all exception cases call self._fail(), which clears self._alive except (BadHashError, NotEnoughHashesError, LayoutInvalid), e: # Abandon this share. We do this if we see corruption in the # offset table, the UEB, or a hash tree. We don't abandon the # whole share if we see corruption in a data block (we abandon # just the one block, and still try to get data from other blocks # on the same server). In theory, we could get good data from a # share with a corrupt UEB (by first getting the UEB from some # other share), or corrupt hash trees, but the logic to decide # when this is safe is non-trivial. So for now, give up at the # first sign of corruption. 
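# Illustrative sketch (toy offsets): the Spans/DataSpans bookkeeping that
# _do_loop()/_send_requests() below are built on. Spans tracks which byte
# ranges are wanted, in flight, or known-unavailable; DataSpans holds the
# bytes that actually arrived. Set-like arithmetic is what lets the share
# ask only for "desired minus pending minus already received".
def _example_span_bookkeeping():
    from allmydata.util.spans import Spans, DataSpans

    desired = Spans()
    desired.add(0, 1024)          # e.g. version + offset table + readahead

    pending = Spans()
    pending.add(0, 4)             # the version field is already in flight

    received = DataSpans()
    received.add(4, "\x00" * 60)  # pretend bytes [4:64) already arrived

    ask = desired - pending - received.get_spans()
    for (start, length) in ask:   # the remaining holes, here [64:1024)
        pending.add(start, length)
    return ask.dump()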
# # _satisfy_*() code which detects corruption should first call # self._signal_corruption(), and then raise the exception. log.msg(format="corruption detected in %(share)s", share=repr(self), level=log.UNUSUAL, parent=self._lp, umid="gWspVw") self._fail(Failure(e), log.UNUSUAL) except DataUnavailable, e: # Abandon this share. log.msg(format="need data that will never be available" " from %s: pending=%s, received=%s, unavailable=%s" % (repr(self), self._pending.dump(), self._received.dump(), self._unavailable.dump() ), level=log.UNUSUAL, parent=self._lp, umid="F7yJnQ") self._fail(Failure(e), log.UNUSUAL) except BaseException: self._fail(Failure()) raise log.msg("%s.loop done, reqs=[%s], pending=%s, received=%s," " unavailable=%s" % (repr(self), ",".join([str(req[0]) for req in self._requested_blocks]), self._pending.dump(), self._received.dump(), self._unavailable.dump() ), level=log.NOISY, parent=self._lp, umid="9lRaRA") def _do_loop(self): # we are (eventually) called after all state transitions: # new segments added to self._requested_blocks # new data received from servers (responses to our read() calls) # impatience timer fires (server appears slow) # First, consume all of the information that we currently have, for # all the segments people currently want. start = now() while self._get_satisfaction(): pass self._download_status.add_misc_event("satisfy", start, now()) # When we get no satisfaction (from the data we've received so far), # we determine what data we desire (to satisfy more requests). The # number of segments is finite, so I can't get no satisfaction # forever. start = now() wanted, needed = self._desire() self._download_status.add_misc_event("desire", start, now()) # Finally, send out requests for whatever we need (desire minus # have). You can't always get what you want, but if you try # sometimes, you just might find, you get what you need. self._send_requests(wanted + needed) # and sometimes you can't even get what you need start = now() disappointment = needed & self._unavailable if disappointment.len(): self.had_corruption = True raise DataUnavailable("need %s but will never get it" % disappointment.dump()) self._download_status.add_misc_event("checkdis", start, now()) def _get_satisfaction(self): # return True if we retired a data block, and should therefore be # called again. Return False if we don't retire a data block (even if # we do retire some other data, like hash chains). if self.actual_offsets is None: if not self._satisfy_offsets(): # can't even look at anything without the offset table return False if not self._node.have_UEB: if not self._satisfy_UEB(): # can't check any hashes without the UEB return False # the call to _satisfy_UEB() will immediately set the # authoritative num_segments in all our CommonShares. If we # guessed wrong, we might stil be working on a bogus segnum # (beyond the real range). We catch this and signal BADSEGNUM # before invoking any further code that touches hashtrees. self.actual_segment_size = self._node.segment_size # might be updated assert self.actual_segment_size is not None # knowing the UEB means knowing num_segments assert self._node.num_segments is not None segnum, observers = self._active_segnum_and_observers() # if segnum is None, we don't really need to do anything (we have no # outstanding readers right now), but we'll fill in the bits that # aren't tied to any particular segment. 
if segnum is not None and segnum >= self._node.num_segments: for o in observers: o.notify(state=BADSEGNUM) self._requested_blocks.pop(0) return True if self._node.share_hash_tree.needed_hashes(self._shnum): if not self._satisfy_share_hash_tree(): # can't check block_hash_tree without a root return False if self._commonshare.need_block_hash_root(): block_hash_root = self._node.share_hash_tree.get_leaf(self._shnum) self._commonshare.set_block_hash_root(block_hash_root) if segnum is None: return False # we don't want any particular segment right now # block_hash_tree needed_hashes = self._commonshare.get_needed_block_hashes(segnum) if needed_hashes: if not self._satisfy_block_hash_tree(needed_hashes): # can't check block without block_hash_tree return False # ciphertext_hash_tree needed_hashes = self._node.get_needed_ciphertext_hashes(segnum) if needed_hashes: if not self._satisfy_ciphertext_hash_tree(needed_hashes): # can't check decoded blocks without ciphertext_hash_tree return False # data blocks return self._satisfy_data_block(segnum, observers) def _satisfy_offsets(self): version_s = self._received.get(0, 4) if version_s is None: return False (version,) = struct.unpack(">L", version_s) if version == 1: table_start = 0x0c self._fieldsize = 0x4 self._fieldstruct = "L" elif version == 2: table_start = 0x14 self._fieldsize = 0x8 self._fieldstruct = "Q" else: self.had_corruption = True raise LayoutInvalid("unknown version %d (I understand 1 and 2)" % version) offset_table_size = 6 * self._fieldsize table_s = self._received.pop(table_start, offset_table_size) if table_s is None: return False fields = struct.unpack(">"+6*self._fieldstruct, table_s) offsets = {} for i,field in enumerate(['data', 'plaintext_hash_tree', # UNUSED 'crypttext_hash_tree', 'block_hashes', 'share_hashes', 'uri_extension', ] ): offsets[field] = fields[i] self.actual_offsets = offsets log.msg("actual offsets: data=%d, plaintext_hash_tree=%d, crypttext_hash_tree=%d, block_hashes=%d, share_hashes=%d, uri_extension=%d" % tuple(fields), level=log.NOISY, parent=self._lp, umid="jedQcw") self._received.remove(0, 4) # don't need this anymore # validate the offsets a bit share_hashes_size = offsets["uri_extension"] - offsets["share_hashes"] if share_hashes_size < 0 or share_hashes_size % (2+HASH_SIZE) != 0: # the share hash chain is stored as (hashnum,hash) pairs self.had_corruption = True raise LayoutInvalid("share hashes malformed -- should be a" " multiple of %d bytes -- not %d" % (2+HASH_SIZE, share_hashes_size)) block_hashes_size = offsets["share_hashes"] - offsets["block_hashes"] if block_hashes_size < 0 or block_hashes_size % (HASH_SIZE) != 0: # the block hash tree is stored as a list of hashes self.had_corruption = True raise LayoutInvalid("block hashes malformed -- should be a" " multiple of %d bytes -- not %d" % (HASH_SIZE, block_hashes_size)) # we only look at 'crypttext_hash_tree' if the UEB says we're # actually using it. Same with 'plaintext_hash_tree'. This gives us # some wiggle room: a place to stash data for later extensions. 
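# Illustrative sketch (synthetic header bytes): the version/offset-table
# layout that _satisfy_offsets() above decodes. Version 1 shares use 4-byte
# ">L" fields starting at 0x0c, version 2 shares use 8-byte ">Q" fields
# starting at 0x14; either way six offsets follow, in this fixed order.
def _example_parse_offsets(header):
    import struct
    (version,) = struct.unpack(">L", header[:4])
    if version == 1:
        table_start, fieldstruct = 0x0c, "L"
    elif version == 2:
        table_start, fieldstruct = 0x14, "Q"
    else:
        raise ValueError("unknown share version %d" % version)
    fieldsize = struct.calcsize(">" + fieldstruct)
    table = header[table_start:table_start + 6 * fieldsize]
    fields = struct.unpack(">" + 6 * fieldstruct, table)
    names = ["data", "plaintext_hash_tree", "crypttext_hash_tree",
             "block_hashes", "share_hashes", "uri_extension"]
    return dict(zip(names, fields))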
return True def _satisfy_UEB(self): o = self.actual_offsets fsize = self._fieldsize UEB_length_s = self._received.get(o["uri_extension"], fsize) if not UEB_length_s: return False (UEB_length,) = struct.unpack(">"+self._fieldstruct, UEB_length_s) UEB_s = self._received.pop(o["uri_extension"]+fsize, UEB_length) if not UEB_s: return False self._received.remove(o["uri_extension"], fsize) try: self._node.validate_and_store_UEB(UEB_s) return True except (LayoutInvalid, BadHashError), e: # TODO: if this UEB was bad, we'll keep trying to validate it # over and over again. Only log.err on the first one, or better # yet skip all but the first f = Failure(e) self._signal_corruption(f, o["uri_extension"], fsize+UEB_length) self.had_corruption = True raise def _satisfy_share_hash_tree(self): # the share hash chain is stored as (hashnum,hash) tuples, so you # can't fetch just the pieces you need, because you don't know # exactly where they are. So fetch everything, and parse the results # later. o = self.actual_offsets hashlen = o["uri_extension"] - o["share_hashes"] assert hashlen % (2+HASH_SIZE) == 0 hashdata = self._received.get(o["share_hashes"], hashlen) if not hashdata: return False share_hashes = {} for i in range(0, hashlen, 2+HASH_SIZE): (hashnum,) = struct.unpack(">H", hashdata[i:i+2]) hashvalue = hashdata[i+2:i+2+HASH_SIZE] share_hashes[hashnum] = hashvalue # TODO: if they give us an empty set of hashes, # process_share_hashes() won't fail. We must ensure that this # situation doesn't allow unverified shares through. Manual testing # shows that set_block_hash_root() throws an assert because an # internal node is None instead of an actual hash, but we want # something better. It's probably best to add a method to # IncompleteHashTree which takes a leaf number and raises an # exception unless that leaf is present and fully validated. 
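# Illustrative sketch (synthetic data): the on-disk share hash chain format
# parsed by _satisfy_share_hash_tree() above -- a flat run of 2-byte
# big-endian hash numbers, each followed by a 32-byte hash value.
def _example_parse_share_hash_chain(hashdata, hash_size=32):
    import struct
    assert len(hashdata) % (2 + hash_size) == 0
    share_hashes = {}
    for i in range(0, len(hashdata), 2 + hash_size):
        (hashnum,) = struct.unpack(">H", hashdata[i:i+2])
        share_hashes[hashnum] = hashdata[i+2:i+2+hash_size]
    return share_hashes

# e.g. _example_parse_share_hash_chain(struct.pack(">H", 3) + "\xab" * 32)
#      returns {3: "\xab" * 32}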
try: self._node.process_share_hashes(share_hashes) # adds to self._node.share_hash_tree except (BadHashError, NotEnoughHashesError), e: f = Failure(e) self._signal_corruption(f, o["share_hashes"], hashlen) self.had_corruption = True raise self._received.remove(o["share_hashes"], hashlen) return True def _signal_corruption(self, f, start, offset): # there was corruption somewhere in the given range reason = "corruption in share[%d-%d): %s" % (start, start+offset, str(f.value)) self._rref.callRemoteOnly("advise_corrupt_share", reason) def _satisfy_block_hash_tree(self, needed_hashes): o_bh = self.actual_offsets["block_hashes"] block_hashes = {} for hashnum in needed_hashes: hashdata = self._received.get(o_bh+hashnum*HASH_SIZE, HASH_SIZE) if hashdata: block_hashes[hashnum] = hashdata else: return False # missing some hashes # note that we don't submit any hashes to the block_hash_tree until # we've gotten them all, because the hash tree will throw an # exception if we only give it a partial set (which it therefore # cannot validate) try: self._commonshare.process_block_hashes(block_hashes) except (BadHashError, NotEnoughHashesError), e: f = Failure(e) hashnums = ",".join([str(n) for n in sorted(block_hashes.keys())]) log.msg(format="hash failure in block_hashes=(%(hashnums)s)," " from %(share)s", hashnums=hashnums, shnum=self._shnum, share=repr(self), failure=f, level=log.WEIRD, parent=self._lp, umid="yNyFdA") hsize = max(0, max(needed_hashes)) * HASH_SIZE self._signal_corruption(f, o_bh, hsize) self.had_corruption = True raise for hashnum in needed_hashes: self._received.remove(o_bh+hashnum*HASH_SIZE, HASH_SIZE) return True def _satisfy_ciphertext_hash_tree(self, needed_hashes): start = self.actual_offsets["crypttext_hash_tree"] hashes = {} for hashnum in needed_hashes: hashdata = self._received.get(start+hashnum*HASH_SIZE, HASH_SIZE) if hashdata: hashes[hashnum] = hashdata else: return False # missing some hashes # we don't submit any hashes to the ciphertext_hash_tree until we've # gotten them all try: self._node.process_ciphertext_hashes(hashes) except (BadHashError, NotEnoughHashesError), e: f = Failure(e) hashnums = ",".join([str(n) for n in sorted(hashes.keys())]) log.msg(format="hash failure in ciphertext_hashes=(%(hashnums)s)," " from %(share)s", hashnums=hashnums, share=repr(self), failure=f, level=log.WEIRD, parent=self._lp, umid="iZI0TA") hsize = max(0, max(needed_hashes))*HASH_SIZE self._signal_corruption(f, start, hsize) self.had_corruption = True raise for hashnum in needed_hashes: self._received.remove(start+hashnum*HASH_SIZE, HASH_SIZE) return True def _satisfy_data_block(self, segnum, observers): tail = (segnum == self._node.num_segments-1) datastart = self.actual_offsets["data"] blockstart = datastart + segnum * self._node.block_size blocklen = self._node.block_size if tail: blocklen = self._node.tail_block_size block = self._received.pop(blockstart, blocklen) if not block: log.msg("no data for block %s (want [%d:+%d])" % (repr(self), blockstart, blocklen), level=log.NOISY, parent=self._lp, umid="aK0RFw") return False log.msg(format="%(share)s._satisfy_data_block [%(start)d:+%(length)d]", share=repr(self), start=blockstart, length=blocklen, level=log.NOISY, parent=self._lp, umid="uTDNZg") # this block is being retired, either as COMPLETE or CORRUPT, since # no further data reads will help assert self._requested_blocks[0][0] == segnum try: self._commonshare.check_block(segnum, block) # hurrah, we have a valid block. Deliver it. 
for o in observers: # goes to SegmentFetcher._block_request_activity o.notify(state=COMPLETE, block=block) # now clear our received data, to dodge the #1170 spans.py # complexity bug self._received = DataSpans() except (BadHashError, NotEnoughHashesError), e: # rats, we have a corrupt block. Notify our clients that they # need to look elsewhere, and advise the server. Unlike # corruption in other parts of the share, this doesn't cause us # to abandon the whole share. f = Failure(e) log.msg(format="hash failure in block %(segnum)d, from %(share)s", segnum=segnum, share=repr(self), failure=f, level=log.WEIRD, parent=self._lp, umid="mZjkqA") for o in observers: o.notify(state=CORRUPT) self._signal_corruption(f, blockstart, blocklen) self.had_corruption = True # in either case, we've retired this block self._requested_blocks.pop(0) # popping the request keeps us from turning around and wanting the # block again right away return True # got satisfaction def _desire(self): segnum, observers = self._active_segnum_and_observers() # maybe None # 'want_it' is for data we merely want: we know that we don't really # need it. This includes speculative reads, like the first 1KB of the # share (for the offset table) and the first 2KB of the UEB. # # 'need_it' is for data that, if we have the real offset table, we'll # need. If we are only guessing at the offset table, it's merely # wanted. (The share is abandoned if we can't get data that we really # need). # # 'gotta_gotta_have_it' is for data that we absolutely need, # independent of whether we're still guessing about the offset table: # the version number and the offset table itself. # # Mr. Popeil, I'm in trouble, need your assistance on the double. Aww.. desire = Spans(), Spans(), Spans() (want_it, need_it, gotta_gotta_have_it) = desire self.actual_segment_size = self._node.segment_size # might be updated o = self.actual_offsets or self.guessed_offsets segsize = self.actual_segment_size or self.guessed_segment_size r = self._node._calculate_sizes(segsize) if not self.actual_offsets: # all _desire functions add bits to the three desire[] spans self._desire_offsets(desire) # we can use guessed offsets as long as this server tolerates # overrun. Otherwise, we must wait for the offsets to arrive before # we try to read anything else. if self.actual_offsets or self._overrun_ok: if not self._node.have_UEB: self._desire_UEB(desire, o) self._desire_share_hashes(desire, o) if segnum is not None: # They might be asking for a segment number that is beyond # what we guess the file contains, but _desire_block_hashes # and _desire_data will tolerate that. self._desire_block_hashes(desire, o, segnum) self._desire_data(desire, o, r, segnum, segsize) log.msg("end _desire: want_it=%s need_it=%s gotta=%s" % (want_it.dump(), need_it.dump(), gotta_gotta_have_it.dump()), level=log.NOISY, parent=self._lp, umid="IG7CgA") if self.actual_offsets: return (want_it, need_it+gotta_gotta_have_it) else: return (want_it+need_it, gotta_gotta_have_it) def _desire_offsets(self, desire): (want_it, need_it, gotta_gotta_have_it) = desire if self._overrun_ok: # easy! this includes version number, sizes, and offsets want_it.add(0, 1024) return # v1 has an offset table that lives [0x0,0x24). v2 lives [0x0,0x44). # To be conservative, only request the data that we know lives there, # even if that means more roundtrips. 
gotta_gotta_have_it.add(0, 4) # version number, always safe version_s = self._received.get(0, 4) if not version_s: return (version,) = struct.unpack(">L", version_s) # The code in _satisfy_offsets will have checked this version # already. There is no code path to get this far with version>2. assert 1 <= version <= 2, "can't get here, version=%d" % version if version == 1: table_start = 0x0c fieldsize = 0x4 elif version == 2: table_start = 0x14 fieldsize = 0x8 offset_table_size = 6 * fieldsize gotta_gotta_have_it.add(table_start, offset_table_size) def _desire_UEB(self, desire, o): (want_it, need_it, gotta_gotta_have_it) = desire # UEB data is stored as (length,data). if self._overrun_ok: # We can pre-fetch 2kb, which should probably cover it. If it # turns out to be larger, we'll come back here later with a known # length and fetch the rest. want_it.add(o["uri_extension"], 2048) # now, while that is probably enough to fetch the whole UEB, it # might not be, so we need to do the next few steps as well. In # most cases, the following steps will not actually add anything # to need_it need_it.add(o["uri_extension"], self._fieldsize) # only use a length if we're sure it's correct, otherwise we'll # probably fetch a huge number if not self.actual_offsets: return UEB_length_s = self._received.get(o["uri_extension"], self._fieldsize) if UEB_length_s: (UEB_length,) = struct.unpack(">"+self._fieldstruct, UEB_length_s) # we know the length, so make sure we grab everything need_it.add(o["uri_extension"]+self._fieldsize, UEB_length) def _desire_share_hashes(self, desire, o): (want_it, need_it, gotta_gotta_have_it) = desire if self._node.share_hash_tree.needed_hashes(self._shnum): hashlen = o["uri_extension"] - o["share_hashes"] need_it.add(o["share_hashes"], hashlen) def _desire_block_hashes(self, desire, o, segnum): (want_it, need_it, gotta_gotta_have_it) = desire # block hash chain for hashnum in self._commonshare.get_desired_block_hashes(segnum): need_it.add(o["block_hashes"]+hashnum*HASH_SIZE, HASH_SIZE) # ciphertext hash chain for hashnum in self._node.get_desired_ciphertext_hashes(segnum): need_it.add(o["crypttext_hash_tree"]+hashnum*HASH_SIZE, HASH_SIZE) def _desire_data(self, desire, o, r, segnum, segsize): if segnum > r["num_segments"]: # they're asking for a segment that's beyond what we think is the # end of the file. We won't get here if we've already learned the # real UEB: _get_satisfaction() will notice the out-of-bounds and # terminate the loop. So we must still be guessing, which means # that they might be correct in asking for such a large segnum. # But if they're right, then our segsize/segnum guess is # certainly wrong, which means we don't know what data blocks to # ask for yet. So don't bother adding anything. When the UEB # comes back and we learn the correct segsize/segnums, we'll # either reject the request or have enough information to proceed # normally. This costs one roundtrip. 
log.msg("_desire_data: segnum(%d) looks wrong (numsegs=%d)" % (segnum, r["num_segments"]), level=log.UNUSUAL, parent=self._lp, umid="tuYRQQ") return (want_it, need_it, gotta_gotta_have_it) = desire tail = (segnum == r["num_segments"]-1) datastart = o["data"] blockstart = datastart + segnum * r["block_size"] blocklen = r["block_size"] if tail: blocklen = r["tail_block_size"] need_it.add(blockstart, blocklen) def _send_requests(self, desired): ask = desired - self._pending - self._received.get_spans() log.msg("%s._send_requests, desired=%s, pending=%s, ask=%s" % (repr(self), desired.dump(), self._pending.dump(), ask.dump()), level=log.NOISY, parent=self._lp, umid="E94CVA") # XXX At one time, this code distinguished between data blocks and # hashes, and made sure to send (small) requests for hashes before # sending (big) requests for blocks. The idea was to make sure that # all hashes arrive before the blocks, so the blocks can be consumed # and released in a single turn. I removed this for simplicity. # Reconsider the removal: maybe bring it back. ds = self._download_status for (start, length) in ask: # TODO: quantize to reasonably-large blocks self._pending.add(start, length) lp = log.msg(format="%(share)s._send_request" " [%(start)d:+%(length)d]", share=repr(self), start=start, length=length, level=log.NOISY, parent=self._lp, umid="sgVAyA") block_ev = ds.add_block_request(self._server, self._shnum, start, length, now()) d = self._send_request(start, length) d.addCallback(self._got_data, start, length, block_ev, lp) d.addErrback(self._got_error, start, length, block_ev, lp) d.addCallback(self._trigger_loop) d.addErrback(lambda f: log.err(format="unhandled error during send_request", failure=f, parent=self._lp, level=log.WEIRD, umid="qZu0wg")) def _send_request(self, start, length): return self._rref.callRemote("read", start, length) def _got_data(self, data, start, length, block_ev, lp): block_ev.finished(len(data), now()) if not self._alive: return log.msg(format="%(share)s._got_data [%(start)d:+%(length)d] -> %(datalen)d", share=repr(self), start=start, length=length, datalen=len(data), level=log.NOISY, parent=lp, umid="5Qn6VQ") self._pending.remove(start, length) self._received.add(start, data) # if we ask for [a:c], and we get back [a:b] (b self.last_timestamp: self.last_timestamp = when def add_known_share(self, server, shnum): # XXX use me self.known_shares.append( (server, shnum) ) def add_problem(self, p): self.problems.append(p) # IDownloadStatus methods def get_counter(self): return self.counter def get_storage_index(self): return self.storage_index def get_size(self): return self.size def get_status(self): # mention all outstanding segment requests outstanding = set() errorful = set() outstanding = set([s_ev["segment_number"] for s_ev in self.segment_events if s_ev["finish_time"] is None]) errorful = set([s_ev["segment_number"] for s_ev in self.segment_events if s_ev["success"] is False]) def join(segnums): if len(segnums) == 1: return "segment %s" % list(segnums)[0] else: return "segments %s" % (",".join([str(i) for i in sorted(segnums)])) error_s = "" if errorful: error_s = "; errors on %s" % join(errorful) if outstanding: s = "fetching %s" % join(outstanding) else: s = "idle" return s + error_s def get_progress(self): # measure all read events that aren't completely done, return the # total percentage complete for them if not self.read_events: return 0.0 total_outstanding, total_received = 0, 0 for r_ev in self.read_events: if r_ev["finish_time"] is None: total_outstanding += 
r_ev["length"] total_received += r_ev["bytes_returned"] # else ignore completed requests if not total_outstanding: return 1.0 return 1.0 * total_received / total_outstanding def using_helper(self): return False def get_active(self): # a download is considered active if it has at least one outstanding # read() call for r_ev in self.read_events: if r_ev["finish_time"] is None: return True return False def get_started(self): return self.first_timestamp def get_results(self): return None # TODO tahoe-lafs-1.10.0/src/allmydata/immutable/encode.py000066400000000000000000000726721221140116300221460ustar00rootroot00000000000000# -*- test-case-name: allmydata.test.test_encode -*- import time from zope.interface import implements from twisted.internet import defer from foolscap.api import fireEventually from allmydata import uri from allmydata.storage.server import si_b2a from allmydata.hashtree import HashTree from allmydata.util import mathutil, hashutil, base32, log, happinessutil from allmydata.util.assertutil import _assert, precondition from allmydata.codec import CRSEncoder from allmydata.interfaces import IEncoder, IStorageBucketWriter, \ IEncryptedUploadable, IUploadStatus, UploadUnhappinessError """ The goal of the encoder is to turn the original file into a series of 'shares'. Each share is going to a 'shareholder' (nominally each shareholder is a different host, but for small grids there may be overlap). The number of shares is chosen to hit our reliability goals (more shares on more machines means more reliability), and is limited by overhead (proportional to numshares or log(numshares)) and the encoding technology in use (zfec permits only 256 shares total). It is also constrained by the amount of data we want to send to each host. For estimating purposes, think of 10 shares out of which we need 3 to reconstruct the file. The encoder starts by cutting the original file into segments. All segments except the last are of equal size. The segment size is chosen to constrain the memory footprint (which will probably vary between 1x and 4x segment size) and to constrain the overhead (which will be proportional to log(number of segments)). Each segment (A,B,C) is read into memory, encrypted, and encoded into blocks. The 'share' (say, share #1) that makes it out to a host is a collection of these blocks (block A1, B1, C1), plus some hash-tree information necessary to validate the data upon retrieval. Only one segment is handled at a time: all blocks for segment A are delivered before any work is begun on segment B. As blocks are created, we retain the hash of each one. The list of block hashes for a single share (say, hash(A1), hash(B1), hash(C1)) is used to form the base of a Merkle hash tree for that share, called the block hash tree. This hash tree has one terminal leaf per block. The complete block hash tree is sent to the shareholder after all the data has been sent. At retrieval time, the decoder will ask for specific pieces of this tree before asking for blocks, whichever it needs to validate those blocks. (Note: we don't really need to generate this whole block hash tree ourselves. It would be sufficient to have the shareholder generate it and just tell us the root. This gives us an extra level of validation on the transfer, though, and it is relatively cheap to compute.) Each of these block hash trees has a root hash. The collection of these root hashes for all shares are collected into the 'share hash tree', which has one terminal leaf per share. 
After sending the blocks and the complete block hash tree to each shareholder, we send them the portion of the share hash tree that is necessary to validate their share. The root of the share hash tree is put into the URI. """ class UploadAborted(Exception): pass KiB=1024 MiB=1024*KiB GiB=1024*MiB TiB=1024*GiB PiB=1024*TiB class Encoder(object): implements(IEncoder) def __init__(self, log_parent=None, upload_status=None): object.__init__(self) self.uri_extension_data = {} self._codec = None self._status = None if upload_status: self._status = IUploadStatus(upload_status) precondition(log_parent is None or isinstance(log_parent, int), log_parent) self._log_number = log.msg("creating Encoder %s" % self, facility="tahoe.encoder", parent=log_parent) self._aborted = False def __repr__(self): if hasattr(self, "_storage_index"): return "" % si_b2a(self._storage_index)[:5] return "" def log(self, *args, **kwargs): if "parent" not in kwargs: kwargs["parent"] = self._log_number if "facility" not in kwargs: kwargs["facility"] = "tahoe.encoder" return log.msg(*args, **kwargs) def set_encrypted_uploadable(self, uploadable): eu = self._uploadable = IEncryptedUploadable(uploadable) d = eu.get_size() def _got_size(size): self.log(format="file size: %(size)d", size=size) self.file_size = size d.addCallback(_got_size) d.addCallback(lambda res: eu.get_all_encoding_parameters()) d.addCallback(self._got_all_encoding_parameters) d.addCallback(lambda res: eu.get_storage_index()) def _done(storage_index): self._storage_index = storage_index return self d.addCallback(_done) return d def _got_all_encoding_parameters(self, params): assert not self._codec k, happy, n, segsize = params self.required_shares = k self.servers_of_happiness = happy self.num_shares = n self.segment_size = segsize self.log("got encoding parameters: %d/%d/%d %d" % (k,happy,n, segsize)) self.log("now setting up codec") assert self.segment_size % self.required_shares == 0 self.num_segments = mathutil.div_ceil(self.file_size, self.segment_size) self._codec = CRSEncoder() self._codec.set_params(self.segment_size, self.required_shares, self.num_shares) data = self.uri_extension_data data['codec_name'] = self._codec.get_encoder_type() data['codec_params'] = self._codec.get_serialized_params() data['size'] = self.file_size data['segment_size'] = self.segment_size self.share_size = mathutil.div_ceil(self.file_size, self.required_shares) data['num_segments'] = self.num_segments data['needed_shares'] = self.required_shares data['total_shares'] = self.num_shares # the "tail" is the last segment. This segment may or may not be # shorter than all other segments. We use the "tail codec" to handle # it. If the tail is short, we use a different codec instance. In # addition, the tail codec must be fed data which has been padded out # to the right size. 
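# Illustrative sketch (standalone, made-up sizes; the real code below uses
# mathutil.next_multiple): how the tail segment is sized and padded before
# it reaches the tail codec. The last segment holds whatever is left of the
# file, then gets padded up to a multiple of required_shares so the codec
# can split it into equal pieces.
def _example_tail_sizes(file_size=1000000, segment_size=131073, k=3):
    tail_size = file_size % segment_size
    if not tail_size:
        tail_size = segment_size
    padded_tail_size = tail_size + (-tail_size % k)   # next multiple of k
    return tail_size, padded_tail_size, padded_tail_size // k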
tail_size = self.file_size % self.segment_size if not tail_size: tail_size = self.segment_size # the tail codec is responsible for encoding tail_size bytes padded_tail_size = mathutil.next_multiple(tail_size, self.required_shares) self._tail_codec = CRSEncoder() self._tail_codec.set_params(padded_tail_size, self.required_shares, self.num_shares) data['tail_codec_params'] = self._tail_codec.get_serialized_params() def _get_share_size(self): share_size = mathutil.div_ceil(self.file_size, self.required_shares) overhead = self._compute_overhead() return share_size + overhead def _compute_overhead(self): return 0 def get_param(self, name): assert self._codec if name == "storage_index": return self._storage_index elif name == "share_counts": return (self.required_shares, self.servers_of_happiness, self.num_shares) elif name == "num_segments": return self.num_segments elif name == "segment_size": return self.segment_size elif name == "block_size": return self._codec.get_block_size() elif name == "share_size": return self._get_share_size() elif name == "serialized_params": return self._codec.get_serialized_params() else: raise KeyError("unknown parameter name '%s'" % name) def set_shareholders(self, landlords, servermap): assert isinstance(landlords, dict) for k in landlords: assert IStorageBucketWriter.providedBy(landlords[k]) self.landlords = landlords.copy() assert isinstance(servermap, dict) for v in servermap.itervalues(): assert isinstance(v, set) self.servermap = servermap.copy() def start(self): """ Returns a Deferred that will fire with the verify cap (an instance of uri.CHKFileVerifierURI).""" self.log("%s starting" % (self,)) #paddedsize = self._size + mathutil.pad_size(self._size, self.needed_shares) assert self._codec self._crypttext_hasher = hashutil.crypttext_hasher() self._crypttext_hashes = [] self.segment_num = 0 self.block_hashes = [[] for x in range(self.num_shares)] # block_hashes[i] is a list that will be accumulated and then send # to landlord[i]. This list contains a hash of each segment_share # that we sent to that landlord. 
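# Illustrative sketch (toy blocks): how the per-share block-hash lists
# accumulated in start() become Merkle trees (see send_one_block_hash_tree()
# below) whose roots form the leaves of the share hash tree, whose root in
# turn goes into the URI extension block.
def _example_block_hash_tree(blocks):
    from allmydata.hashtree import HashTree
    from allmydata.util import hashutil
    block_hashes = [hashutil.block_hash(b) for b in blocks]
    t = HashTree(block_hashes)
    return t[0], list(t)   # (root for the share hash tree, full node list)

# e.g. root, nodes = _example_block_hash_tree(["block A1", "block B1"])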
self.share_root_hashes = [None] * self.num_shares self._times = { "cumulative_encoding": 0.0, "cumulative_sending": 0.0, "hashes_and_close": 0.0, "total_encode_and_push": 0.0, } self._start_total_timestamp = time.time() d = fireEventually() d.addCallback(lambda res: self.start_all_shareholders()) for i in range(self.num_segments-1): # note to self: this form doesn't work, because lambda only # captures the slot, not the value #d.addCallback(lambda res: self.do_segment(i)) # use this form instead: d.addCallback(lambda res, i=i: self._encode_segment(i)) d.addCallback(self._send_segment, i) d.addCallback(self._turn_barrier) last_segnum = self.num_segments - 1 d.addCallback(lambda res: self._encode_tail_segment(last_segnum)) d.addCallback(self._send_segment, last_segnum) d.addCallback(self._turn_barrier) d.addCallback(lambda res: self.finish_hashing()) d.addCallback(lambda res: self.send_crypttext_hash_tree_to_all_shareholders()) d.addCallback(lambda res: self.send_all_block_hash_trees()) d.addCallback(lambda res: self.send_all_share_hash_trees()) d.addCallback(lambda res: self.send_uri_extension_to_all_shareholders()) d.addCallback(lambda res: self.close_all_shareholders()) d.addCallbacks(self.done, self.err) return d def set_status(self, status): if self._status: self._status.set_status(status) def set_encode_and_push_progress(self, sent_segments=None, extra=0.0): if self._status: # we treat the final hash+close as an extra segment if sent_segments is None: sent_segments = self.num_segments progress = float(sent_segments + extra) / (self.num_segments + 1) self._status.set_progress(2, progress) def abort(self): self.log("aborting upload", level=log.UNUSUAL) assert self._codec, "don't call abort before start" self._aborted = True # the next segment read (in _gather_data inside _encode_segment) will # raise UploadAborted(), which will bypass the rest of the upload # chain. If we've sent the final segment's shares, it's too late to # abort. TODO: allow abort any time up to close_all_shareholders. def _turn_barrier(self, res): # putting this method in a Deferred chain imposes a guaranteed # reactor turn between the pre- and post- portions of that chain. # This can be useful to limit memory consumption: since Deferreds do # not do tail recursion, code which uses defer.succeed(result) for # consistency will cause objects to live for longer than you might # normally expect. return fireEventually(res) def start_all_shareholders(self): self.log("starting shareholders", level=log.NOISY) self.set_status("Starting shareholders") dl = [] for shareid in list(self.landlords): d = self.landlords[shareid].put_header() d.addErrback(self._remove_shareholder, shareid, "start") dl.append(d) return self._gather_responses(dl) def _encode_segment(self, segnum): codec = self._codec start = time.time() # the ICodecEncoder API wants to receive a total of self.segment_size # bytes on each encode() call, broken up into a number of # identically-sized pieces. Due to the way the codec algorithm works, # these pieces need to be the same size as the share which the codec # will generate. Therefore we must feed it with input_piece_size that # equals the output share size. input_piece_size = codec.get_block_size() # as a result, the number of input pieces per encode() call will be # equal to the number of required shares with which the codec was # constructed. 
You can think of the codec as chopping up a # 'segment_size' of data into 'required_shares' shares (not doing any # fancy math at all, just doing a split), then creating some number # of additional shares which can be substituted if the primary ones # are unavailable # we read data from the source one segment at a time, and then chop # it into 'input_piece_size' pieces before handing it to the codec crypttext_segment_hasher = hashutil.crypttext_segment_hasher() # memory footprint: we only hold a tiny piece of the plaintext at any # given time. We build up a segment's worth of cryptttext, then hand # it to the encoder. Assuming 3-of-10 encoding (3.3x expansion) and # 1MiB max_segment_size, we get a peak memory footprint of 4.3*1MiB = # 4.3MiB. Lowering max_segment_size to, say, 100KiB would drop the # footprint to 430KiB at the expense of more hash-tree overhead. d = self._gather_data(self.required_shares, input_piece_size, crypttext_segment_hasher) def _done_gathering(chunks): for c in chunks: assert len(c) == input_piece_size self._crypttext_hashes.append(crypttext_segment_hasher.digest()) # during this call, we hit 5*segsize memory return codec.encode(chunks) d.addCallback(_done_gathering) def _done(res): elapsed = time.time() - start self._times["cumulative_encoding"] += elapsed return res d.addCallback(_done) return d def _encode_tail_segment(self, segnum): start = time.time() codec = self._tail_codec input_piece_size = codec.get_block_size() crypttext_segment_hasher = hashutil.crypttext_segment_hasher() d = self._gather_data(self.required_shares, input_piece_size, crypttext_segment_hasher, allow_short=True) def _done_gathering(chunks): for c in chunks: # a short trailing chunk will have been padded by # _gather_data assert len(c) == input_piece_size self._crypttext_hashes.append(crypttext_segment_hasher.digest()) return codec.encode(chunks) d.addCallback(_done_gathering) def _done(res): elapsed = time.time() - start self._times["cumulative_encoding"] += elapsed return res d.addCallback(_done) return d def _gather_data(self, num_chunks, input_chunk_size, crypttext_segment_hasher, allow_short=False): """Return a Deferred that will fire when the required number of chunks have been read (and hashed and encrypted). The Deferred fires with a list of chunks, each of size input_chunk_size.""" # I originally built this to allow read_encrypted() to behave badly: # to let it return more or less data than you asked for. It would # stash the leftovers until later, and then recurse until it got # enough. I don't think that was actually useful. # # who defines read_encrypted? # offloaded.LocalCiphertextReader: real disk file: exact # upload.EncryptAnUploadable: Uploadable, but a wrapper that makes # it exact. The return value is a list of 50KiB chunks, to reduce # the memory footprint of the encryption process. # repairer.Repairer: immutable.filenode.CiphertextFileNode: exact # # This has been redefined to require read_encrypted() to behave like # a local file: return exactly the amount requested unless it hits # EOF. 
# -warner if self._aborted: raise UploadAborted() read_size = num_chunks * input_chunk_size d = self._uploadable.read_encrypted(read_size, hash_only=False) def _got(data): assert isinstance(data, (list,tuple)) if self._aborted: raise UploadAborted() data = "".join(data) precondition(len(data) <= read_size, len(data), read_size) if not allow_short: precondition(len(data) == read_size, len(data), read_size) crypttext_segment_hasher.update(data) self._crypttext_hasher.update(data) if allow_short and len(data) < read_size: # padding data += "\x00" * (read_size - len(data)) encrypted_pieces = [data[i:i+input_chunk_size] for i in range(0, len(data), input_chunk_size)] return encrypted_pieces d.addCallback(_got) return d def _send_segment(self, (shares, shareids), segnum): # To generate the URI, we must generate the roothash, so we must # generate all shares, even if we aren't actually giving them to # anybody. This means that the set of shares we create will be equal # to or larger than the set of landlords. If we have any landlord who # *doesn't* have a share, that's an error. _assert(set(self.landlords.keys()).issubset(set(shareids)), shareids=shareids, landlords=self.landlords) start = time.time() dl = [] self.set_status("Sending segment %d of %d" % (segnum+1, self.num_segments)) self.set_encode_and_push_progress(segnum) lognum = self.log("send_segment(%d)" % segnum, level=log.NOISY) for i in range(len(shares)): block = shares[i] shareid = shareids[i] d = self.send_block(shareid, segnum, block, lognum) dl.append(d) block_hash = hashutil.block_hash(block) #from allmydata.util import base32 #log.msg("creating block (shareid=%d, blocknum=%d) " # "len=%d %r .. %r: %s" % # (shareid, segnum, len(block), # block[:50], block[-50:], base32.b2a(block_hash))) self.block_hashes[shareid].append(block_hash) dl = self._gather_responses(dl) def _logit(res): self.log("%s uploaded %s / %s bytes (%d%%) of your file." 
% (self, self.segment_size*(segnum+1), self.segment_size*self.num_segments, 100 * (segnum+1) / self.num_segments, ), level=log.OPERATIONAL) elapsed = time.time() - start self._times["cumulative_sending"] += elapsed return res dl.addCallback(_logit) return dl def send_block(self, shareid, segment_num, block, lognum): if shareid not in self.landlords: return defer.succeed(None) sh = self.landlords[shareid] lognum2 = self.log("put_block to %s" % self.landlords[shareid], parent=lognum, level=log.NOISY) d = sh.put_block(segment_num, block) def _done(res): self.log("put_block done", parent=lognum2, level=log.NOISY) return res d.addCallback(_done) d.addErrback(self._remove_shareholder, shareid, "segnum=%d" % segment_num) return d def _remove_shareholder(self, why, shareid, where): ln = self.log(format="error while sending %(method)s to shareholder=%(shnum)d", method=where, shnum=shareid, level=log.UNUSUAL, failure=why) if shareid in self.landlords: self.landlords[shareid].abort() peerid = self.landlords[shareid].get_peerid() assert peerid del self.landlords[shareid] self.servermap[shareid].remove(peerid) if not self.servermap[shareid]: del self.servermap[shareid] else: # even more UNUSUAL self.log("they weren't in our list of landlords", parent=ln, level=log.WEIRD, umid="TQGFRw") happiness = happinessutil.servers_of_happiness(self.servermap) if happiness < self.servers_of_happiness: peerids = set(happinessutil.shares_by_server(self.servermap).keys()) msg = happinessutil.failure_message(len(peerids), self.required_shares, self.servers_of_happiness, happiness) msg = "%s: %s" % (msg, why) raise UploadUnhappinessError(msg) self.log("but we can still continue with %s shares, we'll be happy " "with at least %s" % (happiness, self.servers_of_happiness), parent=ln) def _gather_responses(self, dl): d = defer.DeferredList(dl, fireOnOneErrback=True) def _eatUploadUnhappinessError(f): # all exceptions that occur while talking to a peer are handled # in _remove_shareholder. That might raise UploadUnhappinessError, # which will cause the DeferredList to errback but which should # otherwise be consumed. Allow non-UploadUnhappinessError exceptions # to pass through as an unhandled errback. We use this in lieu of # consumeErrors=True to allow coding errors to be logged. 
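# Illustrative sketch (synthetic data): the chunk-and-pad step performed by
# _gather_data()'s _got() above -- one segment's worth of ciphertext is NUL
# padded (tail segment only) and sliced into required_shares equal pieces
# before being handed to the codec.
def _example_chunk_segment(data, num_chunks, input_chunk_size):
    read_size = num_chunks * input_chunk_size
    assert len(data) <= read_size
    if len(data) < read_size:
        data += "\x00" * (read_size - len(data))   # pad a short tail
    return [data[i:i+input_chunk_size]
            for i in range(0, len(data), input_chunk_size)]

# e.g. _example_chunk_segment("hello world", 3, 4)
#      returns ['hell', 'o wo', 'rld\x00']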
f.trap(UploadUnhappinessError) return None for d0 in dl: d0.addErrback(_eatUploadUnhappinessError) return d def finish_hashing(self): self._start_hashing_and_close_timestamp = time.time() self.set_status("Finishing hashes") self.set_encode_and_push_progress(extra=0.0) crypttext_hash = self._crypttext_hasher.digest() self.uri_extension_data["crypttext_hash"] = crypttext_hash self._uploadable.close() def send_crypttext_hash_tree_to_all_shareholders(self): self.log("sending crypttext hash tree", level=log.NOISY) self.set_status("Sending Crypttext Hash Tree") self.set_encode_and_push_progress(extra=0.3) t = HashTree(self._crypttext_hashes) all_hashes = list(t) self.uri_extension_data["crypttext_root_hash"] = t[0] dl = [] for shareid in list(self.landlords): dl.append(self.send_crypttext_hash_tree(shareid, all_hashes)) return self._gather_responses(dl) def send_crypttext_hash_tree(self, shareid, all_hashes): if shareid not in self.landlords: return defer.succeed(None) sh = self.landlords[shareid] d = sh.put_crypttext_hashes(all_hashes) d.addErrback(self._remove_shareholder, shareid, "put_crypttext_hashes") return d def send_all_block_hash_trees(self): self.log("sending block hash trees", level=log.NOISY) self.set_status("Sending Subshare Hash Trees") self.set_encode_and_push_progress(extra=0.4) dl = [] for shareid,hashes in enumerate(self.block_hashes): # hashes is a list of the hashes of all blocks that were sent # to shareholder[shareid]. dl.append(self.send_one_block_hash_tree(shareid, hashes)) return self._gather_responses(dl) def send_one_block_hash_tree(self, shareid, block_hashes): t = HashTree(block_hashes) all_hashes = list(t) # all_hashes[0] is the root hash, == hash(ah[1]+ah[2]) # all_hashes[1] is the left child, == hash(ah[3]+ah[4]) # all_hashes[n] == hash(all_hashes[2*n+1] + all_hashes[2*n+2]) self.share_root_hashes[shareid] = t[0] if shareid not in self.landlords: return defer.succeed(None) sh = self.landlords[shareid] d = sh.put_block_hashes(all_hashes) d.addErrback(self._remove_shareholder, shareid, "put_block_hashes") return d def send_all_share_hash_trees(self): # Each bucket gets a set of share hash tree nodes that are needed to validate their # share. This includes the share hash itself, but does not include the top-level hash # root (which is stored securely in the URI instead). self.log("sending all share hash trees", level=log.NOISY) self.set_status("Sending Share Hash Trees") self.set_encode_and_push_progress(extra=0.6) dl = [] for h in self.share_root_hashes: assert h # create the share hash tree t = HashTree(self.share_root_hashes) # the root of this hash tree goes into our URI self.uri_extension_data['share_root_hash'] = t[0] # now send just the necessary pieces out to each shareholder for i in range(self.num_shares): # the HashTree is given a list of leaves: 0,1,2,3..n . # These become nodes A+0,A+1,A+2.. 
of the tree, where A=n-1 needed_hash_indices = t.needed_hashes(i, include_leaf=True) hashes = [(hi, t[hi]) for hi in needed_hash_indices] dl.append(self.send_one_share_hash_tree(i, hashes)) return self._gather_responses(dl) def send_one_share_hash_tree(self, shareid, needed_hashes): if shareid not in self.landlords: return defer.succeed(None) sh = self.landlords[shareid] d = sh.put_share_hashes(needed_hashes) d.addErrback(self._remove_shareholder, shareid, "put_share_hashes") return d def send_uri_extension_to_all_shareholders(self): lp = self.log("sending uri_extension", level=log.NOISY) self.set_status("Sending URI Extensions") self.set_encode_and_push_progress(extra=0.8) for k in ('crypttext_root_hash', 'crypttext_hash', ): assert k in self.uri_extension_data uri_extension = uri.pack_extension(self.uri_extension_data) ed = {} for k,v in self.uri_extension_data.items(): if k.endswith("hash"): ed[k] = base32.b2a(v) else: ed[k] = v self.log("uri_extension_data is %s" % (ed,), level=log.NOISY, parent=lp) self.uri_extension_hash = hashutil.uri_extension_hash(uri_extension) dl = [] for shareid in list(self.landlords): dl.append(self.send_uri_extension(shareid, uri_extension)) return self._gather_responses(dl) def send_uri_extension(self, shareid, uri_extension): sh = self.landlords[shareid] d = sh.put_uri_extension(uri_extension) d.addErrback(self._remove_shareholder, shareid, "put_uri_extension") return d def close_all_shareholders(self): self.log("closing shareholders", level=log.NOISY) self.set_status("Closing Shareholders") self.set_encode_and_push_progress(extra=0.9) dl = [] for shareid in list(self.landlords): d = self.landlords[shareid].close() d.addErrback(self._remove_shareholder, shareid, "close") dl.append(d) return self._gather_responses(dl) def done(self, res): self.log("upload done", level=log.OPERATIONAL) self.set_status("Finished") self.set_encode_and_push_progress(extra=1.0) # done now = time.time() h_and_c_elapsed = now - self._start_hashing_and_close_timestamp self._times["hashes_and_close"] = h_and_c_elapsed total_elapsed = now - self._start_total_timestamp self._times["total_encode_and_push"] = total_elapsed # update our sharemap self._shares_placed = set(self.landlords.keys()) return uri.CHKFileVerifierURI(self._storage_index, self.uri_extension_hash, self.required_shares, self.num_shares, self.file_size) def err(self, f): self.log("upload failed", failure=f, level=log.UNUSUAL) self.set_status("Failed") # we need to abort any remaining shareholders, so they'll delete the # partial share, allowing someone else to upload it again. self.log("aborting shareholders", level=log.UNUSUAL) for shareid in list(self.landlords): self.landlords[shareid].abort() if f.check(defer.FirstError): return f.value.subFailure return f def get_shares_placed(self): # return a set of share numbers that were successfully placed. 
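# A small usage sketch of the share hash tree construction above, with toy
# SHA-256 leaves from the stdlib. It assumes the HashTree class used here
# lives in allmydata.hashtree and behaves as it is used above: list(t) yields
# every node, t[0] is the root, and t.needed_hashes(i, include_leaf=True)
# names the nodes a shareholder needs to verify leaf i against the root.
import hashlib
from allmydata.hashtree import HashTree

def example_share_hash_chain(num_shares=4, leafnum=2):
    leaves = [hashlib.sha256("share root hash %d" % i).digest()
              for i in range(num_shares)]
    t = HashTree(leaves)
    root = t[0]   # in the real encoder this goes into the URI extension data
    needed = t.needed_hashes(leafnum, include_leaf=True)
    chain = [(i, t[i]) for i in needed]   # what put_share_hashes() receives
    return root, chain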
return self._shares_placed def get_times(self): # return a dictionary of encode+push timings return self._times def get_uri_extension_data(self): return self.uri_extension_data def get_uri_extension_hash(self): return self.uri_extension_hash tahoe-lafs-1.10.0/src/allmydata/immutable/filenode.py000066400000000000000000000322601221140116300224630ustar00rootroot00000000000000 import binascii import time now = time.time from zope.interface import implements from twisted.internet import defer from allmydata import uri from twisted.internet.interfaces import IConsumer from allmydata.interfaces import IImmutableFileNode, IUploadResults from allmydata.util import consumer from allmydata.check_results import CheckResults, CheckAndRepairResults from allmydata.util.dictutil import DictOfSets from pycryptopp.cipher.aes import AES # local imports from allmydata.immutable.checker import Checker from allmydata.immutable.repairer import Repairer from allmydata.immutable.downloader.node import DownloadNode, \ IDownloadStatusHandlingConsumer from allmydata.immutable.downloader.status import DownloadStatus class CiphertextFileNode: def __init__(self, verifycap, storage_broker, secret_holder, terminator, history): assert isinstance(verifycap, uri.CHKFileVerifierURI) self._verifycap = verifycap self._storage_broker = storage_broker self._secret_holder = secret_holder self._terminator = terminator self._history = history self._download_status = None self._node = None # created lazily, on read() def _maybe_create_download_node(self): if not self._download_status: ds = DownloadStatus(self._verifycap.storage_index, self._verifycap.size) if self._history: self._history.add_download(ds) self._download_status = ds if self._node is None: self._node = DownloadNode(self._verifycap, self._storage_broker, self._secret_holder, self._terminator, self._history, self._download_status) def read(self, consumer, offset=0, size=None): """I am the main entry point, from which FileNode.read() can get data. I feed the consumer with the desired range of ciphertext. I return a Deferred that fires (with the consumer) when the read is finished.""" self._maybe_create_download_node() return self._node.read(consumer, offset, size) def get_segment(self, segnum): """Begin downloading a segment. I return a tuple (d, c): 'd' is a Deferred that fires with (offset,data) when the desired segment is available, and c is an object on which c.cancel() can be called to disavow interest in the segment (after which 'd' will never fire). You probably need to know the segment size before calling this, unless you want the first few bytes of the file. If you ask for a segment number which turns out to be too large, the Deferred will errback with BadSegmentNumberError. The Deferred fires with the offset of the first byte of the data segment, so that you can call get_segment() before knowing the segment size, and still know which data you received. 
""" self._maybe_create_download_node() return self._node.get_segment(segnum) def get_segment_size(self): # return a Deferred that fires with the file's real segment size self._maybe_create_download_node() return self._node.get_segsize() def get_storage_index(self): return self._verifycap.storage_index def get_verify_cap(self): return self._verifycap def get_size(self): return self._verifycap.size def raise_error(self): pass def is_mutable(self): return False def check_and_repair(self, monitor, verify=False, add_lease=False): c = Checker(verifycap=self._verifycap, servers=self._storage_broker.get_connected_servers(), verify=verify, add_lease=add_lease, secret_holder=self._secret_holder, monitor=monitor) d = c.start() d.addCallback(self._maybe_repair, monitor) return d def _maybe_repair(self, cr, monitor): crr = CheckAndRepairResults(self._verifycap.storage_index) crr.pre_repair_results = cr if cr.is_healthy(): crr.post_repair_results = cr return defer.succeed(crr) crr.repair_attempted = True crr.repair_successful = False # until proven successful def _repair_error(f): # as with mutable repair, I'm not sure if I want to pass # through a failure or not. TODO crr.repair_successful = False crr.repair_failure = f return f r = Repairer(self, storage_broker=self._storage_broker, secret_holder=self._secret_holder, monitor=monitor) d = r.start() d.addCallbacks(self._gather_repair_results, _repair_error, callbackArgs=(cr, crr,)) return d def _gather_repair_results(self, ur, cr, crr): assert IUploadResults.providedBy(ur), ur # clone the cr (check results) to form the basis of the # prr (post-repair results) verifycap = self._verifycap servers_responding = set(cr.get_servers_responding()) sm = DictOfSets() assert isinstance(cr.get_sharemap(), DictOfSets) for shnum, servers in cr.get_sharemap().items(): for server in servers: sm.add(shnum, server) for shnum, servers in ur.get_sharemap().items(): for server in servers: sm.add(shnum, server) servers_responding.add(server) servers_responding = sorted(servers_responding) good_hosts = len(reduce(set.union, sm.values(), set())) is_healthy = bool(len(sm) >= verifycap.total_shares) is_recoverable = bool(len(sm) >= verifycap.needed_shares) # TODO: this may be wrong, see ticket #1115 comment:27 and ticket #1784. 
needs_rebalancing = bool(len(sm) >= verifycap.total_shares) prr = CheckResults(cr.get_uri(), cr.get_storage_index(), healthy=is_healthy, recoverable=is_recoverable, needs_rebalancing=needs_rebalancing, count_shares_needed=verifycap.needed_shares, count_shares_expected=verifycap.total_shares, count_shares_good=len(sm), count_good_share_hosts=good_hosts, count_recoverable_versions=int(is_recoverable), count_unrecoverable_versions=int(not is_recoverable), servers_responding=list(servers_responding), sharemap=sm, count_wrong_shares=0, # no such thing as wrong, for immutable list_corrupt_shares=cr.get_corrupt_shares(), count_corrupt_shares=len(cr.get_corrupt_shares()), list_incompatible_shares=cr.get_incompatible_shares(), count_incompatible_shares=len(cr.get_incompatible_shares()), summary="", report=[], share_problems=[], servermap=None) crr.repair_successful = is_healthy crr.post_repair_results = prr return crr def check(self, monitor, verify=False, add_lease=False): verifycap = self._verifycap sb = self._storage_broker servers = sb.get_connected_servers() sh = self._secret_holder v = Checker(verifycap=verifycap, servers=servers, verify=verify, add_lease=add_lease, secret_holder=sh, monitor=monitor) return v.start() class DecryptingConsumer: """I sit between a CiphertextDownloader (which acts as a Producer) and the real Consumer, decrypting everything that passes by. The real Consumer sees the real Producer, but the Producer sees us instead of the real consumer.""" implements(IConsumer, IDownloadStatusHandlingConsumer) def __init__(self, consumer, readkey, offset): self._consumer = consumer self._read_ev = None self._download_status = None # TODO: pycryptopp CTR-mode needs random-access operations: I want # either a=AES(readkey, offset) or better yet both of: # a=AES(readkey, offset=0) # a.process(ciphertext, offset=xyz) # For now, we fake it with the existing iv= argument. offset_big = offset // 16 offset_small = offset % 16 iv = binascii.unhexlify("%032x" % offset_big) self._decryptor = AES(readkey, iv=iv) self._decryptor.process("\x00"*offset_small) def set_download_status_read_event(self, read_ev): self._read_ev = read_ev def set_download_status(self, ds): self._download_status = ds def registerProducer(self, producer, streaming): # this passes through, so the real consumer can flow-control the real # producer. Therefore we don't need to provide any IPushProducer # methods. We implement all the IConsumer methods as pass-throughs, # and only intercept write() to perform decryption. self._consumer.registerProducer(producer, streaming) def unregisterProducer(self): self._consumer.unregisterProducer() def write(self, ciphertext): started = now() plaintext = self._decryptor.process(ciphertext) if self._read_ev: elapsed = now() - started self._read_ev.update(0, elapsed, 0) if self._download_status: self._download_status.add_misc_event("AES", started, now()) self._consumer.write(plaintext) class ImmutableFileNode: implements(IImmutableFileNode) # I wrap a CiphertextFileNode with a decryption key def __init__(self, filecap, storage_broker, secret_holder, terminator, history): assert isinstance(filecap, uri.CHKFileURI) verifycap = filecap.get_verify_cap() self._cnode = CiphertextFileNode(verifycap, storage_broker, secret_holder, terminator, history) assert isinstance(filecap, uri.CHKFileURI) self.u = filecap self._readkey = filecap.key # TODO: I'm not sure about this.. what's the use case for node==node? 
If # we keep it here, we should also put this on CiphertextFileNode def __hash__(self): return self.u.__hash__() def __eq__(self, other): if isinstance(other, ImmutableFileNode): return self.u.__eq__(other.u) else: return False def __ne__(self, other): if isinstance(other, ImmutableFileNode): return self.u.__eq__(other.u) else: return True def read(self, consumer, offset=0, size=None): decryptor = DecryptingConsumer(consumer, self._readkey, offset) d = self._cnode.read(decryptor, offset, size) d.addCallback(lambda dc: consumer) return d def raise_error(self): pass def get_write_uri(self): return None def get_readonly_uri(self): return self.get_uri() def get_uri(self): return self.u.to_string() def get_cap(self): return self.u def get_readcap(self): return self.u.get_readonly() def get_verify_cap(self): return self.u.get_verify_cap() def get_repair_cap(self): # CHK files can be repaired with just the verifycap return self.u.get_verify_cap() def get_storage_index(self): return self.u.get_storage_index() def get_size(self): return self.u.get_size() def get_current_size(self): return defer.succeed(self.get_size()) def is_mutable(self): return False def is_readonly(self): return True def is_unknown(self): return False def is_allowed_in_immutable_directory(self): return True def check_and_repair(self, monitor, verify=False, add_lease=False): return self._cnode.check_and_repair(monitor, verify, add_lease) def check(self, monitor, verify=False, add_lease=False): return self._cnode.check(monitor, verify, add_lease) def get_best_readable_version(self): """ Return an IReadable of the best version of this file. Since immutable files can have only one version, we just return the current filenode. """ return defer.succeed(self) def download_best_version(self): """ Download the best version of this file, returning its contents as a bytestring. Since there is only one version of an immutable file, we download and return the contents of this file. """ d = consumer.download_to_data(self) return d # for an immutable file, download_to_data (specified in IReadable) # is the same as download_best_version (specified in IFileNode). For # mutable files, the difference is more meaningful, since they can # have multiple versions. download_to_data = download_best_version # get_size() (IReadable), get_current_size() (IFilesystemNode), and # get_size_of_best_version(IFileNode) are all the same for immutable # files. get_size_of_best_version = get_current_size tahoe-lafs-1.10.0/src/allmydata/immutable/layout.py000066400000000000000000000504621221140116300222170ustar00rootroot00000000000000import struct from zope.interface import implements from twisted.internet import defer from allmydata.interfaces import IStorageBucketWriter, IStorageBucketReader, \ FileTooLargeError, HASH_SIZE from allmydata.util import mathutil, observer, pipeline from allmydata.util.assertutil import precondition from allmydata.storage.server import si_b2a class LayoutInvalid(Exception): """ There is something wrong with these bytes so they can't be interpreted as the kind of immutable file that I know how to download.""" pass class RidiculouslyLargeURIExtensionBlock(LayoutInvalid): """ When downloading a file, the length of the URI Extension Block was given as >= 2**32. 
This means the share data must have been corrupted, or else the original uploader of the file wrote a ridiculous value into the URI Extension Block length.""" pass class ShareVersionIncompatible(LayoutInvalid): """ When downloading a share, its format was not one of the formats we know how to parse.""" pass """ Share data is written in a file. At the start of the file, there is a series of four-byte big-endian offset values, which indicate where each section starts. Each offset is measured from the beginning of the share data. 0x00: version number (=00 00 00 01) 0x04: block size # See Footnote 1 below. 0x08: share data size # See Footnote 1 below. 0x0c: offset of data (=00 00 00 24) 0x10: offset of plaintext_hash_tree UNUSED 0x14: offset of crypttext_hash_tree 0x18: offset of block_hashes 0x1c: offset of share_hashes 0x20: offset of uri_extension_length + uri_extension 0x24: start of data ? : start of plaintext_hash_tree UNUSED ? : start of crypttext_hash_tree ? : start of block_hashes ? : start of share_hashes each share_hash is written as a two-byte (big-endian) hashnum followed by the 32-byte SHA-256 hash. We store only the hashes necessary to validate the share hash root ? : start of uri_extension_length (four-byte big-endian value) ? : start of uri_extension """ """ v2 shares: these use 8-byte offsets to remove two of the three ~12GiB size limitations described in #346. 0x00: version number (=00 00 00 02) 0x04: block size # See Footnote 1 below. 0x0c: share data size # See Footnote 1 below. 0x14: offset of data (=00 00 00 00 00 00 00 44) 0x1c: offset of plaintext_hash_tree UNUSED 0x24: offset of crypttext_hash_tree 0x2c: offset of block_hashes 0x34: offset of share_hashes 0x3c: offset of uri_extension_length + uri_extension 0x44: start of data : rest of share is the same as v1, above ... ... ? : start of uri_extension_length (eight-byte big-endian value) ? : start of uri_extension """ # Footnote 1: as of Tahoe v1.3.0 these fields are not used when reading, but # they are still provided when writing so that older versions of Tahoe can # read them. 
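# A concrete sketch of the v1 header described above: nine big-endian 4-byte
# words (version, block size, data size, then the six section offsets), 0x24
# bytes in total, which is also where the share data begins. The sizes passed
# in here are made-up sample values; the real ones come from the encoder.
import struct

def example_v1_header(block_size=65536, data_size=3*65536,
                      segment_hash_size=5*32, share_hashtree_size=4*(2+32)):
    x = 0x24
    offsets = {}
    offsets['data'] = x
    x += data_size
    offsets['plaintext_hash_tree'] = x  # UNUSED
    x += segment_hash_size
    offsets['crypttext_hash_tree'] = x
    x += segment_hash_size
    offsets['block_hashes'] = x
    x += segment_hash_size
    offsets['share_hashes'] = x
    x += share_hashtree_size
    offsets['uri_extension'] = x
    header = struct.pack(">LLLLLLLLL",
                         1,  # version number
                         block_size, data_size,
                         offsets['data'],
                         offsets['plaintext_hash_tree'],  # UNUSED
                         offsets['crypttext_hash_tree'],
                         offsets['block_hashes'],
                         offsets['share_hashes'],
                         offsets['uri_extension'])
    assert len(header) == 0x24
    return header, offsets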
FORCE_V2 = False # set briefly by unit tests to make small-sized V2 shares def make_write_bucket_proxy(rref, server, data_size, block_size, num_segments, num_share_hashes, uri_extension_size_max): # Use layout v1 for small files, so they'll be readable by older versions # (= 2**32 or data_size >= 2**32: raise FileTooLargeError("This file is too large to be uploaded (data_size).") offsets = self._offsets = {} x = 0x24 offsets['data'] = x x += data_size offsets['plaintext_hash_tree'] = x # UNUSED x += self._segment_hash_size offsets['crypttext_hash_tree'] = x x += self._segment_hash_size offsets['block_hashes'] = x x += self._segment_hash_size offsets['share_hashes'] = x x += self._share_hashtree_size offsets['uri_extension'] = x if x >= 2**32: raise FileTooLargeError("This file is too large to be uploaded (offsets).") offset_data = struct.pack(">LLLLLLLLL", 1, # version number block_size, data_size, offsets['data'], offsets['plaintext_hash_tree'], # UNUSED offsets['crypttext_hash_tree'], offsets['block_hashes'], offsets['share_hashes'], offsets['uri_extension'], ) assert len(offset_data) == 0x24 self._offset_data = offset_data def __repr__(self): return "" % self._server.get_name() def put_header(self): return self._write(0, self._offset_data) def put_block(self, segmentnum, data): offset = self._offsets['data'] + segmentnum * self._block_size assert offset + len(data) <= self._offsets['uri_extension'] assert isinstance(data, str) if segmentnum < self._num_segments-1: precondition(len(data) == self._block_size, len(data), self._block_size) else: precondition(len(data) == (self._data_size - (self._block_size * (self._num_segments - 1))), len(data), self._block_size) return self._write(offset, data) def put_crypttext_hashes(self, hashes): offset = self._offsets['crypttext_hash_tree'] assert isinstance(hashes, list) data = "".join(hashes) precondition(len(data) == self._segment_hash_size, len(data), self._segment_hash_size) precondition(offset + len(data) <= self._offsets['block_hashes'], offset, len(data), offset+len(data), self._offsets['block_hashes']) return self._write(offset, data) def put_block_hashes(self, blockhashes): offset = self._offsets['block_hashes'] assert isinstance(blockhashes, list) data = "".join(blockhashes) precondition(len(data) == self._segment_hash_size, len(data), self._segment_hash_size) precondition(offset + len(data) <= self._offsets['share_hashes'], offset, len(data), offset+len(data), self._offsets['share_hashes']) return self._write(offset, data) def put_share_hashes(self, sharehashes): # sharehashes is a list of (index, hash) tuples, so they get stored # as 2+32=34 bytes each offset = self._offsets['share_hashes'] assert isinstance(sharehashes, list) data = "".join([struct.pack(">H", hashnum) + hashvalue for hashnum,hashvalue in sharehashes]) precondition(len(data) == self._share_hashtree_size, len(data), self._share_hashtree_size) precondition(offset + len(data) <= self._offsets['uri_extension'], offset, len(data), offset+len(data), self._offsets['uri_extension']) return self._write(offset, data) def put_uri_extension(self, data): offset = self._offsets['uri_extension'] assert isinstance(data, str) precondition(len(data) <= self._uri_extension_size_max, len(data), self._uri_extension_size_max) length = struct.pack(self.fieldstruct, len(data)) return self._write(offset, length+data) def _write(self, offset, data): # use a Pipeline to pipeline several writes together. 
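# Each entry handled by put_share_hashes() above is exactly 2+32 == 34 bytes:
# a two-byte big-endian node index followed by the 32-byte hash value. A
# stdlib-only round trip, with a toy SHA-256 digest standing in for the real
# tagged hash:
import struct, hashlib

def pack_share_hashes(sharehashes):
    # sharehashes: list of (hashnum, 32-byte hashvalue) tuples
    return "".join([struct.pack(">H", hashnum) + hashvalue
                    for hashnum, hashvalue in sharehashes])

def unpack_share_hashes(data):
    assert len(data) % (2+32) == 0
    pairs = []
    for i in range(0, len(data), 2+32):
        hashnum = struct.unpack(">H", data[i:i+2])[0]
        pairs.append((hashnum, data[i+2:i+2+32]))
    return pairs

# pack_share_hashes([(7, hashlib.sha256("leaf").digest())]) is 34 bytes long,
# and unpack_share_hashes() gives back the same (7, digest) pair.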
TODO: another # speedup would be to coalesce small writes into a single call: this # would reduce the foolscap CPU overhead per share, but wouldn't # reduce the number of round trips, so it might not be worth the # effort. return self._pipeline.add(len(data), self._rref.callRemote, "write", offset, data) def close(self): d = self._pipeline.add(0, self._rref.callRemote, "close") d.addCallback(lambda ign: self._pipeline.flush()) return d def abort(self): return self._rref.callRemoteOnly("abort") def get_servername(self): return self._server.get_name() def get_peerid(self): return self._server.get_serverid() class WriteBucketProxy_v2(WriteBucketProxy): fieldsize = 8 fieldstruct = ">Q" def _create_offsets(self, block_size, data_size): if block_size >= 2**64 or data_size >= 2**64: raise FileTooLargeError("This file is too large to be uploaded (data_size).") offsets = self._offsets = {} x = 0x44 offsets['data'] = x x += data_size offsets['plaintext_hash_tree'] = x # UNUSED x += self._segment_hash_size offsets['crypttext_hash_tree'] = x x += self._segment_hash_size offsets['block_hashes'] = x x += self._segment_hash_size offsets['share_hashes'] = x x += self._share_hashtree_size offsets['uri_extension'] = x if x >= 2**64: raise FileTooLargeError("This file is too large to be uploaded (offsets).") offset_data = struct.pack(">LQQQQQQQQ", 2, # version number block_size, data_size, offsets['data'], offsets['plaintext_hash_tree'], # UNUSED offsets['crypttext_hash_tree'], offsets['block_hashes'], offsets['share_hashes'], offsets['uri_extension'], ) assert len(offset_data) == 0x44, len(offset_data) self._offset_data = offset_data class ReadBucketProxy: implements(IStorageBucketReader) MAX_UEB_SIZE = 2000 # actual size is closer to 419, but varies by a few bytes def __init__(self, rref, server, storage_index): self._rref = rref self._server = server self._storage_index = storage_index self._started = False # sent request to server self._ready = observer.OneShotObserverList() # got response from server def get_peerid(self): return self._server.get_serverid() def __repr__(self): return "" % \ (id(self), self._server.get_name(), si_b2a(self._storage_index)) def _start_if_needed(self): """ Returns a deferred that will be fired when I'm ready to return data, or errbacks if the starting (header reading and parsing) process fails.""" if not self._started: self._start() return self._ready.when_fired() def _start(self): self._started = True # TODO: for small shares, read the whole bucket in _start() d = self._fetch_header() d.addCallback(self._parse_offsets) # XXX The following two callbacks implement a slightly faster/nicer # way to get the ueb and sharehashtree, but it requires that the # storage server be >= v1.3.0. 
# d.addCallback(self._fetch_sharehashtree_and_ueb) # d.addCallback(self._parse_sharehashtree_and_ueb) def _fail_waiters(f): self._ready.fire(f) def _notify_waiters(result): self._ready.fire(result) d.addCallbacks(_notify_waiters, _fail_waiters) return d def _fetch_header(self): return self._read(0, 0x44) def _parse_offsets(self, data): precondition(len(data) >= 0x4) self._offsets = {} (version,) = struct.unpack(">L", data[0:4]) if version != 1 and version != 2: raise ShareVersionIncompatible(version) if version == 1: precondition(len(data) >= 0x24) x = 0x0c fieldsize = 0x4 fieldstruct = ">L" else: precondition(len(data) >= 0x44) x = 0x14 fieldsize = 0x8 fieldstruct = ">Q" self._version = version self._fieldsize = fieldsize self._fieldstruct = fieldstruct for field in ( 'data', 'plaintext_hash_tree', # UNUSED 'crypttext_hash_tree', 'block_hashes', 'share_hashes', 'uri_extension', ): offset = struct.unpack(fieldstruct, data[x:x+fieldsize])[0] x += fieldsize self._offsets[field] = offset return self._offsets def _fetch_sharehashtree_and_ueb(self, offsets): sharehashtree_size = offsets['uri_extension'] - offsets['share_hashes'] return self._read(offsets['share_hashes'], self.MAX_UEB_SIZE+sharehashtree_size) def _parse_sharehashtree_and_ueb(self, data): sharehashtree_size = self._offsets['uri_extension'] - self._offsets['share_hashes'] if len(data) < sharehashtree_size: raise LayoutInvalid("share hash tree truncated -- should have at least %d bytes -- not %d" % (sharehashtree_size, len(data))) if sharehashtree_size % (2+HASH_SIZE) != 0: raise LayoutInvalid("share hash tree malformed -- should have an even multiple of %d bytes -- not %d" % (2+HASH_SIZE, sharehashtree_size)) self._share_hashes = [] for i in range(0, sharehashtree_size, 2+HASH_SIZE): hashnum = struct.unpack(">H", data[i:i+2])[0] hashvalue = data[i+2:i+2+HASH_SIZE] self._share_hashes.append( (hashnum, hashvalue) ) i = self._offsets['uri_extension']-self._offsets['share_hashes'] if len(data) < i+self._fieldsize: raise LayoutInvalid("not enough bytes to encode URI length -- should be at least %d bytes long, not %d " % (i+self._fieldsize, len(data),)) length = struct.unpack(self._fieldstruct, data[i:i+self._fieldsize])[0] self._ueb_data = data[i+self._fieldsize:i+self._fieldsize+length] def _get_block_data(self, unused, blocknum, blocksize, thisblocksize): offset = self._offsets['data'] + blocknum * blocksize return self._read(offset, thisblocksize) def get_block_data(self, blocknum, blocksize, thisblocksize): d = self._start_if_needed() d.addCallback(self._get_block_data, blocknum, blocksize, thisblocksize) return d def _str2l(self, s): """ split string (pulled from storage) into a list of blockids """ return [ s[i:i+HASH_SIZE] for i in range(0, len(s), HASH_SIZE) ] def _get_crypttext_hashes(self, unused=None): offset = self._offsets['crypttext_hash_tree'] size = self._offsets['block_hashes'] - offset d = self._read(offset, size) d.addCallback(self._str2l) return d def get_crypttext_hashes(self): d = self._start_if_needed() d.addCallback(self._get_crypttext_hashes) return d def _get_block_hashes(self, unused=None, at_least_these=()): # TODO: fetch only at_least_these instead of all of them. 
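# A standalone sketch of the offset-table parsing done by _parse_offsets()
# above: the version word picks the field width (4-byte ">L" starting at 0x0c
# for v1, 8-byte ">Q" starting at 0x14 for v2), then the six section offsets
# are unpacked in a fixed order.
import struct

_OFFSET_FIELDS = ('data',
                  'plaintext_hash_tree',  # UNUSED
                  'crypttext_hash_tree',
                  'block_hashes',
                  'share_hashes',
                  'uri_extension')

def parse_offsets(header):
    (version,) = struct.unpack(">L", header[0:4])
    if version == 1:
        x, fieldsize, fieldstruct = 0x0c, 4, ">L"
    elif version == 2:
        x, fieldsize, fieldstruct = 0x14, 8, ">Q"
    else:
        raise ValueError("unknown share version %d" % version)
    offsets = {}
    for field in _OFFSET_FIELDS:
        offsets[field] = struct.unpack(fieldstruct, header[x:x+fieldsize])[0]
        x += fieldsize
    return version, offsets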
offset = self._offsets['block_hashes'] size = self._offsets['share_hashes'] - offset d = self._read(offset, size) d.addCallback(self._str2l) return d def get_block_hashes(self, at_least_these=()): if at_least_these: d = self._start_if_needed() d.addCallback(self._get_block_hashes, at_least_these) return d else: return defer.succeed([]) def _get_share_hashes(self, unused=None): if hasattr(self, '_share_hashes'): return self._share_hashes else: return self._get_share_hashes_the_old_way() return self._share_hashes def get_share_hashes(self): d = self._start_if_needed() d.addCallback(self._get_share_hashes) return d def _get_share_hashes_the_old_way(self): """ Tahoe storage servers < v1.3.0 would return an error if you tried to read past the end of the share, so we need to use the offset and read just that much.""" offset = self._offsets['share_hashes'] size = self._offsets['uri_extension'] - offset if size % (2+HASH_SIZE) != 0: raise LayoutInvalid("share hash tree corrupted -- should occupy a multiple of %d bytes, not %d bytes" % ((2+HASH_SIZE), size)) d = self._read(offset, size) def _unpack_share_hashes(data): if len(data) != size: raise LayoutInvalid("share hash tree corrupted -- got a short read of the share data -- should have gotten %d, not %d bytes" % (size, len(data))) hashes = [] for i in range(0, size, 2+HASH_SIZE): hashnum = struct.unpack(">H", data[i:i+2])[0] hashvalue = data[i+2:i+2+HASH_SIZE] hashes.append( (hashnum, hashvalue) ) return hashes d.addCallback(_unpack_share_hashes) return d def _get_uri_extension_the_old_way(self, unused=None): """ Tahoe storage servers < v1.3.0 would return an error if you tried to read past the end of the share, so we need to fetch the UEB size and then read just that much.""" offset = self._offsets['uri_extension'] d = self._read(offset, self._fieldsize) def _got_length(data): if len(data) != self._fieldsize: raise LayoutInvalid("not enough bytes to encode URI length -- should be %d bytes long, not %d " % (self._fieldsize, len(data),)) length = struct.unpack(self._fieldstruct, data)[0] if length >= 2**31: # URI extension blocks are around 419 bytes long, so this # must be corrupted. Anyway, the foolscap interface schema # for "read" will not allow >= 2**31 bytes length. 
raise RidiculouslyLargeURIExtensionBlock(length) return self._read(offset+self._fieldsize, length) d.addCallback(_got_length) return d def _get_uri_extension(self, unused=None): if hasattr(self, '_ueb_data'): return self._ueb_data else: return self._get_uri_extension_the_old_way() def get_uri_extension(self): d = self._start_if_needed() d.addCallback(self._get_uri_extension) return d def _read(self, offset, length): return self._rref.callRemote("read", offset, length) tahoe-lafs-1.10.0/src/allmydata/immutable/literal.py000066400000000000000000000063331221140116300223340ustar00rootroot00000000000000from cStringIO import StringIO from zope.interface import implements from twisted.internet import defer from twisted.internet.interfaces import IPushProducer from twisted.protocols import basic from allmydata.interfaces import IImmutableFileNode, ICheckable from allmydata.uri import LiteralFileURI class _ImmutableFileNodeBase(object): implements(IImmutableFileNode, ICheckable) def get_write_uri(self): return None def get_readonly_uri(self): return self.get_uri() def is_mutable(self): return False def is_readonly(self): return True def is_unknown(self): return False def is_allowed_in_immutable_directory(self): return True def raise_error(self): pass def __hash__(self): return self.u.__hash__() def __eq__(self, other): if isinstance(other, _ImmutableFileNodeBase): return self.u.__eq__(other.u) else: return False def __ne__(self, other): if isinstance(other, _ImmutableFileNodeBase): return self.u.__eq__(other.u) else: return True class LiteralProducer: implements(IPushProducer) def pauseProducing(self): pass def resumeProducing(self): pass def stopProducing(self): pass class LiteralFileNode(_ImmutableFileNodeBase): def __init__(self, filecap): assert isinstance(filecap, LiteralFileURI) self.u = filecap def get_size(self): return len(self.u.data) def get_current_size(self): return defer.succeed(self.get_size()) def get_cap(self): return self.u def get_readcap(self): return self.u def get_verify_cap(self): return None def get_repair_cap(self): return None def get_uri(self): return self.u.to_string() def get_storage_index(self): return None def check(self, monitor, verify=False, add_lease=False): return defer.succeed(None) def check_and_repair(self, monitor, verify=False, add_lease=False): return defer.succeed(None) def read(self, consumer, offset=0, size=None): if size is None: data = self.u.data[offset:] else: data = self.u.data[offset:offset+size] # We use twisted.protocols.basic.FileSender, which only does # non-streaming, i.e. PullProducer, where the receiver/consumer must # ask explicitly for each chunk of data. There are only two places in # the Twisted codebase that can't handle streaming=False, both of # which are in the upload path for an FTP/SFTP server # (protocols.ftp.FileConsumer and # vfs.adapters.ftp._FileToConsumerAdapter), neither of which is # likely to be used as the target for a Tahoe download. 
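# A minimal usage sketch for LiteralFileNode: a LIT cap carries its whole
# payload inside the URI, so reading it back touches no storage servers. This
# assumes LiteralFileURI() accepts the data directly and that
# consumer.download_to_data() behaves as it is used elsewhere in this tree.
from allmydata.uri import LiteralFileURI
from allmydata.util import consumer

def example_literal_roundtrip():
    node = LiteralFileNode(LiteralFileURI("tiny file contents"))
    d = consumer.download_to_data(node)  # Deferred firing with the bytestring
    d.addCallback(lambda data: data == "tiny file contents")
    return d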
d = basic.FileSender().beginFileTransfer(StringIO(data), consumer) d.addCallback(lambda lastSent: consumer) return d # IReadable, IFileNode, IFilesystemNode def get_best_readable_version(self): return defer.succeed(self) def download_best_version(self): return defer.succeed(self.u.data) download_to_data = download_best_version get_size_of_best_version = get_current_size tahoe-lafs-1.10.0/src/allmydata/immutable/offloaded.py000066400000000000000000000636731221140116300226350ustar00rootroot00000000000000 import os, stat, time, weakref from zope.interface import implements from twisted.internet import defer from foolscap.api import Referenceable, DeadReferenceError, eventually import allmydata # for __full_version__ from allmydata import interfaces, uri from allmydata.storage.server import si_b2a from allmydata.immutable import upload from allmydata.immutable.layout import ReadBucketProxy from allmydata.util.assertutil import precondition from allmydata.util import log, observer, fileutil, hashutil, dictutil class NotEnoughWritersError(Exception): pass class CHKCheckerAndUEBFetcher: """I check to see if a file is already present in the grid. I also fetch the URI Extension Block, which is useful for an uploading client who wants to avoid the work of encryption and encoding. I return False if the file is not completely healthy: i.e. if there are less than 'N' shares present. If the file is completely healthy, I return a tuple of (sharemap, UEB_data, UEB_hash). """ def __init__(self, peer_getter, storage_index, logparent=None): self._peer_getter = peer_getter self._found_shares = set() self._storage_index = storage_index self._sharemap = dictutil.DictOfSets() self._readers = set() self._ueb_hash = None self._ueb_data = None self._logparent = logparent def log(self, *args, **kwargs): if 'facility' not in kwargs: kwargs['facility'] = "tahoe.helper.chk.checkandUEBfetch" if 'parent' not in kwargs: kwargs['parent'] = self._logparent return log.msg(*args, **kwargs) def check(self): d = self._get_all_shareholders(self._storage_index) d.addCallback(self._get_uri_extension) d.addCallback(self._done) return d def _get_all_shareholders(self, storage_index): dl = [] for s in self._peer_getter(storage_index): d = s.get_rref().callRemote("get_buckets", storage_index) d.addCallbacks(self._got_response, self._got_error, callbackArgs=(s,)) dl.append(d) return defer.DeferredList(dl) def _got_response(self, buckets, server): # buckets is a dict: maps shum to an rref of the server who holds it shnums_s = ",".join([str(shnum) for shnum in buckets]) self.log("got_response: [%s] has %d shares (%s)" % (server.get_name(), len(buckets), shnums_s), level=log.NOISY) self._found_shares.update(buckets.keys()) for k in buckets: self._sharemap.add(k, server.get_serverid()) self._readers.update( [ (bucket, server) for bucket in buckets.values() ] ) def _got_error(self, f): if f.check(DeadReferenceError): return log.err(f, parent=self._logparent) pass def _get_uri_extension(self, res): # assume that we can pull the UEB from any share. If we get an error, # declare the whole file unavailable. 
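# How the Helper (further down in this file) uses this checker: check() fires
# with False when fewer than N shares are visible, otherwise with the
# (sharemap, ueb_data, ueb_hash) tuple. 'peer_getter' is whatever yields
# storage servers for a peer-selection index (the Helper passes
# storage_broker.get_servers_for_psi); the caller shown here is illustrative.
def example_check_existing(peer_getter, storage_index):
    c = CHKCheckerAndUEBFetcher(peer_getter, storage_index, logparent=None)
    d = c.check()
    def _checked(res):
        if not res:
            return None  # file not (fully) present in the grid
        (sharemap, ueb_data, ueb_hash) = res
        return (ueb_data["total_shares"], ueb_hash)
    d.addCallback(_checked)
    return d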
if not self._readers: self.log("no readers, so no UEB", level=log.NOISY) return b,server = self._readers.pop() rbp = ReadBucketProxy(b, server, si_b2a(self._storage_index)) d = rbp.get_uri_extension() d.addCallback(self._got_uri_extension) d.addErrback(self._ueb_error) return d def _got_uri_extension(self, ueb): self.log("_got_uri_extension", level=log.NOISY) self._ueb_hash = hashutil.uri_extension_hash(ueb) self._ueb_data = uri.unpack_extension(ueb) def _ueb_error(self, f): # an error means the file is unavailable, but the overall check # shouldn't fail. self.log("UEB fetch failed", failure=f, level=log.WEIRD, umid="sJLKVg") return None def _done(self, res): if self._ueb_data: found = len(self._found_shares) total = self._ueb_data['total_shares'] self.log(format="got %(found)d shares of %(total)d", found=found, total=total, level=log.NOISY) if found < total: # not all shares are present in the grid self.log("not enough to qualify, file not found in grid", level=log.NOISY) return False # all shares are present self.log("all shares present, file is found in grid", level=log.NOISY) return (self._sharemap, self._ueb_data, self._ueb_hash) # no shares are present self.log("unable to find UEB data, file not found in grid", level=log.NOISY) return False class CHKUploadHelper(Referenceable, upload.CHKUploader): """I am the helper-server -side counterpart to AssistedUploader. I handle peer selection, encoding, and share pushing. I read ciphertext from the remote AssistedUploader. """ implements(interfaces.RICHKUploadHelper) VERSION = { "http://allmydata.org/tahoe/protocols/helper/chk-upload/v1" : { }, "application-version": str(allmydata.__full_version__), } def __init__(self, storage_index, helper, storage_broker, secret_holder, incoming_file, encoding_file, log_number): self._storage_index = storage_index self._helper = helper self._incoming_file = incoming_file self._encoding_file = encoding_file self._upload_id = si_b2a(storage_index)[:5] self._log_number = log_number self._upload_status = upload.UploadStatus() self._upload_status.set_helper(False) self._upload_status.set_storage_index(storage_index) self._upload_status.set_status("fetching ciphertext") self._upload_status.set_progress(0, 1.0) self._helper.log("CHKUploadHelper starting for SI %s" % self._upload_id, parent=log_number) self._storage_broker = storage_broker self._secret_holder = secret_holder self._fetcher = CHKCiphertextFetcher(self, incoming_file, encoding_file, self._log_number) self._reader = LocalCiphertextReader(self, storage_index, encoding_file) self._finished_observers = observer.OneShotObserverList() self._started = time.time() d = self._fetcher.when_done() d.addCallback(lambda res: self._reader.start()) d.addCallback(lambda res: self.start_encrypted(self._reader)) d.addCallback(self._finished) d.addErrback(self._failed) def log(self, *args, **kwargs): if 'facility' not in kwargs: kwargs['facility'] = "tahoe.helper.chk" return upload.CHKUploader.log(self, *args, **kwargs) def remote_get_version(self): return self.VERSION def remote_upload(self, reader): # reader is an RIEncryptedUploadable. I am specified to return an # UploadResults dictionary. # Log how much ciphertext we need to get. self.log("deciding whether to upload the file or not", level=log.NOISY) if os.path.exists(self._encoding_file): # we have the whole file, and we might be encoding it (or the # encode/upload might have failed, and we need to restart it). 
self.log("ciphertext already in place", level=log.UNUSUAL) elif os.path.exists(self._incoming_file): # we have some of the file, but not all of it (otherwise we'd be # encoding). The caller might be useful. self.log("partial ciphertext already present", level=log.UNUSUAL) else: # we don't remember uploading this file self.log("no ciphertext yet", level=log.NOISY) # let our fetcher pull ciphertext from the reader. self._fetcher.add_reader(reader) # and also hashes self._reader.add_reader(reader) # and inform the client when the upload has finished return self._finished_observers.when_fired() def _finished(self, ur): assert interfaces.IUploadResults.providedBy(ur), ur vcapstr = ur.get_verifycapstr() precondition(isinstance(vcapstr, str), vcapstr) v = uri.from_string(vcapstr) f_times = self._fetcher.get_times() hur = upload.HelperUploadResults() hur.timings = {"cumulative_fetch": f_times["cumulative_fetch"], "total_fetch": f_times["total"], } for key,val in ur.get_timings().items(): hur.timings[key] = val hur.uri_extension_hash = v.uri_extension_hash hur.ciphertext_fetched = self._fetcher.get_ciphertext_fetched() hur.preexisting_shares = ur.get_preexisting_shares() # hur.sharemap needs to be {shnum: set(serverid)} hur.sharemap = {} for shnum, servers in ur.get_sharemap().items(): hur.sharemap[shnum] = set([s.get_serverid() for s in servers]) # and hur.servermap needs to be {serverid: set(shnum)} hur.servermap = {} for server, shnums in ur.get_servermap().items(): hur.servermap[server.get_serverid()] = set(shnums) hur.pushed_shares = ur.get_pushed_shares() hur.file_size = ur.get_file_size() hur.uri_extension_data = ur.get_uri_extension_data() hur.verifycapstr = vcapstr self._reader.close() os.unlink(self._encoding_file) self._finished_observers.fire(hur) self._helper.upload_finished(self._storage_index, v.size) del self._reader def _failed(self, f): self.log(format="CHKUploadHelper(%(si)s) failed", si=si_b2a(self._storage_index)[:5], failure=f, level=log.UNUSUAL) self._finished_observers.fire(f) self._helper.upload_finished(self._storage_index, 0) del self._reader class AskUntilSuccessMixin: # create me with a _reader array _last_failure = None def add_reader(self, reader): self._readers.append(reader) def call(self, *args, **kwargs): if not self._readers: raise NotEnoughWritersError("ran out of assisted uploaders, last failure was %s" % self._last_failure) rr = self._readers[0] d = rr.callRemote(*args, **kwargs) def _err(f): self._last_failure = f if rr in self._readers: self._readers.remove(rr) self._upload_helper.log("call to assisted uploader %s failed" % rr, failure=f, level=log.UNUSUAL) # we can try again with someone else who's left return self.call(*args, **kwargs) d.addErrback(_err) return d class CHKCiphertextFetcher(AskUntilSuccessMixin): """I use one or more remote RIEncryptedUploadable instances to gather ciphertext on disk. When I'm done, the file I create can be used by a LocalCiphertextReader to satisfy the ciphertext needs of a CHK upload process. I begin pulling ciphertext as soon as a reader is added. I remove readers when they have any sort of error. If the last reader is removed, I fire my when_done() Deferred with a failure. I fire my when_done() Deferred (with None) immediately after I have moved the ciphertext to 'encoded_file'. 
""" def __init__(self, helper, incoming_file, encoded_file, logparent): self._upload_helper = helper self._incoming_file = incoming_file self._encoding_file = encoded_file self._upload_id = helper._upload_id self._log_parent = logparent self._done_observers = observer.OneShotObserverList() self._readers = [] self._started = False self._f = None self._times = { "cumulative_fetch": 0.0, "total": 0.0, } self._ciphertext_fetched = 0 def log(self, *args, **kwargs): if "facility" not in kwargs: kwargs["facility"] = "tahoe.helper.chkupload.fetch" if "parent" not in kwargs: kwargs["parent"] = self._log_parent return log.msg(*args, **kwargs) def add_reader(self, reader): AskUntilSuccessMixin.add_reader(self, reader) eventually(self._start) def _start(self): if self._started: return self._started = True started = time.time() if os.path.exists(self._encoding_file): self.log("ciphertext already present, bypassing fetch", level=log.UNUSUAL) d = defer.succeed(None) else: # first, find out how large the file is going to be d = self.call("get_size") d.addCallback(self._got_size) d.addCallback(self._start_reading) d.addCallback(self._done) d.addCallback(self._done2, started) d.addErrback(self._failed) def _got_size(self, size): self.log("total size is %d bytes" % size, level=log.NOISY) self._upload_helper._upload_status.set_size(size) self._expected_size = size def _start_reading(self, res): # then find out how much crypttext we have on disk if os.path.exists(self._incoming_file): self._have = os.stat(self._incoming_file)[stat.ST_SIZE] self._upload_helper._helper.count("chk_upload_helper.resumes") self.log("we already have %d bytes" % self._have, level=log.NOISY) else: self._have = 0 self.log("we do not have any ciphertext yet", level=log.NOISY) self.log("starting ciphertext fetch", level=log.NOISY) self._f = open(self._incoming_file, "ab") # now loop to pull the data from the readers d = defer.Deferred() self._loop(d) # this Deferred will be fired once the last byte has been written to # self._f return d # read data in 50kB chunks. We should choose a more considered number # here, possibly letting the client specify it. The goal should be to # keep the RTT*bandwidth to be less than 10% of the chunk size, to reduce # the upload bandwidth lost because this protocol is non-windowing. Too # large, however, means more memory consumption for both ends. Something # that can be transferred in, say, 10 seconds sounds about right. On my # home DSL line (50kBps upstream), that suggests 500kB. Most lines are # slower, maybe 10kBps, which suggests 100kB, and that's a bit more # memory than I want to hang on to, so I'm going to go with 50kB and see # how that works. CHUNK_SIZE = 50*1024 def _loop(self, fire_when_done): # this slightly weird structure is needed because Deferreds don't do # tail-recursion, so it is important to let each one retire promptly. # Simply chaining them will cause a stack overflow at the end of a # transfer that involves more than a few hundred chunks. # 'fire_when_done' lives a long time, but the Deferreds returned by # the inner _fetch() call do not. 
start = time.time() d = defer.maybeDeferred(self._fetch) def _done(finished): elapsed = time.time() - start self._times["cumulative_fetch"] += elapsed if finished: self.log("finished reading ciphertext", level=log.NOISY) fire_when_done.callback(None) else: self._loop(fire_when_done) def _err(f): self.log(format="[%(si)s] ciphertext read failed", si=self._upload_id, failure=f, level=log.UNUSUAL) fire_when_done.errback(f) d.addCallbacks(_done, _err) return None def _fetch(self): needed = self._expected_size - self._have fetch_size = min(needed, self.CHUNK_SIZE) if fetch_size == 0: self._upload_helper._upload_status.set_progress(1, 1.0) return True # all done percent = 0.0 if self._expected_size: percent = 1.0 * (self._have+fetch_size) / self._expected_size self.log(format="fetching [%(si)s] %(start)d-%(end)d of %(total)d (%(percent)d%%)", si=self._upload_id, start=self._have, end=self._have+fetch_size, total=self._expected_size, percent=int(100.0*percent), level=log.NOISY) d = self.call("read_encrypted", self._have, fetch_size) def _got_data(ciphertext_v): for data in ciphertext_v: self._f.write(data) self._have += len(data) self._ciphertext_fetched += len(data) self._upload_helper._helper.count("chk_upload_helper.fetched_bytes", len(data)) self._upload_helper._upload_status.set_progress(1, percent) return False # not done d.addCallback(_got_data) return d def _done(self, res): self._f.close() self._f = None self.log(format="done fetching ciphertext, size=%(size)d", size=os.stat(self._incoming_file)[stat.ST_SIZE], level=log.NOISY) os.rename(self._incoming_file, self._encoding_file) def _done2(self, _ignored, started): self.log("done2", level=log.NOISY) elapsed = time.time() - started self._times["total"] = elapsed self._readers = [] self._done_observers.fire(None) def _failed(self, f): if self._f: self._f.close() self._readers = [] self._done_observers.fire(f) def when_done(self): return self._done_observers.when_fired() def get_times(self): return self._times def get_ciphertext_fetched(self): return self._ciphertext_fetched class LocalCiphertextReader(AskUntilSuccessMixin): implements(interfaces.IEncryptedUploadable) def __init__(self, upload_helper, storage_index, encoding_file): self._readers = [] self._upload_helper = upload_helper self._storage_index = storage_index self._encoding_file = encoding_file self._status = None def start(self): self._upload_helper._upload_status.set_status("pushing") self._size = os.stat(self._encoding_file)[stat.ST_SIZE] self.f = open(self._encoding_file, "rb") def get_size(self): return defer.succeed(self._size) def get_all_encoding_parameters(self): return self.call("get_all_encoding_parameters") def get_storage_index(self): return defer.succeed(self._storage_index) def read_encrypted(self, length, hash_only): assert hash_only is False d = defer.maybeDeferred(self.f.read, length) d.addCallback(lambda data: [data]) return d def close(self): self.f.close() # ??. I'm not sure if it makes sense to forward the close message. return self.call("close") class Helper(Referenceable): implements(interfaces.RIHelper, interfaces.IStatsProducer) # this is the non-distributed version. When we need to have multiple # helpers, this object will become the HelperCoordinator, and will query # the farm of Helpers to see if anyone has the storage_index of interest, # and send the request off to them. If nobody has it, we'll choose a # helper at random. 
name = "helper" VERSION = { "http://allmydata.org/tahoe/protocols/helper/v1" : { }, "application-version": str(allmydata.__full_version__), } MAX_UPLOAD_STATUSES = 10 def __init__(self, basedir, storage_broker, secret_holder, stats_provider, history): self._basedir = basedir self._storage_broker = storage_broker self._secret_holder = secret_holder self._chk_incoming = os.path.join(basedir, "CHK_incoming") self._chk_encoding = os.path.join(basedir, "CHK_encoding") fileutil.make_dirs(self._chk_incoming) fileutil.make_dirs(self._chk_encoding) self._active_uploads = {} self._all_uploads = weakref.WeakKeyDictionary() # for debugging self.stats_provider = stats_provider if stats_provider: stats_provider.register_producer(self) self._counters = {"chk_upload_helper.upload_requests": 0, "chk_upload_helper.upload_already_present": 0, "chk_upload_helper.upload_need_upload": 0, "chk_upload_helper.resumes": 0, "chk_upload_helper.fetched_bytes": 0, "chk_upload_helper.encoded_bytes": 0, } self._history = history def log(self, *args, **kwargs): if 'facility' not in kwargs: kwargs['facility'] = "tahoe.helper" return log.msg(*args, **kwargs) def count(self, key, value=1): if self.stats_provider: self.stats_provider.count(key, value) self._counters[key] += value def get_stats(self): OLD = 86400*2 # 48hours now = time.time() inc_count = inc_size = inc_size_old = 0 enc_count = enc_size = enc_size_old = 0 inc = os.listdir(self._chk_incoming) enc = os.listdir(self._chk_encoding) for f in inc: s = os.stat(os.path.join(self._chk_incoming, f)) size = s[stat.ST_SIZE] mtime = s[stat.ST_MTIME] inc_count += 1 inc_size += size if now - mtime > OLD: inc_size_old += size for f in enc: s = os.stat(os.path.join(self._chk_encoding, f)) size = s[stat.ST_SIZE] mtime = s[stat.ST_MTIME] enc_count += 1 enc_size += size if now - mtime > OLD: enc_size_old += size stats = { 'chk_upload_helper.active_uploads': len(self._active_uploads), 'chk_upload_helper.incoming_count': inc_count, 'chk_upload_helper.incoming_size': inc_size, 'chk_upload_helper.incoming_size_old': inc_size_old, 'chk_upload_helper.encoding_count': enc_count, 'chk_upload_helper.encoding_size': enc_size, 'chk_upload_helper.encoding_size_old': enc_size_old, } stats.update(self._counters) return stats def remote_get_version(self): return self.VERSION def remote_upload_chk(self, storage_index): self.count("chk_upload_helper.upload_requests") lp = self.log(format="helper: upload_chk query for SI %(si)s", si=si_b2a(storage_index)) if storage_index in self._active_uploads: self.log("upload is currently active", parent=lp) uh = self._active_uploads[storage_index] return (None, uh) d = self._check_chk(storage_index, lp) d.addCallback(self._did_chk_check, storage_index, lp) def _err(f): self.log("error while checking for chk-already-in-grid", failure=f, level=log.WEIRD, parent=lp, umid="jDtxZg") return f d.addErrback(_err) return d def _check_chk(self, storage_index, lp): # see if this file is already in the grid lp2 = self.log("doing a quick check+UEBfetch", parent=lp, level=log.NOISY) sb = self._storage_broker c = CHKCheckerAndUEBFetcher(sb.get_servers_for_psi, storage_index, lp2) d = c.check() def _checked(res): if res: (sharemap, ueb_data, ueb_hash) = res self.log("found file in grid", level=log.NOISY, parent=lp) hur = upload.HelperUploadResults() hur.uri_extension_hash = ueb_hash hur.sharemap = sharemap hur.uri_extension_data = ueb_data hur.preexisting_shares = len(sharemap) hur.pushed_shares = 0 return hur return None d.addCallback(_checked) return d def 
_did_chk_check(self, already_present, storage_index, lp): if already_present: # the necessary results are placed in the UploadResults self.count("chk_upload_helper.upload_already_present") self.log("file already found in grid", parent=lp) return (already_present, None) self.count("chk_upload_helper.upload_need_upload") # the file is not present in the grid, by which we mean there are # less than 'N' shares available. self.log("unable to find file in the grid", parent=lp, level=log.NOISY) # We need an upload helper. Check our active uploads again in # case there was a race. if storage_index in self._active_uploads: self.log("upload is currently active", parent=lp) uh = self._active_uploads[storage_index] else: self.log("creating new upload helper", parent=lp) uh = self._make_chk_upload_helper(storage_index, lp) self._active_uploads[storage_index] = uh self._add_upload(uh) return (None, uh) def _make_chk_upload_helper(self, storage_index, lp): si_s = si_b2a(storage_index) incoming_file = os.path.join(self._chk_incoming, si_s) encoding_file = os.path.join(self._chk_encoding, si_s) uh = CHKUploadHelper(storage_index, self, self._storage_broker, self._secret_holder, incoming_file, encoding_file, lp) return uh def _add_upload(self, uh): self._all_uploads[uh] = None if self._history: s = uh.get_upload_status() self._history.notify_helper_upload(s) def upload_finished(self, storage_index, size): # this is called with size=0 if the upload failed self.count("chk_upload_helper.encoded_bytes", size) uh = self._active_uploads[storage_index] del self._active_uploads[storage_index] s = uh.get_upload_status() s.set_active(False) tahoe-lafs-1.10.0/src/allmydata/immutable/repairer.py000066400000000000000000000101061221140116300225020ustar00rootroot00000000000000from zope.interface import implements from twisted.internet import defer from allmydata.storage.server import si_b2a from allmydata.util import log, consumer from allmydata.util.assertutil import precondition from allmydata.interfaces import IEncryptedUploadable from allmydata.immutable import upload class Repairer(log.PrefixingLogMixin): implements(IEncryptedUploadable) """I generate any shares which were not available and upload them to servers. Which servers? Well, I just use the normal upload process, so any servers that will take shares. In fact, I even believe servers if they say that they already have shares even if attempts to download those shares would fail because the shares are corrupted. My process of uploading replacement shares proceeds in a segment-wise fashion -- first I ask servers if they can hold the new shares, and wait until enough have agreed then I download the first segment of the file and upload the first block of each replacement share, and only after all those blocks have been uploaded do I download the second segment of the file and upload the second block of each replacement share to its respective server. (I do it this way in order to minimize the amount of downloading I have to do and the amount of memory I have to use at any one time.) If any of the servers to which I am uploading replacement shares fails to accept the blocks during this process, then I just stop using that server, abandon any share-uploads that were going to that server, and proceed to finish uploading the remaining shares to their respective servers. At the end of my work, I produce an object which satisfies the ICheckAndRepairResults interface (by firing the deferred that I returned from start() and passing that check-and-repair-results object). 
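# The segment-sized reads described above are ordinary filenode.read() calls
# into an in-memory consumer; a minimal sketch of such a range read (the same
# pattern read_encrypted() below uses, except that read_encrypted() tracks
# its own running offset and returns the chunks as a list):
from allmydata.util import consumer

def read_range(filenode, offset, length):
    mc = consumer.MemoryConsumer()
    d = filenode.read(mc, offset, length)
    d.addCallback(lambda ign: "".join(mc.chunks))  # join chunk list into one string
    return d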
Before I send any new request to a server, I always ask the 'monitor' object that was passed into my constructor whether this task has been cancelled (by invoking its raise_if_cancelled() method). """ def __init__(self, filenode, storage_broker, secret_holder, monitor): logprefix = si_b2a(filenode.get_storage_index())[:5] log.PrefixingLogMixin.__init__(self, "allmydata.immutable.repairer", prefix=logprefix) self._filenode = filenode self._storage_broker = storage_broker self._secret_holder = secret_holder self._monitor = monitor self._offset = 0 def start(self): self.log("starting repair") d = self._filenode.get_segment_size() def _got_segsize(segsize): vcap = self._filenode.get_verify_cap() k = vcap.needed_shares N = vcap.total_shares # Per ticket #1212 # (http://tahoe-lafs.org/trac/tahoe-lafs/ticket/1212) happy = 0 self._encodingparams = (k, happy, N, segsize) ul = upload.CHKUploader(self._storage_broker, self._secret_holder) return ul.start(self) # I am the IEncryptedUploadable d.addCallback(_got_segsize) return d # methods to satisfy the IEncryptedUploader interface # (From the perspective of an uploader I am an IEncryptedUploadable.) def set_upload_status(self, upload_status): self.upload_status = upload_status def get_size(self): size = self._filenode.get_size() assert size is not None return defer.succeed(size) def get_all_encoding_parameters(self): return defer.succeed(self._encodingparams) def read_encrypted(self, length, hash_only): """Returns a deferred which eventually fires with the requested ciphertext, as a list of strings.""" precondition(length) # please don't ask to read 0 bytes mc = consumer.MemoryConsumer() d = self._filenode.read(mc, self._offset, length) self._offset += length d.addCallback(lambda ign: mc.chunks) return d def get_storage_index(self): return self._filenode.get_storage_index() def close(self): pass tahoe-lafs-1.10.0/src/allmydata/immutable/upload.py000066400000000000000000002104021221140116300221560ustar00rootroot00000000000000import os, time, weakref, itertools from zope.interface import implements from twisted.python import failure from twisted.internet import defer from twisted.application import service from foolscap.api import Referenceable, Copyable, RemoteCopy, fireEventually from allmydata.util.hashutil import file_renewal_secret_hash, \ file_cancel_secret_hash, bucket_renewal_secret_hash, \ bucket_cancel_secret_hash, plaintext_hasher, \ storage_index_hash, plaintext_segment_hasher, convergence_hasher from allmydata import hashtree, uri from allmydata.storage.server import si_b2a from allmydata.immutable import encode from allmydata.util import base32, dictutil, idlib, log, mathutil from allmydata.util.happinessutil import servers_of_happiness, \ shares_by_server, merge_servers, \ failure_message from allmydata.util.assertutil import precondition, _assert from allmydata.util.rrefutil import add_version_to_remote_reference from allmydata.interfaces import IUploadable, IUploader, IUploadResults, \ IEncryptedUploadable, RIEncryptedUploadable, IUploadStatus, \ NoServersError, InsufficientVersionError, UploadUnhappinessError, \ DEFAULT_MAX_SEGMENT_SIZE from allmydata.immutable import layout from pycryptopp.cipher.aes import AES from cStringIO import StringIO # this wants to live in storage, not here class TooFullError(Exception): pass # HelperUploadResults are what we get from the Helper, and to retain # backwards compatibility with old Helpers we can't change the format. We # convert them into a local UploadResults upon receipt. 
class HelperUploadResults(Copyable, RemoteCopy): # note: don't change this string, it needs to match the value used on the # helper, and it does *not* need to match the fully-qualified # package/module/class name typeToCopy = "allmydata.upload.UploadResults.tahoe.allmydata.com" copytype = typeToCopy # also, think twice about changing the shape of any existing attribute, # because instances of this class are sent from the helper to its client, # so changing this may break compatibility. Consider adding new fields # instead of modifying existing ones. def __init__(self): self.timings = {} # dict of name to number of seconds self.sharemap = dictutil.DictOfSets() # {shnum: set(serverid)} self.servermap = dictutil.DictOfSets() # {serverid: set(shnum)} self.file_size = None self.ciphertext_fetched = None # how much the helper fetched self.uri = None self.preexisting_shares = None # count of shares already present self.pushed_shares = None # count of shares we pushed class UploadResults: implements(IUploadResults) def __init__(self, file_size, ciphertext_fetched, # how much the helper fetched preexisting_shares, # count of shares already present pushed_shares, # count of shares we pushed sharemap, # {shnum: set(server)} servermap, # {server: set(shnum)} timings, # dict of name to number of seconds uri_extension_data, uri_extension_hash, verifycapstr): self._file_size = file_size self._ciphertext_fetched = ciphertext_fetched self._preexisting_shares = preexisting_shares self._pushed_shares = pushed_shares self._sharemap = sharemap self._servermap = servermap self._timings = timings self._uri_extension_data = uri_extension_data self._uri_extension_hash = uri_extension_hash self._verifycapstr = verifycapstr def set_uri(self, uri): self._uri = uri def get_file_size(self): return self._file_size def get_uri(self): return self._uri def get_ciphertext_fetched(self): return self._ciphertext_fetched def get_preexisting_shares(self): return self._preexisting_shares def get_pushed_shares(self): return self._pushed_shares def get_sharemap(self): return self._sharemap def get_servermap(self): return self._servermap def get_timings(self): return self._timings def get_uri_extension_data(self): return self._uri_extension_data def get_verifycapstr(self): return self._verifycapstr # our current uri_extension is 846 bytes for small files, a few bytes # more for larger ones (since the filesize is encoded in decimal in a # few places). Ask for a little bit more just in case we need it. If # the extension changes size, we can change EXTENSION_SIZE to # allocate a more accurate amount of space. EXTENSION_SIZE = 1000 # TODO: actual extensions are closer to 419 bytes, so we can probably lower # this. 
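# Illustrative sketch (not part of the original module, and never called by
# it): UploadResults keeps share placement both ways round -- a sharemap of
# {shnum: set(server)} and a servermap of {server: set(shnum)}. The
# hypothetical helper below only documents the shape of those two dicts by
# deriving one from the other.
def _invert_sharemap_sketch(sharemap):
    """Given {shnum: set(server)}, return the inverse {server: set(shnum)}."""
    servermap = {}
    for shnum, servers in sharemap.items():
        for server in servers:
            servermap.setdefault(server, set()).add(shnum)
    return servermap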
def pretty_print_shnum_to_servers(s): return ', '.join([ "sh%s: %s" % (k, '+'.join([idlib.shortnodeid_b2a(x) for x in v])) for k, v in s.iteritems() ]) class ServerTracker: def __init__(self, server, sharesize, blocksize, num_segments, num_share_hashes, storage_index, bucket_renewal_secret, bucket_cancel_secret): self._server = server self.buckets = {} # k: shareid, v: IRemoteBucketWriter self.sharesize = sharesize wbp = layout.make_write_bucket_proxy(None, None, sharesize, blocksize, num_segments, num_share_hashes, EXTENSION_SIZE) self.wbp_class = wbp.__class__ # to create more of them self.allocated_size = wbp.get_allocated_size() self.blocksize = blocksize self.num_segments = num_segments self.num_share_hashes = num_share_hashes self.storage_index = storage_index self.renew_secret = bucket_renewal_secret self.cancel_secret = bucket_cancel_secret def __repr__(self): return ("" % (self._server.get_name(), si_b2a(self.storage_index)[:5])) def get_server(self): return self._server def get_serverid(self): return self._server.get_serverid() def get_name(self): return self._server.get_name() def query(self, sharenums): rref = self._server.get_rref() d = rref.callRemote("allocate_buckets", self.storage_index, self.renew_secret, self.cancel_secret, sharenums, self.allocated_size, canary=Referenceable()) d.addCallback(self._got_reply) return d def ask_about_existing_shares(self): rref = self._server.get_rref() return rref.callRemote("get_buckets", self.storage_index) def _got_reply(self, (alreadygot, buckets)): #log.msg("%s._got_reply(%s)" % (self, (alreadygot, buckets))) b = {} for sharenum, rref in buckets.iteritems(): bp = self.wbp_class(rref, self._server, self.sharesize, self.blocksize, self.num_segments, self.num_share_hashes, EXTENSION_SIZE) b[sharenum] = bp self.buckets.update(b) return (alreadygot, set(b.keys())) def abort(self): """ I abort the remote bucket writers for all shares. This is a good idea to conserve space on the storage server. """ self.abort_some_buckets(self.buckets.keys()) def abort_some_buckets(self, sharenums): """ I abort the remote bucket writers for the share numbers in sharenums. """ for sharenum in sharenums: if sharenum in self.buckets: self.buckets[sharenum].abort() del self.buckets[sharenum] def str_shareloc(shnum, bucketwriter): return "%s: %s" % (shnum, bucketwriter.get_servername(),) class Tahoe2ServerSelector(log.PrefixingLogMixin): def __init__(self, upload_id, logparent=None, upload_status=None): self.upload_id = upload_id self.query_count, self.good_query_count, self.bad_query_count = 0,0,0 # Servers that are working normally, but full. self.full_count = 0 self.error_count = 0 self.num_servers_contacted = 0 self.last_failure_msg = None self._status = IUploadStatus(upload_status) log.PrefixingLogMixin.__init__(self, 'tahoe.immutable.upload', logparent, prefix=upload_id) self.log("starting", level=log.OPERATIONAL) def __repr__(self): return "" % self.upload_id def get_shareholders(self, storage_broker, secret_holder, storage_index, share_size, block_size, num_segments, total_shares, needed_shares, servers_of_happiness): """ @return: (upload_trackers, already_serverids), where upload_trackers is a set of ServerTracker instances that have agreed to hold some shares for us (the shareids are stashed inside the ServerTracker), and already_serverids is a dict mapping shnum to a set of serverids for servers which claim to already have the share. 
""" if self._status: self._status.set_status("Contacting Servers..") self.total_shares = total_shares self.servers_of_happiness = servers_of_happiness self.needed_shares = needed_shares self.homeless_shares = set(range(total_shares)) self.use_trackers = set() # ServerTrackers that have shares assigned # to them self.preexisting_shares = {} # shareid => set(serverids) holding shareid # These servers have shares -- any shares -- for our SI. We keep # track of these to write an error message with them later. self.serverids_with_shares = set() # this needed_hashes computation should mirror # Encoder.send_all_share_hash_trees. We use an IncompleteHashTree # (instead of a HashTree) because we don't require actual hashing # just to count the levels. ht = hashtree.IncompleteHashTree(total_shares) num_share_hashes = len(ht.needed_hashes(0, include_leaf=True)) # figure out how much space to ask for wbp = layout.make_write_bucket_proxy(None, None, share_size, 0, num_segments, num_share_hashes, EXTENSION_SIZE) allocated_size = wbp.get_allocated_size() all_servers = storage_broker.get_servers_for_psi(storage_index) if not all_servers: raise NoServersError("client gave us zero servers") # filter the list of servers according to which ones can accomodate # this request. This excludes older servers (which used a 4-byte size # field) from getting large shares (for files larger than about # 12GiB). See #439 for details. def _get_maxsize(server): v0 = server.get_rref().version v1 = v0["http://allmydata.org/tahoe/protocols/storage/v1"] return v1["maximum-immutable-share-size"] writeable_servers = [server for server in all_servers if _get_maxsize(server) >= allocated_size] readonly_servers = set(all_servers[:2*total_shares]) - set(writeable_servers) # decide upon the renewal/cancel secrets, to include them in the # allocate_buckets query. client_renewal_secret = secret_holder.get_renewal_secret() client_cancel_secret = secret_holder.get_cancel_secret() file_renewal_secret = file_renewal_secret_hash(client_renewal_secret, storage_index) file_cancel_secret = file_cancel_secret_hash(client_cancel_secret, storage_index) def _make_trackers(servers): trackers = [] for s in servers: seed = s.get_lease_seed() renew = bucket_renewal_secret_hash(file_renewal_secret, seed) cancel = bucket_cancel_secret_hash(file_cancel_secret, seed) st = ServerTracker(s, share_size, block_size, num_segments, num_share_hashes, storage_index, renew, cancel) trackers.append(st) return trackers # We assign each servers/trackers into one three lists. They all # start in the "first pass" list. During the first pass, as we ask # each one to hold a share, we move their tracker to the "second # pass" list, until the first-pass list is empty. Then during the # second pass, as we ask each to hold more shares, we move their # tracker to the "next pass" list, until the second-pass list is # empty. Then we move everybody from the next-pass list back to the # second-pass list and repeat the "second" pass (really the third, # fourth, etc pass), until all shares are assigned, or we've run out # of potential servers. self.first_pass_trackers = _make_trackers(writeable_servers) self.second_pass_trackers = [] # servers worth asking again self.next_pass_trackers = [] # servers that we have asked again self._started_second_pass = False # We don't try to allocate shares to these servers, since they've # said that they're incapable of storing shares of the size that we'd # want to store. 
We ask them about existing shares for this storage # index, which we want to know about for accurate # servers_of_happiness accounting, then we forget about them. readonly_trackers = _make_trackers(readonly_servers) # We now ask servers that can't hold any new shares about existing # shares that they might have for our SI. Once this is done, we # start placing the shares that we haven't already accounted # for. ds = [] if self._status and readonly_trackers: self._status.set_status("Contacting readonly servers to find " "any existing shares") for tracker in readonly_trackers: assert isinstance(tracker, ServerTracker) d = tracker.ask_about_existing_shares() d.addBoth(self._handle_existing_response, tracker) ds.append(d) self.num_servers_contacted += 1 self.query_count += 1 self.log("asking server %s for any existing shares" % (tracker.get_name(),), level=log.NOISY) dl = defer.DeferredList(ds) dl.addCallback(lambda ign: self._loop()) return dl def _handle_existing_response(self, res, tracker): """ I handle responses to the queries sent by Tahoe2ServerSelector._existing_shares. """ serverid = tracker.get_serverid() if isinstance(res, failure.Failure): self.log("%s got error during existing shares check: %s" % (tracker.get_name(), res), level=log.UNUSUAL) self.error_count += 1 self.bad_query_count += 1 else: buckets = res if buckets: self.serverids_with_shares.add(serverid) self.log("response to get_buckets() from server %s: alreadygot=%s" % (tracker.get_name(), tuple(sorted(buckets))), level=log.NOISY) for bucket in buckets: self.preexisting_shares.setdefault(bucket, set()).add(serverid) self.homeless_shares.discard(bucket) self.full_count += 1 self.bad_query_count += 1 def _get_progress_message(self): if not self.homeless_shares: msg = "placed all %d shares, " % (self.total_shares) else: msg = ("placed %d shares out of %d total (%d homeless), " % (self.total_shares - len(self.homeless_shares), self.total_shares, len(self.homeless_shares))) return (msg + "want to place shares on at least %d servers such that " "any %d of them have enough shares to recover the file, " "sent %d queries to %d servers, " "%d queries placed some shares, %d placed none " "(of which %d placed none due to the server being" " full and %d placed none due to an error)" % (self.servers_of_happiness, self.needed_shares, self.query_count, self.num_servers_contacted, self.good_query_count, self.bad_query_count, self.full_count, self.error_count)) def _loop(self): if not self.homeless_shares: merged = merge_servers(self.preexisting_shares, self.use_trackers) effective_happiness = servers_of_happiness(merged) if self.servers_of_happiness <= effective_happiness: msg = ("server selection successful for %s: %s: pretty_print_merged: %s, " "self.use_trackers: %s, self.preexisting_shares: %s") \ % (self, self._get_progress_message(), pretty_print_shnum_to_servers(merged), [', '.join([str_shareloc(k,v) for k,v in st.buckets.iteritems()]) for st in self.use_trackers], pretty_print_shnum_to_servers(self.preexisting_shares)) self.log(msg, level=log.OPERATIONAL) return (self.use_trackers, self.preexisting_shares) else: # We're not okay right now, but maybe we can fix it by # redistributing some shares. 
In cases where one or two # servers has, before the upload, all or most of the # shares for a given SI, this can work by allowing _loop # a chance to spread those out over the other servers, delta = self.servers_of_happiness - effective_happiness shares = shares_by_server(self.preexisting_shares) # Each server in shares maps to a set of shares stored on it. # Since we want to keep at least one share on each server # that has one (otherwise we'd only be making # the situation worse by removing distinct servers), # each server has len(its shares) - 1 to spread around. shares_to_spread = sum([len(list(sharelist)) - 1 for (server, sharelist) in shares.items()]) if delta <= len(self.first_pass_trackers) and \ shares_to_spread >= delta: items = shares.items() while len(self.homeless_shares) < delta: # Loop through the allocated shares, removing # one from each server that has more than one # and putting it back into self.homeless_shares # until we've done this delta times. server, sharelist = items.pop() if len(sharelist) > 1: share = sharelist.pop() self.homeless_shares.add(share) self.preexisting_shares[share].remove(server) if not self.preexisting_shares[share]: del self.preexisting_shares[share] items.append((server, sharelist)) for writer in self.use_trackers: writer.abort_some_buckets(self.homeless_shares) return self._loop() else: # Redistribution won't help us; fail. server_count = len(self.serverids_with_shares) failmsg = failure_message(server_count, self.needed_shares, self.servers_of_happiness, effective_happiness) servmsgtempl = "server selection unsuccessful for %r: %s (%s), merged=%s" servmsg = servmsgtempl % ( self, failmsg, self._get_progress_message(), pretty_print_shnum_to_servers(merged) ) self.log(servmsg, level=log.INFREQUENT) return self._failed("%s (%s)" % (failmsg, self._get_progress_message())) if self.first_pass_trackers: tracker = self.first_pass_trackers.pop(0) # TODO: don't pre-convert all serverids to ServerTrackers assert isinstance(tracker, ServerTracker) shares_to_ask = set(sorted(self.homeless_shares)[:1]) self.homeless_shares -= shares_to_ask self.query_count += 1 self.num_servers_contacted += 1 if self._status: self._status.set_status("Contacting Servers [%s] (first query)," " %d shares left.." % (tracker.get_name(), len(self.homeless_shares))) d = tracker.query(shares_to_ask) d.addBoth(self._got_response, tracker, shares_to_ask, self.second_pass_trackers) return d elif self.second_pass_trackers: # ask a server that we've already asked. if not self._started_second_pass: self.log("starting second pass", level=log.NOISY) self._started_second_pass = True num_shares = mathutil.div_ceil(len(self.homeless_shares), len(self.second_pass_trackers)) tracker = self.second_pass_trackers.pop(0) shares_to_ask = set(sorted(self.homeless_shares)[:num_shares]) self.homeless_shares -= shares_to_ask self.query_count += 1 if self._status: self._status.set_status("Contacting Servers [%s] (second query)," " %d shares left.." % (tracker.get_name(), len(self.homeless_shares))) d = tracker.query(shares_to_ask) d.addBoth(self._got_response, tracker, shares_to_ask, self.next_pass_trackers) return d elif self.next_pass_trackers: # we've finished the second-or-later pass. Move all the remaining # servers back into self.second_pass_trackers for the next pass. self.second_pass_trackers.extend(self.next_pass_trackers) self.next_pass_trackers[:] = [] return self._loop() else: # no more servers. If we haven't placed enough shares, we fail. 
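# (Hedged worked example, not from the original source: with encoding
#  parameters k=3, N=10 and servers_of_happiness=7, suppose the shares we
#  placed or found ended up on only 5 distinct servers. The "effective
#  happiness" computed below -- roughly, the number of distinct servers
#  that can each be matched to a distinct share -- is then at most 5,
#  which is < 7, so this branch gives up and raises
#  UploadUnhappinessError via _failed() rather than reporting success.)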
merged = merge_servers(self.preexisting_shares, self.use_trackers) effective_happiness = servers_of_happiness(merged) if effective_happiness < self.servers_of_happiness: msg = failure_message(len(self.serverids_with_shares), self.needed_shares, self.servers_of_happiness, effective_happiness) msg = ("server selection failed for %s: %s (%s)" % (self, msg, self._get_progress_message())) if self.last_failure_msg: msg += " (%s)" % (self.last_failure_msg,) self.log(msg, level=log.UNUSUAL) return self._failed(msg) else: # we placed enough to be happy, so we're done if self._status: self._status.set_status("Placed all shares") msg = ("server selection successful (no more servers) for %s: %s: %s" % (self, self._get_progress_message(), pretty_print_shnum_to_servers(merged))) self.log(msg, level=log.OPERATIONAL) return (self.use_trackers, self.preexisting_shares) def _got_response(self, res, tracker, shares_to_ask, put_tracker_here): if isinstance(res, failure.Failure): # This is unusual, and probably indicates a bug or a network # problem. self.log("%s got error during server selection: %s" % (tracker, res), level=log.UNUSUAL) self.error_count += 1 self.bad_query_count += 1 self.homeless_shares |= shares_to_ask if (self.first_pass_trackers or self.second_pass_trackers or self.next_pass_trackers): # there is still hope, so just loop pass else: # No more servers, so this upload might fail (it depends upon # whether we've hit servers_of_happiness or not). Log the last # failure we got: if a coding error causes all servers to fail # in the same way, this allows the common failure to be seen # by the uploader and should help with debugging msg = ("last failure (from %s) was: %s" % (tracker, res)) self.last_failure_msg = msg else: (alreadygot, allocated) = res self.log("response to allocate_buckets() from server %s: alreadygot=%s, allocated=%s" % (tracker.get_name(), tuple(sorted(alreadygot)), tuple(sorted(allocated))), level=log.NOISY) progress = False for s in alreadygot: self.preexisting_shares.setdefault(s, set()).add(tracker.get_serverid()) if s in self.homeless_shares: self.homeless_shares.remove(s) progress = True elif s in shares_to_ask: progress = True # the ServerTracker will remember which shares were allocated on # that peer. We just have to remember to use them. if allocated: self.use_trackers.add(tracker) progress = True if allocated or alreadygot: self.serverids_with_shares.add(tracker.get_serverid()) not_yet_present = set(shares_to_ask) - set(alreadygot) still_homeless = not_yet_present - set(allocated) if progress: # They accepted at least one of the shares that we asked # them to accept, or they had a share that we didn't ask # them to accept but that we hadn't placed yet, so this # was a productive query self.good_query_count += 1 else: self.bad_query_count += 1 self.full_count += 1 if still_homeless: # In networks with lots of space, this is very unusual and # probably indicates an error. In networks with servers that # are full, it is merely unusual. In networks that are very # full, it is common, and many uploads will fail. In most # cases, this is obviously not fatal, and we'll just use some # other servers. # some shares are still homeless, keep trying to find them a # home. The ones that were rejected get first priority. self.homeless_shares |= still_homeless # Since they were unable to accept all of our requests, so it # is safe to assume that asking them again won't help. else: # if they *were* able to accept everything, they might be # willing to accept even more. 
put_tracker_here.append(tracker) # now loop return self._loop() def _failed(self, msg): """ I am called when server selection fails. I first abort all of the remote buckets that I allocated during my unsuccessful attempt to place shares for this file. I then raise an UploadUnhappinessError with my msg argument. """ for tracker in self.use_trackers: assert isinstance(tracker, ServerTracker) tracker.abort() raise UploadUnhappinessError(msg) class EncryptAnUploadable: """This is a wrapper that takes an IUploadable and provides IEncryptedUploadable.""" implements(IEncryptedUploadable) CHUNKSIZE = 50*1024 def __init__(self, original, log_parent=None): precondition(original.default_params_set, "set_default_encoding_parameters not called on %r before wrapping with EncryptAnUploadable" % (original,)) self.original = IUploadable(original) self._log_number = log_parent self._encryptor = None self._plaintext_hasher = plaintext_hasher() self._plaintext_segment_hasher = None self._plaintext_segment_hashes = [] self._encoding_parameters = None self._file_size = None self._ciphertext_bytes_read = 0 self._status = None def set_upload_status(self, upload_status): self._status = IUploadStatus(upload_status) self.original.set_upload_status(upload_status) def log(self, *args, **kwargs): if "facility" not in kwargs: kwargs["facility"] = "upload.encryption" if "parent" not in kwargs: kwargs["parent"] = self._log_number return log.msg(*args, **kwargs) def get_size(self): if self._file_size is not None: return defer.succeed(self._file_size) d = self.original.get_size() def _got_size(size): self._file_size = size if self._status: self._status.set_size(size) return size d.addCallback(_got_size) return d def get_all_encoding_parameters(self): if self._encoding_parameters is not None: return defer.succeed(self._encoding_parameters) d = self.original.get_all_encoding_parameters() def _got(encoding_parameters): (k, happy, n, segsize) = encoding_parameters self._segment_size = segsize # used by segment hashers self._encoding_parameters = encoding_parameters self.log("my encoding parameters: %s" % (encoding_parameters,), level=log.NOISY) return encoding_parameters d.addCallback(_got) return d def _get_encryptor(self): if self._encryptor: return defer.succeed(self._encryptor) d = self.original.get_encryption_key() def _got(key): e = AES(key) self._encryptor = e storage_index = storage_index_hash(key) assert isinstance(storage_index, str) # There's no point to having the SI be longer than the key, so we # specify that it is truncated to the same 128 bits as the AES key. 
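# (Illustrative sketch of the derivation, with the hashutil details elided
#  and the tag name invented for the example: the 16-byte AES key -- random
#  or convergent -- is fed through a tagged SHA-256d hash and truncated,
#  roughly
#      storage_index = tagged_hash("<storage-index tag>", key)[:16]
#  so the same key always yields the same storage index, while the storage
#  index alone does not reveal the key.)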
assert len(storage_index) == 16 # SHA-256 truncated to 128b self._storage_index = storage_index if self._status: self._status.set_storage_index(storage_index) return e d.addCallback(_got) return d def get_storage_index(self): d = self._get_encryptor() d.addCallback(lambda res: self._storage_index) return d def _get_segment_hasher(self): p = self._plaintext_segment_hasher if p: left = self._segment_size - self._plaintext_segment_hashed_bytes return p, left p = plaintext_segment_hasher() self._plaintext_segment_hasher = p self._plaintext_segment_hashed_bytes = 0 return p, self._segment_size def _update_segment_hash(self, chunk): offset = 0 while offset < len(chunk): p, segment_left = self._get_segment_hasher() chunk_left = len(chunk) - offset this_segment = min(chunk_left, segment_left) p.update(chunk[offset:offset+this_segment]) self._plaintext_segment_hashed_bytes += this_segment if self._plaintext_segment_hashed_bytes == self._segment_size: # we've filled this segment self._plaintext_segment_hashes.append(p.digest()) self._plaintext_segment_hasher = None self.log("closed hash [%d]: %dB" % (len(self._plaintext_segment_hashes)-1, self._plaintext_segment_hashed_bytes), level=log.NOISY) self.log(format="plaintext leaf hash [%(segnum)d] is %(hash)s", segnum=len(self._plaintext_segment_hashes)-1, hash=base32.b2a(p.digest()), level=log.NOISY) offset += this_segment def read_encrypted(self, length, hash_only): # make sure our parameters have been set up first d = self.get_all_encoding_parameters() # and size d.addCallback(lambda ignored: self.get_size()) d.addCallback(lambda ignored: self._get_encryptor()) # then fetch and encrypt the plaintext. The unusual structure here # (passing a Deferred *into* a function) is needed to avoid # overflowing the stack: Deferreds don't optimize out tail recursion. # We also pass in a list, to which _read_encrypted will append # ciphertext. ciphertext = [] d2 = defer.Deferred() d.addCallback(lambda ignored: self._read_encrypted(length, ciphertext, hash_only, d2)) d.addCallback(lambda ignored: d2) return d def _read_encrypted(self, remaining, ciphertext, hash_only, fire_when_done): if not remaining: fire_when_done.callback(ciphertext) return None # tolerate large length= values without consuming a lot of RAM by # reading just a chunk (say 50kB) at a time. This only really matters # when hash_only==True (i.e. resuming an interrupted upload), since # that's the case where we will be skipping over a lot of data. size = min(remaining, self.CHUNKSIZE) remaining = remaining - size # read a chunk of plaintext.. d = defer.maybeDeferred(self.original.read, size) # N.B.: if read() is synchronous, then since everything else is # actually synchronous too, we'd blow the stack unless we stall for a # tick. Once you accept a Deferred from IUploadable.read(), you must # be prepared to have it fire immediately too. d.addCallback(fireEventually) def _good(plaintext): # and encrypt it.. # o/' over the fields we go, hashing all the way, sHA! sHA! sHA! o/' ct = self._hash_and_encrypt_plaintext(plaintext, hash_only) ciphertext.extend(ct) self._read_encrypted(remaining, ciphertext, hash_only, fire_when_done) def _err(why): fire_when_done.errback(why) d.addCallback(_good) d.addErrback(_err) return None def _hash_and_encrypt_plaintext(self, data, hash_only): assert isinstance(data, (tuple, list)), type(data) data = list(data) cryptdata = [] # we use data.pop(0) instead of 'for chunk in data' to save # memory: each chunk is destroyed as soon as we're done with it. 
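# (Hypothetical numbers for illustration: a large read_encrypted() request
#  has already been broken into CHUNKSIZE=50 KiB reads by _read_encrypted()
#  above; within one such read, popping each piece off the list as it is
#  hashed and encrypted keeps at most one plaintext piece plus its
#  ciphertext alive at a time, instead of holding every piece until the
#  loop finishes.)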
bytes_processed = 0 while data: chunk = data.pop(0) self.log(" read_encrypted handling %dB-sized chunk" % len(chunk), level=log.NOISY) bytes_processed += len(chunk) self._plaintext_hasher.update(chunk) self._update_segment_hash(chunk) # TODO: we have to encrypt the data (even if hash_only==True) # because pycryptopp's AES-CTR implementation doesn't offer a # way to change the counter value. Once pycryptopp acquires # this ability, change this to simply update the counter # before each call to (hash_only==False) _encryptor.process() ciphertext = self._encryptor.process(chunk) if hash_only: self.log(" skipping encryption", level=log.NOISY) else: cryptdata.append(ciphertext) del ciphertext del chunk self._ciphertext_bytes_read += bytes_processed if self._status: progress = float(self._ciphertext_bytes_read) / self._file_size self._status.set_progress(1, progress) return cryptdata def get_plaintext_hashtree_leaves(self, first, last, num_segments): # this is currently unused, but will live again when we fix #453 if len(self._plaintext_segment_hashes) < num_segments: # close out the last one assert len(self._plaintext_segment_hashes) == num_segments-1 p, segment_left = self._get_segment_hasher() self._plaintext_segment_hashes.append(p.digest()) del self._plaintext_segment_hasher self.log("closing plaintext leaf hasher, hashed %d bytes" % self._plaintext_segment_hashed_bytes, level=log.NOISY) self.log(format="plaintext leaf hash [%(segnum)d] is %(hash)s", segnum=len(self._plaintext_segment_hashes)-1, hash=base32.b2a(p.digest()), level=log.NOISY) assert len(self._plaintext_segment_hashes) == num_segments return defer.succeed(tuple(self._plaintext_segment_hashes[first:last])) def get_plaintext_hash(self): h = self._plaintext_hasher.digest() return defer.succeed(h) def close(self): return self.original.close() class UploadStatus: implements(IUploadStatus) statusid_counter = itertools.count(0) def __init__(self): self.storage_index = None self.size = None self.helper = False self.status = "Not started" self.progress = [0.0, 0.0, 0.0] self.active = True self.results = None self.counter = self.statusid_counter.next() self.started = time.time() def get_started(self): return self.started def get_storage_index(self): return self.storage_index def get_size(self): return self.size def using_helper(self): return self.helper def get_status(self): return self.status def get_progress(self): return tuple(self.progress) def get_active(self): return self.active def get_results(self): return self.results def get_counter(self): return self.counter def set_storage_index(self, si): self.storage_index = si def set_size(self, size): self.size = size def set_helper(self, helper): self.helper = helper def set_status(self, status): self.status = status def set_progress(self, which, value): # [0]: chk, [1]: ciphertext, [2]: encode+push self.progress[which] = value def set_active(self, value): self.active = value def set_results(self, value): self.results = value class CHKUploader: server_selector_class = Tahoe2ServerSelector def __init__(self, storage_broker, secret_holder): # server_selector needs storage_broker and secret_holder self._storage_broker = storage_broker self._secret_holder = secret_holder self._log_number = self.log("CHKUploader starting", parent=None) self._encoder = None self._storage_index = None self._upload_status = UploadStatus() self._upload_status.set_helper(False) self._upload_status.set_active(True) # locate_all_shareholders() will create the following attribute: # self._server_trackers = {} # k: 
shnum, v: instance of ServerTracker def log(self, *args, **kwargs): if "parent" not in kwargs: kwargs["parent"] = self._log_number if "facility" not in kwargs: kwargs["facility"] = "tahoe.upload" return log.msg(*args, **kwargs) def start(self, encrypted_uploadable): """Start uploading the file. Returns a Deferred that will fire with the UploadResults instance. """ self._started = time.time() eu = IEncryptedUploadable(encrypted_uploadable) self.log("starting upload of %s" % eu) eu.set_upload_status(self._upload_status) d = self.start_encrypted(eu) def _done(uploadresults): self._upload_status.set_active(False) return uploadresults d.addBoth(_done) return d def abort(self): """Call this if the upload must be abandoned before it completes. This will tell the shareholders to delete their partial shares. I return a Deferred that fires when these messages have been acked.""" if not self._encoder: # how did you call abort() before calling start() ? return defer.succeed(None) return self._encoder.abort() def start_encrypted(self, encrypted): """ Returns a Deferred that will fire with the UploadResults instance. """ eu = IEncryptedUploadable(encrypted) started = time.time() self._encoder = e = encode.Encoder(self._log_number, self._upload_status) d = e.set_encrypted_uploadable(eu) d.addCallback(self.locate_all_shareholders, started) d.addCallback(self.set_shareholders, e) d.addCallback(lambda res: e.start()) d.addCallback(self._encrypted_done) return d def locate_all_shareholders(self, encoder, started): server_selection_started = now = time.time() self._storage_index_elapsed = now - started storage_broker = self._storage_broker secret_holder = self._secret_holder storage_index = encoder.get_param("storage_index") self._storage_index = storage_index upload_id = si_b2a(storage_index)[:5] self.log("using storage index %s" % upload_id) server_selector = self.server_selector_class(upload_id, self._log_number, self._upload_status) share_size = encoder.get_param("share_size") block_size = encoder.get_param("block_size") num_segments = encoder.get_param("num_segments") k,desired,n = encoder.get_param("share_counts") self._server_selection_started = time.time() d = server_selector.get_shareholders(storage_broker, secret_holder, storage_index, share_size, block_size, num_segments, n, k, desired) def _done(res): self._server_selection_elapsed = time.time() - server_selection_started return res d.addCallback(_done) return d def set_shareholders(self, (upload_trackers, already_serverids), encoder): """ @param upload_trackers: a sequence of ServerTracker objects that have agreed to hold some shares for us (the shareids are stashed inside the ServerTracker) @paran already_serverids: a dict mapping sharenum to a set of serverids for servers that claim to already have this share """ msgtempl = "set_shareholders; upload_trackers is %s, already_serverids is %s" values = ([', '.join([str_shareloc(k,v) for k,v in st.buckets.iteritems()]) for st in upload_trackers], already_serverids) self.log(msgtempl % values, level=log.OPERATIONAL) # record already-present shares in self._results self._count_preexisting_shares = len(already_serverids) self._server_trackers = {} # k: shnum, v: instance of ServerTracker for tracker in upload_trackers: assert isinstance(tracker, ServerTracker) buckets = {} servermap = already_serverids.copy() for tracker in upload_trackers: buckets.update(tracker.buckets) for shnum in tracker.buckets: self._server_trackers[shnum] = tracker servermap.setdefault(shnum, set()).add(tracker.get_serverid()) 
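# (Hedged worked example with made-up values: if tracker A accepted shares
#  {0, 1} and tracker B accepted share {2}, then after this loop
#  buckets == {0: <bw0>, 1: <bw1>, 2: <bw2>}, self._server_trackers maps
#  0 and 1 to A and 2 to B, and servermap == {0: {A}, 1: {A}, 2: {B}} plus
#  whatever entries arrived in already_serverids. The assertion just below
#  re-checks that no share number was claimed by two different trackers.)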
assert len(buckets) == sum([len(tracker.buckets) for tracker in upload_trackers]), \ "%s (%s) != %s (%s)" % ( len(buckets), buckets, sum([len(tracker.buckets) for tracker in upload_trackers]), [(t.buckets, t.get_serverid()) for t in upload_trackers] ) encoder.set_shareholders(buckets, servermap) def _encrypted_done(self, verifycap): """Returns a Deferred that will fire with the UploadResults instance.""" e = self._encoder sharemap = dictutil.DictOfSets() servermap = dictutil.DictOfSets() for shnum in e.get_shares_placed(): server = self._server_trackers[shnum].get_server() sharemap.add(shnum, server) servermap.add(server, shnum) now = time.time() timings = {} timings["total"] = now - self._started timings["storage_index"] = self._storage_index_elapsed timings["peer_selection"] = self._server_selection_elapsed timings.update(e.get_times()) ur = UploadResults(file_size=e.file_size, ciphertext_fetched=0, preexisting_shares=self._count_preexisting_shares, pushed_shares=len(e.get_shares_placed()), sharemap=sharemap, servermap=servermap, timings=timings, uri_extension_data=e.get_uri_extension_data(), uri_extension_hash=e.get_uri_extension_hash(), verifycapstr=verifycap.to_string()) self._upload_status.set_results(ur) return ur def get_upload_status(self): return self._upload_status def read_this_many_bytes(uploadable, size, prepend_data=[]): if size == 0: return defer.succeed([]) d = uploadable.read(size) def _got(data): assert isinstance(data, list) bytes = sum([len(piece) for piece in data]) assert bytes > 0 assert bytes <= size remaining = size - bytes if remaining: return read_this_many_bytes(uploadable, remaining, prepend_data + data) return prepend_data + data d.addCallback(_got) return d class LiteralUploader: def __init__(self): self._status = s = UploadStatus() s.set_storage_index(None) s.set_helper(False) s.set_progress(0, 1.0) s.set_active(False) def start(self, uploadable): uploadable = IUploadable(uploadable) d = uploadable.get_size() def _got_size(size): self._size = size self._status.set_size(size) return read_this_many_bytes(uploadable, size) d.addCallback(_got_size) d.addCallback(lambda data: uri.LiteralFileURI("".join(data))) d.addCallback(lambda u: u.to_string()) d.addCallback(self._build_results) return d def _build_results(self, uri): ur = UploadResults(file_size=self._size, ciphertext_fetched=0, preexisting_shares=0, pushed_shares=0, sharemap={}, servermap={}, timings={}, uri_extension_data=None, uri_extension_hash=None, verifycapstr=None) ur.set_uri(uri) self._status.set_status("Finished") self._status.set_progress(1, 1.0) self._status.set_progress(2, 1.0) self._status.set_results(ur) return ur def close(self): pass def get_upload_status(self): return self._status class RemoteEncryptedUploadable(Referenceable): implements(RIEncryptedUploadable) def __init__(self, encrypted_uploadable, upload_status): self._eu = IEncryptedUploadable(encrypted_uploadable) self._offset = 0 self._bytes_sent = 0 self._status = IUploadStatus(upload_status) # we are responsible for updating the status string while we run, and # for setting the ciphertext-fetch progress. 
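# (Illustrative note on the attributes above, hedged: _offset records how
#  far into the ciphertext the helper has read. If a helper already holds,
#  say, the first 10 MiB of ciphertext from an interrupted upload, its next
#  remote_read_encrypted() call can start at offset=10485760; the skipped
#  range is then read locally with hash_only=True so the plaintext and
#  segment hashers still see every byte, but no ciphertext for that range
#  is sent back over the wire.)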
self._size = None def get_size(self): if self._size is not None: return defer.succeed(self._size) d = self._eu.get_size() def _got_size(size): self._size = size return size d.addCallback(_got_size) return d def remote_get_size(self): return self.get_size() def remote_get_all_encoding_parameters(self): return self._eu.get_all_encoding_parameters() def _read_encrypted(self, length, hash_only): d = self._eu.read_encrypted(length, hash_only) def _read(strings): if hash_only: self._offset += length else: size = sum([len(data) for data in strings]) self._offset += size return strings d.addCallback(_read) return d def remote_read_encrypted(self, offset, length): # we don't support seek backwards, but we allow skipping forwards precondition(offset >= 0, offset) precondition(length >= 0, length) lp = log.msg("remote_read_encrypted(%d-%d)" % (offset, offset+length), level=log.NOISY) precondition(offset >= self._offset, offset, self._offset) if offset > self._offset: # read the data from disk anyways, to build up the hash tree skip = offset - self._offset log.msg("remote_read_encrypted skipping ahead from %d to %d, skip=%d" % (self._offset, offset, skip), level=log.UNUSUAL, parent=lp) d = self._read_encrypted(skip, hash_only=True) else: d = defer.succeed(None) def _at_correct_offset(res): assert offset == self._offset, "%d != %d" % (offset, self._offset) return self._read_encrypted(length, hash_only=False) d.addCallback(_at_correct_offset) def _read(strings): size = sum([len(data) for data in strings]) self._bytes_sent += size return strings d.addCallback(_read) return d def remote_close(self): return self._eu.close() class AssistedUploader: def __init__(self, helper, storage_broker): self._helper = helper self._storage_broker = storage_broker self._log_number = log.msg("AssistedUploader starting") self._storage_index = None self._upload_status = s = UploadStatus() s.set_helper(True) s.set_active(True) def log(self, *args, **kwargs): if "parent" not in kwargs: kwargs["parent"] = self._log_number return log.msg(*args, **kwargs) def start(self, encrypted_uploadable, storage_index): """Start uploading the file. Returns a Deferred that will fire with the UploadResults instance. 
""" precondition(isinstance(storage_index, str), storage_index) self._started = time.time() eu = IEncryptedUploadable(encrypted_uploadable) eu.set_upload_status(self._upload_status) self._encuploadable = eu self._storage_index = storage_index d = eu.get_size() d.addCallback(self._got_size) d.addCallback(lambda res: eu.get_all_encoding_parameters()) d.addCallback(self._got_all_encoding_parameters) d.addCallback(self._contact_helper) d.addCallback(self._build_verifycap) def _done(res): self._upload_status.set_active(False) return res d.addBoth(_done) return d def _got_size(self, size): self._size = size self._upload_status.set_size(size) def _got_all_encoding_parameters(self, params): k, happy, n, segment_size = params # stash these for URI generation later self._needed_shares = k self._total_shares = n self._segment_size = segment_size def _contact_helper(self, res): now = self._time_contacting_helper_start = time.time() self._storage_index_elapsed = now - self._started self.log(format="contacting helper for SI %(si)s..", si=si_b2a(self._storage_index), level=log.NOISY) self._upload_status.set_status("Contacting Helper") d = self._helper.callRemote("upload_chk", self._storage_index) d.addCallback(self._contacted_helper) return d def _contacted_helper(self, (helper_upload_results, upload_helper)): now = time.time() elapsed = now - self._time_contacting_helper_start self._elapsed_time_contacting_helper = elapsed if upload_helper: self.log("helper says we need to upload", level=log.NOISY) self._upload_status.set_status("Uploading Ciphertext") # we need to upload the file reu = RemoteEncryptedUploadable(self._encuploadable, self._upload_status) # let it pre-compute the size for progress purposes d = reu.get_size() d.addCallback(lambda ignored: upload_helper.callRemote("upload", reu)) # this Deferred will fire with the upload results return d self.log("helper says file is already uploaded", level=log.OPERATIONAL) self._upload_status.set_progress(1, 1.0) return helper_upload_results def _convert_old_upload_results(self, upload_results): # pre-1.3.0 helpers return upload results which contain a mapping # from shnum to a single human-readable string, containing things # like "Found on [x],[y],[z]" (for healthy files that were already in # the grid), "Found on [x]" (for files that needed upload but which # discovered pre-existing shares), and "Placed on [x]" (for newly # uploaded shares). The 1.3.0 helper returns a mapping from shnum to # set of binary serverid strings. # the old results are too hard to deal with (they don't even contain # as much information as the new results, since the nodeids are # abbreviated), so if we detect old results, just clobber them. 
sharemap = upload_results.sharemap if str in [type(v) for v in sharemap.values()]: upload_results.sharemap = None def _build_verifycap(self, helper_upload_results): self.log("upload finished, building readcap", level=log.OPERATIONAL) self._convert_old_upload_results(helper_upload_results) self._upload_status.set_status("Building Readcap") hur = helper_upload_results assert hur.uri_extension_data["needed_shares"] == self._needed_shares assert hur.uri_extension_data["total_shares"] == self._total_shares assert hur.uri_extension_data["segment_size"] == self._segment_size assert hur.uri_extension_data["size"] == self._size # hur.verifycap doesn't exist if already found v = uri.CHKFileVerifierURI(self._storage_index, uri_extension_hash=hur.uri_extension_hash, needed_shares=self._needed_shares, total_shares=self._total_shares, size=self._size) timings = {} timings["storage_index"] = self._storage_index_elapsed timings["contacting_helper"] = self._elapsed_time_contacting_helper for key,val in hur.timings.items(): if key == "total": key = "helper_total" timings[key] = val now = time.time() timings["total"] = now - self._started gss = self._storage_broker.get_stub_server sharemap = {} servermap = {} for shnum, serverids in hur.sharemap.items(): sharemap[shnum] = set([gss(serverid) for serverid in serverids]) # if the file was already in the grid, hur.servermap is an empty dict for serverid, shnums in hur.servermap.items(): servermap[gss(serverid)] = set(shnums) ur = UploadResults(file_size=self._size, # not if already found ciphertext_fetched=hur.ciphertext_fetched, preexisting_shares=hur.preexisting_shares, pushed_shares=hur.pushed_shares, sharemap=sharemap, servermap=servermap, timings=timings, uri_extension_data=hur.uri_extension_data, uri_extension_hash=hur.uri_extension_hash, verifycapstr=v.to_string()) self._upload_status.set_status("Finished") self._upload_status.set_results(ur) return ur def get_upload_status(self): return self._upload_status class BaseUploadable: # this is overridden by max_segment_size default_max_segment_size = DEFAULT_MAX_SEGMENT_SIZE default_params_set = False max_segment_size = None encoding_param_k = None encoding_param_happy = None encoding_param_n = None _all_encoding_parameters = None _status = None def set_upload_status(self, upload_status): self._status = IUploadStatus(upload_status) def set_default_encoding_parameters(self, default_params): assert isinstance(default_params, dict) for k,v in default_params.items(): precondition(isinstance(k, str), k, v) precondition(isinstance(v, int), k, v) if "k" in default_params: self.default_encoding_param_k = default_params["k"] if "happy" in default_params: self.default_encoding_param_happy = default_params["happy"] if "n" in default_params: self.default_encoding_param_n = default_params["n"] if "max_segment_size" in default_params: self.default_max_segment_size = default_params["max_segment_size"] self.default_params_set = True def get_all_encoding_parameters(self): _assert(self.default_params_set, "set_default_encoding_parameters not called on %r" % (self,)) if self._all_encoding_parameters: return defer.succeed(self._all_encoding_parameters) max_segsize = self.max_segment_size or self.default_max_segment_size k = self.encoding_param_k or self.default_encoding_param_k happy = self.encoding_param_happy or self.default_encoding_param_happy n = self.encoding_param_n or self.default_encoding_param_n d = self.get_size() def _got_size(file_size): # for small files, shrink the segment size to avoid wasting space segsize = 
min(max_segsize, file_size) # this must be a multiple of 'required_shares'==k segsize = mathutil.next_multiple(segsize, k) encoding_parameters = (k, happy, n, segsize) self._all_encoding_parameters = encoding_parameters return encoding_parameters d.addCallback(_got_size) return d class FileHandle(BaseUploadable): implements(IUploadable) def __init__(self, filehandle, convergence): """ Upload the data from the filehandle. If convergence is None then a random encryption key will be used, else the plaintext will be hashed, then the hash will be hashed together with the string in the "convergence" argument to form the encryption key. """ assert convergence is None or isinstance(convergence, str), (convergence, type(convergence)) self._filehandle = filehandle self._key = None self.convergence = convergence self._size = None def _get_encryption_key_convergent(self): if self._key is not None: return defer.succeed(self._key) d = self.get_size() # that sets self._size as a side-effect d.addCallback(lambda size: self.get_all_encoding_parameters()) def _got(params): k, happy, n, segsize = params f = self._filehandle enckey_hasher = convergence_hasher(k, n, segsize, self.convergence) f.seek(0) BLOCKSIZE = 64*1024 bytes_read = 0 while True: data = f.read(BLOCKSIZE) if not data: break enckey_hasher.update(data) # TODO: setting progress in a non-yielding loop is kind of # pointless, but I'm anticipating (perhaps prematurely) the # day when we use a slowjob or twisted's CooperatorService to # make this yield time to other jobs. bytes_read += len(data) if self._status: self._status.set_progress(0, float(bytes_read)/self._size) f.seek(0) self._key = enckey_hasher.digest() if self._status: self._status.set_progress(0, 1.0) assert len(self._key) == 16 return self._key d.addCallback(_got) return d def _get_encryption_key_random(self): if self._key is None: self._key = os.urandom(16) return defer.succeed(self._key) def get_encryption_key(self): if self.convergence is not None: return self._get_encryption_key_convergent() else: return self._get_encryption_key_random() def get_size(self): if self._size is not None: return defer.succeed(self._size) self._filehandle.seek(0, os.SEEK_END) size = self._filehandle.tell() self._size = size self._filehandle.seek(0) return defer.succeed(size) def read(self, length): return defer.succeed([self._filehandle.read(length)]) def close(self): # the originator of the filehandle reserves the right to close it pass class FileName(FileHandle): def __init__(self, filename, convergence): """ Upload the data from the filename. If convergence is None then a random encryption key will be used, else the plaintext will be hashed, then the hash will be hashed together with the string in the "convergence" argument to form the encryption key. """ assert convergence is None or isinstance(convergence, str), (convergence, type(convergence)) FileHandle.__init__(self, open(filename, "rb"), convergence=convergence) def close(self): FileHandle.close(self) self._filehandle.close() class Data(FileHandle): def __init__(self, data, convergence): """ Upload the data from the data argument. If convergence is None then a random encryption key will be used, else the plaintext will be hashed, then the hash will be hashed together with the string in the "convergence" argument to form the encryption key. 
""" assert convergence is None or isinstance(convergence, str), (convergence, type(convergence)) FileHandle.__init__(self, StringIO(data), convergence=convergence) class Uploader(service.MultiService, log.PrefixingLogMixin): """I am a service that allows file uploading. I am a service-child of the Client. """ implements(IUploader) name = "uploader" URI_LIT_SIZE_THRESHOLD = 55 def __init__(self, helper_furl=None, stats_provider=None, history=None): self._helper_furl = helper_furl self.stats_provider = stats_provider self._history = history self._helper = None self._all_uploads = weakref.WeakKeyDictionary() # for debugging log.PrefixingLogMixin.__init__(self, facility="tahoe.immutable.upload") service.MultiService.__init__(self) def startService(self): service.MultiService.startService(self) if self._helper_furl: self.parent.tub.connectTo(self._helper_furl, self._got_helper) def _got_helper(self, helper): self.log("got helper connection, getting versions") default = { "http://allmydata.org/tahoe/protocols/helper/v1" : { }, "application-version": "unknown: no get_version()", } d = add_version_to_remote_reference(helper, default) d.addCallback(self._got_versioned_helper) def _got_versioned_helper(self, helper): needed = "http://allmydata.org/tahoe/protocols/helper/v1" if needed not in helper.version: raise InsufficientVersionError(needed, helper.version) self._helper = helper helper.notifyOnDisconnect(self._lost_helper) def _lost_helper(self): self._helper = None def get_helper_info(self): # return a tuple of (helper_furl_or_None, connected_bool) return (self._helper_furl, bool(self._helper)) def upload(self, uploadable): """ Returns a Deferred that will fire with the UploadResults instance. """ assert self.parent assert self.running uploadable = IUploadable(uploadable) d = uploadable.get_size() def _got_size(size): default_params = self.parent.get_encoding_parameters() precondition(isinstance(default_params, dict), default_params) precondition("max_segment_size" in default_params, default_params) uploadable.set_default_encoding_parameters(default_params) if self.stats_provider: self.stats_provider.count('uploader.files_uploaded', 1) self.stats_provider.count('uploader.bytes_uploaded', size) if size <= self.URI_LIT_SIZE_THRESHOLD: uploader = LiteralUploader() return uploader.start(uploadable) else: eu = EncryptAnUploadable(uploadable, self._parentmsgid) d2 = defer.succeed(None) storage_broker = self.parent.get_storage_broker() if self._helper: uploader = AssistedUploader(self._helper, storage_broker) d2.addCallback(lambda x: eu.get_storage_index()) d2.addCallback(lambda si: uploader.start(eu, si)) else: storage_broker = self.parent.get_storage_broker() secret_holder = self.parent._secret_holder uploader = CHKUploader(storage_broker, secret_holder) d2.addCallback(lambda x: uploader.start(eu)) self._all_uploads[uploader] = None if self._history: self._history.add_upload(uploader.get_upload_status()) def turn_verifycap_into_read_cap(uploadresults): # Generate the uri from the verifycap plus the key. 
                    d3 = uploadable.get_encryption_key()
                    def put_readcap_into_results(key):
                        v = uri.from_string(uploadresults.get_verifycapstr())
                        r = uri.CHKFileURI(key, v.uri_extension_hash,
                                           v.needed_shares, v.total_shares,
                                           v.size)
                        uploadresults.set_uri(r.to_string())
                        return uploadresults
                    d3.addCallback(put_readcap_into_results)
                    return d3
                d2.addCallback(turn_verifycap_into_read_cap)
                return d2
        d.addCallback(_got_size)
        def _done(res):
            uploadable.close()
            return res
        d.addBoth(_done)
        return d

tahoe-lafs-1.10.0/src/allmydata/interfaces.py

from zope.interface import Interface
from foolscap.api import StringConstraint, ListOf, TupleOf, SetOf, DictOf, \
     ChoiceOf, IntegerConstraint, Any, RemoteInterface, Referenceable

HASH_SIZE=32
SALT_SIZE=16

SDMF_VERSION=0
MDMF_VERSION=1

Hash = StringConstraint(maxLength=HASH_SIZE,
                        minLength=HASH_SIZE)# binary format 32-byte SHA256 hash
Nodeid = StringConstraint(maxLength=20,
                          minLength=20) # binary format 20-byte SHA1 hash
FURL = StringConstraint(1000)
StorageIndex = StringConstraint(16)
URI = StringConstraint(300) # kind of arbitrary

MAX_BUCKETS = 256 # per peer -- zfec offers at most 256 shares per file

DEFAULT_MAX_SEGMENT_SIZE = 128*1024

ShareData = StringConstraint(None)
URIExtensionData = StringConstraint(1000)
Number = IntegerConstraint(8) # 2**(8*8) == 16EiB ~= 18e18 ~= 18 exabytes
Offset = Number
ReadSize = int # the 'int' constraint is 2**31 == 2Gib -- large files are processed in not-so-large increments
WriteEnablerSecret = Hash # used to protect mutable share modifications
LeaseRenewSecret = Hash # used to protect lease renewal requests
LeaseCancelSecret = Hash # was used to protect lease cancellation requests

class RIBucketWriter(RemoteInterface):
    """ Objects of this kind live on the server side. """
    def write(offset=Offset, data=ShareData):
        return None

    def close():
        """
        If the data that has been written is incomplete or inconsistent
        then the server will throw the data away, else it will store it
        for future retrieval.
        """
        return None

    def abort():
        """Abandon all the data that has been written.
        """
        return None

class RIBucketReader(RemoteInterface):
    def read(offset=Offset, length=ReadSize):
        return ShareData

    def advise_corrupt_share(reason=str):
        """Clients who discover hash failures in shares that they have
        downloaded from me will use this method to inform me about the
        failures. I will record their concern so that my operator can
        manually inspect the shares in question.

        I return None.

        This is a wrapper around RIStorageServer.advise_corrupt_share()
        that is tied to a specific share, and therefore does not need the
        extra share-identifying arguments. Please see that method for full
        documentation.
        """

TestVector = ListOf(TupleOf(Offset, ReadSize, str, str))
# elements are (offset, length, operator, specimen)
# operator is one of "lt, le, eq, ne, ge, gt"
# nop always passes and is used to fetch data while writing.
# you should use length==len(specimen) for everything except nop
DataVector = ListOf(TupleOf(Offset, ShareData))
# (offset, data).
This limits us to 30 writes of 1MiB each per call TestAndWriteVectorsForShares = DictOf(int, TupleOf(TestVector, DataVector, ChoiceOf(None, Offset), # new_length )) ReadVector = ListOf(TupleOf(Offset, ReadSize)) ReadData = ListOf(ShareData) # returns data[offset:offset+length] for each element of TestVector class RIStorageServer(RemoteInterface): __remote_name__ = "RIStorageServer.tahoe.allmydata.com" def get_version(): """ Return a dictionary of version information. """ return DictOf(str, Any()) def allocate_buckets(storage_index=StorageIndex, renew_secret=LeaseRenewSecret, cancel_secret=LeaseCancelSecret, sharenums=SetOf(int, maxLength=MAX_BUCKETS), allocated_size=Offset, canary=Referenceable): """ @param storage_index: the index of the bucket to be created or increfed. @param sharenums: these are the share numbers (probably between 0 and 99) that the sender is proposing to store on this server. @param renew_secret: This is the secret used to protect bucket refresh This secret is generated by the client and stored for later comparison by the server. Each server is given a different secret. @param cancel_secret: This no longer allows lease cancellation, but must still be a unique value identifying the lease. XXX stop relying on it to be unique. @param canary: If the canary is lost before close(), the bucket is deleted. @return: tuple of (alreadygot, allocated), where alreadygot is what we already have and allocated is what we hereby agree to accept. New leases are added for shares in both lists. """ return TupleOf(SetOf(int, maxLength=MAX_BUCKETS), DictOf(int, RIBucketWriter, maxKeys=MAX_BUCKETS)) def add_lease(storage_index=StorageIndex, renew_secret=LeaseRenewSecret, cancel_secret=LeaseCancelSecret): """ Add a new lease on the given bucket. If the renew_secret matches an existing lease, that lease will be renewed instead. If there is no bucket for the given storage_index, return silently. (note that in tahoe-1.3.0 and earlier, IndexError was raised if there was no bucket) """ return Any() # returns None now, but future versions might change def renew_lease(storage_index=StorageIndex, renew_secret=LeaseRenewSecret): """ Renew the lease on a given bucket, resetting the timer to 31 days. Some networks will use this, some will not. If there is no bucket for the given storage_index, IndexError will be raised. For mutable shares, if the given renew_secret does not match an existing lease, IndexError will be raised with a note listing the server-nodeids on the existing leases, so leases on migrated shares can be renewed. For immutable shares, IndexError (without the note) will be raised. """ return Any() def get_buckets(storage_index=StorageIndex): return DictOf(int, RIBucketReader, maxKeys=MAX_BUCKETS) def slot_readv(storage_index=StorageIndex, shares=ListOf(int), readv=ReadVector): """Read a vector from the numbered shares associated with the given storage index. An empty shares list means to return data from all known shares. Returns a dictionary with one key per share.""" return DictOf(int, ReadData) # shnum -> results def slot_testv_and_readv_and_writev(storage_index=StorageIndex, secrets=TupleOf(WriteEnablerSecret, LeaseRenewSecret, LeaseCancelSecret), tw_vectors=TestAndWriteVectorsForShares, r_vector=ReadVector, ): """ General-purpose test-read-and-set operation for mutable slots: (1) For submitted shnums, compare the test vectors against extant shares, or against an empty share for shnums that do not exist. (2) Use the read vectors to extract "old data" from extant shares. 
(3) If all tests in (1) passed, then apply the write vectors (possibly creating new shares). (4) Return whether the tests passed, and the "old data", which does not include any modifications made by the writes. The operation does not interleave with other operations on the same shareset. This method is, um, large. The goal is to allow clients to update all the shares associated with a mutable file in a single round trip. @param storage_index: the index of the bucket to be created or increfed. @param write_enabler: a secret that is stored along with the slot. Writes are accepted from any caller who can present the matching secret. A different secret should be used for each slot*server pair. @param renew_secret: This is the secret used to protect bucket refresh This secret is generated by the client and stored for later comparison by the server. Each server is given a different secret. @param cancel_secret: This no longer allows lease cancellation, but must still be a unique value identifying the lease. XXX stop relying on it to be unique. The 'secrets' argument is a tuple of (write_enabler, renew_secret, cancel_secret). The first is required to perform any write. The latter two are used when allocating new shares. To simply acquire a new lease on existing shares, use an empty testv and an empty writev. Each share can have a separate test vector (i.e. a list of comparisons to perform). If all vectors for all shares pass, then all writes for all shares are recorded. Each comparison is a 4-tuple of (offset, length, operator, specimen), which effectively does a bool( (read(offset, length)) OPERATOR specimen ) and only performs the write if all these evaluate to True. Basic test-and-set uses 'eq'. Write-if-newer uses a seqnum and (offset, length, 'lt', specimen). Write-if-same-or-newer uses 'le'. Reads from the end of the container are truncated, and missing shares behave like empty ones, so to assert that a share doesn't exist (for use when creating a new share), use (0, 1, 'eq', ''). The write vector will be applied to the given share, expanding it if necessary. A write vector applied to a share number that did not exist previously will cause that share to be created. Write vectors must not overlap (if they do, this will either cause an error or apply them in an unspecified order). Duplicate write vectors, with the same offset and data, are currently tolerated but are not desirable. In Tahoe-LAFS v1.8.3 or later (except 1.9.0a1), if you send a write vector whose offset is beyond the end of the current data, the space between the end of the current data and the beginning of the write vector will be filled with zero bytes. In earlier versions the contents of this space was unspecified (and might end up containing secrets). Storage servers with the new zero-filling behavior will advertise a true value for the 'fills-holes-with-zero-bytes' key (under 'http://allmydata.org/tahoe/protocols/storage/v1') in their version information. Each write vector is accompanied by a 'new_length' argument, which can be used to truncate the data. If new_length is not None and it is less than the current size of the data (after applying all write vectors), then the data will be truncated to new_length. If new_length==0, the share will be deleted. In Tahoe-LAFS v1.8.2 and earlier, new_length could also be used to enlarge the file by sending a number larger than the size of the data after applying all write vectors. 
That behavior was not used, and as of Tahoe-LAFS v1.8.3 it no longer works and the new_length is ignored in that case. If a storage client knows that the server supports zero-filling, for example from the 'fills-holes-with-zero-bytes' key in its version information, it can extend the file efficiently by writing a single zero byte just before the new end-of-file. Otherwise it must explicitly write zeroes to all bytes between the old and new end-of-file. In any case it should avoid sending new_length larger than the size of the data after applying all write vectors. The read vector is used to extract data from all known shares, *before* any writes have been applied. The same read vector is used for all shares. This captures the state that was tested by the test vector, for extant shares. This method returns two values: a boolean and a dict. The boolean is True if the write vectors were applied, False if not. The dict is keyed by share number, and each value contains a list of strings, one for each element of the read vector. If the write_enabler is wrong, this will raise BadWriteEnablerError. To enable share migration (using update_write_enabler), the exception will have the nodeid used for the old write enabler embedded in it, in the following string:: The write enabler was recorded by nodeid '%s'. Note that the nodeid here is encoded using the same base32 encoding used by Foolscap and allmydata.util.idlib.nodeid_b2a(). """ return TupleOf(bool, DictOf(int, ReadData)) def advise_corrupt_share(share_type=str, storage_index=StorageIndex, shnum=int, reason=str): """Clients who discover hash failures in shares that they have downloaded from me will use this method to inform me about the failures. I will record their concern so that my operator can manually inspect the shares in question. I return None. 'share_type' is either 'mutable' or 'immutable'. 'storage_index' is a (binary) storage index string, and 'shnum' is the integer share number. 'reason' is a human-readable explanation of the problem, probably including some expected hash values and the computed ones that did not match. Corruption advisories for mutable shares should include a hash of the public key (the same value that appears in the mutable-file verify-cap), since the current share format does not store that on disk. """ class IStorageBucketWriter(Interface): """ Objects of this kind live on the client side. """ def put_block(segmentnum, data): """ @param segmentnum=int @param data=ShareData: For most segments, this data will be 'blocksize' bytes in length. The last segment might be shorter. @return: a Deferred that fires (with None) when the operation completes """ def put_crypttext_hashes(hashes): """ @param hashes=ListOf(Hash) @return: a Deferred that fires (with None) when the operation completes """ def put_block_hashes(blockhashes): """ @param blockhashes=ListOf(Hash) @return: a Deferred that fires (with None) when the operation completes """ def put_share_hashes(sharehashes): """ @param sharehashes=ListOf(TupleOf(int, Hash)) @return: a Deferred that fires (with None) when the operation completes """ def put_uri_extension(data): """This block of data contains integrity-checking information (hashes of plaintext, crypttext, and shares), as well as encoding parameters that are necessary to recover the data. This is a serialized dict mapping strings to other strings. The hash of this data is kept in the URI and verified before any of the data is used. All buckets for a given file contain identical copies of this data. 
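        For concreteness, here is a minimal sketch of a serializer that
        follows the pseudocode given just below; the netstring() helper is
        assumed to use the conventional '<length>:<bytes>,' encoding (as in
        allmydata.util.netstring), and the function names here are
        illustrative only::

            import re

            def netstring(s):
                return "%d:%s," % (len(s), s)

            def serialize_uri_extension(d):
                chunks = []
                for k in sorted(d.keys()):
                    assert re.match(r'^[a-zA-Z_\-]+$', k)
                    chunks.append(k + ':' + netstring(d[k]))
                return ''.join(chunks)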
The serialization format is specified with the following pseudocode: for k in sorted(dict.keys()): assert re.match(r'^[a-zA-Z_\-]+$', k) write(k + ':' + netstring(dict[k])) @param data=URIExtensionData @return: a Deferred that fires (with None) when the operation completes """ def close(): """Finish writing and close the bucket. The share is not finalized until this method is called: if the uploading client disconnects before calling close(), the partially-written share will be discarded. @return: a Deferred that fires (with None) when the operation completes """ class IStorageBucketReader(Interface): def get_block_data(blocknum, blocksize, size): """Most blocks will be the same size. The last block might be shorter than the others. @param blocknum=int @param blocksize=int @param size=int @return: ShareData """ def get_crypttext_hashes(): """ @return: ListOf(Hash) """ def get_block_hashes(at_least_these=()): """ @param at_least_these=SetOf(int) @return: ListOf(Hash) """ def get_share_hashes(): """ @return: ListOf(TupleOf(int, Hash)) """ def get_uri_extension(): """ @return: URIExtensionData """ class IStorageBroker(Interface): def get_servers_for_psi(peer_selection_index): """ @return: list of IServer instances """ def get_connected_servers(): """ @return: frozenset of connected IServer instances """ def get_known_servers(): """ @return: frozenset of IServer instances """ def get_all_serverids(): """ @return: frozenset of serverid strings """ def get_nickname_for_serverid(serverid): """ @return: unicode nickname, or None """ # methods moved from IntroducerClient, need review def get_all_connections(): """Return a frozenset of (nodeid, service_name, rref) tuples, one for each active connection we've established to a remote service. This is mostly useful for unit tests that need to wait until a certain number of connections have been made.""" def get_all_connectors(): """Return a dict that maps from (nodeid, service_name) to a RemoteServiceConnector instance for all services that we are actively trying to connect to. Each RemoteServiceConnector has the following public attributes:: service_name: the type of service provided, like 'storage' announcement_time: when we first heard about this service last_connect_time: when we last established a connection last_loss_time: when we last lost a connection version: the peer's version, from the most recent connection oldest_supported: the peer's oldest supported version, same rref: the RemoteReference, if connected, otherwise None remote_host: the IAddress, if connected, otherwise None This method is intended for monitoring interfaces, such as a web page that describes connecting and connected peers. """ def get_all_peerids(): """Return a frozenset of all peerids to whom we have a connection (to one or more services) established. Mostly useful for unit tests.""" def get_all_connections_for(service_name): """Return a frozenset of (nodeid, service_name, rref) tuples, one for each active connection that provides the given SERVICE_NAME.""" def get_permuted_peers(service_name, key): """Returns an ordered list of (peerid, rref) tuples, selecting from the connections that provide SERVICE_NAME, using a hash-based permutation keyed by KEY. This randomizes the service list in a repeatable way, to distribute load over many peers. 
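        The real permutation function lives elsewhere (see
        allmydata.util.hashutil and the storage client code); purely as an
        illustrative sketch, one way to obtain a repeatable, key-dependent
        ordering is to sort by a hash of the key and the peerid::

            import hashlib

            def permuted_order(peers, selection_key):
                # peers: iterable of (peerid, rref) tuples
                # selection_key: the binary peer-selection index
                # Sorting by H(selection_key + peerid) yields an ordering
                # that is stable for a given key but different for every key.
                return sorted(peers,
                              key=lambda peer: hashlib.sha256(selection_key + peer[0]).digest())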
""" class IDisplayableServer(Interface): def get_nickname(): pass def get_name(): pass def get_longname(): pass class IServer(IDisplayableServer): """I live in the client, and represent a single server.""" def start_connecting(tub, trigger_cb): pass def get_rref(): """Once a server is connected, I return a RemoteReference. Before a server is connected for the first time, I return None. Note that the rref I return will start producing DeadReferenceErrors once the connection is lost. """ class IMutableSlotWriter(Interface): """ The interface for a writer around a mutable slot on a remote server. """ def set_checkstring(seqnum_or_checkstring, root_hash=None, salt=None): """ Set the checkstring that I will pass to the remote server when writing. @param checkstring A packed checkstring to use. Note that implementations can differ in which semantics they wish to support for set_checkstring -- they can, for example, build the checkstring themselves from its constituents, or some other thing. """ def get_checkstring(): """ Get the checkstring that I think currently exists on the remote server. """ def put_block(data, segnum, salt): """ Add a block and salt to the share. """ def put_encprivkey(encprivkey): """ Add the encrypted private key to the share. """ def put_blockhashes(blockhashes): """ @param blockhashes=list Add the block hash tree to the share. """ def put_sharehashes(sharehashes): """ @param sharehashes=dict Add the share hash chain to the share. """ def get_signable(): """ Return the part of the share that needs to be signed. """ def put_signature(signature): """ Add the signature to the share. """ def put_verification_key(verification_key): """ Add the verification key to the share. """ def finish_publishing(): """ Do anything necessary to finish writing the share to a remote server. I require that no further publishing needs to take place after this method has been called. """ class IURI(Interface): def init_from_string(uri): """Accept a string (as created by my to_string() method) and populate this instance with its data. I am not normally called directly, please use the module-level uri.from_string() function to convert arbitrary URI strings into IURI-providing instances.""" def is_readonly(): """Return False if this URI be used to modify the data. Return True if this URI cannot be used to modify the data.""" def is_mutable(): """Return True if the data can be modified by *somebody* (perhaps someone who has a more powerful URI than this one).""" # TODO: rename to get_read_cap() def get_readonly(): """Return another IURI instance that represents a read-only form of this one. If is_readonly() is True, this returns self.""" def get_verify_cap(): """Return an instance that provides IVerifierURI, which can be used to check on the availability of the file or directory, without providing enough capabilities to actually read or modify the contents. This may return None if the file does not need checking or verification (e.g. LIT URIs). """ def to_string(): """Return a string of printable ASCII characters, suitable for passing into init_from_string.""" class IVerifierURI(Interface, IURI): def init_from_string(uri): """Accept a string (as created by my to_string() method) and populate this instance with its data. 
I am not normally called directly, please use the module-level uri.from_string() function to convert arbitrary URI strings into IURI-providing instances.""" def to_string(): """Return a string of printable ASCII characters, suitable for passing into init_from_string.""" class IDirnodeURI(Interface): """I am a URI that represents a dirnode.""" class IFileURI(Interface): """I am a URI that represents a filenode.""" def get_size(): """Return the length (in bytes) of the file that I represent.""" class IImmutableFileURI(IFileURI): pass class IMutableFileURI(Interface): pass class IDirectoryURI(Interface): pass class IReadonlyDirectoryURI(Interface): pass class CapConstraintError(Exception): """A constraint on a cap was violated.""" class MustBeDeepImmutableError(CapConstraintError): """Mutable children cannot be added to an immutable directory. Also, caps obtained from an immutable directory can trigger this error if they are later found to refer to a mutable object and then used.""" class MustBeReadonlyError(CapConstraintError): """Known write caps cannot be specified in a ro_uri field. Also, caps obtained from a ro_uri field can trigger this error if they are later found to be write caps and then used.""" class MustNotBeUnknownRWError(CapConstraintError): """Cannot add an unknown child cap specified in a rw_uri field.""" class IReadable(Interface): """I represent a readable object -- either an immutable file, or a specific version of a mutable file. """ def is_readonly(): """Return True if this reference provides mutable access to the given file or directory (i.e. if you can modify it), or False if not. Note that even if this reference is read-only, someone else may hold a read-write reference to it. For an IReadable returned by get_best_readable_version(), this will always return True, but for instances of subinterfaces such as IMutableFileVersion, it may return False.""" def is_mutable(): """Return True if this file or directory is mutable (by *somebody*, not necessarily you), False if it is is immutable. Note that a file might be mutable overall, but your reference to it might be read-only. On the other hand, all references to an immutable file will be read-only; there are no read-write references to an immutable file.""" def get_storage_index(): """Return the storage index of the file.""" def get_size(): """Return the length (in bytes) of this readable object.""" def download_to_data(): """Download all of the file contents. I return a Deferred that fires with the contents as a byte string.""" def read(consumer, offset=0, size=None): """Download a portion (possibly all) of the file's contents, making them available to the given IConsumer. Return a Deferred that fires (with the consumer) when the consumer is unregistered (either because the last byte has been given to it, or because the consumer threw an exception during write(), possibly because it no longer wants to receive data). The portion downloaded will start at 'offset' and contain 'size' bytes (or the remainder of the file if size==None). The consumer will be used in non-streaming mode: an IPullProducer will be attached to it. The consumer will not receive data right away: several network trips must occur first. The order of events will be:: consumer.registerProducer(p, streaming) (if streaming == False):: consumer does p.resumeProducing() consumer.write(data) consumer does p.resumeProducing() consumer.write(data).. 
(repeat until all data is written) consumer.unregisterProducer() deferred.callback(consumer) If a download error occurs, or an exception is raised by consumer.registerProducer() or consumer.write(), I will call consumer.unregisterProducer() and then deliver the exception via deferred.errback(). To cancel the download, the consumer should call p.stopProducing(), which will result in an exception being delivered via deferred.errback(). See src/allmydata/util/consumer.py for an example of a simple download-to-memory consumer. """ class IWriteable(Interface): """ I define methods that callers can use to update SDMF and MDMF mutable files on a Tahoe-LAFS grid. """ # XXX: For the moment, we have only this. It is possible that we # want to move overwrite() and modify() in here too. def update(data, offset): """ I write the data from my data argument to the MDMF file, starting at offset. I continue writing data until my data argument is exhausted, appending data to the file as necessary. """ # assert IMutableUploadable.providedBy(data) # to append data: offset=node.get_size_of_best_version() # do we want to support compacting MDMF? # for an MDMF file, this can be done with O(data.get_size()) # memory. For an SDMF file, any modification takes # O(node.get_size_of_best_version()). class IMutableFileVersion(IReadable): """I provide access to a particular version of a mutable file. The access is read/write if I was obtained from a filenode derived from a write cap, or read-only if the filenode was derived from a read cap. """ def get_sequence_number(): """Return the sequence number of this version.""" def get_servermap(): """Return the IMutableFileServerMap instance that was used to create this object. """ def get_writekey(): """Return this filenode's writekey, or None if the node does not have write-capability. This may be used to assist with data structures that need to make certain data available only to writers, such as the read-write child caps in dirnodes. The recommended process is to have reader-visible data be submitted to the filenode in the clear (where it will be encrypted by the filenode using the readkey), but encrypt writer-visible data using this writekey. """ def overwrite(new_contents): """Replace the contents of the mutable file, provided that no other node has published (or is attempting to publish, concurrently) a newer version of the file than this one. I will avoid modifying any share that is different than the version given by get_sequence_number(). However, if another node is writing to the file at the same time as me, I may manage to update some shares while they update others. If I see any evidence of this, I will signal UncoordinatedWriteError, and the file will be left in an inconsistent state (possibly the version you provided, possibly the old version, possibly somebody else's version, and possibly a mix of shares from all of these). The recommended response to UncoordinatedWriteError is to either return it to the caller (since they failed to coordinate their writes), or to attempt some sort of recovery. It may be sufficient to wait a random interval (with exponential backoff) and repeat your operation. If I do not signal UncoordinatedWriteError, then I was able to write the new version without incident. I return a Deferred that fires (with a PublishStatus object) when the update has completed. """ def modify(modifier_cb): """Modify the contents of the file, by downloading this version, applying the modifier function (or bound method), then uploading the new version. 
This will succeed as long as no other node publishes a version between the download and the upload. I return a Deferred that fires (with a PublishStatus object) when the update is complete. The modifier callable will be given three arguments: a string (with the old contents), a 'first_time' boolean, and a servermap. As with download_to_data(), the old contents will be from this version, but the modifier can use the servermap to make other decisions (such as refusing to apply the delta if there are multiple parallel versions, or if there is evidence of a newer unrecoverable version). 'first_time' will be True the first time the modifier is called, and False on any subsequent calls. The callable should return a string with the new contents. The callable must be prepared to be called multiple times, and must examine the input string to see if the change that it wants to make is already present in the old version. If it does not need to make any changes, it can either return None, or return its input string. If the modifier raises an exception, it will be returned in the errback. """ # The hierarchy looks like this: # IFilesystemNode # IFileNode # IMutableFileNode # IImmutableFileNode # IDirectoryNode class IFilesystemNode(Interface): def get_cap(): """Return the strongest 'cap instance' associated with this node. (writecap for writeable-mutable files/directories, readcap for immutable or readonly-mutable files/directories). To convert this into a string, call .to_string() on the result.""" def get_readcap(): """Return a readonly cap instance for this node. For immutable or readonly nodes, get_cap() and get_readcap() return the same thing.""" def get_repair_cap(): """Return an IURI instance that can be used to repair the file, or None if this node cannot be repaired (either because it is not distributed, like a LIT file, or because the node does not represent sufficient authority to create a repair-cap, like a read-only RSA mutable file node [which cannot create the correct write-enablers]). """ def get_verify_cap(): """Return an IVerifierURI instance that represents the 'verifiy/refresh capability' for this node. The holder of this capability will be able to renew the lease for this node, protecting it from garbage-collection. They will also be able to ask a server if it holds a share for the file or directory. """ def get_uri(): """Return the URI string corresponding to the strongest cap associated with this node. If this node is read-only, the URI will only offer read-only access. If this node is read-write, the URI will offer read-write access. If you have read-write access to a node and wish to share merely read-only access with others, use get_readonly_uri(). """ def get_write_uri(): """Return the URI string that can be used by others to get write access to this node, if it is writeable. If this is a read-only node, return None.""" def get_readonly_uri(): """Return the URI string that can be used by others to get read-only access to this node. The result is a read-only URI, regardless of whether this node is read-only or read-write. If you have merely read-only access to this node, get_readonly_uri() will return the same thing as get_uri(). """ def get_storage_index(): """Return a string with the (binary) storage index in use on this download. This may be None if there is no storage index (i.e. LIT files and directories).""" def is_readonly(): """Return True if this reference provides mutable access to the given file or directory (i.e. if you can modify it), or False if not. 
Note that even if this reference is read-only, someone else may hold a read-write reference to it.""" def is_mutable(): """Return True if this file or directory is mutable (by *somebody*, not necessarily you), False if it is is immutable. Note that a file might be mutable overall, but your reference to it might be read-only. On the other hand, all references to an immutable file will be read-only; there are no read-write references to an immutable file. """ def is_unknown(): """Return True if this is an unknown node.""" def is_allowed_in_immutable_directory(): """Return True if this node is allowed as a child of a deep-immutable directory. This is true if either the node is of a known-immutable type, or it is unknown and read-only. """ def raise_error(): """Raise any error associated with this node.""" # XXX: These may not be appropriate outside the context of an IReadable. def get_size(): """Return the length (in bytes) of the data this node represents. For directory nodes, I return the size of the backing store. I return synchronously and do not consult the network, so for mutable objects, I will return the most recently observed size for the object, or None if I don't remember a size. Use get_current_size, which returns a Deferred, if you want more up-to-date information.""" def get_current_size(): """I return a Deferred that fires with the length (in bytes) of the data this node represents. """ class IFileNode(IFilesystemNode): """I am a node that represents a file: a sequence of bytes. I am not a container, like IDirectoryNode.""" def get_best_readable_version(): """Return a Deferred that fires with an IReadable for the 'best' available version of the file. The IReadable provides only read access, even if this filenode was derived from a write cap. For an immutable file, there is only one version. For a mutable file, the 'best' version is the recoverable version with the highest sequence number. If no uncoordinated writes have occurred, and if enough shares are available, then this will be the most recent version that has been uploaded. If no version is recoverable, the Deferred will errback with an UnrecoverableFileError. """ def download_best_version(): """Download the contents of the version that would be returned by get_best_readable_version(). This is equivalent to calling download_to_data() on the IReadable given by that method. I return a Deferred that fires with a byte string when the file has been fully downloaded. To support streaming download, use the 'read' method of IReadable. If no version is recoverable, the Deferred will errback with an UnrecoverableFileError. """ def get_size_of_best_version(): """Find the size of the version that would be returned by get_best_readable_version(). I return a Deferred that fires with an integer. If no version is recoverable, the Deferred will errback with an UnrecoverableFileError. """ class IImmutableFileNode(IFileNode, IReadable): """I am a node representing an immutable file. Immutable files have only one version""" class IMutableFileNode(IFileNode): """I provide access to a 'mutable file', which retains its identity regardless of what contents are put in it. The consistency-vs-availability problem means that there might be multiple versions of a file present in the grid, some of which might be unrecoverable (i.e. have fewer than 'k' shares). These versions are loosely ordered: each has a sequence number and a hash, and any version with seqnum=N was uploaded by a node that has seen at least one version with seqnum=N-1. 
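    As a brief orientation before the details below: most callers apply
    changes through the modify() method described further down, passing a
    modifier callable. A minimal, illustrative modifier might look like this
    (the positional argument order shown matches the modifier classes in
    allmydata.dirnode and is assumed here)::

        def add_entry(old_contents, servermap, first_time):
            # Must be idempotent: the callable may be invoked more than
            # once, so only append if the entry is not already present.
            if "new entry\n" in old_contents:
                return None              # no change needed
            return old_contents + "new entry\n"

        d = mutable_filenode.modify(add_entry)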
The 'servermap' (an instance of IMutableFileServerMap) is used to describe the versions that are known to be present in the grid, and which servers are hosting their shares. It is used to represent the 'state of the world', and is used for this purpose by my test-and-set operations. Downloading the contents of the mutable file will also return a servermap. Uploading a new version into the mutable file requires a servermap as input, and the semantics of the replace operation is 'replace the file with my new version if it looks like nobody else has changed the file since my previous download'. Because the file is distributed, this is not a perfect test-and-set operation, but it will do its best. If the replace process sees evidence of a simultaneous write, it will signal an UncoordinatedWriteError, so that the caller can take corrective action. Most readers will want to use the 'best' current version of the file, and should use my 'download_best_version()' method. To unconditionally replace the file, callers should use overwrite(). This is the mode that user-visible mutable files will probably use. To apply some delta to the file, call modify() with a callable modifier function that can apply the modification that you want to make. This is the mode that dirnodes will use, since most directory modification operations can be expressed in terms of deltas to the directory state. Three methods are available for users who need to perform more complex operations. The first is get_servermap(), which returns an up-to-date servermap using a specified mode. The second is download_version(), which downloads a specific version (not necessarily the 'best' one). The third is 'upload', which accepts new contents and a servermap (which must have been updated with MODE_WRITE). The upload method will attempt to apply the new contents as long as no other node has modified the file since the servermap was updated. This might be useful to a caller who wants to merge multiple versions into a single new one. Note that each time the servermap is updated, a specific 'mode' is used, which determines how many peers are queried. To use a servermap for my replace() method, that servermap must have been updated in MODE_WRITE. These modes are defined in allmydata.mutable.common, and consist of MODE_READ, MODE_WRITE, MODE_ANYTHING, and MODE_CHECK. Please look in allmydata/mutable/servermap.py for details about the differences. Mutable files are currently limited in size (about 3.5MB max) and can only be retrieved and updated all-at-once, as a single big string. Future versions of our mutable files will remove this restriction. """ def get_best_mutable_version(): """Return a Deferred that fires with an IMutableFileVersion for the 'best' available version of the file. The best version is the recoverable version with the highest sequence number. If no uncoordinated writes have occurred, and if enough shares are available, then this will be the most recent version that has been uploaded. If no version is recoverable, the Deferred will errback with an UnrecoverableFileError. """ def overwrite(new_contents): """Unconditionally replace the contents of the mutable file with new ones. This simply chains get_servermap(MODE_WRITE) and upload(). This is only appropriate to use when the new contents of the file are completely unrelated to the old ones, and you do not care about other clients' changes. I return a Deferred that fires (with a PublishStatus object) when the update has completed. 
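        A minimal usage sketch, for illustration only (MutableData is
        assumed to be the IMutableUploadable wrapper provided by
        allmydata.mutable.publish)::

            from allmydata.mutable.publish import MutableData

            d = mutable_filenode.overwrite(MutableData("replacement contents\n"))
            def _published(publish_status):
                # publish_status is the PublishStatus object described above
                return publish_status
            d.addCallback(_published)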
""" def modify(modifier_cb): """Modify the contents of the file, by downloading the current version, applying the modifier function (or bound method), then uploading the new version. I return a Deferred that fires (with a PublishStatus object) when the update is complete. The modifier callable will be given three arguments: a string (with the old contents), a 'first_time' boolean, and a servermap. As with download_best_version(), the old contents will be from the best recoverable version, but the modifier can use the servermap to make other decisions (such as refusing to apply the delta if there are multiple parallel versions, or if there is evidence of a newer unrecoverable version). 'first_time' will be True the first time the modifier is called, and False on any subsequent calls. The callable should return a string with the new contents. The callable must be prepared to be called multiple times, and must examine the input string to see if the change that it wants to make is already present in the old version. If it does not need to make any changes, it can either return None, or return its input string. If the modifier raises an exception, it will be returned in the errback. """ def get_servermap(mode): """Return a Deferred that fires with an IMutableFileServerMap instance, updated using the given mode. """ def download_version(servermap, version): """Download a specific version of the file, using the servermap as a guide to where the shares are located. I return a Deferred that fires with the requested contents, or errbacks with UnrecoverableFileError. Note that a servermap that was updated with MODE_ANYTHING or MODE_READ may not know about shares for all versions (those modes stop querying servers as soon as they can fulfil their goals), so you may want to use MODE_CHECK (which checks everything) to get increased visibility. """ def upload(new_contents, servermap): """Replace the contents of the file with new ones. This requires a servermap that was previously updated with MODE_WRITE. I attempt to provide test-and-set semantics, in that I will avoid modifying any share that is different than the version I saw in the servermap. However, if another node is writing to the file at the same time as me, I may manage to update some shares while they update others. If I see any evidence of this, I will signal UncoordinatedWriteError, and the file will be left in an inconsistent state (possibly the version you provided, possibly the old version, possibly somebody else's version, and possibly a mix of shares from all of these). The recommended response to UncoordinatedWriteError is to either return it to the caller (since they failed to coordinate their writes), or to attempt some sort of recovery. It may be sufficient to wait a random interval (with exponential backoff) and repeat your operation. If I do not signal UncoordinatedWriteError, then I was able to write the new version without incident. I return a Deferred that fires (with a PublishStatus object) when the publish has completed. I will update the servermap in-place with the location of all new shares. """ def get_writekey(): """Return this filenode's writekey, or None if the node does not have write-capability. This may be used to assist with data structures that need to make certain data available only to writers, such as the read-write child caps in dirnodes. 
The recommended process is to have reader-visible data be submitted to the filenode in the clear (where it will be encrypted by the filenode using the readkey), but encrypt writer-visible data using this writekey. """ def get_version(): """Returns the mutable file protocol version.""" class NotEnoughSharesError(Exception): """Download was unable to get enough shares""" class NoSharesError(Exception): """Download was unable to get any shares at all.""" class DownloadStopped(Exception): pass class UploadUnhappinessError(Exception): """Upload was unable to satisfy 'servers_of_happiness'""" class UnableToFetchCriticalDownloadDataError(Exception): """I was unable to fetch some piece of critical data that is supposed to be identically present in all shares.""" class NoServersError(Exception): """Upload wasn't given any servers to work with, usually indicating a network or Introducer problem.""" class ExistingChildError(Exception): """A directory node was asked to add or replace a child that already exists, and overwrite= was set to False.""" class NoSuchChildError(Exception): """A directory node was asked to fetch a child that does not exist.""" def __str__(self): # avoid UnicodeEncodeErrors when converting to str return self.__repr__() class ChildOfWrongTypeError(Exception): """An operation was attempted on a child of the wrong type (file or directory).""" class IDirectoryNode(IFilesystemNode): """I represent a filesystem node that is a container, with a name-to-child mapping, holding the tahoe equivalent of a directory. All child names are unicode strings, and all children are some sort of IFilesystemNode (a file, subdirectory, or unknown node). """ def get_uri(): """ The dirnode ('1') URI returned by this method can be used in set_uri() on a different directory ('2') to 'mount' a reference to this directory ('1') under the other ('2'). This URI is just a string, so it can be passed around through email or other out-of-band protocol. """ def get_readonly_uri(): """ The dirnode ('1') URI returned by this method can be used in set_uri() on a different directory ('2') to 'mount' a reference to this directory ('1') under the other ('2'). This URI is just a string, so it can be passed around through email or other out-of-band protocol. """ def list(): """I return a Deferred that fires with a dictionary mapping child name (a unicode string) to (node, metadata_dict) tuples, in which 'node' is an IFilesystemNode and 'metadata_dict' is a dictionary of metadata.""" def has_child(name): """I return a Deferred that fires with a boolean, True if there exists a child of the given name, False if not. The child name must be a unicode string.""" def get(name): """I return a Deferred that fires with a specific named child node, which is an IFilesystemNode. The child name must be a unicode string. I raise NoSuchChildError if I do not have a child by that name.""" def get_metadata_for(name): """I return a Deferred that fires with the metadata dictionary for a specific named child node. The child name must be a unicode string. This metadata is stored in the *edge*, not in the child, so it is attached to the parent dirnode rather than the child node. I raise NoSuchChildError if I do not have a child by that name.""" def set_metadata_for(name, metadata): """I replace any existing metadata for the named child with the new metadata. The child name must be a unicode string. This metadata is stored in the *edge*, not in the child, so it is attached to the parent dirnode rather than the child node. 
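        Purely for illustration (the child name and the non-reserved
        metadata key shown here are hypothetical)::

            import time

            d = dirnode.set_metadata_for(u"notes.txt",
                                         {"mtime": time.time(),
                                          "reviewed": "yes"})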
I return a Deferred (that fires with this dirnode) when the operation is complete. I raise NoSuchChildError if I do not have a child by that name.""" def get_child_at_path(path): """Transform a child path into an IFilesystemNode. I perform a recursive series of 'get' operations to find the named descendant node. I return a Deferred that fires with the node, or errbacks with NoSuchChildError if the node could not be found. The path can be either a single string (slash-separated) or a list of path-name elements. All elements must be unicode strings. """ def get_child_and_metadata_at_path(path): """Transform a child path into an IFilesystemNode and metadata. I am like get_child_at_path(), but my Deferred fires with a tuple of (node, metadata). The metadata comes from the last edge. If the path is empty, the metadata will be an empty dictionary. """ def set_uri(name, writecap, readcap=None, metadata=None, overwrite=True): """I add a child (by writecap+readcap) at the specific name. I return a Deferred that fires when the operation finishes. If overwrite= is True, I will replace any existing child of the same name, otherwise an existing child will cause me to return ExistingChildError. The child name must be a unicode string. The child caps could be for a file, or for a directory. If you have both the writecap and readcap, you should provide both arguments. If you have only one cap and don't know whether it is read-only, provide it as the writecap argument and leave the readcap as None. If you have only one cap that is known to be read-only, provide it as the readcap argument and leave the writecap as None. The filecaps are typically obtained from an IFilesystemNode with get_uri() and get_readonly_uri(). If metadata= is provided, I will use it as the metadata for the named edge. This will replace any existing metadata. If metadata= is left as the default value of None, I will set ['mtime'] to the current time, and I will set ['ctime'] to the current time if there was not already a child by this name present. This roughly matches the ctime/mtime semantics of traditional filesystems. See the "About the metadata" section of webapi.txt for futher information. If this directory node is read-only, the Deferred will errback with a NotWriteableError.""" def set_children(entries, overwrite=True): """Add multiple children (by writecap+readcap) to a directory node. Takes a dictionary, with childname as keys and (writecap, readcap) tuples (or (writecap, readcap, metadata) triples) as values. Returns a Deferred that fires (with this dirnode) when the operation finishes. This is equivalent to calling set_uri() multiple times, but is much more efficient. All child names must be unicode strings. """ def set_node(name, child, metadata=None, overwrite=True): """I add a child at the specific name. I return a Deferred that fires when the operation finishes. This Deferred will fire with the child node that was just added. I will replace any existing child of the same name. The child name must be a unicode string. The 'child' instance must be an instance providing IFilesystemNode. If metadata= is provided, I will use it as the metadata for the named edge. This will replace any existing metadata. If metadata= is left as the default value of None, I will set ['mtime'] to the current time, and I will set ['ctime'] to the current time if there was not already a child by this name present. This roughly matches the ctime/mtime semantics of traditional filesystems. 
See the "About the metadata" section of webapi.txt for futher information. If this directory node is read-only, the Deferred will errback with a NotWriteableError.""" def set_nodes(entries, overwrite=True): """Add multiple children to a directory node. Takes a dict mapping unicode childname to (child_node, metdata) tuples. If metdata=None, the original metadata is left unmodified. Returns a Deferred that fires (with this dirnode) when the operation finishes. This is equivalent to calling set_node() multiple times, but is much more efficient.""" def add_file(name, uploadable, metadata=None, overwrite=True): """I upload a file (using the given IUploadable), then attach the resulting ImmutableFileNode to the directory at the given name. I set metadata the same way as set_uri and set_node. The child name must be a unicode string. I return a Deferred that fires (with the IFileNode of the uploaded file) when the operation completes.""" def delete(name, must_exist=True, must_be_directory=False, must_be_file=False): """I remove the child at the specific name. I return a Deferred that fires when the operation finishes. The child name must be a unicode string. If must_exist is True and I do not have a child by that name, I raise NoSuchChildError. If must_be_directory is True and the child is a file, or if must_be_file is True and the child is a directory, I raise ChildOfWrongTypeError.""" def create_subdirectory(name, initial_children={}, overwrite=True, metadata=None): """I create and attach a directory at the given name. The new directory can be empty, or it can be populated with children according to 'initial_children', which takes a dictionary in the same format as set_nodes (i.e. mapping unicode child name to (childnode, metadata) tuples). The child name must be a unicode string. I return a Deferred that fires (with the new directory node) when the operation finishes.""" def move_child_to(current_child_name, new_parent, new_child_name=None, overwrite=True): """I take one of my children and move them to a new parent. The child is referenced by name. On the new parent, the child will live under 'new_child_name', which defaults to 'current_child_name'. TODO: what should we do about metadata? I return a Deferred that fires when the operation finishes. The child name must be a unicode string. I raise NoSuchChildError if I do not have a child by that name.""" def build_manifest(): """I generate a table of everything reachable from this directory. I also compute deep-stats as described below. I return a Monitor. The Monitor's results will be a dictionary with four elements: res['manifest']: a list of (path, cap) tuples for all nodes (directories and files) reachable from this one. 'path' will be a tuple of unicode strings. The origin dirnode will be represented by an empty path tuple. res['verifycaps']: a list of (printable) verifycap strings, one for each reachable non-LIT node. This is a set: it will contain no duplicates. res['storage-index']: a list of (base32) storage index strings, one for each reachable non-LIT node. This is a set: it will contain no duplicates. res['stats']: a dictionary, the same that is generated by start_deep_stats() below. The Monitor will also have an .origin_si attribute with the (binary) storage index of the starting point. """ def start_deep_stats(): """Return a Monitor, examining all nodes (directories and files) reachable from this one. 
The Monitor's results will be a dictionary with the following keys:: count-immutable-files: count of how many CHK files are in the set count-mutable-files: same, for mutable files (does not include directories) count-literal-files: same, for LIT files count-files: sum of the above three count-directories: count of directories size-immutable-files: total bytes for all CHK files in the set size-mutable-files (TODO): same, for current version of all mutable files, does not include directories size-literal-files: same, for LIT files size-directories: size of mutable files used by directories largest-directory: number of bytes in the largest directory largest-directory-children: number of children in the largest directory largest-immutable-file: number of bytes in the largest CHK file size-mutable-files is not yet implemented, because it would involve even more queries than deep_stats does. The Monitor will also have an .origin_si attribute with the (binary) storage index of the starting point. This operation will visit every directory node underneath this one, and can take a long time to run. On a typical workstation with good bandwidth, this can examine roughly 15 directories per second (and takes several minutes of 100% CPU for ~1700 directories). """ class ICodecEncoder(Interface): def set_params(data_size, required_shares, max_shares): """Set up the parameters of this encoder. This prepares the encoder to perform an operation that converts a single block of data into a number of shares, such that a future ICodecDecoder can use a subset of these shares to recover the original data. This operation is invoked by calling encode(). Once the encoding parameters are set up, the encode operation can be invoked multiple times. set_params() prepares the encoder to accept blocks of input data that are exactly 'data_size' bytes in length. The encoder will be prepared to produce 'max_shares' shares for each encode() operation (although see the 'desired_share_ids' to use less CPU). The encoding math will be chosen such that the decoder can get by with as few as 'required_shares' of these shares and still reproduce the original data. For example, set_params(1000, 5, 5) offers no redundancy at all, whereas set_params(1000, 1, 10) provides 10x redundancy. Numerical Restrictions: 'data_size' is required to be an integral multiple of 'required_shares'. In general, the caller should choose required_shares and max_shares based upon their reliability requirements and the number of peers available (the total storage space used is roughly equal to max_shares*data_size/required_shares), then choose data_size to achieve the memory footprint desired (larger data_size means more efficient operation, smaller data_size means smaller memory footprint). In addition, 'max_shares' must be equal to or greater than 'required_shares'. Of course, setting them to be equal causes encode() to degenerate into a particularly slow form of the 'split' utility. See encode() for more details about how these parameters are used. set_params() must be called before any other ICodecEncoder methods may be invoked. """ def get_params(): """Return the 3-tuple of data_size, required_shares, max_shares""" def get_encoder_type(): """Return a short string that describes the type of this encoder. There is required to be a global table of encoder classes. This method returns an index into this table; the value at this index is an encoder class, and this encoder is an instance of that class. 
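        For orientation, a minimal sketch of the overall calling sequence
        for this interface (illustrative only; CRSEncoder from
        allmydata.codec is assumed to be the concrete zfec-backed
        implementation)::

            from allmydata.codec import CRSEncoder

            enc = CRSEncoder()
            enc.set_params(1200, 3, 10)  # data_size, required_shares, max_shares
            segments = ["a"*400, "b"*400, "c"*400]  # 3 in-shares, 400 bytes each
            d = enc.encode(segments)
            def _encoded((shares, shareids)):
                # ten shares come back; any three, together with their
                # shareids, suffice to reconstruct the original 1200 bytes
                return zip(shareids, shares)
            d.addCallback(_encoded)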
""" def get_block_size(): """Return the length of the shares that encode() will produce. """ def encode_proposal(data, desired_share_ids=None): """Encode some data. 'data' must be a string (or other buffer object), and len(data) must be equal to the 'data_size' value passed earlier to set_params(). This will return a Deferred that will fire with two lists. The first is a list of shares, each of which is a string (or other buffer object) such that len(share) is the same as what get_share_size() returned earlier. The second is a list of shareids, in which each is an integer. The lengths of the two lists will always be equal to each other. The user should take care to keep each share closely associated with its shareid, as one is useless without the other. The length of this output list will normally be the same as the value provided to the 'max_shares' parameter of set_params(). This may be different if 'desired_share_ids' is provided. 'desired_share_ids', if provided, is required to be a sequence of ints, each of which is required to be >= 0 and < max_shares. If not provided, encode() will produce 'max_shares' shares, as if 'desired_share_ids' were set to range(max_shares). You might use this if you initially thought you were going to use 10 peers, started encoding, and then two of the peers dropped out: you could use desired_share_ids= to skip the work (both memory and CPU) of producing shares for the peers that are no longer available. """ def encode(inshares, desired_share_ids=None): """Encode some data. This may be called multiple times. Each call is independent. inshares is a sequence of length required_shares, containing buffers (i.e. strings), where each buffer contains the next contiguous non-overlapping segment of the input data. Each buffer is required to be the same length, and the sum of the lengths of the buffers is required to be exactly the data_size promised by set_params(). (This implies that the data has to be padded before being passed to encode(), unless of course it already happens to be an even multiple of required_shares in length.) Note: the requirement to break up your data into 'required_shares' chunks of exactly the right length before calling encode() is surprising from point of view of a user who doesn't know how FEC works. It feels like an implementation detail that has leaked outside the abstraction barrier. Is there a use case in which the data to be encoded might already be available in pre-segmented chunks, such that it is faster or less work to make encode() take a list rather than splitting a single string? Yes, there is: suppose you are uploading a file with K=64, N=128, segsize=262,144. Then each in-share will be of size 4096. If you use this .encode() API then your code could first read each successive 4096-byte chunk from the file and store each one in a Python string and store each such Python string in a Python list. Then you could call .encode(), passing that list as "inshares". The encoder would generate the other 64 "secondary shares" and return to you a new list containing references to the same 64 Python strings that you passed in (as the primary shares) plus references to the new 64 Python strings. (You could even imagine that your code could use readv() so that the operating system can arrange to get all of those bytes copied from the file into the Python list of Python strings as efficiently as possible instead of having a loop written in C or in Python to copy the next part of the file into the next string.) 
On the other hand if you instead use the .encode_proposal() API (above), then your code can first read in all of the 262,144 bytes of the segment from the file into a Python string, then call .encode_proposal() passing the segment data as the "data" argument. The encoder would basically first split the "data" argument into a list of 64 in-shares of 4096 byte each, and then do the same thing that .encode() does. So this would result in a little bit more copying of data and a little bit higher of a "maximum memory usage" during the process, although it might or might not make a practical difference for our current use cases. Note that "inshares" is a strange name for the parameter if you think of the parameter as being just for feeding in data to the codec. It makes more sense if you think of the result of this encoding as being the set of shares from inshares plus an extra set of "secondary shares" (or "check shares"). It is a surprising name! If the API is going to be surprising then the name should be surprising. If we switch to encode_proposal() above then we should also switch to an unsurprising name. 'desired_share_ids', if provided, is required to be a sequence of ints, each of which is required to be >= 0 and < max_shares. If not provided, encode() will produce 'max_shares' shares, as if 'desired_share_ids' were set to range(max_shares). You might use this if you initially thought you were going to use 10 peers, started encoding, and then two of the peers dropped out: you could use desired_share_ids= to skip the work (both memory and CPU) of producing shares for the peers that are no longer available. For each call, encode() will return a Deferred that fires with two lists, one containing shares and the other containing the shareids. The get_share_size() method can be used to determine the length of the share strings returned by encode(). Each shareid is a small integer, exactly as passed into 'desired_share_ids' (or range(max_shares), if desired_share_ids was not provided). The shares and their corresponding shareids are required to be kept together during storage and retrieval. Specifically, the share data is useless by itself: the decoder needs to be told which share is which by providing it with both the shareid and the actual share data. This function will allocate an amount of memory roughly equal to:: (max_shares - required_shares) * get_share_size() When combined with the memory that the caller must allocate to provide the input data, this leads to a memory footprint roughly equal to the size of the resulting encoded shares (i.e. the expansion factor times the size of the input segment). """ # rejected ideas: # # returning a list of (shareidN,shareN) tuples instead of a pair of # lists (shareids..,shares..). Brian thought the tuples would # encourage users to keep the share and shareid together throughout # later processing, Zooko pointed out that the code to iterate # through two lists is not really more complicated than using a list # of tuples and there's also a performance improvement # # having 'data_size' not required to be an integral multiple of # 'required_shares'. Doing this would require encode() to perform # padding internally, and we'd prefer to have any padding be done # explicitly by the caller. Yes, it is an abstraction leak, but # hopefully not an onerous one. class ICodecDecoder(Interface): def set_params(data_size, required_shares, max_shares): """Set the params. 
They have to be exactly the same ones that were used for encoding.""" def get_needed_shares(): """Return the number of shares needed to reconstruct the data. set_params() is required to be called before this.""" def decode(some_shares, their_shareids): """Decode a partial list of shares into data. 'some_shares' is required to be a sequence of buffers of sharedata, a subset of the shares returned by ICodecEncode.encode(). Each share is required to be of the same length. The i'th element of their_shareids is required to be the shareid of the i'th buffer in some_shares. This returns a Deferred that fires with a sequence of buffers. This sequence will contain all of the segments of the original data, in order. The sum of the lengths of all of the buffers will be the 'data_size' value passed into the original ICodecEncode.set_params() call. To get back the single original input block of data, use ''.join(output_buffers), or you may wish to simply write them in order to an output file. Note that some of the elements in the result sequence may be references to the elements of the some_shares input sequence. In particular, this means that if those share objects are mutable (e.g. arrays) and if they are changed, then both the input (the 'some_shares' parameter) and the output (the value given when the deferred is triggered) will change. The length of 'some_shares' is required to be exactly the value of 'required_shares' passed into the original ICodecEncode.set_params() call. """ class IEncoder(Interface): """I take an object that provides IEncryptedUploadable, which provides encrypted data, and a list of shareholders. I then encode, hash, and deliver shares to those shareholders. I will compute all the necessary Merkle hash trees that are necessary to validate the crypttext that eventually comes back from the shareholders. I provide the URI Extension Block Hash, and the encoding parameters, both of which must be included in the URI. I do not choose shareholders, that is left to the IUploader. I must be given a dict of RemoteReferences to storage buckets that are ready and willing to receive data. """ def set_size(size): """Specify the number of bytes that will be encoded. This must be peformed before get_serialized_params() can be called. """ def set_encrypted_uploadable(u): """Provide a source of encrypted upload data. 'u' must implement IEncryptedUploadable. When this is called, the IEncryptedUploadable will be queried for its length and the storage_index that should be used. This returns a Deferred that fires with this Encoder instance. This must be performed before start() can be called. """ def get_param(name): """Return an encoding parameter, by name. 'storage_index': return a string with the (16-byte truncated SHA-256 hash) storage index to which these shares should be pushed. 'share_counts': return a tuple describing how many shares are used: (needed_shares, servers_of_happiness, total_shares) 'num_segments': return an int with the number of segments that will be encoded. 'segment_size': return an int with the size of each segment. 'block_size': return the size of the individual blocks that will be delivered to a shareholder's put_block() method. By knowing this, the shareholder will be able to keep all blocks in a single file and still provide random access when reading them. # TODO: can we avoid exposing this? 'share_size': an int with the size of the data that will be stored on each shareholder. 
This is aggregate amount of data that will be sent to the shareholder, summed over all the put_block() calls I will ever make. It is useful to determine this size before asking potential shareholders whether they will grant a lease or not, since their answers will depend upon how much space we need. TODO: this might also include some amount of overhead, like the size of all the hashes. We need to decide whether this is useful or not. 'serialized_params': a string with a concise description of the codec name and its parameters. This may be passed into the IUploadable to let it make sure that the same file encoded with different parameters will result in different storage indexes. Once this is called, set_size() and set_params() may not be called. """ def set_shareholders(shareholders, servermap): """Tell the encoder where to put the encoded shares. 'shareholders' must be a dictionary that maps share number (an integer ranging from 0 to n-1) to an instance that provides IStorageBucketWriter. 'servermap' is a dictionary that maps share number (as defined above) to a set of peerids. This must be performed before start() can be called.""" def start(): """Begin the encode/upload process. This involves reading encrypted data from the IEncryptedUploadable, encoding it, uploading the shares to the shareholders, then sending the hash trees. set_encrypted_uploadable() and set_shareholders() must be called before this can be invoked. This returns a Deferred that fires with a verify cap when the upload process is complete. The verifycap, plus the encryption key, is sufficient to construct the read cap. """ class IDecoder(Interface): """I take a list of shareholders and some setup information, then download, validate, decode, and decrypt data from them, writing the results to an output file. I do not locate the shareholders, that is left to the IDownloader. I must be given a dict of RemoteReferences to storage buckets that are ready to send data. """ def setup(outfile): """I take a file-like object (providing write and close) to which all the plaintext data will be written. TODO: producer/consumer . Maybe write() should return a Deferred that indicates when it will accept more data? But probably having the IDecoder be a producer is easier to glue to IConsumer pieces. """ def set_shareholders(shareholders): """I take a dictionary that maps share identifiers (small integers) to RemoteReferences that provide RIBucketReader. This must be called before start().""" def start(): """I start the download. This process involves retrieving data and hash chains from the shareholders, using the hashes to validate the data, decoding the shares into segments, decrypting the segments, then writing the resulting plaintext to the output file. I return a Deferred that will fire (with self) when the download is complete. """ class IDownloadTarget(Interface): # Note that if the IDownloadTarget is also an IConsumer, the downloader # will register itself as a producer. This allows the target to invoke # downloader.pauseProducing, resumeProducing, and stopProducing. def open(size): """Called before any calls to write() or close(). If an error occurs before any data is available, fail() may be called without a previous call to open(). 'size' is the length of the file being downloaded, in bytes.""" def write(data): """Output some data to the target.""" def close(): """Inform the target that there is no more data to be written.""" def fail(why): """fail() is called to indicate that the download has failed. 
'why' is a Failure object indicating what went wrong. No further methods will be invoked on the IDownloadTarget after fail().""" def register_canceller(cb): """The CiphertextDownloader uses this to register a no-argument function that the target can call to cancel the download. Once this canceller is invoked, no further calls to write() or close() will be made.""" def finish(): """When the CiphertextDownloader is done, this finish() function will be called. Whatever it returns will be returned to the invoker of Downloader.download. """ class IDownloader(Interface): def download(uri, target): """Perform a CHK download, sending the data to the given target. 'target' must provide IDownloadTarget. Returns a Deferred that fires (with the results of target.finish) when the download is finished, or errbacks if something went wrong.""" class IEncryptedUploadable(Interface): def set_upload_status(upload_status): """Provide an IUploadStatus object that should be filled with status information. The IEncryptedUploadable is responsible for setting key-determination progress ('chk'), size, storage_index, and ciphertext-fetch progress. It may delegate some of this responsibility to others, in particular to the IUploadable.""" def get_size(): """This behaves just like IUploadable.get_size().""" def get_all_encoding_parameters(): """Return a Deferred that fires with a tuple of (k,happy,n,segment_size). The segment_size will be used as-is, and must match the following constraints: it must be a multiple of k, and it shouldn't be unreasonably larger than the file size (if segment_size is larger than filesize, the difference must be stored as padding). This usually passes through to the IUploadable method of the same name. The encoder strictly obeys the values returned by this method. To make an upload use non-default encoding parameters, you must arrange to control the values that this method returns. """ def get_storage_index(): """Return a Deferred that fires with a 16-byte storage index. """ def read_encrypted(length, hash_only): """This behaves just like IUploadable.read(), but returns crypttext instead of plaintext. If hash_only is True, then this discards the data (and returns an empty list); this improves efficiency when resuming an interrupted upload (where we need to compute the plaintext hashes, but don't need the redundant encrypted data).""" def close(): """Just like IUploadable.close().""" class IUploadable(Interface): def set_upload_status(upload_status): """Provide an IUploadStatus object that should be filled with status information. The IUploadable is responsible for setting key-determination progress ('chk').""" def set_default_encoding_parameters(params): """Set the default encoding parameters, which must be a dict mapping strings to ints. The meaningful keys are 'k', 'happy', 'n', and 'max_segment_size'. These might have an influence on the final encoding parameters returned by get_all_encoding_parameters(), if the Uploadable doesn't have more specific preferences. This call is optional: if it is not used, the Uploadable will use some built-in defaults. If used, this method must be called before any other IUploadable methods to have any effect. """ def get_size(): """Return a Deferred that will fire with the length of the data to be uploaded, in bytes. This will be called before the data is actually used, to compute encoding parameters. """ def get_all_encoding_parameters(): """Return a Deferred that fires with a tuple of (k,happy,n,segment_size). 
The segment_size will be used as-is, and must match the following constraints: it must be a multiple of k, and it shouldn't be unreasonably larger than the file size (if segment_size is larger than filesize, the difference must be stored as padding). The relative values of k and n allow some IUploadables to request better redundancy than others (in exchange for consuming more space in the grid). Larger values of segment_size reduce hash overhead, while smaller values reduce memory footprint and cause data to be delivered in smaller pieces (which may provide a smoother and more predictable download experience). The encoder strictly obeys the values returned by this method. To make an upload use non-default encoding parameters, you must arrange to control the values that this method returns. One way to influence them may be to call set_encoding_parameters() before calling get_all_encoding_parameters(). """ def get_encryption_key(): """Return a Deferred that fires with a 16-byte AES key. This key will be used to encrypt the data. The key will also be hashed to derive the StorageIndex. Uploadables that want to achieve convergence should hash their file contents and the serialized_encoding_parameters to form the key (which of course requires a full pass over the data). Uploadables can use the upload.ConvergentUploadMixin class to achieve this automatically. Uploadables that do not care about convergence (or do not wish to make multiple passes over the data) can simply return a strongly-random 16 byte string. get_encryption_key() may be called multiple times: the IUploadable is required to return the same value each time. """ def read(length): """Return a Deferred that fires with a list of strings (perhaps with only a single element) that, when concatenated together, contain the next 'length' bytes of data. If EOF is near, this may provide fewer than 'length' bytes. The total number of bytes provided by read() before it signals EOF must equal the size provided by get_size(). If the data must be acquired through multiple internal read operations, returning a list instead of a single string may help to reduce string copies. However, the length of the concatenated strings must equal the amount of data requested, unless EOF is encountered. Long reads, or short reads without EOF, are not allowed. read() should return the same amount of data as a local disk file read, just in a different shape and asynchronously. 'length' will typically be equal to (min(get_size(),1MB)/req_shares), so a 10kB file means length=3kB, 100kB file means length=30kB, and >=1MB file means length=300kB. This method provides for a single full pass through the data. Later use cases may desire multiple passes or access to only parts of the data (such as a mutable file making small edits-in-place). This API will be expanded once those use cases are better understood. """ def close(): """The upload is finished, and whatever filehandle was in use may be closed.""" class IMutableUploadable(Interface): """ I represent content that is due to be uploaded to a mutable filecap. """ # This is somewhat simpler than the IUploadable interface above # because mutable files do not need to be concerned with possibly # generating a CHK, nor with per-file keys. 
It is a subset of the # methods in IUploadable, though, so we could just as well implement # the mutable uploadables as IUploadables that don't happen to use # those methods (with the understanding that the unused methods will # never be called on such objects) def get_size(): """ Returns a Deferred that fires with the size of the content held by the uploadable. """ def read(length): """ Returns a list of strings that, when concatenated, are the next length bytes of the file, or fewer if there are fewer bytes between the current location and the end of the file. """ def close(): """ The process that used the Uploadable is finished using it, so the uploadable may be closed. """ class IUploadResults(Interface): """I am returned by immutable upload() methods and contain the results of the upload. Note that some of my methods return empty values (0 or an empty dict) when called for non-distributed LIT files.""" def get_file_size(): """Return the file size, in bytes.""" def get_uri(): """Return the (string) URI of the object uploaded, a CHK readcap.""" def get_ciphertext_fetched(): """Return the number of bytes fetched by the helper for this upload, or 0 if the helper did not need to fetch any bytes (or if there was no helper).""" def get_preexisting_shares(): """Return the number of shares that were already present in the grid.""" def get_pushed_shares(): """Return the number of shares that were uploaded.""" def get_sharemap(): """Return a dict mapping share identifier to set of IServer instances. This indicates which servers were given which shares. For immutable files, the shareid is an integer (the share number, from 0 to N-1). For mutable files, it is a string of the form 'seq%d-%s-sh%d', containing the sequence number, the roothash, and the share number.""" def get_servermap(): """Return dict mapping IServer instance to a set of share numbers.""" def get_timings(): """Return dict of timing information, mapping name to seconds. All times are floats: total : total upload time, start to finish storage_index : time to compute the storage index peer_selection : time to decide which peers will be used contacting_helper : initial helper query to upload/no-upload decision helper_total : initial helper query to helper finished pushing cumulative_fetch : helper waiting for ciphertext requests total_fetch : helper start to last ciphertext response cumulative_encoding : just time spent in zfec cumulative_sending : just time spent waiting for storage servers hashes_and_close : last segment push to shareholder close total_encode_and_push : first encode to shareholder close """ def get_uri_extension_data(): """Return the dict of UEB data created for this file.""" def get_verifycapstr(): """Return the (string) verify-cap URI for the uploaded object.""" class IDownloadResults(Interface): """I am created internally by download() methods. I contain a number of public attributes that contain details about the download process:: .file_size : the size of the file, in bytes .servers_used : set of server peerids that were used during download .server_problems : dict mapping server peerid to a problem string. Only servers that had problems (bad hashes, disconnects) are listed here. .servermap : dict mapping server peerid to a set of share numbers. Only servers that had any shares are listed here.
.timings : dict of timing information, mapping name to seconds (float) peer_selection : time to ask servers about shares servers_peer_selection : dict of peerid to DYHB-query time uri_extension : time to fetch a copy of the URI extension block hashtrees : time to fetch the hash trees segments : time to fetch, decode, and deliver segments cumulative_fetch : time spent waiting for storage servers cumulative_decode : just time spent in zfec cumulative_decrypt : just time spent in decryption total : total download time, start to finish fetch_per_server : dict of server to list of per-segment fetch times """ class IUploader(Interface): def upload(uploadable): """Upload the file. 'uploadable' must implement IUploadable. This returns a Deferred that fires with an IUploadResults instance, from which the URI of the file can be obtained as results.uri .""" class ICheckable(Interface): def check(monitor, verify=False, add_lease=False): """Check up on my health, optionally repairing any problems. This returns a Deferred that fires with an instance that provides ICheckResults, or None if the object is non-distributed (i.e. LIT files). The monitor will be checked periodically to see if the operation has been cancelled. If so, no new queries will be sent, and the Deferred will fire (with an OperationCancelledError) immediately. Filenodes and dirnodes (which provide IFilesystemNode) are also checkable. Instances that represent verifier-caps will be checkable but not downloadable. Some objects (like LIT files) do not actually live in the grid, and their checkers return None (non-distributed files are always healthy). If verify=False, a relatively lightweight check will be performed: I will ask all servers if they have a share for me, and I will believe whatever they say. If there are at least N distinct shares on the grid, my results will indicate r.is_healthy()==True. This requires a roundtrip to each server, but does not transfer very much data, so the network bandwidth is fairly low. If verify=True, a more resource-intensive check will be performed: every share will be downloaded, and the hashes will be validated on every bit. I will ignore any shares that failed their hash checks. If there are at least N distinct valid shares on the grid, my results will indicate r.is_healthy()==True. This requires N/k times as much download bandwidth (and server disk IO) as a regular download. If a storage server is holding a corrupt share, or is experiencing memory failures during retrieval, or is malicious or buggy, then verification will detect the problem, but checking will not. If add_lease=True, I will ensure that an up-to-date lease is present on each share. The lease secrets will be derived from my node secret (in BASEDIR/private/secret), so either I will add a new lease to the share, or I will merely renew the lease that I already had. In a future version of the storage-server protocol (once Accounting has been implemented), there may be additional options here to define the kind of lease that is obtained (which account number to claim, etc). TODO: any problems seen during checking will be reported to the health-manager.furl, a centralized object that is responsible for figuring out why files are unhealthy so corrective action can be taken. """ def check_and_repair(monitor, verify=False, add_lease=False): """Like check(), but if the file/directory is not healthy, attempt to repair the damage. Any non-healthy result will cause an immediate repair operation, to generate and upload new shares.
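For example, a caller might drive this roughly as follows (a hypothetical sketch; 'filenode' and 'monitor' stand in for objects the application already holds)::

  d = filenode.check_and_repair(monitor, verify=False)
  def _done(crr):
      if crr.get_repair_attempted() and not crr.get_repair_successful():
          pass  # handle the failure, e.g. schedule another repair attempt
      return crr.get_post_repair_results()
  d.addCallback(_done)
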
After repair, the file will be as healthy as we can make it. Details about what sort of repair is done will be put in the check-and-repair results. The Deferred will not fire until the repair is complete. This returns a Deferred that fires with an instance of ICheckAndRepairResults.""" class IDeepCheckable(Interface): def start_deep_check(verify=False, add_lease=False): """Check upon the health of me and everything I can reach. This is a recursive form of check(), useable only on dirnodes. I return a Monitor, with results that are an IDeepCheckResults object. TODO: If any of the directories I traverse are unrecoverable, the Monitor will report failure. If any of the files I check upon are unrecoverable, those problems will be reported in the IDeepCheckResults as usual, and the Monitor will not report a failure. """ def start_deep_check_and_repair(verify=False, add_lease=False): """Check upon the health of me and everything I can reach. Repair anything that isn't healthy. This is a recursive form of check_and_repair(), useable only on dirnodes. I return a Monitor, with results that are an IDeepCheckAndRepairResults object. TODO: If any of the directories I traverse are unrecoverable, the Monitor will report failure. If any of the files I check upon are unrecoverable, those problems will be reported in the IDeepCheckResults as usual, and the Monitor will not report a failure. """ class ICheckResults(Interface): """I contain the detailed results of a check/verify operation. """ def get_storage_index(): """Return a string with the (binary) storage index.""" def get_storage_index_string(): """Return a string with the (printable) abbreviated storage index.""" def get_uri(): """Return the (string) URI of the object that was checked.""" def is_healthy(): """Return a boolean, True if the file/dir is fully healthy, False if it is damaged in any way. Non-distributed LIT files always return True.""" def is_recoverable(): """Return a boolean, True if the file/dir can be recovered, False if not. Unrecoverable files are obviously unhealthy. Non-distributed LIT files always return True.""" def needs_rebalancing(): """Return a boolean, True if the file/dir's reliability could be improved by moving shares to new servers. Non-distributed LIT files always return False.""" # the following methods all return None for non-distributed LIT files def get_encoding_needed(): """Return 'k', the number of shares required for recovery""" def get_encoding_expected(): """Return 'N', the number of total shares generated""" def get_share_counter_good(): """Return the number of distinct good shares that were found. For mutable files, this counts shares for the 'best' version.""" def get_share_counter_wrong(): """For mutable files, return the number of shares for versions other than the 'best' one (which is defined as being the recoverable version with the highest sequence number, then the highest roothash). These are either leftover shares from an older version (perhaps on a server that was offline when an update occurred), shares from an unrecoverable newer version, or shares from an alternate current version that results from an uncoordinated write collision. For a healthy file, this will equal 0. For immutable files, this will always equal 0.""" def get_corrupt_shares(): """Return a list of 'share locators', one for each share that was found to be corrupt (integrity failure). 
Each share locator is a list of (IServer, storage_index, sharenum).""" def get_incompatible_shares(): """Return a list of 'share locators', one for each share that was found to be of an unknown format. Each share locator is a list of (IServer, storage_index, sharenum).""" def get_servers_responding(): """Return a list of IServer objects, one for each server that responded to the share query (even if they said they didn't have shares, and even if they said they did have shares but then didn't send them when asked, or dropped the connection, or returned a Failure, and even if they said they did have shares and sent incorrect ones when asked)""" def get_host_counter_good_shares(): """Return the number of distinct storage servers with good shares. If this number is less than get_share_counters()[good], then some shares are doubled up, increasing the correlation of failures. This indicates that one or more shares should be moved to an otherwise unused server, if one is available. """ def get_version_counter_recoverable(): """Return the number of recoverable versions of the file. For a healthy file, this will equal 1.""" def get_version_counter_unrecoverable(): """Return the number of unrecoverable versions of the file. For a healthy file, this will be 0.""" def get_sharemap(): """Return a dict mapping share identifier to list of IServer objects. This indicates which servers are holding which shares. For immutable files, the shareid is an integer (the share number, from 0 to N-1). For mutable files, it is a string of the form 'seq%d-%s-sh%d', containing the sequence number, the roothash, and the share number.""" def get_summary(): """Return a string with a brief (one-line) summary of the results.""" def get_report(): """Return a list of strings with more detailed results.""" class ICheckAndRepairResults(Interface): """I contain the detailed results of a check/verify/repair operation. The IFilesystemNode.check()/verify()/repair() methods all return instances that provide ICheckAndRepairResults. """ def get_storage_index(): """Return a string with the (binary) storage index.""" def get_storage_index_string(): """Return a string with the (printable) abbreviated storage index.""" def get_repair_attempted(): """Return a boolean, True if a repair was attempted. We might not attempt to repair the file because it was healthy, or healthy enough (i.e. some shares were missing but not enough to exceed some threshold), or because we don't know how to repair this object.""" def get_repair_successful(): """Return a boolean, True if repair was attempted and the file/dir was fully healthy afterwards. False if no repair was attempted or if a repair attempt failed.""" def get_pre_repair_results(): """Return an ICheckResults instance that describes the state of the file/dir before any repair was attempted.""" def get_post_repair_results(): """Return an ICheckResults instance that describes the state of the file/dir after any repair was attempted. If no repair was attempted, the pre-repair and post-repair results will be identical.""" class IDeepCheckResults(Interface): """I contain the results of a deep-check operation. This is returned by a call to ICheckable.deep_check(). 
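A consumer of these results might summarize them roughly like this (a hypothetical sketch, using the counter keys documented under get_counters() below)::

  def summarize(deep_results):
      c = deep_results.get_counters()
      return "%d/%d objects healthy, %d corrupt shares" % (
          c["count-objects-healthy"],
          c["count-objects-checked"],
          c["count-corrupt-shares"])
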
""" def get_root_storage_index_string(): """Return the storage index (abbreviated human-readable string) of the first object checked.""" def get_counters(): """Return a dictionary with the following keys:: count-objects-checked: count of how many objects were checked count-objects-healthy: how many of those objects were completely healthy count-objects-unhealthy: how many were damaged in some way count-objects-unrecoverable: how many were unrecoverable count-corrupt-shares: how many shares were found to have corruption, summed over all objects examined """ def get_corrupt_shares(): """Return a set of (IServer, storage_index, sharenum) for all shares that were found to be corrupt. storage_index is binary.""" def get_all_results(): """Return a dictionary mapping pathname (a tuple of strings, ready to be slash-joined) to an ICheckResults instance, one for each object that was checked.""" def get_results_for_storage_index(storage_index): """Retrive the ICheckResults instance for the given (binary) storage index. Raises KeyError if there are no results for that storage index.""" def get_stats(): """Return a dictionary with the same keys as IDirectoryNode.deep_stats().""" class IDeepCheckAndRepairResults(Interface): """I contain the results of a deep-check-and-repair operation. This is returned by a call to ICheckable.deep_check_and_repair(). """ def get_root_storage_index_string(): """Return the storage index (abbreviated human-readable string) of the first object checked.""" def get_counters(): """Return a dictionary with the following keys:: count-objects-checked: count of how many objects were checked count-objects-healthy-pre-repair: how many of those objects were completely healthy (before any repair) count-objects-unhealthy-pre-repair: how many were damaged in some way count-objects-unrecoverable-pre-repair: how many were unrecoverable count-objects-healthy-post-repair: how many of those objects were completely healthy (after any repair) count-objects-unhealthy-post-repair: how many were damaged in some way count-objects-unrecoverable-post-repair: how many were unrecoverable count-repairs-attempted: repairs were attempted on this many objects. The count-repairs- keys will always be provided, however unless repair=true is present, they will all be zero. count-repairs-successful: how many repairs resulted in healthy objects count-repairs-unsuccessful: how many repairs resulted did not results in completely healthy objects count-corrupt-shares-pre-repair: how many shares were found to have corruption, summed over all objects examined (before any repair) count-corrupt-shares-post-repair: how many shares were found to have corruption, summed over all objects examined (after any repair) """ def get_stats(): """Return a dictionary with the same keys as IDirectoryNode.deep_stats().""" def get_corrupt_shares(): """Return a set of (IServer, storage_index, sharenum) for all shares that were found to be corrupt before any repair was attempted. storage_index is binary. """ def get_remaining_corrupt_shares(): """Return a set of (IServer, storage_index, sharenum) for all shares that were found to be corrupt after any repair was completed. storage_index is binary. These are shares that need manual inspection and probably deletion. 
""" def get_all_results(): """Return a dictionary mapping pathname (a tuple of strings, ready to be slash-joined) to an ICheckAndRepairResults instance, one for each object that was checked.""" def get_results_for_storage_index(storage_index): """Retrive the ICheckAndRepairResults instance for the given (binary) storage index. Raises KeyError if there are no results for that storage index.""" class IRepairable(Interface): def repair(check_results): """Attempt to repair the given object. Returns a Deferred that fires with a IRepairResults object. I must be called with an object that implements ICheckResults, as proof that you have actually discovered a problem with this file. I will use the data in the checker results to guide the repair process, such as which servers provided bad data and should therefore be avoided. The ICheckResults object is inside the ICheckAndRepairResults object, which is returned by the ICheckable.check() method:: d = filenode.check(repair=False) def _got_results(check_and_repair_results): check_results = check_and_repair_results.get_pre_repair_results() return filenode.repair(check_results) d.addCallback(_got_results) return d """ class IRepairResults(Interface): """I contain the results of a repair operation.""" def get_successful(): """Returns a boolean: True if the repair made the file healthy, False if not. Repair failure generally indicates a file that has been damaged beyond repair.""" class IClient(Interface): def upload(uploadable): """Upload some data into a CHK, get back the UploadResults for it. @param uploadable: something that implements IUploadable @return: a Deferred that fires with the UploadResults instance. To get the URI for this file, use results.uri . """ def create_mutable_file(contents=""): """Create a new mutable file (with initial) contents, get back the new node instance. @param contents: (bytestring, callable, or None): this provides the initial contents of the mutable file. If 'contents' is a bytestring, it will be used as-is. If 'contents' is a callable, it will be invoked with the new MutableFileNode instance and is expected to return a bytestring with the initial contents of the file (the callable can use node.get_writekey() to decide how to encrypt the initial contents, e.g. for a brand new dirnode with initial children). contents=None is equivalent to an empty string. Using content_maker= is more efficient than creating a mutable file and setting its contents in two separate operations. @return: a Deferred that fires with an IMutableFileNode instance. """ def create_dirnode(initial_children={}): """Create a new unattached dirnode, possibly with initial children. @param initial_children: dict with keys that are unicode child names, and values that are (childnode, metadata) tuples. @return: a Deferred that fires with the new IDirectoryNode instance. """ def create_node_from_uri(uri, rouri): """Create a new IFilesystemNode instance from the uri, synchronously. @param uri: a string or IURI-providing instance, or None. This could be for a LiteralFileNode, a CHK file node, a mutable file node, or a directory node @param rouri: a string or IURI-providing instance, or None. If the main uri is None, I will use the rouri instead. If I recognize the format of the main uri, I will ignore the rouri (because it can be derived from the writecap). @return: an instance that provides IFilesystemNode (or more usefully one of its subclasses). 
File-specifying URIs will result in IFileNode-providing instances, like ImmutableFileNode, LiteralFileNode, or MutableFileNode. Directory-specifying URIs will result in IDirectoryNode-providing instances, like DirectoryNode. """ class INodeMaker(Interface): """The NodeMaker is used to create IFilesystemNode instances. It can accept a filecap/dircap string and return the node right away. It can also create new nodes (i.e. upload a file, or create a mutable file) asynchronously. Once you have one of these nodes, you can use other methods to determine whether it is a file or directory, and to download or modify its contents. The NodeMaker encapsulates all the authorities that these IFilesystemNodes require (like references to the StorageFarmBroker). Each Tahoe process will typically have a single NodeMaker, but unit tests may create simplified/mocked forms for testing purposes. """ def create_from_cap(writecap, readcap=None, deep_immutable=False, name=u""): """I create an IFilesystemNode from the given writecap/readcap. I can only provide nodes for existing file/directory objects: use my other methods to create new objects. I return synchronously.""" def create_mutable_file(contents=None, keysize=None): """I create a new mutable file, and return a Deferred that will fire with the IMutableFileNode instance when it is ready. If contents= is provided (a bytestring), it will be used as the initial contents of the new file, otherwise the file will contain zero bytes. keysize= is for use by unit tests, to create mutable files that are smaller than usual.""" def create_new_mutable_directory(initial_children={}): """I create a new mutable directory, and return a Deferred that will fire with the IDirectoryNode instance when it is ready. If initial_children= is provided (a dict mapping unicode child name to (childnode, metadata_dict) tuples), the directory will be populated with those children, otherwise it will be empty.""" class IClientStatus(Interface): def list_all_uploads(): """Return a list of uploader objects, one for each upload that currently has an object available (tracked with weakrefs). This is intended for debugging purposes.""" def list_active_uploads(): """Return a list of active IUploadStatus objects.""" def list_recent_uploads(): """Return a list of IUploadStatus objects for the most recently started uploads.""" def list_all_downloads(): """Return a list of downloader objects, one for each download that currently has an object available (tracked with weakrefs). This is intended for debugging purposes.""" def list_active_downloads(): """Return a list of active IDownloadStatus objects.""" def list_recent_downloads(): """Return a list of IDownloadStatus objects for the most recently started downloads.""" class IUploadStatus(Interface): def get_started(): """Return a timestamp (float with seconds since epoch) indicating when the operation was started.""" def get_storage_index(): """Return a string with the (binary) storage index in use on this upload. Returns None if the storage index has not yet been calculated.""" def get_size(): """Return an integer with the number of bytes that will eventually be uploaded for this file. Returns None if the size is not yet known. """ def using_helper(): """Return True if this upload is using a Helper, False if not.""" def get_status(): """Return a string describing the current state of the upload process.""" def get_progress(): """Returns a tuple of floats, (chk, ciphertext, encode_and_push), each from 0.0 to 1.0 . 
'chk' describes how much progress has been made towards hashing the file to determine a CHK encryption key: if non-convergent encryption is in use, this will be trivial, otherwise the whole file must be hashed. 'ciphertext' describes how much of the ciphertext has been pushed to the helper, and is '1.0' for non-helper uploads. 'encode_and_push' describes how much of the encode-and-push process has finished: for helper uploads this is dependent upon the helper providing progress reports. It might be reasonable to add all three numbers and report the sum to the user.""" def get_active(): """Return True if the upload is currently active, False if not.""" def get_results(): """Return an instance of UploadResults (which contains timing and sharemap information). Might return None if the upload is not yet finished.""" def get_counter(): """Each upload status gets a unique number: this method returns that number. This provides a handle to this particular upload, so a web page can generate a suitable hyperlink.""" class IDownloadStatus(Interface): def get_started(): """Return a timestamp (float with seconds since epoch) indicating when the operation was started.""" def get_storage_index(): """Return a string with the (binary) storage index in use on this download. This may be None if there is no storage index (i.e. LIT files).""" def get_size(): """Return an integer with the number of bytes that will eventually be retrieved for this file. Returns None if the size is not yet known. """ def using_helper(): """Return True if this download is using a Helper, False if not.""" def get_status(): """Return a string describing the current state of the download process.""" def get_progress(): """Returns a float (from 0.0 to 1.0) describing the amount of the download that has completed. This value will remain at 0.0 until the first byte of plaintext is pushed to the download target.""" def get_active(): """Return True if the download is currently active, False if not.""" def get_counter(): """Each download status gets a unique number: this method returns that number. This provides a handle to this particular download, so a web page can generate a suitable hyperlink.""" class IServermapUpdaterStatus(Interface): pass class IPublishStatus(Interface): pass class IRetrieveStatus(Interface): pass class NotCapableError(Exception): """You have tried to write to a read-only node.""" class BadWriteEnablerError(Exception): pass class RIControlClient(RemoteInterface): def wait_for_client_connections(num_clients=int): """Do not return until we have connections to at least NUM_CLIENTS storage servers. """ def upload_from_file_to_uri(filename=str, convergence=ChoiceOf(None, StringConstraint(2**20))): """Upload a file to the grid. This accepts a filename (which must be absolute) that points to a file on the node's local disk. The node will read the contents of this file, upload it to the grid, then return the URI at which it was uploaded. If convergence is None then a random encryption key will be used, else the plaintext will be hashed, then that hash will be mixed together with the "convergence" string to form the encryption key. """ return URI def download_from_uri_to_file(uri=URI, filename=str): """Download a file from the grid, placing it on the node's local disk at the given filename (which must be absolute[?]). Returns the absolute filename where the file was written.""" return str # debug stuff def get_memory_usage(): """Return a dict describes the amount of memory currently in use. 
The keys are 'VmPeak', 'VmSize', and 'VmData'. The values are integers, measuring memory consumption in bytes.""" return DictOf(str, int) def speed_test(count=int, size=int, mutable=Any()): """Write 'count' tempfiles to disk, all of the given size. Measure how long (in seconds) it takes to upload them all to the servers. Then measure how long it takes to download all of them. If 'mutable' is 'create', time creation of mutable files. If 'mutable' is 'upload', then time access to the same mutable file instead of creating one. Returns a tuple of (upload_time, download_time). """ return (float, float) def measure_peer_response_time(): """Send a short message to each connected peer, and measure the time it takes for them to respond to it. This is a rough measure of the application-level round trip time. @return: a dictionary mapping peerid to a float (RTT time in seconds) """ return DictOf(str, float) UploadResults = Any() #DictOf(str, str) class RIEncryptedUploadable(RemoteInterface): __remote_name__ = "RIEncryptedUploadable.tahoe.allmydata.com" def get_size(): return Offset def get_all_encoding_parameters(): return (int, int, int, long) def read_encrypted(offset=Offset, length=ReadSize): return ListOf(str) def close(): return None class RICHKUploadHelper(RemoteInterface): __remote_name__ = "RIUploadHelper.tahoe.allmydata.com" def get_version(): """ Return a dictionary of version information. """ return DictOf(str, Any()) def upload(reader=RIEncryptedUploadable): return UploadResults class RIHelper(RemoteInterface): __remote_name__ = "RIHelper.tahoe.allmydata.com" def get_version(): """ Return a dictionary of version information. """ return DictOf(str, Any()) def upload_chk(si=StorageIndex): """See if a file with a given storage index needs uploading. The helper will ask the appropriate storage servers to see if the file has already been uploaded. If so, the helper will return a set of 'upload results' that includes whatever hashes are needed to build the read-cap, and perhaps a truncated sharemap. If the file has not yet been uploaded (or if it was only partially uploaded), the helper will return an empty upload-results dictionary and also an RICHKUploadHelper object that will take care of the upload process. The client should call upload() on this object and pass it a reference to an RIEncryptedUploadable object that will provide ciphertext. When the upload is finished, the upload() method will finish and return the upload results. """ return (UploadResults, ChoiceOf(RICHKUploadHelper, None)) class RIStatsProvider(RemoteInterface): __remote_name__ = "RIStatsProvider.tahoe.allmydata.com" """ Provides access to statistics and monitoring information. """ def get_stats(): """ returns a dictionary containing 'counters' and 'stats', each a dictionary with string counter/stat name keys, and numeric or None values. counters are monotonically increasing measures of work done, and stats are instantaneous measures (potentially time averaged internally) """ return DictOf(str, DictOf(str, ChoiceOf(float, int, long, None))) class RIStatsGatherer(RemoteInterface): __remote_name__ = "RIStatsGatherer.tahoe.allmydata.com" """ Provides a monitoring service for centralised collection of stats """ def provide(provider=RIStatsProvider, nickname=str): """ @param provider: a stats collector instance that should be polled periodically by the gatherer to collect stats.
@param nickname: a name useful to identify the provided client """ return None class IStatsProducer(Interface): def get_stats(): """ returns a dictionary, with str keys representing the names of stats to be monitored, and numeric values. """ class RIKeyGenerator(RemoteInterface): __remote_name__ = "RIKeyGenerator.tahoe.allmydata.com" """ Provides a service offering to make RSA key pairs. """ def get_rsa_key_pair(key_size=int): """ @param key_size: the size of the signature key. @return: tuple(verifying_key, signing_key) """ return TupleOf(str, str) class FileTooLargeError(Exception): pass class IValidatedThingProxy(Interface): def start(): """ Acquire a thing and validate it. Return a deferred that is eventually fired with self if the thing is valid or errbacked if it can't be acquired or validated.""" class InsufficientVersionError(Exception): def __init__(self, needed, got): self.needed = needed self.got = got def __repr__(self): return "InsufficientVersionError(need '%s', got %s)" % (self.needed, self.got) class EmptyPathnameComponentError(Exception): """The webapi disallows empty pathname components.""" tahoe-lafs-1.10.0/src/allmydata/introducer/000077500000000000000000000000001221140116300205205ustar00rootroot00000000000000tahoe-lafs-1.10.0/src/allmydata/introducer/__init__.py000066400000000000000000000003271221140116300226330ustar00rootroot00000000000000 # This is for compatibilty with old .tac files, which reference # allmydata.introducer.IntroducerNode from allmydata.introducer.server import IntroducerNode # hush pyflakes _unused = [IntroducerNode] del _unused tahoe-lafs-1.10.0/src/allmydata/introducer/client.py000066400000000000000000000375321221140116300223620ustar00rootroot00000000000000 import time from zope.interface import implements from twisted.application import service from foolscap.api import Referenceable, eventually, RemoteInterface from allmydata.interfaces import InsufficientVersionError from allmydata.introducer.interfaces import IIntroducerClient, \ RIIntroducerSubscriberClient_v1, RIIntroducerSubscriberClient_v2 from allmydata.introducer.common import sign_to_foolscap, unsign_from_foolscap,\ convert_announcement_v1_to_v2, convert_announcement_v2_to_v1, \ make_index, get_tubid_string_from_ann, get_tubid_string from allmydata.util import log from allmydata.util.rrefutil import add_version_to_remote_reference from allmydata.util.keyutil import BadSignatureError class WrapV2ClientInV1Interface(Referenceable): # for_v1 """I wrap a v2 IntroducerClient to make it look like a v1 client, so it can be attached to an old server.""" implements(RIIntroducerSubscriberClient_v1) def __init__(self, original): self.original = original def remote_announce(self, announcements): lp = self.original.log("received %d announcements (v1)" % len(announcements)) anns_v1 = set([convert_announcement_v1_to_v2(ann_v1) for ann_v1 in announcements]) return self.original.got_announcements(anns_v1, lp) def remote_set_encoding_parameters(self, parameters): self.original.remote_set_encoding_parameters(parameters) class RIStubClient(RemoteInterface): # for_v1 """Each client publishes a service announcement for a dummy object called the StubClient. This object doesn't actually offer any services, but the announcement helps the Introducer keep track of which clients are subscribed (so the grid admin can keep track of things like the size of the grid and the client versions in use. 
This is the (empty) RemoteInterface for the StubClient.""" class StubClient(Referenceable): # for_v1 implements(RIStubClient) V1 = "http://allmydata.org/tahoe/protocols/introducer/v1" V2 = "http://allmydata.org/tahoe/protocols/introducer/v2" class IntroducerClient(service.Service, Referenceable): implements(RIIntroducerSubscriberClient_v2, IIntroducerClient) def __init__(self, tub, introducer_furl, nickname, my_version, oldest_supported, app_versions, sequencer): self._tub = tub self.introducer_furl = introducer_furl assert type(nickname) is unicode self._nickname = nickname self._my_version = my_version self._oldest_supported = oldest_supported self._app_versions = app_versions self._sequencer = sequencer self._my_subscriber_info = { "version": 0, "nickname": self._nickname, "app-versions": self._app_versions, "my-version": self._my_version, "oldest-supported": self._oldest_supported, } self._stub_client = None # for_v1 self._stub_client_furl = None self._outbound_announcements = {} # not signed self._published_announcements = {} # signed self._canary = Referenceable() self._publisher = None self._local_subscribers = [] # (servicename,cb,args,kwargs) tuples self._subscribed_service_names = set() self._subscriptions = set() # requests we've actually sent # _inbound_announcements remembers one announcement per # (servicename,serverid) pair. Anything that arrives with the same # pair will displace the previous one. This stores tuples of # (unpacked announcement dictionary, verifyingkey, rxtime). The ann # dicts can be compared for equality to distinguish re-announcement # from updates. It also provides memory for clients who subscribe # after startup. self._inbound_announcements = {} self.encoding_parameters = None # hooks for unit tests self._debug_counts = { "inbound_message": 0, "inbound_announcement": 0, "wrong_service": 0, "duplicate_announcement": 0, "update": 0, "new_announcement": 0, "outbound_message": 0, } self._debug_outstanding = 0 def _debug_retired(self, res): self._debug_outstanding -= 1 return res def startService(self): service.Service.startService(self) self._introducer_error = None rc = self._tub.connectTo(self.introducer_furl, self._got_introducer) self._introducer_reconnector = rc def connect_failed(failure): self.log("Initial Introducer connection failed: perhaps it's down", level=log.WEIRD, failure=failure, umid="c5MqUQ") d = self._tub.getReference(self.introducer_furl) d.addErrback(connect_failed) def _got_introducer(self, publisher): self.log("connected to introducer, getting versions") default = { "http://allmydata.org/tahoe/protocols/introducer/v1": { }, "application-version": "unknown: no get_version()", } d = add_version_to_remote_reference(publisher, default) d.addCallback(self._got_versioned_introducer) d.addErrback(self._got_error) def _got_error(self, f): # TODO: for the introducer, perhaps this should halt the application self._introducer_error = f # polled by tests def _got_versioned_introducer(self, publisher): self.log("got introducer version: %s" % (publisher.version,)) # we require an introducer that speaks at least one of (V1, V2) if not (V1 in publisher.version or V2 in publisher.version): raise InsufficientVersionError("V1 or V2", publisher.version) self._publisher = publisher publisher.notifyOnDisconnect(self._disconnected) self._maybe_publish() self._maybe_subscribe() def _disconnected(self): self.log("bummer, we've lost our connection to the introducer") self._publisher = None self._subscriptions.clear() def log(self, *args, **kwargs): if 
"facility" not in kwargs: kwargs["facility"] = "tahoe.introducer.client" return log.msg(*args, **kwargs) def subscribe_to(self, service_name, cb, *args, **kwargs): self._local_subscribers.append( (service_name,cb,args,kwargs) ) self._subscribed_service_names.add(service_name) self._maybe_subscribe() for index,(ann,key_s,when) in self._inbound_announcements.items(): servicename = index[0] if servicename == service_name: eventually(cb, key_s, ann, *args, **kwargs) def _maybe_subscribe(self): if not self._publisher: self.log("want to subscribe, but no introducer yet", level=log.NOISY) return for service_name in self._subscribed_service_names: if service_name in self._subscriptions: continue self._subscriptions.add(service_name) if V2 in self._publisher.version: self._debug_outstanding += 1 d = self._publisher.callRemote("subscribe_v2", self, service_name, self._my_subscriber_info) d.addBoth(self._debug_retired) else: d = self._subscribe_handle_v1(service_name) # for_v1 d.addErrback(log.err, facility="tahoe.introducer.client", level=log.WEIRD, umid="2uMScQ") def _subscribe_handle_v1(self, service_name): # for_v1 # they don't speak V2: must be a v1 introducer. Fall back to the v1 # 'subscribe' method, using a client adapter. ca = WrapV2ClientInV1Interface(self) self._debug_outstanding += 1 d = self._publisher.callRemote("subscribe", ca, service_name) d.addBoth(self._debug_retired) # We must also publish an empty 'stub_client' object, so the # introducer can count how many clients are connected and see what # versions they're running. if not self._stub_client_furl: self._stub_client = sc = StubClient() self._stub_client_furl = self._tub.registerReference(sc) def _publish_stub_client(ignored): furl = self._stub_client_furl self.publish("stub_client", { "anonymous-storage-FURL": furl, "permutation-seed-base32": get_tubid_string(furl), }) d.addCallback(_publish_stub_client) return d def create_announcement_dict(self, service_name, ann): ann_d = { "version": 0, # "seqnum" and "nonce" will be populated with new values in # publish(), each time we make a change "nickname": self._nickname, "app-versions": self._app_versions, "my-version": self._my_version, "oldest-supported": self._oldest_supported, "service-name": service_name, } ann_d.update(ann) return ann_d def publish(self, service_name, ann, signing_key=None): # we increment the seqnum every time we publish something new current_seqnum, current_nonce = self._sequencer() ann_d = self.create_announcement_dict(service_name, ann) self._outbound_announcements[service_name] = ann_d # publish all announcements with the new seqnum and nonce for service_name,ann_d in self._outbound_announcements.items(): ann_d["seqnum"] = current_seqnum ann_d["nonce"] = current_nonce ann_t = sign_to_foolscap(ann_d, signing_key) self._published_announcements[service_name] = ann_t self._maybe_publish() def _maybe_publish(self): if not self._publisher: self.log("want to publish, but no introducer yet", level=log.NOISY) return # this re-publishes everything. 
The Introducer ignores duplicates for ann_t in self._published_announcements.values(): self._debug_counts["outbound_message"] += 1 if V2 in self._publisher.version: self._debug_outstanding += 1 d = self._publisher.callRemote("publish_v2", ann_t, self._canary) d.addBoth(self._debug_retired) else: d = self._handle_v1_publisher(ann_t) # for_v1 d.addErrback(log.err, ann_t=ann_t, facility="tahoe.introducer.client", level=log.WEIRD, umid="xs9pVQ") def _handle_v1_publisher(self, ann_t): # for_v1 # they don't speak V2, so fall back to the old 'publish' method # (which takes an unsigned tuple of bytestrings) self.log("falling back to publish_v1", level=log.UNUSUAL, umid="9RCT1A") ann_v1 = convert_announcement_v2_to_v1(ann_t) self._debug_outstanding += 1 d = self._publisher.callRemote("publish", ann_v1) d.addBoth(self._debug_retired) return d def remote_announce_v2(self, announcements): lp = self.log("received %d announcements (v2)" % len(announcements)) return self.got_announcements(announcements, lp) def got_announcements(self, announcements, lp=None): # this is the common entry point for both v1 and v2 announcements self._debug_counts["inbound_message"] += 1 for ann_t in announcements: try: # this might raise UnknownKeyError or bad-sig error ann, key_s = unsign_from_foolscap(ann_t) # key is "v0-base32abc123" except BadSignatureError: self.log("bad signature on inbound announcement: %s" % (ann_t,), parent=lp, level=log.WEIRD, umid="ZAU15Q") # process other announcements that arrived with the bad one continue self._process_announcement(ann, key_s) def _process_announcement(self, ann, key_s): self._debug_counts["inbound_announcement"] += 1 service_name = str(ann["service-name"]) if service_name not in self._subscribed_service_names: self.log("announcement for a service we don't care about [%s]" % (service_name,), level=log.UNUSUAL, umid="dIpGNA") self._debug_counts["wrong_service"] += 1 return # for ASCII values, simplejson might give us unicode *or* bytes if "nickname" in ann and isinstance(ann["nickname"], str): ann["nickname"] = unicode(ann["nickname"]) nick_s = ann.get("nickname",u"").encode("utf-8") lp2 = self.log(format="announcement for nickname '%(nick)s', service=%(svc)s: %(ann)s", nick=nick_s, svc=service_name, ann=ann, umid="BoKEag") # how do we describe this node in the logs? desc_bits = [] if key_s: desc_bits.append("serverid=" + key_s[:20]) if "anonymous-storage-FURL" in ann: tubid_s = get_tubid_string_from_ann(ann) desc_bits.append("tubid=" + tubid_s[:8]) description = "/".join(desc_bits) # the index is used to track duplicates index = make_index(ann, key_s) # is this announcement a duplicate? if (index in self._inbound_announcements and self._inbound_announcements[index][0] == ann): self.log(format="reannouncement for [%(service)s]:%(description)s, ignoring", service=service_name, description=description, parent=lp2, level=log.UNUSUAL, umid="B1MIdA") self._debug_counts["duplicate_announcement"] += 1 return # does it update an existing one? if index in self._inbound_announcements: old,_,_ = self._inbound_announcements[index] if "seqnum" in old: # must beat previous sequence number to replace if ("seqnum" not in ann or not isinstance(ann["seqnum"], (int,long))): self.log("not replacing old announcement, no valid seqnum: %s" % (ann,), parent=lp2, level=log.NOISY, umid="zFGH3Q") return if ann["seqnum"] <= old["seqnum"]: # note that exact replays are caught earlier, by # comparing the entire signed announcement. 
self.log("not replacing old announcement, " "new seqnum is too old (%s <= %s) " "(replay attack?): %s" % (ann["seqnum"], old["seqnum"], ann), parent=lp2, level=log.UNUSUAL, umid="JAAAoQ") return # ok, seqnum is newer, allow replacement self._debug_counts["update"] += 1 self.log("replacing old announcement: %s" % (ann,), parent=lp2, level=log.NOISY, umid="wxwgIQ") else: self._debug_counts["new_announcement"] += 1 self.log("new announcement[%s]" % service_name, parent=lp2, level=log.NOISY) self._inbound_announcements[index] = (ann, key_s, time.time()) # note: we never forget an index, but we might update its value for (service_name2,cb,args,kwargs) in self._local_subscribers: if service_name2 == service_name: eventually(cb, key_s, ann, *args, **kwargs) def remote_set_encoding_parameters(self, parameters): self.encoding_parameters = parameters def connected_to_introducer(self): return bool(self._publisher) tahoe-lafs-1.10.0/src/allmydata/introducer/common.py000066400000000000000000000136661221140116300223760ustar00rootroot00000000000000 import re, simplejson from allmydata.util import keyutil, base32, rrefutil def make_index(ann, key_s): """Return something that can be used as an index (e.g. a tuple of strings), such that two messages that refer to the same 'thing' will have the same index. This is a tuple of (service-name, signing-key, None) for signed announcements, or (service-name, None, tubid_s) for unsigned announcements.""" service_name = str(ann["service-name"]) if key_s: return (service_name, key_s, None) else: tubid_s = get_tubid_string_from_ann(ann) return (service_name, None, tubid_s) def get_tubid_string_from_ann(ann): return get_tubid_string(str(ann.get("anonymous-storage-FURL") or ann.get("FURL"))) def get_tubid_string(furl): m = re.match(r'pb://(\w+)@', furl) assert m return m.group(1).lower() def convert_announcement_v1_to_v2(ann_t): (furl, service_name, ri_name, nickname, ver, oldest) = ann_t assert type(furl) is str assert type(service_name) is str # ignore ri_name assert type(nickname) is str assert type(ver) is str assert type(oldest) is str ann = {"version": 0, "nickname": nickname.decode("utf-8", "replace"), "app-versions": {}, "my-version": ver, "oldest-supported": oldest, "service-name": service_name, "anonymous-storage-FURL": furl, "permutation-seed-base32": get_tubid_string(furl), } msg = simplejson.dumps(ann).encode("utf-8") return (msg, None, None) def convert_announcement_v2_to_v1(ann_v2): (msg, sig, pubkey) = ann_v2 ann = simplejson.loads(msg) assert ann["version"] == 0 ann_t = (str(ann["anonymous-storage-FURL"]), str(ann["service-name"]), "remoteinterface-name is unused", ann["nickname"].encode("utf-8"), str(ann["my-version"]), str(ann["oldest-supported"]), ) return ann_t def sign_to_foolscap(ann, sk): # return (bytes, None, None) or (bytes, sig-str, pubkey-str). A future # HTTP-based serialization will use JSON({msg:b64(JSON(msg).utf8), # sig:v0-b64(sig), pubkey:v0-b64(pubkey)}) . 
msg = simplejson.dumps(ann).encode("utf-8") if sk: sig = "v0-"+base32.b2a(sk.sign(msg)) vk_bytes = sk.get_verifying_key_bytes() ann_t = (msg, sig, "v0-"+base32.b2a(vk_bytes)) else: ann_t = (msg, None, None) return ann_t class UnknownKeyError(Exception): pass def unsign_from_foolscap(ann_t): (msg, sig_vs, claimed_key_vs) = ann_t key_vs = None if sig_vs and claimed_key_vs: if not sig_vs.startswith("v0-"): raise UnknownKeyError("only v0- signatures recognized") if not claimed_key_vs.startswith("v0-"): raise UnknownKeyError("only v0- keys recognized") claimed_key = keyutil.parse_pubkey("pub-"+claimed_key_vs) sig_bytes = base32.a2b(keyutil.remove_prefix(sig_vs, "v0-")) claimed_key.verify(sig_bytes, msg) key_vs = claimed_key_vs ann = simplejson.loads(msg.decode("utf-8")) return (ann, key_vs) class SubscriberDescriptor: """This describes a subscriber, for status display purposes. It contains the following attributes: .service_name: what they subscribed to (string) .when: time when they subscribed (seconds since epoch) .nickname: their self-provided nickname, or "?" (unicode) .version: their self-provided version (string) .app_versions: versions of each library they use (dict str->str) .advertised_addresses: what hosts they listen on (list of strings) .remote_address: the external address from which they connected (string) .tubid: for subscribers connecting with Foolscap, their tubid (string) """ def __init__(self, service_name, when, nickname, version, app_versions, advertised_addresses, remote_address, tubid): self.service_name = service_name self.when = when self.nickname = nickname self.version = version self.app_versions = app_versions self.advertised_addresses = advertised_addresses self.remote_address = remote_address self.tubid = tubid class AnnouncementDescriptor: """This describes an announcement, for status display purposes. It contains the following attributes, which will be empty ("" for strings) if the client did not provide them: .when: time the announcement was first received (seconds since epoch) .index: the announcements 'index', a tuple of (string-or-None). The server remembers one announcement per index. .canary: a Referenceable on the announcer, so the server can learn when they disconnect (for the status display) .announcement: raw dictionary of announcement data .service_name: which service they are announcing (string) .version: 'my-version' portion of announcement (string) .nickname: their self-provided nickname, or "" (unicode) .serverid: the server identifier. This is a pubkey (for V2 clients), or a tubid (for V1 clients). .advertised_addresses: which hosts they listen on (list of strings) if the announcement included a key for 'anonymous-storage-FURL', else an empty list. 
""" def __init__(self, when, index, canary, ann_d): self.when = when self.index = index self.canary = canary self.announcement = ann_d self.service_name = ann_d["service-name"] self.version = ann_d.get("my-version", "") self.nickname = ann_d.get("nickname", u"") (service_name, key_s, tubid_s) = index self.serverid = key_s or tubid_s furl = ann_d.get("anonymous-storage-FURL") if furl: self.advertised_addresses = rrefutil.hosts_for_furl(furl) else: self.advertised_addresses = [] tahoe-lafs-1.10.0/src/allmydata/introducer/interfaces.py000066400000000000000000000133141221140116300232170ustar00rootroot00000000000000 from zope.interface import Interface from foolscap.api import StringConstraint, TupleOf, SetOf, DictOf, Any, \ RemoteInterface, Referenceable from old import RIIntroducerSubscriberClient_v1 FURL = StringConstraint(1000) # old introducer protocol (v1): # # Announcements are (FURL, service_name, remoteinterface_name, # nickname, my_version, oldest_supported) # the (FURL, service_name, remoteinterface_name) refer to the service being # announced. The (nickname, my_version, oldest_supported) refer to the # client as a whole. The my_version/oldest_supported strings can be parsed # by an allmydata.util.version.Version instance, and then compared. The # first goal is to make sure that nodes are not confused by speaking to an # incompatible peer. The second goal is to enable the development of # backwards-compatibility code. Announcement_v1 = TupleOf(FURL, str, str, str, str, str) # v2 protocol over foolscap: Announcements are 3-tuples of (bytes, str, str) # or (bytes, none, none) Announcement_v2 = Any() class RIIntroducerSubscriberClient_v2(RemoteInterface): __remote_name__ = "RIIntroducerSubscriberClient_v2.tahoe.allmydata.com" def announce_v2(announcements=SetOf(Announcement_v2)): """I accept announcements from the publisher.""" return None def set_encoding_parameters(parameters=(int, int, int)): """Advise the client of the recommended k-of-n encoding parameters for this grid. 'parameters' is a tuple of (k, desired, n), where 'n' is the total number of shares that will be created for any given file, while 'k' is the number of shares that must be retrieved to recover that file, and 'desired' is the minimum number of shares that must be placed before the uploader will consider its job a success. n/k is the expansion ratio, while k determines the robustness. Introducers should specify 'n' according to the expected size of the grid (there is no point to producing more shares than there are peers), and k according to the desired reliability-vs-overhead goals. Note that setting k=1 is equivalent to simple replication. """ return None SubscriberInfo = DictOf(str, Any()) class RIIntroducerPublisherAndSubscriberService_v2(RemoteInterface): """To publish a service to the world, connect to me and give me your announcement message. I will deliver a copy to all connected subscribers. 
To hear about services, connect to me and subscribe to a specific service_name.""" __remote_name__ = "RIIntroducerPublisherAndSubscriberService_v2.tahoe.allmydata.com" def get_version(): return DictOf(str, Any()) def publish(announcement=Announcement_v1): return None def publish_v2(announcement=Announcement_v2, canary=Referenceable): return None def subscribe(subscriber=RIIntroducerSubscriberClient_v1, service_name=str): return None def subscribe_v2(subscriber=RIIntroducerSubscriberClient_v2, service_name=str, subscriber_info=SubscriberInfo): """Give me a subscriber reference, and I will call its announce_v2() method with any announcements that match the desired service name. I will ignore duplicate subscriptions. The subscriber_info dictionary tells me about the subscriber, and is used for diagnostic/status displays.""" return None class IIntroducerClient(Interface): """I provide service introduction facilities for a node. I help nodes publish their services to the rest of the world, and I help them learn about services available on other nodes.""" def publish(service_name, ann, signing_key=None): """Publish the given announcement dictionary (which must be JSON-serializable), plus some additional keys, to the world. Each announcement is characterized by a (service_name, serverid) pair. When the server sees two announcements with the same pair, the later one will replace the earlier one. The serverid is derived from the signing_key, if present, otherwise it is derived from the 'anonymous-storage-FURL' key. If signing_key= is set to an instance of SigningKey, it will be used to sign the announcement.""" def subscribe_to(service_name, callback, *args, **kwargs): """Call this if you will eventually want to use services with the given SERVICE_NAME. This will prompt me to subscribe to announcements of those services. Your callback will be invoked with at least two arguments: a pubkey and an announcement dictionary, followed by any additional callback args/kwargs you gave me. The pubkey will be None unless the announcement was signed by the corresponding pubkey, in which case it will be a printable string like 'v0-base32..'. I will run your callback for both new announcements and for announcements that have changed, but you must be prepared to tolerate duplicates. The announcement that I give you comes from some other client. It will be a JSON-serializable dictionary which (by convention) is expected to have at least the following keys: version: 0 nickname: unicode app-versions: {} my-version: str oldest-supported: str service-name: str('storage') anonymous-storage-FURL: str(furl) Note that app-version will be an empty dictionary if either the publishing client or the Introducer are running older code. 
""" def connected_to_introducer(): """Returns a boolean, True if we are currently connected to the introducer, False if not.""" tahoe-lafs-1.10.0/src/allmydata/introducer/old.py000066400000000000000000000520641221140116300216570ustar00rootroot00000000000000 import time from base64 import b32decode from zope.interface import implements, Interface from twisted.application import service import allmydata from allmydata.interfaces import InsufficientVersionError from allmydata.util import log, idlib, rrefutil from foolscap.api import StringConstraint, TupleOf, SetOf, DictOf, Any, \ RemoteInterface, Referenceable, eventually, SturdyRef from allmydata.introducer.common import SubscriberDescriptor, \ AnnouncementDescriptor FURL = StringConstraint(1000) # We keep a copy of the old introducer (both client and server) here to # support compatibility tests. The old client is supposed to handle the new # server, and new client is supposed to handle the old server. # Announcements are (FURL, service_name, remoteinterface_name, # nickname, my_version, oldest_supported) # the (FURL, service_name, remoteinterface_name) refer to the service being # announced. The (nickname, my_version, oldest_supported) refer to the # client as a whole. The my_version/oldest_supported strings can be parsed # by an allmydata.util.version.Version instance, and then compared. The # first goal is to make sure that nodes are not confused by speaking to an # incompatible peer. The second goal is to enable the development of # backwards-compatibility code. Announcement = TupleOf(FURL, str, str, str, str, str) class RIIntroducerSubscriberClient_v1(RemoteInterface): __remote_name__ = "RIIntroducerSubscriberClient.tahoe.allmydata.com" def announce(announcements=SetOf(Announcement)): """I accept announcements from the publisher.""" return None def set_encoding_parameters(parameters=(int, int, int)): """Advise the client of the recommended k-of-n encoding parameters for this grid. 'parameters' is a tuple of (k, desired, n), where 'n' is the total number of shares that will be created for any given file, while 'k' is the number of shares that must be retrieved to recover that file, and 'desired' is the minimum number of shares that must be placed before the uploader will consider its job a success. n/k is the expansion ratio, while k determines the robustness. Introducers should specify 'n' according to the expected size of the grid (there is no point to producing more shares than there are peers), and k according to the desired reliability-vs-overhead goals. Note that setting k=1 is equivalent to simple replication. """ return None # When Foolscap can handle multiple interfaces (Foolscap#17), the # full-powered introducer will implement both RIIntroducerPublisher and # RIIntroducerSubscriberService. Until then, we define # RIIntroducerPublisherAndSubscriberService as a combination of the two, and # make everybody use that. class RIIntroducerPublisher_v1(RemoteInterface): """To publish a service to the world, connect to me and give me your announcement message. I will deliver a copy to all connected subscribers.""" __remote_name__ = "RIIntroducerPublisher.tahoe.allmydata.com" def publish(announcement=Announcement): # canary? 
return None class RIIntroducerSubscriberService_v1(RemoteInterface): __remote_name__ = "RIIntroducerSubscriberService.tahoe.allmydata.com" def subscribe(subscriber=RIIntroducerSubscriberClient_v1, service_name=str): """Give me a subscriber reference, and I will call its new_peers() method will any announcements that match the desired service name. I will ignore duplicate subscriptions. """ return None class RIIntroducerPublisherAndSubscriberService_v1(RemoteInterface): __remote_name__ = "RIIntroducerPublisherAndSubscriberService.tahoe.allmydata.com" def get_version(): return DictOf(str, Any()) def publish(announcement=Announcement): return None def subscribe(subscriber=RIIntroducerSubscriberClient_v1, service_name=str): return None class IIntroducerClient(Interface): """I provide service introduction facilities for a node. I help nodes publish their services to the rest of the world, and I help them learn about services available on other nodes.""" def publish(furl, service_name, remoteinterface_name): """Once you call this, I will tell the world that the Referenceable available at FURL is available to provide a service named SERVICE_NAME. The precise definition of the service being provided is identified by the Foolscap 'remote interface name' in the last parameter: this is supposed to be a globally-unique string that identifies the RemoteInterface that is implemented.""" def subscribe_to(service_name, callback, *args, **kwargs): """Call this if you will eventually want to use services with the given SERVICE_NAME. This will prompt me to subscribe to announcements of those services. Your callback will be invoked with at least two arguments: a serverid (binary string), and an announcement dictionary, followed by any additional callback args/kwargs you give me. I will run your callback for both new announcements and for announcements that have changed, but you must be prepared to tolerate duplicates. The announcement dictionary that I give you will have the following keys: version: 0 service-name: str('storage') FURL: str(furl) remoteinterface-name: str(ri_name) nickname: unicode app-versions: {} my-version: str oldest-supported: str Note that app-version will be an empty dictionary until #466 is done and both the introducer and the remote client have been upgraded. For current (native) server types, the serverid will always be equal to the binary form of the FURL's tubid. """ def connected_to_introducer(): """Returns a boolean, True if we are currently connected to the introducer, False if not.""" class IntroducerClient_v1(service.Service, Referenceable): implements(RIIntroducerSubscriberClient_v1, IIntroducerClient) def __init__(self, tub, introducer_furl, nickname, my_version, oldest_supported): self._tub = tub self.introducer_furl = introducer_furl assert type(nickname) is unicode self._nickname_utf8 = nickname.encode("utf-8") # we always send UTF-8 self._my_version = my_version self._oldest_supported = oldest_supported self._published_announcements = set() self._publisher = None self._local_subscribers = [] # (servicename,cb,args,kwargs) tuples self._subscribed_service_names = set() self._subscriptions = set() # requests we've actually sent # _current_announcements remembers one announcement per # (servicename,serverid) pair. Anything that arrives with the same # pair will displace the previous one. This stores unpacked # announcement dictionaries, which can be compared for equality to # distinguish re-announcement from updates. 
It also provides memory # for clients who subscribe after startup. self._current_announcements = {} self.encoding_parameters = None # hooks for unit tests self._debug_counts = { "inbound_message": 0, "inbound_announcement": 0, "wrong_service": 0, "duplicate_announcement": 0, "update": 0, "new_announcement": 0, "outbound_message": 0, } self._debug_outstanding = 0 def _debug_retired(self, res): self._debug_outstanding -= 1 return res def startService(self): service.Service.startService(self) self._introducer_error = None rc = self._tub.connectTo(self.introducer_furl, self._got_introducer) self._introducer_reconnector = rc def connect_failed(failure): self.log("Initial Introducer connection failed: perhaps it's down", level=log.WEIRD, failure=failure, umid="c5MqUQ") d = self._tub.getReference(self.introducer_furl) d.addErrback(connect_failed) def _got_introducer(self, publisher): self.log("connected to introducer, getting versions") default = { "http://allmydata.org/tahoe/protocols/introducer/v1": { }, "application-version": "unknown: no get_version()", } d = rrefutil.add_version_to_remote_reference(publisher, default) d.addCallback(self._got_versioned_introducer) d.addErrback(self._got_error) def _got_error(self, f): # TODO: for the introducer, perhaps this should halt the application self._introducer_error = f # polled by tests def _got_versioned_introducer(self, publisher): self.log("got introducer version: %s" % (publisher.version,)) # we require a V1 introducer needed = "http://allmydata.org/tahoe/protocols/introducer/v1" if needed not in publisher.version: raise InsufficientVersionError(needed, publisher.version) self._publisher = publisher publisher.notifyOnDisconnect(self._disconnected) self._maybe_publish() self._maybe_subscribe() def _disconnected(self): self.log("bummer, we've lost our connection to the introducer") self._publisher = None self._subscriptions.clear() def log(self, *args, **kwargs): if "facility" not in kwargs: kwargs["facility"] = "tahoe.introducer" return log.msg(*args, **kwargs) def publish(self, furl, service_name, remoteinterface_name): assert type(self._nickname_utf8) is str # we always send UTF-8 ann = (furl, service_name, remoteinterface_name, self._nickname_utf8, self._my_version, self._oldest_supported) self._published_announcements.add(ann) self._maybe_publish() def subscribe_to(self, service_name, cb, *args, **kwargs): self._local_subscribers.append( (service_name,cb,args,kwargs) ) self._subscribed_service_names.add(service_name) self._maybe_subscribe() for (servicename,nodeid),ann_d in self._current_announcements.items(): if servicename == service_name: eventually(cb, nodeid, ann_d) def _maybe_subscribe(self): if not self._publisher: self.log("want to subscribe, but no introducer yet", level=log.NOISY) return for service_name in self._subscribed_service_names: if service_name not in self._subscriptions: # there is a race here, but the subscription desk ignores # duplicate requests. self._subscriptions.add(service_name) self._debug_outstanding += 1 d = self._publisher.callRemote("subscribe", self, service_name) d.addBoth(self._debug_retired) d.addErrback(rrefutil.trap_deadref) d.addErrback(log.err, format="server errored during subscribe", facility="tahoe.introducer", level=log.WEIRD, umid="2uMScQ") def _maybe_publish(self): if not self._publisher: self.log("want to publish, but no introducer yet", level=log.NOISY) return # this re-publishes everything. 
The Introducer ignores duplicates for ann in self._published_announcements: self._debug_counts["outbound_message"] += 1 self._debug_outstanding += 1 d = self._publisher.callRemote("publish", ann) d.addBoth(self._debug_retired) d.addErrback(rrefutil.trap_deadref) d.addErrback(log.err, format="server errored during publish %(ann)s", ann=ann, facility="tahoe.introducer", level=log.WEIRD, umid="xs9pVQ") def remote_announce(self, announcements): self.log("received %d announcements" % len(announcements)) self._debug_counts["inbound_message"] += 1 for ann in announcements: try: self._process_announcement(ann) except: log.err(format="unable to process announcement %(ann)s", ann=ann) # Don't let a corrupt announcement prevent us from processing # the remaining ones. Don't return an error to the server, # since they'd just ignore it anyways. pass def _process_announcement(self, ann): self._debug_counts["inbound_announcement"] += 1 (furl, service_name, ri_name, nickname_utf8, ver, oldest) = ann if service_name not in self._subscribed_service_names: self.log("announcement for a service we don't care about [%s]" % (service_name,), level=log.UNUSUAL, umid="dIpGNA") self._debug_counts["wrong_service"] += 1 return self.log("announcement for [%s]: %s" % (service_name, ann), umid="BoKEag") assert type(furl) is str assert type(service_name) is str assert type(ri_name) is str assert type(nickname_utf8) is str nickname = nickname_utf8.decode("utf-8") assert type(nickname) is unicode assert type(ver) is str assert type(oldest) is str nodeid = b32decode(SturdyRef(furl).tubID.upper()) nodeid_s = idlib.shortnodeid_b2a(nodeid) ann_d = { "version": 0, "service-name": service_name, "FURL": furl, "nickname": nickname, "app-versions": {}, # need #466 and v2 introducer "my-version": ver, "oldest-supported": oldest, } index = (service_name, nodeid) if self._current_announcements.get(index, None) == ann_d: self.log("reannouncement for [%(service)s]:%(nodeid)s, ignoring", service=service_name, nodeid=nodeid_s, level=log.UNUSUAL, umid="B1MIdA") self._debug_counts["duplicate_announcement"] += 1 return if index in self._current_announcements: self._debug_counts["update"] += 1 else: self._debug_counts["new_announcement"] += 1 self._current_announcements[index] = ann_d # note: we never forget an index, but we might update its value for (service_name2,cb,args,kwargs) in self._local_subscribers: if service_name2 == service_name: eventually(cb, nodeid, ann_d, *args, **kwargs) def remote_set_encoding_parameters(self, parameters): self.encoding_parameters = parameters def connected_to_introducer(self): return bool(self._publisher) class IntroducerService_v1(service.MultiService, Referenceable): implements(RIIntroducerPublisherAndSubscriberService_v1) name = "introducer" VERSION = { "http://allmydata.org/tahoe/protocols/introducer/v1": { }, "application-version": str(allmydata.__full_version__), } def __init__(self, basedir="."): service.MultiService.__init__(self) self.introducer_url = None # 'index' is (service_name, tubid) self._announcements = {} # dict of index -> (announcement, timestamp) self._subscribers = {} # [service_name]->[rref]->timestamp self._debug_counts = {"inbound_message": 0, "inbound_duplicate": 0, "inbound_update": 0, "outbound_message": 0, "outbound_announcements": 0, "inbound_subscribe": 0} self._debug_outstanding = 0 def _debug_retired(self, res): self._debug_outstanding -= 1 return res def log(self, *args, **kwargs): if "facility" not in kwargs: kwargs["facility"] = "tahoe.introducer" return log.msg(*args, 
**kwargs) def get_announcements(self, include_stub_clients=True): announcements = [] for index, (ann_t, when) in self._announcements.items(): (furl, service_name, ri_name, nickname, ver, oldest) = ann_t if service_name == "stub_client" and not include_stub_clients: continue ann_d = {"nickname": nickname.decode("utf-8", "replace"), "my-version": ver, "service-name": service_name, "anonymous-storage-FURL": furl, } # the V2 introducer uses (service_name, key_s, tubid_s) as an # index, so match that format for AnnouncementDescriptor new_index = (index[0], None, idlib.nodeid_b2a(index[1])) ad = AnnouncementDescriptor(when, new_index, None, ann_d) announcements.append(ad) return announcements def get_subscribers(self): s = [] for service_name, subscribers in self._subscribers.items(): for rref, when in subscribers.items(): tubid = rref.getRemoteTubID() or "?" advertised_addresses = rrefutil.hosts_for_rref(rref) remote_address = rrefutil.stringify_remote_address(rref) nickname, version, app_versions = u"?", u"?", {} sd = SubscriberDescriptor(service_name, when, nickname, version, app_versions, advertised_addresses, remote_address, tubid) s.append(sd) return s def remote_get_version(self): return self.VERSION def remote_publish(self, announcement): try: self._publish(announcement) except: log.err(format="Introducer.remote_publish failed on %(ann)s", ann=announcement, level=log.UNUSUAL, umid="620rWA") raise def _publish(self, announcement): self._debug_counts["inbound_message"] += 1 self.log("introducer: announcement published: %s" % (announcement,) ) (furl, service_name, ri_name, nickname_utf8, ver, oldest) = announcement #print "PUB", service_name, nickname_utf8 nodeid = b32decode(SturdyRef(furl).tubID.upper()) index = (service_name, nodeid) if index in self._announcements: (old_announcement, timestamp) = self._announcements[index] if old_announcement == announcement: self.log("but we already knew it, ignoring", level=log.NOISY) self._debug_counts["inbound_duplicate"] += 1 return else: self.log("old announcement being updated", level=log.NOISY) self._debug_counts["inbound_update"] += 1 self._announcements[index] = (announcement, time.time()) for s in self._subscribers.get(service_name, []): self._debug_counts["outbound_message"] += 1 self._debug_counts["outbound_announcements"] += 1 self._debug_outstanding += 1 d = s.callRemote("announce", set([announcement])) d.addBoth(self._debug_retired) d.addErrback(rrefutil.trap_deadref) d.addErrback(log.err, format="subscriber errored on announcement %(ann)s", ann=announcement, facility="tahoe.introducer", level=log.UNUSUAL, umid="jfGMXQ") def remote_subscribe(self, subscriber, service_name): self.log("introducer: subscription[%s] request at %s" % (service_name, subscriber)) self._debug_counts["inbound_subscribe"] += 1 if service_name not in self._subscribers: self._subscribers[service_name] = {} subscribers = self._subscribers[service_name] if subscriber in subscribers: self.log("but they're already subscribed, ignoring", level=log.UNUSUAL) return subscribers[subscriber] = time.time() def _remove(): self.log("introducer: unsubscribing[%s] %s" % (service_name, subscriber)) subscribers.pop(subscriber, None) subscriber.notifyOnDisconnect(_remove) announcements = set( [ ann for (sn2,nodeid),(ann,when) in self._announcements.items() if sn2 == service_name] ) self._debug_counts["outbound_message"] += 1 self._debug_counts["outbound_announcements"] += len(announcements) self._debug_outstanding += 1 d = subscriber.callRemote("announce", announcements) 
d.addBoth(self._debug_retired) d.addErrback(rrefutil.trap_deadref) d.addErrback(log.err, format="subscriber errored during subscribe %(anns)s", anns=announcements, facility="tahoe.introducer", level=log.UNUSUAL, umid="1XChxA") tahoe-lafs-1.10.0/src/allmydata/introducer/server.py000066400000000000000000000421531221140116300224050ustar00rootroot00000000000000 import time, os.path, textwrap from zope.interface import implements from twisted.application import service from foolscap.api import Referenceable import allmydata from allmydata import node from allmydata.util import log, rrefutil from allmydata.util.encodingutil import get_filesystem_encoding from allmydata.introducer.interfaces import \ RIIntroducerPublisherAndSubscriberService_v2 from allmydata.introducer.common import convert_announcement_v1_to_v2, \ convert_announcement_v2_to_v1, unsign_from_foolscap, make_index, \ get_tubid_string_from_ann, SubscriberDescriptor, AnnouncementDescriptor class FurlFileConflictError(Exception): pass class IntroducerNode(node.Node): PORTNUMFILE = "introducer.port" NODETYPE = "introducer" GENERATED_FILES = ['introducer.furl'] def __init__(self, basedir="."): node.Node.__init__(self, basedir) self.read_config() self.init_introducer() webport = self.get_config("node", "web.port", None) if webport: self.init_web(webport) # strports string def init_introducer(self): introducerservice = IntroducerService(self.basedir) self.add_service(introducerservice) old_public_fn = os.path.join(self.basedir, "introducer.furl").encode(get_filesystem_encoding()) private_fn = os.path.join(self.basedir, "private", "introducer.furl").encode(get_filesystem_encoding()) if os.path.exists(old_public_fn): if os.path.exists(private_fn): msg = """This directory (%s) contains both an old public 'introducer.furl' file, and a new-style 'private/introducer.furl', so I cannot safely remove the old one. Please make sure your desired FURL is in private/introducer.furl, and remove the public file. If this causes your Introducer's FURL to change, you need to inform all grid members so they can update their tahoe.cfg. """ raise FurlFileConflictError(textwrap.dedent(msg)) os.rename(old_public_fn, private_fn) d = self.when_tub_ready() def _publish(res): furl = self.tub.registerReference(introducerservice, furlFile=private_fn) self.log(" introducer is at %s" % furl, umid="qF2L9A") self.introducer_url = furl # for tests d.addCallback(_publish) d.addErrback(log.err, facility="tahoe.init", level=log.BAD, umid="UaNs9A") def init_web(self, webport): self.log("init_web(webport=%s)", args=(webport,), umid="2bUygA") from allmydata.webish import IntroducerWebishServer nodeurl_path = os.path.join(self.basedir, "node.url") staticdir = self.get_config("node", "web.static", "public_html") staticdir = os.path.expanduser(staticdir) ws = IntroducerWebishServer(self, webport, nodeurl_path, staticdir) self.add_service(ws) class WrapV1SubscriberInV2Interface: # for_v1 """I wrap a RemoteReference that points at an old v1 subscriber, enabling it to be treated like a v2 subscriber. 
""" def __init__(self, original): self.original = original # also used for tests def __eq__(self, them): return self.original == them def __ne__(self, them): return self.original != them def __hash__(self): return hash(self.original) def getRemoteTubID(self): return self.original.getRemoteTubID() def getSturdyRef(self): return self.original.getSturdyRef() def getPeer(self): return self.original.getPeer() def getLocationHints(self): return self.original.getLocationHints() def callRemote(self, methname, *args, **kwargs): m = getattr(self, "wrap_" + methname) return m(*args, **kwargs) def wrap_announce_v2(self, announcements): anns_v1 = [convert_announcement_v2_to_v1(ann) for ann in announcements] return self.original.callRemote("announce", set(anns_v1)) def wrap_set_encoding_parameters(self, parameters): # note: unused return self.original.callRemote("set_encoding_parameters", parameters) def notifyOnDisconnect(self, *args, **kwargs): return self.original.notifyOnDisconnect(*args, **kwargs) class IntroducerService(service.MultiService, Referenceable): implements(RIIntroducerPublisherAndSubscriberService_v2) name = "introducer" # v1 is the original protocol, supported since 1.0 (but only advertised # starting in 1.3). v2 is the new signed protocol, supported after 1.9 VERSION = { "http://allmydata.org/tahoe/protocols/introducer/v1": { }, "http://allmydata.org/tahoe/protocols/introducer/v2": { }, "application-version": str(allmydata.__full_version__), } def __init__(self, basedir="."): service.MultiService.__init__(self) self.introducer_url = None # 'index' is (service_name, key_s, tubid), where key_s or tubid is # None self._announcements = {} # dict of index -> # (ann_t, canary, ann, timestamp) # ann (the announcement dictionary) is cleaned up: nickname is always # unicode, servicename is always ascii, etc, even though # simplejson.loads sometimes returns either # self._subscribers is a dict mapping servicename to subscriptions # 'subscriptions' is a dict mapping rref to a subscription # 'subscription' is a tuple of (subscriber_info, timestamp) # 'subscriber_info' is a dict, provided directly for v2 clients, or # synthesized for v1 clients. The expected keys are: # version, nickname, app-versions, my-version, oldest-supported self._subscribers = {} # self._stub_client_announcements contains the information provided # by v1 clients. We stash this so we can match it up with their # subscriptions. 
self._stub_client_announcements = {} # maps tubid to sinfo # for_v1 self._debug_counts = {"inbound_message": 0, "inbound_duplicate": 0, "inbound_no_seqnum": 0, "inbound_old_replay": 0, "inbound_update": 0, "outbound_message": 0, "outbound_announcements": 0, "inbound_subscribe": 0} self._debug_outstanding = 0 # also covers WrapV1SubscriberInV2Interface def _debug_retired(self, res): self._debug_outstanding -= 1 return res def log(self, *args, **kwargs): if "facility" not in kwargs: kwargs["facility"] = "tahoe.introducer.server" return log.msg(*args, **kwargs) def get_announcements(self, include_stub_clients=True): """Return a list of AnnouncementDescriptor for all announcements""" announcements = [] for (index, (_, canary, ann, when)) in self._announcements.items(): if ann["service-name"] == "stub_client": if not include_stub_clients: continue ad = AnnouncementDescriptor(when, index, canary, ann) announcements.append(ad) return announcements def get_subscribers(self): """Return a list of SubscriberDescriptor objects for all subscribers""" s = [] for service_name, subscriptions in self._subscribers.items(): for rref,(subscriber_info,when) in subscriptions.items(): # note that if the subscriber didn't do Tub.setLocation, # tubid will be None. Also, subscribers do not tell us which # pubkey they use; only publishers do that. tubid = rref.getRemoteTubID() or "?" advertised_addresses = rrefutil.hosts_for_rref(rref) remote_address = rrefutil.stringify_remote_address(rref) # these three assume subscriber_info["version"]==0, but # should tolerate other versions if not subscriber_info: # V1 clients that haven't yet sent their stub_info data subscriber_info = {} nickname = subscriber_info.get("nickname", u"?") version = subscriber_info.get("my-version", u"?") app_versions = subscriber_info.get("app-versions", {}) # 'when' is the time they subscribed sd = SubscriberDescriptor(service_name, when, nickname, version, app_versions, advertised_addresses, remote_address, tubid) s.append(sd) return s def remote_get_version(self): return self.VERSION def remote_publish(self, ann_t): # for_v1 lp = self.log("introducer: old (v1) announcement published: %s" % (ann_t,), umid="6zGOIw") ann_v2 = convert_announcement_v1_to_v2(ann_t) return self.publish(ann_v2, None, lp) def remote_publish_v2(self, ann_t, canary): lp = self.log("introducer: announcement (v2) published", umid="L2QXkQ") return self.publish(ann_t, canary, lp) def publish(self, ann_t, canary, lp): try: self._publish(ann_t, canary, lp) except: log.err(format="Introducer.remote_publish failed on %(ann)s", ann=ann_t, level=log.UNUSUAL, parent=lp, umid="620rWA") raise def _publish(self, ann_t, canary, lp): self._debug_counts["inbound_message"] += 1 self.log("introducer: announcement published: %s" % (ann_t,), umid="wKHgCw") ann, key = unsign_from_foolscap(ann_t) # might raise BadSignatureError index = make_index(ann, key) service_name = str(ann["service-name"]) if service_name == "stub_client": # for_v1 self._attach_stub_client(ann, lp) return old = self._announcements.get(index) if old: (old_ann_t, canary, old_ann, timestamp) = old if old_ann == ann: self.log("but we already knew it, ignoring", level=log.NOISY, umid="myxzLw") self._debug_counts["inbound_duplicate"] += 1 return else: if "seqnum" in old_ann: # must beat previous sequence number to replace if ("seqnum" not in ann or not isinstance(ann["seqnum"], (int,long))): self.log("not replacing old ann, no valid seqnum", level=log.NOISY, umid="ySbaVw") self._debug_counts["inbound_no_seqnum"] += 1 return 
if ann["seqnum"] <= old_ann["seqnum"]: self.log("not replacing old ann, new seqnum is too old" " (%s <= %s) (replay attack?)" % (ann["seqnum"], old_ann["seqnum"]), level=log.UNUSUAL, umid="sX7yqQ") self._debug_counts["inbound_old_replay"] += 1 return # ok, seqnum is newer, allow replacement self.log("old announcement being updated", level=log.NOISY, umid="304r9g") self._debug_counts["inbound_update"] += 1 self._announcements[index] = (ann_t, canary, ann, time.time()) #if canary: # canary.notifyOnDisconnect ... # use a CanaryWatcher? with cw.is_connected()? # actually we just want foolscap to give rref.is_connected(), since # this is only for the status display for s in self._subscribers.get(service_name, []): self._debug_counts["outbound_message"] += 1 self._debug_counts["outbound_announcements"] += 1 self._debug_outstanding += 1 d = s.callRemote("announce_v2", set([ann_t])) d.addBoth(self._debug_retired) d.addErrback(log.err, format="subscriber errored on announcement %(ann)s", ann=ann_t, facility="tahoe.introducer", level=log.UNUSUAL, umid="jfGMXQ") def _attach_stub_client(self, ann, lp): # There might be a v1 subscriber for whom this is a stub_client. # We might have received the subscription before the stub_client # announcement, in which case we now need to fix up the record in # self._subscriptions . # record it for later, in case the stub_client arrived before the # subscription subscriber_info = self._get_subscriber_info_from_ann(ann) ann_tubid = get_tubid_string_from_ann(ann) self._stub_client_announcements[ann_tubid] = subscriber_info lp2 = self.log("stub_client announcement, " "looking for matching subscriber", parent=lp, level=log.NOISY, umid="BTywDg") for sn in self._subscribers: s = self._subscribers[sn] for (subscriber, info) in s.items(): # we correlate these by looking for a subscriber whose tubid # matches this announcement sub_tubid = subscriber.getRemoteTubID() if sub_tubid == ann_tubid: self.log(format="found a match, nodeid=%(nodeid)s", nodeid=sub_tubid, level=log.NOISY, parent=lp2, umid="xsWs1A") # found a match. Does it need info? 
if not info[0]: self.log(format="replacing info", level=log.NOISY, parent=lp2, umid="m5kxwA") # yup s[subscriber] = (subscriber_info, info[1]) # and we don't remember or announce stub_clients beyond what we # need to get the subscriber_info set up def _get_subscriber_info_from_ann(self, ann): # for_v1 sinfo = { "version": ann["version"], "nickname": ann["nickname"], "app-versions": ann["app-versions"], "my-version": ann["my-version"], "oldest-supported": ann["oldest-supported"], } return sinfo def remote_subscribe(self, subscriber, service_name): # for_v1 self.log("introducer: old (v1) subscription[%s] request at %s" % (service_name, subscriber), umid="hJlGUg") return self.add_subscriber(WrapV1SubscriberInV2Interface(subscriber), service_name, None) def remote_subscribe_v2(self, subscriber, service_name, subscriber_info): self.log("introducer: subscription[%s] request at %s" % (service_name, subscriber), umid="U3uzLg") return self.add_subscriber(subscriber, service_name, subscriber_info) def add_subscriber(self, subscriber, service_name, subscriber_info): self._debug_counts["inbound_subscribe"] += 1 if service_name not in self._subscribers: self._subscribers[service_name] = {} subscribers = self._subscribers[service_name] if subscriber in subscribers: self.log("but they're already subscribed, ignoring", level=log.UNUSUAL, umid="Sy9EfA") return if not subscriber_info: # for_v1 # v1 clients don't provide subscriber_info, but they should # publish a 'stub client' record which contains the same # information. If we've already received this, it will be in # self._stub_client_announcements tubid = subscriber.getRemoteTubID() if tubid in self._stub_client_announcements: subscriber_info = self._stub_client_announcements[tubid] subscribers[subscriber] = (subscriber_info, time.time()) def _remove(): self.log("introducer: unsubscribing[%s] %s" % (service_name, subscriber), umid="vYGcJg") subscribers.pop(subscriber, None) subscriber.notifyOnDisconnect(_remove) # now tell them about any announcements they're interested in announcements = set( [ ann_t for idx,(ann_t,canary,ann,when) in self._announcements.items() if idx[0] == service_name] ) if announcements: self._debug_counts["outbound_message"] += 1 self._debug_counts["outbound_announcements"] += len(announcements) self._debug_outstanding += 1 d = subscriber.callRemote("announce_v2", announcements) d.addBoth(self._debug_retired) d.addErrback(log.err, format="subscriber errored during subscribe %(anns)s", anns=announcements, facility="tahoe.introducer", level=log.UNUSUAL, umid="mtZepQ") return d tahoe-lafs-1.10.0/src/allmydata/key_generator.py000066400000000000000000000076331221140116300215630ustar00rootroot00000000000000 import os import time from foolscap.api import Referenceable, Tub from zope.interface import implements from twisted.internet import reactor from twisted.application import service from allmydata.util import log from pycryptopp.publickey import rsa from allmydata.interfaces import RIKeyGenerator class KeyGenerator(service.MultiService, Referenceable): implements(RIKeyGenerator) pool_size = 16 # no. keys to keep on hand in the pool pool_refresh_delay = 6 # no. 
sec to wait after a fetch before generating new keys verbose = False def __init__(self, default_key_size=2048): service.MultiService.__init__(self) self.keypool = [] self.last_fetch = 0 self.default_key_size = default_key_size def startService(self): self.timer = reactor.callLater(0, self.maybe_refill_pool) return service.MultiService.startService(self) def stopService(self): if self.timer.active(): self.timer.cancel() return service.MultiService.stopService(self) def __repr__(self): return '' % (len(self.keypool),) def vlog(self, msg): if self.verbose: log.msg(msg) def reset_timer(self): self.last_fetch = time.time() if self.timer.active(): self.timer.reset(self.pool_refresh_delay) else: self.timer = reactor.callLater(self.pool_refresh_delay, self.maybe_refill_pool) def maybe_refill_pool(self): now = time.time() if self.last_fetch + self.pool_refresh_delay < now: self.vlog('%s refilling pool' % (self,)) while len(self.keypool) < self.pool_size: self.keypool.append(self.gen_key(self.default_key_size)) else: self.vlog('%s not refilling pool' % (self,)) reactor.callLater(1, self.maybe_refill_pool) def gen_key(self, key_size): self.vlog('%s generating key size %s' % (self, key_size, )) signer = rsa.generate(key_size) verifier = signer.get_verifying_key() return verifier.serialize(), signer.serialize() def remote_get_rsa_key_pair(self, key_size): self.vlog('%s remote_get_key' % (self,)) if key_size != self.default_key_size or not self.keypool: key = self.gen_key(key_size) self.reset_timer() return key else: self.reset_timer() return self.keypool.pop() class KeyGeneratorService(service.MultiService): furl_file = 'key_generator.furl' def __init__(self, basedir='.', display_furl=True, default_key_size=2048): service.MultiService.__init__(self) self.basedir = basedir self.tub = Tub(certFile=os.path.join(self.basedir, 'key_generator.pem')) self.tub.setOption("expose-remote-exception-types", False) self.tub.setServiceParent(self) self.key_generator = KeyGenerator(default_key_size=default_key_size) self.key_generator.setServiceParent(self) portnum = self.get_portnum() self.listener = self.tub.listenOn(portnum or 'tcp:0') d = self.tub.setLocationAutomatically() if portnum is None: d.addCallback(self.save_portnum) d.addCallback(self.tub_ready, display_furl) d.addErrback(log.err) def get_portnum(self): portnumfile = os.path.join(self.basedir, 'portnum') if os.path.exists(portnumfile): return file(portnumfile, 'rb').read().strip() def save_portnum(self, junk): portnum = self.listener.getPortnum() portnumfile = os.path.join(self.basedir, 'portnum') file(portnumfile, 'wb').write('%d\n' % (portnum,)) def tub_ready(self, junk, display_furl): kgf = os.path.join(self.basedir, self.furl_file) self.keygen_furl = self.tub.registerReference(self.key_generator, furlFile=kgf) if display_furl: print 'key generator at:', self.keygen_furl tahoe-lafs-1.10.0/src/allmydata/manhole.py000066400000000000000000000246501221140116300203460ustar00rootroot00000000000000 # this is adapted from my code in Buildbot -warner import os.path import binascii, base64 from twisted.python import log from twisted.application import service, strports from twisted.cred import checkers, portal from twisted.conch import manhole, telnet, manhole_ssh, checkers as conchc from twisted.conch.insults import insults from twisted.internet import protocol from zope.interface import implements # makeTelnetProtocol and _TelnetRealm are for the TelnetManhole class makeTelnetProtocol: # this curries the 'portal' argument into a later call to # 
TelnetTransport() def __init__(self, portal): self.portal = portal def __call__(self): auth = telnet.AuthenticatingTelnetProtocol return telnet.TelnetTransport(auth, self.portal) class _TelnetRealm: implements(portal.IRealm) def __init__(self, namespace_maker): self.namespace_maker = namespace_maker def requestAvatar(self, avatarId, *interfaces): if telnet.ITelnetProtocol in interfaces: namespace = self.namespace_maker() p = telnet.TelnetBootstrapProtocol(insults.ServerProtocol, manhole.ColoredManhole, namespace) return (telnet.ITelnetProtocol, p, lambda: None) raise NotImplementedError() class chainedProtocolFactory: # this curries the 'namespace' argument into a later call to # chainedProtocolFactory() def __init__(self, namespace): self.namespace = namespace def __call__(self): return insults.ServerProtocol(manhole.ColoredManhole, self.namespace) class AuthorizedKeysChecker(conchc.SSHPublicKeyDatabase): """Accept connections using SSH keys from a given file. SSHPublicKeyDatabase takes the username that the prospective client has requested and attempts to get a ~/.ssh/authorized_keys file for that username. This requires root access, so it isn't as useful as you'd like. Instead, this subclass looks for keys in a single file, given as an argument. This file is typically kept in the buildmaster's basedir. The file should have 'ssh-dss ....' lines in it, just like authorized_keys. """ def __init__(self, authorized_keys_file): self.authorized_keys_file = os.path.expanduser(authorized_keys_file) def checkKey(self, credentials): f = open(self.authorized_keys_file) for l in f.readlines(): l2 = l.split() if len(l2) < 2: continue try: if base64.decodestring(l2[1]) == credentials.blob: return 1 except binascii.Error: continue return 0 class ModifiedColoredManhole(manhole.ColoredManhole): def connectionMade(self): manhole.ColoredManhole.connectionMade(self) # look in twisted.conch.recvline.RecvLine for hints self.keyHandlers["\x08"] = self.handle_BACKSPACE self.keyHandlers["\x15"] = self.handle_KILLLINE self.keyHandlers["\x01"] = self.handle_HOME self.keyHandlers["\x04"] = self.handle_DELETE self.keyHandlers["\x05"] = self.handle_END self.keyHandlers["\x0b"] = self.handle_KILLLINE # really kill-to-end #self.keyHandlers["\xe2"] = self.handle_BACKWARDS_WORD # M-b #self.keyHandlers["\xe6"] = self.handle_FORWARDS_WORD # M-f def handle_KILLLINE(self): self.handle_END() for i in range(len(self.lineBuffer)): self.handle_BACKSPACE() class _BaseManhole(service.MultiService): """This provides remote access to a python interpreter (a read/exec/print loop) embedded in the buildmaster via an internal SSH server. This allows detailed inspection of the buildmaster state. It is of most use to buildbot developers. Connect to this by running an ssh client. """ def __init__(self, port, checker, using_ssh=True): """ @type port: string or int @param port: what port should the Manhole listen on? This is a strports specification string, like 'tcp:12345' or 'tcp:12345:interface=127.0.0.1'. Bare integers are treated as a simple tcp port. @type checker: an object providing the L{twisted.cred.checkers.ICredentialsChecker} interface @param checker: if provided, this checker is used to authenticate the client instead of using the username/password scheme. You must either provide a username/password or a Checker. 
Some useful values are:: import twisted.cred.checkers as credc import twisted.conch.checkers as conchc c = credc.AllowAnonymousAccess # completely open c = credc.FilePasswordDB(passwd_filename) # file of name:passwd c = conchc.UNIXPasswordDatabase # getpwnam() (probably /etc/passwd) @type using_ssh: bool @param using_ssh: If True, accept SSH connections. If False, accept regular unencrypted telnet connections. """ # unfortunately, these don't work unless we're running as root #c = credc.PluggableAuthenticationModulesChecker: PAM #c = conchc.SSHPublicKeyDatabase() # ~/.ssh/authorized_keys # and I can't get UNIXPasswordDatabase to work service.MultiService.__init__(self) if type(port) is int: port = "tcp:%d" % port self.port = port # for comparison later self.checker = checker # to maybe compare later def makeNamespace(): # close over 'self' so we can get access to .parent later from allmydata import debugshell debugshell.app = self.parent # make node accessible via 'app' namespace = {} for sym in dir(debugshell): if sym.startswith('__') and sym.endswith('__'): continue namespace[sym] = getattr(debugshell, sym) return namespace def makeProtocol(): namespace = makeNamespace() p = insults.ServerProtocol(ModifiedColoredManhole, namespace) return p self.using_ssh = using_ssh if using_ssh: r = manhole_ssh.TerminalRealm() r.chainedProtocolFactory = makeProtocol p = portal.Portal(r, [self.checker]) f = manhole_ssh.ConchFactory(p) else: r = _TelnetRealm(makeNamespace) p = portal.Portal(r, [self.checker]) f = protocol.ServerFactory() f.protocol = makeTelnetProtocol(p) s = strports.service(self.port, f) s.setServiceParent(self) def startService(self): service.MultiService.startService(self) if self.using_ssh: via = "via SSH" else: via = "via telnet" log.msg("Manhole listening %s on port %s" % (via, self.port)) class TelnetManhole(_BaseManhole): """This Manhole accepts unencrypted (telnet) connections, and requires a username and password authorize access. You are encouraged to use the encrypted ssh-based manhole classes instead.""" def __init__(self, port, username, password): """ @type port: string or int @param port: what port should the Manhole listen on? This is a strports specification string, like 'tcp:12345' or 'tcp:12345:interface=127.0.0.1'. Bare integers are treated as a simple tcp port. @param username: @param password: username= and password= form a pair of strings to use when authenticating the remote user. """ self.username = username self.password = password c = checkers.InMemoryUsernamePasswordDatabaseDontUse() c.addUser(username, password) _BaseManhole.__init__(self, port, c, using_ssh=False) class PasswordManhole(_BaseManhole): """This Manhole accepts encrypted (ssh) connections, and requires a username and password to authorize access. """ def __init__(self, port, username, password): """ @type port: string or int @param port: what port should the Manhole listen on? This is a strports specification string, like 'tcp:12345' or 'tcp:12345:interface=127.0.0.1'. Bare integers are treated as a simple tcp port. @param username: @param password: username= and password= form a pair of strings to use when authenticating the remote user. 
""" self.username = username self.password = password c = checkers.InMemoryUsernamePasswordDatabaseDontUse() c.addUser(username, password) _BaseManhole.__init__(self, port, c) class AuthorizedKeysManhole(_BaseManhole): """This Manhole accepts ssh connections, and requires that the prospective client have an ssh private key that matches one of the public keys in our authorized_keys file. It is created with the name of a file that contains the public keys that we will accept.""" def __init__(self, port, keyfile): """ @type port: string or int @param port: what port should the Manhole listen on? This is a strports specification string, like 'tcp:12345' or 'tcp:12345:interface=127.0.0.1'. Bare integers are treated as a simple tcp port. @param keyfile: the name of a file (relative to the buildmaster's basedir) that contains SSH public keys of authorized users, one per line. This is the exact same format as used by sshd in ~/.ssh/authorized_keys . """ # TODO: expanduser this, and make it relative to the buildmaster's # basedir self.keyfile = keyfile c = AuthorizedKeysChecker(keyfile) _BaseManhole.__init__(self, port, c) class ArbitraryCheckerManhole(_BaseManhole): """This Manhole accepts ssh connections, but uses an arbitrary user-supplied 'checker' object to perform authentication.""" def __init__(self, port, checker): """ @type port: string or int @param port: what port should the Manhole listen on? This is a strports specification string, like 'tcp:12345' or 'tcp:12345:interface=127.0.0.1'. Bare integers are treated as a simple tcp port. @param checker: an instance of a twisted.cred 'checker' which will perform authentication """ _BaseManhole.__init__(self, port, checker) tahoe-lafs-1.10.0/src/allmydata/monitor.py000066400000000000000000000103461221140116300204070ustar00rootroot00000000000000 from zope.interface import Interface, implements from allmydata.util import observer class IMonitor(Interface): """I manage status, progress, and cancellation for long-running operations. Whoever initiates the operation should create a Monitor instance and pass it into the code that implements the operation. That code should periodically check in with the Monitor, perhaps after each major unit of work has been completed, for two purposes. The first is to inform the Monitor about progress that has been made, so that external observers can be reassured that the operation is proceeding normally. If the operation has a well-known amount of work to perform, this notification should reflect that, so that an ETA or 'percentage complete' value can be derived. The second purpose is to check to see if the operation has been cancelled. The impatient observer who no longer wants the operation to continue will inform the Monitor; the next time the operation code checks in, it should notice that the operation has been cancelled, and wrap things up. The same monitor can be passed to multiple operations, all of which may check for cancellation: this pattern may be simpler than having the original caller keep track of subtasks and cancel them individually. """ # the following methods are provided for the operation code def is_cancelled(): """Returns True if the operation has been cancelled. If True, operation code should stop creating new work, and attempt to stop any work already in progress.""" def raise_if_cancelled(): """Raise OperationCancelledError if the operation has been cancelled. 
Operation code that has a robust error-handling path can simply call this periodically.""" def set_status(status): """Sets the Monitor's 'status' object to an arbitrary value. Different operations will store different sorts of status information here. Operation code should use get+modify+set sequences to update this.""" def get_status(): """Return the status object. If the operation failed, this will be a Failure instance.""" def finish(status): """Call this when the operation is done, successful or not. The Monitor's lifetime is influenced by the completion of the operation it is monitoring. The Monitor's 'status' value will be set with the 'status' argument, just as if it had been passed to set_status(). This value will be used to fire the Deferreds that are returned by when_done(). Operations that fire a Deferred when they finish should trigger this with d.addBoth(monitor.finish)""" # the following methods are provided for the initiator of the operation def is_finished(): """Return a boolean, True if the operation is done (whether successful or failed), False if it is still running.""" def when_done(): """Return a Deferred that fires when the operation is complete. It will fire with the operation status, the same value as returned by get_status().""" def cancel(): """Cancel the operation as soon as possible. is_cancelled() will start returning True after this is called.""" # get_status() is useful too, but it is operation-specific class OperationCancelledError(Exception): pass class Monitor: implements(IMonitor) def __init__(self): self.cancelled = False self.finished = False self.status = None self.observer = observer.OneShotObserverList() def is_cancelled(self): return self.cancelled def raise_if_cancelled(self): if self.cancelled: raise OperationCancelledError() def is_finished(self): return self.finished def when_done(self): return self.observer.when_fired() def cancel(self): self.cancelled = True def finish(self, status_or_failure): self.set_status(status_or_failure) self.finished = True self.observer.fire(status_or_failure) return status_or_failure def get_status(self): return self.status def set_status(self, status): self.status = status tahoe-lafs-1.10.0/src/allmydata/mutable/000077500000000000000000000000001221140116300177735ustar00rootroot00000000000000tahoe-lafs-1.10.0/src/allmydata/mutable/__init__.py000066400000000000000000000000001221140116300220720ustar00rootroot00000000000000tahoe-lafs-1.10.0/src/allmydata/mutable/checker.py000066400000000000000000000307311221140116300217550ustar00rootroot00000000000000 from allmydata.uri import from_string from allmydata.util import base32, log, dictutil from allmydata.check_results import CheckAndRepairResults, CheckResults from allmydata.mutable.common import MODE_CHECK, MODE_WRITE, CorruptShareError from allmydata.mutable.servermap import ServerMap, ServermapUpdater from allmydata.mutable.retrieve import Retrieve # for verifying class MutableChecker: SERVERMAP_MODE = MODE_CHECK def __init__(self, node, storage_broker, history, monitor): self._node = node self._storage_broker = storage_broker self._history = history self._monitor = monitor self.bad_shares = [] # list of (server,shnum,failure) self._storage_index = self._node.get_storage_index() self.need_repair = False self.responded = set() # set of (binary) nodeids def check(self, verify=False, add_lease=False): servermap = ServerMap() # Updating the servermap in MODE_CHECK will stand a good chance # of finding all of the shares, and getting a good idea of # recoverability, etc, 
without verifying. u = ServermapUpdater(self._node, self._storage_broker, self._monitor, servermap, self.SERVERMAP_MODE, add_lease=add_lease) if self._history: self._history.notify_mapupdate(u.get_status()) d = u.update() d.addCallback(self._got_mapupdate_results) if verify: d.addCallback(self._verify_all_shares) d.addCallback(lambda res: servermap) d.addCallback(self._make_checker_results) return d def _got_mapupdate_results(self, servermap): # the file is healthy if there is exactly one recoverable version, it # has at least N distinct shares, and there are no unrecoverable # versions: all existing shares will be for the same version. self._monitor.raise_if_cancelled() self.best_version = None num_recoverable = len(servermap.recoverable_versions()) if num_recoverable: self.best_version = servermap.best_recoverable_version() # The file is unhealthy and needs to be repaired if: # - There are unrecoverable versions. if servermap.unrecoverable_versions(): self.need_repair = True # - There isn't a recoverable version. if num_recoverable != 1: self.need_repair = True # - The best recoverable version is missing some shares. if self.best_version: available_shares = servermap.shares_available() (num_distinct_shares, k, N) = available_shares[self.best_version] if num_distinct_shares < N: self.need_repair = True return servermap def _verify_all_shares(self, servermap): # read every byte of each share # # This logic is going to be very nearly the same as the # downloader. I bet we could pass the downloader a flag that # makes it do this, and piggyback onto that instead of # duplicating a bunch of code. # # Like: # r = Retrieve(blah, blah, blah, verify=True) # d = r.download() # (wait, wait, wait, d.callback) # # Then, when it has finished, we can check the servermap (which # we provided to Retrieve) to figure out which shares are bad, # since the Retrieve process will have updated the servermap as # it went along. # # By passing the verify=True flag to the constructor, we are # telling the downloader a few things. # # 1. It needs to download all N shares, not just K shares. # 2. It doesn't need to decrypt or decode the shares, only # verify them. 
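# A minimal illustrative restatement (not part of this module) of the health
# rule that _got_mapupdate_results applies above, as a pure function over the
# numbers pulled out of the servermap; the helper name and example values are
# invented.

def _example_needs_repair(num_recoverable, num_unrecoverable,
                          num_distinct_shares, expected_N):
    if num_unrecoverable:                 # any unrecoverable version
        return True
    if num_recoverable != 1:              # zero, or more than one, recoverable version
        return True
    if num_distinct_shares < expected_N:  # best version is missing shares
        return True
    return False

# e.g. one recoverable version with only 7 of N=10 expected shares placed:
#   _example_needs_repair(1, 0, 7, 10) --> True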
if not self.best_version: return r = Retrieve(self._node, self._storage_broker, servermap, self.best_version, verify=True) d = r.download() d.addCallback(self._process_bad_shares) return d def _process_bad_shares(self, bad_shares): if bad_shares: self.need_repair = True self.bad_shares = bad_shares def _count_shares(self, smap, version): available_shares = smap.shares_available() (num_distinct_shares, k, N) = available_shares[version] counters = {} counters["count-shares-good"] = num_distinct_shares counters["count-shares-needed"] = k counters["count-shares-expected"] = N good_hosts = smap.all_servers_for_version(version) counters["count-good-share-hosts"] = len(good_hosts) vmap = smap.make_versionmap() counters["count-wrong-shares"] = sum([len(shares) for verinfo,shares in vmap.items() if verinfo != version]) return counters def _make_checker_results(self, smap): self._monitor.raise_if_cancelled() healthy = True report = [] summary = [] vmap = smap.make_versionmap() recoverable = smap.recoverable_versions() unrecoverable = smap.unrecoverable_versions() if recoverable: report.append("Recoverable Versions: " + "/".join(["%d*%s" % (len(vmap[v]), smap.summarize_version(v)) for v in recoverable])) if unrecoverable: report.append("Unrecoverable Versions: " + "/".join(["%d*%s" % (len(vmap[v]), smap.summarize_version(v)) for v in unrecoverable])) if smap.unrecoverable_versions(): healthy = False summary.append("some versions are unrecoverable") report.append("Unhealthy: some versions are unrecoverable") if len(recoverable) == 0: healthy = False summary.append("no versions are recoverable") report.append("Unhealthy: no versions are recoverable") if len(recoverable) > 1: healthy = False summary.append("multiple versions are recoverable") report.append("Unhealthy: there are multiple recoverable versions") needs_rebalancing = False if recoverable: best_version = smap.best_recoverable_version() report.append("Best Recoverable Version: " + smap.summarize_version(best_version)) counters = self._count_shares(smap, best_version) s = counters["count-shares-good"] k = counters["count-shares-needed"] N = counters["count-shares-expected"] if s < N: healthy = False report.append("Unhealthy: best version has only %d shares " "(encoding is %d-of-%d)" % (s, k, N)) summary.append("%d shares (enc %d-of-%d)" % (s, k, N)) hosts = smap.all_servers_for_version(best_version) needs_rebalancing = bool( len(hosts) < N ) elif unrecoverable: healthy = False # find a k and N from somewhere first = list(unrecoverable)[0] # not exactly the best version, but that doesn't matter too much counters = self._count_shares(smap, first) # leave needs_rebalancing=False: the file being unrecoverable is # the bigger problem else: # couldn't find anything at all counters = { "count-shares-good": 0, "count-shares-needed": 3, # arbitrary defaults "count-shares-expected": 10, "count-good-share-hosts": 0, "count-wrong-shares": 0, } corrupt_share_locators = [] problems = [] if self.bad_shares: report.append("Corrupt Shares:") summary.append("Corrupt Shares:") for (server, shnum, f) in sorted(self.bad_shares): serverid = server.get_serverid() locator = (server, self._storage_index, shnum) corrupt_share_locators.append(locator) s = "%s-sh%d" % (server.get_name(), shnum) if f.check(CorruptShareError): ft = f.value.reason else: ft = str(f) report.append(" %s: %s" % (s, ft)) summary.append(s) p = (serverid, self._storage_index, shnum, f) problems.append(p) msg = ("CorruptShareError during mutable verify, " "serverid=%(serverid)s, si=%(si)s, 
shnum=%(shnum)d, " "where=%(where)s") log.msg(format=msg, serverid=server.get_name(), si=base32.b2a(self._storage_index), shnum=shnum, where=ft, level=log.WEIRD, umid="EkK8QA") sharemap = dictutil.DictOfSets() for verinfo in vmap: for (shnum, server, timestamp) in vmap[verinfo]: shareid = "%s-sh%d" % (smap.summarize_version(verinfo), shnum) sharemap.add(shareid, server) if healthy: summary = "Healthy" else: summary = "Unhealthy: " + " ".join(summary) cr = CheckResults(from_string(self._node.get_uri()), self._storage_index, healthy=healthy, recoverable=bool(recoverable), needs_rebalancing=needs_rebalancing, count_shares_needed=counters["count-shares-needed"], count_shares_expected=counters["count-shares-expected"], count_shares_good=counters["count-shares-good"], count_good_share_hosts=counters["count-good-share-hosts"], count_recoverable_versions=len(recoverable), count_unrecoverable_versions=len(unrecoverable), servers_responding=list(smap.get_reachable_servers()), sharemap=sharemap, count_wrong_shares=counters["count-wrong-shares"], list_corrupt_shares=corrupt_share_locators, count_corrupt_shares=len(corrupt_share_locators), list_incompatible_shares=[], count_incompatible_shares=0, summary=summary, report=report, share_problems=problems, servermap=smap.copy()) return cr class MutableCheckAndRepairer(MutableChecker): SERVERMAP_MODE = MODE_WRITE # needed to get the privkey def __init__(self, node, storage_broker, history, monitor): MutableChecker.__init__(self, node, storage_broker, history, monitor) self.cr_results = CheckAndRepairResults(self._storage_index) self.need_repair = False def check(self, verify=False, add_lease=False): d = MutableChecker.check(self, verify, add_lease) d.addCallback(self._stash_pre_repair_results) d.addCallback(self._maybe_repair) d.addCallback(lambda res: self.cr_results) return d def _stash_pre_repair_results(self, pre_repair_results): self.cr_results.pre_repair_results = pre_repair_results return pre_repair_results def _maybe_repair(self, pre_repair_results): crr = self.cr_results self._monitor.raise_if_cancelled() if not self.need_repair: crr.post_repair_results = pre_repair_results return if self._node.is_readonly(): # ticket #625: we cannot yet repair read-only mutable files crr.post_repair_results = pre_repair_results crr.repair_attempted = False return crr.repair_attempted = True d = self._node.repair(pre_repair_results, monitor=self._monitor) def _repair_finished(rr): crr.repair_successful = rr.get_successful() crr.post_repair_results = self._make_checker_results(rr.servermap) crr.repair_results = rr # TODO? return def _repair_error(f): # I'm not sure if I want to pass through a failure or not. crr.repair_successful = False crr.repair_failure = f # TODO? #crr.post_repair_results = ?? return f d.addCallbacks(_repair_finished, _repair_error) return d tahoe-lafs-1.10.0/src/allmydata/mutable/common.py000066400000000000000000000045761221140116300216510ustar00rootroot00000000000000 MODE_CHECK = "MODE_CHECK" # query all peers MODE_ANYTHING = "MODE_ANYTHING" # one recoverable version MODE_WRITE = "MODE_WRITE" # replace all shares, probably.. not for initial # creation MODE_READ = "MODE_READ" MODE_REPAIR = "MODE_REPAIR" # query all peers, get the privkey class NotWriteableError(Exception): pass class BadShareError(Exception): """This represents an error discovered in a particular share, during retrieve, from which we can recover by using some other share. This does *not* include local coding errors. 
""" class NeedMoreDataError(BadShareError): def __init__(self, needed_bytes, encprivkey_offset, encprivkey_length): Exception.__init__(self) self.needed_bytes = needed_bytes # up through EOF self.encprivkey_offset = encprivkey_offset self.encprivkey_length = encprivkey_length def __repr__(self): return "" % self.needed_bytes class UncoordinatedWriteError(Exception): def __repr__(self): return ("<%s -- You, oh user, tried to change a file or directory " "at the same time as another process was trying to change it. " " To avoid data loss, don't do this. Please see " "docs/write_coordination.rst for details.>" % (self.__class__.__name__,)) class UnrecoverableFileError(Exception): pass class NotEnoughServersError(Exception): """There were not enough functioning servers available to place shares upon. This might result from all servers being full or having an error, a local bug which causes all server requests to fail in the same way, or from there being zero servers. The first error received (if any) is stored in my .first_error attribute.""" def __init__(self, why, first_error=None): Exception.__init__(self, why, first_error) self.first_error = first_error class CorruptShareError(BadShareError): def __init__(self, server, shnum, reason): self.args = (server, shnum, reason) self.server = server self.shnum = shnum self.reason = reason def __str__(self): return "" % (self.__class__.__name__, id(self), self.is_readonly() and 'RO' or 'RW', self._uri.abbrev()) else: return "<%s %x %s %s>" % (self.__class__.__name__, id(self), None, None) def init_from_cap(self, filecap): # we have the URI, but we have not yet retrieved the public # verification key, nor things like 'k' or 'N'. If and when someone # wants to get our contents, we'll pull from shares and fill those # in. if isinstance(filecap, (WriteableMDMFFileURI, ReadonlyMDMFFileURI)): self._protocol_version = MDMF_VERSION elif isinstance(filecap, (ReadonlySSKFileURI, WriteableSSKFileURI)): self._protocol_version = SDMF_VERSION self._uri = filecap self._writekey = None if not filecap.is_readonly() and filecap.is_mutable(): self._writekey = self._uri.writekey self._readkey = self._uri.readkey self._storage_index = self._uri.storage_index self._fingerprint = self._uri.fingerprint # the following values are learned during Retrieval # self._pubkey # self._required_shares # self._total_shares # and these are needed for Publish. They are filled in by Retrieval # if possible, otherwise by the first peer that Publish talks to. self._privkey = None self._encprivkey = None return self def create_with_keys(self, (pubkey, privkey), contents, version=SDMF_VERSION): """Call this to create a brand-new mutable file. It will create the shares, find homes for them, and upload the initial contents (created with the same rules as IClient.create_mutable_file() ). Returns a Deferred that fires (with the MutableFileNode instance you should use) when it completes. 
""" self._pubkey, self._privkey = pubkey, privkey pubkey_s = self._pubkey.serialize() privkey_s = self._privkey.serialize() self._writekey = hashutil.ssk_writekey_hash(privkey_s) self._encprivkey = self._encrypt_privkey(self._writekey, privkey_s) self._fingerprint = hashutil.ssk_pubkey_fingerprint_hash(pubkey_s) if version == MDMF_VERSION: self._uri = WriteableMDMFFileURI(self._writekey, self._fingerprint) self._protocol_version = version elif version == SDMF_VERSION: self._uri = WriteableSSKFileURI(self._writekey, self._fingerprint) self._protocol_version = version self._readkey = self._uri.readkey self._storage_index = self._uri.storage_index initial_contents = self._get_initial_contents(contents) return self._upload(initial_contents, None) def _get_initial_contents(self, contents): if contents is None: return MutableData("") if isinstance(contents, str): return MutableData(contents) if IMutableUploadable.providedBy(contents): return contents assert callable(contents), "%s should be callable, not %s" % \ (contents, type(contents)) return contents(self) def _encrypt_privkey(self, writekey, privkey): enc = AES(writekey) crypttext = enc.process(privkey) return crypttext def _decrypt_privkey(self, enc_privkey): enc = AES(self._writekey) privkey = enc.process(enc_privkey) return privkey def _populate_pubkey(self, pubkey): self._pubkey = pubkey def _populate_required_shares(self, required_shares): self._required_shares = required_shares def _populate_total_shares(self, total_shares): self._total_shares = total_shares def _populate_privkey(self, privkey): self._privkey = privkey def _populate_encprivkey(self, encprivkey): self._encprivkey = encprivkey def get_write_enabler(self, server): seed = server.get_foolscap_write_enabler_seed() assert len(seed) == 20 return hashutil.ssk_write_enabler_hash(self._writekey, seed) def get_renewal_secret(self, server): crs = self._secret_holder.get_renewal_secret() frs = hashutil.file_renewal_secret_hash(crs, self._storage_index) lease_seed = server.get_lease_seed() assert len(lease_seed) == 20 return hashutil.bucket_renewal_secret_hash(frs, lease_seed) def get_cancel_secret(self, server): ccs = self._secret_holder.get_cancel_secret() fcs = hashutil.file_cancel_secret_hash(ccs, self._storage_index) lease_seed = server.get_lease_seed() assert len(lease_seed) == 20 return hashutil.bucket_cancel_secret_hash(fcs, lease_seed) def get_writekey(self): return self._writekey def get_readkey(self): return self._readkey def get_storage_index(self): return self._storage_index def get_fingerprint(self): return self._fingerprint def get_privkey(self): return self._privkey def get_encprivkey(self): return self._encprivkey def get_pubkey(self): return self._pubkey def get_required_shares(self): return self._required_shares def get_total_shares(self): return self._total_shares #################################### # IFilesystemNode def get_size(self): return self._most_recent_size def get_current_size(self): d = self.get_size_of_best_version() d.addCallback(self._stash_size) return d def _stash_size(self, size): self._most_recent_size = size return size def get_cap(self): return self._uri def get_readcap(self): return self._uri.get_readonly() def get_verify_cap(self): return self._uri.get_verify_cap() def get_repair_cap(self): if self._uri.is_readonly(): return None return self._uri def get_uri(self): return self._uri.to_string() def get_write_uri(self): if self.is_readonly(): return None return self._uri.to_string() def get_readonly_uri(self): return 
self._uri.get_readonly().to_string() def get_readonly(self): if self.is_readonly(): return self ro = MutableFileNode(self._storage_broker, self._secret_holder, self._default_encoding_parameters, self._history) ro.init_from_cap(self._uri.get_readonly()) return ro def is_mutable(self): return self._uri.is_mutable() def is_readonly(self): return self._uri.is_readonly() def is_unknown(self): return False def is_allowed_in_immutable_directory(self): return not self._uri.is_mutable() def raise_error(self): pass def __hash__(self): return hash((self.__class__, self._uri)) def __cmp__(self, them): if cmp(type(self), type(them)): return cmp(type(self), type(them)) if cmp(self.__class__, them.__class__): return cmp(self.__class__, them.__class__) return cmp(self._uri, them._uri) ################################# # ICheckable def check(self, monitor, verify=False, add_lease=False): checker = MutableChecker(self, self._storage_broker, self._history, monitor) return checker.check(verify, add_lease) def check_and_repair(self, monitor, verify=False, add_lease=False): checker = MutableCheckAndRepairer(self, self._storage_broker, self._history, monitor) return checker.check(verify, add_lease) ################################# # IRepairable def repair(self, check_results, force=False, monitor=None): assert ICheckResults(check_results) r = Repairer(self, check_results, self._storage_broker, self._history, monitor) d = r.start(force) return d ################################# # IFileNode def get_best_readable_version(self): """ I return a Deferred that fires with a MutableFileVersion representing the best readable version of the file that I represent """ return self.get_readable_version() def get_readable_version(self, servermap=None, version=None): """ I return a Deferred that fires with an MutableFileVersion for my version argument, if there is a recoverable file of that version on the grid. If there is no recoverable version, I fire with an UnrecoverableFileError. If a servermap is provided, I look in there for the requested version. If no servermap is provided, I create and update a new one. If no version is provided, then I return a MutableFileVersion representing the best recoverable version of the file. """ d = self._get_version_from_servermap(MODE_READ, servermap, version) def _build_version((servermap, their_version)): assert their_version in servermap.recoverable_versions() assert their_version in servermap.make_versionmap() mfv = MutableFileVersion(self, servermap, their_version, self._storage_index, self._storage_broker, self._readkey, history=self._history) assert mfv.is_readonly() mfv.set_downloader_hints(self._downloader_hints) # our caller can use this to download the contents of the # mutable file. return mfv return d.addCallback(_build_version) def _get_version_from_servermap(self, mode, servermap=None, version=None): """ I return a Deferred that fires with (servermap, version). This function performs validation and a servermap update. If it returns (servermap, version), the caller can assume that: - servermap was last updated in mode. - version is recoverable, and corresponds to the servermap. If version and servermap are provided to me, I will validate that version exists in the servermap, and that the servermap was updated correctly. If version is not provided, but servermap is, I will validate the servermap and return the best recoverable version that I can find in the servermap. 
If the version is provided but the servermap isn't, I will obtain a servermap that has been updated in the correct mode and validate that version is found and recoverable. If neither servermap nor version are provided, I will obtain a servermap updated in the correct mode, and return the best recoverable version that I can find in there. """ # XXX: wording ^^^^ if servermap and servermap.get_last_update()[0] == mode: d = defer.succeed(servermap) else: d = self._get_servermap(mode) def _get_version(servermap, v): if v and v not in servermap.recoverable_versions(): v = None elif not v: v = servermap.best_recoverable_version() if not v: raise UnrecoverableFileError("no recoverable versions") return (servermap, v) return d.addCallback(_get_version, version) def download_best_version(self): """ I return a Deferred that fires with the contents of the best version of this mutable file. """ return self._do_serialized(self._download_best_version) def _download_best_version(self): """ I am the serialized sibling of download_best_version. """ d = self.get_best_readable_version() d.addCallback(self._record_size) d.addCallback(lambda version: version.download_to_data()) # It is possible that the download will fail because there # aren't enough shares to be had. If so, we will try again after # updating the servermap in MODE_WRITE, which may find more # shares than updating in MODE_READ, as we just did. We can do # this by getting the best mutable version and downloading from # that -- the best mutable version will be a MutableFileVersion # with a servermap that was last updated in MODE_WRITE, as we # want. If this fails, then we give up. def _maybe_retry(failure): failure.trap(NotEnoughSharesError) d = self.get_best_mutable_version() d.addCallback(self._record_size) d.addCallback(lambda version: version.download_to_data()) return d d.addErrback(_maybe_retry) return d def _record_size(self, mfv): """ I record the size of a mutable file version. """ self._most_recent_size = mfv.get_size() return mfv def get_size_of_best_version(self): """ I return the size of the best version of this mutable file. This is equivalent to calling get_size() on the result of get_best_readable_version(). """ d = self.get_best_readable_version() return d.addCallback(lambda mfv: mfv.get_size()) ################################# # IMutableFileNode def get_best_mutable_version(self, servermap=None): """ I return a Deferred that fires with a MutableFileVersion representing the best readable version of the file that I represent. I am like get_best_readable_version, except that I will try to make a writeable version if I can. """ return self.get_mutable_version(servermap=servermap) def get_mutable_version(self, servermap=None, version=None): """ I return a version of this mutable file. I return a Deferred that fires with a MutableFileVersion If version is provided, the Deferred will fire with a MutableFileVersion initailized with that version. Otherwise, it will fire with the best version that I can recover. If servermap is provided, I will use that to find versions instead of performing my own servermap update. """ if self.is_readonly(): return self.get_readable_version(servermap=servermap, version=version) # get_mutable_version => write intent, so we require that the # servermap is updated in MODE_WRITE d = self._get_version_from_servermap(MODE_WRITE, servermap, version) def _build_version((servermap, smap_version)): # these should have been set by the servermap update. 
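# A minimal illustrative sketch (not part of this module) of the caller's view
# of the read path defined above: download_best_version() fires with the
# plaintext of the best recoverable version, retrying internally with a
# MODE_WRITE servermap update if the first attempt cannot find enough shares;
# 'node' stands for an existing MutableFileNode obtained elsewhere.

def _example_read_best_version(node):
    # fires with a byte string holding the file's current contents
    return node.download_best_version()

def _example_size_of_best_version(node):
    # fires with the integer size of the best recoverable version
    return node.get_size_of_best_version()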
assert self._secret_holder assert self._writekey mfv = MutableFileVersion(self, servermap, smap_version, self._storage_index, self._storage_broker, self._readkey, self._writekey, self._secret_holder, history=self._history) assert not mfv.is_readonly() mfv.set_downloader_hints(self._downloader_hints) return mfv return d.addCallback(_build_version) # XXX: I'm uncomfortable with the difference between upload and # overwrite, which, FWICT, is basically that you don't have to # do a servermap update before you overwrite. We split them up # that way anyway, so I guess there's no real difficulty in # offering both ways to callers, but it also makes the # public-facing API cluttery, and makes it hard to discern the # right way of doing things. # In general, we leave it to callers to ensure that they aren't # going to cause UncoordinatedWriteErrors when working with # MutableFileVersions. We know that the next three operations # (upload, overwrite, and modify) will all operate on the same # version, so we say that only one of them can be going on at once, # and serialize them to ensure that that actually happens, since as # the caller in this situation it is our job to do that. def overwrite(self, new_contents): """ I overwrite the contents of the best recoverable version of this mutable file with new_contents. This is equivalent to calling overwrite on the result of get_best_mutable_version with new_contents as an argument. I return a Deferred that eventually fires with the results of my replacement process. """ # TODO: Update downloader hints. return self._do_serialized(self._overwrite, new_contents) def _overwrite(self, new_contents): """ I am the serialized sibling of overwrite. """ d = self.get_best_mutable_version() d.addCallback(lambda mfv: mfv.overwrite(new_contents)) d.addCallback(self._did_upload, new_contents.get_size()) return d def upload(self, new_contents, servermap): """ I overwrite the contents of the best recoverable version of this mutable file with new_contents, using servermap instead of creating/updating our own servermap. I return a Deferred that fires with the results of my upload. """ # TODO: Update downloader hints return self._do_serialized(self._upload, new_contents, servermap) def modify(self, modifier, backoffer=None): """ I modify the contents of the best recoverable version of this mutable file with the modifier. This is equivalent to calling modify on the result of get_best_mutable_version. I return a Deferred that eventually fires with an UploadResults instance describing this process. """ # TODO: Update downloader hints. return self._do_serialized(self._modify, modifier, backoffer) def _modify(self, modifier, backoffer): """ I am the serialized sibling of modify. """ d = self.get_best_mutable_version() d.addCallback(lambda mfv: mfv.modify(modifier, backoffer)) return d def download_version(self, servermap, version, fetch_privkey=False): """ Download the specified version of this mutable file. I return a Deferred that fires with the contents of the specified version as a bytestring, or errbacks if the file is not recoverable. """ d = self.get_readable_version(servermap, version) return d.addCallback(lambda mfv: mfv.download_to_data(fetch_privkey)) def get_servermap(self, mode): """ I return a servermap that has been updated in mode. mode should be one of MODE_READ, MODE_WRITE, MODE_CHECK or MODE_ANYTHING. See servermap.py for more on what these mean. 
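# A minimal illustrative sketch (not part of this module) of using
# get_servermap() with the MODE_* constants from allmydata.mutable.common:
# MODE_READ when only downloading, MODE_WRITE before publishing. The
# inspection below uses ServerMap methods already relied upon elsewhere in
# this package (recoverable_versions, best_recoverable_version).

from allmydata.mutable.common import MODE_READ

def _example_list_versions(node):
    d = node.get_servermap(MODE_READ)
    def _inspect(servermap):
        versions = servermap.recoverable_versions()
        best = servermap.best_recoverable_version() if versions else None
        return (len(versions), best)
    d.addCallback(_inspect)
    return d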
""" return self._do_serialized(self._get_servermap, mode) def _get_servermap(self, mode): """ I am a serialized twin to get_servermap. """ servermap = ServerMap() d = self._update_servermap(servermap, mode) # The servermap will tell us about the most recent size of the # file, so we may as well set that so that callers might get # more data about us. if not self._most_recent_size: d.addCallback(self._get_size_from_servermap) return d def _get_size_from_servermap(self, servermap): """ I extract the size of the best version of this file and record it in self._most_recent_size. I return the servermap that I was given. """ if servermap.recoverable_versions(): v = servermap.best_recoverable_version() size = v[4] # verinfo[4] == size self._most_recent_size = size return servermap def _update_servermap(self, servermap, mode): u = ServermapUpdater(self, self._storage_broker, Monitor(), servermap, mode) if self._history: self._history.notify_mapupdate(u.get_status()) return u.update() #def set_version(self, version): # I can be set in two ways: # 1. When the node is created. # 2. (for an existing share) when the Servermap is updated # before I am read. # assert version in (MDMF_VERSION, SDMF_VERSION) # self._protocol_version = version def get_version(self): return self._protocol_version def _do_serialized(self, cb, *args, **kwargs): # note: to avoid deadlock, this callable is *not* allowed to invoke # other serialized methods within this (or any other) # MutableFileNode. The callable should be a bound method of this same # MFN instance. d = defer.Deferred() self._serializer.addCallback(lambda ignore: cb(*args, **kwargs)) # we need to put off d.callback until this Deferred is finished being # processed. Otherwise the caller's subsequent activities (like, # doing other things with this node) can cause reentrancy problems in # the Deferred code itself self._serializer.addBoth(lambda res: eventually(d.callback, res)) # add a log.err just in case something really weird happens, because # self._serializer stays around forever, therefore we won't see the # usual Unhandled Error in Deferred that would give us a hint. self._serializer.addErrback(log.err) return d def _upload(self, new_contents, servermap): """ A MutableFileNode still has to have some way of getting published initially, which is what I am here for. After that, all publishing, updating, modifying and so on happens through MutableFileVersions. """ assert self._pubkey, "update_servermap must be called before publish" # Define IPublishInvoker with a set_downloader_hints method? # Then have the publisher call that method when it's done publishing? p = Publish(self, self._storage_broker, servermap) if self._history: self._history.notify_publish(p.get_status(), new_contents.get_size()) d = p.publish(new_contents) d.addCallback(self._did_upload, new_contents.get_size()) return d def set_downloader_hints(self, hints): self._downloader_hints = hints def _did_upload(self, res, size): self._most_recent_size = size return res class MutableFileVersion: """ I represent a specific version (most likely the best version) of a mutable file. Since I implement IReadable, instances which hold a reference to an instance of me are guaranteed the ability (absent connection difficulties or unrecoverable versions) to read the file that I represent. Depending on whether I was initialized with a write capability or not, I may also provide callers the ability to overwrite or modify the contents of the mutable file that I reference. 
""" implements(IMutableFileVersion, IWriteable) def __init__(self, node, servermap, version, storage_index, storage_broker, readcap, writekey=None, write_secrets=None, history=None): self._node = node self._servermap = servermap self._version = version self._storage_index = storage_index self._write_secrets = write_secrets self._history = history self._storage_broker = storage_broker #assert isinstance(readcap, IURI) self._readcap = readcap self._writekey = writekey self._serializer = defer.succeed(None) def get_sequence_number(self): """ Get the sequence number of the mutable version that I represent. """ return self._version[0] # verinfo[0] == the sequence number # TODO: Terminology? def get_writekey(self): """ I return a writekey or None if I don't have a writekey. """ return self._writekey def set_downloader_hints(self, hints): """ I set the downloader hints. """ assert isinstance(hints, dict) self._downloader_hints = hints def get_downloader_hints(self): """ I return the downloader hints. """ return self._downloader_hints def overwrite(self, new_contents): """ I overwrite the contents of this mutable file version with the data in new_contents. """ assert not self.is_readonly() return self._do_serialized(self._overwrite, new_contents) def _overwrite(self, new_contents): assert IMutableUploadable.providedBy(new_contents) assert self._servermap.get_last_update()[0] == MODE_WRITE return self._upload(new_contents) def modify(self, modifier, backoffer=None): """I use a modifier callback to apply a change to the mutable file. I implement the following pseudocode:: obtain_mutable_filenode_lock() first_time = True while True: update_servermap(MODE_WRITE) old = retrieve_best_version() new = modifier(old, servermap, first_time) first_time = False if new == old: break try: publish(new) except UncoordinatedWriteError, e: backoffer(e) continue break release_mutable_filenode_lock() The idea is that your modifier function can apply a delta of some sort, and it will be re-run as necessary until it succeeds. The modifier must inspect the old version to see whether its delta has already been applied: if so it should return the contents unmodified. Note that the modifier is required to run synchronously, and must not invoke any methods on this MutableFileNode instance. The backoff-er is a callable that is responsible for inserting a random delay between subsequent attempts, to help competing updates from colliding forever. It is also allowed to give up after a while. The backoffer is given two arguments: this MutableFileNode, and the Failure object that contains the UncoordinatedWriteError. It should return a Deferred that will fire when the next attempt should be made, or return the Failure if the loop should give up. If backoffer=None, a default one is provided which will perform exponential backoff, and give up after 4 tries. Note that the backoffer should not invoke any methods on this MutableFileNode instance, and it needs to be highly conscious of deadlock issues. """ assert not self.is_readonly() return self._do_serialized(self._modify, modifier, backoffer) def _modify(self, modifier, backoffer): if backoffer is None: backoffer = BackoffAgent().delay return self._modify_and_retry(modifier, backoffer, True) def _modify_and_retry(self, modifier, backoffer, first_time): """ I try to apply modifier to the contents of this version of the mutable file. If I succeed, I return an UploadResults instance describing my success. If I fail, I try again after waiting for a little bit. 
""" log.msg("doing modify") if first_time: d = self._update_servermap() else: # We ran into trouble; do MODE_CHECK so we're a little more # careful on subsequent tries. d = self._update_servermap(mode=MODE_CHECK) d.addCallback(lambda ignored: self._modify_once(modifier, first_time)) def _retry(f): f.trap(UncoordinatedWriteError) # Uh oh, it broke. We're allowed to trust the servermap for our # first try, but after that we need to update it. It's # possible that we've failed due to a race with another # uploader, and if the race is to converge correctly, we # need to know about that upload. d2 = defer.maybeDeferred(backoffer, self, f) d2.addCallback(lambda ignored: self._modify_and_retry(modifier, backoffer, False)) return d2 d.addErrback(_retry) return d def _modify_once(self, modifier, first_time): """ I attempt to apply a modifier to the contents of the mutable file. """ assert self._servermap.get_last_update()[0] != MODE_READ # download_to_data is serialized, so we have to call this to # avoid deadlock. d = self._try_to_download_data() def _apply(old_contents): new_contents = modifier(old_contents, self._servermap, first_time) precondition((isinstance(new_contents, str) or new_contents is None), "Modifier function must return a string " "or None") if new_contents is None or new_contents == old_contents: log.msg("no changes") # no changes need to be made if first_time: return # However, since Publish is not automatically doing a # recovery when it observes UCWE, we need to do a second # publish. See #551 for details. We'll basically loop until # we managed an uncontested publish. old_uploadable = MutableData(old_contents) new_contents = old_uploadable else: new_contents = MutableData(new_contents) return self._upload(new_contents) d.addCallback(_apply) return d def is_readonly(self): """ I return True if this MutableFileVersion provides no write access to the file that it encapsulates, and False if it provides the ability to modify the file. """ return self._writekey is None def is_mutable(self): """ I return True, since mutable files are always mutable by somebody. """ return True def get_storage_index(self): """ I return the storage index of the reference that I encapsulate. """ return self._storage_index def get_size(self): """ I return the length, in bytes, of this readable object. """ return self._servermap.size_of_version(self._version) def download_to_data(self, fetch_privkey=False): """ I return a Deferred that fires with the contents of this readable object as a byte string. """ c = consumer.MemoryConsumer() d = self.read(c, fetch_privkey=fetch_privkey) d.addCallback(lambda mc: "".join(mc.chunks)) return d def _try_to_download_data(self): """ I am an unserialized cousin of download_to_data; I am called from the children of modify() to download the data associated with this mutable version. """ c = consumer.MemoryConsumer() # modify will almost certainly write, so we need the privkey. d = self._read(c, fetch_privkey=True) d.addCallback(lambda mc: "".join(mc.chunks)) return d def read(self, consumer, offset=0, size=None, fetch_privkey=False): """ I read a portion (possibly all) of the mutable file that I reference into consumer. """ return self._do_serialized(self._read, consumer, offset, size, fetch_privkey) def _read(self, consumer, offset=0, size=None, fetch_privkey=False): """ I am the serialized companion of read. 
""" r = Retrieve(self._node, self._storage_broker, self._servermap, self._version, fetch_privkey) if self._history: self._history.notify_retrieve(r.get_status()) d = r.download(consumer, offset, size) return d def _do_serialized(self, cb, *args, **kwargs): # note: to avoid deadlock, this callable is *not* allowed to invoke # other serialized methods within this (or any other) # MutableFileNode. The callable should be a bound method of this same # MFN instance. d = defer.Deferred() self._serializer.addCallback(lambda ignore: cb(*args, **kwargs)) # we need to put off d.callback until this Deferred is finished being # processed. Otherwise the caller's subsequent activities (like, # doing other things with this node) can cause reentrancy problems in # the Deferred code itself self._serializer.addBoth(lambda res: eventually(d.callback, res)) # add a log.err just in case something really weird happens, because # self._serializer stays around forever, therefore we won't see the # usual Unhandled Error in Deferred that would give us a hint. self._serializer.addErrback(log.err) return d def _upload(self, new_contents): #assert self._pubkey, "update_servermap must be called before publish" p = Publish(self._node, self._storage_broker, self._servermap) if self._history: self._history.notify_publish(p.get_status(), new_contents.get_size()) d = p.publish(new_contents) d.addCallback(self._did_upload, new_contents.get_size()) return d def _did_upload(self, res, size): self._most_recent_size = size return res def update(self, data, offset): """ Do an update of this mutable file version by inserting data at offset within the file. If offset is the EOF, this is an append operation. I return a Deferred that fires with the results of the update operation when it has completed. In cases where update does not append any data, or where it does not append so many blocks that the block count crosses a power-of-two boundary, this operation will use roughly O(data.get_size()) memory/bandwidth/CPU to perform the update. Otherwise, it must download, re-encode, and upload the entire file again, which will use O(filesize) resources. """ return self._do_serialized(self._update, data, offset) def _update(self, data, offset): """ I update the mutable file version represented by this particular IMutableVersion by inserting the data in data at the offset offset. I return a Deferred that fires when this has been completed. """ new_size = data.get_size() + offset old_size = self.get_size() segment_size = self._version[3] num_old_segments = mathutil.div_ceil(old_size, segment_size) num_new_segments = mathutil.div_ceil(new_size, segment_size) log.msg("got %d old segments, %d new segments" % \ (num_old_segments, num_new_segments)) # We do a whole file re-encode if the file is an SDMF file. if self._version[2]: # version[2] == SDMF salt, which MDMF lacks log.msg("doing re-encode instead of in-place update") return self._do_modify_update(data, offset) # Otherwise, we can replace just the parts that are changing. log.msg("updating in place") d = self._do_update_update(data, offset) d.addCallback(self._decode_and_decrypt_segments, data, offset) d.addCallback(self._build_uploadable_and_finish, data, offset) return d def _do_modify_update(self, data, offset): """ I perform a file update by modifying the contents of the file after downloading it, then reuploading it. I am less efficient than _do_update_update, but am necessary for certain updates. 
""" def m(old, servermap, first_time): start = offset rest = offset + data.get_size() new = old[:start] new += "".join(data.read(data.get_size())) new += old[rest:] return new return self._modify(m, None) def _do_update_update(self, data, offset): """ I start the Servermap update that gets us the data we need to continue the update process. I return a Deferred that fires when the servermap update is done. """ assert IMutableUploadable.providedBy(data) assert self.is_mutable() # offset == self.get_size() is valid and means that we are # appending data to the file. assert offset <= self.get_size() segsize = self._version[3] # We'll need the segment that the data starts in, regardless of # what we'll do later. start_segment = offset // segsize # We only need the end segment if the data we append does not go # beyond the current end-of-file. end_segment = start_segment if offset + data.get_size() < self.get_size(): end_data = offset + data.get_size() # The last byte we touch is the end_data'th byte, which is actually # byte end_data - 1 because bytes are zero-indexed. end_data -= 1 end_segment = end_data // segsize self._start_segment = start_segment self._end_segment = end_segment # Now ask for the servermap to be updated in MODE_WRITE with # this update range. return self._update_servermap(update_range=(start_segment, end_segment)) def _decode_and_decrypt_segments(self, ignored, data, offset): """ After the servermap update, I take the encrypted and encoded data that the servermap fetched while doing its update and transform it into decoded-and-decrypted plaintext that can be used by the new uploadable. I return a Deferred that fires with the segments. """ r = Retrieve(self._node, self._storage_broker, self._servermap, self._version) # decode: takes in our blocks and salts from the servermap, # returns a Deferred that fires with the corresponding plaintext # segments. Does not download -- simply takes advantage of # existing infrastructure within the Retrieve class to avoid # duplicating code. sm = self._servermap # XXX: If the methods in the servermap don't work as # abstractions, you should rewrite them instead of going around # them. update_data = sm.update_data start_segments = {} # shnum -> start segment end_segments = {} # shnum -> end segment blockhashes = {} # shnum -> blockhash tree for (shnum, original_data) in update_data.iteritems(): data = [d[1] for d in original_data if d[0] == self._version] # data is [(blockhashes,start,end)..] # Every data entry in our list should now be share shnum for # a particular version of the mutable file, so all of the # entries should be identical. datum = data[0] assert [x for x in data if x != datum] == [] # datum is (blockhashes,start,end) blockhashes[shnum] = datum[0] start_segments[shnum] = datum[1] # (block,salt) bytestrings end_segments[shnum] = datum[2] d1 = r.decode(start_segments, self._start_segment) d2 = r.decode(end_segments, self._end_segment) d3 = defer.succeed(blockhashes) return deferredutil.gatherResults([d1, d2, d3]) def _build_uploadable_and_finish(self, segments_and_bht, data, offset): """ After the process has the plaintext segments, I build the TransformingUploadable that the publisher will eventually re-upload to the grid. I then invoke the publisher with that uploadable, and return a Deferred when the publish operation has completed without issue. 
""" u = TransformingUploadable(data, offset, self._version[3], segments_and_bht[0], segments_and_bht[1]) p = Publish(self._node, self._storage_broker, self._servermap) return p.update(u, offset, segments_and_bht[2], self._version) def _update_servermap(self, mode=MODE_WRITE, update_range=None): """ I update the servermap. I return a Deferred that fires when the servermap update is done. """ if update_range: u = ServermapUpdater(self._node, self._storage_broker, Monitor(), self._servermap, mode=mode, update_range=update_range) else: u = ServermapUpdater(self._node, self._storage_broker, Monitor(), self._servermap, mode=mode) return u.update() tahoe-lafs-1.10.0/src/allmydata/mutable/layout.py000066400000000000000000002131201221140116300216610ustar00rootroot00000000000000 import struct from allmydata.mutable.common import NeedMoreDataError, UnknownVersionError, \ BadShareError from allmydata.interfaces import HASH_SIZE, SALT_SIZE, SDMF_VERSION, \ MDMF_VERSION, IMutableSlotWriter from allmydata.util import mathutil from twisted.python import failure from twisted.internet import defer from zope.interface import implements # These strings describe the format of the packed structs they help process. # Here's what they mean: # # PREFIX: # >: Big-endian byte order; the most significant byte is first (leftmost). # B: The container version information; stored as an unsigned 8-bit integer. # This is currently either SDMF_VERSION or MDMF_VERSION. # Q: The sequence number; this is sort of like a revision history for # mutable files; they start at 1 and increase as they are changed after # being uploaded. Stored as an unsigned 64-bit integer. # 32s: The root hash of the share hash tree. We use sha-256d, so we use 32 # bytes to store the value. # 16s: The salt for the readkey. This is a 16-byte random value. # # SIGNED_PREFIX additions, things that are covered by the signature: # B: The "k" encoding parameter. We store this as an unsigned 8-bit # integer, since our erasure coding scheme cannot encode to more than # 255 pieces. # B: The "N" encoding parameter. Stored as an unsigned 8-bit integer for # the same reason as above. # Q: The segment size of the uploaded file. This is an unsigned 64-bit # integer, to allow handling large segments and files. For SDMF the # segment size is the data length plus padding; for MDMF it can be # smaller. # Q: The data length of the uploaded file. Like the segment size field, # it is an unsigned 64-bit integer. # # HEADER additions: # L: The offset of the signature. An unsigned 32-bit integer. # L: The offset of the share hash chain. An unsigned 32-bit integer. # L: The offset of the block hash tree. An unsigned 32-bit integer. # L: The offset of the share data. An unsigned 32-bit integer. # Q: The offset of the encrypted private key. An unsigned 64-bit integer, # to account for the possibility of a lot of share data. # Q: The offset of the EOF. An unsigned 64-bit integer, to account for # the possibility of a lot of share data. # # After all of these, we have the following: # - The verification key: Occupies the space between the end of the header # and the start of the signature (i.e.: data[HEADER_LENGTH:o['signature']]. # - The signature, which goes from the signature offset to the share hash # chain offset. # - The share hash chain, which goes from the share hash chain offset to # the block hash tree offset. # - The share data, which goes from the share data offset to the encrypted # private key offset. 
# - The encrypted private key offset, which goes until the end of the file. # # The block hash tree in this encoding has only one share, so the offset of # the share data will be 32 bits more than the offset of the block hash tree. # Given this, we may need to check to see how many bytes a reasonably sized # block hash tree will take up. PREFIX = ">BQ32s16s" # each version may have a different prefix SIGNED_PREFIX = ">BQ32s16s BBQQ" # this is covered by the signature SIGNED_PREFIX_LENGTH = struct.calcsize(SIGNED_PREFIX) HEADER = ">BQ32s16s BBQQ LLLLQQ" # includes offsets HEADER_LENGTH = struct.calcsize(HEADER) OFFSETS = ">LLLLQQ" OFFSETS_LENGTH = struct.calcsize(OFFSETS) MAX_MUTABLE_SHARE_SIZE = 69105*1000*1000*1000*1000 # 69105 TB, kind of arbitrary # These are still used for some tests of SDMF files. def unpack_header(data): o = {} (version, seqnum, root_hash, IV, k, N, segsize, datalen, o['signature'], o['share_hash_chain'], o['block_hash_tree'], o['share_data'], o['enc_privkey'], o['EOF']) = struct.unpack(HEADER, data[:HEADER_LENGTH]) return (version, seqnum, root_hash, IV, k, N, segsize, datalen, o) def unpack_share(data): assert len(data) >= HEADER_LENGTH o = {} (version, seqnum, root_hash, IV, k, N, segsize, datalen, o['signature'], o['share_hash_chain'], o['block_hash_tree'], o['share_data'], o['enc_privkey'], o['EOF']) = struct.unpack(HEADER, data[:HEADER_LENGTH]) if version != 0: raise UnknownVersionError("got mutable share version %d, but I only understand version 0" % version) if len(data) < o['EOF']: raise NeedMoreDataError(o['EOF'], o['enc_privkey'], o['EOF']-o['enc_privkey']) pubkey = data[HEADER_LENGTH:o['signature']] signature = data[o['signature']:o['share_hash_chain']] share_hash_chain_s = data[o['share_hash_chain']:o['block_hash_tree']] share_hash_format = ">H32s" hsize = struct.calcsize(share_hash_format) if len(share_hash_chain_s) % hsize != 0: raise BadShareError("hash chain is %d bytes, not multiple of %d" % (len(share_hash_chain_s), hsize)) share_hash_chain = [] for i in range(0, len(share_hash_chain_s), hsize): chunk = share_hash_chain_s[i:i+hsize] (hid, h) = struct.unpack(share_hash_format, chunk) share_hash_chain.append( (hid, h) ) share_hash_chain = dict(share_hash_chain) block_hash_tree_s = data[o['block_hash_tree']:o['share_data']] if len(block_hash_tree_s) % 32 != 0: raise BadShareError("block_hash_tree is %d bytes, not multiple of %d" % (len(block_hash_tree_s), 32)) block_hash_tree = [] for i in range(0, len(block_hash_tree_s), 32): block_hash_tree.append(block_hash_tree_s[i:i+32]) share_data = data[o['share_data']:o['enc_privkey']] enc_privkey = data[o['enc_privkey']:o['EOF']] return (seqnum, root_hash, IV, k, N, segsize, datalen, pubkey, signature, share_hash_chain, block_hash_tree, share_data, enc_privkey) def get_version_from_checkstring(checkstring): (t, ) = struct.unpack(">B", checkstring[:1]) return t def unpack_sdmf_checkstring(checkstring): cs_len = struct.calcsize(PREFIX) version, seqnum, root_hash, IV = struct.unpack(PREFIX, checkstring[:cs_len]) assert version == SDMF_VERSION, version return (seqnum, root_hash, IV) def unpack_mdmf_checkstring(checkstring): cs_len = struct.calcsize(MDMFCHECKSTRING) version, seqnum, root_hash = struct.unpack(MDMFCHECKSTRING, checkstring[:cs_len]) assert version == MDMF_VERSION, version return (seqnum, root_hash) def pack_offsets(verification_key_length, signature_length, share_hash_chain_length, block_hash_tree_length, share_data_length, encprivkey_length): post_offset = HEADER_LENGTH offsets = {} o1 = 
offsets['signature'] = post_offset + verification_key_length o2 = offsets['share_hash_chain'] = o1 + signature_length o3 = offsets['block_hash_tree'] = o2 + share_hash_chain_length o4 = offsets['share_data'] = o3 + block_hash_tree_length o5 = offsets['enc_privkey'] = o4 + share_data_length offsets['EOF'] = o5 + encprivkey_length return struct.pack(">LLLLQQ", offsets['signature'], offsets['share_hash_chain'], offsets['block_hash_tree'], offsets['share_data'], offsets['enc_privkey'], offsets['EOF']) def pack_share(prefix, verification_key, signature, share_hash_chain, block_hash_tree, share_data, encprivkey): share_hash_chain_s = "".join([struct.pack(">H32s", i, share_hash_chain[i]) for i in sorted(share_hash_chain.keys())]) for h in block_hash_tree: assert len(h) == 32 block_hash_tree_s = "".join(block_hash_tree) offsets = pack_offsets(len(verification_key), len(signature), len(share_hash_chain_s), len(block_hash_tree_s), len(share_data), len(encprivkey)) final_share = "".join([prefix, offsets, verification_key, signature, share_hash_chain_s, block_hash_tree_s, share_data, encprivkey]) return final_share def pack_prefix(seqnum, root_hash, IV, required_shares, total_shares, segment_size, data_length): prefix = struct.pack(SIGNED_PREFIX, 0, # version, seqnum, root_hash, IV, required_shares, total_shares, segment_size, data_length, ) return prefix class SDMFSlotWriteProxy: implements(IMutableSlotWriter) """ I represent a remote write slot for an SDMF mutable file. I build a share in memory, and then write it in one piece to the remote server. This mimics how SDMF shares were built before MDMF (and the new MDMF uploader), but provides that functionality in a way that allows the MDMF uploader to be built without much special-casing for file format, which makes the uploader code more readable. """ def __init__(self, shnum, rref, # a remote reference to a storage server storage_index, secrets, # (write_enabler, renew_secret, cancel_secret) seqnum, # the sequence number of the mutable file required_shares, total_shares, segment_size, data_length): # the length of the original file self.shnum = shnum self._rref = rref self._storage_index = storage_index self._secrets = secrets self._seqnum = seqnum self._required_shares = required_shares self._total_shares = total_shares self._segment_size = segment_size self._data_length = data_length # This is an SDMF file, so it should have only one segment, so, # modulo padding of the data length, the segment size and the # data length should be the same. expected_segment_size = mathutil.next_multiple(data_length, self._required_shares) assert expected_segment_size == segment_size self._block_size = self._segment_size / self._required_shares # This is meant to mimic how SDMF files were built before MDMF # entered the picture: we generate each share in its entirety, # then push it off to the storage server in one write. When # callers call set_*, they are just populating this dict. # finish_publishing will stitch these pieces together into a # coherent share, and then write the coherent share to the # storage server. self._share_pieces = {} # This tells the write logic what checkstring to use when # writing remote shares. self._testvs = [] self._readvs = [(0, struct.calcsize(PREFIX))] def set_checkstring(self, checkstring_or_seqnum, root_hash=None, salt=None): """ Set the checkstring that I will pass to the remote server when writing. @param checkstring_or_seqnum: A packed checkstring to use, or a sequence number. 
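# A minimal illustrative sketch (not part of this module) of what a "packed
# checkstring" is for SDMF, and of how the fixed header sizes fall out of the
# format strings defined near the top of this file. The formats are
# big-endian, so struct.calcsize() is simply the sum of the field widths.

import struct

_PREFIX = ">BQ32s16s"                     # same format as PREFIX above
assert struct.calcsize(_PREFIX) == 1 + 8 + 32 + 16                   # 57 bytes
assert struct.calcsize(">BQ32s16s BBQQ") == 57 + 1 + 1 + 8 + 8       # 75 (SIGNED_PREFIX)
assert struct.calcsize(">BQ32s16s BBQQ LLLLQQ") == 75 + 4*4 + 2*8    # 107 (HEADER)

def _example_pack_sdmf_checkstring(seqnum, root_hash, IV):
    # version byte 0 == SDMF; root_hash is 32 bytes, IV (the salt) is 16 bytes
    return struct.pack(_PREFIX, 0, seqnum, root_hash, IV)

# len(_example_pack_sdmf_checkstring(3, "\x00"*32, "\x00"*16)) == 57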
I will treat this as a checkstr Note that implementations can differ in which semantics they wish to support for set_checkstring -- they can, for example, build the checkstring themselves from its constituents, or some other thing. """ if root_hash and salt: checkstring = struct.pack(PREFIX, 0, checkstring_or_seqnum, root_hash, salt) else: checkstring = checkstring_or_seqnum self._testvs = [(0, len(checkstring), "eq", checkstring)] def get_checkstring(self): """ Get the checkstring that I think currently exists on the remote server. """ if self._testvs: return self._testvs[0][3] return "" def put_block(self, data, segnum, salt): """ Add a block and salt to the share. """ # SDMF files have only one segment assert segnum == 0 assert len(data) == self._block_size assert len(salt) == SALT_SIZE self._share_pieces['sharedata'] = data self._share_pieces['salt'] = salt # TODO: Figure out something intelligent to return. return defer.succeed(None) def put_encprivkey(self, encprivkey): """ Add the encrypted private key to the share. """ self._share_pieces['encprivkey'] = encprivkey return defer.succeed(None) def put_blockhashes(self, blockhashes): """ Add the block hash tree to the share. """ assert isinstance(blockhashes, list) for h in blockhashes: assert len(h) == HASH_SIZE # serialize the blockhashes, then set them. blockhashes_s = "".join(blockhashes) self._share_pieces['block_hash_tree'] = blockhashes_s return defer.succeed(None) def put_sharehashes(self, sharehashes): """ Add the share hash chain to the share. """ assert isinstance(sharehashes, dict) for h in sharehashes.itervalues(): assert len(h) == HASH_SIZE # serialize the sharehashes, then set them. sharehashes_s = "".join([struct.pack(">H32s", i, sharehashes[i]) for i in sorted(sharehashes.keys())]) self._share_pieces['share_hash_chain'] = sharehashes_s return defer.succeed(None) def put_root_hash(self, root_hash): """ Add the root hash to the share. """ assert len(root_hash) == HASH_SIZE self._share_pieces['root_hash'] = root_hash return defer.succeed(None) def put_salt(self, salt): """ Add a salt to an empty SDMF file. """ assert len(salt) == SALT_SIZE self._share_pieces['salt'] = salt self._share_pieces['sharedata'] = "" def get_signable(self): """ Return the part of the share that needs to be signed. SDMF writers need to sign the packed representation of the first eight fields of the remote share, that is: - version number (0) - sequence number - root of the share hash tree - salt - k - n - segsize - datalen This method is responsible for returning that to callers. """ return struct.pack(SIGNED_PREFIX, 0, self._seqnum, self._share_pieces['root_hash'], self._share_pieces['salt'], self._required_shares, self._total_shares, self._segment_size, self._data_length) def put_signature(self, signature): """ Add the signature to the share. """ self._share_pieces['signature'] = signature return defer.succeed(None) def put_verification_key(self, verification_key): """ Add the verification key to the share. """ self._share_pieces['verification_key'] = verification_key return defer.succeed(None) def get_verinfo(self): """ I return my verinfo tuple. This is used by the ServermapUpdater to keep track of versions of mutable files. The verinfo tuple for MDMF files contains: - seqnum - root hash - a blank (nothing) - segsize - datalen - k - n - prefix (the thing that you sign) - a tuple of offsets We include the nonce in MDMF to simplify processing of version information tuples. 
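# A minimal illustrative sketch (not part of this module) making the verinfo
# layout described above explicit. The tuple is indexed positionally in
# several places in this package (verinfo[0] == seqnum, verinfo[2] == the SDMF
# salt or the MDMF blank, verinfo[3] == segment size, verinfo[4] == data
# length), so a destructuring helper is handy when reading that code.

def _example_unpack_verinfo(verinfo):
    (seqnum,
     root_hash,
     salt_or_blank,      # 16-byte IV for SDMF; blank/derived value for MDMF
     segsize,
     datalen,
     k,                  # shares needed
     N,                  # total shares
     signed_prefix,      # the bytes that get signed
     offsets_tuple) = verinfo
    return {"seqnum": seqnum, "segsize": segsize, "datalen": datalen,
            "k": k, "N": N}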
The verinfo tuple for SDMF files is the same, but contains a 16-byte IV instead of a hash of salts. """ return (self._seqnum, self._share_pieces['root_hash'], self._share_pieces['salt'], self._segment_size, self._data_length, self._required_shares, self._total_shares, self.get_signable(), self._get_offsets_tuple()) def _get_offsets_dict(self): post_offset = HEADER_LENGTH offsets = {} verification_key_length = len(self._share_pieces['verification_key']) o1 = offsets['signature'] = post_offset + verification_key_length signature_length = len(self._share_pieces['signature']) o2 = offsets['share_hash_chain'] = o1 + signature_length share_hash_chain_length = len(self._share_pieces['share_hash_chain']) o3 = offsets['block_hash_tree'] = o2 + share_hash_chain_length block_hash_tree_length = len(self._share_pieces['block_hash_tree']) o4 = offsets['share_data'] = o3 + block_hash_tree_length share_data_length = len(self._share_pieces['sharedata']) o5 = offsets['enc_privkey'] = o4 + share_data_length encprivkey_length = len(self._share_pieces['encprivkey']) offsets['EOF'] = o5 + encprivkey_length return offsets def _get_offsets_tuple(self): offsets = self._get_offsets_dict() return tuple([(key, value) for key, value in offsets.items()]) def _pack_offsets(self): offsets = self._get_offsets_dict() return struct.pack(">LLLLQQ", offsets['signature'], offsets['share_hash_chain'], offsets['block_hash_tree'], offsets['share_data'], offsets['enc_privkey'], offsets['EOF']) def finish_publishing(self): """ Do anything necessary to finish writing the share to a remote server. I require that no further publishing needs to take place after this method has been called. """ for k in ["sharedata", "encprivkey", "signature", "verification_key", "share_hash_chain", "block_hash_tree"]: assert k in self._share_pieces, (self.shnum, k, self._share_pieces.keys()) # This is the only method that actually writes something to the # remote server. # First, we need to pack the share into data that we can write # to the remote server in one write. offsets = self._pack_offsets() prefix = self.get_signable() final_share = "".join([prefix, offsets, self._share_pieces['verification_key'], self._share_pieces['signature'], self._share_pieces['share_hash_chain'], self._share_pieces['block_hash_tree'], self._share_pieces['sharedata'], self._share_pieces['encprivkey']]) # Our only data vector is going to be writing the final share, # in its entirely. datavs = [(0, final_share)] if not self._testvs: # Our caller has not provided us with another checkstring # yet, so we assume that we are writing a new share, and set # a test vector that will allow a new share to be written. self._testvs = [] self._testvs.append(tuple([0, 1, "eq", ""])) tw_vectors = {} tw_vectors[self.shnum] = (self._testvs, datavs, None) return self._rref.callRemote("slot_testv_and_readv_and_writev", self._storage_index, self._secrets, tw_vectors, # TODO is it useful to read something? self._readvs) MDMFHEADER = ">BQ32sBBQQ QQQQQQQQ" MDMFHEADERWITHOUTOFFSETS = ">BQ32sBBQQ" MDMFHEADERSIZE = struct.calcsize(MDMFHEADER) MDMFHEADERWITHOUTOFFSETSSIZE = struct.calcsize(MDMFHEADERWITHOUTOFFSETS) MDMFCHECKSTRING = ">BQ32s" MDMFSIGNABLEHEADER = ">BQ32sBBQQ" MDMFOFFSETS = ">QQQQQQQQ" MDMFOFFSETS_LENGTH = struct.calcsize(MDMFOFFSETS) PRIVATE_KEY_SIZE = 1220 SIGNATURE_SIZE = 260 VERIFICATION_KEY_SIZE = 292 # We know we won't have more than 256 shares, and we know that we won't need # to store more than ln2(256) hash-chain nodes to validate, so that's our # bound. 
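# A minimal, standalone sketch (not Tahoe code) of the SDMF packing performed
# by pack_offsets()/_get_offsets_dict()/_pack_offsets() above. The field order
# (verification key, signature, share hash chain, block hash tree, share data,
# encrypted private key) and the ">BQ32s16s BBQQ" / ">LLLLQQ" struct formats
# are taken from this module; the field lengths below are made up.
import struct
_sdmf_header_len = struct.calcsize(">BQ32s16s BBQQ") + struct.calcsize(">LLLLQQ")
_lengths = {'verification_key': 292, 'signature': 256, 'share_hash_chain': 3 * 34,
            'block_hash_tree': 32, 'sharedata': 1024, 'encprivkey': 1216}
_o = {}
_o['signature'] = _sdmf_header_len + _lengths['verification_key']
_o['share_hash_chain'] = _o['signature'] + _lengths['signature']
_o['block_hash_tree'] = _o['share_hash_chain'] + _lengths['share_hash_chain']
_o['share_data'] = _o['block_hash_tree'] + _lengths['block_hash_tree']
_o['enc_privkey'] = _o['share_data'] + _lengths['sharedata']
_o['EOF'] = _o['enc_privkey'] + _lengths['encprivkey']
_packed = struct.pack(">LLLLQQ", _o['signature'], _o['share_hash_chain'],
                      _o['block_hash_tree'], _o['share_data'],
                      _o['enc_privkey'], _o['EOF'])
assert _sdmf_header_len == 107     # signed prefix (75 bytes) + offsets table (32 bytes)
assert len(_packed) == 32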
Each node requires 2 bytes of node-number plus 32 bytes of hash. SHARE_HASH_CHAIN_SIZE = (2+HASH_SIZE)*mathutil.log_ceil(256, 2) class MDMFSlotWriteProxy: implements(IMutableSlotWriter) """ I represent a remote write slot for an MDMF mutable file. I abstract away from my caller the details of block and salt management, and the implementation of the on-disk format for MDMF shares. """ # Expected layout, MDMF: # offset: size: name: #-- signed part -- # 0 1 version number (01) # 1 8 sequence number # 9 32 share tree root hash # 41 1 The "k" encoding parameter # 42 1 The "N" encoding parameter # 43 8 The segment size of the uploaded file # 51 8 The data length of the original plaintext #-- end signed part -- # 59 8 The offset of the encrypted private key # 67 8 The offset of the share hash chain # 75 8 The offset of the signature # 83 8 The offset of the verification key # 91 8 The offset of the end of the v. key. # 99 8 The offset of the share data # 107 8 The offset of the block hash tree # 115 8 The offset of EOF # 123 var encrypted private key # var var share hash chain # var var signature # var var verification key # var large share data # var var block hash tree # # We order the fields that way to make smart downloaders -- downloaders # which prempetively read a big part of the share -- possible. # # The checkstring is the first three fields -- the version number, # sequence number, root hash and root salt hash. This is consistent # in meaning to what we have with SDMF files, except now instead of # using the literal salt, we use a value derived from all of the # salts -- the share hash root. # # The salt is stored before the block for each segment. The block # hash tree is computed over the combination of block and salt for # each segment. In this way, we get integrity checking for both # block and salt with the current block hash tree arrangement. # # The ordering of the offsets is different to reflect the dependencies # that we'll run into with an MDMF file. The expected write flow is # something like this: # # 0: Initialize with the sequence number, encoding parameters and # data length. From this, we can deduce the number of segments, # and where they should go.. We can also figure out where the # encrypted private key should go, because we can figure out how # big the share data will be. # # 1: Encrypt, encode, and upload the file in chunks. Do something # like # # put_block(data, segnum, salt) # # to write a block and a salt to the disk. We can do both of # these operations now because we have enough of the offsets to # know where to put them. # # 2: Put the encrypted private key. Use: # # put_encprivkey(encprivkey) # # Now that we know the length of the private key, we can fill # in the offset for the block hash tree. # # 3: We're now in a position to upload the block hash tree for # a share. Put that using something like: # # put_blockhashes(block_hash_tree) # # Note that block_hash_tree is a list of hashes -- we'll take # care of the details of serializing that appropriately. When # we get the block hash tree, we are also in a position to # calculate the offset for the share hash chain, and fill that # into the offsets table. # # 4: We're now in a position to upload the share hash chain for # a share. Do that with something like: # # put_sharehashes(share_hash_chain) # # share_hash_chain should be a dictionary mapping shnums to # 32-byte hashes -- the wrapper handles serialization. # We'll know where to put the signature at this point, also. 
# The root of this tree will be put explicitly in the next # step. # # 5: Before putting the signature, we must first put the # root_hash. Do this with: # # put_root_hash(root_hash). # # In terms of knowing where to put this value, it was always # possible to place it, but it makes sense semantically to # place it after the share hash tree, so that's why you do it # in this order. # # 6: With the root hash put, we can now sign the header. Use: # # get_signable() # # to get the part of the header that you want to sign, and use: # # put_signature(signature) # # to write your signature to the remote server. # # 6: Add the verification key, and finish. Do: # # put_verification_key(key) # # and # # finish_publish() # # Checkstring management: # # To write to a mutable slot, we have to provide test vectors to ensure # that we are writing to the same data that we think we are. These # vectors allow us to detect uncoordinated writes; that is, writes # where both we and some other shareholder are writing to the # mutable slot, and to report those back to the parts of the program # doing the writing. # # With SDMF, this was easy -- all of the share data was written in # one go, so it was easy to detect uncoordinated writes, and we only # had to do it once. With MDMF, not all of the file is written at # once. # # If a share is new, we write out as much of the header as we can # before writing out anything else. This gives other writers a # canary that they can use to detect uncoordinated writes, and, if # they do the same thing, gives us the same canary. We them update # the share. We won't be able to write out two fields of the header # -- the share tree hash and the salt hash -- until we finish # writing out the share. We only require the writer to provide the # initial checkstring, and keep track of what it should be after # updates ourselves. # # If we haven't written anything yet, then on the first write (which # will probably be a block + salt of a share), we'll also write out # the header. On subsequent passes, we'll expect to see the header. # This changes in two places: # # - When we write out the salt hash # - When we write out the root of the share hash tree # # since these values will change the header. It is possible that we # can just make those be written in one operation to minimize # disruption. def __init__(self, shnum, rref, # a remote reference to a storage server storage_index, secrets, # (write_enabler, renew_secret, cancel_secret) seqnum, # the sequence number of the mutable file required_shares, total_shares, segment_size, data_length): # the length of the original file self.shnum = shnum self._rref = rref self._storage_index = storage_index self._seqnum = seqnum self._required_shares = required_shares assert self.shnum >= 0 and self.shnum < total_shares self._total_shares = total_shares # We build up the offset table as we write things. It is the # last thing we write to the remote server. self._offsets = {} self._testvs = [] # This is a list of write vectors that will be sent to our # remote server once we are directed to write things there. self._writevs = [] self._secrets = secrets # The segment size needs to be a multiple of the k parameter -- # any padding should have been carried out by the publisher # already. 
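# A standalone sketch (not Tahoe code) of how the write flow described above
# pins down each offset as the previous field is queued: each put_* call below
# (put_encprivkey, put_sharehashes, put_signature) records where the *next*
# field will start. The 123-byte header size comes from the layout table
# above; the field lengths here are made up.
_mdmf_header_end = 123
_enc_privkey_len = 1216         # hypothetical
_share_hash_chain_len = 6 * 34  # hypothetical: six packed (shnum, hash) pairs
_signature_len = 256            # hypothetical
_offsets = {'enc_privkey': _mdmf_header_end}
_offsets['share_hash_chain'] = _offsets['enc_privkey'] + _enc_privkey_len
_offsets['signature'] = _offsets['share_hash_chain'] + _share_hash_chain_len
_offsets['verification_key'] = _offsets['signature'] + _signature_len
# share_data and block_hash_tree, by contrast, are fixed up front from the
# encoding parameters (see __init__ below), so blocks can be pushed before the
# smaller fields have been written.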
assert segment_size % required_shares == 0 self._segment_size = segment_size self._data_length = data_length # These are set later -- we define them here so that we can # check for their existence easily # This is the root of the share hash tree -- the Merkle tree # over the roots of the block hash trees computed for shares in # this upload. self._root_hash = None # We haven't yet written anything to the remote bucket. By # setting this, we tell the _write method as much. The write # method will then know that it also needs to add a write vector # for the checkstring (or what we have of it) to the first write # request. We'll then record that value for future use. If # we're expecting something to be there already, we need to call # set_checkstring before we write anything to tell the first # write about that. self._written = False # When writing data to the storage servers, we get a read vector # for free. We'll read the checkstring, which will help us # figure out what's gone wrong if a write fails. self._readv = [(0, struct.calcsize(MDMFCHECKSTRING))] # We calculate the number of segments because it tells us # where the salt part of the file ends/share segment begins, # and also because it provides a useful amount of bounds checking. self._num_segments = mathutil.div_ceil(self._data_length, self._segment_size) self._block_size = self._segment_size / self._required_shares # We also calculate the share size, to help us with block # constraints later. tail_size = self._data_length % self._segment_size if not tail_size: self._tail_block_size = self._block_size else: self._tail_block_size = mathutil.next_multiple(tail_size, self._required_shares) self._tail_block_size /= self._required_shares # We already know where the sharedata starts; right after the end # of the header (which is defined as the signable part + the offsets) # We can also calculate where the encrypted private key begins # from what we know know. self._actual_block_size = self._block_size + SALT_SIZE data_size = self._actual_block_size * (self._num_segments - 1) data_size += self._tail_block_size data_size += SALT_SIZE self._offsets['enc_privkey'] = MDMFHEADERSIZE # We don't define offsets for these because we want them to be # tightly packed -- this allows us to ignore the responsibility # of padding individual values, and of removing that padding # later. So nonconstant_start is where we start writing # nonconstant data. nonconstant_start = self._offsets['enc_privkey'] nonconstant_start += PRIVATE_KEY_SIZE nonconstant_start += SIGNATURE_SIZE nonconstant_start += VERIFICATION_KEY_SIZE nonconstant_start += SHARE_HASH_CHAIN_SIZE self._offsets['share_data'] = nonconstant_start # Finally, we know how big the share data will be, so we can # figure out where the block hash tree needs to go. # XXX: But this will go away if Zooko wants to make it so that # you don't need to know the size of the file before you start # uploading it. self._offsets['block_hash_tree'] = self._offsets['share_data'] + \ data_size # Done. We can snow start writing. def set_checkstring(self, seqnum_or_checkstring, root_hash=None, salt=None): """ Set checkstring checkstring for the given shnum. This can be invoked in one of two ways. With one argument, I assume that you are giving me a literal checkstring -- e.g., the output of get_checkstring. I will then set that checkstring as it is. This form is used by unit tests. With two arguments, I assume that you are giving me a sequence number and root hash to make a checkstring from. 
In that case, I will build a checkstring and set it for you. This form is used by the publisher. By default, I assume that I am writing new shares to the grid. If you don't explcitly set your own checkstring, I will use one that requires that the remote share not exist. You will want to use this method if you are updating a share in-place; otherwise, writes will fail. """ # You're allowed to overwrite checkstrings with this method; # I assume that users know what they are doing when they call # it. if root_hash: checkstring = struct.pack(MDMFCHECKSTRING, 1, seqnum_or_checkstring, root_hash) else: checkstring = seqnum_or_checkstring if checkstring == "": # We special-case this, since len("") = 0, but we need # length of 1 for the case of an empty share to work on the # storage server, which is what a checkstring that is the # empty string means. self._testvs = [] else: self._testvs = [] self._testvs.append((0, len(checkstring), "eq", checkstring)) def __repr__(self): return "MDMFSlotWriteProxy for share %d" % self.shnum def get_checkstring(self): """ Given a share number, I return a representation of what the checkstring for that share on the server will look like. I am mostly used for tests. """ if self._root_hash: roothash = self._root_hash else: roothash = "\x00" * 32 return struct.pack(MDMFCHECKSTRING, 1, self._seqnum, roothash) def put_block(self, data, segnum, salt): """ I queue a write vector for the data, salt, and segment number provided to me. I return None, as I do not actually cause anything to be written yet. """ if segnum >= self._num_segments: raise LayoutInvalid("I won't overwrite the block hash tree") if len(salt) != SALT_SIZE: raise LayoutInvalid("I was given a salt of size %d, but " "I wanted a salt of size %d") if segnum + 1 == self._num_segments: if len(data) != self._tail_block_size: raise LayoutInvalid("I was given the wrong size block to write") elif len(data) != self._block_size: raise LayoutInvalid("I was given the wrong size block to write") # We want to write at len(MDMFHEADER) + segnum * block_size. offset = self._offsets['share_data'] + \ (self._actual_block_size * segnum) data = salt + data self._writevs.append(tuple([offset, data])) def put_encprivkey(self, encprivkey): """ I queue a write vector for the encrypted private key provided to me. """ assert self._offsets assert self._offsets['enc_privkey'] # You shouldn't re-write the encprivkey after the block hash # tree is written, since that could cause the private key to run # into the block hash tree. Before it writes the block hash # tree, the block hash tree writing method writes the offset of # the share hash chain. So that's a good indicator of whether or # not the block hash tree has been written. if "signature" in self._offsets: raise LayoutInvalid("You can't put the encrypted private key " "after putting the share hash chain") self._offsets['share_hash_chain'] = self._offsets['enc_privkey'] + \ len(encprivkey) self._writevs.append(tuple([self._offsets['enc_privkey'], encprivkey])) def put_blockhashes(self, blockhashes): """ I queue a write vector to put the block hash tree in blockhashes onto the remote server. The encrypted private key must be queued before the block hash tree, since we need to know how large it is to know where the block hash tree should go. The block hash tree must be put before the share hash chain, since its size determines the offset of the share hash chain. 
""" assert self._offsets assert "block_hash_tree" in self._offsets assert isinstance(blockhashes, list) blockhashes_s = "".join(blockhashes) self._offsets['EOF'] = self._offsets['block_hash_tree'] + len(blockhashes_s) self._writevs.append(tuple([self._offsets['block_hash_tree'], blockhashes_s])) def put_sharehashes(self, sharehashes): """ I queue a write vector to put the share hash chain in my argument onto the remote server. The block hash tree must be queued before the share hash chain, since we need to know where the block hash tree ends before we can know where the share hash chain starts. The share hash chain must be put before the signature, since the length of the packed share hash chain determines the offset of the signature. Also, semantically, you must know what the root of the block hash tree is before you can generate a valid signature. """ assert isinstance(sharehashes, dict) assert self._offsets if "share_hash_chain" not in self._offsets: raise LayoutInvalid("You must put the block hash tree before " "putting the share hash chain") # The signature comes after the share hash chain. If the # signature has already been written, we must not write another # share hash chain. The signature writes the verification key # offset when it gets sent to the remote server, so we look for # that. if "verification_key" in self._offsets: raise LayoutInvalid("You must write the share hash chain " "before you write the signature") sharehashes_s = "".join([struct.pack(">H32s", i, sharehashes[i]) for i in sorted(sharehashes.keys())]) self._offsets['signature'] = self._offsets['share_hash_chain'] + \ len(sharehashes_s) self._writevs.append(tuple([self._offsets['share_hash_chain'], sharehashes_s])) def put_root_hash(self, roothash): """ Put the root hash (the root of the share hash tree) in the remote slot. """ # It does not make sense to be able to put the root # hash without first putting the share hashes, since you need # the share hashes to generate the root hash. # # Signature is defined by the routine that places the share hash # chain, so it's a good thing to look for in finding out whether # or not the share hash chain exists on the remote server. if len(roothash) != HASH_SIZE: raise LayoutInvalid("hashes and salts must be exactly %d bytes" % HASH_SIZE) self._root_hash = roothash # To write both of these values, we update the checkstring on # the remote server, which includes them checkstring = self.get_checkstring() self._writevs.append(tuple([0, checkstring])) # This write, if successful, changes the checkstring, so we need # to update our internal checkstring to be consistent with the # one on the server. def get_signable(self): """ Get the first seven fields of the mutable file; the parts that are signed. """ if not self._root_hash: raise LayoutInvalid("You need to set the root hash " "before getting something to " "sign") return struct.pack(MDMFSIGNABLEHEADER, 1, self._seqnum, self._root_hash, self._required_shares, self._total_shares, self._segment_size, self._data_length) def put_signature(self, signature): """ I queue a write vector for the signature of the MDMF share. I require that the root hash and share hash chain have been put to the grid before I will write the signature to the grid. """ if "signature" not in self._offsets: raise LayoutInvalid("You must put the share hash chain " # It does not make sense to put a signature without first # putting the root hash and the salt hash (since otherwise # the signature would be incomplete), so we don't allow that. 
"before putting the signature") if not self._root_hash: raise LayoutInvalid("You must complete the signed prefix " "before computing a signature") # If we put the signature after we put the verification key, we # could end up running into the verification key, and will # probably screw up the offsets as well. So we don't allow that. if "verification_key_end" in self._offsets: raise LayoutInvalid("You can't put the signature after the " "verification key") # The method that writes the verification key defines the EOF # offset before writing the verification key, so look for that. self._offsets['verification_key'] = self._offsets['signature'] +\ len(signature) self._writevs.append(tuple([self._offsets['signature'], signature])) def put_verification_key(self, verification_key): """ I queue a write vector for the verification key. I require that the signature have been written to the storage server before I allow the verification key to be written to the remote server. """ if "verification_key" not in self._offsets: raise LayoutInvalid("You must put the signature before you " "can put the verification key") self._offsets['verification_key_end'] = \ self._offsets['verification_key'] + len(verification_key) assert self._offsets['verification_key_end'] <= self._offsets['share_data'] self._writevs.append(tuple([self._offsets['verification_key'], verification_key])) def _get_offsets_tuple(self): return tuple([(key, value) for key, value in self._offsets.items()]) def get_verinfo(self): return (self._seqnum, self._root_hash, None, self._segment_size, self._data_length, self._required_shares, self._total_shares, self.get_signable(), self._get_offsets_tuple()) def finish_publishing(self): """ I add a write vector for the offsets table, and then cause all of the write vectors that I've dealt with so far to be published to the remote server, ending the write process. """ if "verification_key_end" not in self._offsets: raise LayoutInvalid("You must put the verification key before " "you can publish the offsets") offsets_offset = struct.calcsize(MDMFHEADERWITHOUTOFFSETS) offsets = struct.pack(MDMFOFFSETS, self._offsets['enc_privkey'], self._offsets['share_hash_chain'], self._offsets['signature'], self._offsets['verification_key'], self._offsets['verification_key_end'], self._offsets['share_data'], self._offsets['block_hash_tree'], self._offsets['EOF']) self._writevs.append(tuple([offsets_offset, offsets])) encoding_parameters_offset = struct.calcsize(MDMFCHECKSTRING) params = struct.pack(">BBQQ", self._required_shares, self._total_shares, self._segment_size, self._data_length) self._writevs.append(tuple([encoding_parameters_offset, params])) return self._write(self._writevs) def _write(self, datavs, on_failure=None, on_success=None): """I write the data vectors in datavs to the remote slot.""" tw_vectors = {} if not self._testvs: self._testvs = [] self._testvs.append(tuple([0, 1, "eq", ""])) if not self._written: # Write a new checkstring to the share when we write it, so # that we have something to check later. 
new_checkstring = self.get_checkstring() datavs.append((0, new_checkstring)) def _first_write(): self._written = True self._testvs = [(0, len(new_checkstring), "eq", new_checkstring)] on_success = _first_write tw_vectors[self.shnum] = (self._testvs, datavs, None) d = self._rref.callRemote("slot_testv_and_readv_and_writev", self._storage_index, self._secrets, tw_vectors, self._readv) def _result(results): if isinstance(results, failure.Failure) or not results[0]: # Do nothing; the write was unsuccessful. if on_failure: on_failure() else: if on_success: on_success() return results d.addCallback(_result) return d def _handle_bad_struct(f): # struct.unpack errors mean the server didn't give us enough data, so # this share is bad f.trap(struct.error) raise BadShareError(f.value.args[0]) class MDMFSlotReadProxy: """ I read from a mutable slot filled with data written in the MDMF data format (which is described above). I can be initialized with some amount of data, which I will use (if it is valid) to eliminate some of the need to fetch it from servers. """ def __init__(self, rref, storage_index, shnum, data="", data_is_everything=False): # Start the initialization process. self._rref = rref self._storage_index = storage_index self.shnum = shnum # Before doing anything, the reader is probably going to want to # verify that the signature is correct. To do that, they'll need # the verification key, and the signature. To get those, we'll # need the offset table. So fetch the offset table on the # assumption that that will be the first thing that a reader is # going to do. # The fact that these encoding parameters are None tells us # that we haven't yet fetched them from the remote share, so we # should. We could just not set them, but the checks will be # easier to read if we don't have to use hasattr. self._version_number = None self._sequence_number = None self._root_hash = None # Filled in if we're dealing with an SDMF file. Unused # otherwise. self._salt = None self._required_shares = None self._total_shares = None self._segment_size = None self._data_length = None self._offsets = None # If the user has chosen to initialize us with some data, we'll # try to satisfy subsequent data requests with that data before # asking the storage server for it. self._data = data # If the provided data is known to be complete, then we know there's # nothing to be gained by querying the server, so we should just # partially satisfy requests with what we have. self._data_is_everything = data_is_everything # The way callers interact with cache in the filenode returns # None if there isn't any cached data, but the way we index the # cached data requires a string, so convert None to "". if self._data == None: self._data = "" def _maybe_fetch_offsets_and_header(self, force_remote=False): """ I fetch the offset table and the header from the remote slot if I don't already have them. If I do have them, I do nothing and return an empty Deferred. """ if self._offsets: return defer.succeed(None) # At this point, we may be either SDMF or MDMF. Fetching 107 # bytes will be enough to get header and offsets for both SDMF and # MDMF, though we'll be left with 4 more bytes than we # need if this ends up being MDMF. This is probably less # expensive than the cost of a second roundtrip. 
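# A standalone sketch (not Tahoe code), assuming the SDMF and MDMF header
# formats used in this module: a single 123-byte read (issued just below) is
# enough for either header, over-reading by 16 bytes when the share turns out
# to be SDMF. The version byte at offset 0 then says which layout was fetched.
import struct
_sdmf_header_len = struct.calcsize(">BQ32s16s BBQQ") + struct.calcsize(">LLLLQQ")
_mdmf_header_len = struct.calcsize(">BQ32sBBQQ") + struct.calcsize(">QQQQQQQQ")
assert _sdmf_header_len == 107
assert _mdmf_header_len == 123
_probe_size = max(_sdmf_header_len, _mdmf_header_len)   # 123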
readvs = [(0, 123)] d = self._read(readvs, force_remote) d.addCallback(self._process_encoding_parameters) d.addCallback(self._process_offsets) d.addErrback(_handle_bad_struct) return d def _process_encoding_parameters(self, encoding_parameters): if self.shnum not in encoding_parameters: raise BadShareError("no data for shnum %d" % self.shnum) encoding_parameters = encoding_parameters[self.shnum][0] # The first byte is the version number. It will tell us what # to do next. (verno,) = struct.unpack(">B", encoding_parameters[:1]) if verno == MDMF_VERSION: read_size = MDMFHEADERWITHOUTOFFSETSSIZE (verno, seqnum, root_hash, k, n, segsize, datalen) = struct.unpack(MDMFHEADERWITHOUTOFFSETS, encoding_parameters[:read_size]) if segsize == 0 and datalen == 0: # Empty file, no segments. self._num_segments = 0 else: self._num_segments = mathutil.div_ceil(datalen, segsize) elif verno == SDMF_VERSION: read_size = SIGNED_PREFIX_LENGTH (verno, seqnum, root_hash, salt, k, n, segsize, datalen) = struct.unpack(">BQ32s16s BBQQ", encoding_parameters[:SIGNED_PREFIX_LENGTH]) self._salt = salt if segsize == 0 and datalen == 0: # empty file self._num_segments = 0 else: # non-empty SDMF files have one segment. self._num_segments = 1 else: raise UnknownVersionError("You asked me to read mutable file " "version %d, but I only understand " "%d and %d" % (verno, SDMF_VERSION, MDMF_VERSION)) self._version_number = verno self._sequence_number = seqnum self._root_hash = root_hash self._required_shares = k self._total_shares = n self._segment_size = segsize self._data_length = datalen self._block_size = self._segment_size / self._required_shares # We can upload empty files, and need to account for this fact # so as to avoid zero-division and zero-modulo errors. if datalen > 0: tail_size = self._data_length % self._segment_size else: tail_size = 0 if not tail_size: self._tail_block_size = self._block_size else: self._tail_block_size = mathutil.next_multiple(tail_size, self._required_shares) self._tail_block_size /= self._required_shares return encoding_parameters def _process_offsets(self, offsets): if self._version_number == 0: read_size = OFFSETS_LENGTH read_offset = SIGNED_PREFIX_LENGTH end = read_size + read_offset (signature, share_hash_chain, block_hash_tree, share_data, enc_privkey, EOF) = struct.unpack(">LLLLQQ", offsets[read_offset:end]) self._offsets = {} self._offsets['signature'] = signature self._offsets['share_data'] = share_data self._offsets['block_hash_tree'] = block_hash_tree self._offsets['share_hash_chain'] = share_hash_chain self._offsets['enc_privkey'] = enc_privkey self._offsets['EOF'] = EOF elif self._version_number == 1: read_offset = MDMFHEADERWITHOUTOFFSETSSIZE read_length = MDMFOFFSETS_LENGTH end = read_offset + read_length (encprivkey, sharehashes, signature, verification_key, verification_key_end, sharedata, blockhashes, eof) = struct.unpack(MDMFOFFSETS, offsets[read_offset:end]) self._offsets = {} self._offsets['enc_privkey'] = encprivkey self._offsets['block_hash_tree'] = blockhashes self._offsets['share_hash_chain'] = sharehashes self._offsets['signature'] = signature self._offsets['verification_key'] = verification_key self._offsets['verification_key_end']= \ verification_key_end self._offsets['EOF'] = eof self._offsets['share_data'] = sharedata def get_block_and_salt(self, segnum): """ I return (block, salt), where block is the block data and salt is the salt used to encrypt that segment. 
""" d = self._maybe_fetch_offsets_and_header() def _then(ignored): base_share_offset = self._offsets['share_data'] if segnum + 1 > self._num_segments: raise LayoutInvalid("Not a valid segment number") if self._version_number == 0: share_offset = base_share_offset + self._block_size * segnum else: share_offset = base_share_offset + (self._block_size + \ SALT_SIZE) * segnum if segnum + 1 == self._num_segments: data = self._tail_block_size else: data = self._block_size if self._version_number == 1: data += SALT_SIZE readvs = [(share_offset, data)] return readvs d.addCallback(_then) d.addCallback(lambda readvs: self._read(readvs)) def _process_results(results): if self.shnum not in results: raise BadShareError("no data for shnum %d" % self.shnum) if self._version_number == 0: # We only read the share data, but we know the salt from # when we fetched the header data = results[self.shnum] if not data: data = "" else: if len(data) != 1: raise BadShareError("got %d vectors, not 1" % len(data)) data = data[0] salt = self._salt else: data = results[self.shnum] if not data: salt = data = "" else: salt_and_data = results[self.shnum][0] salt = salt_and_data[:SALT_SIZE] data = salt_and_data[SALT_SIZE:] return data, salt d.addCallback(_process_results) return d def get_blockhashes(self, needed=None, force_remote=False): """ I return the block hash tree I take an optional argument, needed, which is a set of indices correspond to hashes that I should fetch. If this argument is missing, I will fetch the entire block hash tree; otherwise, I may attempt to fetch fewer hashes, based on what needed says that I should do. Note that I may fetch as many hashes as I want, so long as the set of hashes that I do fetch is a superset of the ones that I am asked for, so callers should be prepared to tolerate additional hashes. """ # TODO: Return only the parts of the block hash tree necessary # to validate the blocknum provided? # This is a good idea, but it is hard to implement correctly. It # is bad to fetch any one block hash more than once, so we # probably just want to fetch the whole thing at once and then # serve it. if needed == set([]): return defer.succeed([]) d = self._maybe_fetch_offsets_and_header() def _then(ignored): blockhashes_offset = self._offsets['block_hash_tree'] if self._version_number == 1: blockhashes_length = self._offsets['EOF'] - blockhashes_offset else: blockhashes_length = self._offsets['share_data'] - blockhashes_offset readvs = [(blockhashes_offset, blockhashes_length)] return readvs d.addCallback(_then) d.addCallback(lambda readvs: self._read(readvs, force_remote=force_remote)) def _build_block_hash_tree(results): if self.shnum not in results: raise BadShareError("no data for shnum %d" % self.shnum) rawhashes = results[self.shnum][0] results = [rawhashes[i:i+HASH_SIZE] for i in range(0, len(rawhashes), HASH_SIZE)] return results d.addCallback(_build_block_hash_tree) return d def get_sharehashes(self, needed=None, force_remote=False): """ I return the part of the share hash chain placed to validate this share. I take an optional argument, needed. Needed is a set of indices that correspond to the hashes that I should fetch. If needed is not present, I will fetch and return the entire share hash chain. Otherwise, I may fetch and return any part of the share hash chain that is a superset of the part that I am asked to fetch. Callers should be prepared to deal with more hashes than they've asked for. 
""" if needed == set([]): return defer.succeed([]) d = self._maybe_fetch_offsets_and_header() def _make_readvs(ignored): sharehashes_offset = self._offsets['share_hash_chain'] if self._version_number == 0: sharehashes_length = self._offsets['block_hash_tree'] - sharehashes_offset else: sharehashes_length = self._offsets['signature'] - sharehashes_offset readvs = [(sharehashes_offset, sharehashes_length)] return readvs d.addCallback(_make_readvs) d.addCallback(lambda readvs: self._read(readvs, force_remote=force_remote)) def _build_share_hash_chain(results): if self.shnum not in results: raise BadShareError("no data for shnum %d" % self.shnum) sharehashes = results[self.shnum][0] results = [sharehashes[i:i+(HASH_SIZE + 2)] for i in range(0, len(sharehashes), HASH_SIZE + 2)] results = dict([struct.unpack(">H32s", data) for data in results]) return results d.addCallback(_build_share_hash_chain) d.addErrback(_handle_bad_struct) return d def get_encprivkey(self): """ I return the encrypted private key. """ d = self._maybe_fetch_offsets_and_header() def _make_readvs(ignored): privkey_offset = self._offsets['enc_privkey'] if self._version_number == 0: privkey_length = self._offsets['EOF'] - privkey_offset else: privkey_length = self._offsets['share_hash_chain'] - privkey_offset readvs = [(privkey_offset, privkey_length)] return readvs d.addCallback(_make_readvs) d.addCallback(lambda readvs: self._read(readvs)) def _process_results(results): if self.shnum not in results: raise BadShareError("no data for shnum %d" % self.shnum) privkey = results[self.shnum][0] return privkey d.addCallback(_process_results) return d def get_signature(self): """ I return the signature of my share. """ d = self._maybe_fetch_offsets_and_header() def _make_readvs(ignored): signature_offset = self._offsets['signature'] if self._version_number == 1: signature_length = self._offsets['verification_key'] - signature_offset else: signature_length = self._offsets['share_hash_chain'] - signature_offset readvs = [(signature_offset, signature_length)] return readvs d.addCallback(_make_readvs) d.addCallback(lambda readvs: self._read(readvs)) def _process_results(results): if self.shnum not in results: raise BadShareError("no data for shnum %d" % self.shnum) signature = results[self.shnum][0] return signature d.addCallback(_process_results) return d def get_verification_key(self): """ I return the verification key. """ d = self._maybe_fetch_offsets_and_header() def _make_readvs(ignored): if self._version_number == 1: vk_offset = self._offsets['verification_key'] vk_length = self._offsets['verification_key_end'] - vk_offset else: vk_offset = struct.calcsize(">BQ32s16sBBQQLLLLQQ") vk_length = self._offsets['signature'] - vk_offset readvs = [(vk_offset, vk_length)] return readvs d.addCallback(_make_readvs) d.addCallback(lambda readvs: self._read(readvs)) def _process_results(results): if self.shnum not in results: raise BadShareError("no data for shnum %d" % self.shnum) verification_key = results[self.shnum][0] return verification_key d.addCallback(_process_results) return d def get_encoding_parameters(self): """ I return (k, n, segsize, datalen) """ d = self._maybe_fetch_offsets_and_header() d.addCallback(lambda ignored: (self._required_shares, self._total_shares, self._segment_size, self._data_length)) return d def get_seqnum(self): """ I return the sequence number for this share. 
""" d = self._maybe_fetch_offsets_and_header() d.addCallback(lambda ignored: self._sequence_number) return d def get_root_hash(self): """ I return the root of the block hash tree """ d = self._maybe_fetch_offsets_and_header() d.addCallback(lambda ignored: self._root_hash) return d def get_checkstring(self): """ I return the packed representation of the following: - version number - sequence number - root hash - salt hash which my users use as a checkstring to detect other writers. """ d = self._maybe_fetch_offsets_and_header() def _build_checkstring(ignored): if self._salt: checkstring = struct.pack(PREFIX, self._version_number, self._sequence_number, self._root_hash, self._salt) else: checkstring = struct.pack(MDMFCHECKSTRING, self._version_number, self._sequence_number, self._root_hash) return checkstring d.addCallback(_build_checkstring) return d def get_prefix(self, force_remote): d = self._maybe_fetch_offsets_and_header(force_remote) d.addCallback(lambda ignored: self._build_prefix()) return d def _build_prefix(self): # The prefix is another name for the part of the remote share # that gets signed. It consists of everything up to and # including the datalength, packed by struct. if self._version_number == SDMF_VERSION: return struct.pack(SIGNED_PREFIX, self._version_number, self._sequence_number, self._root_hash, self._salt, self._required_shares, self._total_shares, self._segment_size, self._data_length) else: return struct.pack(MDMFSIGNABLEHEADER, self._version_number, self._sequence_number, self._root_hash, self._required_shares, self._total_shares, self._segment_size, self._data_length) def _get_offsets_tuple(self): # The offsets tuple is another component of the version # information tuple. It is basically our offsets dictionary, # itemized and in a tuple. return self._offsets.copy() def get_verinfo(self): """ I return my verinfo tuple. This is used by the ServermapUpdater to keep track of versions of mutable files. The verinfo tuple for MDMF files contains: - seqnum - root hash - a blank (nothing) - segsize - datalen - k - n - prefix (the thing that you sign) - a tuple of offsets We include the nonce in MDMF to simplify processing of version information tuples. The verinfo tuple for SDMF files is the same, but contains a 16-byte IV instead of a hash of salts. """ d = self._maybe_fetch_offsets_and_header() def _build_verinfo(ignored): if self._version_number == SDMF_VERSION: salt_to_use = self._salt else: salt_to_use = None return (self._sequence_number, self._root_hash, salt_to_use, self._segment_size, self._data_length, self._required_shares, self._total_shares, self._build_prefix(), self._get_offsets_tuple()) d.addCallback(_build_verinfo) return d def _read(self, readvs, force_remote=False): unsatisfiable = filter(lambda x: x[0] + x[1] > len(self._data), readvs) # TODO: It's entirely possible to tweak this so that it just # fulfills the requests that it can, and not demand that all # requests are satisfiable before running it. 
if not unsatisfiable or self._data_is_everything: results = [self._data[offset:offset+length] for (offset, length) in readvs] results = {self.shnum: results} return defer.succeed(results) else: return self._rref.callRemote("slot_readv", self._storage_index, [self.shnum], readvs) def is_sdmf(self): """I tell my caller whether or not my remote file is SDMF or MDMF """ d = self._maybe_fetch_offsets_and_header() d.addCallback(lambda ignored: self._version_number == 0) return d class LayoutInvalid(BadShareError): """ This isn't a valid MDMF mutable file """ tahoe-lafs-1.10.0/src/allmydata/mutable/publish.py000066400000000000000000001547031221140116300220250ustar00rootroot00000000000000 import os, time from StringIO import StringIO from itertools import count from zope.interface import implements from twisted.internet import defer from twisted.python import failure from allmydata.interfaces import IPublishStatus, SDMF_VERSION, MDMF_VERSION, \ IMutableUploadable from allmydata.util import base32, hashutil, mathutil, log from allmydata.util.dictutil import DictOfSets from allmydata import hashtree, codec from allmydata.storage.server import si_b2a from pycryptopp.cipher.aes import AES from foolscap.api import eventually, fireEventually from allmydata.mutable.common import MODE_WRITE, MODE_CHECK, MODE_REPAIR, \ UncoordinatedWriteError, NotEnoughServersError from allmydata.mutable.servermap import ServerMap from allmydata.mutable.layout import get_version_from_checkstring,\ unpack_mdmf_checkstring, \ unpack_sdmf_checkstring, \ MDMFSlotWriteProxy, \ SDMFSlotWriteProxy KiB = 1024 DEFAULT_MAX_SEGMENT_SIZE = 128 * KiB PUSHING_BLOCKS_STATE = 0 PUSHING_EVERYTHING_ELSE_STATE = 1 DONE_STATE = 2 class PublishStatus: implements(IPublishStatus) statusid_counter = count(0) def __init__(self): self.timings = {} self.timings["send_per_server"] = {} self.timings["encrypt"] = 0.0 self.timings["encode"] = 0.0 self.servermap = None self._problems = {} self.active = True self.storage_index = None self.helper = False self.encoding = ("?", "?") self.size = None self.status = "Not started" self.progress = 0.0 self.counter = self.statusid_counter.next() self.started = time.time() def add_per_server_time(self, server, elapsed): if server not in self.timings["send_per_server"]: self.timings["send_per_server"][server] = [] self.timings["send_per_server"][server].append(elapsed) def accumulate_encode_time(self, elapsed): self.timings["encode"] += elapsed def accumulate_encrypt_time(self, elapsed): self.timings["encrypt"] += elapsed def get_started(self): return self.started def get_storage_index(self): return self.storage_index def get_encoding(self): return self.encoding def using_helper(self): return self.helper def get_servermap(self): return self.servermap def get_size(self): return self.size def get_status(self): return self.status def get_progress(self): return self.progress def get_active(self): return self.active def get_counter(self): return self.counter def get_problems(self): return self._problems def set_storage_index(self, si): self.storage_index = si def set_helper(self, helper): self.helper = helper def set_servermap(self, servermap): self.servermap = servermap def set_encoding(self, k, n): self.encoding = (k, n) def set_size(self, size): self.size = size def set_status(self, status): self.status = status def set_progress(self, value): self.progress = value def set_active(self, value): self.active = value class LoopLimitExceededError(Exception): pass class Publish: """I represent a single act of publishing the 
mutable file to the grid. I will only publish my data if the servermap I am using still represents the current state of the world. To make the initial publish, set servermap to None. """ def __init__(self, filenode, storage_broker, servermap): self._node = filenode self._storage_broker = storage_broker self._servermap = servermap self._storage_index = self._node.get_storage_index() self._log_prefix = prefix = si_b2a(self._storage_index)[:5] num = self.log("Publish(%s): starting" % prefix, parent=None) self._log_number = num self._running = True self._first_write_error = None self._last_failure = None self._status = PublishStatus() self._status.set_storage_index(self._storage_index) self._status.set_helper(False) self._status.set_progress(0.0) self._status.set_active(True) self._version = self._node.get_version() assert self._version in (SDMF_VERSION, MDMF_VERSION) def get_status(self): return self._status def log(self, *args, **kwargs): if 'parent' not in kwargs: kwargs['parent'] = self._log_number if "facility" not in kwargs: kwargs["facility"] = "tahoe.mutable.publish" return log.msg(*args, **kwargs) def update(self, data, offset, blockhashes, version): """ I replace the contents of this file with the contents of data, starting at offset. I return a Deferred that fires with None when the replacement has been completed, or with an error if something went wrong during the process. Note that this process will not upload new shares. If the file being updated is in need of repair, callers will have to repair it on their own. """ # How this works: # 1: Make server assignments. We'll assign each share that we know # about on the grid to that server that currently holds that # share, and will not place any new shares. # 2: Setup encoding parameters. Most of these will stay the same # -- datalength will change, as will some of the offsets. # 3. Upload the new segments. # 4. Be done. assert IMutableUploadable.providedBy(data) self.data = data # XXX: Use the MutableFileVersion instead. self.datalength = self._node.get_size() if data.get_size() > self.datalength: self.datalength = data.get_size() self.log("starting update") self.log("adding new data of length %d at offset %d" % \ (data.get_size(), offset)) self.log("new data length is %d" % self.datalength) self._status.set_size(self.datalength) self._status.set_status("Started") self._started = time.time() self.done_deferred = defer.Deferred() self._writekey = self._node.get_writekey() assert self._writekey, "need write capability to publish" # first, which servers will we publish to? We require that the # servermap was updated in MODE_WRITE, so we can depend upon the # serverlist computed by that process instead of computing our own. assert self._servermap assert self._servermap.get_last_update()[0] in (MODE_WRITE, MODE_CHECK, MODE_REPAIR) # we will push a version that is one larger than anything present # in the grid, according to the servermap. self._new_seqnum = self._servermap.highest_seqnum() + 1 self._status.set_servermap(self._servermap) self.log(format="new seqnum will be %(seqnum)d", seqnum=self._new_seqnum, level=log.NOISY) # We're updating an existing file, so all of the following # should be available. 
self.readkey = self._node.get_readkey() self.required_shares = self._node.get_required_shares() assert self.required_shares is not None self.total_shares = self._node.get_total_shares() assert self.total_shares is not None self._status.set_encoding(self.required_shares, self.total_shares) self._pubkey = self._node.get_pubkey() assert self._pubkey self._privkey = self._node.get_privkey() assert self._privkey self._encprivkey = self._node.get_encprivkey() sb = self._storage_broker full_serverlist = list(sb.get_servers_for_psi(self._storage_index)) self.full_serverlist = full_serverlist # for use later, immutable self.bad_servers = set() # servers who have errbacked/refused requests # This will set self.segment_size, self.num_segments, and # self.fec. TODO: Does it know how to do the offset? Probably # not. So do that part next. self.setup_encoding_parameters(offset=offset) # if we experience any surprises (writes which were rejected because # our test vector did not match, or shares which we didn't expect to # see), we set this flag and report an UncoordinatedWriteError at the # end of the publish process. self.surprised = False # we keep track of three tables. The first is our goal: which share # we want to see on which servers. This is initially populated by the # existing servermap. self.goal = set() # pairs of (server, shnum) tuples # the number of outstanding queries: those that are in flight and # may or may not be delivered, accepted, or acknowledged. This is # incremented when a query is sent, and decremented when the response # returns or errbacks. self.num_outstanding = 0 # the third is a table of successes: share which have actually been # placed. These are populated when responses come back with success. # When self.placed == self.goal, we're done. self.placed = set() # (server, shnum) tuples self.bad_share_checkstrings = {} # This is set at the last step of the publishing process. self.versioninfo = "" # we use the servermap to populate the initial goal: this way we will # try to update each existing share in place. Since we're # updating, we ignore damaged and missing shares -- callers must # do a repair to repair and recreate these. self.goal = set(self._servermap.get_known_shares()) # shnum -> set of IMutableSlotWriter self.writers = DictOfSets() # SDMF files are updated differently. self._version = MDMF_VERSION writer_class = MDMFSlotWriteProxy # For each (server, shnum) in self.goal, we make a # write proxy for that server. We'll use this to write # shares to the server. for (server,shnum) in self.goal: write_enabler = self._node.get_write_enabler(server) renew_secret = self._node.get_renewal_secret(server) cancel_secret = self._node.get_cancel_secret(server) secrets = (write_enabler, renew_secret, cancel_secret) writer = writer_class(shnum, server.get_rref(), self._storage_index, secrets, self._new_seqnum, self.required_shares, self.total_shares, self.segment_size, self.datalength) self.writers.add(shnum, writer) writer.server = server known_shares = self._servermap.get_known_shares() assert (server, shnum) in known_shares old_versionid, old_timestamp = known_shares[(server,shnum)] (old_seqnum, old_root_hash, old_salt, old_segsize, old_datalength, old_k, old_N, old_prefix, old_offsets_tuple) = old_versionid writer.set_checkstring(old_seqnum, old_root_hash, old_salt) # Our remote shares will not have a complete checkstring until # after we are done writing share data and have started to write # blocks. 
In the meantime, we need to know what to look for when # writing, so that we can detect UncoordinatedWriteErrors. self._checkstring = self._get_some_writer().get_checkstring() # Now, we start pushing shares. self._status.timings["setup"] = time.time() - self._started # First, we encrypt, encode, and publish the shares that we need # to encrypt, encode, and publish. # Our update process fetched these for us. We need to update # them in place as publishing happens. self.blockhashes = {} # (shnum, [blochashes]) for (i, bht) in blockhashes.iteritems(): # We need to extract the leaves from our old hash tree. old_segcount = mathutil.div_ceil(version[4], version[3]) h = hashtree.IncompleteHashTree(old_segcount) bht = dict(enumerate(bht)) h.set_hashes(bht) leaves = h[h.get_leaf_index(0):] for j in xrange(self.num_segments - len(leaves)): leaves.append(None) assert len(leaves) >= self.num_segments self.blockhashes[i] = leaves # This list will now be the leaves that were set during the # initial upload + enough empty hashes to make it a # power-of-two. If we exceed a power of two boundary, we # should be encoding the file over again, and should not be # here. So, we have #assert len(self.blockhashes[i]) == \ # hashtree.roundup_pow2(self.num_segments), \ # len(self.blockhashes[i]) # XXX: Except this doesn't work. Figure out why. # These are filled in later, after we've modified the block hash # tree suitably. self.sharehash_leaves = None # eventually [sharehashes] self.sharehashes = {} # shnum -> [sharehash leaves necessary to # validate the share] self.log("Starting push") self._state = PUSHING_BLOCKS_STATE self._push() return self.done_deferred def publish(self, newdata): """Publish the filenode's current contents. Returns a Deferred that fires (with None) when the publish has done as much work as it's ever going to do, or errbacks with ConsistencyError if it detects a simultaneous write. """ # 0. Setup encoding parameters, encoder, and other such things. # 1. Encrypt, encode, and publish segments. assert IMutableUploadable.providedBy(newdata) self.data = newdata self.datalength = newdata.get_size() #if self.datalength >= DEFAULT_MAX_SEGMENT_SIZE: # self._version = MDMF_VERSION #else: # self._version = SDMF_VERSION self.log("starting publish, datalen is %s" % self.datalength) self._status.set_size(self.datalength) self._status.set_status("Started") self._started = time.time() self.done_deferred = defer.Deferred() self._writekey = self._node.get_writekey() assert self._writekey, "need write capability to publish" # first, which servers will we publish to? We require that the # servermap was updated in MODE_WRITE, so we can depend upon the # serverlist computed by that process instead of computing our own. if self._servermap: assert self._servermap.get_last_update()[0] in (MODE_WRITE, MODE_CHECK, MODE_REPAIR) # we will push a version that is one larger than anything present # in the grid, according to the servermap. 
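# A standalone sketch (not Tahoe code) of the leaf padding performed in
# update() above: the leaves recovered from the old block hash tree are padded
# with None placeholders until there is one entry per segment of the new
# version, mirroring the loop above. The values below are made up.
_old_leaves = ["h0", "h1", "h2"]   # leaves recovered from the old hash tree
_num_segments = 5                  # segment count of the updated file
_leaves = list(_old_leaves)
for _j in range(_num_segments - len(_leaves)):
    _leaves.append(None)
assert len(_leaves) >= _num_segments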
self._new_seqnum = self._servermap.highest_seqnum() + 1 else: # If we don't have a servermap, that's because we're doing the # initial publish self._new_seqnum = 1 self._servermap = ServerMap() self._status.set_servermap(self._servermap) self.log(format="new seqnum will be %(seqnum)d", seqnum=self._new_seqnum, level=log.NOISY) # having an up-to-date servermap (or using a filenode that was just # created for the first time) also guarantees that the following # fields are available self.readkey = self._node.get_readkey() self.required_shares = self._node.get_required_shares() assert self.required_shares is not None self.total_shares = self._node.get_total_shares() assert self.total_shares is not None self._status.set_encoding(self.required_shares, self.total_shares) self._pubkey = self._node.get_pubkey() assert self._pubkey self._privkey = self._node.get_privkey() assert self._privkey self._encprivkey = self._node.get_encprivkey() sb = self._storage_broker full_serverlist = list(sb.get_servers_for_psi(self._storage_index)) self.full_serverlist = full_serverlist # for use later, immutable self.bad_servers = set() # servers who have errbacked/refused requests # This will set self.segment_size, self.num_segments, and # self.fec. self.setup_encoding_parameters() # if we experience any surprises (writes which were rejected because # our test vector did not match, or shares which we didn't expect to # see), we set this flag and report an UncoordinatedWriteError at the # end of the publish process. self.surprised = False # we keep track of three tables. The first is our goal: which share # we want to see on which servers. This is initially populated by the # existing servermap. self.goal = set() # pairs of (server, shnum) tuples # the number of outstanding queries: those that are in flight and # may or may not be delivered, accepted, or acknowledged. This is # incremented when a query is sent, and decremented when the response # returns or errbacks. self.num_outstanding = 0 # the third is a table of successes: share which have actually been # placed. These are populated when responses come back with success. # When self.placed == self.goal, we're done. self.placed = set() # (server, shnum) tuples self.bad_share_checkstrings = {} # This is set at the last step of the publishing process. self.versioninfo = "" # we use the servermap to populate the initial goal: this way we will # try to update each existing share in place. self.goal = set(self._servermap.get_known_shares()) # then we add in all the shares that were bad (corrupted, bad # signatures, etc). We want to replace these. for key, old_checkstring in self._servermap.get_bad_shares().items(): (server, shnum) = key self.goal.add( (server,shnum) ) self.bad_share_checkstrings[(server,shnum)] = old_checkstring # TODO: Make this part do server selection. self.update_goal() # shnum -> set of IMutableSlotWriter self.writers = DictOfSets() if self._version == MDMF_VERSION: writer_class = MDMFSlotWriteProxy else: writer_class = SDMFSlotWriteProxy # For each (server, shnum) in self.goal, we make a # write proxy for that server. We'll use this to write # shares to the server. 
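# A standalone sketch (not Tahoe code) of the bookkeeping shape used here:
# self.goal is a set of (server, shnum) pairs and self.writers maps each share
# number to the set of write proxies targeting it, one per server that will
# hold that share. The server names and share numbers below are made up.
_goal = set([("server-a", 0), ("server-b", 0), ("server-c", 1)])
_writers = {}
for (_server, _shnum) in _goal:
    # the real code stores MDMFSlotWriteProxy / SDMFSlotWriteProxy instances
    _writers.setdefault(_shnum, set()).add((_server, _shnum))
assert len(_writers[0]) == 2 and len(_writers[1]) == 1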
for (server,shnum) in self.goal: write_enabler = self._node.get_write_enabler(server) renew_secret = self._node.get_renewal_secret(server) cancel_secret = self._node.get_cancel_secret(server) secrets = (write_enabler, renew_secret, cancel_secret) writer = writer_class(shnum, server.get_rref(), self._storage_index, secrets, self._new_seqnum, self.required_shares, self.total_shares, self.segment_size, self.datalength) self.writers.add(shnum, writer) writer.server = server known_shares = self._servermap.get_known_shares() if (server, shnum) in known_shares: old_versionid, old_timestamp = known_shares[(server,shnum)] (old_seqnum, old_root_hash, old_salt, old_segsize, old_datalength, old_k, old_N, old_prefix, old_offsets_tuple) = old_versionid writer.set_checkstring(old_seqnum, old_root_hash, old_salt) elif (server, shnum) in self.bad_share_checkstrings: old_checkstring = self.bad_share_checkstrings[(server, shnum)] writer.set_checkstring(old_checkstring) # Our remote shares will not have a complete checkstring until # after we are done writing share data and have started to write # blocks. In the meantime, we need to know what to look for when # writing, so that we can detect UncoordinatedWriteErrors. self._checkstring = self._get_some_writer().get_checkstring() # Now, we start pushing shares. self._status.timings["setup"] = time.time() - self._started # First, we encrypt, encode, and publish the shares that we need # to encrypt, encode, and publish. # This will eventually hold the block hash chain for each share # that we publish. We define it this way so that empty publishes # will still have something to write to the remote slot. self.blockhashes = dict([(i, []) for i in xrange(self.total_shares)]) for i in xrange(self.total_shares): blocks = self.blockhashes[i] for j in xrange(self.num_segments): blocks.append(None) self.sharehash_leaves = None # eventually [sharehashes] self.sharehashes = {} # shnum -> [sharehash leaves necessary to # validate the share] self.log("Starting push") self._state = PUSHING_BLOCKS_STATE self._push() return self.done_deferred def _get_some_writer(self): return list(self.writers.values()[0])[0] def _update_status(self): self._status.set_status("Sending Shares: %d placed out of %d, " "%d messages outstanding" % (len(self.placed), len(self.goal), self.num_outstanding)) self._status.set_progress(1.0 * len(self.placed) / len(self.goal)) def setup_encoding_parameters(self, offset=0): if self._version == MDMF_VERSION: segment_size = DEFAULT_MAX_SEGMENT_SIZE # 128 KiB by default else: segment_size = self.datalength # SDMF is only one segment # this must be a multiple of self.required_shares segment_size = mathutil.next_multiple(segment_size, self.required_shares) self.segment_size = segment_size # Calculate the starting segment for the upload. if segment_size: # We use div_ceil instead of integer division here because # it is semantically correct. # If datalength isn't an even multiple of segment_size, but # is larger than segment_size, datalength // segment_size # will be the largest number such that num <= datalength and # num % segment_size == 0. But that's not what we want, # because it ignores the extra data. div_ceil will give us # the right number of segments for the data that we're # given. 
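# --- illustrative sketch (not from the original module) ---------------------
# Each writer created above is primed with the checkstring of the share it
# expects to overwrite, so the server-side test vector can catch
# uncoordinated writes.  The same decision, over plain dicts standing in
# for the ServerMap accessors:

def _sketch_expected_checkstring(key, known_shares, bad_share_checkstrings):
    """key = (server, shnum).  Return the old (seqnum, root_hash, salt) if
    the servermap saw a good copy of this share, the recorded checkstring
    if it saw a corrupt one, or None if the share is brand new."""
    if key in known_shares:
        old_versionid, _old_timestamp = known_shares[key]
        old_seqnum, old_root_hash, old_salt = old_versionid[:3]
        return (old_seqnum, old_root_hash, old_salt)
    if key in bad_share_checkstrings:
        return bad_share_checkstrings[key]
    return None
# ---------------------------------------------------------------------------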
self.num_segments = mathutil.div_ceil(self.datalength, segment_size) self.starting_segment = offset // segment_size else: self.num_segments = 0 self.starting_segment = 0 self.log("building encoding parameters for file") self.log("got segsize %d" % self.segment_size) self.log("got %d segments" % self.num_segments) if self._version == SDMF_VERSION: assert self.num_segments in (0, 1) # SDMF # calculate the tail segment size. if segment_size and self.datalength: self.tail_segment_size = self.datalength % segment_size self.log("got tail segment size %d" % self.tail_segment_size) else: self.tail_segment_size = 0 if self.tail_segment_size == 0 and segment_size: # The tail segment is the same size as the other segments. self.tail_segment_size = segment_size # Make FEC encoders fec = codec.CRSEncoder() fec.set_params(self.segment_size, self.required_shares, self.total_shares) self.piece_size = fec.get_block_size() self.fec = fec if self.tail_segment_size == self.segment_size: self.tail_fec = self.fec else: tail_fec = codec.CRSEncoder() tail_fec.set_params(self.tail_segment_size, self.required_shares, self.total_shares) self.tail_fec = tail_fec self._current_segment = self.starting_segment self.end_segment = self.num_segments - 1 # Now figure out where the last segment should be. if self.data.get_size() != self.datalength: # We're updating a few segments in the middle of a mutable # file, so we don't want to republish the whole thing. # (we don't have enough data to do that even if we wanted # to) end = self.data.get_size() self.end_segment = end // segment_size if end % segment_size == 0: self.end_segment -= 1 self.log("got start segment %d" % self.starting_segment) self.log("got end segment %d" % self.end_segment) def _push(self, ignored=None): """ I manage state transitions. In particular, I see that we still have a good enough number of writers to complete the upload successfully. """ # Can we still successfully publish this file? # TODO: Keep track of outstanding queries before aborting the # process. num_shnums = len(self.writers) if num_shnums < self.required_shares or self.surprised: return self._failure() # Figure out what we need to do next. Each of these needs to # return a deferred so that we don't block execution when this # is first called in the upload method. if self._state == PUSHING_BLOCKS_STATE: return self.push_segment(self._current_segment) elif self._state == PUSHING_EVERYTHING_ELSE_STATE: return self.push_everything_else() # If we make it to this point, we were successful in placing the # file. return self._done() def push_segment(self, segnum): if self.num_segments == 0 and self._version == SDMF_VERSION: self._add_dummy_salts() if segnum > self.end_segment: # We don't have any more segments to push. self._state = PUSHING_EVERYTHING_ELSE_STATE return self._push() d = self._encode_segment(segnum) d.addCallback(self._push_segment, segnum) def _increment_segnum(ign): self._current_segment += 1 # XXX: I don't think we need to do addBoth here -- any errBacks # should be handled within push_segment. d.addCallback(_increment_segnum) d.addCallback(self._turn_barrier) d.addCallback(self._push) d.addErrback(self._failure) def _turn_barrier(self, result): """ I help the publish process avoid the recursion limit issues described in #237. """ return fireEventually(result) def _add_dummy_salts(self): """ SDMF files need a salt even if they're empty, or the signature won't make sense. This method adds a dummy salt to each of our SDMF writers so that they can write the signature later. 
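# --- illustrative sketch (not from the original module) ---------------------
# Why ceiling division for the segment count: a partial trailing segment
# still has to be pushed.  A local stand-in for the div_ceil call above and
# a worked example:

def _sketch_div_ceil(n, d):
    """Ceiling division for non-negative n and positive d."""
    return (n + d - 1) // d

# 300 KiB of data in 128 KiB segments:
#   300 // 128                  -> 2   (would drop the 44 KiB tail)
#   _sketch_div_ceil(300, 128)  -> 3   (the tail gets its own segment)
# ---------------------------------------------------------------------------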
""" salt = os.urandom(16) assert self._version == SDMF_VERSION for shnum, writers in self.writers.iteritems(): for writer in writers: writer.put_salt(salt) def _encode_segment(self, segnum): """ I encrypt and encode the segment segnum. """ started = time.time() if segnum + 1 == self.num_segments: segsize = self.tail_segment_size else: segsize = self.segment_size self.log("Pushing segment %d of %d" % (segnum + 1, self.num_segments)) data = self.data.read(segsize) # XXX: This is dumb. Why return a list? data = "".join(data) assert len(data) == segsize, len(data) salt = os.urandom(16) key = hashutil.ssk_readkey_data_hash(salt, self.readkey) self._status.set_status("Encrypting") enc = AES(key) crypttext = enc.process(data) assert len(crypttext) == len(data) now = time.time() self._status.accumulate_encrypt_time(now - started) started = now # now apply FEC if segnum + 1 == self.num_segments: fec = self.tail_fec else: fec = self.fec self._status.set_status("Encoding") crypttext_pieces = [None] * self.required_shares piece_size = fec.get_block_size() for i in range(len(crypttext_pieces)): offset = i * piece_size piece = crypttext[offset:offset+piece_size] piece = piece + "\x00"*(piece_size - len(piece)) # padding crypttext_pieces[i] = piece assert len(piece) == piece_size d = fec.encode(crypttext_pieces) def _done_encoding(res): elapsed = time.time() - started self._status.accumulate_encode_time(elapsed) return (res, salt) d.addCallback(_done_encoding) return d def _push_segment(self, encoded_and_salt, segnum): """ I push (data, salt) as segment number segnum. """ results, salt = encoded_and_salt shares, shareids = results self._status.set_status("Pushing segment") for i in xrange(len(shares)): sharedata = shares[i] shareid = shareids[i] if self._version == MDMF_VERSION: hashed = salt + sharedata else: hashed = sharedata block_hash = hashutil.block_hash(hashed) self.blockhashes[shareid][segnum] = block_hash # find the writer for this share writers = self.writers[shareid] for writer in writers: writer.put_block(sharedata, segnum, salt) def push_everything_else(self): """ I put everything else associated with a share. """ self._pack_started = time.time() self.push_encprivkey() self.push_blockhashes() self.push_sharehashes() self.push_toplevel_hashes_and_signature() d = self.finish_publishing() def _change_state(ignored): self._state = DONE_STATE d.addCallback(_change_state) d.addCallback(self._push) return d def push_encprivkey(self): encprivkey = self._encprivkey self._status.set_status("Pushing encrypted private key") for shnum, writers in self.writers.iteritems(): for writer in writers: writer.put_encprivkey(encprivkey) def push_blockhashes(self): self.sharehash_leaves = [None] * len(self.blockhashes) self._status.set_status("Building and pushing block hash tree") for shnum, blockhashes in self.blockhashes.iteritems(): t = hashtree.HashTree(blockhashes) self.blockhashes[shnum] = list(t) # set the leaf for future use. 
self.sharehash_leaves[shnum] = t[0] writers = self.writers[shnum] for writer in writers: writer.put_blockhashes(self.blockhashes[shnum]) def push_sharehashes(self): self._status.set_status("Building and pushing share hash chain") share_hash_tree = hashtree.HashTree(self.sharehash_leaves) for shnum in xrange(len(self.sharehash_leaves)): needed_indices = share_hash_tree.needed_hashes(shnum) self.sharehashes[shnum] = dict( [ (i, share_hash_tree[i]) for i in needed_indices] ) writers = self.writers[shnum] for writer in writers: writer.put_sharehashes(self.sharehashes[shnum]) self.root_hash = share_hash_tree[0] def push_toplevel_hashes_and_signature(self): # We need to to three things here: # - Push the root hash and salt hash # - Get the checkstring of the resulting layout; sign that. # - Push the signature self._status.set_status("Pushing root hashes and signature") for shnum in xrange(self.total_shares): writers = self.writers[shnum] for writer in writers: writer.put_root_hash(self.root_hash) self._update_checkstring() self._make_and_place_signature() def _update_checkstring(self): """ After putting the root hash, MDMF files will have the checkstring written to the storage server. This means that we can update our copy of the checkstring so we can detect uncoordinated writes. SDMF files will have the same checkstring, so we need not do anything. """ self._checkstring = self._get_some_writer().get_checkstring() def _make_and_place_signature(self): """ I create and place the signature. """ started = time.time() self._status.set_status("Signing prefix") signable = self._get_some_writer().get_signable() self.signature = self._privkey.sign(signable) for (shnum, writers) in self.writers.iteritems(): for writer in writers: writer.put_signature(self.signature) self._status.timings['sign'] = time.time() - started def finish_publishing(self): # We're almost done -- we just need to put the verification key # and the offsets started = time.time() self._status.set_status("Pushing shares") self._started_pushing = started ds = [] verification_key = self._pubkey.serialize() for (shnum, writers) in self.writers.copy().iteritems(): for writer in writers: writer.put_verification_key(verification_key) self.num_outstanding += 1 def _no_longer_outstanding(res): self.num_outstanding -= 1 return res d = writer.finish_publishing() d.addBoth(_no_longer_outstanding) d.addErrback(self._connection_problem, writer) d.addCallback(self._got_write_answer, writer, started) ds.append(d) self._record_verinfo() self._status.timings['pack'] = time.time() - started return defer.DeferredList(ds) def _record_verinfo(self): self.versioninfo = self._get_some_writer().get_verinfo() def _connection_problem(self, f, writer): """ We ran into a connection problem while working with writer, and need to deal with that. 
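# --- illustrative sketch (not from the original module) ---------------------
# The structures pushed above are Merkle trees: each share's block hashes
# reduce to a per-share root, those roots are the leaves of the share hash
# tree, and the share-hash root ends up in the signed prefix.  A much
# simplified Merkle-root reduction, only to show the shape of the
# computation -- the real hashtree module uses tagged SHA-256d hashes and a
# padded leaf row, which this toy version does not:

import hashlib

def _sketch_merkle_root(leaves):
    """Reduce a non-empty list of leaf hash strings to a single root."""
    level = list(leaves)
    while len(level) > 1:
        if len(level) % 2:                      # duplicate an odd leaf out
            level.append(level[-1])
        level = [hashlib.sha256(level[i] + level[i + 1]).hexdigest()
                 for i in range(0, len(level), 2)]
    return level[0]
# ---------------------------------------------------------------------------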
""" self.log("found problem: %s" % str(f)) self._last_failure = f self.writers.discard(writer.shnum, writer) def log_goal(self, goal, message=""): logmsg = [message] for (shnum, server) in sorted([(s,p) for (p,s) in goal]): logmsg.append("sh%d to [%s]" % (shnum, server.get_name())) self.log("current goal: %s" % (", ".join(logmsg)), level=log.NOISY) self.log("we are planning to push new seqnum=#%d" % self._new_seqnum, level=log.NOISY) def update_goal(self): # if log.recording_noisy if True: self.log_goal(self.goal, "before update: ") # first, remove any bad servers from our goal self.goal = set([ (server, shnum) for (server, shnum) in self.goal if server not in self.bad_servers ]) # find the homeless shares: homefull_shares = set([shnum for (server, shnum) in self.goal]) homeless_shares = set(range(self.total_shares)) - homefull_shares homeless_shares = sorted(list(homeless_shares)) # place them somewhere. We prefer unused servers at the beginning of # the available server list. if not homeless_shares: return # if an old share X is on a node, put the new share X there too. # TODO: 1: redistribute shares to achieve one-per-server, by copying # shares from existing servers to new (less-crowded) ones. The # old shares must still be updated. # TODO: 2: move those shares instead of copying them, to reduce future # update work # this is a bit CPU intensive but easy to analyze. We create a sort # order for each server. If the server is marked as bad, we don't # even put them in the list. Then we care about the number of shares # which have already been assigned to them. After that we care about # their permutation order. old_assignments = DictOfSets() for (server, shnum) in self.goal: old_assignments.add(server, shnum) serverlist = [] for i, server in enumerate(self.full_serverlist): serverid = server.get_serverid() if server in self.bad_servers: continue entry = (len(old_assignments.get(server, [])), i, serverid, server) serverlist.append(entry) serverlist.sort() if not serverlist: raise NotEnoughServersError("Ran out of non-bad servers, " "first_error=%s" % str(self._first_write_error), self._first_write_error) # we then index this serverlist with an integer, because we may have # to wrap. We update the goal as we go. i = 0 for shnum in homeless_shares: (ignored1, ignored2, ignored3, server) = serverlist[i] # if we are forced to send a share to a server that already has # one, we may have two write requests in flight, and the # servermap (which was computed before either request was sent) # won't reflect the new shares, so the second response will be # surprising. There is code in _got_write_answer() to tolerate # this, otherwise it would cause the publish to fail with an # UncoordinatedWriteError. See #546 for details of the trouble # this used to cause. self.goal.add( (server, shnum) ) i += 1 if i >= len(serverlist): i = 0 if True: self.log_goal(self.goal, "after update: ") def _got_write_answer(self, answer, writer, started): if not answer: # SDMF writers only pretend to write when readers set their # blocks, salts, and so on -- they actually just write once, # at the end of the upload process. In fake writes, they # return defer.succeed(None). If we see that, we shouldn't # bother checking it. 
return server = writer.server lp = self.log("_got_write_answer from %s, share %d" % (server.get_name(), writer.shnum)) now = time.time() elapsed = now - started self._status.add_per_server_time(server, elapsed) wrote, read_data = answer surprise_shares = set(read_data.keys()) - set([writer.shnum]) # We need to remove from surprise_shares any shares that we are # knowingly also writing to that server from other writers. # TODO: Precompute this. shares = [] for shnum, writers in self.writers.iteritems(): shares.extend([x.shnum for x in writers if x.server == server]) known_shnums = set(shares) surprise_shares -= known_shnums self.log("found the following surprise shares: %s" % str(surprise_shares)) # Now surprise shares contains all of the shares that we did not # expect to be there. surprised = False for shnum in surprise_shares: # read_data is a dict mapping shnum to checkstring (SIGNED_PREFIX) checkstring = read_data[shnum][0] # What we want to do here is to see if their (seqnum, # roothash, salt) is the same as our (seqnum, roothash, # salt), or the equivalent for MDMF. The best way to do this # is to store a packed representation of our checkstring # somewhere, then not bother unpacking the other # checkstring. if checkstring == self._checkstring: # they have the right share, somehow if (server,shnum) in self.goal: # and we want them to have it, so we probably sent them a # copy in an earlier write. This is ok, and avoids the # #546 problem. continue # They aren't in our goal, but they are still for the right # version. Somebody else wrote them, and it's a convergent # uncoordinated write. Pretend this is ok (don't be # surprised), since I suspect there's a decent chance that # we'll hit this in normal operation. continue else: # the new shares are of a different version if server in self._servermap.get_reachable_servers(): # we asked them about their shares, so we had knowledge # of what they used to have. Any surprising shares must # have come from someone else, so UCW. surprised = True else: # we didn't ask them, and now we've discovered that they # have a share we didn't know about. This indicates that # mapupdate should have wokred harder and asked more # servers before concluding that it knew about them all. # signal UCW, but make sure to ask this server next time, # so we'll remember to update it if/when we retry. surprised = True # TODO: ask this server next time. I don't yet have a good # way to do this. Two insufficient possibilities are: # # self._servermap.add_new_share(server, shnum, verinfo, now) # but that requires fetching/validating/parsing the whole # version string, and all we have is the checkstring # self._servermap.mark_bad_share(server, shnum, checkstring) # that will make publish overwrite the share next time, # but it won't re-query the server, and it won't make # mapupdate search further # TODO later: when publish starts, do # servermap.get_best_version(), extract the seqnum, # subtract one, and store as highest-replaceable-seqnum. # Then, if this surprise-because-we-didn't-ask share is # of highest-replaceable-seqnum or lower, we're allowed # to replace it: send out a new writev (or rather add it # to self.goal and loop). pass surprised = True if surprised: self.log("they had shares %s that we didn't know about" % (list(surprise_shares),), parent=lp, level=log.WEIRD, umid="un9CSQ") self.surprised = True if not wrote: # TODO: there are two possibilities. 
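# --- illustrative sketch (not from the original module) ---------------------
# The surprise logic above boils down to: an unexpected share is harmless
# if it carries our own checkstring (an earlier write of ours, or a
# convergent write), and a red flag otherwise.  A condensed restatement
# that drops the reachable-server refinement discussed above:

def _sketch_is_surprising(their_checkstring, our_checkstring):
    """True if this unexpected share should count toward an
    UncoordinatedWriteError."""
    return their_checkstring != our_checkstring
# ---------------------------------------------------------------------------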
The first is that the server # is full (or just doesn't want to give us any room), which means # we shouldn't ask them again, but is *not* an indication of an # uncoordinated write. The second is that our testv failed, which # *does* indicate an uncoordinated write. We currently don't have # a way to tell these two apart (in fact, the storage server code # doesn't have the option of refusing our share). # # If the server is full, mark the server as bad (so we don't ask # them again), but don't set self.surprised. The loop() will find # a new server. # # If the testv failed, log it, set self.surprised, but don't # bother adding to self.bad_servers . self.log("our testv failed, so the write did not happen", parent=lp, level=log.WEIRD, umid="8sc26g") self.surprised = True self.bad_servers.add(server) # don't ask them again # use the checkstring to add information to the log message unknown_format = False for (shnum,readv) in read_data.items(): checkstring = readv[0] version = get_version_from_checkstring(checkstring) if version == MDMF_VERSION: (other_seqnum, other_roothash) = unpack_mdmf_checkstring(checkstring) elif version == SDMF_VERSION: (other_seqnum, other_roothash, other_IV) = unpack_sdmf_checkstring(checkstring) else: unknown_format = True expected_version = self._servermap.version_on_server(server, shnum) if expected_version: (seqnum, root_hash, IV, segsize, datalength, k, N, prefix, offsets_tuple) = expected_version msg = ("somebody modified the share on us:" " shnum=%d: I thought they had #%d:R=%s," % (shnum, seqnum, base32.b2a(root_hash)[:4])) if unknown_format: msg += (" but I don't know how to read share" " format %d" % version) else: msg += " but testv reported #%d:R=%s" % \ (other_seqnum, other_roothash) self.log(msg, parent=lp, level=log.NOISY) # if expected_version==None, then we didn't expect to see a # share on that server, and the 'surprise_shares' clause # above will have logged it. return # and update the servermap # self.versioninfo is set during the last phase of publishing. # If we get there, we know that responses correspond to placed # shares, and can safely execute these statements. if self.versioninfo: self.log("wrote successfully: adding new share to servermap") self._servermap.add_new_share(server, writer.shnum, self.versioninfo, started) self.placed.add( (server, writer.shnum) ) self._update_status() # the next method in the deferred chain will check to see if # we're done and successful. return def _done(self): if not self._running: return self._running = False now = time.time() self._status.timings["total"] = now - self._started elapsed = now - self._started_pushing self._status.timings['push'] = elapsed self._status.set_active(False) self.log("Publish done, success") self._status.set_status("Finished") self._status.set_progress(1.0) # Get k and segsize, then give them to the caller. hints = {} hints['segsize'] = self.segment_size hints['k'] = self.required_shares self._node.set_downloader_hints(hints) eventually(self.done_deferred.callback, None) def _failure(self, f=None): if f: self._last_failure = f if not self.surprised: # We ran out of servers msg = "Publish ran out of good servers" if self._last_failure: msg += ", last failure was: %s" % str(self._last_failure) self.log(msg) e = NotEnoughServersError(msg) else: # We ran into shares that we didn't recognize, which means # that we need to return an UncoordinatedWriteError. 
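# --- illustrative sketch (not from the original module; a reading aid for
# the branches above, not library behaviour) ---------------------------------

def _sketch_write_outcome(wrote, saw_surprise_shares):
    """Condensed outcome of a single writev response in _got_write_answer."""
    if not wrote:
        # Rejection: either a full server or a losing test vector; the code
        # above cannot yet tell these apart, so it marks the server bad and
        # also sets self.surprised.
        return "rejected"
    if saw_surprise_shares:
        # The write landed, but shares of an unknown version were seen; the
        # publish will finish this response and then fail with an
        # UncoordinatedWriteError.
        return "accepted-but-surprised"
    return "accepted"
# ---------------------------------------------------------------------------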
self.log("Publish failed with UncoordinatedWriteError") e = UncoordinatedWriteError() f = failure.Failure(e) eventually(self.done_deferred.callback, f) class MutableFileHandle: """ I am a mutable uploadable built around a filehandle-like object, usually either a StringIO instance or a handle to an actual file. """ implements(IMutableUploadable) def __init__(self, filehandle): # The filehandle is defined as a generally file-like object that # has these two methods. We don't care beyond that. assert hasattr(filehandle, "read") assert hasattr(filehandle, "close") self._filehandle = filehandle # We must start reading at the beginning of the file, or we risk # encountering errors when the data read does not match the size # reported to the uploader. self._filehandle.seek(0) # We have not yet read anything, so our position is 0. self._marker = 0 def get_size(self): """ I return the amount of data in my filehandle. """ if not hasattr(self, "_size"): old_position = self._filehandle.tell() # Seek to the end of the file by seeking 0 bytes from the # file's end self._filehandle.seek(0, os.SEEK_END) self._size = self._filehandle.tell() # Restore the previous position, in case this was called # after a read. self._filehandle.seek(old_position) assert self._filehandle.tell() == old_position assert hasattr(self, "_size") return self._size def pos(self): """ I return the position of my read marker -- i.e., how much data I have already read and returned to callers. """ return self._marker def read(self, length): """ I return some data (up to length bytes) from my filehandle. In most cases, I return length bytes, but sometimes I won't -- for example, if I am asked to read beyond the end of a file, or an error occurs. """ results = self._filehandle.read(length) self._marker += len(results) return [results] def close(self): """ I close the underlying filehandle. Any further operations on the filehandle fail at this point. """ self._filehandle.close() class MutableData(MutableFileHandle): """ I am a mutable uploadable built around a string, which I then cast into a StringIO and treat as a filehandle. """ def __init__(self, s): # Take a string and return a file-like uploadable. assert isinstance(s, str) MutableFileHandle.__init__(self, StringIO(s)) class TransformingUploadable: """ I am an IMutableUploadable that wraps another IMutableUploadable, and some segments that are already on the grid. When I am called to read, I handle merging of boundary segments. """ implements(IMutableUploadable) def __init__(self, data, offset, segment_size, start, end): assert IMutableUploadable.providedBy(data) self._newdata = data self._offset = offset self._segment_size = segment_size self._start = start self._end = end self._read_marker = 0 self._first_segment_offset = offset % segment_size num = self.log("TransformingUploadable: starting", parent=None) self._log_number = num self.log("got fso: %d" % self._first_segment_offset) self.log("got offset: %d" % self._offset) def log(self, *args, **kwargs): if 'parent' not in kwargs: kwargs['parent'] = self._log_number if "facility" not in kwargs: kwargs["facility"] = "tahoe.mutable.transforminguploadable" return log.msg(*args, **kwargs) def get_size(self): return self._offset + self._newdata.get_size() def read(self, length): # We can get data from 3 sources here. # 1. The first of the segments provided to us. # 2. The data that we're replacing things with. # 3. The last of the segments provided to us. # are we in state 0? 
self.log("reading %d bytes" % length) old_start_data = "" old_data_length = self._first_segment_offset - self._read_marker if old_data_length > 0: if old_data_length > length: old_data_length = length self.log("returning %d bytes of old start data" % old_data_length) old_data_end = old_data_length + self._read_marker old_start_data = self._start[self._read_marker:old_data_end] length -= old_data_length else: # otherwise calculations later get screwed up. old_data_length = 0 # Is there enough new data to satisfy this read? If not, we need # to pad the end of the data with data from our last segment. old_end_length = length - \ (self._newdata.get_size() - self._newdata.pos()) old_end_data = "" if old_end_length > 0: self.log("reading %d bytes of old end data" % old_end_length) # TODO: We're not explicitly checking for tail segment size # here. Is that a problem? old_data_offset = (length - old_end_length + \ old_data_length) % self._segment_size self.log("reading at offset %d" % old_data_offset) old_end = old_data_offset + old_end_length old_end_data = self._end[old_data_offset:old_end] length -= old_end_length assert length == self._newdata.get_size() - self._newdata.pos() self.log("reading %d bytes of new data" % length) new_data = self._newdata.read(length) new_data = "".join(new_data) self._read_marker += len(old_start_data + new_data + old_end_data) return old_start_data + new_data + old_end_data def close(self): pass tahoe-lafs-1.10.0/src/allmydata/mutable/repairer.py000066400000000000000000000135611221140116300221640ustar00rootroot00000000000000 from zope.interface import implements from twisted.internet import defer from allmydata.interfaces import IRepairResults, ICheckResults from allmydata.mutable.publish import MutableData from allmydata.mutable.common import MODE_REPAIR from allmydata.mutable.servermap import ServerMap, ServermapUpdater class RepairResults: implements(IRepairResults) def __init__(self, smap): self.servermap = smap def set_successful(self, successful): self.successful = successful def get_successful(self): return self.successful def to_string(self): return "" class RepairRequiresWritecapError(Exception): """Repair currently requires a writecap.""" class MustForceRepairError(Exception): pass class Repairer: def __init__(self, node, check_results, storage_broker, history, monitor): self.node = node self.check_results = ICheckResults(check_results) assert check_results.get_storage_index() == node.get_storage_index() self._storage_broker = storage_broker self._history = history self._monitor = monitor def start(self, force=False): # download, then re-publish. If a server had a bad share, try to # replace it with a good one of the same shnum. # The normal repair operation should not be used to replace # application-specific merging of alternate versions: i.e if there # are multiple highest seqnums with different roothashes. In this # case, the application must use node.upload() (referencing the # servermap that indicates the multiple-heads condition), or # node.overwrite(). The repair() operation will refuse to run in # these conditions unless a force=True argument is provided. If # force=True is used, then the highest root hash will be reinforced. # Likewise, the presence of an unrecoverable latest version is an # unusual event, and should ideally be handled by retrying a couple # times (spaced out over hours or days) and hoping that new shares # will become available. 
If repair(force=True) is called, data will # be lost: a new seqnum will be generated with the same contents as # the most recent recoverable version, skipping over the lost # version. repair(force=False) will refuse to run in a situation like # this. # Repair is designed to fix the following injuries: # missing shares: add new ones to get at least N distinct ones # old shares: replace old shares with the latest version # bogus shares (bad sigs): replace the bad one with a good one # first, update the servermap in MODE_REPAIR, which files all shares # and makes sure we get the privkey. u = ServermapUpdater(self.node, self._storage_broker, self._monitor, ServerMap(), MODE_REPAIR) if self._history: self._history.notify_mapupdate(u.get_status()) d = u.update() d.addCallback(self._got_full_servermap, force) return d def _got_full_servermap(self, smap, force): best_version = smap.best_recoverable_version() if not best_version: # the file is damaged beyond repair rr = RepairResults(smap) rr.set_successful(False) return defer.succeed(rr) if smap.unrecoverable_newer_versions(): if not force: raise MustForceRepairError("There were unrecoverable newer " "versions, so force=True must be " "passed to the repair() operation") # continuing on means that node.upload() will pick a seqnum that # is higher than everything visible in the servermap, effectively # discarding the unrecoverable versions. if smap.needs_merge(): if not force: raise MustForceRepairError("There were multiple recoverable " "versions with identical seqnums, " "so force=True must be passed to " "the repair() operation") # continuing on means that smap.best_recoverable_version() will # pick the one with the highest roothash, and then node.upload() # will replace all shares with its contents # missing shares are handled during upload, which tries to find a # home for every share # old shares are handled during upload, which will replace any share # that was present in the servermap # bogus shares need to be managed here. We might notice a bogus share # during mapupdate (whether done for a filecheck or just before a # download) by virtue of it having an invalid signature. We might # also notice a bad hash in the share during verify or download. In # either case, the problem will be noted in the servermap, and the # bad share (along with its checkstring) will be recorded in # servermap.bad_shares . Publish knows that it should try and replace # these. # I chose to use the retrieve phase to ensure that the privkey is # available, to avoid the extra roundtrip that would occur if we, # say, added an smap.get_privkey() method. 
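# --- illustrative sketch (not from the original module) ---------------------
# The force/refuse policy described above, condensed into one decision over
# the three servermap facts it depends on:

def _sketch_repair_precheck(has_recoverable_version,
                            has_unrecoverable_newer, needs_merge, force):
    """Returns 'unrepairable', 'needs-force', or 'ok'."""
    if not has_recoverable_version:
        return "unrepairable"            # damaged beyond repair
    if (has_unrecoverable_newer or needs_merge) and not force:
        return "needs-force"             # MustForceRepairError territory
    return "ok"                          # safe to download and republish
# ---------------------------------------------------------------------------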
if not self.node.get_writekey(): raise RepairRequiresWritecapError("Sorry, repair currently requires a writecap, to set the write-enabler properly.") d = self.node.download_version(smap, best_version, fetch_privkey=True) d.addCallback(lambda data: MutableData(data)) d.addCallback(self.node.upload, smap) d.addCallback(self.get_results, smap) return d def get_results(self, res, smap): rr = RepairResults(smap) rr.set_successful(True) return rr tahoe-lafs-1.10.0/src/allmydata/mutable/retrieve.py000066400000000000000000001245021221140116300221760ustar00rootroot00000000000000 import time from itertools import count from zope.interface import implements from twisted.internet import defer from twisted.python import failure from twisted.internet.interfaces import IPushProducer, IConsumer from foolscap.api import eventually, fireEventually, DeadReferenceError, \ RemoteException from allmydata.interfaces import IRetrieveStatus, NotEnoughSharesError, \ DownloadStopped, MDMF_VERSION, SDMF_VERSION from allmydata.util import hashutil, log, mathutil, deferredutil from allmydata.util.dictutil import DictOfSets from allmydata import hashtree, codec from allmydata.storage.server import si_b2a from pycryptopp.cipher.aes import AES from pycryptopp.publickey import rsa from allmydata.mutable.common import CorruptShareError, BadShareError, \ UncoordinatedWriteError from allmydata.mutable.layout import MDMFSlotReadProxy class RetrieveStatus: implements(IRetrieveStatus) statusid_counter = count(0) def __init__(self): self.timings = {} self.timings["fetch_per_server"] = {} self.timings["decode"] = 0.0 self.timings["decrypt"] = 0.0 self.timings["cumulative_verify"] = 0.0 self._problems = {} self.active = True self.storage_index = None self.helper = False self.encoding = ("?","?") self.size = None self.status = "Not started" self.progress = 0.0 self.counter = self.statusid_counter.next() self.started = time.time() def get_started(self): return self.started def get_storage_index(self): return self.storage_index def get_encoding(self): return self.encoding def using_helper(self): return self.helper def get_size(self): return self.size def get_status(self): return self.status def get_progress(self): return self.progress def get_active(self): return self.active def get_counter(self): return self.counter def get_problems(self): return self._problems def add_fetch_timing(self, server, elapsed): if server not in self.timings["fetch_per_server"]: self.timings["fetch_per_server"][server] = [] self.timings["fetch_per_server"][server].append(elapsed) def accumulate_decode_time(self, elapsed): self.timings["decode"] += elapsed def accumulate_decrypt_time(self, elapsed): self.timings["decrypt"] += elapsed def set_storage_index(self, si): self.storage_index = si def set_helper(self, helper): self.helper = helper def set_encoding(self, k, n): self.encoding = (k, n) def set_size(self, size): self.size = size def set_status(self, status): self.status = status def set_progress(self, value): self.progress = value def set_active(self, value): self.active = value def add_problem(self, server, f): serverid = server.get_serverid() self._problems[serverid] = f class Marker: pass class Retrieve: # this class is currently single-use. Eventually (in MDMF) we will make # it multi-use, in which case you can call download(range) multiple # times, and each will have a separate response chain. However the # Retrieve object will remain tied to a specific version of the file, and # will use a single ServerMap instance. 
implements(IPushProducer) def __init__(self, filenode, storage_broker, servermap, verinfo, fetch_privkey=False, verify=False): self._node = filenode assert self._node.get_pubkey() self._storage_broker = storage_broker self._storage_index = filenode.get_storage_index() assert self._node.get_readkey() self._last_failure = None prefix = si_b2a(self._storage_index)[:5] self._log_number = log.msg("Retrieve(%s): starting" % prefix) self._running = True self._decoding = False self._bad_shares = set() self.servermap = servermap assert self._node.get_pubkey() self.verinfo = verinfo # during repair, we may be called upon to grab the private key, since # it wasn't picked up during a verify=False checker run, and we'll # need it for repair to generate a new version. self._need_privkey = verify or (fetch_privkey and not self._node.get_privkey()) if self._need_privkey: # TODO: Evaluate the need for this. We'll use it if we want # to limit how many queries are on the wire for the privkey # at once. self._privkey_query_markers = [] # one Marker for each time we've # tried to get the privkey. # verify means that we are using the downloader logic to verify all # of our shares. This tells the downloader a few things. # # 1. We need to download all of the shares. # 2. We don't need to decode or decrypt the shares, since our # caller doesn't care about the plaintext, only the # information about which shares are or are not valid. # 3. When we are validating readers, we need to validate the # signature on the prefix. Do we? We already do this in the # servermap update? self._verify = verify self._status = RetrieveStatus() self._status.set_storage_index(self._storage_index) self._status.set_helper(False) self._status.set_progress(0.0) self._status.set_active(True) (seqnum, root_hash, IV, segsize, datalength, k, N, prefix, offsets_tuple) = self.verinfo self._status.set_size(datalength) self._status.set_encoding(k, N) self.readers = {} self._stopped = False self._pause_deferred = None self._offset = None self._read_length = None self.log("got seqnum %d" % self.verinfo[0]) def get_status(self): return self._status def log(self, *args, **kwargs): if "parent" not in kwargs: kwargs["parent"] = self._log_number if "facility" not in kwargs: kwargs["facility"] = "tahoe.mutable.retrieve" return log.msg(*args, **kwargs) def _set_current_status(self, state): seg = "%d/%d" % (self._current_segment, self._last_segment) self._status.set_status("segment %s (%s)" % (seg, state)) ################### # IPushProducer def pauseProducing(self): """ I am called by my download target if we have produced too much data for it to handle. I make the downloader stop producing new data until my resumeProducing method is called. """ if self._pause_deferred is not None: return # fired when the download is unpaused. self._old_status = self._status.get_status() self._set_current_status("paused") self._pause_deferred = defer.Deferred() def resumeProducing(self): """ I am called by my download target once it is ready to begin receiving data again. """ if self._pause_deferred is None: return p = self._pause_deferred self._pause_deferred = None self._status.set_status(self._old_status) eventually(p.callback, None) def stopProducing(self): self._stopped = True self.resumeProducing() def _check_for_paused(self, res): """ I am called just before a write to the consumer. I return a Deferred that eventually fires with the data that is to be written to the consumer. If the download has not been paused, the Deferred fires immediately. 
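# --- illustrative sketch (not from the original module) ---------------------
# The verinfo tuple unpacked repeatedly above always has the same nine
# fields; spelling the unpacking out once as a dict, purely for
# readability:

def _sketch_unpack_verinfo(verinfo):
    (seqnum, root_hash, IV, segsize, datalength,
     k, N, prefix, offsets_tuple) = verinfo
    return {"seqnum": seqnum, "root_hash": root_hash, "IV": IV,
            "segsize": segsize, "datalength": datalength,
            "k": k, "N": N, "prefix": prefix, "offsets": offsets_tuple}
# ---------------------------------------------------------------------------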
Otherwise, the Deferred fires when the downloader is unpaused. """ if self._pause_deferred is not None: d = defer.Deferred() self._pause_deferred.addCallback(lambda ignored: d.callback(res)) return d return res def _check_for_stopped(self, res): if self._stopped: raise DownloadStopped("our Consumer called stopProducing()") return res def download(self, consumer=None, offset=0, size=None): assert IConsumer.providedBy(consumer) or self._verify if consumer: self._consumer = consumer # we provide IPushProducer, so streaming=True, per # IConsumer. self._consumer.registerProducer(self, streaming=True) self._done_deferred = defer.Deferred() self._offset = offset self._read_length = size self._setup_download() self._setup_encoding_parameters() self.log("starting download") self._started_fetching = time.time() # The download process beyond this is a state machine. # _add_active_servers will select the servers that we want to use # for the download, and then attempt to start downloading. After # each segment, it will check for doneness, reacting to broken # servers and corrupt shares as necessary. If it runs out of good # servers before downloading all of the segments, _done_deferred # will errback. Otherwise, it will eventually callback with the # contents of the mutable file. self.loop() return self._done_deferred def loop(self): d = fireEventually(None) # avoid #237 recursion limit problem d.addCallback(lambda ign: self._activate_enough_servers()) d.addCallback(lambda ign: self._download_current_segment()) # when we're done, _download_current_segment will call _done. If we # aren't, it will call loop() again. d.addErrback(self._error) def _setup_download(self): self._started = time.time() self._status.set_status("Retrieving Shares") # how many shares do we need? (seqnum, root_hash, IV, segsize, datalength, k, N, prefix, offsets_tuple) = self.verinfo # first, which servers can we use? versionmap = self.servermap.make_versionmap() shares = versionmap[self.verinfo] # this sharemap is consumed as we decide to send requests self.remaining_sharemap = DictOfSets() for (shnum, server, timestamp) in shares: self.remaining_sharemap.add(shnum, server) # Reuse the SlotReader from the servermap. key = (self.verinfo, server.get_serverid(), self._storage_index, shnum) if key in self.servermap.proxies: reader = self.servermap.proxies[key] else: reader = MDMFSlotReadProxy(server.get_rref(), self._storage_index, shnum, None) reader.server = server self.readers[shnum] = reader assert len(self.remaining_sharemap) >= k self.shares = {} # maps shnum to validated blocks self._active_readers = [] # list of active readers for this dl. self._block_hash_trees = {} # shnum => hashtree # We need one share hash tree for the entire file; its leaves # are the roots of the block hash trees for the shares that # comprise it, and its root is in the verinfo. self.share_hash_tree = hashtree.IncompleteHashTree(N) self.share_hash_tree.set_hashes({0: root_hash}) def decode(self, blocks_and_salts, segnum): """ I am a helper method that the mutable file update process uses as a shortcut to decode and decrypt the segments that it needs to fetch in order to perform a file update. I take in a collection of blocks and salts, and pick some of those to make a segment with. I return the plaintext associated with that segment. """ # shnum => block hash tree. Unused, but setup_encoding_parameters will # want to set this. 
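# --- illustrative sketch (not from the original module) ---------------------
# The pause gate above (_check_for_paused plus pauseProducing /
# resumeProducing) is a small flow-control idiom: data passes straight
# through when not paused, and otherwise is parked on a Deferred that fires
# once the consumer resumes.  A stripped-down stand-alone version using the
# same twisted.internet.defer this module already depends on:

from twisted.internet import defer

class _SketchPauseGate(object):
    def __init__(self):
        self._pause_d = None
    def pause(self):
        if self._pause_d is None:
            self._pause_d = defer.Deferred()
    def resume(self):
        d, self._pause_d = self._pause_d, None
        if d is not None:
            d.callback(None)
    def gate(self, data):
        """Return data now, or a Deferred for it if we are paused."""
        if self._pause_d is None:
            return data
        d = defer.Deferred()
        self._pause_d.addCallback(lambda ign: d.callback(data))
        return d
# ---------------------------------------------------------------------------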
self._block_hash_trees = None self._setup_encoding_parameters() # _decode_blocks() expects the output of a gatherResults that # contains the outputs of _validate_block() (each of which is a dict # mapping shnum to (block,salt) bytestrings). d = self._decode_blocks([blocks_and_salts], segnum) d.addCallback(self._decrypt_segment) return d def _setup_encoding_parameters(self): """ I set up the encoding parameters, including k, n, the number of segments associated with this file, and the segment decoders. """ (seqnum, root_hash, IV, segsize, datalength, k, n, known_prefix, offsets_tuple) = self.verinfo self._required_shares = k self._total_shares = n self._segment_size = segsize self._data_length = datalength if not IV: self._version = MDMF_VERSION else: self._version = SDMF_VERSION if datalength and segsize: self._num_segments = mathutil.div_ceil(datalength, segsize) self._tail_data_size = datalength % segsize else: self._num_segments = 0 self._tail_data_size = 0 self._segment_decoder = codec.CRSDecoder() self._segment_decoder.set_params(segsize, k, n) if not self._tail_data_size: self._tail_data_size = segsize self._tail_segment_size = mathutil.next_multiple(self._tail_data_size, self._required_shares) if self._tail_segment_size == self._segment_size: self._tail_decoder = self._segment_decoder else: self._tail_decoder = codec.CRSDecoder() self._tail_decoder.set_params(self._tail_segment_size, self._required_shares, self._total_shares) self.log("got encoding parameters: " "k: %d " "n: %d " "%d segments of %d bytes each (%d byte tail segment)" % \ (k, n, self._num_segments, self._segment_size, self._tail_segment_size)) if self._block_hash_trees is not None: for i in xrange(self._total_shares): # So we don't have to do this later. self._block_hash_trees[i] = hashtree.IncompleteHashTree(self._num_segments) # Our last task is to tell the downloader where to start and # where to stop. We use three parameters for that: # - self._start_segment: the segment that we need to start # downloading from. # - self._current_segment: the next segment that we need to # download. # - self._last_segment: The last segment that we were asked to # download. # # We say that the download is complete when # self._current_segment > self._last_segment. We use # self._start_segment and self._last_segment to know when to # strip things off of segments, and how much to strip. if self._offset: self.log("got offset: %d" % self._offset) # our start segment is the first segment containing the # offset we were given. start = self._offset // self._segment_size assert start < self._num_segments self._start_segment = start self.log("got start segment: %d" % self._start_segment) else: self._start_segment = 0 # If self._read_length is None, then we want to read the whole # file. Otherwise, we want to read only part of the file, and # need to figure out where to stop reading. if self._read_length is not None: # our end segment is the last segment containing part of the # segment that we were asked to read. self.log("got read length %d" % self._read_length) if self._read_length != 0: end_data = self._offset + self._read_length # We don't actually need to read the byte at end_data, # but the one before it. 
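# --- illustrative sketch (not from the original module) ---------------------
# Worked example of the tail-segment sizing above: the decoder wants an
# input length that is a multiple of k, so the tail data size is rounded up
# (a local stand-in for the next_multiple call):

def _sketch_next_multiple(n, k):
    """Round n up to the next multiple of k."""
    return ((n + k - 1) // k) * k

# datalength = 1000, segsize = 384, k = 3:
#   num_segments    = ceil(1000 / 384)              -> 3
#   tail data size  = 1000 % 384                    -> 232
#   tail seg size   = _sketch_next_multiple(232, 3) -> 234
# ---------------------------------------------------------------------------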
end = (end_data - 1) // self._segment_size assert end < self._num_segments self._last_segment = end else: self._last_segment = self._start_segment self.log("got end segment: %d" % self._last_segment) else: self._last_segment = self._num_segments - 1 self._current_segment = self._start_segment def _activate_enough_servers(self): """ I populate self._active_readers with enough active readers to retrieve the contents of this mutable file. I am called before downloading starts, and (eventually) after each validation error, connection error, or other problem in the download. """ # TODO: It would be cool to investigate other heuristics for # reader selection. For instance, the cost (in time the user # spends waiting for their file) of selecting a really slow server # that happens to have a primary share is probably more than # selecting a really fast server that doesn't have a primary # share. Maybe the servermap could be extended to provide this # information; it could keep track of latency information while # it gathers more important data, and then this routine could # use that to select active readers. # # (these and other questions would be easier to answer with a # robust, configurable tahoe-lafs simulator, which modeled node # failures, differences in node speed, and other characteristics # that we expect storage servers to have. You could have # presets for really stable grids (like allmydata.com), # friendnets, make it easy to configure your own settings, and # then simulate the effect of big changes on these use cases # instead of just reasoning about what the effect might be. Out # of scope for MDMF, though.) # XXX: Why don't format= log messages work here? known_shnums = set(self.remaining_sharemap.keys()) used_shnums = set([r.shnum for r in self._active_readers]) unused_shnums = known_shnums - used_shnums if self._verify: new_shnums = unused_shnums # use them all elif len(self._active_readers) < self._required_shares: # need more shares more = self._required_shares - len(self._active_readers) # We favor lower numbered shares, since FEC is faster with # primary shares than with other shares, and lower-numbered # shares are more likely to be primary than higher numbered # shares. new_shnums = sorted(unused_shnums)[:more] if len(new_shnums) < more: # We don't have enough readers to retrieve the file; fail. self._raise_notenoughshareserror() else: new_shnums = [] self.log("adding %d new servers to the active list" % len(new_shnums)) for shnum in new_shnums: reader = self.readers[shnum] self._active_readers.append(reader) self.log("added reader for share %d" % shnum) # Each time we add a reader, we check to see if we need the # private key. If we do, we politely ask for it and then continue # computing. If we find that we haven't gotten it at the end of # segment decoding, then we'll take more drastic measures. if self._need_privkey and not self._node.is_readonly(): d = reader.get_encprivkey() d.addCallback(self._try_to_validate_privkey, reader, reader.server) # XXX: don't just drop the Deferred. We need error-reporting # but not flow-control here. def _try_to_validate_prefix(self, prefix, reader): """ I check that the prefix returned by a candidate server for retrieval matches the prefix that the servermap knows about (and, hence, the prefix that was validated earlier). If it does, I return True, which means that I approve of the use of the candidate server for segment retrieval. If it doesn't, I return False, which means that another server must be chosen. 
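# --- illustrative sketch (not from the original module) ---------------------
# The start/last segment arithmetic above, pulled out with a worked
# example:

def _sketch_segment_range(offset, read_length, segment_size, num_segments):
    """Which segments a partial read touches (read_length > 0 assumed)."""
    start = offset // segment_size
    end_data = offset + read_length
    last = (end_data - 1) // segment_size   # byte end_data itself is not read
    assert last < num_segments
    return start, last

# 128 KiB segments, offset = 100 KiB, read_length = 60 KiB:
#   start = (100 * 1024) // (128 * 1024)        -> segment 0
#   last  = (160 * 1024 - 1) // (128 * 1024)    -> segment 1
# ---------------------------------------------------------------------------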
""" (seqnum, root_hash, IV, segsize, datalength, k, N, known_prefix, offsets_tuple) = self.verinfo if known_prefix != prefix: self.log("prefix from share %d doesn't match" % reader.shnum) raise UncoordinatedWriteError("Mismatched prefix -- this could " "indicate an uncoordinated write") # Otherwise, we're okay -- no issues. def _remove_reader(self, reader): """ At various points, we will wish to remove a server from consideration and/or use. These include, but are not necessarily limited to: - A connection error. - A mismatched prefix (that is, a prefix that does not match our conception of the version information string). - A failing block hash, salt hash, or share hash, which can indicate disk failure/bit flips, or network trouble. This method will do that. I will make sure that the (shnum,reader) combination represented by my reader argument is not used for anything else during this download. I will not advise the reader of any corruption, something that my callers may wish to do on their own. """ # TODO: When you're done writing this, see if this is ever # actually used for something that _mark_bad_share isn't. I have # a feeling that they will be used for very similar things, and # that having them both here is just going to be an epic amount # of code duplication. # # (well, okay, not epic, but meaningful) self.log("removing reader %s" % reader) # Remove the reader from _active_readers self._active_readers.remove(reader) # TODO: self.readers.remove(reader)? for shnum in list(self.remaining_sharemap.keys()): self.remaining_sharemap.discard(shnum, reader.server) def _mark_bad_share(self, server, shnum, reader, f): """ I mark the given (server, shnum) as a bad share, which means that it will not be used anywhere else. There are several reasons to want to mark something as a bad share. These include: - A connection error to the server. - A mismatched prefix (that is, a prefix that does not match our local conception of the version information string). - A failing block hash, salt hash, share hash, or other integrity check. This method will ensure that readers that we wish to mark bad (for these reasons or other reasons) are not used for the rest of the download. Additionally, it will attempt to tell the remote server (with no guarantee of success) that its share is corrupt. """ self.log("marking share %d on server %s as bad" % \ (shnum, server.get_name())) prefix = self.verinfo[-2] self.servermap.mark_bad_share(server, shnum, prefix) self._remove_reader(reader) self._bad_shares.add((server, shnum, f)) self._status.add_problem(server, f) self._last_failure = f if f.check(BadShareError): self.notify_server_corruption(server, shnum, str(f.value)) def _download_current_segment(self): """ I download, validate, decode, decrypt, and assemble the segment that this Retrieve is currently responsible for downloading. """ if self._current_segment > self._last_segment: # No more segments to download, we're done. self.log("got plaintext, done") return self._done() elif self._verify and len(self._active_readers) == 0: self.log("no more good shares, no need to keep verifying") return self._done() self.log("on segment %d of %d" % (self._current_segment + 1, self._num_segments)) d = self._process_segment(self._current_segment) d.addCallback(lambda ign: self.loop()) return d def _process_segment(self, segnum): """ I download, validate, decode, and decrypt one segment of the file that this Retrieve is retrieving. 
This means coordinating the process of getting k blocks of that file, validating them, assembling them into one segment with the decoder, and then decrypting them. """ self.log("processing segment %d" % segnum) # TODO: The old code uses a marker. Should this code do that # too? What did the Marker do? # We need to ask each of our active readers for its block and # salt. We will then validate those. If validation is # successful, we will assemble the results into plaintext. ds = [] for reader in self._active_readers: started = time.time() d1 = reader.get_block_and_salt(segnum) d2,d3 = self._get_needed_hashes(reader, segnum) d = deferredutil.gatherResults([d1,d2,d3]) d.addCallback(self._validate_block, segnum, reader, reader.server, started) # _handle_bad_share takes care of recoverable errors (by dropping # that share and returning None). Any other errors (i.e. code # bugs) are passed through and cause the retrieve to fail. d.addErrback(self._handle_bad_share, [reader]) ds.append(d) dl = deferredutil.gatherResults(ds) if self._verify: dl.addCallback(lambda ignored: "") dl.addCallback(self._set_segment) else: dl.addCallback(self._maybe_decode_and_decrypt_segment, segnum) return dl def _maybe_decode_and_decrypt_segment(self, results, segnum): """ I take the results of fetching and validating the blocks from _process_segment. If validation and fetching succeeded without incident, I will proceed with decoding and decryption. Otherwise, I will do nothing. """ self.log("trying to decode and decrypt segment %d" % segnum) # 'results' is the output of a gatherResults set up in # _process_segment(). Each component Deferred will either contain the # non-Failure output of _validate_block() for a single block (i.e. # {segnum:(block,salt)}), or None if _validate_block threw an # exception and _validation_or_decoding_failed handled it (by # dropping that server). if None in results: self.log("some validation operations failed; not proceeding") return defer.succeed(None) self.log("everything looks ok, building segment %d" % segnum) d = self._decode_blocks(results, segnum) d.addCallback(self._decrypt_segment) # check to see whether we've been paused before writing # anything. d.addCallback(self._check_for_paused) d.addCallback(self._check_for_stopped) d.addCallback(self._set_segment) return d def _set_segment(self, segment): """ Given a plaintext segment, I register that segment with the target that is handling the file download. """ self.log("got plaintext for segment %d" % self._current_segment) if self._current_segment == self._start_segment: # We're on the first segment. It's possible that we want # only some part of the end of this segment, and that we # just downloaded the whole thing to get that part. If so, # we need to account for that and give the reader just the # data that they want. n = self._offset % self._segment_size self.log("stripping %d bytes off of the first segment" % n) self.log("original segment length: %d" % len(segment)) segment = segment[n:] self.log("new segment length: %d" % len(segment)) if self._current_segment == self._last_segment and self._read_length is not None: # We're on the last segment. It's possible that we only want # part of the beginning of this segment, and that we # downloaded the whole thing anyway. Make sure to give the # caller only the portion of the segment that they want to # receive. 
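# --- illustrative sketch (not from the original module) ---------------------
# _set_segment() above trims the first downloaded segment so the consumer
# only sees data from the requested offset onward:

def _sketch_strip_first_segment(segment, offset, segment_size):
    """Drop the bytes that precede `offset` within the first segment."""
    return segment[offset % segment_size:]

# e.g. offset = 5, segment_size = 8, segment = "ABCDEFGH" -> "FGH"
# ---------------------------------------------------------------------------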
extra = self._read_length if self._start_segment != self._last_segment: extra -= self._segment_size - \ (self._offset % self._segment_size) extra %= self._segment_size self.log("original segment length: %d" % len(segment)) segment = segment[:extra] self.log("new segment length: %d" % len(segment)) self.log("only taking %d bytes of the last segment" % extra) if not self._verify: self._consumer.write(segment) else: # we don't care about the plaintext if we are doing a verify. segment = None self._current_segment += 1 def _handle_bad_share(self, f, readers): """ I am called when a block or a salt fails to correctly validate, or when the decryption or decoding operation fails for some reason. I react to this failure by notifying the remote server of corruption, and then removing the remote server from further activity. """ # these are the errors we can tolerate: by giving up on this share # and finding others to replace it. Any other errors (i.e. coding # bugs) are re-raised, causing the download to fail. f.trap(DeadReferenceError, RemoteException, BadShareError) # DeadReferenceError happens when we try to fetch data from a server # that has gone away. RemoteException happens if the server had an # internal error. BadShareError encompasses: (UnknownVersionError, # LayoutInvalid, struct.error) which happen when we get obviously # wrong data, and CorruptShareError which happens later, when we # perform integrity checks on the data. assert isinstance(readers, list) bad_shnums = [reader.shnum for reader in readers] self.log("validation or decoding failed on share(s) %s, server(s) %s " ", segment %d: %s" % \ (bad_shnums, readers, self._current_segment, str(f))) for reader in readers: self._mark_bad_share(reader.server, reader.shnum, reader, f) return None def _validate_block(self, results, segnum, reader, server, started): """ I validate a block from one share on a remote server. """ # Grab the part of the block hash tree that is necessary to # validate this block, then generate the block hash root. self.log("validating share %d for segment %d" % (reader.shnum, segnum)) elapsed = time.time() - started self._status.add_fetch_timing(server, elapsed) self._set_current_status("validating blocks") block_and_salt, blockhashes, sharehashes = results block, salt = block_and_salt assert type(block) is str, (block, salt) blockhashes = dict(enumerate(blockhashes)) self.log("the reader gave me the following blockhashes: %s" % \ blockhashes.keys()) self.log("the reader gave me the following sharehashes: %s" % \ sharehashes.keys()) bht = self._block_hash_trees[reader.shnum] if bht.needed_hashes(segnum, include_leaf=True): try: bht.set_hashes(blockhashes) except (hashtree.BadHashError, hashtree.NotEnoughHashesError, \ IndexError), e: raise CorruptShareError(server, reader.shnum, "block hash tree failure: %s" % e) if self._version == MDMF_VERSION: blockhash = hashutil.block_hash(salt + block) else: blockhash = hashutil.block_hash(block) # If this works without an error, then validation is # successful. try: bht.set_hashes(leaves={segnum: blockhash}) except (hashtree.BadHashError, hashtree.NotEnoughHashesError, \ IndexError), e: raise CorruptShareError(server, reader.shnum, "block hash tree failure: %s" % e) # Reaching this point means that we know that this segment # is correct. Now we need to check to see whether the share # hash chain is also correct. # SDMF wrote share hash chains that didn't contain the # leaves, which would be produced from the block hash tree. 
# So we need to validate the block hash tree first. If # successful, then bht[0] will contain the root for the # shnum, which will be a leaf in the share hash tree, which # will allow us to validate the rest of the tree. try: self.share_hash_tree.set_hashes(hashes=sharehashes, leaves={reader.shnum: bht[0]}) except (hashtree.BadHashError, hashtree.NotEnoughHashesError, \ IndexError), e: raise CorruptShareError(server, reader.shnum, "corrupt hashes: %s" % e) self.log('share %d is valid for segment %d' % (reader.shnum, segnum)) return {reader.shnum: (block, salt)} def _get_needed_hashes(self, reader, segnum): """ I get the hashes needed to validate segnum from the reader, then return to my caller when this is done. """ bht = self._block_hash_trees[reader.shnum] needed = bht.needed_hashes(segnum, include_leaf=True) # The root of the block hash tree is also a leaf in the share # hash tree. So we don't need to fetch it from the remote # server. In the case of files with one segment, this means that # we won't fetch any block hash tree from the remote server, # since the hash of each share of the file is the entire block # hash tree, and is a leaf in the share hash tree. This is fine, # since any share corruption will be detected in the share hash # tree. #needed.discard(0) self.log("getting blockhashes for segment %d, share %d: %s" % \ (segnum, reader.shnum, str(needed))) # TODO is force_remote necessary here? d1 = reader.get_blockhashes(needed, force_remote=False) if self.share_hash_tree.needed_hashes(reader.shnum): need = self.share_hash_tree.needed_hashes(reader.shnum) self.log("also need sharehashes for share %d: %s" % (reader.shnum, str(need))) d2 = reader.get_sharehashes(need, force_remote=False) else: d2 = defer.succeed({}) # the logic in the next method # expects a dict return d1,d2 def _decode_blocks(self, results, segnum): """ I take a list of k blocks and salts, and decode that into a single encrypted segment. """ # 'results' is one or more dicts (each {shnum:(block,salt)}), and we # want to merge them all blocks_and_salts = {} for d in results: blocks_and_salts.update(d) # All of these blocks should have the same salt; in SDMF, it is # the file-wide IV, while in MDMF it is the per-segment salt. In # either case, we just need to get one of them and use it. # # d.items()[0] is like (shnum, (block, salt)) # d.items()[0][1] is like (block, salt) # d.items()[0][1][1] is the salt. salt = blocks_and_salts.items()[0][1][1] # Next, extract just the blocks from the dict. We'll use the # salt in the next step. 
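# Illustrative, standalone sketch (not part of the original retrieve.py):
# _get_needed_hashes asks the block hash tree which interior hashes must be
# fetched before one leaf can be checked.  For a complete binary tree stored
# as a flat array (root at index 0, children of i at 2i+1 and 2i+2; assume
# the leaf count is a power of two), those are the siblings along the path
# from the leaf up to the root.  Hypothetical helper, simplified from what
# allmydata.hashtree's needed_hashes() provides:
def _needed_hash_indexes(leafnum, num_leaves):
    i = (num_leaves - 1) + leafnum                     # flat-array index of the leaf
    needed = []
    while i > 0:
        needed.append(i - 1 if i % 2 == 0 else i + 1)  # the sibling on this level
        i = (i - 1) // 2                               # step up to the parent
    return needed

# In a 4-leaf tree, checking leaf 2 needs its sibling leaf (index 6) and the
# hash of the other subtree (index 1); the root itself is already trusted.
assert _needed_hash_indexes(2, 4) == [6, 1]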
share_and_shareids = [(k, v[0]) for k, v in blocks_and_salts.items()] d2 = dict(share_and_shareids) shareids = [] shares = [] for shareid, share in d2.items(): shareids.append(shareid) shares.append(share) self._set_current_status("decoding") started = time.time() assert len(shareids) >= self._required_shares, len(shareids) # zfec really doesn't want extra shares shareids = shareids[:self._required_shares] shares = shares[:self._required_shares] self.log("decoding segment %d" % segnum) if segnum == self._num_segments - 1: d = defer.maybeDeferred(self._tail_decoder.decode, shares, shareids) else: d = defer.maybeDeferred(self._segment_decoder.decode, shares, shareids) def _process(buffers): segment = "".join(buffers) self.log(format="now decoding segment %(segnum)s of %(numsegs)s", segnum=segnum, numsegs=self._num_segments, level=log.NOISY) self.log(" joined length %d, datalength %d" % (len(segment), self._data_length)) if segnum == self._num_segments - 1: size_to_use = self._tail_data_size else: size_to_use = self._segment_size segment = segment[:size_to_use] self.log(" segment len=%d" % len(segment)) self._status.accumulate_decode_time(time.time() - started) return segment, salt d.addCallback(_process) return d def _decrypt_segment(self, segment_and_salt): """ I take a single segment and its salt, and decrypt it. I return the plaintext of the segment that is in my argument. """ segment, salt = segment_and_salt self._set_current_status("decrypting") self.log("decrypting segment %d" % self._current_segment) started = time.time() key = hashutil.ssk_readkey_data_hash(salt, self._node.get_readkey()) decryptor = AES(key) plaintext = decryptor.process(segment) self._status.accumulate_decrypt_time(time.time() - started) return plaintext def notify_server_corruption(self, server, shnum, reason): rref = server.get_rref() rref.callRemoteOnly("advise_corrupt_share", "mutable", self._storage_index, shnum, reason) def _try_to_validate_privkey(self, enc_privkey, reader, server): alleged_privkey_s = self._node._decrypt_privkey(enc_privkey) alleged_writekey = hashutil.ssk_writekey_hash(alleged_privkey_s) if alleged_writekey != self._node.get_writekey(): self.log("invalid privkey from %s shnum %d" % (reader, reader.shnum), level=log.WEIRD, umid="YIw4tA") if self._verify: self.servermap.mark_bad_share(server, reader.shnum, self.verinfo[-2]) e = CorruptShareError(server, reader.shnum, "invalid privkey") f = failure.Failure(e) self._bad_shares.add((server, reader.shnum, f)) return # it's good self.log("got valid privkey from shnum %d on reader %s" % (reader.shnum, reader)) privkey = rsa.create_signing_key_from_string(alleged_privkey_s) self._node._populate_encprivkey(enc_privkey) self._node._populate_privkey(privkey) self._need_privkey = False def _done(self): """ I am called by _download_current_segment when the download process has finished successfully. After making some useful logging statements, I return the decrypted contents to the owner of this Retrieve object through self._done_deferred. 
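# Illustrative, standalone sketch (not part of the original retrieve.py):
# before the zfec decoder can run, the per-reader {shnum: (block, salt)}
# dicts are merged, a single salt is pulled out (file-wide IV for SDMF,
# per-segment salt for MDMF), and exactly k (shareid, share) pairs are kept.
# A standalone model with hypothetical names; the actual decode is left to
# the segment decoder used above.
def _prepare_for_decode(results, k):
    blocks_and_salts = {}
    for d in results:                                # one dict per surviving reader
        blocks_and_salts.update(d)
    salt = list(blocks_and_salts.values())[0][1]     # every entry carries the same salt
    shareids = sorted(blocks_and_salts)[:k]          # zfec wants exactly k shares
    shares = [blocks_and_salts[i][0] for i in shareids]
    return shareids, shares, salt

_results = [{0: ("b0", "IV")}, {3: ("b3", "IV")}, {7: ("b7", "IV")}]
assert _prepare_for_decode(_results, 2) == ([0, 3], ["b0", "b3"], "IV")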
""" self._running = False self._status.set_active(False) now = time.time() self._status.timings['total'] = now - self._started self._status.timings['fetch'] = now - self._started_fetching self._status.set_status("Finished") self._status.set_progress(1.0) # remember the encoding parameters, use them again next time (seqnum, root_hash, IV, segsize, datalength, k, N, prefix, offsets_tuple) = self.verinfo self._node._populate_required_shares(k) self._node._populate_total_shares(N) if self._verify: ret = self._bad_shares self.log("done verifying, found %d bad shares" % len(ret)) else: # TODO: upload status here? ret = self._consumer self._consumer.unregisterProducer() eventually(self._done_deferred.callback, ret) def _raise_notenoughshareserror(self): """ I am called by _activate_enough_servers when there are not enough active servers left to complete the download. After making some useful logging statements, I throw an exception to that effect to the caller of this Retrieve object through self._done_deferred. """ format = ("ran out of servers: " "have %(have)d of %(total)d segments " "found %(bad)d bad shares " "encoding %(k)d-of-%(n)d") args = {"have": self._current_segment, "total": self._num_segments, "need": self._last_segment, "k": self._required_shares, "n": self._total_shares, "bad": len(self._bad_shares)} raise NotEnoughSharesError("%s, last failure: %s" % (format % args, str(self._last_failure))) def _error(self, f): # all errors, including NotEnoughSharesError, land here self._running = False self._status.set_active(False) now = time.time() self._status.timings['total'] = now - self._started self._status.timings['fetch'] = now - self._started_fetching self._status.set_status("Failed") eventually(self._done_deferred.errback, f) tahoe-lafs-1.10.0/src/allmydata/mutable/servermap.py000066400000000000000000001464661221140116300223720ustar00rootroot00000000000000 import sys, time, copy from zope.interface import implements from itertools import count from twisted.internet import defer from twisted.python import failure from foolscap.api import DeadReferenceError, RemoteException, eventually, \ fireEventually from allmydata.util import base32, hashutil, log, deferredutil from allmydata.util.dictutil import DictOfSets from allmydata.storage.server import si_b2a from allmydata.interfaces import IServermapUpdaterStatus from pycryptopp.publickey import rsa from allmydata.mutable.common import MODE_CHECK, MODE_ANYTHING, MODE_WRITE, \ MODE_READ, MODE_REPAIR, CorruptShareError from allmydata.mutable.layout import SIGNED_PREFIX_LENGTH, MDMFSlotReadProxy class UpdateStatus: implements(IServermapUpdaterStatus) statusid_counter = count(0) def __init__(self): self.timings = {} self.timings["per_server"] = {} self.timings["cumulative_verify"] = 0.0 self.privkey_from = None self.problems = {} self.active = True self.storage_index = None self.mode = "?" 
self.status = "Not started" self.progress = 0.0 self.counter = self.statusid_counter.next() self.started = time.time() self.finished = None def add_per_server_time(self, server, op, sent, elapsed): assert op in ("query", "late", "privkey") if server not in self.timings["per_server"]: self.timings["per_server"][server] = [] self.timings["per_server"][server].append((op,sent,elapsed)) def get_started(self): return self.started def get_finished(self): return self.finished def get_storage_index(self): return self.storage_index def get_mode(self): return self.mode def get_servermap(self): return self.servermap def get_privkey_from(self): return self.privkey_from def using_helper(self): return False def get_size(self): return "-NA-" def get_status(self): return self.status def get_progress(self): return self.progress def get_active(self): return self.active def get_counter(self): return self.counter def set_storage_index(self, si): self.storage_index = si def set_mode(self, mode): self.mode = mode def set_privkey_from(self, server): self.privkey_from = server def set_status(self, status): self.status = status def set_progress(self, value): self.progress = value def set_active(self, value): self.active = value def set_finished(self, when): self.finished = when class ServerMap: """I record the placement of mutable shares. This object records which shares (of various versions) are located on which servers. One purpose I serve is to inform callers about which versions of the mutable file are recoverable and 'current'. A second purpose is to serve as a state marker for test-and-set operations. I am passed out of retrieval operations and back into publish operations, which means 'publish this new version, but only if nothing has changed since I last retrieved this data'. This reduces the chances of clobbering a simultaneous (uncoordinated) write. @var _known_shares: a dictionary, mapping a (server, shnum) tuple to a (versionid, timestamp) tuple. Each 'versionid' is a tuple of (seqnum, root_hash, IV, segsize, datalength, k, N, signed_prefix, offsets) @ivar _bad_shares: dict with keys of (server, shnum) tuples, describing shares that I should ignore (because a previous user of the servermap determined that they were invalid). The updater only locates a certain number of shares: if some of these turn out to have integrity problems and are unusable, the caller will need to mark those shares as bad, then re-update the servermap, then try again. The dict maps (server, shnum) tuple to old checkstring. """ def __init__(self): self._known_shares = {} self.unreachable_servers = set() # servers that didn't respond to queries self.reachable_servers = set() # servers that did respond to queries self._problems = [] # mostly for debugging self._bad_shares = {} # maps (server,shnum) to old checkstring self._last_update_mode = None self._last_update_time = 0 self.proxies = {} self.update_data = {} # shnum -> [(verinfo,(blockhashes,start,end)),..] 
# where blockhashes is a list of bytestrings (the result of # layout.MDMFSlotReadProxy.get_blockhashes), and start/end are both # (block,salt) tuple-of-bytestrings from get_block_and_salt() def copy(self): s = ServerMap() s._known_shares = self._known_shares.copy() # tuple->tuple s.unreachable_servers = set(self.unreachable_servers) s.reachable_servers = set(self.reachable_servers) s._problems = self._problems[:] s._bad_shares = self._bad_shares.copy() # tuple->str s._last_update_mode = self._last_update_mode s._last_update_time = self._last_update_time s.update_data = copy.deepcopy(self.update_data) return s def get_reachable_servers(self): return self.reachable_servers def mark_server_reachable(self, server): self.reachable_servers.add(server) def mark_server_unreachable(self, server): self.unreachable_servers.add(server) def mark_bad_share(self, server, shnum, checkstring): """This share was found to be bad, either in the checkstring or signature (detected during mapupdate), or deeper in the share (detected at retrieve time). Remove it from our list of useful shares, and remember that it is bad so we don't add it back again later. We record the share's old checkstring (which might be corrupted or badly signed) so that a repair operation can do the test-and-set using it as a reference. """ key = (server, shnum) # record checkstring self._bad_shares[key] = checkstring self._known_shares.pop(key, None) def get_bad_shares(self): # key=(server,shnum) -> checkstring return self._bad_shares def add_new_share(self, server, shnum, verinfo, timestamp): """We've written a new share out, replacing any that was there before.""" key = (server, shnum) self._bad_shares.pop(key, None) self._known_shares[key] = (verinfo, timestamp) def add_problem(self, f): self._problems.append(f) def get_problems(self): return self._problems def set_last_update(self, mode, when): self._last_update_mode = mode self._last_update_time = when def get_last_update(self): return (self._last_update_mode, self._last_update_time) def dump(self, out=sys.stdout): print >>out, "servermap:" for ( (server, shnum), (verinfo, timestamp) ) in self._known_shares.items(): (seqnum, root_hash, IV, segsize, datalength, k, N, prefix, offsets_tuple) = verinfo print >>out, ("[%s]: sh#%d seq%d-%s %d-of-%d len%d" % (server.get_name(), shnum, seqnum, base32.b2a(root_hash)[:4], k, N, datalength)) if self._problems: print >>out, "%d PROBLEMS" % len(self._problems) for f in self._problems: print >>out, str(f) return out def all_servers(self): return set([server for (server, shnum) in self._known_shares]) def all_servers_for_version(self, verinfo): """Return a set of servers that hold shares for the given version.""" return set([server for ( (server, shnum), (verinfo2, timestamp) ) in self._known_shares.items() if verinfo == verinfo2]) def get_known_shares(self): # maps (server,shnum) to (versionid,timestamp) return self._known_shares def make_sharemap(self): """Return a dict that maps shnum to a set of servers that hold it.""" sharemap = DictOfSets() for (server, shnum) in self._known_shares: sharemap.add(shnum, server) return sharemap def make_versionmap(self): """Return a dict that maps versionid to sets of (shnum, server, timestamp) tuples.""" versionmap = DictOfSets() for ( (server, shnum), (verinfo, timestamp) ) in self._known_shares.items(): versionmap.add(verinfo, (shnum, server, timestamp)) return versionmap def debug_shares_on_server(self, server): # used by tests return set([shnum for (s, shnum) in self._known_shares if s == server]) def 
version_on_server(self, server, shnum): key = (server, shnum) if key in self._known_shares: (verinfo, timestamp) = self._known_shares[key] return verinfo return None def shares_available(self): """Return a dict that maps verinfo to tuples of (num_distinct_shares, k, N) tuples.""" versionmap = self.make_versionmap() all_shares = {} for verinfo, shares in versionmap.items(): s = set() for (shnum, server, timestamp) in shares: s.add(shnum) (seqnum, root_hash, IV, segsize, datalength, k, N, prefix, offsets_tuple) = verinfo all_shares[verinfo] = (len(s), k, N) return all_shares def highest_seqnum(self): available = self.shares_available() seqnums = [verinfo[0] for verinfo in available.keys()] seqnums.append(0) return max(seqnums) def summarize_version(self, verinfo): """Take a versionid, return a string that describes it.""" (seqnum, root_hash, IV, segsize, datalength, k, N, prefix, offsets_tuple) = verinfo return "seq%d-%s" % (seqnum, base32.b2a(root_hash)[:4]) def summarize_versions(self): """Return a string describing which versions we know about.""" versionmap = self.make_versionmap() bits = [] for (verinfo, shares) in versionmap.items(): vstr = self.summarize_version(verinfo) shnums = set([shnum for (shnum, server, timestamp) in shares]) bits.append("%d*%s" % (len(shnums), vstr)) return "/".join(bits) def recoverable_versions(self): """Return a set of versionids, one for each version that is currently recoverable.""" versionmap = self.make_versionmap() recoverable_versions = set() for (verinfo, shares) in versionmap.items(): (seqnum, root_hash, IV, segsize, datalength, k, N, prefix, offsets_tuple) = verinfo shnums = set([shnum for (shnum, server, timestamp) in shares]) if len(shnums) >= k: # this one is recoverable recoverable_versions.add(verinfo) return recoverable_versions def unrecoverable_versions(self): """Return a set of versionids, one for each version that is currently unrecoverable.""" versionmap = self.make_versionmap() unrecoverable_versions = set() for (verinfo, shares) in versionmap.items(): (seqnum, root_hash, IV, segsize, datalength, k, N, prefix, offsets_tuple) = verinfo shnums = set([shnum for (shnum, server, timestamp) in shares]) if len(shnums) < k: unrecoverable_versions.add(verinfo) return unrecoverable_versions def best_recoverable_version(self): """Return a single versionid, for the so-called 'best' recoverable version. Sequence number is the primary sort criteria, followed by root hash. Returns None if there are no recoverable versions.""" recoverable = list(self.recoverable_versions()) recoverable.sort() if recoverable: return recoverable[-1] return None def size_of_version(self, verinfo): """Given a versionid (perhaps returned by best_recoverable_version), return the size of the file in bytes.""" (seqnum, root_hash, IV, segsize, datalength, k, N, prefix, offsets_tuple) = verinfo return datalength def unrecoverable_newer_versions(self): # Return a dict of versionid -> health, for versions that are # unrecoverable and have later seqnums than any recoverable versions. # These indicate that a write will lose data. 
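# Illustrative, standalone sketch (not part of the original servermap.py):
# make_versionmap(), recoverable_versions() and best_recoverable_version()
# boil down to: group shares by version, call a version recoverable once it
# has at least k distinct share numbers, and pick the recoverable version
# that sorts highest (sequence number first).  Simplified verinfo tuples and
# hypothetical names (the real verinfo is the 9-tuple described above):
from collections import defaultdict, namedtuple

_Verinfo = namedtuple("_Verinfo", "seqnum root_hash k")

def _best_recoverable_version(known_shares):
    """known_shares: dict mapping (server, shnum) -> verinfo."""
    versionmap = defaultdict(set)                    # verinfo -> set of shnums
    for (server, shnum), verinfo in known_shares.items():
        versionmap[verinfo].add(shnum)
    recoverable = [v for v, shnums in versionmap.items() if len(shnums) >= v.k]
    return max(recoverable) if recoverable else None

_v1, _v2 = _Verinfo(1, "aaaa", 2), _Verinfo(2, "bbbb", 2)
_shares = {("srvA", 0): _v1, ("srvB", 1): _v1,   # seq1: two distinct shares, recoverable
           ("srvC", 0): _v2}                     # seq2: only one share, not recoverable
assert _best_recoverable_version(_shares) == _v1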
versionmap = self.make_versionmap() healths = {} # maps verinfo to (found,k) unrecoverable = set() highest_recoverable_seqnum = -1 for (verinfo, shares) in versionmap.items(): (seqnum, root_hash, IV, segsize, datalength, k, N, prefix, offsets_tuple) = verinfo shnums = set([shnum for (shnum, server, timestamp) in shares]) healths[verinfo] = (len(shnums),k) if len(shnums) < k: unrecoverable.add(verinfo) else: highest_recoverable_seqnum = max(seqnum, highest_recoverable_seqnum) newversions = {} for verinfo in unrecoverable: (seqnum, root_hash, IV, segsize, datalength, k, N, prefix, offsets_tuple) = verinfo if seqnum > highest_recoverable_seqnum: newversions[verinfo] = healths[verinfo] return newversions def needs_merge(self): # return True if there are multiple recoverable versions with the # same seqnum, meaning that MutableFileNode.read_best_version is not # giving you the whole story, and that using its data to do a # subsequent publish will lose information. recoverable_seqnums = [verinfo[0] for verinfo in self.recoverable_versions()] for seqnum in recoverable_seqnums: if recoverable_seqnums.count(seqnum) > 1: return True return False def get_update_data_for_share_and_verinfo(self, shnum, verinfo): """ I return the update data for the given shnum """ update_data = self.update_data[shnum] update_datum = [i[1] for i in update_data if i[0] == verinfo][0] return update_datum def set_update_data_for_share_and_verinfo(self, shnum, verinfo, data): """ I record the block hash tree for the given shnum. """ self.update_data.setdefault(shnum , []).append((verinfo, data)) class ServermapUpdater: def __init__(self, filenode, storage_broker, monitor, servermap, mode=MODE_READ, add_lease=False, update_range=None): """I update a servermap, locating a sufficient number of useful shares and remembering where they are located. """ self._node = filenode self._storage_broker = storage_broker self._monitor = monitor self._servermap = servermap self.mode = mode self._add_lease = add_lease self._running = True self._storage_index = filenode.get_storage_index() self._last_failure = None self._status = UpdateStatus() self._status.set_storage_index(self._storage_index) self._status.set_progress(0.0) self._status.set_mode(mode) self._servers_responded = set() # how much data should we read? # SDMF: # * if we only need the checkstring, then [0:75] # * if we need to validate the checkstring sig, then [543ish:799ish] # * if we need the verification key, then [107:436ish] # * the offset table at [75:107] tells us about the 'ish' # * if we need the encrypted private key, we want [-1216ish:] # * but we can't read from negative offsets # * the offset table tells us the 'ish', also the positive offset # MDMF: # * Checkstring? [0:72] # * If we want to validate the checkstring, then [0:72], [143:?] -- # the offset table will tell us for sure. # * If we need the verification key, we have to consult the offset # table as well. # At this point, we don't know which we are. Our filenode can # tell us, but it might be lying -- in some cases, we're # responsible for telling it which kind of file it is. self._read_size = 4000 if mode == MODE_CHECK: # we use unpack_prefix_and_signature, so we need 1k self._read_size = 1000 self._need_privkey = False if mode in (MODE_WRITE, MODE_REPAIR) and not self._node.get_privkey(): self._need_privkey = True # check+repair: repair requires the privkey, so if we didn't happen # to ask for it during the check, we'll have problems doing the # publish. 
self.fetch_update_data = False if mode == MODE_WRITE and update_range: # We're updating the servermap in preparation for an # in-place file update, so we need to fetch some additional # data from each share that we find. assert len(update_range) == 2 self.start_segment = update_range[0] self.end_segment = update_range[1] self.fetch_update_data = True prefix = si_b2a(self._storage_index)[:5] self._log_number = log.msg(format="SharemapUpdater(%(si)s): starting (%(mode)s)", si=prefix, mode=mode) def get_status(self): return self._status def log(self, *args, **kwargs): if "parent" not in kwargs: kwargs["parent"] = self._log_number if "facility" not in kwargs: kwargs["facility"] = "tahoe.mutable.mapupdate" return log.msg(*args, **kwargs) def update(self): """Update the servermap to reflect current conditions. Returns a Deferred that fires with the servermap once the update has finished.""" self._started = time.time() self._status.set_active(True) # self._valid_versions is a set of validated verinfo tuples. We just # use it to remember which versions had valid signatures, so we can # avoid re-checking the signatures for each share. self._valid_versions = set() self._done_deferred = defer.Deferred() # first, which servers should be talk to? Any that were in our old # servermap, plus "enough" others. self._queries_completed = 0 sb = self._storage_broker # All of the servers, permuted by the storage index, as usual. full_serverlist = list(sb.get_servers_for_psi(self._storage_index)) self.full_serverlist = full_serverlist # for use later, immutable self.extra_servers = full_serverlist[:] # servers are removed as we use them self._good_servers = set() # servers who had some shares self._empty_servers = set() # servers who don't have any shares self._bad_servers = set() # servers to whom our queries failed k = self._node.get_required_shares() # For what cases can these conditions work? if k is None: # make a guess k = 3 N = self._node.get_total_shares() if N is None: N = 10 self.EPSILON = k # we want to send queries to at least this many servers (although we # might not wait for all of their answers to come back) self.num_servers_to_query = k + self.EPSILON if self.mode in (MODE_CHECK, MODE_REPAIR): # We want to query all of the servers. initial_servers_to_query = list(full_serverlist) must_query = set(initial_servers_to_query) self.extra_servers = [] elif self.mode == MODE_WRITE: # we're planning to replace all the shares, so we want a good # chance of finding them all. We will keep searching until we've # seen epsilon that don't have a share. # We don't query all of the servers because that could take a while. self.num_servers_to_query = N + self.EPSILON initial_servers_to_query, must_query = self._build_initial_querylist() self.required_num_empty_servers = self.EPSILON # TODO: arrange to read lots of data from k-ish servers, to avoid # the extra round trip required to read large directories. This # might also avoid the round trip required to read the encrypted # private key. else: # MODE_READ, MODE_ANYTHING # 2*k servers is good enough. initial_servers_to_query, must_query = self._build_initial_querylist() # this is a set of servers that we are required to get responses # from: they are servers who used to have a share, so we need to know # where they currently stand, even if that means we have to wait for # a silently-lost TCP connection to time out. We remove servers from # this set as we get responses. 
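# Illustrative, standalone sketch (not part of the original servermap.py):
# get_servers_for_psi() returns the grid's servers in an order permuted by
# the storage index, so each file spreads its k+EPSILON queries over a
# different ordering of the same servers.  One simple way to build such a
# per-file permutation is to sort server ids by a hash of storage index plus
# server id; the real derivation lives in the storage broker and may differ
# in detail, so treat this purely as a sketch with hypothetical names.
import hashlib

def _permuted_servers(serverids, storage_index):
    return sorted(serverids,
                  key=lambda sid: hashlib.sha256(storage_index + sid).digest())

_servers = [b"server-A", b"server-B", b"server-C", b"server-D"]
_order = _permuted_servers(_servers, b"storage-index-1")
assert sorted(_order) == sorted(_servers)   # a permutation: nothing added or lost
# a different storage index generally yields a different ordering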
self._must_query = set(must_query) # now initial_servers_to_query contains the servers that we should # ask, self.must_query contains the servers that we must have heard # from before we can consider ourselves finished, and # self.extra_servers contains the overflow (servers that we should # tap if we don't get enough responses) # I guess that self._must_query is a subset of # initial_servers_to_query? assert must_query.issubset(initial_servers_to_query) self._send_initial_requests(initial_servers_to_query) self._status.timings["initial_queries"] = time.time() - self._started return self._done_deferred def _build_initial_querylist(self): # we send queries to everyone who was already in the sharemap initial_servers_to_query = set(self._servermap.all_servers()) # and we must wait for responses from them must_query = set(initial_servers_to_query) while ((self.num_servers_to_query > len(initial_servers_to_query)) and self.extra_servers): initial_servers_to_query.add(self.extra_servers.pop(0)) return initial_servers_to_query, must_query def _send_initial_requests(self, serverlist): self._status.set_status("Sending %d initial queries" % len(serverlist)) self._queries_outstanding = set() for server in serverlist: self._queries_outstanding.add(server) self._do_query(server, self._storage_index, self._read_size) if not serverlist: # there is nobody to ask, so we need to short-circuit the state # machine. d = defer.maybeDeferred(self._check_for_done, None) d.addErrback(self._fatal_error) # control flow beyond this point: state machine. Receiving responses # from queries is the input. We might send out more queries, or we # might produce a result. return None def _do_query(self, server, storage_index, readsize): self.log(format="sending query to [%(name)s], readsize=%(readsize)d", name=server.get_name(), readsize=readsize, level=log.NOISY) started = time.time() self._queries_outstanding.add(server) d = self._do_read(server, storage_index, [], [(0, readsize)]) d.addCallback(self._got_results, server, readsize, storage_index, started) d.addErrback(self._query_failed, server) # errors that aren't handled by _query_failed (and errors caused by # _query_failed) get logged, but we still want to check for doneness. d.addErrback(log.err) d.addErrback(self._fatal_error) d.addCallback(self._check_for_done) return d def _do_read(self, server, storage_index, shnums, readv): ss = server.get_rref() if self._add_lease: # send an add-lease message in parallel. The results are handled # separately. This is sent before the slot_readv() so that we can # be sure the add_lease is retired by the time slot_readv comes # back (this relies upon our knowledge that the server code for # add_lease is synchronous). renew_secret = self._node.get_renewal_secret(server) cancel_secret = self._node.get_cancel_secret(server) d2 = ss.callRemote("add_lease", storage_index, renew_secret, cancel_secret) # we ignore success d2.addErrback(self._add_lease_failed, server, storage_index) d = ss.callRemote("slot_readv", storage_index, shnums, readv) return d def _got_corrupt_share(self, e, shnum, server, data, lp): """ I am called when a remote server returns a corrupt share in response to one of our queries. By corrupt, I mean a share without a valid signature. I then record the failure, notify the server of the corruption, and record the share as bad. """ f = failure.Failure(e) self.log(format="bad share: %(f_value)s", f_value=str(f), failure=f, parent=lp, level=log.WEIRD, umid="h5llHg") # Notify the server that its share is corrupt. 
self.notify_server_corruption(server, shnum, str(e)) # By flagging this as a bad server, we won't count any of # the other shares on that server as valid, though if we # happen to find a valid version string amongst those # shares, we'll keep track of it so that we don't need # to validate the signature on those again. self._bad_servers.add(server) self._last_failure = f # XXX: Use the reader for this? checkstring = data[:SIGNED_PREFIX_LENGTH] self._servermap.mark_bad_share(server, shnum, checkstring) self._servermap.add_problem(f) def _got_results(self, datavs, server, readsize, storage_index, started): lp = self.log(format="got result from [%(name)s], %(numshares)d shares", name=server.get_name(), numshares=len(datavs)) ss = server.get_rref() now = time.time() elapsed = now - started def _done_processing(ignored=None): self._queries_outstanding.discard(server) self._servermap.mark_server_reachable(server) self._must_query.discard(server) self._queries_completed += 1 if not self._running: self.log("but we're not running, so we'll ignore it", parent=lp) _done_processing() self._status.add_per_server_time(server, "late", started, elapsed) return self._status.add_per_server_time(server, "query", started, elapsed) if datavs: self._good_servers.add(server) else: self._empty_servers.add(server) ds = [] for shnum,datav in datavs.items(): data = datav[0] reader = MDMFSlotReadProxy(ss, storage_index, shnum, data, data_is_everything=(len(data) < readsize)) # our goal, with each response, is to validate the version # information and share data as best we can at this point -- # we do this by validating the signature. To do this, we # need to do the following: # - If we don't already have the public key, fetch the # public key. We use this to validate the signature. if not self._node.get_pubkey(): # fetch and set the public key. d = reader.get_verification_key() d.addCallback(lambda results, shnum=shnum: self._try_to_set_pubkey(results, server, shnum, lp)) # XXX: Make self._pubkey_query_failed? d.addErrback(lambda error, shnum=shnum, data=data: self._got_corrupt_share(error, shnum, server, data, lp)) else: # we already have the public key. d = defer.succeed(None) # Neither of these two branches return anything of # consequence, so the first entry in our deferredlist will # be None. # - Next, we need the version information. We almost # certainly got this by reading the first thousand or so # bytes of the share on the storage server, so we # shouldn't need to fetch anything at this step. d2 = reader.get_verinfo() d2.addErrback(lambda error, shnum=shnum, data=data: self._got_corrupt_share(error, shnum, server, data, lp)) # - Next, we need the signature. For an SDMF share, it is # likely that we fetched this when doing our initial fetch # to get the version information. In MDMF, this lives at # the end of the share, so unless the file is quite small, # we'll need to do a remote fetch to get it. d3 = reader.get_signature() d3.addErrback(lambda error, shnum=shnum, data=data: self._got_corrupt_share(error, shnum, server, data, lp)) # Once we have all three of these responses, we can move on # to validating the signature # Does the node already have a privkey? If not, we'll try to # fetch it here. 
if self._need_privkey: d4 = reader.get_encprivkey() d4.addCallback(lambda results, shnum=shnum: self._try_to_validate_privkey(results, server, shnum, lp)) d4.addErrback(lambda error, shnum=shnum: self._privkey_query_failed(error, server, shnum, lp)) else: d4 = defer.succeed(None) if self.fetch_update_data: # fetch the block hash tree and first + last segment, as # configured earlier. # Then set them in wherever we happen to want to set # them. ds = [] # XXX: We do this above, too. Is there a good way to # make the two routines share the value without # introducing more roundtrips? ds.append(reader.get_verinfo()) ds.append(reader.get_blockhashes()) ds.append(reader.get_block_and_salt(self.start_segment)) ds.append(reader.get_block_and_salt(self.end_segment)) d5 = deferredutil.gatherResults(ds) d5.addCallback(self._got_update_results_one_share, shnum) else: d5 = defer.succeed(None) dl = defer.DeferredList([d, d2, d3, d4, d5]) def _append_proxy(passthrough, shnum=shnum, reader=reader): # Store the proxy (with its cache) keyed by serverid and # version. _, (_,verinfo), _, _, _ = passthrough verinfo = self._make_verinfo_hashable(verinfo) self._servermap.proxies[(verinfo, server.get_serverid(), storage_index, shnum)] = reader return passthrough dl.addCallback(_append_proxy) dl.addBoth(self._turn_barrier) dl.addCallback(lambda results, shnum=shnum: self._got_signature_one_share(results, shnum, server, lp)) dl.addErrback(lambda error, shnum=shnum, data=data: self._got_corrupt_share(error, shnum, server, data, lp)) ds.append(dl) # dl is a deferred list that will fire when all of the shares # that we found on this server are done processing. When dl fires, # we know that processing is done, so we can decrement the # semaphore-like thing that we incremented earlier. dl = defer.DeferredList(ds, fireOnOneErrback=True) # Are we done? Done means that there are no more queries to # send, that there are no outstanding queries, and that we # haven't received any queries that are still processing. If we # are done, self._check_for_done will cause the done deferred # that we returned to our caller to fire, which tells them that # they have a complete servermap, and that we won't be touching # the servermap anymore. dl.addCallback(_done_processing) dl.addCallback(self._check_for_done) dl.addErrback(self._fatal_error) # all done! self.log("_got_results done", parent=lp, level=log.NOISY) return dl def _turn_barrier(self, result): """ I help the servermap updater avoid the recursion limit issues discussed in #237. """ return fireEventually(result) def _try_to_set_pubkey(self, pubkey_s, server, shnum, lp): if self._node.get_pubkey(): return # don't go through this again if we don't have to fingerprint = hashutil.ssk_pubkey_fingerprint_hash(pubkey_s) assert len(fingerprint) == 32 if fingerprint != self._node.get_fingerprint(): raise CorruptShareError(server, shnum, "pubkey doesn't match fingerprint") self._node._populate_pubkey(self._deserialize_pubkey(pubkey_s)) assert self._node.get_pubkey() def notify_server_corruption(self, server, shnum, reason): rref = server.get_rref() rref.callRemoteOnly("advise_corrupt_share", "mutable", self._storage_index, shnum, reason) def _got_signature_one_share(self, results, shnum, server, lp): # It is our job to give versioninfo to our caller. We need to # raise CorruptShareError if the share is corrupt for any # reason, something that our caller will handle. 
self.log(format="_got_results: got shnum #%(shnum)d from serverid %(name)s", shnum=shnum, name=server.get_name(), level=log.NOISY, parent=lp) if not self._running: # We can't process the results, since we can't touch the # servermap anymore. self.log("but we're not running anymore.") return None _, verinfo, signature, __, ___ = results verinfo = self._make_verinfo_hashable(verinfo[1]) # This tuple uniquely identifies a share on the grid; we use it # to keep track of the ones that we've already seen. (seqnum, root_hash, saltish, segsize, datalen, k, n, prefix, offsets_tuple) = verinfo if verinfo not in self._valid_versions: # This is a new version tuple, and we need to validate it # against the public key before keeping track of it. assert self._node.get_pubkey() valid = self._node.get_pubkey().verify(prefix, signature[1]) if not valid: raise CorruptShareError(server, shnum, "signature is invalid") # ok, it's a valid verinfo. Add it to the list of validated # versions. self.log(" found valid version %d-%s from %s-sh%d: %d-%d/%d/%d" % (seqnum, base32.b2a(root_hash)[:4], server.get_name(), shnum, k, n, segsize, datalen), parent=lp) self._valid_versions.add(verinfo) # We now know that this is a valid candidate verinfo. Whether or # not this instance of it is valid is a matter for the next # statement; at this point, we just know that if we see this # version info again, that its signature checks out and that # we're okay to skip the signature-checking step. # (server, shnum) are bound in the method invocation. if (server, shnum) in self._servermap.get_bad_shares(): # we've been told that the rest of the data in this share is # unusable, so don't add it to the servermap. self.log("but we've been told this is a bad share", parent=lp, level=log.UNUSUAL) return verinfo # Add the info to our servermap. timestamp = time.time() self._servermap.add_new_share(server, shnum, verinfo, timestamp) return verinfo def _make_verinfo_hashable(self, verinfo): (seqnum, root_hash, saltish, segsize, datalen, k, n, prefix, offsets) = verinfo offsets_tuple = tuple( [(key,value) for key,value in offsets.items()] ) verinfo = (seqnum, root_hash, saltish, segsize, datalen, k, n, prefix, offsets_tuple) return verinfo def _got_update_results_one_share(self, results, share): """ I record the update results in results. """ assert len(results) == 4 verinfo, blockhashes, start, end = results verinfo = self._make_verinfo_hashable(verinfo) update_data = (blockhashes, start, end) self._servermap.set_update_data_for_share_and_verinfo(share, verinfo, update_data) def _deserialize_pubkey(self, pubkey_s): verifier = rsa.create_verifying_key_from_string(pubkey_s) return verifier def _try_to_validate_privkey(self, enc_privkey, server, shnum, lp): """ Given a writekey from a remote server, I validate it against the writekey stored in my node. If it is valid, then I set the privkey and encprivkey properties of the node. 
""" alleged_privkey_s = self._node._decrypt_privkey(enc_privkey) alleged_writekey = hashutil.ssk_writekey_hash(alleged_privkey_s) if alleged_writekey != self._node.get_writekey(): self.log("invalid privkey from %s shnum %d" % (server.get_name(), shnum), parent=lp, level=log.WEIRD, umid="aJVccw") return # it's good self.log("got valid privkey from shnum %d on serverid %s" % (shnum, server.get_name()), parent=lp) privkey = rsa.create_signing_key_from_string(alleged_privkey_s) self._node._populate_encprivkey(enc_privkey) self._node._populate_privkey(privkey) self._need_privkey = False self._status.set_privkey_from(server) def _add_lease_failed(self, f, server, storage_index): # Older versions of Tahoe didn't handle the add-lease message very # well: <=1.1.0 throws a NameError because it doesn't implement # remote_add_lease(), 1.2.0/1.3.0 throw IndexError on unknown buckets # (which is most of them, since we send add-lease to everybody, # before we know whether or not they have any shares for us), and # 1.2.0 throws KeyError even on known buckets due to an internal bug # in the latency-measuring code. # we want to ignore the known-harmless errors and log the others. In # particular we want to log any local errors caused by coding # problems. if f.check(DeadReferenceError): return if f.check(RemoteException): if f.value.failure.check(KeyError, IndexError, NameError): # this may ignore a bit too much, but that only hurts us # during debugging return self.log(format="error in add_lease from [%(name)s]: %(f_value)s", name=server.get_name(), f_value=str(f.value), failure=f, level=log.WEIRD, umid="iqg3mw") return # local errors are cause for alarm log.err(f, format="local error in add_lease to [%(name)s]: %(f_value)s", name=server.get_name(), f_value=str(f.value), level=log.WEIRD, umid="ZWh6HA") def _query_failed(self, f, server): if not self._running: return level = log.WEIRD if f.check(DeadReferenceError): level = log.UNUSUAL self.log(format="error during query: %(f_value)s", f_value=str(f.value), failure=f, level=level, umid="IHXuQg") self._must_query.discard(server) self._queries_outstanding.discard(server) self._bad_servers.add(server) self._servermap.add_problem(f) # a server could be in both ServerMap.reachable_servers and # .unreachable_servers if they responded to our query, but then an # exception was raised in _got_results. 
self._servermap.mark_server_unreachable(server) self._queries_completed += 1 self._last_failure = f def _privkey_query_failed(self, f, server, shnum, lp): self._queries_outstanding.discard(server) if not self._running: return level = log.WEIRD if f.check(DeadReferenceError): level = log.UNUSUAL self.log(format="error during privkey query: %(f_value)s", f_value=str(f.value), failure=f, parent=lp, level=level, umid="McoJ5w") self._servermap.add_problem(f) self._last_failure = f def _check_for_done(self, res): # exit paths: # return self._send_more_queries(outstanding) : send some more queries # return self._done() : all done # return : keep waiting, no new queries lp = self.log(format=("_check_for_done, mode is '%(mode)s', " "%(outstanding)d queries outstanding, " "%(extra)d extra servers available, " "%(must)d 'must query' servers left, " "need_privkey=%(need_privkey)s" ), mode=self.mode, outstanding=len(self._queries_outstanding), extra=len(self.extra_servers), must=len(self._must_query), need_privkey=self._need_privkey, level=log.NOISY, ) if not self._running: self.log("but we're not running", parent=lp, level=log.NOISY) return if self._must_query: # we are still waiting for responses from servers that used to have # a share, so we must continue to wait. No additional queries are # required at this time. self.log("%d 'must query' servers left" % len(self._must_query), level=log.NOISY, parent=lp) return if (not self._queries_outstanding and not self.extra_servers): # all queries have retired, and we have no servers left to ask. No # more progress can be made, therefore we are done. self.log("all queries are retired, no extra servers: done", parent=lp) return self._done() recoverable_versions = self._servermap.recoverable_versions() unrecoverable_versions = self._servermap.unrecoverable_versions() # what is our completion policy? how hard should we work? if self.mode == MODE_ANYTHING: if recoverable_versions: self.log("%d recoverable versions: done" % len(recoverable_versions), parent=lp) return self._done() if self.mode in (MODE_CHECK, MODE_REPAIR): # we used self._must_query, and we know there aren't any # responses still waiting, so that means we must be done self.log("done", parent=lp) return self._done() MAX_IN_FLIGHT = 5 if self.mode == MODE_READ: # if we've queried k+epsilon servers, and we see a recoverable # version, and we haven't seen any unrecoverable higher-seqnum'ed # versions, then we're done. if self._queries_completed < self.num_servers_to_query: self.log(format="%(completed)d completed, %(query)d to query: need more", completed=self._queries_completed, query=self.num_servers_to_query, level=log.NOISY, parent=lp) return self._send_more_queries(MAX_IN_FLIGHT) if not recoverable_versions: self.log("no recoverable versions: need more", level=log.NOISY, parent=lp) return self._send_more_queries(MAX_IN_FLIGHT) highest_recoverable = max(recoverable_versions) highest_recoverable_seqnum = highest_recoverable[0] for unrec_verinfo in unrecoverable_versions: if unrec_verinfo[0] > highest_recoverable_seqnum: # there is evidence of a higher-seqnum version, but we # don't yet see enough shares to recover it. Try harder. # TODO: consider sending more queries. # TODO: consider limiting the search distance self.log("evidence of higher seqnum: need more", level=log.UNUSUAL, parent=lp) return self._send_more_queries(MAX_IN_FLIGHT) # all the unrecoverable versions were old or concurrent with a # recoverable version. Good enough. 
self.log("no higher-seqnum: done", parent=lp) return self._done() if self.mode == MODE_WRITE: # we want to keep querying until we've seen a few that don't have # any shares, to be sufficiently confident that we've seen all # the shares. This is still less work than MODE_CHECK, which asks # every server in the world. if not recoverable_versions: self.log("no recoverable versions: need more", parent=lp, level=log.NOISY) return self._send_more_queries(MAX_IN_FLIGHT) last_found = -1 last_not_responded = -1 num_not_responded = 0 num_not_found = 0 states = [] found_boundary = False for i,server in enumerate(self.full_serverlist): if server in self._bad_servers: # query failed states.append("x") #self.log("loop [%s]: x" % server.get_name() elif server in self._empty_servers: # no shares states.append("0") #self.log("loop [%s]: 0" % server.get_name() if last_found != -1: num_not_found += 1 if num_not_found >= self.EPSILON: self.log("found our boundary, %s" % "".join(states), parent=lp, level=log.NOISY) found_boundary = True break elif server in self._good_servers: # yes shares states.append("1") #self.log("loop [%s]: 1" % server.get_name() last_found = i num_not_found = 0 else: # not responded yet states.append("?") #self.log("loop [%s]: ?" % server.get_name() last_not_responded = i num_not_responded += 1 if found_boundary: # we need to know that we've gotten answers from # everybody to the left of here if last_not_responded == -1: # we're done self.log("have all our answers", parent=lp, level=log.NOISY) # .. unless we're still waiting on the privkey if self._need_privkey: self.log("but we're still waiting for the privkey", parent=lp, level=log.NOISY) # if we found the boundary but we haven't yet found # the privkey, we may need to look further. If # somehow all the privkeys were corrupted (but the # shares were readable), then this is likely to do an # exhaustive search. return self._send_more_queries(MAX_IN_FLIGHT) return self._done() # still waiting for somebody return self._send_more_queries(num_not_responded) # if we hit here, we didn't find our boundary, so we're still # waiting for servers self.log("no boundary yet, %s" % "".join(states), parent=lp, level=log.NOISY) return self._send_more_queries(MAX_IN_FLIGHT) # otherwise, keep up to 5 queries in flight. 
TODO: this is pretty # arbitrary, really I want this to be something like k - # max(known_version_sharecounts) + some extra self.log("catchall: need more", parent=lp, level=log.NOISY) return self._send_more_queries(MAX_IN_FLIGHT) def _send_more_queries(self, num_outstanding): more_queries = [] while True: self.log(format=" there are %(outstanding)d queries outstanding", outstanding=len(self._queries_outstanding), level=log.NOISY) active_queries = len(self._queries_outstanding) + len(more_queries) if active_queries >= num_outstanding: break if not self.extra_servers: break more_queries.append(self.extra_servers.pop(0)) self.log(format="sending %(more)d more queries: %(who)s", more=len(more_queries), who=" ".join(["[%s]" % s.get_name() for s in more_queries]), level=log.NOISY) for server in more_queries: self._do_query(server, self._storage_index, self._read_size) # we'll retrigger when those queries come back def _done(self): if not self._running: self.log("not running; we're already done") return self._running = False now = time.time() elapsed = now - self._started self._status.set_finished(now) self._status.timings["total"] = elapsed self._status.set_progress(1.0) self._status.set_status("Finished") self._status.set_active(False) self._servermap.set_last_update(self.mode, self._started) # the servermap will not be touched after this self.log("servermap: %s" % self._servermap.summarize_versions()) eventually(self._done_deferred.callback, self._servermap) def _fatal_error(self, f): self.log("fatal error", failure=f, level=log.WEIRD, umid="1cNvlw") self._done_deferred.errback(f) tahoe-lafs-1.10.0/src/allmydata/node.py000066400000000000000000000410051221140116300176410ustar00rootroot00000000000000import datetime, os.path, re, types, ConfigParser, tempfile from base64 import b32decode, b32encode from twisted.python import log as twlog from twisted.application import service from twisted.internet import defer, reactor from foolscap.api import Tub, eventually, app_versions import foolscap.logging.log from allmydata import get_package_versions, get_package_versions_string from allmydata.util import log from allmydata.util import fileutil, iputil, observer from allmydata.util.assertutil import precondition, _assert from allmydata.util.fileutil import abspath_expanduser_unicode from allmydata.util.encodingutil import get_filesystem_encoding, quote_output # Add our application versions to the data that Foolscap's LogPublisher # reports. for thing, things_version in get_package_versions().iteritems(): app_versions.add_version(thing, str(things_version)) # group 1 will be addr (dotted quad string), group 3 if any will be portnum (string) ADDR_RE=re.compile("^([1-9][0-9]*\.[1-9][0-9]*\.[1-9][0-9]*\.[1-9][0-9]*)(:([1-9][0-9]*))?$") def formatTimeTahoeStyle(self, when): # we want UTC timestamps that look like: # 2007-10-12 00:26:28.566Z [Client] rnp752lz: 'client running' d = datetime.datetime.utcfromtimestamp(when) if d.microsecond: return d.isoformat(" ")[:-3]+"Z" else: return d.isoformat(" ") + ".000Z" PRIV_README=""" This directory contains files which contain private data for the Tahoe node, such as private keys. On Unix-like systems, the permissions on this directory are set to disallow users other than its owner from reading the contents of the files. See the 'configuration.rst' documentation file for details.""" class _None: # used as a marker in get_config() pass class MissingConfigEntry(Exception): """ A required config entry was not found. 
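# Illustrative, standalone sketch (not part of the original node.py): what
# ADDR_RE above accepts -- group 1 is the dotted quad, group 3 the optional
# port number.  Note that, as the pattern is written, every octet must start
# with a non-zero digit, so addresses containing a 0 octet are not matched.
# The pattern below is copied from above, as a raw string:
import re
_ADDR_RE = re.compile(r"^([1-9][0-9]*\.[1-9][0-9]*\.[1-9][0-9]*\.[1-9][0-9]*)(:([1-9][0-9]*))?$")

_m = _ADDR_RE.match("192.168.1.2:8098")
assert _m.group(1) == "192.168.1.2" and _m.group(3) == "8098"
assert _ADDR_RE.match("192.168.1.2").group(3) is None   # the port is optional
assert _ADDR_RE.match("10.0.0.1") is None               # a 0 octet does not match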
""" class OldConfigError(Exception): """ An obsolete config file was found. See docs/historical/configuration.rst. """ def __str__(self): return ("Found pre-Tahoe-LAFS-v1.3 configuration file(s):\n" "%s\n" "See docs/historical/configuration.rst." % "\n".join([quote_output(fname) for fname in self.args[0]])) class OldConfigOptionError(Exception): pass class Node(service.MultiService): # this implements common functionality of both Client nodes and Introducer # nodes. NODETYPE = "unknown NODETYPE" PORTNUMFILE = None CERTFILE = "node.pem" GENERATED_FILES = [] def __init__(self, basedir=u"."): service.MultiService.__init__(self) self.basedir = abspath_expanduser_unicode(unicode(basedir)) self._portnumfile = os.path.join(self.basedir, self.PORTNUMFILE) self._tub_ready_observerlist = observer.OneShotObserverList() fileutil.make_dirs(os.path.join(self.basedir, "private"), 0700) open(os.path.join(self.basedir, "private", "README"), "w").write(PRIV_README) # creates self.config self.read_config() nickname_utf8 = self.get_config("node", "nickname", "") self.nickname = nickname_utf8.decode("utf-8") assert type(self.nickname) is unicode self.init_tempdir() self.create_tub() self.logSource="Node" self.setup_ssh() self.setup_logging() self.log("Node constructed. " + get_package_versions_string()) iputil.increase_rlimits() def init_tempdir(self): local_tempdir_utf8 = "tmp" # default is NODEDIR/tmp/ tempdir = self.get_config("node", "tempdir", local_tempdir_utf8).decode('utf-8') tempdir = os.path.join(self.basedir, tempdir) if not os.path.exists(tempdir): fileutil.make_dirs(tempdir) tempfile.tempdir = abspath_expanduser_unicode(tempdir) # this should cause twisted.web.http (which uses # tempfile.TemporaryFile) to put large request bodies in the given # directory. Without this, the default temp dir is usually /tmp/, # which is frequently too small. test_name = tempfile.mktemp() _assert(os.path.dirname(test_name) == tempdir, test_name, tempdir) def get_config(self, section, option, default=_None, boolean=False): try: if boolean: return self.config.getboolean(section, option) return self.config.get(section, option) except (ConfigParser.NoOptionError, ConfigParser.NoSectionError): if default is _None: fn = os.path.join(self.basedir, u"tahoe.cfg") raise MissingConfigEntry("%s is missing the [%s]%s entry" % (quote_output(fn), section, option)) return default def set_config(self, section, option, value): if not self.config.has_section(section): self.config.add_section(section) self.config.set(section, option, value) assert self.config.get(section, option) == value def read_config(self): self.error_about_old_config_files() self.config = ConfigParser.SafeConfigParser() tahoe_cfg = os.path.join(self.basedir, "tahoe.cfg") try: f = open(tahoe_cfg, "rb") try: # Skip any initial Byte Order Mark. Since this is an ordinary file, we # don't need to handle incomplete reads, and can assume seekability. if f.read(3) != '\xEF\xBB\xBF': f.seek(0) self.config.readfp(f) finally: f.close() except EnvironmentError: if os.path.exists(tahoe_cfg): raise cfg_tubport = self.get_config("node", "tub.port", "") if not cfg_tubport: # For 'tub.port', tahoe.cfg overrides the individual file on # disk. So only read self._portnumfile if tahoe.cfg doesn't # provide a value. 
try: file_tubport = fileutil.read(self._portnumfile).strip() self.set_config("node", "tub.port", file_tubport) except EnvironmentError: if os.path.exists(self._portnumfile): raise def error_about_old_config_files(self): """ If any old configuration files are detected, raise OldConfigError. """ oldfnames = set() for name in [ 'nickname', 'webport', 'keepalive_timeout', 'log_gatherer.furl', 'disconnect_timeout', 'advertised_ip_addresses', 'introducer.furl', 'helper.furl', 'key_generator.furl', 'stats_gatherer.furl', 'no_storage', 'readonly_storage', 'sizelimit', 'debug_discard_storage', 'run_helper']: if name not in self.GENERATED_FILES: fullfname = os.path.join(self.basedir, name) if os.path.exists(fullfname): oldfnames.add(fullfname) if oldfnames: e = OldConfigError(oldfnames) twlog.msg(e) raise e def create_tub(self): certfile = os.path.join(self.basedir, "private", self.CERTFILE) self.tub = Tub(certFile=certfile) self.tub.setOption("logLocalFailures", True) self.tub.setOption("logRemoteFailures", True) self.tub.setOption("expose-remote-exception-types", False) # see #521 for a discussion of how to pick these timeout values. keepalive_timeout_s = self.get_config("node", "timeout.keepalive", "") if keepalive_timeout_s: self.tub.setOption("keepaliveTimeout", int(keepalive_timeout_s)) disconnect_timeout_s = self.get_config("node", "timeout.disconnect", "") if disconnect_timeout_s: # N.B.: this is in seconds, so use "1800" to get 30min self.tub.setOption("disconnectTimeout", int(disconnect_timeout_s)) self.nodeid = b32decode(self.tub.tubID.upper()) # binary format self.write_config("my_nodeid", b32encode(self.nodeid).lower() + "\n") self.short_nodeid = b32encode(self.nodeid).lower()[:8] # ready for printing tubport = self.get_config("node", "tub.port", "tcp:0") self.tub.listenOn(tubport) # we must wait until our service has started before we can find out # our IP address and thus do tub.setLocation, and we can't register # any services with the Tub until after that point self.tub.setServiceParent(self) def setup_ssh(self): ssh_port = self.get_config("node", "ssh.port", "") if ssh_port: ssh_keyfile = self.get_config("node", "ssh.authorized_keys_file").decode('utf-8') from allmydata import manhole m = manhole.AuthorizedKeysManhole(ssh_port, ssh_keyfile.encode(get_filesystem_encoding())) m.setServiceParent(self) self.log("AuthorizedKeysManhole listening on %s" % ssh_port) def get_app_versions(self): # TODO: merge this with allmydata.get_package_versions return dict(app_versions.versions) def get_config_from_file(self, name, required=False): """Get the (string) contents of a config file, or None if the file did not exist. If required=True, raise an exception rather than returning None. Any leading or trailing whitespace will be stripped from the data.""" fn = os.path.join(self.basedir, name) try: return fileutil.read(fn).strip() except EnvironmentError: if not required: return None raise def write_private_config(self, name, value): """Write the (string) contents of a private config file (which is a config file that resides within the subdirectory named 'private'), and return it. """ privname = os.path.join(self.basedir, "private", name) open(privname, "w").write(value) def get_private_config(self, name, default=_None): """Read the (string) contents of a private config file (which is a config file that resides within the subdirectory named 'private'), and return it. Return a default, or raise an error if one was not given. 
""" privname = os.path.join(self.basedir, "private", name) try: return fileutil.read(privname) except EnvironmentError: if os.path.exists(privname): raise if default is _None: raise MissingConfigEntry("The required configuration file %s is missing." % (quote_output(privname),)) return default def get_or_create_private_config(self, name, default=_None): """Try to get the (string) contents of a private config file (which is a config file that resides within the subdirectory named 'private'), and return it. Any leading or trailing whitespace will be stripped from the data. If the file does not exist, and default is not given, report an error. If the file does not exist and a default is specified, try to create it using that default, and then return the value that was written. If 'default' is a string, use it as a default value. If not, treat it as a zero-argument callable that is expected to return a string. """ privname = os.path.join(self.basedir, "private", name) try: value = fileutil.read(privname) except EnvironmentError: if os.path.exists(privname): raise if default is _None: raise MissingConfigEntry("The required configuration file %s is missing." % (quote_output(privname),)) if isinstance(default, basestring): value = default else: value = default() fileutil.write(privname, value) return value.strip() def write_config(self, name, value, mode="w"): """Write a string to a config file.""" fn = os.path.join(self.basedir, name) try: fileutil.write(fn, value, mode) except EnvironmentError, e: self.log("Unable to write config file '%s'" % fn) self.log(e) def startService(self): # Note: this class can be started and stopped at most once. self.log("Node.startService") # Record the process id in the twisted log, after startService() # (__init__ is called before fork(), but startService is called # after). Note that Foolscap logs handle pid-logging by itself, no # need to send a pid to the foolscap log here. twlog.msg("My pid: %s" % os.getpid()) try: os.chmod("twistd.pid", 0644) except EnvironmentError: pass # Delay until the reactor is running. eventually(self._startService) def _startService(self): precondition(reactor.running) self.log("Node._startService") service.MultiService.startService(self) d = defer.succeed(None) d.addCallback(lambda res: iputil.get_local_addresses_async()) d.addCallback(self._setup_tub) def _ready(res): self.log("%s running" % self.NODETYPE) self._tub_ready_observerlist.fire(self) return self d.addCallback(_ready) d.addErrback(self._service_startup_failed) def _service_startup_failed(self, failure): self.log('_startService() failed') log.err(failure) print "Node._startService failed, aborting" print failure #reactor.stop() # for unknown reasons, reactor.stop() isn't working. [ ] TODO self.log('calling os.abort()') twlog.msg('calling os.abort()') # make sure it gets into twistd.log print "calling os.abort()" os.abort() def stopService(self): self.log("Node.stopService") d = self._tub_ready_observerlist.when_fired() def _really_stopService(ignored): self.log("Node._really_stopService") return service.MultiService.stopService(self) d.addCallback(_really_stopService) return d def shutdown(self): """Shut down the node. Returns a Deferred that fires (with None) when it finally stops kicking.""" self.log("Node.shutdown") return self.stopService() def setup_logging(self): # we replace the formatTime() method of the log observer that # twistd set up for us, with a method that uses our preferred # timestamp format. 
for o in twlog.theLogPublisher.observers: # o might be a FileLogObserver's .emit method if type(o) is type(self.setup_logging): # bound method ob = o.im_self if isinstance(ob, twlog.FileLogObserver): newmeth = types.UnboundMethodType(formatTimeTahoeStyle, ob, ob.__class__) ob.formatTime = newmeth # TODO: twisted >2.5.0 offers maxRotatedFiles=50 lgfurl_file = os.path.join(self.basedir, "private", "logport.furl").encode(get_filesystem_encoding()) self.tub.setOption("logport-furlfile", lgfurl_file) lgfurl = self.get_config("node", "log_gatherer.furl", "") if lgfurl: # this is in addition to the contents of log-gatherer-furlfile self.tub.setOption("log-gatherer-furl", lgfurl) self.tub.setOption("log-gatherer-furlfile", os.path.join(self.basedir, "log_gatherer.furl")) self.tub.setOption("bridge-twisted-logs", True) incident_dir = os.path.join(self.basedir, "logs", "incidents") # this doesn't quite work yet: unit tests fail foolscap.logging.log.setLogDir(incident_dir.encode(get_filesystem_encoding())) def log(self, *args, **kwargs): return log.msg(*args, **kwargs) def _setup_tub(self, local_addresses): # we can't get a dynamically-assigned portnum until our Tub is # running, which means after startService. l = self.tub.getListeners()[0] portnum = l.getPortnum() # record which port we're listening on, so we can grab the same one # next time fileutil.write_atomically(self._portnumfile, "%d\n" % portnum, mode="") base_location = ",".join([ "%s:%d" % (addr, portnum) for addr in local_addresses ]) location = self.get_config("node", "tub.location", base_location) self.log("Tub location set to %s" % location) self.tub.setLocation(location) return self.tub def when_tub_ready(self): return self._tub_ready_observerlist.when_fired() def add_service(self, s): s.setServiceParent(self) return s tahoe-lafs-1.10.0/src/allmydata/nodemaker.py000066400000000000000000000156071221140116300206720ustar00rootroot00000000000000import weakref from zope.interface import implements from allmydata.util.assertutil import precondition from allmydata.interfaces import INodeMaker from allmydata.immutable.literal import LiteralFileNode from allmydata.immutable.filenode import ImmutableFileNode, CiphertextFileNode from allmydata.immutable.upload import Data from allmydata.mutable.filenode import MutableFileNode from allmydata.mutable.publish import MutableData from allmydata.dirnode import DirectoryNode, pack_children from allmydata.unknown import UnknownNode from allmydata.blacklist import ProhibitedNode from allmydata import uri class NodeMaker: implements(INodeMaker) def __init__(self, storage_broker, secret_holder, history, uploader, terminator, default_encoding_parameters, mutable_file_default, key_generator, blacklist=None): self.storage_broker = storage_broker self.secret_holder = secret_holder self.history = history self.uploader = uploader self.terminator = terminator self.default_encoding_parameters = default_encoding_parameters self.mutable_file_default = mutable_file_default self.key_generator = key_generator self.blacklist = blacklist self._node_cache = weakref.WeakValueDictionary() # uri -> node def _create_lit(self, cap): return LiteralFileNode(cap) def _create_immutable(self, cap): return ImmutableFileNode(cap, self.storage_broker, self.secret_holder, self.terminator, self.history) def _create_immutable_verifier(self, cap): return CiphertextFileNode(cap, self.storage_broker, self.secret_holder, self.terminator, self.history) def _create_mutable(self, cap): n = MutableFileNode(self.storage_broker, 
self.secret_holder, self.default_encoding_parameters, self.history) return n.init_from_cap(cap) def _create_dirnode(self, filenode): return DirectoryNode(filenode, self, self.uploader) def create_from_cap(self, writecap, readcap=None, deep_immutable=False, name=u""): # this returns synchronously. It starts with a "cap string". assert isinstance(writecap, (str, type(None))), type(writecap) assert isinstance(readcap, (str, type(None))), type(readcap) bigcap = writecap or readcap if not bigcap: # maybe the writecap was hidden because we're in a readonly # directory, and the future cap format doesn't have a readcap, or # something. return UnknownNode(None, None) # deep_immutable and name not needed # The name doesn't matter for caching since it's only used in the error # attribute of an UnknownNode, and we don't cache those. if deep_immutable: memokey = "I" + bigcap else: memokey = "M" + bigcap if memokey in self._node_cache: node = self._node_cache[memokey] else: cap = uri.from_string(bigcap, deep_immutable=deep_immutable, name=name) node = self._create_from_single_cap(cap) # node is None for an unknown URI, otherwise it is a type for which # is_mutable() is known. We avoid cacheing mutable nodes due to # ticket #1679. if node is None: # don't cache UnknownNode node = UnknownNode(writecap, readcap, deep_immutable=deep_immutable, name=name) elif node.is_mutable(): self._node_cache[memokey] = node # note: WeakValueDictionary if self.blacklist: si = node.get_storage_index() # if this node is blacklisted, return the reason, otherwise return None reason = self.blacklist.check_storageindex(si) if reason is not None: # The original node object is cached above, not the ProhibitedNode wrapper. # This ensures that removing the blacklist entry will make the node # accessible if create_from_cap is called again. node = ProhibitedNode(node, reason) return node def _create_from_single_cap(self, cap): if isinstance(cap, uri.LiteralFileURI): return self._create_lit(cap) if isinstance(cap, uri.CHKFileURI): return self._create_immutable(cap) if isinstance(cap, uri.CHKFileVerifierURI): return self._create_immutable_verifier(cap) if isinstance(cap, (uri.ReadonlySSKFileURI, uri.WriteableSSKFileURI, uri.WriteableMDMFFileURI, uri.ReadonlyMDMFFileURI)): return self._create_mutable(cap) if isinstance(cap, (uri.DirectoryURI, uri.ReadonlyDirectoryURI, uri.ImmutableDirectoryURI, uri.LiteralDirectoryURI, uri.MDMFDirectoryURI, uri.ReadonlyMDMFDirectoryURI)): filenode = self._create_from_single_cap(cap.get_filenode_cap()) return self._create_dirnode(filenode) return None def create_mutable_file(self, contents=None, keysize=None, version=None): if version is None: version = self.mutable_file_default n = MutableFileNode(self.storage_broker, self.secret_holder, self.default_encoding_parameters, self.history) d = self.key_generator.generate(keysize) d.addCallback(n.create_with_keys, contents, version=version) d.addCallback(lambda res: n) return d def create_new_mutable_directory(self, initial_children={}, version=None): # initial_children must have metadata (i.e. 
{} instead of None) for (name, (node, metadata)) in initial_children.iteritems(): precondition(isinstance(metadata, dict), "create_new_mutable_directory requires metadata to be a dict, not None", metadata) node.raise_error() d = self.create_mutable_file(lambda n: MutableData(pack_children(initial_children, n.get_writekey())), version=version) d.addCallback(self._create_dirnode) return d def create_immutable_directory(self, children, convergence=None): if convergence is None: convergence = self.secret_holder.get_convergence_secret() packed = pack_children(children, None, deep_immutable=True) uploadable = Data(packed, convergence) d = self.uploader.upload(uploadable) d.addCallback(lambda results: self.create_from_cap(None, results.get_uri())) d.addCallback(self._create_dirnode) return d tahoe-lafs-1.10.0/src/allmydata/scripts/000077500000000000000000000000001221140116300200315ustar00rootroot00000000000000tahoe-lafs-1.10.0/src/allmydata/scripts/__init__.py000066400000000000000000000000001221140116300221300ustar00rootroot00000000000000tahoe-lafs-1.10.0/src/allmydata/scripts/admin.py000066400000000000000000000047341221140116300215030ustar00rootroot00000000000000 from twisted.python import usage from allmydata.scripts.common import BaseOptions class GenerateKeypairOptions(BaseOptions): def getSynopsis(self): return "Usage: tahoe [global-opts] admin generate-keypair" def getUsage(self, width=None): t = BaseOptions.getUsage(self, width) t += """ Generate a public/private keypair, dumped to stdout as two lines of ASCII.. """ return t def print_keypair(options): from allmydata.util.keyutil import make_keypair out = options.stdout privkey_vs, pubkey_vs = make_keypair() print >>out, "private:", privkey_vs print >>out, "public:", pubkey_vs class DerivePubkeyOptions(BaseOptions): def parseArgs(self, privkey): self.privkey = privkey def getSynopsis(self): return "Usage: tahoe [global-opts] admin derive-pubkey PRIVKEY" def getUsage(self, width=None): t = BaseOptions.getUsage(self, width) t += """ Given a private (signing) key that was previously generated with generate-keypair, derive the public key and print it to stdout. """ return t def derive_pubkey(options): out = options.stdout from allmydata.util import keyutil privkey_vs = options.privkey sk, pubkey_vs = keyutil.parse_privkey(privkey_vs) print >>out, "private:", privkey_vs print >>out, "public:", pubkey_vs return 0 class AdminCommand(BaseOptions): subCommands = [ ("generate-keypair", None, GenerateKeypairOptions, "Generate a public/private keypair, write to stdout."), ("derive-pubkey", None, DerivePubkeyOptions, "Derive a public key from a private key."), ] def postOptions(self): if not hasattr(self, 'subOptions'): raise usage.UsageError("must specify a subcommand") def getSynopsis(self): return "Usage: tahoe [global-opts] admin SUBCOMMAND" def getUsage(self, width=None): t = BaseOptions.getUsage(self, width) t += """ Please run e.g. 'tahoe admin generate-keypair --help' for more details on each subcommand. 
""" return t subDispatch = { "generate-keypair": print_keypair, "derive-pubkey": derive_pubkey, } def do_admin(options): so = options.subOptions so.stdout = options.stdout so.stderr = options.stderr f = subDispatch[options.subCommand] return f(so) subCommands = [ ["admin", None, AdminCommand, "admin subcommands: use 'tahoe admin' for a list"], ] dispatch = { "admin": do_admin, } tahoe-lafs-1.10.0/src/allmydata/scripts/backupdb.py000066400000000000000000000324371221140116300221670ustar00rootroot00000000000000 import os.path, sys, time, random, stat from allmydata.util.netstring import netstring from allmydata.util.hashutil import backupdb_dirhash from allmydata.util import base32 from allmydata.util.fileutil import abspath_expanduser_unicode from allmydata.util.encodingutil import to_str DAY = 24*60*60 MONTH = 30*DAY SCHEMA_v1 = """ CREATE TABLE version -- added in v1 ( version INTEGER -- contains one row, set to 2 ); CREATE TABLE local_files -- added in v1 ( path VARCHAR(1024) PRIMARY KEY, -- index, this is an absolute UTF-8-encoded local filename size INTEGER, -- os.stat(fn)[stat.ST_SIZE] mtime NUMBER, -- os.stat(fn)[stat.ST_MTIME] ctime NUMBER, -- os.stat(fn)[stat.ST_CTIME] fileid INTEGER ); CREATE TABLE caps -- added in v1 ( fileid INTEGER PRIMARY KEY AUTOINCREMENT, filecap VARCHAR(256) UNIQUE -- URI:CHK:... ); CREATE TABLE last_upload -- added in v1 ( fileid INTEGER PRIMARY KEY, last_uploaded TIMESTAMP, last_checked TIMESTAMP ); """ TABLE_DIRECTORY = """ CREATE TABLE directories -- added in v2 ( dirhash varchar(256) PRIMARY KEY, -- base32(dirhash) dircap varchar(256), -- URI:DIR2-CHK:... last_uploaded TIMESTAMP, last_checked TIMESTAMP ); """ SCHEMA_v2 = SCHEMA_v1 + TABLE_DIRECTORY UPDATE_v1_to_v2 = TABLE_DIRECTORY + """ UPDATE version SET version=2; """ def get_backupdb(dbfile, stderr=sys.stderr, create_version=(SCHEMA_v2, 2), just_create=False): # open or create the given backupdb file. The parent directory must # exist. import sqlite3 must_create = not os.path.exists(dbfile) try: db = sqlite3.connect(dbfile) except (EnvironmentError, sqlite3.OperationalError), e: print >>stderr, "Unable to create/open backupdb file %s: %s" % (dbfile, e) return None c = db.cursor() if must_create: schema, version = create_version c.executescript(schema) c.execute("INSERT INTO version (version) VALUES (?)", (version,)) db.commit() try: c.execute("SELECT version FROM version") version = c.fetchone()[0] except sqlite3.DatabaseError, e: # this indicates that the file is not a compatible database format. # Perhaps it was created with an old version, or it might be junk. 
print >>stderr, "backupdb file is unusable: %s" % e return None if just_create: # for tests return True if version == 1: c.executescript(UPDATE_v1_to_v2) db.commit() version = 2 if version == 2: return BackupDB_v2(sqlite3, db) print >>stderr, "Unable to handle backupdb version %s" % version return None class FileResult: def __init__(self, bdb, filecap, should_check, path, mtime, ctime, size): self.bdb = bdb self.filecap = filecap self.should_check_p = should_check self.path = path self.mtime = mtime self.ctime = ctime self.size = size def was_uploaded(self): if self.filecap: return self.filecap return False def did_upload(self, filecap): self.bdb.did_upload_file(filecap, self.path, self.mtime, self.ctime, self.size) def should_check(self): return self.should_check_p def did_check_healthy(self, results): self.bdb.did_check_file_healthy(self.filecap, results) class DirectoryResult: def __init__(self, bdb, dirhash, dircap, should_check): self.bdb = bdb self.dircap = dircap self.should_check_p = should_check self.dirhash = dirhash def was_created(self): if self.dircap: return self.dircap return False def did_create(self, dircap): self.bdb.did_create_directory(dircap, self.dirhash) def should_check(self): return self.should_check_p def did_check_healthy(self, results): self.bdb.did_check_directory_healthy(self.dircap, results) class BackupDB_v2: VERSION = 2 NO_CHECK_BEFORE = 1*MONTH ALWAYS_CHECK_AFTER = 2*MONTH def __init__(self, sqlite_module, connection): self.sqlite_module = sqlite_module self.connection = connection self.cursor = connection.cursor() def check_file(self, path, use_timestamps=True): """I will tell you if a given local file needs to be uploaded or not, by looking in a database and seeing if I have a record of this file having been uploaded earlier. I return a FileResults object, synchronously. If r.was_uploaded() returns False, you should upload the file. When you are finished uploading it, call r.did_upload(filecap), so I can update my database. If was_uploaded() returns a filecap, you might be able to avoid an upload. Call r.should_check(), and if it says False, you can skip the upload and use the filecap returned by was_uploaded(). If should_check() returns True, you should perform a filecheck on the filecap returned by was_uploaded(). If the check indicates the file is healthy, please call r.did_check_healthy(checker_results) so I can update the database, using the de-JSONized response from the webapi t=check call for 'checker_results'. If the check indicates the file is not healthy, please upload the file and call r.did_upload(filecap) when you're done. I use_timestamps=True (the default), I will compare ctime and mtime of the local file against an entry in my database, and consider the file to be unchanged if ctime, mtime, and filesize are all the same as the earlier version. If use_timestamps=False, I will not trust the timestamps, so more files (perhaps all) will be marked as needing upload. A future version of this database may hash the file to make equality decisions, in which case use_timestamps=False will not always imply r.must_upload()==True. 'path' points to a local file on disk, possibly relative to the current working directory. The database stores absolute pathnames. 
""" path = abspath_expanduser_unicode(path) s = os.stat(path) size = s[stat.ST_SIZE] ctime = s[stat.ST_CTIME] mtime = s[stat.ST_MTIME] now = time.time() c = self.cursor c.execute("SELECT size,mtime,ctime,fileid" " FROM local_files" " WHERE path=?", (path,)) row = self.cursor.fetchone() if not row: return FileResult(self, None, False, path, mtime, ctime, size) (last_size,last_mtime,last_ctime,last_fileid) = row c.execute("SELECT caps.filecap, last_upload.last_checked" " FROM caps,last_upload" " WHERE caps.fileid=? AND last_upload.fileid=?", (last_fileid, last_fileid)) row2 = c.fetchone() if ((last_size != size or not use_timestamps or last_mtime != mtime or last_ctime != ctime) # the file has been changed or (not row2) # we somehow forgot where we put the file last time ): c.execute("DELETE FROM local_files WHERE path=?", (path,)) self.connection.commit() return FileResult(self, None, False, path, mtime, ctime, size) # at this point, we're allowed to assume the file hasn't been changed (filecap, last_checked) = row2 age = now - last_checked probability = ((age - self.NO_CHECK_BEFORE) / (self.ALWAYS_CHECK_AFTER - self.NO_CHECK_BEFORE)) probability = min(max(probability, 0.0), 1.0) should_check = bool(random.random() < probability) return FileResult(self, to_str(filecap), should_check, path, mtime, ctime, size) def get_or_allocate_fileid_for_cap(self, filecap): # find an existing fileid for this filecap, or insert a new one. The # caller is required to commit() afterwards. # mysql has "INSERT ... ON DUPLICATE KEY UPDATE", but not sqlite # sqlite has "INSERT ON CONFLICT REPLACE", but not mysql # So we use INSERT, ignore any error, then a SELECT c = self.cursor try: c.execute("INSERT INTO caps (filecap) VALUES (?)", (filecap,)) except (self.sqlite_module.IntegrityError, self.sqlite_module.OperationalError): # sqlite3 on sid gives IntegrityError # pysqlite2 (which we don't use, so maybe no longer relevant) on dapper gives OperationalError pass c.execute("SELECT fileid FROM caps WHERE filecap=?", (filecap,)) foundrow = c.fetchone() assert foundrow fileid = foundrow[0] return fileid def did_upload_file(self, filecap, path, mtime, ctime, size): now = time.time() fileid = self.get_or_allocate_fileid_for_cap(filecap) try: self.cursor.execute("INSERT INTO last_upload VALUES (?,?,?)", (fileid, now, now)) except (self.sqlite_module.IntegrityError, self.sqlite_module.OperationalError): self.cursor.execute("UPDATE last_upload" " SET last_uploaded=?, last_checked=?" " WHERE fileid=?", (now, now, fileid)) try: self.cursor.execute("INSERT INTO local_files VALUES (?,?,?,?,?)", (path, size, mtime, ctime, fileid)) except (self.sqlite_module.IntegrityError, self.sqlite_module.OperationalError): self.cursor.execute("UPDATE local_files" " SET size=?, mtime=?, ctime=?, fileid=?" " WHERE path=?", (size, mtime, ctime, fileid, path)) self.connection.commit() def did_check_file_healthy(self, filecap, results): now = time.time() fileid = self.get_or_allocate_fileid_for_cap(filecap) self.cursor.execute("UPDATE last_upload" " SET last_checked=?" " WHERE fileid=?", (now, fileid)) self.connection.commit() def check_directory(self, contents): """I will tell you if a new directory needs to be created for a given set of directory contents, or if I know of an existing (immutable) directory that can be used instead. 'contents' should be a dictionary that maps from child name (a single unicode string) to immutable childcap (filecap or dircap). I return a DirectoryResult object, synchronously. 
If r.was_created() returns False, you should create the directory (with t=mkdir-immutable). When you are finished, call r.did_create(dircap) so I can update my database. If was_created() returns a dircap, you might be able to avoid the mkdir. Call r.should_check(), and if it says False, you can skip the mkdir and use the dircap returned by was_created(). If should_check() returns True, you should perform a check operation on the dircap returned by was_created(). If the check indicates the directory is healthy, please call r.did_check_healthy(checker_results) so I can update the database, using the de-JSONized response from the webapi t=check call for 'checker_results'. If the check indicates the directory is not healthy, please repair or re-create the directory and call r.did_create(dircap) when you're done. """ now = time.time() entries = [] for name in contents: entries.append( [name.encode("utf-8"), contents[name]] ) entries.sort() data = "".join([netstring(name_utf8)+netstring(cap) for (name_utf8,cap) in entries]) dirhash = backupdb_dirhash(data) dirhash_s = base32.b2a(dirhash) c = self.cursor c.execute("SELECT dircap, last_checked" " FROM directories WHERE dirhash=?", (dirhash_s,)) row = c.fetchone() if not row: return DirectoryResult(self, dirhash_s, None, False) (dircap, last_checked) = row age = now - last_checked probability = ((age - self.NO_CHECK_BEFORE) / (self.ALWAYS_CHECK_AFTER - self.NO_CHECK_BEFORE)) probability = min(max(probability, 0.0), 1.0) should_check = bool(random.random() < probability) return DirectoryResult(self, dirhash_s, to_str(dircap), should_check) def did_create_directory(self, dircap, dirhash): now = time.time() # if the dirhash is already present (i.e. we've re-uploaded an # existing directory, possibly replacing the dircap with a new one), # update the record in place. Otherwise create a new record.) self.cursor.execute("REPLACE INTO directories VALUES (?,?,?,?)", (dirhash, dircap, now, now)) self.connection.commit() def did_check_directory_healthy(self, dircap, results): now = time.time() self.cursor.execute("UPDATE directories" " SET last_checked=?" " WHERE dircap=?", (now, dircap)) self.connection.commit() tahoe-lafs-1.10.0/src/allmydata/scripts/cli.py000066400000000000000000000562421221140116300211630ustar00rootroot00000000000000import os.path, re, fnmatch from twisted.python import usage from allmydata.scripts.common import get_aliases, get_default_nodedir, \ DEFAULT_ALIAS, BaseOptions from allmydata.util.encodingutil import argv_to_unicode, argv_to_abspath, quote_output NODEURL_RE=re.compile("http(s?)://([^:]*)(:([1-9][0-9]*))?") _default_nodedir = get_default_nodedir() class FilesystemOptions(BaseOptions): optParameters = [ ["node-url", "u", None, "Specify the URL of the Tahoe gateway node, such as " "'http://127.0.0.1:3456'. 
" "This overrides the URL found in the --node-directory ."], ["dir-cap", None, None, "Specify which dirnode URI should be used as the 'tahoe' alias."] ] def postOptions(self): self["quiet"] = self.parent["quiet"] if self.parent['node-directory']: self['node-directory'] = argv_to_abspath(self.parent['node-directory']) else: self['node-directory'] = _default_nodedir # compute a node-url from the existing options, put in self['node-url'] if self['node-url']: if (not isinstance(self['node-url'], basestring) or not NODEURL_RE.match(self['node-url'])): msg = ("--node-url is required to be a string and look like " "\"http://HOSTNAMEORADDR:PORT\", not: %r" % (self['node-url'],)) raise usage.UsageError(msg) else: node_url_file = os.path.join(self['node-directory'], "node.url") self['node-url'] = open(node_url_file, "r").read().strip() if self['node-url'][-1] != "/": self['node-url'] += "/" aliases = get_aliases(self['node-directory']) if self['dir-cap']: aliases[DEFAULT_ALIAS] = self['dir-cap'] self.aliases = aliases # maps alias name to dircap class MakeDirectoryOptions(FilesystemOptions): optParameters = [ ("format", None, None, "Create a directory with the given format: SDMF or MDMF (case-insensitive)"), ] def parseArgs(self, where=""): self.where = argv_to_unicode(where) if self['format']: if self['format'].upper() not in ("SDMF", "MDMF"): raise usage.UsageError("%s is an invalid format" % self['format']) def getSynopsis(self): return "Usage: %s [global-opts] mkdir [options] [REMOTE_DIR]" % (self.command_name,) longdesc = """Create a new directory, either unlinked or as a subdirectory.""" class AddAliasOptions(FilesystemOptions): def parseArgs(self, alias, cap): self.alias = argv_to_unicode(alias) if self.alias.endswith(u':'): self.alias = self.alias[:-1] self.cap = cap def getSynopsis(self): return "Usage: %s [global-opts] add-alias [options] ALIAS[:] DIRCAP" % (self.command_name,) longdesc = """Add a new alias for an existing directory.""" class CreateAliasOptions(FilesystemOptions): def parseArgs(self, alias): self.alias = argv_to_unicode(alias) if self.alias.endswith(u':'): self.alias = self.alias[:-1] def getSynopsis(self): return "Usage: %s [global-opts] create-alias [options] ALIAS[:]" % (self.command_name,) longdesc = """Create a new directory and add an alias for it.""" class ListAliasesOptions(FilesystemOptions): def getSynopsis(self): return "Usage: %s [global-opts] list-aliases [options]" % (self.command_name,) longdesc = """Display a table of all configured aliases.""" class ListOptions(FilesystemOptions): optFlags = [ ("long", "l", "Use long format: show file sizes, and timestamps."), ("uri", "u", "Show file/directory URIs."), ("readonly-uri", None, "Show read-only file/directory URIs."), ("classify", "F", "Append '/' to directory names, and '*' to mutable."), ("json", None, "Show the raw JSON output."), ] def parseArgs(self, where=""): self.where = argv_to_unicode(where) def getSynopsis(self): return "Usage: %s [global-opts] ls [options] [PATH]" % (self.command_name,) longdesc = """ List the contents of some portion of the grid. If PATH is omitted, "tahoe:" is assumed. When the -l or --long option is used, each line is shown in the following format: drwx where each of the letters on the left may be replaced by '-'. If 'd' is present, it indicates that the object is a directory. If the 'd' is replaced by a '?', the object type is unknown. 
'rwx' is a Unix-like permissions mask: if the mask includes 'w', then the object is writeable through its link in this directory (note that the link might be replaceable even if the object is not writeable through the current link). The 'x' is a legacy of Unix filesystems. In Tahoe it is used only to indicate that the contents of a directory can be listed. Directories have no size, so their size field is shown as '-'. Otherwise the size of the file, when known, is given in bytes. The size of mutable files or unknown objects is shown as '?'. The date/time shows when this link in the Tahoe filesystem was last modified. """ class GetOptions(FilesystemOptions): def parseArgs(self, arg1, arg2=None): # tahoe get FOO |less # write to stdout # tahoe get tahoe:FOO |less # same # tahoe get FOO bar # write to local file # tahoe get tahoe:FOO bar # same self.from_file = argv_to_unicode(arg1) if arg2: self.to_file = argv_to_unicode(arg2) else: self.to_file = None if self.to_file == "-": self.to_file = None def getSynopsis(self): return "Usage: %s [global-opts] get [options] REMOTE_FILE LOCAL_FILE" % (self.command_name,) longdesc = """ Retrieve a file from the grid and write it to the local filesystem. If LOCAL_FILE is omitted or '-', the contents of the file will be written to stdout.""" def getUsage(self, width=None): t = FilesystemOptions.getUsage(self, width) t += """ Examples: % tahoe get FOO |less # write to stdout % tahoe get tahoe:FOO |less # same % tahoe get FOO bar # write to local file % tahoe get tahoe:FOO bar # same """ return t class PutOptions(FilesystemOptions): optFlags = [ ("mutable", "m", "Create a mutable file instead of an immutable one (like --format=SDMF)"), ] optParameters = [ ("format", None, None, "Create a file with the given format: SDMF and MDMF for mutable, CHK (default) for immutable. (case-insensitive)"), ] def parseArgs(self, arg1=None, arg2=None): # see Examples below if arg1 is not None and arg2 is not None: self.from_file = argv_to_unicode(arg1) self.to_file = argv_to_unicode(arg2) elif arg1 is not None and arg2 is None: self.from_file = argv_to_unicode(arg1) # might be "-" self.to_file = None else: self.from_file = None self.to_file = None if self.from_file == u"-": self.from_file = None if self['format']: if self['format'].upper() not in ("SDMF", "MDMF", "CHK"): raise usage.UsageError("%s is an invalid format" % self['format']) def getSynopsis(self): return "Usage: %s [global-opts] put [options] LOCAL_FILE REMOTE_FILE" % (self.command_name,) longdesc = """ Put a file into the grid, copying its contents from the local filesystem. If REMOTE_FILE is missing, upload the file but do not link it into a directory; also print the new filecap to stdout. If LOCAL_FILE is missing or '-', data will be copied from stdin. REMOTE_FILE is assumed to start with tahoe: unless otherwise specified. If the destination file already exists and is mutable, it will be modified in-place, whether or not --mutable is specified. 
(--mutable only affects creation of new files.)""" def getUsage(self, width=None): t = FilesystemOptions.getUsage(self, width) t += """ Examples: % cat FILE | tahoe put # create unlinked file from stdin % cat FILE | tahoe put - # same % tahoe put bar # create unlinked file from local 'bar' % cat FILE | tahoe put - FOO # create tahoe:FOO from stdin % tahoe put bar FOO # copy local 'bar' to tahoe:FOO % tahoe put bar tahoe:FOO # same % tahoe put bar MUTABLE-FILE-WRITECAP # modify the mutable file in-place """ return t class CpOptions(FilesystemOptions): optFlags = [ ("recursive", "r", "Copy source directory recursively."), ("verbose", "v", "Be noisy about what is happening."), ("caps-only", None, "When copying to local files, write out filecaps instead of actual " "data (only useful for debugging and tree-comparison purposes)."), ] def parseArgs(self, *args): if len(args) < 2: raise usage.UsageError("cp requires at least two arguments") self.sources = map(argv_to_unicode, args[:-1]) self.destination = argv_to_unicode(args[-1]) def getSynopsis(self): return "Usage: %s [global-opts] cp [options] FROM.. TO" % (self.command_name,) longdesc = """ Use 'tahoe cp' to copy files between a local filesystem and a Tahoe grid. Any FROM/TO arguments that begin with an alias indicate Tahoe-side files or non-file arguments. Directories will be copied recursively. New Tahoe-side directories will be created when necessary. Assuming that you have previously set up an alias 'home' with 'tahoe create-alias home', here are some examples: tahoe cp ~/foo.txt home: # creates tahoe-side home:foo.txt tahoe cp ~/foo.txt /tmp/bar.txt home: # copies two files to home: tahoe cp ~/Pictures home:stuff/my-pictures # copies directory recursively You can also use a dircap as either FROM or TO target: tahoe cp URI:DIR2-RO:ixqhc4kdbjxc7o65xjnveoewym:5x6lwoxghrd5rxhwunzavft2qygfkt27oj3fbxlq4c6p45z5uneq/blog.html ./ # copy Zooko's wiki page to a local file This command still has some limitations: symlinks and special files (device nodes, named pipes) are not handled very well. Arguments should probably not have trailing slashes. 'tahoe cp' does not behave as much like /bin/cp as you would wish, especially with respect to trailing slashes. """ class UnlinkOptions(FilesystemOptions): def parseArgs(self, where): self.where = argv_to_unicode(where) def getSynopsis(self): return "Usage: %s [global-opts] unlink [options] REMOTE_FILE" % (self.command_name,) class RmOptions(UnlinkOptions): def getSynopsis(self): return "Usage: %s [global-opts] rm [options] REMOTE_FILE" % (self.command_name,) class MvOptions(FilesystemOptions): def parseArgs(self, frompath, topath): self.from_file = argv_to_unicode(frompath) self.to_file = argv_to_unicode(topath) def getSynopsis(self): return "Usage: %s [global-opts] mv [options] FROM TO" % (self.command_name,) longdesc = """ Use 'tahoe mv' to move files that are already on the grid elsewhere on the grid, e.g., 'tahoe mv alias:some_file alias:new_file'. If moving a remote file into a remote directory, you'll need to append a '/' to the name of the remote directory, e.g., 'tahoe mv tahoe:file1 tahoe:dir/', not 'tahoe mv tahoe:file1 tahoe:dir'. Note that it is not possible to use this command to move local files to the grid -- use 'tahoe cp' for that. 
""" class LnOptions(FilesystemOptions): def parseArgs(self, frompath, topath): self.from_file = argv_to_unicode(frompath) self.to_file = argv_to_unicode(topath) def getSynopsis(self): return "Usage: %s [global-opts] ln [options] FROM_LINK TO_LINK" % (self.command_name,) longdesc = """ Use 'tahoe ln' to duplicate a link (directory entry) already on the grid to elsewhere on the grid. For example 'tahoe ln alias:some_file alias:new_file'. causes 'alias:new_file' to point to the same object that 'alias:some_file' points to. (The argument order is the same as Unix ln. To remember the order, you can think of this command as copying a link, rather than copying a file as 'tahoe cp' does. Then the argument order is consistent with that of 'tahoe cp'.) When linking a remote file into a remote directory, you'll need to append a '/' to the name of the remote directory, e.g. 'tahoe ln tahoe:file1 tahoe:dir/' (which is shorthand for 'tahoe ln tahoe:file1 tahoe:dir/file1'). If you forget the '/', e.g. 'tahoe ln tahoe:file1 tahoe:dir', the 'ln' command will refuse to overwrite the 'tahoe:dir' directory, and will exit with an error. Note that it is not possible to use this command to create links between local and remote files. """ class BackupConfigurationError(Exception): pass class BackupOptions(FilesystemOptions): optFlags = [ ("verbose", "v", "Be noisy about what is happening."), ("ignore-timestamps", None, "Do not use backupdb timestamps to decide whether a local file is unchanged."), ] vcs_patterns = ('CVS', 'RCS', 'SCCS', '.git', '.gitignore', '.cvsignore', '.svn', '.arch-ids','{arch}', '=RELEASE-ID', '=meta-update', '=update', '.bzr', '.bzrignore', '.bzrtags', '.hg', '.hgignore', '_darcs') def __init__(self): super(BackupOptions, self).__init__() self['exclude'] = set() def parseArgs(self, localdir, topath): self.from_dir = argv_to_unicode(localdir) self.to_dir = argv_to_unicode(topath) def getSynopsis(self): return "Usage: %s [global-opts] backup [options] FROM ALIAS:TO" % (self.command_name,) def opt_exclude(self, pattern): """Ignore files matching a glob pattern. You may give multiple '--exclude' options.""" g = argv_to_unicode(pattern).strip() if g: exclude = self['exclude'] exclude.add(g) def opt_exclude_from(self, filepath): """Ignore file matching glob patterns listed in file, one per line. The file is assumed to be in the argv encoding.""" abs_filepath = argv_to_abspath(filepath) try: exclude_file = file(abs_filepath) except: raise BackupConfigurationError('Error opening exclude file %s.' % quote_output(abs_filepath)) try: for line in exclude_file: self.opt_exclude(line) finally: exclude_file.close() def opt_exclude_vcs(self): """Exclude files and directories used by following version control systems: CVS, RCS, SCCS, Git, SVN, Arch, Bazaar(bzr), Mercurial, Darcs.""" for pattern in self.vcs_patterns: self.opt_exclude(pattern) def filter_listdir(self, listdir): """Yields non-excluded childpaths in path.""" exclude = self['exclude'] exclude_regexps = [re.compile(fnmatch.translate(pat)) for pat in exclude] for filename in listdir: for regexp in exclude_regexps: if regexp.match(filename): break else: yield filename longdesc = """ Add a versioned backup of the local FROM directory to a timestamped subdirectory of the TO/Archives directory on the grid, sharing as many files and directories as possible with earlier backups. Create TO/Latest as a reference to the latest backup. 
Behaves somewhat like 'rsync -a --link-dest=TO/Archives/(previous) FROM TO/Archives/(new); ln -sf TO/Archives/(new) TO/Latest'.""" class WebopenOptions(FilesystemOptions): optFlags = [ ("info", "i", "Open the t=info page for the file"), ] def parseArgs(self, where=''): self.where = argv_to_unicode(where) def getSynopsis(self): return "Usage: %s [global-opts] webopen [options] [ALIAS:PATH]" % (self.command_name,) longdesc = """Open a web browser to the contents of some file or directory on the grid. When run without arguments, open the Welcome page.""" class ManifestOptions(FilesystemOptions): optFlags = [ ("storage-index", "s", "Only print storage index strings, not pathname+cap."), ("verify-cap", None, "Only print verifycap, not pathname+cap."), ("repair-cap", None, "Only print repaircap, not pathname+cap."), ("raw", "r", "Display raw JSON data instead of parsed."), ] def parseArgs(self, where=''): self.where = argv_to_unicode(where) def getSynopsis(self): return "Usage: %s [global-opts] manifest [options] [ALIAS:PATH]" % (self.command_name,) longdesc = """Print a list of all files and directories reachable from the given starting point.""" class StatsOptions(FilesystemOptions): optFlags = [ ("raw", "r", "Display raw JSON data instead of parsed"), ] def parseArgs(self, where=''): self.where = argv_to_unicode(where) def getSynopsis(self): return "Usage: %s [global-opts] stats [options] [ALIAS:PATH]" % (self.command_name,) longdesc = """Print statistics about of all files and directories reachable from the given starting point.""" class CheckOptions(FilesystemOptions): optFlags = [ ("raw", None, "Display raw JSON data instead of parsed."), ("verify", None, "Verify all hashes, instead of merely querying share presence."), ("repair", None, "Automatically repair any problems found."), ("add-lease", None, "Add/renew lease on all shares."), ] def parseArgs(self, where=''): self.where = argv_to_unicode(where) def getSynopsis(self): return "Usage: %s [global-opts] check [options] [ALIAS:PATH]" % (self.command_name,) longdesc = """ Check a single file or directory: count how many shares are available and verify their hashes. Optionally repair the file if any problems were found.""" class DeepCheckOptions(FilesystemOptions): optFlags = [ ("raw", None, "Display raw JSON data instead of parsed."), ("verify", None, "Verify all hashes, instead of merely querying share presence."), ("repair", None, "Automatically repair any problems found."), ("add-lease", None, "Add/renew lease on all shares."), ("verbose", "v", "Be noisy about what is happening."), ] def parseArgs(self, where=''): self.where = argv_to_unicode(where) def getSynopsis(self): return "Usage: %s [global-opts] deep-check [options] [ALIAS:PATH]" % (self.command_name,) longdesc = """ Check all files and directories reachable from the given starting point (which must be a directory), like 'tahoe check' but for multiple files. 
Optionally repair any problems found.""" subCommands = [ ["mkdir", None, MakeDirectoryOptions, "Create a new directory."], ["add-alias", None, AddAliasOptions, "Add a new alias cap."], ["create-alias", None, CreateAliasOptions, "Create a new alias cap."], ["list-aliases", None, ListAliasesOptions, "List all alias caps."], ["ls", None, ListOptions, "List a directory."], ["get", None, GetOptions, "Retrieve a file from the grid."], ["put", None, PutOptions, "Upload a file into the grid."], ["cp", None, CpOptions, "Copy one or more files or directories."], ["unlink", None, UnlinkOptions, "Unlink a file or directory on the grid."], ["rm", None, RmOptions, "Unlink a file or directory on the grid (same as unlink)."], ["mv", None, MvOptions, "Move a file within the grid."], ["ln", None, LnOptions, "Make an additional link to an existing file or directory."], ["backup", None, BackupOptions, "Make target dir look like local dir."], ["webopen", None, WebopenOptions, "Open a web browser to a grid file or directory."], ["manifest", None, ManifestOptions, "List all files/directories in a subtree."], ["stats", None, StatsOptions, "Print statistics about all files/directories in a subtree."], ["check", None, CheckOptions, "Check a single file or directory."], ["deep-check", None, DeepCheckOptions, "Check all files/directories reachable from a starting point."], ] def mkdir(options): from allmydata.scripts import tahoe_mkdir rc = tahoe_mkdir.mkdir(options) return rc def add_alias(options): from allmydata.scripts import tahoe_add_alias rc = tahoe_add_alias.add_alias(options) return rc def create_alias(options): from allmydata.scripts import tahoe_add_alias rc = tahoe_add_alias.create_alias(options) return rc def list_aliases(options): from allmydata.scripts import tahoe_add_alias rc = tahoe_add_alias.list_aliases(options) return rc def list(options): from allmydata.scripts import tahoe_ls rc = tahoe_ls.list(options) return rc def get(options): from allmydata.scripts import tahoe_get rc = tahoe_get.get(options) if rc == 0: if options.to_file is None: # be quiet, since the file being written to stdout should be # proof enough that it worked, unless the user is unlucky # enough to have picked an empty file pass else: print >>options.stderr, "%s retrieved and written to %s" % \ (options.from_file, options.to_file) return rc def put(options): from allmydata.scripts import tahoe_put rc = tahoe_put.put(options) return rc def cp(options): from allmydata.scripts import tahoe_cp rc = tahoe_cp.copy(options) return rc def unlink(options, command="unlink"): from allmydata.scripts import tahoe_unlink rc = tahoe_unlink.unlink(options, command=command) return rc def rm(options): return unlink(options, command="rm") def mv(options): from allmydata.scripts import tahoe_mv rc = tahoe_mv.mv(options, mode="move") return rc def ln(options): from allmydata.scripts import tahoe_mv rc = tahoe_mv.mv(options, mode="link") return rc def backup(options): from allmydata.scripts import tahoe_backup rc = tahoe_backup.backup(options) return rc def webopen(options, opener=None): from allmydata.scripts import tahoe_webopen rc = tahoe_webopen.webopen(options, opener=opener) return rc def manifest(options): from allmydata.scripts import tahoe_manifest rc = tahoe_manifest.manifest(options) return rc def stats(options): from allmydata.scripts import tahoe_manifest rc = tahoe_manifest.stats(options) return rc def check(options): from allmydata.scripts import tahoe_check rc = tahoe_check.check(options) return rc def deepcheck(options): from 
allmydata.scripts import tahoe_check rc = tahoe_check.deepcheck(options) return rc dispatch = { "mkdir": mkdir, "add-alias": add_alias, "create-alias": create_alias, "list-aliases": list_aliases, "ls": list, "get": get, "put": put, "cp": cp, "unlink": unlink, "rm": rm, "mv": mv, "ln": ln, "backup": backup, "webopen": webopen, "manifest": manifest, "stats": stats, "check": check, "deep-check": deepcheck, } tahoe-lafs-1.10.0/src/allmydata/scripts/common.py000066400000000000000000000155321221140116300217010ustar00rootroot00000000000000 import os, sys, urllib import codecs from twisted.python import usage from allmydata.util.assertutil import precondition from allmydata.util.encodingutil import unicode_to_url, quote_output, argv_to_abspath from allmydata.util.fileutil import abspath_expanduser_unicode _default_nodedir = None if sys.platform == 'win32': from allmydata.windows import registry path = registry.get_base_dir_path() if path: precondition(isinstance(path, unicode), path) _default_nodedir = abspath_expanduser_unicode(path) if _default_nodedir is None: path = abspath_expanduser_unicode(u"~/.tahoe") precondition(isinstance(path, unicode), path) _default_nodedir = path def get_default_nodedir(): return _default_nodedir class BaseOptions(usage.Options): def __init__(self): super(BaseOptions, self).__init__() self.command_name = os.path.basename(sys.argv[0]) if self.command_name == 'trial': self.command_name = 'tahoe' # Only allow "tahoe --version", not e.g. "tahoe start --version" def opt_version(self): raise usage.UsageError("--version not allowed on subcommands") class BasedirOptions(BaseOptions): default_nodedir = _default_nodedir optParameters = [ ["basedir", "C", None, "Same as --node-directory (default %s)." % get_default_nodedir()], ] def parseArgs(self, basedir=None): if self.parent['node-directory'] and self['basedir']: raise usage.UsageError("The --node-directory (or -d) and --basedir (or -C) options cannot both be used.") if self.parent['node-directory'] and basedir: raise usage.UsageError("The --node-directory (or -d) option and a basedir argument cannot both be used.") if self['basedir'] and basedir: raise usage.UsageError("The --basedir (or -C) option and a basedir argument cannot both be used.") if basedir: b = argv_to_abspath(basedir) elif self['basedir']: b = argv_to_abspath(self['basedir']) elif self.parent['node-directory']: b = argv_to_abspath(self.parent['node-directory']) elif self.default_nodedir: b = self.default_nodedir else: raise usage.UsageError("No default basedir available, you must provide one with --node-directory, --basedir, or a basedir argument") self['basedir'] = b def postOptions(self): if not self['basedir']: raise usage.UsageError("A base directory for the node must be provided.") DEFAULT_ALIAS = u"tahoe" def get_aliases(nodedir): aliases = {} aliasfile = os.path.join(nodedir, "private", "aliases") rootfile = os.path.join(nodedir, "private", "root_dir.cap") try: f = open(rootfile, "r") rootcap = f.read().strip() if rootcap: aliases[DEFAULT_ALIAS] = rootcap except EnvironmentError: pass try: f = codecs.open(aliasfile, "r", "utf-8") for line in f.readlines(): line = line.strip() if line.startswith("#") or not line: continue name, cap = line.split(u":", 1) # normalize it: remove http: prefix, urldecode cap = cap.strip().encode('utf-8') aliases[name] = cap except EnvironmentError: pass return aliases class DefaultAliasMarker: pass pretend_platform_uses_lettercolon = False # for tests def platform_uses_lettercolon_drivename(): if ("win32" in 
sys.platform.lower() or "cygwin" in sys.platform.lower() or pretend_platform_uses_lettercolon): return True return False class TahoeError(Exception): def __init__(self, msg): Exception.__init__(self, msg) self.msg = msg def display(self, err): print >>err, self.msg class UnknownAliasError(TahoeError): def __init__(self, msg): TahoeError.__init__(self, "error: " + msg) def get_alias(aliases, path_unicode, default): """ Transform u"work:path/filename" into (aliases[u"work"], u"path/filename".encode('utf-8')). If default=None, then an empty alias is indicated by returning DefaultAliasMarker. We special-case strings with a recognized cap URI prefix, to make it easy to access specific files/directories by their caps. If the transformed alias is either not found in aliases, or is blank and default is not found in aliases, an UnknownAliasError is raised. """ precondition(isinstance(path_unicode, unicode), path_unicode) from allmydata import uri path = path_unicode.encode('utf-8').strip(" ") if uri.has_uri_prefix(path): # We used to require "URI:blah:./foo" in order to get a subpath, # stripping out the ":./" sequence. We still allow that for compatibility, # but now also allow just "URI:blah/foo". sep = path.find(":./") if sep != -1: return path[:sep], path[sep+3:] sep = path.find("/") if sep != -1: return path[:sep], path[sep+1:] return path, "" colon = path.find(":") if colon == -1: # no alias if default == None: return DefaultAliasMarker, path if default not in aliases: raise UnknownAliasError("No alias specified, and the default %s alias doesn't exist. " "To create it, use 'tahoe create-alias %s'." % (quote_output(default), quote_output(default, quotemarks=False))) return uri.from_string_dirnode(aliases[default]).to_string(), path if colon == 1 and default is None and platform_uses_lettercolon_drivename(): # treat C:\why\must\windows\be\so\weird as a local path, not a tahoe # file in the "C:" alias return DefaultAliasMarker, path # decoding must succeed because path is valid UTF-8 and colon & space are ASCII alias = path[:colon].decode('utf-8') if u"/" in alias: # no alias, but there's a colon in a dirname/filename, like # "foo/bar:7" if default == None: return DefaultAliasMarker, path if default not in aliases: raise UnknownAliasError("No alias specified, and the default %s alias doesn't exist. " "To create it, use 'tahoe create-alias %s'." % (quote_output(default), quote_output(default, quotemarks=False))) return uri.from_string_dirnode(aliases[default]).to_string(), path if alias not in aliases: raise UnknownAliasError("Unknown alias %s, please create it with 'tahoe add-alias' or 'tahoe create-alias'." 
% quote_output(alias)) return uri.from_string_dirnode(aliases[alias]).to_string(), path[colon+1:] def escape_path(path): segments = path.split("/") return "/".join([urllib.quote(unicode_to_url(s)) for s in segments]) tahoe-lafs-1.10.0/src/allmydata/scripts/common_http.py000066400000000000000000000056471221140116300227460ustar00rootroot00000000000000 import os from cStringIO import StringIO import urlparse, httplib import allmydata # for __full_version__ from allmydata.util.encodingutil import quote_output from allmydata.scripts.common import TahoeError from socket import error as socket_error # copied from twisted/web/client.py def parse_url(url, defaultPort=None): url = url.strip() parsed = urlparse.urlparse(url) scheme = parsed[0] path = urlparse.urlunparse(('','')+parsed[2:]) if defaultPort is None: if scheme == 'https': defaultPort = 443 else: defaultPort = 80 host, port = parsed[1], defaultPort if ':' in host: host, port = host.split(':') port = int(port) if path == "": path = "/" return scheme, host, port, path class BadResponse(object): def __init__(self, url, err): self.status = -1 self.reason = "Error trying to connect to %s: %s" % (url, err) def read(self): return "" def do_http(method, url, body=""): if isinstance(body, str): body = StringIO(body) elif isinstance(body, unicode): raise TypeError("do_http body must be a bytestring, not unicode") else: # We must give a Content-Length header to twisted.web, otherwise it # seems to get a zero-length file. I suspect that "chunked-encoding" # may fix this. assert body.tell assert body.seek assert body.read scheme, host, port, path = parse_url(url) if scheme == "http": c = httplib.HTTPConnection(host, port) elif scheme == "https": c = httplib.HTTPSConnection(host, port) else: raise ValueError("unknown scheme '%s', need http or https" % scheme) c.putrequest(method, path) c.putheader("Hostname", host) c.putheader("User-Agent", allmydata.__full_version__ + " (tahoe-client)") c.putheader("Accept", "text/plain, application/octet-stream") c.putheader("Connection", "close") old = body.tell() body.seek(0, os.SEEK_END) length = body.tell() body.seek(old) c.putheader("Content-Length", str(length)) try: c.endheaders() except socket_error, err: return BadResponse(url, err) while True: data = body.read(8192) if not data: break c.send(data) return c.getresponse() def format_http_success(resp): return "%s %s" % (resp.status, quote_output(resp.reason, quotemarks=False)) def format_http_error(msg, resp): return "%s: %s %s\n%s" % (msg, resp.status, quote_output(resp.reason, quotemarks=False), quote_output(resp.read(), quotemarks=False)) def check_http_error(resp, stderr): if resp.status < 200 or resp.status >= 300: print >>stderr, format_http_error("Error during HTTP request", resp) return 1 class HTTPError(TahoeError): def __init__(self, msg, resp): TahoeError.__init__(self, format_http_error(msg, resp)) tahoe-lafs-1.10.0/src/allmydata/scripts/create_node.py000066400000000000000000000166031221140116300226610ustar00rootroot00000000000000 import os, sys from allmydata.scripts.common import BasedirOptions from allmydata.util.assertutil import precondition from allmydata.util.encodingutil import listdir_unicode, argv_to_unicode, quote_output import allmydata class CreateClientOptions(BasedirOptions): optParameters = [ # we provide 'create-node'-time options for the most common # configuration knobs. The rest can be controlled by editing # tahoe.cfg before node startup. 
("nickname", "n", None, "Specify the nickname for this node."), ("introducer", "i", None, "Specify the introducer FURL to use."), ("webport", "p", "tcp:3456:interface=127.0.0.1", "Specify which TCP port to run the HTTP interface on. Use 'none' to disable."), ] def getSynopsis(self): return "Usage: %s [global-opts] create-client [options] [NODEDIR]" % (self.command_name,) class CreateNodeOptions(CreateClientOptions): optFlags = [ ("no-storage", None, "Do not offer storage service to other nodes."), ] def getSynopsis(self): return "Usage: %s [global-opts] create-node [options] [NODEDIR]" % (self.command_name,) class CreateIntroducerOptions(BasedirOptions): default_nodedir = None def getSynopsis(self): return "Usage: %s [global-opts] create-introducer [options] NODEDIR" % (self.command_name,) client_tac = """ # -*- python -*- import pkg_resources pkg_resources.require('%s') pkg_resources.require('twisted') from allmydata import client from twisted.application import service c = client.Client() application = service.Application("allmydata_client") c.setServiceParent(application) """ % (allmydata.__appname__,) introducer_tac = """ # -*- python -*- import pkg_resources pkg_resources.require('%s') pkg_resources.require('twisted') from allmydata import introducer from twisted.application import service c = introducer.IntroducerNode() application = service.Application("allmydata_introducer") c.setServiceParent(application) """ % (allmydata.__appname__,) def write_node_config(c, config): # this is shared between clients and introducers c.write("# -*- mode: conf; coding: utf-8 -*-\n") c.write("\n") c.write("# This file controls the configuration of the Tahoe node that\n") c.write("# lives in this directory. It is only read at node startup.\n") c.write("# For details about the keys that can be set here, please\n") c.write("# read the 'docs/configuration.rst' file that came with your\n") c.write("# Tahoe installation.\n") c.write("\n\n") c.write("[node]\n") nickname = argv_to_unicode(config.get("nickname") or "") c.write("nickname = %s\n" % (nickname.encode('utf-8'),)) # TODO: validate webport webport = argv_to_unicode(config.get("webport") or "none") if webport.lower() == "none": webport = "" c.write("web.port = %s\n" % (webport.encode('utf-8'),)) c.write("web.static = public_html\n") c.write("#tub.port =\n") c.write("#tub.location = \n") c.write("#log_gatherer.furl =\n") c.write("#timeout.keepalive =\n") c.write("#timeout.disconnect =\n") c.write("#ssh.port = 8022\n") c.write("#ssh.authorized_keys_file = ~/.ssh/authorized_keys\n") c.write("\n") def create_node(config, out=sys.stdout, err=sys.stderr): basedir = config['basedir'] # This should always be called with an absolute Unicode basedir. precondition(isinstance(basedir, unicode), basedir) if os.path.exists(basedir): if listdir_unicode(basedir): print >>err, "The base directory %s is not empty." % quote_output(basedir) print >>err, "To avoid clobbering anything, I am going to quit now." print >>err, "Please use a different directory, or empty this one." 
return -1 # we're willing to use an empty directory else: os.mkdir(basedir) f = open(os.path.join(basedir, "tahoe-client.tac"), "w") f.write(client_tac) f.close() c = open(os.path.join(basedir, "tahoe.cfg"), "w") write_node_config(c, config) c.write("[client]\n") c.write("# Which services should this client connect to?\n") c.write("introducer.furl = %s\n" % config.get("introducer", "")) c.write("helper.furl =\n") c.write("#key_generator.furl =\n") c.write("#stats_gatherer.furl =\n") c.write("\n") c.write("# What encoding parameters should this client use for uploads?\n") c.write("#shares.needed = 3\n") c.write("#shares.happy = 7\n") c.write("#shares.total = 10\n") c.write("\n") boolstr = {True:"true", False:"false"} c.write("[storage]\n") c.write("# Shall this node provide storage service?\n") storage_enabled = not config.get("no-storage", None) c.write("enabled = %s\n" % boolstr[storage_enabled]) c.write("#readonly =\n") c.write("reserved_space = 1G\n") c.write("#expire.enabled =\n") c.write("#expire.mode =\n") c.write("\n") c.write("[helper]\n") c.write("# Shall this node run a helper service that clients can use?\n") c.write("enabled = false\n") c.write("\n") c.write("[drop_upload]\n") c.write("# Shall this node automatically upload files created or modified in a local directory?\n") c.write("enabled = false\n") c.write("# To specify the target of uploads, a mutable directory writecap URI must be placed\n" "# in 'private/drop_upload_dircap'.\n") c.write("local.directory = ~/drop_upload\n") c.write("\n") c.close() from allmydata.util import fileutil fileutil.make_dirs(os.path.join(basedir, "private"), 0700) print >>out, "Node created in %s" % quote_output(basedir) if not config.get("introducer", ""): print >>out, " Please set [client]introducer.furl= in tahoe.cfg!" print >>out, " The node cannot connect to a grid without it." if not config.get("nickname", ""): print >>out, " Please set [node]nickname= in tahoe.cfg" return 0 def create_client(config, out=sys.stdout, err=sys.stderr): config['no-storage'] = True return create_node(config, out=out, err=err) def create_introducer(config, out=sys.stdout, err=sys.stderr): basedir = config['basedir'] # This should always be called with an absolute Unicode basedir. precondition(isinstance(basedir, unicode), basedir) if os.path.exists(basedir): if listdir_unicode(basedir): print >>err, "The base directory %s is not empty." % quote_output(basedir) print >>err, "To avoid clobbering anything, I am going to quit now." print >>err, "Please use a different directory, or empty this one." return -1 # we're willing to use an empty directory else: os.mkdir(basedir) f = open(os.path.join(basedir, "tahoe-introducer.tac"), "w") f.write(introducer_tac) f.close() c = open(os.path.join(basedir, "tahoe.cfg"), "w") write_node_config(c, config) c.close() print >>out, "Introducer created in %s" % quote_output(basedir) return 0 subCommands = [ ["create-node", None, CreateNodeOptions, "Create a node that acts as a client, server or both."], ["create-client", None, CreateClientOptions, "Create a client node (with storage initially disabled)."], ["create-introducer", None, CreateIntroducerOptions, "Create an introducer node."], ] dispatch = { "create-node": create_node, "create-client": create_client, "create-introducer": create_introducer, } tahoe-lafs-1.10.0/src/allmydata/scripts/debug.py000066400000000000000000001255401221140116300215000ustar00rootroot00000000000000 # do not import any allmydata modules at this level. 
Do that from inside # individual functions instead. import struct, time, os, sys from twisted.python import usage, failure from twisted.internet import defer from twisted.scripts import trial as twisted_trial from foolscap.logging import cli as foolscap_cli from allmydata.scripts.common import BaseOptions class DumpOptions(BaseOptions): def getSynopsis(self): return "Usage: tahoe [global-opts] debug dump-share SHARE_FILENAME" optFlags = [ ["offsets", None, "Display a table of section offsets."], ["leases-only", None, "Dump leases but not CHK contents."], ] def getUsage(self, width=None): t = BaseOptions.getUsage(self, width) t += """ Print lots of information about the given share, by parsing the share's contents. This includes share type, lease information, encoding parameters, hash-tree roots, public keys, and segment sizes. This command also emits a verify-cap for the file that uses the share. tahoe debug dump-share testgrid/node-3/storage/shares/4v/4vozh77tsrw7mdhnj7qvp5ky74/0 """ return t def parseArgs(self, filename): from allmydata.util.encodingutil import argv_to_abspath self['filename'] = argv_to_abspath(filename) def dump_share(options): from allmydata.storage.mutable import MutableShareFile from allmydata.util.encodingutil import quote_output out = options.stdout # check the version, to see if we have a mutable or immutable share print >>out, "share filename: %s" % quote_output(options['filename']) f = open(options['filename'], "rb") prefix = f.read(32) f.close() if prefix == MutableShareFile.MAGIC: return dump_mutable_share(options) # otherwise assume it's immutable return dump_immutable_share(options) def dump_immutable_share(options): from allmydata.storage.immutable import ShareFile out = options.stdout f = ShareFile(options['filename']) if not options["leases-only"]: dump_immutable_chk_share(f, out, options) dump_immutable_lease_info(f, out) print >>out return 0 def dump_immutable_chk_share(f, out, options): from allmydata import uri from allmydata.util import base32 from allmydata.immutable.layout import ReadBucketProxy from allmydata.util.encodingutil import quote_output, to_str # use a ReadBucketProxy to parse the bucket and find the uri extension bp = ReadBucketProxy(None, None, '') offsets = bp._parse_offsets(f.read_share_data(0, 0x44)) print >>out, "%20s: %d" % ("version", bp._version) seek = offsets['uri_extension'] length = struct.unpack(bp._fieldstruct, f.read_share_data(seek, bp._fieldsize))[0] seek += bp._fieldsize UEB_data = f.read_share_data(seek, length) unpacked = uri.unpack_extension_readable(UEB_data) keys1 = ("size", "num_segments", "segment_size", "needed_shares", "total_shares") keys2 = ("codec_name", "codec_params", "tail_codec_params") keys3 = ("plaintext_hash", "plaintext_root_hash", "crypttext_hash", "crypttext_root_hash", "share_root_hash", "UEB_hash") display_keys = {"size": "file_size"} for k in keys1: if k in unpacked: dk = display_keys.get(k, k) print >>out, "%20s: %s" % (dk, unpacked[k]) print >>out for k in keys2: if k in unpacked: dk = display_keys.get(k, k) print >>out, "%20s: %s" % (dk, unpacked[k]) print >>out for k in keys3: if k in unpacked: dk = display_keys.get(k, k) print >>out, "%20s: %s" % (dk, unpacked[k]) leftover = set(unpacked.keys()) - set(keys1 + keys2 + keys3) if leftover: print >>out print >>out, "LEFTOVER:" for k in sorted(leftover): print >>out, "%20s: %s" % (k, unpacked[k]) # the storage index isn't stored in the share itself, so we depend upon # knowing the parent directory name to get it pieces = 
options['filename'].split(os.sep) if len(pieces) >= 2: piece = to_str(pieces[-2]) if base32.could_be_base32_encoded(piece): storage_index = base32.a2b(piece) uri_extension_hash = base32.a2b(unpacked["UEB_hash"]) u = uri.CHKFileVerifierURI(storage_index, uri_extension_hash, unpacked["needed_shares"], unpacked["total_shares"], unpacked["size"]) verify_cap = u.to_string() print >>out, "%20s: %s" % ("verify-cap", quote_output(verify_cap, quotemarks=False)) sizes = {} sizes['data'] = (offsets['plaintext_hash_tree'] - offsets['data']) sizes['validation'] = (offsets['uri_extension'] - offsets['plaintext_hash_tree']) sizes['uri-extension'] = len(UEB_data) print >>out print >>out, " Size of data within the share:" for k in sorted(sizes): print >>out, "%20s: %s" % (k, sizes[k]) if options['offsets']: print >>out print >>out, " Section Offsets:" print >>out, "%20s: %s" % ("share data", f._data_offset) for k in ["data", "plaintext_hash_tree", "crypttext_hash_tree", "block_hashes", "share_hashes", "uri_extension"]: name = {"data": "block data"}.get(k,k) offset = f._data_offset + offsets[k] print >>out, " %20s: %s (0x%x)" % (name, offset, offset) print >>out, "%20s: %s" % ("leases", f._lease_offset) def dump_immutable_lease_info(f, out): # display lease information too print >>out leases = list(f.get_leases()) if leases: for i,lease in enumerate(leases): when = format_expiration_time(lease.expiration_time) print >>out, " Lease #%d: owner=%d, expire in %s" \ % (i, lease.owner_num, when) else: print >>out, " No leases." def format_expiration_time(expiration_time): now = time.time() remains = expiration_time - now when = "%ds" % remains if remains > 24*3600: when += " (%d days)" % (remains / (24*3600)) elif remains > 3600: when += " (%d hours)" % (remains / 3600) return when def dump_mutable_share(options): from allmydata.storage.mutable import MutableShareFile from allmydata.util import base32, idlib out = options.stdout m = MutableShareFile(options['filename']) f = open(options['filename'], "rb") WE, nodeid = m._read_write_enabler_and_nodeid(f) num_extra_leases = m._read_num_extra_leases(f) data_length = m._read_data_length(f) extra_lease_offset = m._read_extra_lease_offset(f) container_size = extra_lease_offset - m.DATA_OFFSET leases = list(m._enumerate_leases(f)) share_type = "unknown" f.seek(m.DATA_OFFSET) version = f.read(1) if version == "\x00": # this slot contains an SMDF share share_type = "SDMF" elif version == "\x01": share_type = "MDMF" f.close() print >>out print >>out, "Mutable slot found:" print >>out, " share_type: %s" % share_type print >>out, " write_enabler: %s" % base32.b2a(WE) print >>out, " WE for nodeid: %s" % idlib.nodeid_b2a(nodeid) print >>out, " num_extra_leases: %d" % num_extra_leases print >>out, " container_size: %d" % container_size print >>out, " data_length: %d" % data_length if leases: for (leasenum, lease) in leases: print >>out print >>out, " Lease #%d:" % leasenum print >>out, " ownerid: %d" % lease.owner_num when = format_expiration_time(lease.expiration_time) print >>out, " expires in %s" % when print >>out, " renew_secret: %s" % base32.b2a(lease.renew_secret) print >>out, " cancel_secret: %s" % base32.b2a(lease.cancel_secret) print >>out, " secrets are for nodeid: %s" % idlib.nodeid_b2a(lease.nodeid) else: print >>out, "No leases." 
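# The share_type determined above (first byte at m.DATA_OFFSET: "\x00" means
# SDMF, "\x01" means MDMF) selects which of the two format-specific dumpers
# below handles the rest of the output.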
print >>out if share_type == "SDMF": dump_SDMF_share(m, data_length, options) elif share_type == "MDMF": dump_MDMF_share(m, data_length, options) return 0 def dump_SDMF_share(m, length, options): from allmydata.mutable.layout import unpack_share, unpack_header from allmydata.mutable.common import NeedMoreDataError from allmydata.util import base32, hashutil from allmydata.uri import SSKVerifierURI from allmydata.util.encodingutil import quote_output, to_str offset = m.DATA_OFFSET out = options.stdout f = open(options['filename'], "rb") f.seek(offset) data = f.read(min(length, 2000)) f.close() try: pieces = unpack_share(data) except NeedMoreDataError, e: # retry once with the larger size size = e.needed_bytes f = open(options['filename'], "rb") f.seek(offset) data = f.read(min(length, size)) f.close() pieces = unpack_share(data) (seqnum, root_hash, IV, k, N, segsize, datalen, pubkey, signature, share_hash_chain, block_hash_tree, share_data, enc_privkey) = pieces (ig_version, ig_seqnum, ig_roothash, ig_IV, ig_k, ig_N, ig_segsize, ig_datalen, offsets) = unpack_header(data) print >>out, " SDMF contents:" print >>out, " seqnum: %d" % seqnum print >>out, " root_hash: %s" % base32.b2a(root_hash) print >>out, " IV: %s" % base32.b2a(IV) print >>out, " required_shares: %d" % k print >>out, " total_shares: %d" % N print >>out, " segsize: %d" % segsize print >>out, " datalen: %d" % datalen print >>out, " enc_privkey: %d bytes" % len(enc_privkey) print >>out, " pubkey: %d bytes" % len(pubkey) print >>out, " signature: %d bytes" % len(signature) share_hash_ids = ",".join(sorted([str(hid) for hid in share_hash_chain.keys()])) print >>out, " share_hash_chain: %s" % share_hash_ids print >>out, " block_hash_tree: %d nodes" % len(block_hash_tree) # the storage index isn't stored in the share itself, so we depend upon # knowing the parent directory name to get it pieces = options['filename'].split(os.sep) if len(pieces) >= 2: piece = to_str(pieces[-2]) if base32.could_be_base32_encoded(piece): storage_index = base32.a2b(piece) fingerprint = hashutil.ssk_pubkey_fingerprint_hash(pubkey) u = SSKVerifierURI(storage_index, fingerprint) verify_cap = u.to_string() print >>out, " verify-cap:", quote_output(verify_cap, quotemarks=False) if options['offsets']: # NOTE: this offset-calculation code is fragile, and needs to be # merged with MutableShareFile's internals. 
print >>out print >>out, " Section Offsets:" def printoffset(name, value, shift=0): print >>out, "%s%20s: %s (0x%x)" % (" "*shift, name, value, value) printoffset("first lease", m.HEADER_SIZE) printoffset("share data", m.DATA_OFFSET) o_seqnum = m.DATA_OFFSET + struct.calcsize(">B") printoffset("seqnum", o_seqnum, 2) o_root_hash = m.DATA_OFFSET + struct.calcsize(">BQ") printoffset("root_hash", o_root_hash, 2) for k in ["signature", "share_hash_chain", "block_hash_tree", "share_data", "enc_privkey", "EOF"]: name = {"share_data": "block data", "EOF": "end of share data"}.get(k,k) offset = m.DATA_OFFSET + offsets[k] printoffset(name, offset, 2) f = open(options['filename'], "rb") printoffset("extra leases", m._read_extra_lease_offset(f) + 4) f.close() print >>out def dump_MDMF_share(m, length, options): from allmydata.mutable.layout import MDMFSlotReadProxy from allmydata.util import base32, hashutil from allmydata.uri import MDMFVerifierURI from allmydata.util.encodingutil import quote_output, to_str offset = m.DATA_OFFSET out = options.stdout f = open(options['filename'], "rb") storage_index = None; shnum = 0 class ShareDumper(MDMFSlotReadProxy): def _read(self, readvs, force_remote=False, queue=False): data = [] for (where,length) in readvs: f.seek(offset+where) data.append(f.read(length)) return defer.succeed({shnum: data}) p = ShareDumper(None, storage_index, shnum) def extract(func): stash = [] # these methods return Deferreds, but we happen to know that they run # synchronously when not actually talking to a remote server d = func() d.addCallback(stash.append) return stash[0] verinfo = extract(p.get_verinfo) encprivkey = extract(p.get_encprivkey) signature = extract(p.get_signature) pubkey = extract(p.get_verification_key) block_hash_tree = extract(p.get_blockhashes) share_hash_chain = extract(p.get_sharehashes) f.close() (seqnum, root_hash, salt_to_use, segsize, datalen, k, N, prefix, offsets) = verinfo print >>out, " MDMF contents:" print >>out, " seqnum: %d" % seqnum print >>out, " root_hash: %s" % base32.b2a(root_hash) #print >>out, " IV: %s" % base32.b2a(IV) print >>out, " required_shares: %d" % k print >>out, " total_shares: %d" % N print >>out, " segsize: %d" % segsize print >>out, " datalen: %d" % datalen print >>out, " enc_privkey: %d bytes" % len(encprivkey) print >>out, " pubkey: %d bytes" % len(pubkey) print >>out, " signature: %d bytes" % len(signature) share_hash_ids = ",".join([str(hid) for hid in sorted(share_hash_chain.keys())]) print >>out, " share_hash_chain: %s" % share_hash_ids print >>out, " block_hash_tree: %d nodes" % len(block_hash_tree) # the storage index isn't stored in the share itself, so we depend upon # knowing the parent directory name to get it pieces = options['filename'].split(os.sep) if len(pieces) >= 2: piece = to_str(pieces[-2]) if base32.could_be_base32_encoded(piece): storage_index = base32.a2b(piece) fingerprint = hashutil.ssk_pubkey_fingerprint_hash(pubkey) u = MDMFVerifierURI(storage_index, fingerprint) verify_cap = u.to_string() print >>out, " verify-cap:", quote_output(verify_cap, quotemarks=False) if options['offsets']: # NOTE: this offset-calculation code is fragile, and needs to be # merged with MutableShareFile's internals. 
        print >>out
        print >>out, " Section Offsets:"
        def printoffset(name, value, shift=0):
            print >>out, "%s%.20s: %s (0x%x)" % (" "*shift, name, value, value)
        printoffset("first lease", m.HEADER_SIZE, 2)
        printoffset("share data", m.DATA_OFFSET, 2)
        o_seqnum = m.DATA_OFFSET + struct.calcsize(">B")
        printoffset("seqnum", o_seqnum, 4)
        o_root_hash = m.DATA_OFFSET + struct.calcsize(">BQ")
        printoffset("root_hash", o_root_hash, 4)
        for k in ["enc_privkey", "share_hash_chain", "signature",
                  "verification_key", "verification_key_end",
                  "share_data", "block_hash_tree", "EOF"]:
            name = {"share_data": "block data",
                    "verification_key": "pubkey",
                    "verification_key_end": "end of pubkey",
                    "EOF": "end of share data"}.get(k,k)
            offset = m.DATA_OFFSET + offsets[k]
            printoffset(name, offset, 4)
        f = open(options['filename'], "rb")
        printoffset("extra leases", m._read_extra_lease_offset(f) + 4, 2)
        f.close()

    print >>out


class DumpCapOptions(BaseOptions):
    def getSynopsis(self):
        return "Usage: tahoe [global-opts] debug dump-cap [options] FILECAP"

    optParameters = [
        ["nodeid", "n", None,
         "Specify the storage server nodeid (ASCII), to construct WE and secrets."],
        ["client-secret", "c", None,
         "Specify the client's base secret (ASCII), to construct secrets."],
        ["client-dir", "d", None,
         "Specify the client's base directory, from which a -c secret will be read."],
        ]
    def parseArgs(self, cap):
        self.cap = cap

    def getUsage(self, width=None):
        t = BaseOptions.getUsage(self, width)
        t += """
Print information about the given cap-string (aka: URI, file-cap, dir-cap,
read-cap, write-cap). The URI string is parsed and unpacked. This prints the
type of the cap, its storage index, and any derived keys.

 tahoe debug dump-cap URI:SSK-Verifier:4vozh77tsrw7mdhnj7qvp5ky74:q7f3dwz76sjys4kqfdt3ocur2pay3a6rftnkqmi2uxu3vqsdsofq

This may be useful to determine if a read-cap and a write-cap refer to the
same file, or to extract the storage-index from a file-cap (to then use with
find-shares)

If additional information is provided (storage server nodeid and/or client
base secret), this command will compute the shared secrets used for the
write-enabler and for lease-renewal.
""" return t def dump_cap(options): from allmydata import uri from allmydata.util import base32 from base64 import b32decode import urlparse, urllib out = options.stdout cap = options.cap nodeid = None if options['nodeid']: nodeid = b32decode(options['nodeid'].upper()) secret = None if options['client-secret']: secret = base32.a2b(options['client-secret']) elif options['client-dir']: secretfile = os.path.join(options['client-dir'], "private", "secret") try: secret = base32.a2b(open(secretfile, "r").read().strip()) except EnvironmentError: pass if cap.startswith("http"): scheme, netloc, path, params, query, fragment = urlparse.urlparse(cap) assert path.startswith("/uri/") cap = urllib.unquote(path[len("/uri/"):]) u = uri.from_string(cap) print >>out dump_uri_instance(u, nodeid, secret, out) def _dump_secrets(storage_index, secret, nodeid, out): from allmydata.util import hashutil from allmydata.util import base32 if secret: crs = hashutil.my_renewal_secret_hash(secret) print >>out, " client renewal secret:", base32.b2a(crs) frs = hashutil.file_renewal_secret_hash(crs, storage_index) print >>out, " file renewal secret:", base32.b2a(frs) if nodeid: renew = hashutil.bucket_renewal_secret_hash(frs, nodeid) print >>out, " lease renewal secret:", base32.b2a(renew) ccs = hashutil.my_cancel_secret_hash(secret) print >>out, " client cancel secret:", base32.b2a(ccs) fcs = hashutil.file_cancel_secret_hash(ccs, storage_index) print >>out, " file cancel secret:", base32.b2a(fcs) if nodeid: cancel = hashutil.bucket_cancel_secret_hash(fcs, nodeid) print >>out, " lease cancel secret:", base32.b2a(cancel) def dump_uri_instance(u, nodeid, secret, out, show_header=True): from allmydata import uri from allmydata.storage.server import si_b2a from allmydata.util import base32, hashutil from allmydata.util.encodingutil import quote_output if isinstance(u, uri.CHKFileURI): if show_header: print >>out, "CHK File:" print >>out, " key:", base32.b2a(u.key) print >>out, " UEB hash:", base32.b2a(u.uri_extension_hash) print >>out, " size:", u.size print >>out, " k/N: %d/%d" % (u.needed_shares, u.total_shares) print >>out, " storage index:", si_b2a(u.get_storage_index()) _dump_secrets(u.get_storage_index(), secret, nodeid, out) elif isinstance(u, uri.CHKFileVerifierURI): if show_header: print >>out, "CHK Verifier URI:" print >>out, " UEB hash:", base32.b2a(u.uri_extension_hash) print >>out, " size:", u.size print >>out, " k/N: %d/%d" % (u.needed_shares, u.total_shares) print >>out, " storage index:", si_b2a(u.get_storage_index()) elif isinstance(u, uri.LiteralFileURI): if show_header: print >>out, "Literal File URI:" print >>out, " data:", quote_output(u.data) elif isinstance(u, uri.WriteableSSKFileURI): # SDMF if show_header: print >>out, "SDMF Writeable URI:" print >>out, " writekey:", base32.b2a(u.writekey) print >>out, " readkey:", base32.b2a(u.readkey) print >>out, " storage index:", si_b2a(u.get_storage_index()) print >>out, " fingerprint:", base32.b2a(u.fingerprint) print >>out if nodeid: we = hashutil.ssk_write_enabler_hash(u.writekey, nodeid) print >>out, " write_enabler:", base32.b2a(we) print >>out _dump_secrets(u.get_storage_index(), secret, nodeid, out) elif isinstance(u, uri.ReadonlySSKFileURI): if show_header: print >>out, "SDMF Read-only URI:" print >>out, " readkey:", base32.b2a(u.readkey) print >>out, " storage index:", si_b2a(u.get_storage_index()) print >>out, " fingerprint:", base32.b2a(u.fingerprint) elif isinstance(u, uri.SSKVerifierURI): if show_header: print >>out, "SDMF Verifier URI:" print 
>>out, " storage index:", si_b2a(u.get_storage_index()) print >>out, " fingerprint:", base32.b2a(u.fingerprint) elif isinstance(u, uri.WriteableMDMFFileURI): # MDMF if show_header: print >>out, "MDMF Writeable URI:" print >>out, " writekey:", base32.b2a(u.writekey) print >>out, " readkey:", base32.b2a(u.readkey) print >>out, " storage index:", si_b2a(u.get_storage_index()) print >>out, " fingerprint:", base32.b2a(u.fingerprint) print >>out if nodeid: we = hashutil.ssk_write_enabler_hash(u.writekey, nodeid) print >>out, " write_enabler:", base32.b2a(we) print >>out _dump_secrets(u.get_storage_index(), secret, nodeid, out) elif isinstance(u, uri.ReadonlyMDMFFileURI): if show_header: print >>out, "MDMF Read-only URI:" print >>out, " readkey:", base32.b2a(u.readkey) print >>out, " storage index:", si_b2a(u.get_storage_index()) print >>out, " fingerprint:", base32.b2a(u.fingerprint) elif isinstance(u, uri.MDMFVerifierURI): if show_header: print >>out, "MDMF Verifier URI:" print >>out, " storage index:", si_b2a(u.get_storage_index()) print >>out, " fingerprint:", base32.b2a(u.fingerprint) elif isinstance(u, uri.ImmutableDirectoryURI): # CHK-based directory if show_header: print >>out, "CHK Directory URI:" dump_uri_instance(u._filenode_uri, nodeid, secret, out, False) elif isinstance(u, uri.ImmutableDirectoryURIVerifier): if show_header: print >>out, "CHK Directory Verifier URI:" dump_uri_instance(u._filenode_uri, nodeid, secret, out, False) elif isinstance(u, uri.DirectoryURI): # SDMF-based directory if show_header: print >>out, "Directory Writeable URI:" dump_uri_instance(u._filenode_uri, nodeid, secret, out, False) elif isinstance(u, uri.ReadonlyDirectoryURI): if show_header: print >>out, "Directory Read-only URI:" dump_uri_instance(u._filenode_uri, nodeid, secret, out, False) elif isinstance(u, uri.DirectoryURIVerifier): if show_header: print >>out, "Directory Verifier URI:" dump_uri_instance(u._filenode_uri, nodeid, secret, out, False) elif isinstance(u, uri.MDMFDirectoryURI): # MDMF-based directory if show_header: print >>out, "Directory Writeable URI:" dump_uri_instance(u._filenode_uri, nodeid, secret, out, False) elif isinstance(u, uri.ReadonlyMDMFDirectoryURI): if show_header: print >>out, "Directory Read-only URI:" dump_uri_instance(u._filenode_uri, nodeid, secret, out, False) elif isinstance(u, uri.MDMFDirectoryURIVerifier): if show_header: print >>out, "Directory Verifier URI:" dump_uri_instance(u._filenode_uri, nodeid, secret, out, False) else: print >>out, "unknown cap type" class FindSharesOptions(BaseOptions): def getSynopsis(self): return "Usage: tahoe [global-opts] debug find-shares STORAGE_INDEX NODEDIRS.." def parseArgs(self, storage_index_s, *nodedirs): from allmydata.util.encodingutil import argv_to_abspath self.si_s = storage_index_s self.nodedirs = map(argv_to_abspath, nodedirs) def getUsage(self, width=None): t = BaseOptions.getUsage(self, width) t += """ Locate all shares for the given storage index. This command looks through one or more node directories to find the shares. It returns a list of filenames, one per line, for each share file found. tahoe debug find-shares 4vozh77tsrw7mdhnj7qvp5ky74 testgrid/node-* It may be useful during testing, when running a test grid in which all the nodes are on a local disk. The share files thus located can be counted, examined (with dump-share), or corrupted/deleted to test checker/repairer. 
""" return t def find_shares(options): """Given a storage index and a list of node directories, emit a list of all matching shares to stdout, one per line. For example: find-shares.py 44kai1tui348689nrw8fjegc8c ~/testnet/node-* gives: /home/warner/testnet/node-1/storage/shares/44k/44kai1tui348689nrw8fjegc8c/5 /home/warner/testnet/node-1/storage/shares/44k/44kai1tui348689nrw8fjegc8c/9 /home/warner/testnet/node-2/storage/shares/44k/44kai1tui348689nrw8fjegc8c/2 """ from allmydata.storage.server import si_a2b, storage_index_to_dir from allmydata.util.encodingutil import listdir_unicode out = options.stdout sharedir = storage_index_to_dir(si_a2b(options.si_s)) for d in options.nodedirs: d = os.path.join(d, "storage/shares", sharedir) if os.path.exists(d): for shnum in listdir_unicode(d): print >>out, os.path.join(d, shnum) return 0 class CatalogSharesOptions(BaseOptions): """ """ def parseArgs(self, *nodedirs): from allmydata.util.encodingutil import argv_to_abspath self.nodedirs = map(argv_to_abspath, nodedirs) if not nodedirs: raise usage.UsageError("must specify at least one node directory") def getSynopsis(self): return "Usage: tahoe [global-opts] debug catalog-shares NODEDIRS.." def getUsage(self, width=None): t = BaseOptions.getUsage(self, width) t += """ Locate all shares in the given node directories, and emit a one-line summary of each share. Run it like this: tahoe debug catalog-shares testgrid/node-* >allshares.txt The lines it emits will look like the following: CHK $SI $k/$N $filesize $UEB_hash $expiration $abspath_sharefile SDMF $SI $k/$N $filesize $seqnum/$roothash $expiration $abspath_sharefile UNKNOWN $abspath_sharefile This command can be used to build up a catalog of shares from many storage servers and then sort the results to compare all shares for the same file. If you see shares with the same SI but different parameters/filesize/UEB_hash, then something is wrong. The misc/find-share/anomalies.py script may be useful for purpose. 
""" return t def call(c, *args, **kwargs): # take advantage of the fact that ImmediateReadBucketProxy returns # Deferreds that are already fired results = [] failures = [] d = defer.maybeDeferred(c, *args, **kwargs) d.addCallbacks(results.append, failures.append) if failures: failures[0].raiseException() return results[0] def describe_share(abs_sharefile, si_s, shnum_s, now, out): from allmydata import uri from allmydata.storage.mutable import MutableShareFile from allmydata.storage.immutable import ShareFile from allmydata.mutable.layout import unpack_share from allmydata.mutable.common import NeedMoreDataError from allmydata.immutable.layout import ReadBucketProxy from allmydata.util import base32 from allmydata.util.encodingutil import quote_output import struct f = open(abs_sharefile, "rb") prefix = f.read(32) if prefix == MutableShareFile.MAGIC: # mutable share m = MutableShareFile(abs_sharefile) WE, nodeid = m._read_write_enabler_and_nodeid(f) data_length = m._read_data_length(f) expiration_time = min( [lease.expiration_time for (i,lease) in m._enumerate_leases(f)] ) expiration = max(0, expiration_time - now) share_type = "unknown" f.seek(m.DATA_OFFSET) version = f.read(1) if version == "\x00": # this slot contains an SMDF share share_type = "SDMF" elif version == "\x01": share_type = "MDMF" if share_type == "SDMF": f.seek(m.DATA_OFFSET) data = f.read(min(data_length, 2000)) try: pieces = unpack_share(data) except NeedMoreDataError, e: # retry once with the larger size size = e.needed_bytes f.seek(m.DATA_OFFSET) data = f.read(min(data_length, size)) pieces = unpack_share(data) (seqnum, root_hash, IV, k, N, segsize, datalen, pubkey, signature, share_hash_chain, block_hash_tree, share_data, enc_privkey) = pieces print >>out, "SDMF %s %d/%d %d #%d:%s %d %s" % \ (si_s, k, N, datalen, seqnum, base32.b2a(root_hash), expiration, quote_output(abs_sharefile)) elif share_type == "MDMF": from allmydata.mutable.layout import MDMFSlotReadProxy fake_shnum = 0 # TODO: factor this out with dump_MDMF_share() class ShareDumper(MDMFSlotReadProxy): def _read(self, readvs, force_remote=False, queue=False): data = [] for (where,length) in readvs: f.seek(m.DATA_OFFSET+where) data.append(f.read(length)) return defer.succeed({fake_shnum: data}) p = ShareDumper(None, "fake-si", fake_shnum) def extract(func): stash = [] # these methods return Deferreds, but we happen to know that # they run synchronously when not actually talking to a # remote server d = func() d.addCallback(stash.append) return stash[0] verinfo = extract(p.get_verinfo) (seqnum, root_hash, salt_to_use, segsize, datalen, k, N, prefix, offsets) = verinfo print >>out, "MDMF %s %d/%d %d #%d:%s %d %s" % \ (si_s, k, N, datalen, seqnum, base32.b2a(root_hash), expiration, quote_output(abs_sharefile)) else: print >>out, "UNKNOWN mutable %s" % quote_output(abs_sharefile) elif struct.unpack(">L", prefix[:4]) == (1,): # immutable class ImmediateReadBucketProxy(ReadBucketProxy): def __init__(self, sf): self.sf = sf ReadBucketProxy.__init__(self, None, None, "") def __repr__(self): return "" def _read(self, offset, size): return defer.succeed(sf.read_share_data(offset, size)) # use a ReadBucketProxy to parse the bucket and find the uri extension sf = ShareFile(abs_sharefile) bp = ImmediateReadBucketProxy(sf) expiration_time = min( [lease.expiration_time for lease in sf.get_leases()] ) expiration = max(0, expiration_time - now) UEB_data = call(bp.get_uri_extension) unpacked = uri.unpack_extension_readable(UEB_data) k = unpacked["needed_shares"] N = 
unpacked["total_shares"] filesize = unpacked["size"] ueb_hash = unpacked["UEB_hash"] print >>out, "CHK %s %d/%d %d %s %d %s" % (si_s, k, N, filesize, ueb_hash, expiration, quote_output(abs_sharefile)) else: print >>out, "UNKNOWN really-unknown %s" % quote_output(abs_sharefile) f.close() def catalog_shares(options): from allmydata.util.encodingutil import listdir_unicode, quote_output out = options.stdout err = options.stderr now = time.time() for d in options.nodedirs: d = os.path.join(d, "storage/shares") try: abbrevs = listdir_unicode(d) except EnvironmentError: # ignore nodes that have storage turned off altogether pass else: for abbrevdir in sorted(abbrevs): if abbrevdir == "incoming": continue abbrevdir = os.path.join(d, abbrevdir) # this tool may get run against bad disks, so we can't assume # that listdir_unicode will always succeed. Try to catalog as much # as possible. try: sharedirs = listdir_unicode(abbrevdir) for si_s in sorted(sharedirs): si_dir = os.path.join(abbrevdir, si_s) catalog_shares_one_abbrevdir(si_s, si_dir, now, out,err) except: print >>err, "Error processing %s" % quote_output(abbrevdir) failure.Failure().printTraceback(err) return 0 def _as_number(s): try: return int(s) except ValueError: return "not int" def catalog_shares_one_abbrevdir(si_s, si_dir, now, out, err): from allmydata.util.encodingutil import listdir_unicode, quote_output try: for shnum_s in sorted(listdir_unicode(si_dir), key=_as_number): abs_sharefile = os.path.join(si_dir, shnum_s) assert os.path.isfile(abs_sharefile) try: describe_share(abs_sharefile, si_s, shnum_s, now, out) except: print >>err, "Error processing %s" % quote_output(abs_sharefile) failure.Failure().printTraceback(err) except: print >>err, "Error processing %s" % quote_output(si_dir) failure.Failure().printTraceback(err) class CorruptShareOptions(BaseOptions): def getSynopsis(self): return "Usage: tahoe [global-opts] debug corrupt-share SHARE_FILENAME" optParameters = [ ["offset", "o", "block-random", "Specify which bit to flip."], ] def getUsage(self, width=None): t = BaseOptions.getUsage(self, width) t += """ Corrupt the given share by flipping a bit. This will cause a verifying/downloading client to log an integrity-check failure incident, and downloads will proceed with a different share. The --offset parameter controls which bit should be flipped. The default is to flip a single random bit of the block data. tahoe debug corrupt-share testgrid/node-3/storage/shares/4v/4vozh77tsrw7mdhnj7qvp5ky74/0 Obviously, this command should not be used in normal operation. """ return t def parseArgs(self, filename): self['filename'] = filename def corrupt_share(options): import random from allmydata.storage.mutable import MutableShareFile from allmydata.storage.immutable import ShareFile from allmydata.mutable.layout import unpack_header from allmydata.immutable.layout import ReadBucketProxy out = options.stdout fn = options['filename'] assert options["offset"] == "block-random", "other offsets not implemented" # first, what kind of share is it? 
def flip_bit(start, end): offset = random.randrange(start, end) bit = random.randrange(0, 8) print >>out, "[%d..%d): %d.b%d" % (start, end, offset, bit) f = open(fn, "rb+") f.seek(offset) d = f.read(1) d = chr(ord(d) ^ 0x01) f.seek(offset) f.write(d) f.close() f = open(fn, "rb") prefix = f.read(32) f.close() if prefix == MutableShareFile.MAGIC: # mutable m = MutableShareFile(fn) f = open(fn, "rb") f.seek(m.DATA_OFFSET) data = f.read(2000) # make sure this slot contains an SMDF share assert data[0] == "\x00", "non-SDMF mutable shares not supported" f.close() (version, ig_seqnum, ig_roothash, ig_IV, ig_k, ig_N, ig_segsize, ig_datalen, offsets) = unpack_header(data) assert version == 0, "we only handle v0 SDMF files" start = m.DATA_OFFSET + offsets["share_data"] end = m.DATA_OFFSET + offsets["enc_privkey"] flip_bit(start, end) else: # otherwise assume it's immutable f = ShareFile(fn) bp = ReadBucketProxy(None, None, '') offsets = bp._parse_offsets(f.read_share_data(0, 0x24)) start = f._data_offset + offsets["data"] end = f._data_offset + offsets["plaintext_hash_tree"] flip_bit(start, end) class ReplOptions(BaseOptions): def getSynopsis(self): return "Usage: tahoe [global-opts] debug repl" def repl(options): import code return code.interact() DEFAULT_TESTSUITE = 'allmydata' class TrialOptions(twisted_trial.Options): def getSynopsis(self): return "Usage: tahoe [global-opts] debug trial [options] [[file|package|module|TestCase|testmethod]...]" def parseOptions(self, all_subargs, *a, **kw): self.trial_args = list(all_subargs) # any output from the option parsing will be printed twice, but that's harmless return twisted_trial.Options.parseOptions(self, all_subargs, *a, **kw) def parseArgs(self, *nonoption_args): if not nonoption_args: self.trial_args.append(DEFAULT_TESTSUITE) def getUsage(self, width=None): t = twisted_trial.Options.getUsage(self, width) t += """ The 'tahoe debug trial' command uses the correct imports for this instance of Tahoe-LAFS. The default test suite is '%s'. """ % (DEFAULT_TESTSUITE,) return t def trial(config): sys.argv = ['trial'] + config.trial_args # This does not return. twisted_trial.run() def fixOptionsClass( (subcmd, shortcut, OptionsClass, desc) ): class FixedOptionsClass(OptionsClass): def getSynopsis(self): t = OptionsClass.getSynopsis(self) i = t.find("Usage: flogtool ") if i >= 0: return "Usage: tahoe [global-opts] debug flogtool " + t[i+len("Usage: flogtool "):] else: return "Usage: tahoe [global-opts] debug flogtool %s [options]" % (subcmd,) return (subcmd, shortcut, FixedOptionsClass, desc) class FlogtoolOptions(foolscap_cli.Options): def __init__(self): super(FlogtoolOptions, self).__init__() self.subCommands = map(fixOptionsClass, self.subCommands) def getSynopsis(self): return "Usage: tahoe [global-opts] debug flogtool (%s) [command options]" % ("|".join([x[0] for x in self.subCommands])) def parseOptions(self, all_subargs, *a, **kw): self.flogtool_args = list(all_subargs) return super(FlogtoolOptions, self).parseOptions(self.flogtool_args, *a, **kw) def getUsage(self, width=None): t = super(FlogtoolOptions, self).getUsage(width) t += """ The 'tahoe debug flogtool' command uses the correct imports for this instance of Tahoe-LAFS. Please run 'tahoe debug flogtool SUBCOMMAND --help' for more details on each subcommand. 
""" return t def opt_help(self): print str(self) sys.exit(0) def flogtool(config): sys.argv = ['flogtool'] + config.flogtool_args return foolscap_cli.run_flogtool() class DebugCommand(BaseOptions): subCommands = [ ["dump-share", None, DumpOptions, "Unpack and display the contents of a share (uri_extension and leases)."], ["dump-cap", None, DumpCapOptions, "Unpack a read-cap or write-cap."], ["find-shares", None, FindSharesOptions, "Locate sharefiles in node dirs."], ["catalog-shares", None, CatalogSharesOptions, "Describe all shares in node dirs."], ["corrupt-share", None, CorruptShareOptions, "Corrupt a share by flipping a bit."], ["repl", None, ReplOptions, "Open a Python interpreter."], ["trial", None, TrialOptions, "Run tests using Twisted Trial with the right imports."], ["flogtool", None, FlogtoolOptions, "Utilities to access log files."], ] def postOptions(self): if not hasattr(self, 'subOptions'): raise usage.UsageError("must specify a subcommand") def getSynopsis(self): return "" def getUsage(self, width=None): #t = BaseOptions.getUsage(self, width) t = """Usage: tahoe debug SUBCOMMAND Subcommands: tahoe debug dump-share Unpack and display the contents of a share. tahoe debug dump-cap Unpack a read-cap or write-cap. tahoe debug find-shares Locate sharefiles in node directories. tahoe debug catalog-shares Describe all shares in node dirs. tahoe debug corrupt-share Corrupt a share by flipping a bit. tahoe debug repl Open a Python interpreter. tahoe debug trial Run tests using Twisted Trial with the right imports. tahoe debug flogtool Utilities to access log files. Please run e.g. 'tahoe debug dump-share --help' for more details on each subcommand. """ # See ticket #1441 for why we print different information when # run via /usr/bin/tahoe. Note that argv[0] is the full path. if sys.argv[0] == '/usr/bin/tahoe': t += """ To get branch coverage for the Tahoe test suite (on the installed copy of Tahoe), install the 'python-coverage' package and then use: python-coverage run --branch /usr/bin/tahoe debug trial """ else: t += """ Another debugging feature is that bin%stahoe allows executing an arbitrary "runner" command (typically an installed Python script, such as 'coverage'), with the Tahoe libraries on the PYTHONPATH. The runner command name is prefixed with '@', and any occurrences of '@tahoe' in its arguments are replaced by the full path to the tahoe script. For example, if 'coverage' is installed and on the PATH, you can use: bin%stahoe @coverage run --branch @tahoe debug trial to get branch coverage for the Tahoe test suite. 
Or, to run python with the -3 option that warns about Python 3 incompatibilities: bin%stahoe @python -3 @tahoe command [options] """ % (os.sep, os.sep, os.sep) return t subDispatch = { "dump-share": dump_share, "dump-cap": dump_cap, "find-shares": find_shares, "catalog-shares": catalog_shares, "corrupt-share": corrupt_share, "repl": repl, "trial": trial, "flogtool": flogtool, } def do_debug(options): so = options.subOptions so.stdout = options.stdout so.stderr = options.stderr f = subDispatch[options.subCommand] return f(so) subCommands = [ ["debug", None, DebugCommand, "debug subcommands: use 'tahoe debug' for a list."], ] dispatch = { "debug": do_debug, } tahoe-lafs-1.10.0/src/allmydata/scripts/keygen.py000066400000000000000000000034051221140116300216670ustar00rootroot00000000000000 import os, sys from allmydata.scripts.common import BasedirOptions from allmydata.util.assertutil import precondition from allmydata.util.encodingutil import listdir_unicode, quote_output class CreateKeyGeneratorOptions(BasedirOptions): default_nodedir = None def getSynopsis(self): return "Usage: %s [global-opts] create-key-generator [options] NODEDIR" % (self.command_name,) keygen_tac = """ # -*- python -*- import pkg_resources pkg_resources.require('allmydata-tahoe') from allmydata import key_generator from twisted.application import service k = key_generator.KeyGeneratorService(default_key_size=2048) #k.key_generator.verbose = False #k.key_generator.pool_size = 16 #k.key_generator.pool_refresh_delay = 6 application = service.Application("allmydata_key_generator") k.setServiceParent(application) """ def create_key_generator(config, out=sys.stdout, err=sys.stderr): basedir = config['basedir'] # This should always be called with an absolute Unicode basedir. precondition(isinstance(basedir, unicode), basedir) if os.path.exists(basedir): if listdir_unicode(basedir): print >>err, "The base directory %s is not empty." % quote_output(basedir) print >>err, "To avoid clobbering anything, I am going to quit now." print >>err, "Please use a different directory, or empty this one." return -1 # we're willing to use an empty directory else: os.mkdir(basedir) f = open(os.path.join(basedir, "tahoe-key-generator.tac"), "wb") f.write(keygen_tac) f.close() return 0 subCommands = [ ["create-key-generator", None, CreateKeyGeneratorOptions, "Create a key generator service."], ] dispatch = { "create-key-generator": create_key_generator, } tahoe-lafs-1.10.0/src/allmydata/scripts/runner.py000066400000000000000000000123541221140116300217210ustar00rootroot00000000000000 import os, sys from cStringIO import StringIO from twisted.python import usage from allmydata.scripts.common import get_default_nodedir from allmydata.scripts import debug, create_node, startstop_node, cli, keygen, stats_gatherer, admin from allmydata.util.encodingutil import quote_output, get_io_encoding def GROUP(s): # Usage.parseOptions compares argv[1] against command[0], so it will # effectively ignore any "subcommand" that starts with a newline. We use # these to insert section headers into the --help output. return [("\n" + s, None, None, None)] _default_nodedir = get_default_nodedir() NODEDIR_HELP = ("Specify which Tahoe node directory should be used. The " "directory should either contain a full Tahoe node, or a " "file named node.url that points to some other Tahoe node. 
" "It should also contain a file named '" + os.path.join('private', 'aliases') + "' which contains the mapping from alias name to root " "dirnode URI.") if _default_nodedir: NODEDIR_HELP += " [default for most commands: " + quote_output(_default_nodedir) + "]" class Options(usage.Options): # unit tests can override these to point at StringIO instances stdin = sys.stdin stdout = sys.stdout stderr = sys.stderr synopsis = "\nUsage: tahoe [command options]" subCommands = ( GROUP("Administration") + create_node.subCommands + keygen.subCommands + stats_gatherer.subCommands + admin.subCommands + GROUP("Controlling a node") + startstop_node.subCommands + GROUP("Debugging") + debug.subCommands + GROUP("Using the filesystem") + cli.subCommands ) optFlags = [ ["quiet", "q", "Operate silently."], ["version", "V", "Display version numbers."], ["version-and-path", None, "Display version numbers and paths to their locations."], ] optParameters = [ ["node-directory", "d", None, NODEDIR_HELP], ] def opt_version(self): import allmydata print >>self.stdout, allmydata.get_package_versions_string(debug=True) self.no_command_needed = True def opt_version_and_path(self): import allmydata print >>self.stdout, allmydata.get_package_versions_string(show_paths=True, debug=True) self.no_command_needed = True def getSynopsis(self): return "\nUsage: tahoe [global-opts] [command-options]" def getUsage(self, **kwargs): t = usage.Options.getUsage(self, **kwargs) return t + "\nPlease run 'tahoe --help' for more details on each command.\n" def postOptions(self): if not hasattr(self, 'subOptions'): if not hasattr(self, 'no_command_needed'): raise usage.UsageError("must specify a command") sys.exit(0) create_dispatch = {} for module in (create_node, keygen, stats_gatherer): create_dispatch.update(module.dispatch) def runner(argv, run_by_human=True, stdin=None, stdout=None, stderr=None, install_node_control=True, additional_commands=None): stdin = stdin or sys.stdin stdout = stdout or sys.stdout stderr = stderr or sys.stderr config = Options() if install_node_control: config.subCommands.extend(startstop_node.subCommands) ac_dispatch = {} if additional_commands: for ac in additional_commands: config.subCommands.extend(ac.subCommands) ac_dispatch.update(ac.dispatch) try: config.parseOptions(argv) except usage.error, e: if not run_by_human: raise c = config while hasattr(c, 'subOptions'): c = c.subOptions print >>stdout, str(c) try: msg = e.args[0].decode(get_io_encoding()) except Exception: msg = repr(e) print >>stdout, "%s: %s\n" % (sys.argv[0], quote_output(msg, quotemarks=False)) return 1 command = config.subCommand so = config.subOptions if config['quiet']: stdout = StringIO() so.stdout = stdout so.stderr = stderr so.stdin = stdin if command in create_dispatch: rc = create_dispatch[command](so, stdout, stderr) elif command in startstop_node.dispatch: rc = startstop_node.dispatch[command](so, stdout, stderr) elif command in debug.dispatch: rc = debug.dispatch[command](so) elif command in admin.dispatch: rc = admin.dispatch[command](so) elif command in cli.dispatch: rc = cli.dispatch[command](so) elif command in ac_dispatch: rc = ac_dispatch[command](so, stdout, stderr) else: raise usage.UsageError() return rc def run(install_node_control=True): try: if sys.platform == "win32": from allmydata.windows.fixups import initialize initialize() rc = runner(sys.argv[1:], install_node_control=install_node_control) except Exception: import traceback traceback.print_exc() rc = 1 sys.exit(rc) 
tahoe-lafs-1.10.0/src/allmydata/scripts/slow_operation.py000066400000000000000000000053441221140116300234550ustar00rootroot00000000000000 import os, time from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path, \ UnknownAliasError from allmydata.scripts.common_http import do_http, format_http_error from allmydata.util import base32 from allmydata.util.encodingutil import quote_output, is_printable_ascii import urllib import simplejson class SlowOperationRunner: def run(self, options): stderr = options.stderr self.options = options self.ophandle = ophandle = base32.b2a(os.urandom(16)) nodeurl = options['node-url'] if not nodeurl.endswith("/"): nodeurl += "/" self.nodeurl = nodeurl where = options.where try: rootcap, path = get_alias(options.aliases, where, DEFAULT_ALIAS) except UnknownAliasError, e: e.display(stderr) return 1 if path == '/': path = '' url = nodeurl + "uri/%s" % urllib.quote(rootcap) if path: url += "/" + escape_path(path) # todo: should it end with a slash? url = self.make_url(url, ophandle) resp = do_http("POST", url) if resp.status not in (200, 302): print >>stderr, format_http_error("ERROR", resp) return 1 # now we poll for results. We nominally poll at t=1, 5, 10, 30, 60, # 90, k*120 seconds, but if the poll takes non-zero time, that will # be slightly longer. I'm not worried about trying to make up for # that time. return self.wait_for_results() def poll_times(self): for i in (1,5,10,30,60,90): yield i i = 120 while True: yield i i += 120 def wait_for_results(self): last = 0 for next in self.poll_times(): delay = next - last time.sleep(delay) last = next if self.poll(): return 0 def poll(self): url = self.nodeurl + "operations/" + self.ophandle url += "?t=status&output=JSON&release-after-complete=true" stdout = self.options.stdout stderr = self.options.stderr resp = do_http("GET", url) if resp.status != 200: print >>stderr, format_http_error("ERROR", resp) return True jdata = resp.read() data = simplejson.loads(jdata) if not data["finished"]: return False if self.options.get("raw"): if is_printable_ascii(jdata): print >>stdout, jdata else: print >>stderr, "The JSON response contained unprintable characters:\n%s" % quote_output(jdata) return True self.write_results(data) return True tahoe-lafs-1.10.0/src/allmydata/scripts/startstop_node.py000066400000000000000000000147651221140116300234700ustar00rootroot00000000000000 import os, sys, signal, time from allmydata.scripts.common import BasedirOptions from allmydata.util import fileutil from allmydata.util.assertutil import precondition from allmydata.util.encodingutil import listdir_unicode, quote_output class StartOptions(BasedirOptions): optFlags = [ ["profile", "p", "Run under the Python profiler, putting results in 'profiling_results.prof'."], ["syslog", None, "Tell the node to log to syslog, not a file."], ] def getSynopsis(self): return "Usage: %s [global-opts] start [options] [NODEDIR]" % (self.command_name,) class StopOptions(BasedirOptions): def getSynopsis(self): return "Usage: %s [global-opts] stop [options] [NODEDIR]" % (self.command_name,) class RestartOptions(BasedirOptions): optFlags = [ ["profile", "p", "Run under the Python profiler, putting results in 'profiling_results.prof'."], ["syslog", None, "Tell the node to log to syslog, not a file."], ] def getSynopsis(self): return "Usage: %s [global-opts] restart [options] [NODEDIR]" % (self.command_name,) class RunOptions(BasedirOptions): default_nodedir = u"." 
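# Note: RunOptions overrides default_nodedir (u".") so that "tahoe run" with
# no NODEDIR argument operates on the current directory, unlike the
# start/stop/restart commands above, which keep the BasedirOptions default.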
def getSynopsis(self): return "Usage: %s [global-opts] run [options] [NODEDIR]" % (self.command_name,) def start(opts, out=sys.stdout, err=sys.stderr): basedir = opts['basedir'] print >>out, "STARTING", quote_output(basedir) if not os.path.isdir(basedir): print >>err, "%s does not look like a directory at all" % quote_output(basedir) return 1 for fn in listdir_unicode(basedir): if fn.endswith(u".tac"): tac = str(fn) break else: print >>err, "%s does not look like a node directory (no .tac file)" % quote_output(basedir) return 1 if "client" in tac: nodetype = "client" elif "introducer" in tac: nodetype = "introducer" else: nodetype = "unknown (%s)" % tac args = ["twistd", "-y", tac] if opts["syslog"]: args.append("--syslog") elif nodetype in ("client", "introducer"): fileutil.make_dirs(os.path.join(basedir, "logs")) args.extend(["--logfile", os.path.join("logs", "twistd.log")]) if opts["profile"]: args.extend(["--profile=profiling_results.prof", "--savestats",]) # now we're committed os.chdir(basedir) from twisted.scripts import twistd sys.argv = args twistd.run() # run() doesn't return: the parent does os._exit(0) in daemonize(), so # we'll never get here. If application setup fails (e.g. ImportError), # run() will raise an exception. def stop(config, out=sys.stdout, err=sys.stderr): basedir = config['basedir'] print >>out, "STOPPING", quote_output(basedir) pidfile = os.path.join(basedir, "twistd.pid") if not os.path.exists(pidfile): print >>err, "%s does not look like a running node directory (no twistd.pid)" % quote_output(basedir) # we define rc=2 to mean "nothing is running, but it wasn't me who # stopped it" return 2 pid = open(pidfile, "r").read() pid = int(pid) # kill it hard (SIGKILL), delete the twistd.pid file, then wait for the # process itself to go away. If it hasn't gone away after 20 seconds, warn # the user but keep waiting until they give up. try: os.kill(pid, signal.SIGKILL) except OSError, oserr: if oserr.errno == 3: print oserr.strerror # the process didn't exist, so wipe the pid file os.remove(pidfile) return 2 else: raise try: os.remove(pidfile) except EnvironmentError: pass start = time.time() time.sleep(0.1) wait = 40 first_time = True while True: # poll once per second until we see the process is no longer running try: os.kill(pid, 0) except OSError: print >>out, "process %d is dead" % pid return wait -= 1 if wait < 0: if first_time: print >>err, ("It looks like pid %d is still running " "after %d seconds" % (pid, (time.time() - start))) print >>err, "I will keep watching it until you interrupt me." 
wait = 10 first_time = False else: print >>err, "pid %d still running after %d seconds" % \ (pid, (time.time() - start)) wait = 10 time.sleep(1) # we define rc=1 to mean "I think something is still running, sorry" return 1 def restart(config, stdout, stderr): rc = stop(config, stdout, stderr) if rc == 2: print >>stderr, "ignoring couldn't-stop" rc = 0 if rc: print >>stderr, "not restarting" return rc return start(config, stdout, stderr) def run(config, stdout, stderr): from twisted.internet import reactor from twisted.python import log, logfile from allmydata import client basedir = config['basedir'] precondition(isinstance(basedir, unicode), basedir) if not os.path.isdir(basedir): print >>stderr, "%s does not look like a directory at all" % quote_output(basedir) return 1 for fn in listdir_unicode(basedir): if fn.endswith(u".tac"): tac = str(fn) break else: print >>stderr, "%s does not look like a node directory (no .tac file)" % quote_output(basedir) return 1 if "client" not in tac: print >>stderr, ("%s looks like it contains a non-client node (%s).\n" "Use 'tahoe start' instead of 'tahoe run'." % (quote_output(basedir), tac)) return 1 os.chdir(basedir) # set up twisted logging. this will become part of the node rsn. logdir = os.path.join(basedir, 'logs') if not os.path.exists(logdir): os.makedirs(logdir) lf = logfile.LogFile('tahoesvc.log', logdir) log.startLogging(lf) # run the node itself c = client.Client(basedir) reactor.callLater(0, c.startService) # after reactor startup reactor.run() return 0 subCommands = [ ["start", None, StartOptions, "Start a node (of any type)."], ["stop", None, StopOptions, "Stop a node."], ["restart", None, RestartOptions, "Restart a node."], ["run", None, RunOptions, "Run a node synchronously."], ] dispatch = { "start": start, "stop": stop, "restart": restart, "run": run, } tahoe-lafs-1.10.0/src/allmydata/scripts/stats_gatherer.py000066400000000000000000000031631221140116300234250ustar00rootroot00000000000000 import os, sys from allmydata.scripts.common import BasedirOptions from allmydata.util.assertutil import precondition from allmydata.util.encodingutil import listdir_unicode, quote_output class CreateStatsGathererOptions(BasedirOptions): default_nodedir = None def getSynopsis(self): return "Usage: %s [global-opts] create-stats-gatherer [options] NODEDIR" % (self.command_name,) stats_gatherer_tac = """ # -*- python -*- from allmydata import stats from twisted.application import service verbose = True g = stats.StatsGathererService(verbose=verbose) application = service.Application('allmydata_stats_gatherer') g.setServiceParent(application) """ def create_stats_gatherer(config, out=sys.stdout, err=sys.stderr): basedir = config['basedir'] # This should always be called with an absolute Unicode basedir. precondition(isinstance(basedir, unicode), basedir) if os.path.exists(basedir): if listdir_unicode(basedir): print >>err, "The base directory %s is not empty." % quote_output(basedir) print >>err, "To avoid clobbering anything, I am going to quit now." print >>err, "Please use a different directory, or empty this one." 
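# Note: this "refuse to use a non-empty basedir" check is the same pattern
# used by create_node() and create_key_generator() above: print an error,
# return -1, and only call os.mkdir() when the directory does not exist yet.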
return -1 # we're willing to use an empty directory else: os.mkdir(basedir) f = open(os.path.join(basedir, "tahoe-stats-gatherer.tac"), "wb") f.write(stats_gatherer_tac) f.close() return 0 subCommands = [ ["create-stats-gatherer", None, CreateStatsGathererOptions, "Create a stats-gatherer service."], ] dispatch = { "create-stats-gatherer": create_stats_gatherer, } tahoe-lafs-1.10.0/src/allmydata/scripts/tahoe_add_alias.py000066400000000000000000000072441221140116300234730ustar00rootroot00000000000000 import os.path import codecs from allmydata import uri from allmydata.scripts.common_http import do_http, check_http_error from allmydata.scripts.common import get_aliases from allmydata.util.fileutil import move_into_place from allmydata.util.encodingutil import unicode_to_output, quote_output def add_line_to_aliasfile(aliasfile, alias, cap): # we use os.path.exists, rather than catching EnvironmentError, to avoid # clobbering the valuable alias file in case of spurious or transient # filesystem errors. if os.path.exists(aliasfile): f = codecs.open(aliasfile, "r", "utf-8") aliases = f.read() f.close() if not aliases.endswith("\n"): aliases += "\n" else: aliases = "" aliases += "%s: %s\n" % (alias, cap) f = codecs.open(aliasfile+".tmp", "w", "utf-8") f.write(aliases) f.close() move_into_place(aliasfile+".tmp", aliasfile) def add_alias(options): nodedir = options['node-directory'] alias = options.alias cap = options.cap stdout = options.stdout stderr = options.stderr if u":" in alias: # a single trailing colon will already have been stripped if present print >>stderr, "Alias names cannot contain colons." return 1 if u" " in alias: print >>stderr, "Alias names cannot contain spaces." return 1 old_aliases = get_aliases(nodedir) if alias in old_aliases: print >>stderr, "Alias %s already exists!" % quote_output(alias) return 1 aliasfile = os.path.join(nodedir, "private", "aliases") cap = uri.from_string_dirnode(cap).to_string() add_line_to_aliasfile(aliasfile, alias, cap) print >>stdout, "Alias %s added" % quote_output(alias) return 0 def create_alias(options): # mkdir+add_alias nodedir = options['node-directory'] alias = options.alias stdout = options.stdout stderr = options.stderr if u":" in alias: # a single trailing colon will already have been stripped if present print >>stderr, "Alias names cannot contain colons." return 1 if u" " in alias: print >>stderr, "Alias names cannot contain spaces." return 1 old_aliases = get_aliases(nodedir) if alias in old_aliases: print >>stderr, "Alias %s already exists!" % quote_output(alias) return 1 aliasfile = os.path.join(nodedir, "private", "aliases") nodeurl = options['node-url'] if not nodeurl.endswith("/"): nodeurl += "/" url = nodeurl + "uri?t=mkdir" resp = do_http("POST", url) rc = check_http_error(resp, stderr) if rc: return rc new_uri = resp.read().strip() # probably check for others.. 
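# add_line_to_aliasfile() (defined above) appends a single "name: cap" line to
# private/aliases, writing the whole file to aliases.tmp first and then moving
# it into place so an interrupted write cannot clobber the existing file.
# A resulting line looks like (hypothetical cap):
#   backup: URI:DIR2:xxxxxxxxxxxxxxxxxxxxxxxxxx:yyyyyyyyyyyyyyyyyyyyyyyyyy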
add_line_to_aliasfile(aliasfile, alias, new_uri) print >>stdout, "Alias %s created" % (quote_output(alias),) return 0 def list_aliases(options): nodedir = options['node-directory'] stdout = options.stdout stderr = options.stderr aliases = get_aliases(nodedir) alias_names = sorted(aliases.keys()) max_width = max([len(quote_output(name)) for name in alias_names] + [0]) fmt = "%" + str(max_width) + "s: %s" rc = 0 for name in alias_names: try: print >>stdout, fmt % (unicode_to_output(name), unicode_to_output(aliases[name].decode('utf-8'))) except (UnicodeEncodeError, UnicodeDecodeError): print >>stderr, fmt % (quote_output(name), quote_output(aliases[name])) rc = 1 if rc == 1: print >>stderr, "\nThis listing included aliases or caps that could not be converted to the terminal" \ "\noutput encoding. These are shown using backslash escapes and in quotes." return rc tahoe-lafs-1.10.0/src/allmydata/scripts/tahoe_backup.py000066400000000000000000000305061221140116300230340ustar00rootroot00000000000000 import os.path import time import urllib import simplejson import datetime from allmydata.scripts.common import get_alias, escape_path, DEFAULT_ALIAS, \ UnknownAliasError from allmydata.scripts.common_http import do_http, HTTPError, format_http_error from allmydata.util import time_format from allmydata.scripts import backupdb from allmydata.util.encodingutil import listdir_unicode, quote_output, \ to_str, FilenameEncodingError, unicode_to_url from allmydata.util.assertutil import precondition from allmydata.util.fileutil import abspath_expanduser_unicode def get_local_metadata(path): metadata = {} # posix stat(2) metadata, depends on the platform os.stat_float_times(True) s = os.stat(path) metadata["ctime"] = s.st_ctime metadata["mtime"] = s.st_mtime misc_fields = ("st_mode", "st_ino", "st_dev", "st_uid", "st_gid") macos_misc_fields = ("st_rsize", "st_creator", "st_type") for field in misc_fields + macos_misc_fields: if hasattr(s, field): metadata[field] = getattr(s, field) # TODO: extended attributes, like on OS-X's HFS+ return metadata def mkdir(contents, options): kids = dict([ (childname, (contents[childname][0], {"ro_uri": contents[childname][1], "metadata": contents[childname][2], })) for childname in contents ]) body = simplejson.dumps(kids).encode("utf-8") url = options['node-url'] + "uri?t=mkdir-immutable" resp = do_http("POST", url, body) if resp.status < 200 or resp.status >= 300: raise HTTPError("Error during mkdir", resp) dircap = to_str(resp.read().strip()) return dircap def put_child(dirurl, childname, childcap): assert dirurl[-1] == "/" url = dirurl + urllib.quote(unicode_to_url(childname)) + "?t=uri" resp = do_http("PUT", url, childcap) if resp.status not in (200, 201): raise HTTPError("Error during put_child", resp) class BackupProcessingError(Exception): pass class BackerUpper: def __init__(self, options): self.options = options self.files_uploaded = 0 self.files_reused = 0 self.files_checked = 0 self.files_skipped = 0 self.directories_created = 0 self.directories_reused = 0 self.directories_checked = 0 self.directories_skipped = 0 def run(self): options = self.options nodeurl = options['node-url'] self.verbosity = 1 if options['quiet']: self.verbosity = 0 if options['verbose']: self.verbosity = 2 stdout = options.stdout stderr = options.stderr start_timestamp = datetime.datetime.now() self.backupdb = None bdbfile = os.path.join(options["node-directory"], "private", "backupdb.sqlite") bdbfile = abspath_expanduser_unicode(bdbfile) self.backupdb = backupdb.get_backupdb(bdbfile, 
stderr) if not self.backupdb: print >>stderr, "ERROR: Unable to load backup db." return 1 try: rootcap, path = get_alias(options.aliases, options.to_dir, DEFAULT_ALIAS) except UnknownAliasError, e: e.display(stderr) return 1 to_url = nodeurl + "uri/%s/" % urllib.quote(rootcap) if path: to_url += escape_path(path) if not to_url.endswith("/"): to_url += "/" archives_url = to_url + "Archives/" # first step: make sure the target directory exists, as well as the # Archives/ subdirectory. resp = do_http("GET", archives_url + "?t=json") if resp.status == 404: resp = do_http("POST", archives_url + "?t=mkdir") if resp.status != 200: print >>stderr, format_http_error("Unable to create target directory", resp) return 1 # second step: process the tree new_backup_dircap = self.process(options.from_dir) # third: attach the new backup to the list now = time_format.iso_utc(int(time.time()), sep="_") + "Z" put_child(archives_url, now, new_backup_dircap) put_child(to_url, "Latest", new_backup_dircap) end_timestamp = datetime.datetime.now() # calc elapsed time, omitting microseconds elapsed_time = str(end_timestamp - start_timestamp).split('.')[0] if self.verbosity >= 1: print >>stdout, (" %d files uploaded (%d reused), " "%d files skipped, " "%d directories created (%d reused), " "%d directories skipped" % (self.files_uploaded, self.files_reused, self.files_skipped, self.directories_created, self.directories_reused, self.directories_skipped)) if self.verbosity >= 2: print >>stdout, (" %d files checked, %d directories checked" % (self.files_checked, self.directories_checked)) print >>stdout, " backup done, elapsed time: %s" % elapsed_time # The command exits with code 2 if files or directories were skipped if self.files_skipped or self.directories_skipped: return 2 # done! 
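# --- Illustrative sketch (editorial addition, not part of the original source) ---
# Each backup run above links the new snapshot twice: under Archives/ with a
# UTC timestamp name (time_format.iso_utc(..., sep="_") + "Z") and again as
# "Latest".  The standalone helper below approximates that name using only
# the standard library; the helper name is hypothetical and the exact
# formatting of the real name comes from allmydata.util.time_format.
def _snapshot_name_sketch(when=None):
    import datetime as _datetime   # stdlib only, independent of allmydata.util
    if when is None:
        when = _datetime.datetime.utcnow()
    # e.g. "2013-08-27_17:37:05Z"
    return when.replace(microsecond=0).isoformat("_") + "Z"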
return 0 def verboseprint(self, msg): precondition(isinstance(msg, str), msg) if self.verbosity >= 2: print >>self.options.stdout, msg def warn(self, msg): precondition(isinstance(msg, str), msg) print >>self.options.stderr, msg def process(self, localpath): precondition(isinstance(localpath, unicode), localpath) # returns newdircap self.verboseprint("processing %s" % quote_output(localpath)) create_contents = {} # childname -> (type, rocap, metadata) compare_contents = {} # childname -> rocap try: children = listdir_unicode(localpath) except EnvironmentError: self.directories_skipped += 1 self.warn("WARNING: permission denied on directory %s" % quote_output(localpath)) children = [] except FilenameEncodingError: self.directories_skipped += 1 self.warn("WARNING: could not list directory %s due to a filename encoding error" % quote_output(localpath)) children = [] for child in self.options.filter_listdir(children): assert isinstance(child, unicode), child childpath = os.path.join(localpath, child) # note: symlinks to directories are both islink() and isdir() if os.path.isdir(childpath) and not os.path.islink(childpath): metadata = get_local_metadata(childpath) # recurse on the child directory childcap = self.process(childpath) assert isinstance(childcap, str) create_contents[child] = ("dirnode", childcap, metadata) compare_contents[child] = childcap elif os.path.isfile(childpath) and not os.path.islink(childpath): try: childcap, metadata = self.upload(childpath) assert isinstance(childcap, str) create_contents[child] = ("filenode", childcap, metadata) compare_contents[child] = childcap except EnvironmentError: self.files_skipped += 1 self.warn("WARNING: permission denied on file %s" % quote_output(childpath)) else: self.files_skipped += 1 if os.path.islink(childpath): self.warn("WARNING: cannot backup symlink %s" % quote_output(childpath)) else: self.warn("WARNING: cannot backup special file %s" % quote_output(childpath)) must_create, r = self.check_backupdb_directory(compare_contents) if must_create: self.verboseprint(" creating directory for %s" % quote_output(localpath)) newdircap = mkdir(create_contents, self.options) assert isinstance(newdircap, str) if r: r.did_create(newdircap) self.directories_created += 1 return newdircap else: self.verboseprint(" re-using old directory for %s" % quote_output(localpath)) self.directories_reused += 1 return r.was_created() def check_backupdb_file(self, childpath): if not self.backupdb: return True, None use_timestamps = not self.options["ignore-timestamps"] r = self.backupdb.check_file(childpath, use_timestamps) if not r.was_uploaded(): return True, r if not r.should_check(): # the file was uploaded or checked recently, so we can just use # it return False, r # we must check the file before using the results filecap = r.was_uploaded() self.verboseprint("checking %s" % quote_output(filecap)) nodeurl = self.options['node-url'] checkurl = nodeurl + "uri/%s?t=check&output=JSON" % urllib.quote(filecap) self.files_checked += 1 resp = do_http("POST", checkurl) if resp.status != 200: # can't check, so we must assume it's bad return True, r cr = simplejson.loads(resp.read()) healthy = cr["results"]["healthy"] if not healthy: # must upload return True, r # file is healthy, no need to upload r.did_check_healthy(cr) return False, r def check_backupdb_directory(self, compare_contents): if not self.backupdb: return True, None r = self.backupdb.check_directory(compare_contents) if not r.was_created(): return True, r if not r.should_check(): # the file was uploaded 
or checked recently, so we can just use # it return False, r # we must check the directory before re-using it dircap = r.was_created() self.verboseprint("checking %s" % quote_output(dircap)) nodeurl = self.options['node-url'] checkurl = nodeurl + "uri/%s?t=check&output=JSON" % urllib.quote(dircap) self.directories_checked += 1 resp = do_http("POST", checkurl) if resp.status != 200: # can't check, so we must assume it's bad return True, r cr = simplejson.loads(resp.read()) healthy = cr["results"]["healthy"] if not healthy: # must create return True, r # directory is healthy, no need to upload r.did_check_healthy(cr) return False, r # This function will raise an IOError exception when called on an unreadable file def upload(self, childpath): precondition(isinstance(childpath, unicode), childpath) #self.verboseprint("uploading %s.." % quote_output(childpath)) metadata = get_local_metadata(childpath) # we can use the backupdb here must_upload, bdb_results = self.check_backupdb_file(childpath) if must_upload: self.verboseprint("uploading %s.." % quote_output(childpath)) infileobj = open(childpath, "rb") url = self.options['node-url'] + "uri" resp = do_http("PUT", url, infileobj) if resp.status not in (200, 201): raise HTTPError("Error during file PUT", resp) filecap = resp.read().strip() self.verboseprint(" %s -> %s" % (quote_output(childpath, quotemarks=False), quote_output(filecap, quotemarks=False))) #self.verboseprint(" metadata: %s" % (quote_output(metadata, quotemarks=False),)) if bdb_results: bdb_results.did_upload(filecap) self.files_uploaded += 1 return filecap, metadata else: self.verboseprint("skipping %s.." % quote_output(childpath)) self.files_reused += 1 return bdb_results.was_uploaded(), metadata def backup(options): bu = BackerUpper(options) return bu.run() tahoe-lafs-1.10.0/src/allmydata/scripts/tahoe_check.py000066400000000000000000000270621221140116300226470ustar00rootroot00000000000000 import urllib import simplejson from twisted.protocols.basic import LineOnlyReceiver from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path, \ UnknownAliasError from allmydata.scripts.common_http import do_http, format_http_error from allmydata.util.encodingutil import quote_output, quote_path class Checker: pass def _quote_serverid_index_share(serverid, storage_index, sharenum): return "server %s, SI %s, shnum %r" % (quote_output(serverid, quotemarks=False), quote_output(storage_index, quotemarks=False), sharenum) def check(options): stdout = options.stdout stderr = options.stderr nodeurl = options['node-url'] if not nodeurl.endswith("/"): nodeurl += "/" where = options.where try: rootcap, path = get_alias(options.aliases, where, DEFAULT_ALIAS) except UnknownAliasError, e: e.display(stderr) return 1 if path == '/': path = '' url = nodeurl + "uri/%s" % urllib.quote(rootcap) if path: url += "/" + escape_path(path) # todo: should it end with a slash? 
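# --- Illustrative sketch (editorial addition, not part of the original source) ---
# Both check_backupdb_file() and check_backupdb_directory() above decide
# whether a previously stored cap is still usable by POSTing a t=check
# request and reading results["healthy"] from the JSON reply.  A standalone
# helper with the same shape (hypothetical name) might be:
def _cap_is_healthy_sketch(nodeurl, cap):
    checkurl = nodeurl + "uri/%s?t=check&output=JSON" % urllib.quote(cap)
    resp = do_http("POST", checkurl)
    if resp.status != 200:
        return False   # can't check, so assume the cap is not usable
    cr = simplejson.loads(resp.read())
    return bool(cr["results"]["healthy"])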
url += "?t=check&output=JSON" if options["verify"]: url += "&verify=true" if options["repair"]: url += "&repair=true" if options["add-lease"]: url += "&add-lease=true" resp = do_http("POST", url) if resp.status != 200: print >>stderr, format_http_error("ERROR", resp) return 1 jdata = resp.read() if options.get("raw"): stdout.write(jdata) stdout.write("\n") return 0 data = simplejson.loads(jdata) if options["repair"]: # show repair status if data["pre-repair-results"]["results"]["healthy"]: summary = "healthy" else: summary = "not healthy" stdout.write("Summary: %s\n" % summary) cr = data["pre-repair-results"]["results"] stdout.write(" storage index: %s\n" % quote_output(data["storage-index"], quotemarks=False)) stdout.write(" good-shares: %r (encoding is %r-of-%r)\n" % (cr["count-shares-good"], cr["count-shares-needed"], cr["count-shares-expected"])) stdout.write(" wrong-shares: %r\n" % cr["count-wrong-shares"]) corrupt = cr["list-corrupt-shares"] if corrupt: stdout.write(" corrupt shares:\n") for (serverid, storage_index, sharenum) in corrupt: stdout.write(" %s\n" % _quote_serverid_index_share(serverid, storage_index, sharenum)) if data["repair-attempted"]: if data["repair-successful"]: stdout.write(" repair successful\n") else: stdout.write(" repair failed\n") else: # LIT files and directories do not have a "summary" field. summary = data.get("summary", "Healthy (LIT)") stdout.write("Summary: %s\n" % quote_output(summary, quotemarks=False)) cr = data["results"] stdout.write(" storage index: %s\n" % quote_output(data["storage-index"], quotemarks=False)) if all([field in cr for field in ("count-shares-good", "count-shares-needed", "count-shares-expected", "count-wrong-shares")]): stdout.write(" good-shares: %r (encoding is %r-of-%r)\n" % (cr["count-shares-good"], cr["count-shares-needed"], cr["count-shares-expected"])) stdout.write(" wrong-shares: %r\n" % cr["count-wrong-shares"]) corrupt = cr.get("list-corrupt-shares", []) if corrupt: stdout.write(" corrupt shares:\n") for (serverid, storage_index, sharenum) in corrupt: stdout.write(" %s\n" % _quote_serverid_index_share(serverid, storage_index, sharenum)) return 0 class FakeTransport: disconnecting = False class DeepCheckOutput(LineOnlyReceiver): delimiter = "\n" def __init__(self, streamer, options): self.streamer = streamer self.transport = FakeTransport() self.verbose = bool(options["verbose"]) self.stdout = options.stdout self.stderr = options.stderr self.num_objects = 0 self.files_healthy = 0 self.files_unhealthy = 0 self.in_error = False def lineReceived(self, line): if self.in_error: print >>self.stderr, quote_output(line, quotemarks=False) return if line.startswith("ERROR:"): self.in_error = True self.streamer.rc = 1 print >>self.stderr, quote_output(line, quotemarks=False) return d = simplejson.loads(line) stdout = self.stdout if d["type"] not in ("file", "directory"): return self.num_objects += 1 # non-verbose means print a progress marker every 100 files if self.num_objects % 100 == 0: print >>stdout, "%d objects checked.." % self.num_objects cr = d["check-results"] if cr["results"]["healthy"]: self.files_healthy += 1 else: self.files_unhealthy += 1 if self.verbose: # verbose means also print one line per file path = d["path"] if not path: path = [""] # LIT files and directories do not have a "summary" field. 
summary = cr.get("summary", "Healthy (LIT)") print >>stdout, "%s: %s" % (quote_path(path), quote_output(summary, quotemarks=False)) # always print out corrupt shares for shareloc in cr["results"].get("list-corrupt-shares", []): (serverid, storage_index, sharenum) = shareloc print >>stdout, " corrupt: %s" % _quote_serverid_index_share(serverid, storage_index, sharenum) def done(self): if self.in_error: return stdout = self.stdout print >>stdout, "done: %d objects checked, %d healthy, %d unhealthy" \ % (self.num_objects, self.files_healthy, self.files_unhealthy) class DeepCheckAndRepairOutput(LineOnlyReceiver): delimiter = "\n" def __init__(self, streamer, options): self.streamer = streamer self.transport = FakeTransport() self.verbose = bool(options["verbose"]) self.stdout = options.stdout self.stderr = options.stderr self.num_objects = 0 self.pre_repair_files_healthy = 0 self.pre_repair_files_unhealthy = 0 self.repairs_attempted = 0 self.repairs_successful = 0 self.post_repair_files_healthy = 0 self.post_repair_files_unhealthy = 0 self.in_error = False def lineReceived(self, line): if self.in_error: print >>self.stderr, quote_output(line, quotemarks=False) return if line.startswith("ERROR:"): self.in_error = True self.streamer.rc = 1 print >>self.stderr, quote_output(line, quotemarks=False) return d = simplejson.loads(line) stdout = self.stdout if d["type"] not in ("file", "directory"): return self.num_objects += 1 # non-verbose means print a progress marker every 100 files if self.num_objects % 100 == 0: print >>stdout, "%d objects checked.." % self.num_objects crr = d["check-and-repair-results"] if d["storage-index"]: if crr["pre-repair-results"]["results"]["healthy"]: was_healthy = True self.pre_repair_files_healthy += 1 else: was_healthy = False self.pre_repair_files_unhealthy += 1 if crr["post-repair-results"]["results"]["healthy"]: self.post_repair_files_healthy += 1 else: self.post_repair_files_unhealthy += 1 else: # LIT file was_healthy = True self.pre_repair_files_healthy += 1 self.post_repair_files_healthy += 1 if crr["repair-attempted"]: self.repairs_attempted += 1 if crr["repair-successful"]: self.repairs_successful += 1 if self.verbose: # verbose means also print one line per file path = d["path"] if not path: path = [""] # we don't seem to have a summary available, so build one if was_healthy: summary = "healthy" else: summary = "not healthy" print >>stdout, "%s: %s" % (quote_path(path), summary) # always print out corrupt shares prr = crr.get("pre-repair-results", {}) for shareloc in prr.get("results", {}).get("list-corrupt-shares", []): (serverid, storage_index, sharenum) = shareloc print >>stdout, " corrupt: %s" % _quote_serverid_index_share(serverid, storage_index, sharenum) # always print out repairs if crr["repair-attempted"]: if crr["repair-successful"]: print >>stdout, " repair successful" else: print >>stdout, " repair failed" def done(self): if self.in_error: return stdout = self.stdout print >>stdout, "done: %d objects checked" % self.num_objects print >>stdout, " pre-repair: %d healthy, %d unhealthy" \ % (self.pre_repair_files_healthy, self.pre_repair_files_unhealthy) print >>stdout, " %d repairs attempted, %d successful, %d failed" \ % (self.repairs_attempted, self.repairs_successful, (self.repairs_attempted - self.repairs_successful)) print >>stdout, " post-repair: %d healthy, %d unhealthy" \ % (self.post_repair_files_healthy, self.post_repair_files_unhealthy) class DeepCheckStreamer(LineOnlyReceiver): def run(self, options): stdout = options.stdout stderr = 
options.stderr self.rc = 0 self.options = options nodeurl = options['node-url'] if not nodeurl.endswith("/"): nodeurl += "/" self.nodeurl = nodeurl where = options.where try: rootcap, path = get_alias(options.aliases, where, DEFAULT_ALIAS) except UnknownAliasError, e: e.display(stderr) return 1 if path == '/': path = '' url = nodeurl + "uri/%s" % urllib.quote(rootcap) if path: url += "/" + escape_path(path) # todo: should it end with a slash? url += "?t=stream-deep-check" if options["verify"]: url += "&verify=true" if options["repair"]: url += "&repair=true" output = DeepCheckAndRepairOutput(self, options) else: output = DeepCheckOutput(self, options) if options["add-lease"]: url += "&add-lease=true" resp = do_http("POST", url) if resp.status not in (200, 302): print >>stderr, format_http_error("ERROR", resp) return 1 # use Twisted to split this into lines while True: chunk = resp.read(100) if not chunk: break if self.options["raw"]: stdout.write(chunk) else: output.dataReceived(chunk) if not self.options["raw"]: output.done() return self.rc def deepcheck(options): return DeepCheckStreamer().run(options) tahoe-lafs-1.10.0/src/allmydata/scripts/tahoe_cp.py000066400000000000000000000751761221140116300222050ustar00rootroot00000000000000 import os.path import urllib import simplejson from cStringIO import StringIO from twisted.python.failure import Failure from allmydata.scripts.common import get_alias, escape_path, \ DefaultAliasMarker, TahoeError from allmydata.scripts.common_http import do_http, HTTPError from allmydata import uri from allmydata.util import fileutil from allmydata.util.fileutil import abspath_expanduser_unicode from allmydata.util.encodingutil import unicode_to_url, listdir_unicode, quote_output, to_str from allmydata.util.assertutil import precondition class MissingSourceError(TahoeError): def __init__(self, name): TahoeError.__init__(self, "No such file or directory %s" % quote_output(name)) def GET_to_file(url): resp = do_http("GET", url) if resp.status == 200: return resp raise HTTPError("Error during GET", resp) def GET_to_string(url): f = GET_to_file(url) return f.read() def PUT(url, data): resp = do_http("PUT", url, data) if resp.status in (200, 201): return resp.read() raise HTTPError("Error during PUT", resp) def POST(url, data): resp = do_http("POST", url, data) if resp.status in (200, 201): return resp.read() raise HTTPError("Error during POST", resp) def mkdir(targeturl): url = targeturl + "?t=mkdir" resp = do_http("POST", url) if resp.status in (200, 201): return resp.read().strip() raise HTTPError("Error during mkdir", resp) def make_tahoe_subdirectory(nodeurl, parent_writecap, name): url = nodeurl + "/".join(["uri", urllib.quote(parent_writecap), urllib.quote(unicode_to_url(name)), ]) + "?t=mkdir" resp = do_http("POST", url) if resp.status in (200, 201): return resp.read().strip() raise HTTPError("Error during mkdir", resp) class LocalFileSource: def __init__(self, pathname): precondition(isinstance(pathname, unicode), pathname) self.pathname = pathname def need_to_copy_bytes(self): return True def open(self, caps_only): return open(os.path.expanduser(self.pathname), "rb") class LocalFileTarget: def __init__(self, pathname): precondition(isinstance(pathname, unicode), pathname) self.pathname = pathname def put_file(self, inf): fileutil.put_file(self.pathname, inf) class LocalMissingTarget: def __init__(self, pathname): precondition(isinstance(pathname, unicode), pathname) self.pathname = pathname def put_file(self, inf): fileutil.put_file(self.pathname, inf) 
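# --- Illustrative sketch (editorial addition, not part of the original source) ---
# The cp machinery in this module is duck-typed: a file "source" needs
# need_to_copy_bytes() and open(caps_only), and a file "target" needs
# put_file(inf).  A hypothetical in-memory source satisfying that interface:
class _StringSourceSketch:
    def __init__(self, data):
        self.data = data
    def need_to_copy_bytes(self):
        return True    # there is no cap to link to, so bytes must be copied
    def open(self, caps_only):
        return StringIO(self.data)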
class LocalDirectorySource: def __init__(self, progressfunc, pathname): precondition(isinstance(pathname, unicode), pathname) self.progressfunc = progressfunc self.pathname = pathname self.children = None def populate(self, recurse): if self.children is not None: return self.children = {} children = listdir_unicode(self.pathname) for i,n in enumerate(children): self.progressfunc("examining %d of %d" % (i+1, len(children))) pn = os.path.join(self.pathname, n) if os.path.isdir(pn): child = LocalDirectorySource(self.progressfunc, pn) self.children[n] = child if recurse: child.populate(True) elif os.path.isfile(pn): self.children[n] = LocalFileSource(pn) else: # Could be dangling symlink; probably not copy-able. # TODO: output a warning pass class LocalDirectoryTarget: def __init__(self, progressfunc, pathname): precondition(isinstance(pathname, unicode), pathname) self.progressfunc = progressfunc self.pathname = pathname self.children = None def populate(self, recurse): if self.children is not None: return self.children = {} children = listdir_unicode(self.pathname) for i,n in enumerate(children): self.progressfunc("examining %d of %d" % (i+1, len(children))) n = unicode(n) pn = os.path.join(self.pathname, n) if os.path.isdir(pn): child = LocalDirectoryTarget(self.progressfunc, pn) self.children[n] = child if recurse: child.populate(True) else: assert os.path.isfile(pn) self.children[n] = LocalFileTarget(pn) def get_child_target(self, name): if self.children is None: self.populate(False) if name in self.children: return self.children[name] pathname = os.path.join(self.pathname, name) os.makedirs(pathname) return LocalDirectoryTarget(self.progressfunc, pathname) def put_file(self, name, inf): precondition(isinstance(name, unicode), name) pathname = os.path.join(self.pathname, name) fileutil.put_file(pathname, inf) def set_children(self): pass class TahoeFileSource: def __init__(self, nodeurl, mutable, writecap, readcap): self.nodeurl = nodeurl self.mutable = mutable self.writecap = writecap self.readcap = readcap def need_to_copy_bytes(self): if self.mutable: return True return False def open(self, caps_only): if caps_only: return StringIO(self.readcap) url = self.nodeurl + "uri/" + urllib.quote(self.readcap) return GET_to_file(url) def bestcap(self): return self.writecap or self.readcap class TahoeFileTarget: def __init__(self, nodeurl, mutable, writecap, readcap, url): self.nodeurl = nodeurl self.mutable = mutable self.writecap = writecap self.readcap = readcap self.url = url def put_file(self, inf): # We want to replace this object in-place. assert self.url # our do_http() call currently requires a string or a filehandle with # a real .seek if not hasattr(inf, "seek"): inf = inf.read() PUT(self.url, inf) # TODO: this always creates immutable files. We might want an option # to always create mutable files, or to copy mutable files into new # mutable files. 
ticket #835 class TahoeDirectorySource: def __init__(self, nodeurl, cache, progressfunc): self.nodeurl = nodeurl self.cache = cache self.progressfunc = progressfunc def init_from_grid(self, writecap, readcap): self.writecap = writecap self.readcap = readcap bestcap = writecap or readcap url = self.nodeurl + "uri/%s" % urllib.quote(bestcap) resp = do_http("GET", url + "?t=json") if resp.status != 200: raise HTTPError("Error examining source directory", resp) parsed = simplejson.loads(resp.read()) nodetype, d = parsed assert nodetype == "dirnode" self.mutable = d.get("mutable", False) # older nodes don't provide it self.children_d = dict( [(unicode(name),value) for (name,value) in d["children"].iteritems()] ) self.children = None def init_from_parsed(self, parsed): nodetype, d = parsed self.writecap = to_str(d.get("rw_uri")) self.readcap = to_str(d.get("ro_uri")) self.mutable = d.get("mutable", False) # older nodes don't provide it self.children_d = dict( [(unicode(name),value) for (name,value) in d["children"].iteritems()] ) self.children = None def populate(self, recurse): if self.children is not None: return self.children = {} for i,(name, data) in enumerate(self.children_d.items()): self.progressfunc("examining %d of %d" % (i+1, len(self.children_d))) if data[0] == "filenode": mutable = data[1].get("mutable", False) writecap = to_str(data[1].get("rw_uri")) readcap = to_str(data[1].get("ro_uri")) self.children[name] = TahoeFileSource(self.nodeurl, mutable, writecap, readcap) elif data[0] == "dirnode": writecap = to_str(data[1].get("rw_uri")) readcap = to_str(data[1].get("ro_uri")) if writecap and writecap in self.cache: child = self.cache[writecap] elif readcap and readcap in self.cache: child = self.cache[readcap] else: child = TahoeDirectorySource(self.nodeurl, self.cache, self.progressfunc) child.init_from_grid(writecap, readcap) if writecap: self.cache[writecap] = child if readcap: self.cache[readcap] = child if recurse: child.populate(True) self.children[name] = child else: # TODO: there should be an option to skip unknown nodes. raise TahoeError("Cannot copy unknown nodes (ticket #839). " "You probably need to use a later version of " "Tahoe-LAFS to copy this directory.") class TahoeMissingTarget: def __init__(self, url): self.url = url def put_file(self, inf): # We want to replace this object in-place. if not hasattr(inf, "seek"): inf = inf.read() PUT(self.url, inf) # TODO: this always creates immutable files. We might want an option # to always create mutable files, or to copy mutable files into new # mutable files. 
def put_uri(self, filecap): # I'm not sure this will always work return PUT(self.url + "?t=uri", filecap) class TahoeDirectoryTarget: def __init__(self, nodeurl, cache, progressfunc): self.nodeurl = nodeurl self.cache = cache self.progressfunc = progressfunc self.new_children = {} def init_from_parsed(self, parsed): nodetype, d = parsed self.writecap = to_str(d.get("rw_uri")) self.readcap = to_str(d.get("ro_uri")) self.mutable = d.get("mutable", False) # older nodes don't provide it self.children_d = dict( [(unicode(name),value) for (name,value) in d["children"].iteritems()] ) self.children = None def init_from_grid(self, writecap, readcap): self.writecap = writecap self.readcap = readcap bestcap = writecap or readcap url = self.nodeurl + "uri/%s" % urllib.quote(bestcap) resp = do_http("GET", url + "?t=json") if resp.status != 200: raise HTTPError("Error examining target directory", resp) parsed = simplejson.loads(resp.read()) nodetype, d = parsed assert nodetype == "dirnode" self.mutable = d.get("mutable", False) # older nodes don't provide it self.children_d = dict( [(unicode(name),value) for (name,value) in d["children"].iteritems()] ) self.children = None def just_created(self, writecap): self.writecap = writecap self.readcap = uri.from_string(writecap).get_readonly().to_string() self.mutable = True self.children_d = {} self.children = {} def populate(self, recurse): if self.children is not None: return self.children = {} for i,(name, data) in enumerate(self.children_d.items()): self.progressfunc("examining %d of %d" % (i+1, len(self.children_d))) if data[0] == "filenode": mutable = data[1].get("mutable", False) writecap = to_str(data[1].get("rw_uri")) readcap = to_str(data[1].get("ro_uri")) url = None if self.writecap: url = self.nodeurl + "/".join(["uri", urllib.quote(self.writecap), urllib.quote(unicode_to_url(name))]) self.children[name] = TahoeFileTarget(self.nodeurl, mutable, writecap, readcap, url) elif data[0] == "dirnode": writecap = to_str(data[1].get("rw_uri")) readcap = to_str(data[1].get("ro_uri")) if writecap and writecap in self.cache: child = self.cache[writecap] elif readcap and readcap in self.cache: child = self.cache[readcap] else: child = TahoeDirectoryTarget(self.nodeurl, self.cache, self.progressfunc) child.init_from_grid(writecap, readcap) if writecap: self.cache[writecap] = child if readcap: self.cache[readcap] = child if recurse: child.populate(True) self.children[name] = child else: # TODO: there should be an option to skip unknown nodes. raise TahoeError("Cannot copy unknown nodes (ticket #839). " "You probably need to use a later version of " "Tahoe-LAFS to copy this directory.") def get_child_target(self, name): # return a new target for a named subdirectory of this dir if self.children is None: self.populate(False) if name in self.children: return self.children[name] writecap = make_tahoe_subdirectory(self.nodeurl, self.writecap, name) child = TahoeDirectoryTarget(self.nodeurl, self.cache, self.progressfunc) child.just_created(writecap) self.children[name] = child return child def put_file(self, name, inf): url = self.nodeurl + "uri" if not hasattr(inf, "seek"): inf = inf.read() if self.children is None: self.populate(False) # Check to see if we already have a mutable file by this name. # If so, overwrite that file in place. if name in self.children and self.children[name].mutable: self.children[name].put_file(inf) else: filecap = PUT(url, inf) # TODO: this always creates immutable files. 
We might want an option # to always create mutable files, or to copy mutable files into new # mutable files. self.new_children[name] = filecap def put_uri(self, name, filecap): self.new_children[name] = filecap def set_children(self): if not self.new_children: return url = (self.nodeurl + "uri/" + urllib.quote(self.writecap) + "?t=set_children") set_data = {} for (name, filecap) in self.new_children.items(): # it just so happens that ?t=set_children will accept both file # read-caps and write-caps as ['rw_uri'], and will handle either # correctly. So don't bother trying to figure out whether the one # we have is read-only or read-write. # TODO: think about how this affects forward-compatibility for # unknown caps set_data[name] = ["filenode", {"rw_uri": filecap}] body = simplejson.dumps(set_data) POST(url, body) class Copier: def do_copy(self, options, progressfunc=None): if options['quiet']: verbosity = 0 elif options['verbose']: verbosity = 2 else: verbosity = 1 nodeurl = options['node-url'] if nodeurl[-1] != "/": nodeurl += "/" self.nodeurl = nodeurl self.progressfunc = progressfunc self.options = options self.aliases = options.aliases self.verbosity = verbosity self.stdout = options.stdout self.stderr = options.stderr if verbosity >= 2 and not self.progressfunc: def progress(message): print >>self.stderr, message self.progressfunc = progress self.caps_only = options["caps-only"] self.cache = {} try: status = self.try_copy() return status except TahoeError, te: if verbosity >= 2: Failure().printTraceback(self.stderr) print >>self.stderr te.display(self.stderr) return 1 def try_copy(self): source_specs = self.options.sources destination_spec = self.options.destination recursive = self.options["recursive"] target = self.get_target_info(destination_spec) sources = [] # list of (name, source object) for ss in source_specs: name, source = self.get_source_info(ss) sources.append( (name, source) ) have_source_dirs = bool([s for (name,s) in sources if isinstance(s, (LocalDirectorySource, TahoeDirectorySource))]) if have_source_dirs and not recursive: self.to_stderr("cannot copy directories without --recursive") return 1 if isinstance(target, (LocalFileTarget, TahoeFileTarget)): # cp STUFF foo.txt, where foo.txt already exists. This limits the # possibilities considerably. if len(sources) > 1: self.to_stderr("target %s is not a directory" % quote_output(destination_spec)) return 1 if have_source_dirs: self.to_stderr("cannot copy directory into a file") return 1 name, source = sources[0] return self.copy_file(source, target) if isinstance(target, (LocalMissingTarget, TahoeMissingTarget)): if recursive: return self.copy_to_directory(sources, target) if len(sources) > 1: # if we have -r, we'll auto-create the target directory. Without # it, we'll only create a file. 
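# --- Illustrative sketch (editorial addition, not part of the original source) ---
# TahoeDirectoryTarget.set_children() above batches the accumulated links
# into one t=set_children POST whose body maps child names to node
# descriptors.  The caps below are placeholders, not real ones:
_example_set_children_body = simplejson.dumps({
    "report.txt": ["filenode", {"rw_uri": "URI:CHK:placeholder-cap-1"}],
    "notes.txt":  ["filenode", {"rw_uri": "URI:CHK:placeholder-cap-2"}],
})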
self.to_stderr("cannot copy multiple files into a file without -r") return 1 # cp file1 newfile name, source = sources[0] return self.copy_file(source, target) if isinstance(target, (LocalDirectoryTarget, TahoeDirectoryTarget)): # We're copying to an existing directory -- make sure that we # have target names for everything for (name, source) in sources: if name is None and isinstance(source, TahoeFileSource): self.to_stderr( "error: you must specify a destination filename") return 1 return self.copy_to_directory(sources, target) self.to_stderr("unknown target") return 1 def to_stderr(self, text): print >>self.stderr, text def get_target_info(self, destination_spec): rootcap, path = get_alias(self.aliases, destination_spec, None) if rootcap == DefaultAliasMarker: # no alias, so this is a local file pathname = abspath_expanduser_unicode(path.decode('utf-8')) if not os.path.exists(pathname): t = LocalMissingTarget(pathname) elif os.path.isdir(pathname): t = LocalDirectoryTarget(self.progress, pathname) else: assert os.path.isfile(pathname), pathname t = LocalFileTarget(pathname) # non-empty else: # this is a tahoe object url = self.nodeurl + "uri/%s" % urllib.quote(rootcap) if path: url += "/" + escape_path(path) resp = do_http("GET", url + "?t=json") if resp.status == 404: # doesn't exist yet t = TahoeMissingTarget(url) elif resp.status == 200: parsed = simplejson.loads(resp.read()) nodetype, d = parsed if nodetype == "dirnode": t = TahoeDirectoryTarget(self.nodeurl, self.cache, self.progress) t.init_from_parsed(parsed) else: writecap = to_str(d.get("rw_uri")) readcap = to_str(d.get("ro_uri")) mutable = d.get("mutable", False) t = TahoeFileTarget(self.nodeurl, mutable, writecap, readcap, url) else: raise HTTPError("Error examining target %s" % quote_output(destination_spec), resp) return t def get_source_info(self, source_spec): rootcap, path = get_alias(self.aliases, source_spec, None) if rootcap == DefaultAliasMarker: # no alias, so this is a local file pathname = abspath_expanduser_unicode(path.decode('utf-8')) name = os.path.basename(pathname) if not os.path.exists(pathname): raise MissingSourceError(source_spec) if os.path.isdir(pathname): t = LocalDirectorySource(self.progress, pathname) else: assert os.path.isfile(pathname) t = LocalFileSource(pathname) # non-empty else: # this is a tahoe object url = self.nodeurl + "uri/%s" % urllib.quote(rootcap) name = None if path: url += "/" + escape_path(path) last_slash = path.rfind("/") name = path if last_slash: name = path[last_slash+1:] resp = do_http("GET", url + "?t=json") if resp.status == 404: raise MissingSourceError(source_spec) elif resp.status != 200: raise HTTPError("Error examining source %s" % quote_output(source_spec), resp) parsed = simplejson.loads(resp.read()) nodetype, d = parsed if nodetype == "dirnode": t = TahoeDirectorySource(self.nodeurl, self.cache, self.progress) t.init_from_parsed(parsed) else: writecap = to_str(d.get("rw_uri")) readcap = to_str(d.get("ro_uri")) mutable = d.get("mutable", False) # older nodes don't provide it if source_spec.rfind('/') != -1: name = source_spec[source_spec.rfind('/')+1:] t = TahoeFileSource(self.nodeurl, mutable, writecap, readcap) return name, t def dump_graph(self, s, indent=" "): for name, child in s.children.items(): print "%s%s: %r" % (indent, quote_output(name), child) if isinstance(child, (LocalDirectorySource, TahoeDirectorySource)): self.dump_graph(child, indent+" ") def copy_to_directory(self, source_infos, target): # step one: build a recursive graph of the source tree. 
This returns # a dictionary, with child names as keys, and values that are either # Directory or File instances (local or tahoe). source_dirs = self.build_graphs(source_infos) source_files = [source for source in source_infos if isinstance(source[1], (LocalFileSource, TahoeFileSource))] #print "graphs" #for s in source_dirs: # self.dump_graph(s) # step two: create the top-level target directory object if isinstance(target, LocalMissingTarget): os.makedirs(target.pathname) target = LocalDirectoryTarget(self.progress, target.pathname) elif isinstance(target, TahoeMissingTarget): writecap = mkdir(target.url) target = TahoeDirectoryTarget(self.nodeurl, self.cache, self.progress) target.just_created(writecap) assert isinstance(target, (LocalDirectoryTarget, TahoeDirectoryTarget)) target.populate(False) # step three: find a target for each source node, creating # directories as necessary. 'targetmap' is a dictionary that uses # target Directory instances as keys, and has values of # (name->sourceobject) dicts for all the files that need to wind up # there. # sources are all LocalFile/LocalDirectory/TahoeFile/TahoeDirectory # target is LocalDirectory/TahoeDirectory self.progress("attaching sources to targets, " "%d files / %d dirs in root" % (len(source_files), len(source_dirs))) self.targetmap = {} self.files_to_copy = 0 for (name,s) in source_files: self.attach_to_target(s, name, target) for source in source_dirs: self.assign_targets(source, target) self.progress("targets assigned, %s dirs, %s files" % (len(self.targetmap), self.files_to_copy)) self.progress("starting copy, %d files, %d directories" % (self.files_to_copy, len(self.targetmap))) self.files_copied = 0 self.targets_finished = 0 # step four: walk through the list of targets. For each one, copy all # the files. If the target is a TahoeDirectory, upload and create # read-caps, then do a set_children to the target directory. 
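# --- Illustrative sketch (editorial addition, not part of the original source) ---
# The targetmap built here maps each target directory object to a
# {childname: source} dict, so every Tahoe directory can be finished with a
# single batched set_children() call.  The bucketing step has roughly this
# shape (hypothetical helper name):
def _attach_sketch(targetmap, target, name, source):
    targetmap.setdefault(target, {})[name] = source
    return targetmap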
for target in self.targetmap: self.copy_files_to_target(self.targetmap[target], target) self.targets_finished += 1 self.progress("%d/%d directories" % (self.targets_finished, len(self.targetmap))) return self.announce_success("files copied") def attach_to_target(self, source, name, target): if target not in self.targetmap: self.targetmap[target] = {} self.targetmap[target][name] = source self.files_to_copy += 1 def assign_targets(self, source, target): # copy everything in the source into the target assert isinstance(source, (LocalDirectorySource, TahoeDirectorySource)) for name, child in source.children.items(): if isinstance(child, (LocalDirectorySource, TahoeDirectorySource)): # we will need a target directory for this one subtarget = target.get_child_target(name) self.assign_targets(child, subtarget) else: assert isinstance(child, (LocalFileSource, TahoeFileSource)) self.attach_to_target(child, name, target) def copy_files_to_target(self, targetmap, target): for name, source in targetmap.items(): assert isinstance(source, (LocalFileSource, TahoeFileSource)) self.copy_file_into(source, name, target) self.files_copied += 1 self.progress("%d/%d files, %d/%d directories" % (self.files_copied, self.files_to_copy, self.targets_finished, len(self.targetmap))) target.set_children() def need_to_copy_bytes(self, source, target): if source.need_to_copy_bytes: # mutable tahoe files, and local files return True if isinstance(target, (LocalFileTarget, LocalDirectoryTarget)): return True return False def announce_success(self, msg): if self.verbosity >= 1: print >>self.stdout, "Success: %s" % msg return 0 def copy_file(self, source, target): assert isinstance(source, (LocalFileSource, TahoeFileSource)) assert isinstance(target, (LocalFileTarget, TahoeFileTarget, LocalMissingTarget, TahoeMissingTarget)) if self.need_to_copy_bytes(source, target): # if the target is a local directory, this will just write the # bytes to disk. If it is a tahoe directory, it will upload the # data, and stash the new filecap for a later set_children call. f = source.open(self.caps_only) target.put_file(f) return self.announce_success("file copied") # otherwise we're copying tahoe to tahoe, and using immutable files, # so we can just make a link. TODO: this probably won't always work: # need to enumerate the cases and analyze them. target.put_uri(source.bestcap()) return self.announce_success("file linked") def copy_file_into(self, source, name, target): assert isinstance(source, (LocalFileSource, TahoeFileSource)) assert isinstance(target, (LocalDirectoryTarget, TahoeDirectoryTarget)) if self.need_to_copy_bytes(source, target): # if the target is a local directory, this will just write the # bytes to disk. If it is a tahoe directory, it will upload the # data, and stash the new filecap for a later set_children call. 
f = source.open(self.caps_only) target.put_file(name, f) return # otherwise we're copying tahoe to tahoe, and using immutable files, # so we can just make a link target.put_uri(name, source.bestcap()) def progress(self, message): #print message if self.progressfunc: self.progressfunc(message) def build_graphs(self, source_infos): graphs = [] for name,source in source_infos: if isinstance(source, (LocalDirectorySource, TahoeDirectorySource)): source.populate(True) graphs.append(source) return graphs def copy(options): return Copier().do_copy(options) # error cases that need improvement: # local-file-in-the-way # touch proposed # tahoe cp -r my:docs/proposed/denver.txt proposed/denver.txt # handling of unknown nodes # things that maybe should be errors but aren't # local-dir-in-the-way # mkdir denver.txt # tahoe cp -r my:docs/proposed/denver.txt denver.txt # (creates denver.txt/denver.txt) # error cases that look good: # tahoe cp -r my:docs/missing missing # disconnect servers # tahoe cp -r my:docs/missing missing -> No JSON object could be decoded # tahoe-file-in-the-way (when we want to make a directory) # tahoe put README my:docs # tahoe cp -r docs/proposed my:docs/proposed tahoe-lafs-1.10.0/src/allmydata/scripts/tahoe_get.py000066400000000000000000000023031221140116300223400ustar00rootroot00000000000000 import os, urllib from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path, \ UnknownAliasError from allmydata.scripts.common_http import do_http, format_http_error def get(options): nodeurl = options['node-url'] aliases = options.aliases from_file = options.from_file to_file = options.to_file stdout = options.stdout stderr = options.stderr if nodeurl[-1] != "/": nodeurl += "/" try: rootcap, path = get_alias(aliases, from_file, DEFAULT_ALIAS) except UnknownAliasError, e: e.display(stderr) return 1 url = nodeurl + "uri/%s" % urllib.quote(rootcap) if path: url += "/" + escape_path(path) resp = do_http("GET", url) if resp.status in (200, 201,): if to_file: outf = open(os.path.expanduser(to_file), "wb") else: outf = stdout while True: data = resp.read(4096) if not data: break outf.write(data) if to_file: outf.close() rc = 0 else: print >>stderr, format_http_error("Error during GET", resp) rc = 1 return rc tahoe-lafs-1.10.0/src/allmydata/scripts/tahoe_ls.py000066400000000000000000000142531221140116300222060ustar00rootroot00000000000000 import urllib, time import simplejson from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path, \ UnknownAliasError from allmydata.scripts.common_http import do_http, format_http_error from allmydata.util.encodingutil import unicode_to_output, quote_output, is_printable_ascii, to_str def list(options): nodeurl = options['node-url'] aliases = options.aliases where = options.where stdout = options.stdout stderr = options.stderr if not nodeurl.endswith("/"): nodeurl += "/" if where.endswith("/"): where = where[:-1] try: rootcap, path = get_alias(aliases, where, DEFAULT_ALIAS) except UnknownAliasError, e: e.display(stderr) return 1 url = nodeurl + "uri/%s" % urllib.quote(rootcap) if path: # move where.endswith check here? url += "/" + escape_path(path) assert not url.endswith("/") url += "?t=json" resp = do_http("GET", url) if resp.status == 404: print >>stderr, "No such file or directory" return 2 if resp.status != 200: print >>stderr, format_http_error("Error during GET", resp) if resp.status == 0: return 3 else: return resp.status data = resp.read() if options['json']: # The webapi server should always output printable ASCII. 
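# --- Illustrative sketch (editorial addition, not part of the original source) ---
# The t=json reply unpacked below is a two-element JSON list of
# [nodetype, info].  For a directory it is shaped roughly as follows; the
# caps, sizes and metadata values here are placeholders:
_example_dir_listing_sketch = simplejson.dumps(["dirnode", {
    "rw_uri": "URI:DIR2:placeholder-cap",
    "children": {
        "hello.txt": ["filenode", {"ro_uri": "URI:CHK:placeholder-cap",
                                   "size": 12,
                                   "metadata": {"tahoe": {"linkcrtime": 0}}}],
    },
}])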
if is_printable_ascii(data): print >>stdout, data return 0 else: print >>stderr, "The JSON response contained unprintable characters:" print >>stderr, quote_output(data, quotemarks=False) return 1 try: parsed = simplejson.loads(data) except Exception, e: print >>stderr, "error: %s" % quote_output(e.args[0], quotemarks=False) print >>stderr, "Could not parse JSON response:" print >>stderr, quote_output(data, quotemarks=False) return 1 nodetype, d = parsed children = {} if nodetype == "dirnode": children = d['children'] else: # paths returned from get_alias are always valid UTF-8 childname = path.split("/")[-1].decode('utf-8') children = {childname: (nodetype, d)} if "metadata" not in d: d["metadata"] = {} childnames = sorted(children.keys()) now = time.time() # we build up a series of rows, then we loop through them to compute a # maxwidth so we can format them tightly. Size, filename, and URI are the # variable-width ones. rows = [] has_unknowns = False for name in childnames: child = children[name] name = unicode(name) childtype = child[0] # See webapi.txt for a discussion of the meanings of unix local # filesystem mtime and ctime, Tahoe mtime and ctime, and Tahoe # linkmotime and linkcrtime. ctime = child[1].get("metadata", {}).get('tahoe', {}).get("linkcrtime") if not ctime: ctime = child[1]["metadata"].get("ctime") mtime = child[1].get("metadata", {}).get('tahoe', {}).get("linkmotime") if not mtime: mtime = child[1]["metadata"].get("mtime") rw_uri = to_str(child[1].get("rw_uri")) ro_uri = to_str(child[1].get("ro_uri")) if ctime: # match for formatting that GNU 'ls' does if (now - ctime) > 6*30*24*60*60: # old files fmt = "%b %d %Y" else: fmt = "%b %d %H:%M" ctime_s = time.strftime(fmt, time.localtime(ctime)) else: ctime_s = "-" if childtype == "dirnode": t0 = "d" size = "-" classify = "/" elif childtype == "filenode": t0 = "-" size = str(child[1].get("size", "?")) classify = "" if rw_uri: classify = "*" else: has_unknowns = True t0 = "?" size = "?" classify = "?" t1 = "-" if ro_uri: t1 = "r" t2 = "-" if rw_uri: t2 = "w" t3 = "-" if childtype == "dirnode": t3 = "x" uri = rw_uri or ro_uri line = [] if options["long"]: line.append(t0+t1+t2+t3) line.append(size) line.append(ctime_s) if not options["classify"]: classify = "" encoding_error = False try: line.append(unicode_to_output(name) + classify) except UnicodeEncodeError: encoding_error = True line.append(quote_output(name) + classify) if options["uri"]: line.append(uri) if options["readonly-uri"]: line.append(quote_output(ro_uri or "-", quotemarks=False)) rows.append((encoding_error, line)) max_widths = [] left_justifys = [] for (encoding_error, row) in rows: for i,cell in enumerate(row): while len(max_widths) <= i: max_widths.append(0) while len(left_justifys) <= i: left_justifys.append(False) max_widths[i] = max(max_widths[i], len(cell)) if cell.startswith("URI"): left_justifys[i] = True if len(left_justifys) == 1: left_justifys[0] = True fmt_pieces = [] for i in range(len(max_widths)): piece = "%" if left_justifys[i]: piece += "-" piece += str(max_widths[i]) piece += "s" fmt_pieces.append(piece) fmt = " ".join(fmt_pieces) rc = 0 for (encoding_error, row) in rows: if encoding_error: print >>stderr, (fmt % tuple(row)).rstrip() rc = 1 else: print >>stdout, (fmt % tuple(row)).rstrip() if rc == 1: print >>stderr, "\nThis listing included files whose names could not be converted to the terminal" \ "\noutput encoding. Their names are shown using backslash escapes and in quotes." 
if has_unknowns: print >>stderr, "\nThis listing included unknown objects. Using a webapi server that supports" \ "\na later version of Tahoe may help." return rc tahoe-lafs-1.10.0/src/allmydata/scripts/tahoe_manifest.py000066400000000000000000000130661221140116300233770ustar00rootroot00000000000000 import urllib, simplejson from twisted.protocols.basic import LineOnlyReceiver from allmydata.util.abbreviate import abbreviate_space_both from allmydata.scripts.slow_operation import SlowOperationRunner from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path, \ UnknownAliasError from allmydata.scripts.common_http import do_http, format_http_error from allmydata.util.encodingutil import quote_output, quote_path class FakeTransport: disconnecting = False class ManifestStreamer(LineOnlyReceiver): delimiter = "\n" def __init__(self): self.transport = FakeTransport() def run(self, options): self.rc = 0 stdout = options.stdout stderr = options.stderr self.options = options nodeurl = options['node-url'] if not nodeurl.endswith("/"): nodeurl += "/" self.nodeurl = nodeurl where = options.where try: rootcap, path = get_alias(options.aliases, where, DEFAULT_ALIAS) except UnknownAliasError, e: e.display(stderr) return 1 if path == '/': path = '' url = nodeurl + "uri/%s" % urllib.quote(rootcap) if path: url += "/" + escape_path(path) # todo: should it end with a slash? url += "?t=stream-manifest" resp = do_http("POST", url) if resp.status not in (200, 302): print >>stderr, format_http_error("ERROR", resp) return 1 #print "RESP", dir(resp) # use Twisted to split this into lines self.in_error = False while True: chunk = resp.read(100) if not chunk: break if self.options["raw"]: stdout.write(chunk) else: self.dataReceived(chunk) return self.rc def lineReceived(self, line): stdout = self.options.stdout stderr = self.options.stderr if self.in_error: print >>stderr, quote_output(line, quotemarks=False) return if line.startswith("ERROR:"): self.in_error = True self.rc = 1 print >>stderr, quote_output(line, quotemarks=False) return try: d = simplejson.loads(line.decode('utf-8')) except Exception, e: print >>stderr, "ERROR could not decode/parse %s\nERROR %r" % (quote_output(line), e) else: if d["type"] in ("file", "directory"): if self.options["storage-index"]: si = d.get("storage-index", None) if si: print >>stdout, quote_output(si, quotemarks=False) elif self.options["verify-cap"]: vc = d.get("verifycap", None) if vc: print >>stdout, quote_output(vc, quotemarks=False) elif self.options["repair-cap"]: vc = d.get("repaircap", None) if vc: print >>stdout, quote_output(vc, quotemarks=False) else: print >>stdout, "%s %s" % (quote_output(d["cap"], quotemarks=False), quote_path(d["path"], quotemarks=False)) def manifest(options): return ManifestStreamer().run(options) class StatsGrabber(SlowOperationRunner): def make_url(self, base, ophandle): return base + "?t=start-deep-stats&ophandle=" + ophandle def write_results(self, data): stdout = self.options.stdout keys = ("count-immutable-files", "count-mutable-files", "count-literal-files", "count-files", "count-directories", "size-immutable-files", "size-mutable-files", "size-literal-files", "size-directories", "largest-directory", "largest-immutable-file", ) width = max([len(k) for k in keys]) print >>stdout, "Counts and Total Sizes:" for k in keys: fmt = "%" + str(width) + "s: %d" if k in data: value = data[k] if not k.startswith("count-") and value > 1000: absize = abbreviate_space_both(value) print >>stdout, fmt % (k, data[k]), " ", absize else: 
print >>stdout, fmt % (k, data[k]) if data["size-files-histogram"]: print >>stdout, "Size Histogram:" prevmax = None maxlen = max([len(str(maxsize)) for (minsize, maxsize, count) in data["size-files-histogram"]]) maxcountlen = max([len(str(count)) for (minsize, maxsize, count) in data["size-files-histogram"]]) minfmt = "%" + str(maxlen) + "d" maxfmt = "%-" + str(maxlen) + "d" countfmt = "%-" + str(maxcountlen) + "d" linefmt = minfmt + "-" + maxfmt + " : " + countfmt + " %s" for (minsize, maxsize, count) in data["size-files-histogram"]: if prevmax is not None and minsize != prevmax+1: print >>stdout, " "*(maxlen-1) + "..." prevmax = maxsize print >>stdout, linefmt % (minsize, maxsize, count, abbreviate_space_both(maxsize)) def stats(options): return StatsGrabber().run(options) tahoe-lafs-1.10.0/src/allmydata/scripts/tahoe_mkdir.py000066400000000000000000000031741221140116300226760ustar00rootroot00000000000000 import urllib from allmydata.scripts.common_http import do_http, check_http_error from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, UnknownAliasError from allmydata.util.encodingutil import quote_output def mkdir(options): nodeurl = options['node-url'] aliases = options.aliases where = options.where stdout = options.stdout stderr = options.stderr if not nodeurl.endswith("/"): nodeurl += "/" if where: try: rootcap, path = get_alias(aliases, where, DEFAULT_ALIAS) except UnknownAliasError, e: e.display(stderr) return 1 if not where or not path: # create a new unlinked directory url = nodeurl + "uri?t=mkdir" if options["format"]: url += "&format=%s" % urllib.quote(options['format']) resp = do_http("POST", url) rc = check_http_error(resp, stderr) if rc: return rc new_uri = resp.read().strip() # emit its write-cap print >>stdout, quote_output(new_uri, quotemarks=False) return 0 # create a new directory at the given location if path.endswith("/"): path = path[:-1] # path must be "/".join([s.encode("utf-8") for s in segments]) url = nodeurl + "uri/%s/%s?t=mkdir" % (urllib.quote(rootcap), urllib.quote(path)) if options['format']: url += "&format=%s" % urllib.quote(options['format']) resp = do_http("POST", url) check_http_error(resp, stderr) new_uri = resp.read().strip() print >>stdout, quote_output(new_uri, quotemarks=False) return 0 tahoe-lafs-1.10.0/src/allmydata/scripts/tahoe_mv.py000066400000000000000000000045541221140116300222150ustar00rootroot00000000000000 import re import urllib import simplejson from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path, \ UnknownAliasError from allmydata.scripts.common_http import do_http, format_http_error from allmydata.util.encodingutil import to_str # this script is used for both 'mv' and 'ln' def mv(options, mode="move"): nodeurl = options['node-url'] aliases = options.aliases from_file = options.from_file to_file = options.to_file stdout = options.stdout stderr = options.stderr if nodeurl[-1] != "/": nodeurl += "/" try: rootcap, from_path = get_alias(aliases, from_file, DEFAULT_ALIAS) except UnknownAliasError, e: e.display(stderr) return 1 from_url = nodeurl + "uri/%s" % urllib.quote(rootcap) if from_path: from_url += "/" + escape_path(from_path) # figure out the source cap resp = do_http("GET", from_url + "?t=json") if not re.search(r'^2\d\d$', str(resp.status)): print >>stderr, format_http_error("Error", resp) return 1 data = resp.read() nodetype, attrs = simplejson.loads(data) cap = to_str(attrs.get("rw_uri") or attrs["ro_uri"]) # now get the target try: rootcap, path = get_alias(aliases, to_file, DEFAULT_ALIAS) 
except UnknownAliasError, e: e.display(stderr) return 1 to_url = nodeurl + "uri/%s" % urllib.quote(rootcap) if path: to_url += "/" + escape_path(path) if to_url.endswith("/"): # "mv foo.txt bar/" == "mv foo.txt bar/foo.txt" to_url += escape_path(from_path[from_path.rfind("/")+1:]) to_url += "?t=uri&replace=only-files" resp = do_http("PUT", to_url, cap) status = resp.status if not re.search(r'^2\d\d$', str(status)): if status == 409: print >>stderr, "Error: You can't overwrite a directory with a file" else: print >>stderr, format_http_error("Error", resp) if mode == "move": print >>stderr, "NOT removing the original" return 1 if mode == "move": # now remove the original resp = do_http("DELETE", from_url) if not re.search(r'^2\d\d$', str(resp.status)): print >>stderr, format_http_error("Error deleting original after move", resp) return 2 print >>stdout, "OK" return 0 tahoe-lafs-1.10.0/src/allmydata/scripts/tahoe_put.py000066400000000000000000000061411221140116300223750ustar00rootroot00000000000000 import os from cStringIO import StringIO import urllib from allmydata.scripts.common_http import do_http, format_http_success, format_http_error from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path, \ UnknownAliasError from allmydata.util.encodingutil import quote_output def put(options): """ @param verbosity: 0, 1, or 2, meaning quiet, verbose, or very verbose @return: a Deferred which eventually fires with the exit code """ nodeurl = options['node-url'] aliases = options.aliases from_file = options.from_file to_file = options.to_file mutable = options['mutable'] format = options['format'] if options['quiet']: verbosity = 0 else: verbosity = 2 stdin = options.stdin stdout = options.stdout stderr = options.stderr if nodeurl[-1] != "/": nodeurl += "/" if to_file: # several possibilities for the TO_FILE argument. # : unlinked upload # foo : TAHOE_ALIAS/foo # subdir/foo : TAHOE_ALIAS/subdir/foo # /oops/subdir/foo : DISALLOWED # ALIAS:foo : aliases[ALIAS]/foo # ALIAS:subdir/foo : aliases[ALIAS]/subdir/foo # ALIAS:/oops/subdir/foo : DISALLOWED # DIRCAP:./foo : DIRCAP/foo # DIRCAP:./subdir/foo : DIRCAP/subdir/foo # MUTABLE-FILE-WRITECAP : filecap # FIXME: don't hardcode cap format. if to_file.startswith("URI:MDMF:") or to_file.startswith("URI:SSK:"): url = nodeurl + "uri/%s" % urllib.quote(to_file) else: try: rootcap, path = get_alias(aliases, to_file, DEFAULT_ALIAS) except UnknownAliasError, e: e.display(stderr) return 1 if path.startswith("/"): suggestion = to_file.replace(u"/", u"", 1) print >>stderr, "Error: The remote filename must not start with a slash" print >>stderr, "Please try again, perhaps with %s" % quote_output(suggestion) return 1 url = nodeurl + "uri/%s/" % urllib.quote(rootcap) if path: url += escape_path(path) else: # unlinked upload url = nodeurl + "uri" queryargs = [] if mutable: queryargs.append("mutable=true") if format: queryargs.append("format=%s" % format) if queryargs: url += "?" + "&".join(queryargs) if from_file: infileobj = open(os.path.expanduser(from_file), "rb") else: # do_http() can't use stdin directly: for one thing, we need a # Content-Length field. So we currently must copy it. if verbosity > 0: print >>stderr, "waiting for file data on stdin.." 
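# --- Illustrative sketch (editorial addition, not part of the original source) ---
# put() above assembles the upload URL from the node URL, an optional
# rootcap/path, and optional mutable/format query arguments.  A condensed
# standalone version of just the unlinked-upload case, mirroring the
# query-argument handling above (hypothetical helper name), might be:
def _unlinked_put_url_sketch(nodeurl, mutable=False, format=None):
    if nodeurl[-1] != "/":
        nodeurl += "/"
    url = nodeurl + "uri"
    queryargs = []
    if mutable:
        queryargs.append("mutable=true")
    if format:
        queryargs.append("format=%s" % format)
    if queryargs:
        url += "?" + "&".join(queryargs)
    return url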
data = stdin.read() infileobj = StringIO(data) resp = do_http("PUT", url, infileobj) if resp.status in (200, 201,): print >>stderr, format_http_success(resp) print >>stdout, quote_output(resp.read(), quotemarks=False) return 0 print >>stderr, format_http_error("Error", resp) return 1 tahoe-lafs-1.10.0/src/allmydata/scripts/tahoe_unlink.py000066400000000000000000000021711221140116300230640ustar00rootroot00000000000000 import urllib from allmydata.scripts.common_http import do_http, format_http_success, format_http_error from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path, \ UnknownAliasError def unlink(options, command="unlink"): """ @return: a Deferred which eventually fires with the exit code """ nodeurl = options['node-url'] aliases = options.aliases where = options.where stdout = options.stdout stderr = options.stderr if nodeurl[-1] != "/": nodeurl += "/" try: rootcap, path = get_alias(aliases, where, DEFAULT_ALIAS) except UnknownAliasError, e: e.display(stderr) return 1 if not path: print >>stderr, """ 'tahoe %s' can only unlink directory entries, so a path must be given.""" % (command,) return 1 url = nodeurl + "uri/%s" % urllib.quote(rootcap) url += "/" + escape_path(path) resp = do_http("DELETE", url) if resp.status in (200,): print >>stdout, format_http_success(resp) return 0 print >>stderr, format_http_error("ERROR", resp) return 1 tahoe-lafs-1.10.0/src/allmydata/scripts/tahoe_webopen.py000066400000000000000000000015511221140116300232240ustar00rootroot00000000000000 from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path, \ UnknownAliasError import urllib def webopen(options, opener=None): nodeurl = options['node-url'] stderr = options.stderr if not nodeurl.endswith("/"): nodeurl += "/" where = options.where if where: try: rootcap, path = get_alias(options.aliases, where, DEFAULT_ALIAS) except UnknownAliasError, e: e.display(stderr) return 1 if path == '/': path = '' url = nodeurl + "uri/%s" % urllib.quote(rootcap) if path: url += "/" + escape_path(path) else: url = nodeurl if options['info']: url += "?t=info" if not opener: import webbrowser opener = webbrowser.open opener(url) return 0 tahoe-lafs-1.10.0/src/allmydata/stats.py000066400000000000000000000262321221140116300200570ustar00rootroot00000000000000 import os import pickle import pprint import time from collections import deque from twisted.internet import reactor from twisted.application import service from twisted.application.internet import TimerService from zope.interface import implements from foolscap.api import eventually, DeadReferenceError, Referenceable, Tub from allmydata.util import log from allmydata.util.encodingutil import quote_output from allmydata.interfaces import RIStatsProvider, RIStatsGatherer, IStatsProducer class LoadMonitor(service.MultiService): implements(IStatsProducer) loop_interval = 1 num_samples = 60 def __init__(self, provider, warn_if_delay_exceeds=1): service.MultiService.__init__(self) self.provider = provider self.warn_if_delay_exceeds = warn_if_delay_exceeds self.started = False self.last = None self.stats = deque() self.timer = None def startService(self): if not self.started: self.started = True self.timer = reactor.callLater(self.loop_interval, self.loop) service.MultiService.startService(self) def stopService(self): self.started = False if self.timer: self.timer.cancel() self.timer = None return service.MultiService.stopService(self) def loop(self): self.timer = None if not self.started: return now = time.time() if self.last is not None: 
delay = now - self.last - self.loop_interval if delay > self.warn_if_delay_exceeds: log.msg(format='excessive reactor delay (%ss)', args=(delay,), level=log.UNUSUAL) self.stats.append(delay) while len(self.stats) > self.num_samples: self.stats.popleft() self.last = now self.timer = reactor.callLater(self.loop_interval, self.loop) def get_stats(self): if self.stats: avg = sum(self.stats) / len(self.stats) m_x = max(self.stats) else: avg = m_x = 0 return { 'load_monitor.avg_load': avg, 'load_monitor.max_load': m_x, } class CPUUsageMonitor(service.MultiService): implements(IStatsProducer) HISTORY_LENGTH = 15 POLL_INTERVAL = 60 def __init__(self): service.MultiService.__init__(self) # we don't use time.clock() here, because the constructor is run by # the twistd parent process (as it loads the .tac file), whereas the # rest of the program will be run by the child process, after twistd # forks. Instead, set self.initial_cpu as soon as the reactor starts # up. self.initial_cpu = 0.0 # just in case eventually(self._set_initial_cpu) self.samples = [] # we provide 1min, 5min, and 15min moving averages TimerService(self.POLL_INTERVAL, self.check).setServiceParent(self) def _set_initial_cpu(self): self.initial_cpu = time.clock() def check(self): now_wall = time.time() now_cpu = time.clock() self.samples.append( (now_wall, now_cpu) ) while len(self.samples) > self.HISTORY_LENGTH+1: self.samples.pop(0) def _average_N_minutes(self, size): if len(self.samples) < size+1: return None first = -size-1 elapsed_wall = self.samples[-1][0] - self.samples[first][0] elapsed_cpu = self.samples[-1][1] - self.samples[first][1] fraction = elapsed_cpu / elapsed_wall return fraction def get_stats(self): s = {} avg = self._average_N_minutes(1) if avg is not None: s["cpu_monitor.1min_avg"] = avg avg = self._average_N_minutes(5) if avg is not None: s["cpu_monitor.5min_avg"] = avg avg = self._average_N_minutes(15) if avg is not None: s["cpu_monitor.15min_avg"] = avg now_cpu = time.clock() s["cpu_monitor.total"] = now_cpu - self.initial_cpu return s class StatsProvider(Referenceable, service.MultiService): implements(RIStatsProvider) def __init__(self, node, gatherer_furl): service.MultiService.__init__(self) self.node = node self.gatherer_furl = gatherer_furl # might be None self.counters = {} self.stats_producers = [] # only run the LoadMonitor (which submits a timer every second) if # there is a gatherer who is going to be paying attention. 
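# ---------------------------------------------------------------------------
# Worked sketch, not from the original source: CPUUsageMonitor above keeps one
# (wall-clock, cpu-clock) sample per POLL_INTERVAL and reports the ratio of
# cpu-seconds to wall-seconds over the last `size` intervals.  The sample
# values below are fabricated and sketch_cpu_average() is a hypothetical name.
def sketch_cpu_average(samples, size):
    if len(samples) < size + 1:
        return None                          # not enough history yet
    elapsed_wall = samples[-1][0] - samples[-size-1][0]
    elapsed_cpu = samples[-1][1] - samples[-size-1][1]
    return elapsed_cpu / elapsed_wall        # fraction of one CPU

assert sketch_cpu_average([(0, 0.0)], 1) is None
assert sketch_cpu_average([(0, 0.0), (60, 15.0), (120, 30.0)], 1) == 0.25
# ---------------------------------------------------------------------------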
Our stats # are visible through HTTP even without a gatherer, so run the rest # of the stats (including the once-per-minute CPUUsageMonitor) if gatherer_furl: self.load_monitor = LoadMonitor(self) self.load_monitor.setServiceParent(self) self.register_producer(self.load_monitor) self.cpu_monitor = CPUUsageMonitor() self.cpu_monitor.setServiceParent(self) self.register_producer(self.cpu_monitor) def startService(self): if self.node and self.gatherer_furl: d = self.node.when_tub_ready() def connect(junk): nickname_utf8 = self.node.nickname.encode("utf-8") self.node.tub.connectTo(self.gatherer_furl, self._connected, nickname_utf8) d.addCallback(connect) service.MultiService.startService(self) def count(self, name, delta=1): val = self.counters.setdefault(name, 0) self.counters[name] = val + delta def register_producer(self, stats_producer): self.stats_producers.append(IStatsProducer(stats_producer)) def get_stats(self): stats = {} for sp in self.stats_producers: stats.update(sp.get_stats()) ret = { 'counters': self.counters, 'stats': stats } log.msg(format='get_stats() -> %(stats)s', stats=ret, level=log.NOISY) return ret def remote_get_stats(self): return self.get_stats() def _connected(self, gatherer, nickname): gatherer.callRemoteOnly('provide', self, nickname or '') class StatsGatherer(Referenceable, service.MultiService): implements(RIStatsGatherer) poll_interval = 60 def __init__(self, basedir): service.MultiService.__init__(self) self.basedir = basedir self.clients = {} self.nicknames = {} self.timer = TimerService(self.poll_interval, self.poll) self.timer.setServiceParent(self) def get_tubid(self, rref): return rref.getRemoteTubID() def remote_provide(self, provider, nickname): tubid = self.get_tubid(provider) if tubid == '': print "WARNING: failed to get tubid for %s (%s)" % (provider, nickname) # don't add to clients to poll (polluting data) don't care about disconnect return self.clients[tubid] = provider self.nicknames[tubid] = nickname def poll(self): for tubid,client in self.clients.items(): nickname = self.nicknames.get(tubid) d = client.callRemote('get_stats') d.addCallbacks(self.got_stats, self.lost_client, callbackArgs=(tubid, nickname), errbackArgs=(tubid,)) d.addErrback(self.log_client_error, tubid) def lost_client(self, f, tubid): # this is called lazily, when a get_stats request fails del self.clients[tubid] del self.nicknames[tubid] f.trap(DeadReferenceError) def log_client_error(self, f, tubid): log.msg("StatsGatherer: error in get_stats(), peerid=%s" % tubid, level=log.UNUSUAL, failure=f) def got_stats(self, stats, tubid, nickname): raise NotImplementedError() class StdOutStatsGatherer(StatsGatherer): verbose = True def remote_provide(self, provider, nickname): tubid = self.get_tubid(provider) if self.verbose: print 'connect "%s" [%s]' % (nickname, tubid) provider.notifyOnDisconnect(self.announce_lost_client, tubid) StatsGatherer.remote_provide(self, provider, nickname) def announce_lost_client(self, tubid): print 'disconnect "%s" [%s]' % (self.nicknames[tubid], tubid) def got_stats(self, stats, tubid, nickname): print '"%s" [%s]:' % (nickname, tubid) pprint.pprint(stats) class PickleStatsGatherer(StdOutStatsGatherer): # inherit from StdOutStatsGatherer for connect/disconnect notifications def __init__(self, basedir=".", verbose=True): self.verbose = verbose StatsGatherer.__init__(self, basedir) self.picklefile = os.path.join(basedir, "stats.pickle") if os.path.exists(self.picklefile): f = open(self.picklefile, 'rb') try: self.gathered_stats = pickle.load(f) except 
Exception: print ("Error while attempting to load pickle file %s.\n" "You may need to restore this file from a backup, or delete it if no backup is available.\n" % quote_output(os.path.abspath(self.picklefile))) raise f.close() else: self.gathered_stats = {} def got_stats(self, stats, tubid, nickname): s = self.gathered_stats.setdefault(tubid, {}) s['timestamp'] = time.time() s['nickname'] = nickname s['stats'] = stats self.dump_pickle() def dump_pickle(self): tmp = "%s.tmp" % (self.picklefile,) f = open(tmp, 'wb') pickle.dump(self.gathered_stats, f) f.close() if os.path.exists(self.picklefile): os.unlink(self.picklefile) os.rename(tmp, self.picklefile) class StatsGathererService(service.MultiService): furl_file = "stats_gatherer.furl" def __init__(self, basedir=".", verbose=False): service.MultiService.__init__(self) self.basedir = basedir self.tub = Tub(certFile=os.path.join(self.basedir, "stats_gatherer.pem")) self.tub.setServiceParent(self) self.tub.setOption("logLocalFailures", True) self.tub.setOption("logRemoteFailures", True) self.tub.setOption("expose-remote-exception-types", False) self.stats_gatherer = PickleStatsGatherer(self.basedir, verbose) self.stats_gatherer.setServiceParent(self) portnumfile = os.path.join(self.basedir, "portnum") try: portnum = open(portnumfile, "r").read() except EnvironmentError: portnum = None self.listener = self.tub.listenOn(portnum or "tcp:0") d = self.tub.setLocationAutomatically() if portnum is None: d.addCallback(self.save_portnum) d.addCallback(self.tub_ready) d.addErrback(log.err) def save_portnum(self, junk): portnum = self.listener.getPortnum() portnumfile = os.path.join(self.basedir, 'portnum') open(portnumfile, 'wb').write('%d\n' % (portnum,)) def tub_ready(self, ignored): ff = os.path.join(self.basedir, self.furl_file) self.gatherer_furl = self.tub.registerReference(self.stats_gatherer, furlFile=ff) tahoe-lafs-1.10.0/src/allmydata/storage/000077500000000000000000000000001221140116300200065ustar00rootroot00000000000000tahoe-lafs-1.10.0/src/allmydata/storage/__init__.py000066400000000000000000000000001221140116300221050ustar00rootroot00000000000000tahoe-lafs-1.10.0/src/allmydata/storage/common.py000066400000000000000000000007311221140116300216510ustar00rootroot00000000000000 import os.path from allmydata.util import base32 class DataTooLargeError(Exception): pass class UnknownMutableContainerVersionError(Exception): pass class UnknownImmutableContainerVersionError(Exception): pass def si_b2a(storageindex): return base32.b2a(storageindex) def si_a2b(ascii_storageindex): return base32.a2b(ascii_storageindex) def storage_index_to_dir(storageindex): sia = si_b2a(storageindex) return os.path.join(sia[:2], sia) tahoe-lafs-1.10.0/src/allmydata/storage/crawler.py000066400000000000000000000550171221140116300220270ustar00rootroot00000000000000 import os, time, struct import cPickle as pickle from twisted.internet import reactor from twisted.application import service from allmydata.storage.common import si_b2a from allmydata.util import fileutil class TimeSliceExceeded(Exception): pass class ShareCrawler(service.MultiService): """A ShareCrawler subclass is attached to a StorageServer, and periodically walks all of its shares, processing each one in some fashion. This crawl is rate-limited, to reduce the IO burden on the host, since large servers can easily have a terabyte of shares, in several million files, which can take hours or days to read. 
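# ---------------------------------------------------------------------------
# Illustrative sketch, not from the original source: storage_index_to_dir()
# above shards shares into two-character prefix directories.  The helper and
# the sharedir value below are hypothetical; paths shown are POSIX-style.
import os.path

def sketch_share_path(sharedir, si_base32, shnum):
    """storage/shares/<first two base32 chars>/<full base32 SI>/<shnum>"""
    return os.path.join(sharedir, si_base32[:2], si_base32, "%d" % shnum)

# e.g. with si = si_b2a(storage_index):
#   sketch_share_path("storage/shares", si, 0)
#     -> "storage/shares/" + si[:2] + "/" + si + "/0"
# ---------------------------------------------------------------------------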
Once the crawler starts a cycle, it will proceed at a rate limited by the allowed_cpu_percentage= and cpu_slice= parameters: yielding the reactor after it has worked for 'cpu_slice' seconds, and not resuming right away, always trying to use less than 'allowed_cpu_percentage'. Once the crawler finishes a cycle, it will put off starting the next one long enough to ensure that 'minimum_cycle_time' elapses between the start of two consecutive cycles. We assume that the normal upload/download/get_buckets traffic of a tahoe grid will cause the prefixdir contents to be mostly cached in the kernel, or that the number of buckets in each prefixdir will be small enough to load quickly. A 1TB allmydata.com server was measured to have 2.56M buckets, spread into the 1024 prefixdirs, with about 2500 buckets per prefix. On this server, each prefixdir took 130ms-200ms to list the first time, and 17ms to list the second time. To use a crawler, create a subclass which implements the process_bucket() method. It will be called with a prefixdir and a base32 storage index string. process_bucket() must run synchronously. Any keys added to self.state will be preserved. Override add_initial_state() to set up initial state keys. Override finished_cycle() to perform additional processing when the cycle is complete. Any status that the crawler produces should be put in the self.state dictionary. Status renderers (like a web page which describes the accomplishments of your crawler) will use crawler.get_state() to retrieve this dictionary; they can present the contents as they see fit. Then create an instance, with a reference to a StorageServer and a filename where it can store persistent state. The statefile is used to keep track of how far around the ring the process has travelled, as well as timing history to allow the pace to be predicted and controlled. The statefile will be updated and written to disk after each time slice (just before the crawler yields to the reactor), and also after each cycle is finished, and also when stopService() is called. Note that this means that a crawler which is interrupted with SIGKILL while it is in the middle of a time slice will lose progress: the next time the node is started, the crawler will repeat some unknown amount of work. The crawler instance must be started with startService() before it will do any work. To make it stop doing work, call stopService(). """ slow_start = 300 # don't start crawling for 5 minutes after startup # all three of these can be changed at any time allowed_cpu_percentage = .10 # use up to 10% of the CPU, on average cpu_slice = 1.0 # use up to 1.0 seconds before yielding minimum_cycle_time = 300 # don't run a cycle faster than this def __init__(self, server, statefile, allowed_cpu_percentage=None): service.MultiService.__init__(self) if allowed_cpu_percentage is not None: self.allowed_cpu_percentage = allowed_cpu_percentage self.server = server self.sharedir = server.sharedir self.statefile = statefile self.prefixes = [si_b2a(struct.pack(">H", i << (16-10)))[:2] for i in range(2**10)] self.prefixes.sort() self.timer = None self.bucket_cache = (None, []) self.current_sleep_time = None self.next_wake_time = None self.last_prefix_finished_time = None self.last_prefix_elapsed_time = None self.last_cycle_started_time = None self.last_cycle_elapsed_time = None self.load_state() def minus_or_none(self, a, b): if a is None: return None return a-b def get_progress(self): """I return information about how much progress the crawler is making. 
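# ---------------------------------------------------------------------------
# Illustrative sketch, not part of Tahoe-LAFS: the minimal subclass shape the
# docstring above describes.  ShareCounter is a hypothetical example; it only
# relies on the ShareCrawler hooks defined in this module (add_initial_state,
# process_bucket, finished_cycle) and on self.state being persisted for it.
class ShareCounter(ShareCrawler):
    """Count how many buckets each crawl cycle visits."""
    def add_initial_state(self):
        self.state.setdefault("buckets-seen", 0)
        self.state.setdefault("last-cycle-total", None)
    def process_bucket(self, cycle, prefix, prefixdir, storage_index_b32):
        self.state["buckets-seen"] += 1          # must run synchronously
    def finished_cycle(self, cycle):
        self.state["last-cycle-total"] = self.state["buckets-seen"]
        self.state["buckets-seen"] = 0

# attach it like any other crawler, with a statefile of its own:
#   ShareCounter(server, os.path.join(statedir, "share_counter.state"))
# and call .setServiceParent(...) so startService()/stopService() run.
# ---------------------------------------------------------------------------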
My return value is a dictionary. The primary key is 'cycle-in-progress': True if the crawler is currently traversing the shares, False if it is idle between cycles. Note that any of these 'time' keys could be None if I am called at certain moments, so application code must be prepared to tolerate this case. The estimates will also be None if insufficient data has been gatherered to form an estimate. If cycle-in-progress is True, the following keys will be present:: cycle-complete-percentage': float, from 0.0 to 100.0, indicating how far the crawler has progressed through the current cycle remaining-sleep-time: float, seconds from now when we do more work estimated-cycle-complete-time-left: float, seconds remaining until the current cycle is finished. TODO: this does not yet include the remaining time left in the current prefixdir, and it will be very inaccurate on fast crawlers (which can process a whole prefix in a single tick) estimated-time-per-cycle: float, seconds required to do a complete cycle If cycle-in-progress is False, the following keys are available:: next-crawl-time: float, seconds-since-epoch when next crawl starts remaining-wait-time: float, seconds from now when next crawl starts estimated-time-per-cycle: float, seconds required to do a complete cycle """ d = {} if self.state["current-cycle"] is None: d["cycle-in-progress"] = False d["next-crawl-time"] = self.next_wake_time d["remaining-wait-time"] = self.minus_or_none(self.next_wake_time, time.time()) else: d["cycle-in-progress"] = True pct = 100.0 * self.last_complete_prefix_index / len(self.prefixes) d["cycle-complete-percentage"] = pct remaining = None if self.last_prefix_elapsed_time is not None: left = len(self.prefixes) - self.last_complete_prefix_index remaining = left * self.last_prefix_elapsed_time # TODO: remainder of this prefix: we need to estimate the # per-bucket time, probably by measuring the time spent on # this prefix so far, divided by the number of buckets we've # processed. d["estimated-cycle-complete-time-left"] = remaining # it's possible to call get_progress() from inside a crawler's # finished_prefix() function d["remaining-sleep-time"] = self.minus_or_none(self.next_wake_time, time.time()) per_cycle = None if self.last_cycle_elapsed_time is not None: per_cycle = self.last_cycle_elapsed_time elif self.last_prefix_elapsed_time is not None: per_cycle = len(self.prefixes) * self.last_prefix_elapsed_time d["estimated-time-per-cycle"] = per_cycle return d def get_state(self): """I return the current state of the crawler. This is a copy of my state dictionary. If we are not currently sleeping (i.e. get_state() was called from inside the process_prefixdir, process_bucket, or finished_cycle() methods, or if startService has not yet been called on this crawler), these two keys will be None. Subclasses can override this to add computed keys to the return value, but don't forget to start with the upcall. """ state = self.state.copy() # it isn't a deepcopy, so don't go crazy return state def load_state(self): # we use this to store state for both the crawler's internals and # anything the subclass-specific code needs. The state is stored # after each bucket is processed, after each prefixdir is processed, # and after a cycle is complete. 
The internal keys we use are: # ["version"]: int, always 1 # ["last-cycle-finished"]: int, or None if we have not yet finished # any cycle # ["current-cycle"]: int, or None if we are sleeping between cycles # ["current-cycle-start-time"]: int, seconds-since-epoch of when this # cycle was started, possibly by an earlier # process # ["last-complete-prefix"]: str, two-letter name of the last prefixdir # that was fully processed, or None if we # are sleeping between cycles, or if we # have not yet finished any prefixdir since # a cycle was started # ["last-complete-bucket"]: str, base32 storage index bucket name # of the last bucket to be processed, or # None if we are sleeping between cycles try: f = open(self.statefile, "rb") state = pickle.load(f) f.close() except Exception: state = {"version": 1, "last-cycle-finished": None, "current-cycle": None, "last-complete-prefix": None, "last-complete-bucket": None, } state.setdefault("current-cycle-start-time", time.time()) # approximate self.state = state lcp = state["last-complete-prefix"] if lcp == None: self.last_complete_prefix_index = -1 else: self.last_complete_prefix_index = self.prefixes.index(lcp) self.add_initial_state() def add_initial_state(self): """Hook method to add extra keys to self.state when first loaded. The first time this Crawler is used, or when the code has been upgraded, the saved state file may not contain all the keys you expect. Use this method to add any missing keys. Simply modify self.state as needed. This method for subclasses to override. No upcall is necessary. """ pass def save_state(self): lcpi = self.last_complete_prefix_index if lcpi == -1: last_complete_prefix = None else: last_complete_prefix = self.prefixes[lcpi] self.state["last-complete-prefix"] = last_complete_prefix tmpfile = self.statefile + ".tmp" f = open(tmpfile, "wb") pickle.dump(self.state, f) f.close() fileutil.move_into_place(tmpfile, self.statefile) def startService(self): # arrange things to look like we were just sleeping, so # status/progress values work correctly self.sleeping_between_cycles = True self.current_sleep_time = self.slow_start self.next_wake_time = time.time() + self.slow_start self.timer = reactor.callLater(self.slow_start, self.start_slice) service.MultiService.startService(self) def stopService(self): if self.timer: self.timer.cancel() self.timer = None self.save_state() return service.MultiService.stopService(self) def start_slice(self): start_slice = time.time() self.timer = None self.sleeping_between_cycles = False self.current_sleep_time = None self.next_wake_time = None try: self.start_current_prefix(start_slice) finished_cycle = True except TimeSliceExceeded: finished_cycle = False self.save_state() if not self.running: # someone might have used stopService() to shut us down return # either we finished a whole cycle, or we ran out of time now = time.time() this_slice = now - start_slice # this_slice/(this_slice+sleep_time) = percentage # this_slice/percentage = this_slice+sleep_time # sleep_time = (this_slice/percentage) - this_slice sleep_time = (this_slice / self.allowed_cpu_percentage) - this_slice # if the math gets weird, or a timequake happens, don't sleep # forever. Note that this means that, while a cycle is running, we # will process at least one bucket every 5 minutes, no matter how # long that bucket takes. sleep_time = max(0.0, min(sleep_time, 299)) if finished_cycle: # how long should we sleep between cycles? 
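# ---------------------------------------------------------------------------
# Worked example, not from the original source, of the pacing arithmetic used
# in start_slice() above: if a slice consumed this_slice seconds, sleeping
# (this_slice/allowed_cpu_percentage - this_slice) keeps the duty cycle at the
# allowed percentage, clamped so a timequake never puts the crawler to sleep
# for more than ~5 minutes.
def sketch_sleep_time(this_slice, allowed_cpu_percentage):
    sleep_time = (this_slice / allowed_cpu_percentage) - this_slice
    return max(0.0, min(sleep_time, 299))

assert sketch_sleep_time(1.0, 0.10) == 9.0     # 1s of work / (1s + 9s) == 10%
assert sketch_sleep_time(5.0, 0.10) == 45.0    # 5s of work / (5s + 45s) == 10%
# ---------------------------------------------------------------------------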
Don't run faster than # allowed_cpu_percentage says, but also run faster than # minimum_cycle_time self.sleeping_between_cycles = True sleep_time = max(sleep_time, self.minimum_cycle_time) else: self.sleeping_between_cycles = False self.current_sleep_time = sleep_time # for status page self.next_wake_time = now + sleep_time self.yielding(sleep_time) self.timer = reactor.callLater(sleep_time, self.start_slice) def start_current_prefix(self, start_slice): state = self.state if state["current-cycle"] is None: self.last_cycle_started_time = time.time() state["current-cycle-start-time"] = self.last_cycle_started_time if state["last-cycle-finished"] is None: state["current-cycle"] = 0 else: state["current-cycle"] = state["last-cycle-finished"] + 1 self.started_cycle(state["current-cycle"]) cycle = state["current-cycle"] for i in range(self.last_complete_prefix_index+1, len(self.prefixes)): # if we want to yield earlier, just raise TimeSliceExceeded() prefix = self.prefixes[i] prefixdir = os.path.join(self.sharedir, prefix) if i == self.bucket_cache[0]: buckets = self.bucket_cache[1] else: try: buckets = os.listdir(prefixdir) buckets.sort() except EnvironmentError: buckets = [] self.bucket_cache = (i, buckets) self.process_prefixdir(cycle, prefix, prefixdir, buckets, start_slice) self.last_complete_prefix_index = i now = time.time() if self.last_prefix_finished_time is not None: elapsed = now - self.last_prefix_finished_time self.last_prefix_elapsed_time = elapsed self.last_prefix_finished_time = now self.finished_prefix(cycle, prefix) if time.time() >= start_slice + self.cpu_slice: raise TimeSliceExceeded() # yay! we finished the whole cycle self.last_complete_prefix_index = -1 self.last_prefix_finished_time = None # don't include the sleep now = time.time() if self.last_cycle_started_time is not None: self.last_cycle_elapsed_time = now - self.last_cycle_started_time state["last-complete-bucket"] = None state["last-cycle-finished"] = cycle state["current-cycle"] = None self.finished_cycle(cycle) self.save_state() def process_prefixdir(self, cycle, prefix, prefixdir, buckets, start_slice): """This gets a list of bucket names (i.e. storage index strings, base32-encoded) in sorted order. You can override this if your crawler doesn't care about the actual shares, for example a crawler which merely keeps track of how many buckets are being managed by this server. Subclasses which *do* care about actual bucket should leave this method along, and implement process_bucket() instead. """ for bucket in buckets: if bucket <= self.state["last-complete-bucket"]: continue self.process_bucket(cycle, prefix, prefixdir, bucket) self.state["last-complete-bucket"] = bucket if time.time() >= start_slice + self.cpu_slice: raise TimeSliceExceeded() # the remaining methods are explictly for subclasses to implement. def started_cycle(self, cycle): """Notify a subclass that the crawler is about to start a cycle. This method is for subclasses to override. No upcall is necessary. """ pass def process_bucket(self, cycle, prefix, prefixdir, storage_index_b32): """Examine a single bucket. Subclasses should do whatever they want to do to the shares therein, then update self.state as necessary. If the crawler is never interrupted by SIGKILL, this method will be called exactly once per share (per cycle). If it *is* interrupted, then the next time the node is started, some amount of work will be duplicated, according to when self.save_state() was last called. 
By default, save_state() is called at the end of each timeslice, and after finished_cycle() returns, and when stopService() is called. To reduce the chance of duplicate work (i.e. to avoid adding multiple records to a database), you can call save_state() at the end of your process_bucket() method. This will reduce the maximum duplicated work to one bucket per SIGKILL. It will also add overhead, probably 1-20ms per bucket (and some disk writes), which will count against your allowed_cpu_percentage, and which may be considerable if process_bucket() runs quickly. This method is for subclasses to override. No upcall is necessary. """ pass def finished_prefix(self, cycle, prefix): """Notify a subclass that the crawler has just finished processing a prefix directory (all buckets with the same two-character/10bit prefix). To impose a limit on how much work might be duplicated by a SIGKILL that occurs during a timeslice, you can call self.save_state() here, but be aware that it may represent a significant performance hit. This method is for subclasses to override. No upcall is necessary. """ pass def finished_cycle(self, cycle): """Notify subclass that a cycle (one complete traversal of all prefixdirs) has just finished. 'cycle' is the number of the cycle that just finished. This method should perform summary work and update self.state to publish information to status displays. One-shot crawlers, such as those used to upgrade shares to a new format or populate a database for the first time, can call self.stopService() (or more likely self.disownServiceParent()) to prevent it from running a second time. Don't forget to set some persistent state so that the upgrader won't be run again the next time the node is started. This method is for subclasses to override. No upcall is necessary. """ pass def yielding(self, sleep_time): """The crawler is about to sleep for 'sleep_time' seconds. This method is mostly for the convenience of unit tests. This method is for subclasses to override. No upcall is necessary. """ pass class BucketCountingCrawler(ShareCrawler): """I keep track of how many buckets are being managed by this server. This is equivalent to the number of distributed files and directories for which I am providing storage. The actual number of files+directories in the full grid is probably higher (especially when there are more servers than 'N', the number of generated shares), because some files+directories will have shares on other servers instead of me. Also note that the number of buckets will differ from the number of shares in small grids, when more than one share is placed on a single server. """ minimum_cycle_time = 60*60 # we don't need this more than once an hour def __init__(self, server, statefile, num_sample_prefixes=1): ShareCrawler.__init__(self, server, statefile) self.num_sample_prefixes = num_sample_prefixes def add_initial_state(self): # ["bucket-counts"][cyclenum][prefix] = number # ["last-complete-cycle"] = cyclenum # maintained by base class # ["last-complete-bucket-count"] = number # ["storage-index-samples"][prefix] = (cyclenum, # list of SI strings (base32)) self.state.setdefault("bucket-counts", {}) self.state.setdefault("last-complete-bucket-count", None) self.state.setdefault("storage-index-samples", {}) def process_prefixdir(self, cycle, prefix, prefixdir, buckets, start_slice): # we override process_prefixdir() because we don't want to look at # the individual buckets. We'll save state after each one. 
On my # laptop, a mostly-empty storage server can process about 70 # prefixdirs in a 1.0s slice. if cycle not in self.state["bucket-counts"]: self.state["bucket-counts"][cycle] = {} self.state["bucket-counts"][cycle][prefix] = len(buckets) if prefix in self.prefixes[:self.num_sample_prefixes]: self.state["storage-index-samples"][prefix] = (cycle, buckets) def finished_cycle(self, cycle): last_counts = self.state["bucket-counts"].get(cycle, []) if len(last_counts) == len(self.prefixes): # great, we have a whole cycle. num_buckets = sum(last_counts.values()) self.state["last-complete-bucket-count"] = num_buckets # get rid of old counts for old_cycle in list(self.state["bucket-counts"].keys()): if old_cycle != cycle: del self.state["bucket-counts"][old_cycle] # get rid of old samples too for prefix in list(self.state["storage-index-samples"].keys()): old_cycle,buckets = self.state["storage-index-samples"][prefix] if old_cycle != cycle: del self.state["storage-index-samples"][prefix] tahoe-lafs-1.10.0/src/allmydata/storage/expirer.py000066400000000000000000000425001221140116300220370ustar00rootroot00000000000000import time, os, pickle, struct from allmydata.storage.crawler import ShareCrawler from allmydata.storage.shares import get_share_file from allmydata.storage.common import UnknownMutableContainerVersionError, \ UnknownImmutableContainerVersionError from twisted.python import log as twlog class LeaseCheckingCrawler(ShareCrawler): """I examine the leases on all shares, determining which are still valid and which have expired. I can remove the expired leases (if so configured), and the share will be deleted when the last lease is removed. I collect statistics on the leases and make these available to a web status page, including:: Space recovered during this cycle-so-far: actual (only if expiration_enabled=True): num-buckets, num-shares, sum of share sizes, real disk usage ('real disk usage' means we use stat(fn).st_blocks*512 and include any space used by the directory) what it would have been with the original lease expiration time what it would have been with our configured expiration time Prediction of space that will be recovered during the rest of this cycle Prediction of space that will be recovered by the entire current cycle. Space recovered during the last 10 cycles <-- saved in separate pickle Shares/buckets examined: this cycle-so-far prediction of rest of cycle during last 10 cycles <-- separate pickle start/finish time of last 10 cycles <-- separate pickle expiration time used for last 10 cycles <-- separate pickle Histogram of leases-per-share: this-cycle-to-date last 10 cycles <-- separate pickle Histogram of lease ages, buckets = 1day cycle-to-date last 10 cycles <-- separate pickle All cycle-to-date values remain valid until the start of the next cycle. 
""" slow_start = 360 # wait 6 minutes after startup minimum_cycle_time = 12*60*60 # not more than twice per day def __init__(self, server, statefile, historyfile, expiration_enabled, mode, override_lease_duration, # used if expiration_mode=="age" cutoff_date, # used if expiration_mode=="cutoff-date" sharetypes): self.historyfile = historyfile self.expiration_enabled = expiration_enabled self.mode = mode self.override_lease_duration = None self.cutoff_date = None if self.mode == "age": assert isinstance(override_lease_duration, (int, type(None))) self.override_lease_duration = override_lease_duration # seconds elif self.mode == "cutoff-date": assert isinstance(cutoff_date, int) # seconds-since-epoch assert cutoff_date is not None self.cutoff_date = cutoff_date else: raise ValueError("GC mode '%s' must be 'age' or 'cutoff-date'" % mode) self.sharetypes_to_expire = sharetypes ShareCrawler.__init__(self, server, statefile) def add_initial_state(self): # we fill ["cycle-to-date"] here (even though they will be reset in # self.started_cycle) just in case someone grabs our state before we # get started: unit tests do this so_far = self.create_empty_cycle_dict() self.state.setdefault("cycle-to-date", so_far) # in case we upgrade the code while a cycle is in progress, update # the keys individually for k in so_far: self.state["cycle-to-date"].setdefault(k, so_far[k]) # initialize history if not os.path.exists(self.historyfile): history = {} # cyclenum -> dict f = open(self.historyfile, "wb") pickle.dump(history, f) f.close() def create_empty_cycle_dict(self): recovered = self.create_empty_recovered_dict() so_far = {"corrupt-shares": [], "space-recovered": recovered, "lease-age-histogram": {}, # (minage,maxage)->count "leases-per-share-histogram": {}, # leasecount->numshares } return so_far def create_empty_recovered_dict(self): recovered = {} for a in ("actual", "original", "configured", "examined"): for b in ("buckets", "shares", "sharebytes", "diskbytes"): recovered[a+"-"+b] = 0 recovered[a+"-"+b+"-mutable"] = 0 recovered[a+"-"+b+"-immutable"] = 0 return recovered def started_cycle(self, cycle): self.state["cycle-to-date"] = self.create_empty_cycle_dict() def stat(self, fn): return os.stat(fn) def process_bucket(self, cycle, prefix, prefixdir, storage_index_b32): bucketdir = os.path.join(prefixdir, storage_index_b32) s = self.stat(bucketdir) would_keep_shares = [] wks = None for fn in os.listdir(bucketdir): try: shnum = int(fn) except ValueError: continue # non-numeric means not a sharefile sharefile = os.path.join(bucketdir, fn) try: wks = self.process_share(sharefile) except (UnknownMutableContainerVersionError, UnknownImmutableContainerVersionError, struct.error): twlog.msg("lease-checker error processing %s" % sharefile) twlog.err() which = (storage_index_b32, shnum) self.state["cycle-to-date"]["corrupt-shares"].append(which) wks = (1, 1, 1, "unknown") would_keep_shares.append(wks) sharetype = None if wks: # use the last share's sharetype as the buckettype sharetype = wks[3] rec = self.state["cycle-to-date"]["space-recovered"] self.increment(rec, "examined-buckets", 1) if sharetype: self.increment(rec, "examined-buckets-"+sharetype, 1) try: bucket_diskbytes = s.st_blocks * 512 except AttributeError: bucket_diskbytes = 0 # no stat().st_blocks on windows if sum([wks[0] for wks in would_keep_shares]) == 0: self.increment_bucketspace("original", bucket_diskbytes, sharetype) if sum([wks[1] for wks in would_keep_shares]) == 0: self.increment_bucketspace("configured", bucket_diskbytes, sharetype) if 
sum([wks[2] for wks in would_keep_shares]) == 0: self.increment_bucketspace("actual", bucket_diskbytes, sharetype) def process_share(self, sharefilename): # first, find out what kind of a share it is sf = get_share_file(sharefilename) sharetype = sf.sharetype now = time.time() s = self.stat(sharefilename) num_leases = 0 num_valid_leases_original = 0 num_valid_leases_configured = 0 expired_leases_configured = [] for li in sf.get_leases(): num_leases += 1 original_expiration_time = li.get_expiration_time() grant_renew_time = li.get_grant_renew_time_time() age = li.get_age() self.add_lease_age_to_histogram(age) # expired-or-not according to original expiration time if original_expiration_time > now: num_valid_leases_original += 1 # expired-or-not according to our configured age limit expired = False if self.mode == "age": age_limit = original_expiration_time if self.override_lease_duration is not None: age_limit = self.override_lease_duration if age > age_limit: expired = True else: assert self.mode == "cutoff-date" if grant_renew_time < self.cutoff_date: expired = True if sharetype not in self.sharetypes_to_expire: expired = False if expired: expired_leases_configured.append(li) else: num_valid_leases_configured += 1 so_far = self.state["cycle-to-date"] self.increment(so_far["leases-per-share-histogram"], num_leases, 1) self.increment_space("examined", s, sharetype) would_keep_share = [1, 1, 1, sharetype] if self.expiration_enabled: for li in expired_leases_configured: sf.cancel_lease(li.cancel_secret) if num_valid_leases_original == 0: would_keep_share[0] = 0 self.increment_space("original", s, sharetype) if num_valid_leases_configured == 0: would_keep_share[1] = 0 self.increment_space("configured", s, sharetype) if self.expiration_enabled: would_keep_share[2] = 0 self.increment_space("actual", s, sharetype) return would_keep_share def increment_space(self, a, s, sharetype): sharebytes = s.st_size try: # note that stat(2) says that st_blocks is 512 bytes, and that # st_blksize is "optimal file sys I/O ops blocksize", which is # independent of the block-size that st_blocks uses. diskbytes = s.st_blocks * 512 except AttributeError: # the docs say that st_blocks is only on linux. I also see it on # MacOS. But it isn't available on windows. 
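# ---------------------------------------------------------------------------
# Simplified sketch, not from the original source, of the expired-or-not
# decision process_share() makes above for a single lease.  It ignores the
# fallback to the lease's original expiration time (used in "age" mode when no
# override is configured) and the sharetypes_to_expire filter; the function
# name and arguments are hypothetical.
def sketch_lease_expired(mode, lease_age, grant_renew_time,
                         override_lease_duration, cutoff_date):
    if mode == "age":
        return lease_age > override_lease_duration    # both in seconds
    if mode == "cutoff-date":
        return grant_renew_time < cutoff_date         # seconds-since-epoch
    raise ValueError("GC mode '%s' must be 'age' or 'cutoff-date'" % mode)

# e.g. a lease renewed 40 days ago, with a 31-day override, is expired:
assert sketch_lease_expired("age", 40*24*60*60, None, 31*24*60*60, None)
# ---------------------------------------------------------------------------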
diskbytes = sharebytes so_far_sr = self.state["cycle-to-date"]["space-recovered"] self.increment(so_far_sr, a+"-shares", 1) self.increment(so_far_sr, a+"-sharebytes", sharebytes) self.increment(so_far_sr, a+"-diskbytes", diskbytes) if sharetype: self.increment(so_far_sr, a+"-shares-"+sharetype, 1) self.increment(so_far_sr, a+"-sharebytes-"+sharetype, sharebytes) self.increment(so_far_sr, a+"-diskbytes-"+sharetype, diskbytes) def increment_bucketspace(self, a, bucket_diskbytes, sharetype): rec = self.state["cycle-to-date"]["space-recovered"] self.increment(rec, a+"-diskbytes", bucket_diskbytes) self.increment(rec, a+"-buckets", 1) if sharetype: self.increment(rec, a+"-diskbytes-"+sharetype, bucket_diskbytes) self.increment(rec, a+"-buckets-"+sharetype, 1) def increment(self, d, k, delta=1): if k not in d: d[k] = 0 d[k] += delta def add_lease_age_to_histogram(self, age): bucket_interval = 24*60*60 bucket_number = int(age/bucket_interval) bucket_start = bucket_number * bucket_interval bucket_end = bucket_start + bucket_interval k = (bucket_start, bucket_end) self.increment(self.state["cycle-to-date"]["lease-age-histogram"], k, 1) def convert_lease_age_histogram(self, lah): # convert { (minage,maxage) : count } into [ (minage,maxage,count) ] # since the former is not JSON-safe (JSON dictionaries must have # string keys). json_safe_lah = [] for k in sorted(lah): (minage,maxage) = k json_safe_lah.append( (minage, maxage, lah[k]) ) return json_safe_lah def finished_cycle(self, cycle): # add to our history state, prune old history h = {} start = self.state["current-cycle-start-time"] now = time.time() h["cycle-start-finish-times"] = (start, now) h["expiration-enabled"] = self.expiration_enabled h["configured-expiration-mode"] = (self.mode, self.override_lease_duration, self.cutoff_date, self.sharetypes_to_expire) s = self.state["cycle-to-date"] # state["lease-age-histogram"] is a dictionary (mapping # (minage,maxage) tuple to a sharecount), but we report # self.get_state()["lease-age-histogram"] as a list of # (min,max,sharecount) tuples, because JSON can handle that better. # We record the list-of-tuples form into the history for the same # reason. lah = self.convert_lease_age_histogram(s["lease-age-histogram"]) h["lease-age-histogram"] = lah h["leases-per-share-histogram"] = s["leases-per-share-histogram"].copy() h["corrupt-shares"] = s["corrupt-shares"][:] # note: if ["shares-recovered"] ever acquires an internal dict, this # copy() needs to become a deepcopy h["space-recovered"] = s["space-recovered"].copy() history = pickle.load(open(self.historyfile, "rb")) history[cycle] = h while len(history) > 10: oldcycles = sorted(history.keys()) del history[oldcycles[0]] f = open(self.historyfile, "wb") pickle.dump(history, f) f.close() def get_state(self): """In addition to the crawler state described in ShareCrawler.get_state(), I return the following keys which are specific to the lease-checker/expirer. Note that the non-history keys (with 'cycle' in their names) are only present if a cycle is currently running. If the crawler is between cycles, it appropriate to show the latest item in the 'history' key instead. Also note that each history item has all the data in the 'cycle-to-date' value, plus cycle-start-finish-times. 
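# ---------------------------------------------------------------------------
# Worked example, not from the original source, of the one-day histogram
# bucketing done by add_lease_age_to_histogram() above; the resulting
# (minage, maxage) keys are what convert_lease_age_histogram() later turns
# into JSON-safe (minage, maxage, count) tuples.
def sketch_age_bucket(age_seconds):
    bucket_interval = 24*60*60
    n = int(age_seconds / bucket_interval)
    return (n * bucket_interval, (n + 1) * bucket_interval)

assert sketch_age_bucket(0) == (0, 86400)               # less than a day old
assert sketch_age_bucket(100000) == (86400, 172800)     # in its second day
# ---------------------------------------------------------------------------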
cycle-to-date: expiration-enabled configured-expiration-mode lease-age-histogram (list of (minage,maxage,sharecount) tuples) leases-per-share-histogram corrupt-shares (list of (si_b32,shnum) tuples, minimal verification) space-recovered estimated-remaining-cycle: # Values may be None if not enough data has been gathered to # produce an estimate. space-recovered estimated-current-cycle: # cycle-to-date plus estimated-remaining. Values may be None if # not enough data has been gathered to produce an estimate. space-recovered history: maps cyclenum to a dict with the following keys: cycle-start-finish-times expiration-enabled configured-expiration-mode lease-age-histogram leases-per-share-histogram corrupt-shares space-recovered The 'space-recovered' structure is a dictionary with the following keys: # 'examined' is what was looked at examined-buckets, examined-buckets-mutable, examined-buckets-immutable examined-shares, -mutable, -immutable examined-sharebytes, -mutable, -immutable examined-diskbytes, -mutable, -immutable # 'actual' is what was actually deleted actual-buckets, -mutable, -immutable actual-shares, -mutable, -immutable actual-sharebytes, -mutable, -immutable actual-diskbytes, -mutable, -immutable # would have been deleted, if the original lease timer was used original-buckets, -mutable, -immutable original-shares, -mutable, -immutable original-sharebytes, -mutable, -immutable original-diskbytes, -mutable, -immutable # would have been deleted, if our configured max_age was used configured-buckets, -mutable, -immutable configured-shares, -mutable, -immutable configured-sharebytes, -mutable, -immutable configured-diskbytes, -mutable, -immutable """ progress = self.get_progress() state = ShareCrawler.get_state(self) # does a shallow copy history = pickle.load(open(self.historyfile, "rb")) state["history"] = history if not progress["cycle-in-progress"]: del state["cycle-to-date"] return state so_far = state["cycle-to-date"].copy() state["cycle-to-date"] = so_far lah = so_far["lease-age-histogram"] so_far["lease-age-histogram"] = self.convert_lease_age_histogram(lah) so_far["expiration-enabled"] = self.expiration_enabled so_far["configured-expiration-mode"] = (self.mode, self.override_lease_duration, self.cutoff_date, self.sharetypes_to_expire) so_far_sr = so_far["space-recovered"] remaining_sr = {} remaining = {"space-recovered": remaining_sr} cycle_sr = {} cycle = {"space-recovered": cycle_sr} if progress["cycle-complete-percentage"] > 0.0: pc = progress["cycle-complete-percentage"] / 100.0 m = (1-pc)/pc for a in ("actual", "original", "configured", "examined"): for b in ("buckets", "shares", "sharebytes", "diskbytes"): for c in ("", "-mutable", "-immutable"): k = a+"-"+b+c remaining_sr[k] = m * so_far_sr[k] cycle_sr[k] = so_far_sr[k] + remaining_sr[k] else: for a in ("actual", "original", "configured", "examined"): for b in ("buckets", "shares", "sharebytes", "diskbytes"): for c in ("", "-mutable", "-immutable"): k = a+"-"+b+c remaining_sr[k] = None cycle_sr[k] = None state["estimated-remaining-cycle"] = remaining state["estimated-current-cycle"] = cycle return state tahoe-lafs-1.10.0/src/allmydata/storage/immutable.py000066400000000000000000000327701221140116300223500ustar00rootroot00000000000000import os, stat, struct, time from foolscap.api import Referenceable from zope.interface import implements from allmydata.interfaces import RIBucketWriter, RIBucketReader from allmydata.util import base32, fileutil, log from allmydata.util.assertutil import precondition from 
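# ---------------------------------------------------------------------------
# Worked example, not from the original source, of the extrapolation performed
# in the expirer's get_state() above: with pc the completed fraction of the
# cycle, the so-far numbers are scaled by m = (1-pc)/pc to estimate the
# remainder, and the full-cycle estimate is so-far + remainder.  The values
# below are fabricated.
pc = 0.25                           # 25% of the prefixes processed so far
m = (1 - pc) / pc                   # == 3.0
so_far_buckets = 10                 # e.g. "actual-buckets" so far
remaining_buckets = m * so_far_buckets
cycle_buckets = so_far_buckets + remaining_buckets
assert (m, remaining_buckets, cycle_buckets) == (3.0, 30.0, 40.0)
# ---------------------------------------------------------------------------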
allmydata.util.hashutil import constant_time_compare from allmydata.storage.lease import LeaseInfo from allmydata.storage.common import UnknownImmutableContainerVersionError, \ DataTooLargeError # each share file (in storage/shares/$SI/$SHNUM) contains lease information # and share data. The share data is accessed by RIBucketWriter.write and # RIBucketReader.read . The lease information is not accessible through these # interfaces. # The share file has the following layout: # 0x00: share file version number, four bytes, current version is 1 # 0x04: share data length, four bytes big-endian = A # See Footnote 1 below. # 0x08: number of leases, four bytes big-endian # 0x0c: beginning of share data (see immutable.layout.WriteBucketProxy) # A+0x0c = B: first lease. Lease format is: # B+0x00: owner number, 4 bytes big-endian, 0 is reserved for no-owner # B+0x04: renew secret, 32 bytes (SHA256) # B+0x24: cancel secret, 32 bytes (SHA256) # B+0x44: expiration time, 4 bytes big-endian seconds-since-epoch # B+0x48: next lease, or end of record # Footnote 1: as of Tahoe v1.3.0 this field is not used by storage servers, # but it is still filled in by storage servers in case the storage server # software gets downgraded from >= Tahoe v1.3.0 to < Tahoe v1.3.0, or the # share file is moved from one storage server to another. The value stored in # this field is truncated, so if the actual share data length is >= 2**32, # then the value stored in this field will be the actual share data length # modulo 2**32. class ShareFile: LEASE_SIZE = struct.calcsize(">L32s32sL") sharetype = "immutable" def __init__(self, filename, max_size=None, create=False): """ If max_size is not None then I won't allow more than max_size to be written to me. If create=True and max_size must not be None. """ precondition((max_size is not None) or (not create), max_size, create) self.home = filename self._max_size = max_size if create: # touch the file, so later callers will see that we're working on # it. Also construct the metadata. assert not os.path.exists(self.home) fileutil.make_dirs(os.path.dirname(self.home)) f = open(self.home, 'wb') # The second field -- the four-byte share data length -- is no # longer used as of Tahoe v1.3.0, but we continue to write it in # there in case someone downgrades a storage server from >= # Tahoe-1.3.0 to < Tahoe-1.3.0, or moves a share file from one # server to another, etc. We do saturation -- a share data length # larger than 2**32-1 (what can fit into the field) is marked as # the largest length that can fit into the field. That way, even # if this does happen, the old < v1.3.0 server will still allow # clients to read the first part of the share. f.write(struct.pack(">LLL", 1, min(2**32-1, max_size), 0)) f.close() self._lease_offset = max_size + 0x0c self._num_leases = 0 else: f = open(self.home, 'rb') filesize = os.path.getsize(self.home) (version, unused, num_leases) = struct.unpack(">LLL", f.read(0xc)) f.close() if version != 1: msg = "sharefile %s had version %d but we wanted 1" % \ (filename, version) raise UnknownImmutableContainerVersionError(msg) self._num_leases = num_leases self._lease_offset = filesize - (num_leases * self.LEASE_SIZE) self._data_offset = 0xc def unlink(self): os.unlink(self.home) def read_share_data(self, offset, length): precondition(offset >= 0) # reads beyond the end of the data are truncated. Reads that start # beyond the end of the data return an empty string. 
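# ---------------------------------------------------------------------------
# Illustrative sketch, not part of this module: reading the v1 immutable share
# header laid out in the comments above.  It only inspects the 12-byte header
# plus the file size; sketch_read_immutable_header() is a hypothetical helper,
# and note that the second header field is the deprecated (possibly truncated)
# share-data length described in Footnote 1.
import os, struct

def sketch_read_immutable_header(filename):
    f = open(filename, 'rb')
    version, old_data_length, num_leases = struct.unpack(">LLL", f.read(0xc))
    f.close()
    lease_size = struct.calcsize(">L32s32sL")       # 72 bytes per lease record
    lease_offset = os.path.getsize(filename) - num_leases * lease_size
    data_length = lease_offset - 0xc                # actual share data length
    return version, data_length, num_leases
# ---------------------------------------------------------------------------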
seekpos = self._data_offset+offset actuallength = max(0, min(length, self._lease_offset-seekpos)) if actuallength == 0: return "" f = open(self.home, 'rb') f.seek(seekpos) return f.read(actuallength) def write_share_data(self, offset, data): length = len(data) precondition(offset >= 0, offset) if self._max_size is not None and offset+length > self._max_size: raise DataTooLargeError(self._max_size, offset, length) f = open(self.home, 'rb+') real_offset = self._data_offset+offset f.seek(real_offset) assert f.tell() == real_offset f.write(data) f.close() def _write_lease_record(self, f, lease_number, lease_info): offset = self._lease_offset + lease_number * self.LEASE_SIZE f.seek(offset) assert f.tell() == offset f.write(lease_info.to_immutable_data()) def _read_num_leases(self, f): f.seek(0x08) (num_leases,) = struct.unpack(">L", f.read(4)) return num_leases def _write_num_leases(self, f, num_leases): f.seek(0x08) f.write(struct.pack(">L", num_leases)) def _truncate_leases(self, f, num_leases): f.truncate(self._lease_offset + num_leases * self.LEASE_SIZE) def get_leases(self): """Yields a LeaseInfo instance for all leases.""" f = open(self.home, 'rb') (version, unused, num_leases) = struct.unpack(">LLL", f.read(0xc)) f.seek(self._lease_offset) for i in range(num_leases): data = f.read(self.LEASE_SIZE) if data: yield LeaseInfo().from_immutable_data(data) def add_lease(self, lease_info): f = open(self.home, 'rb+') num_leases = self._read_num_leases(f) self._write_lease_record(f, num_leases, lease_info) self._write_num_leases(f, num_leases+1) f.close() def renew_lease(self, renew_secret, new_expire_time): for i,lease in enumerate(self.get_leases()): if constant_time_compare(lease.renew_secret, renew_secret): # yup. See if we need to update the owner time. if new_expire_time > lease.expiration_time: # yes lease.expiration_time = new_expire_time f = open(self.home, 'rb+') self._write_lease_record(f, i, lease) f.close() return raise IndexError("unable to renew non-existent lease") def add_or_renew_lease(self, lease_info): try: self.renew_lease(lease_info.renew_secret, lease_info.expiration_time) except IndexError: self.add_lease(lease_info) def cancel_lease(self, cancel_secret): """Remove a lease with the given cancel_secret. If the last lease is cancelled, the file will be removed. Return the number of bytes that were freed (by truncating the list of leases, and possibly by deleting the file. Raise IndexError if there was no lease with the given cancel_secret. """ leases = list(self.get_leases()) num_leases_removed = 0 for i,lease in enumerate(leases): if constant_time_compare(lease.cancel_secret, cancel_secret): leases[i] = None num_leases_removed += 1 if not num_leases_removed: raise IndexError("unable to find matching lease to cancel") if num_leases_removed: # pack and write out the remaining leases. We write these out in # the same order as they were added, so that if we crash while # doing this, we won't lose any non-cancelled leases. 
leases = [l for l in leases if l] # remove the cancelled leases f = open(self.home, 'rb+') for i,lease in enumerate(leases): self._write_lease_record(f, i, lease) self._write_num_leases(f, len(leases)) self._truncate_leases(f, len(leases)) f.close() space_freed = self.LEASE_SIZE * num_leases_removed if not len(leases): space_freed += os.stat(self.home)[stat.ST_SIZE] self.unlink() return space_freed class BucketWriter(Referenceable): implements(RIBucketWriter) def __init__(self, ss, incominghome, finalhome, max_size, lease_info, canary): self.ss = ss self.incominghome = incominghome self.finalhome = finalhome self._max_size = max_size # don't allow the client to write more than this self._canary = canary self._disconnect_marker = canary.notifyOnDisconnect(self._disconnected) self.closed = False self.throw_out_all_data = False self._sharefile = ShareFile(incominghome, create=True, max_size=max_size) # also, add our lease to the file now, so that other ones can be # added by simultaneous uploaders self._sharefile.add_lease(lease_info) def allocated_size(self): return self._max_size def remote_write(self, offset, data): start = time.time() precondition(not self.closed) if self.throw_out_all_data: return self._sharefile.write_share_data(offset, data) self.ss.add_latency("write", time.time() - start) self.ss.count("write") def remote_close(self): precondition(not self.closed) start = time.time() fileutil.make_dirs(os.path.dirname(self.finalhome)) fileutil.rename(self.incominghome, self.finalhome) try: # self.incominghome is like storage/shares/incoming/ab/abcde/4 . # We try to delete the parent (.../ab/abcde) to avoid leaving # these directories lying around forever, but the delete might # fail if we're working on another share for the same storage # index (like ab/abcde/5). The alternative approach would be to # use a hierarchy of objects (PrefixHolder, BucketHolder, # ShareWriter), each of which is responsible for a single # directory on disk, and have them use reference counting of # their children to know when they should do the rmdir. This # approach is simpler, but relies on os.rmdir refusing to delete # a non-empty directory. Do *not* use fileutil.rm_dir() here! os.rmdir(os.path.dirname(self.incominghome)) # we also delete the grandparent (prefix) directory, .../ab , # again to avoid leaving directories lying around. This might # fail if there is another bucket open that shares a prefix (like # ab/abfff). os.rmdir(os.path.dirname(os.path.dirname(self.incominghome))) # we leave the great-grandparent (incoming/) directory in place. except EnvironmentError: # ignore the "can't rmdir because the directory is not empty" # exceptions, those are normal consequences of the # above-mentioned conditions. 
pass self._sharefile = None self.closed = True self._canary.dontNotifyOnDisconnect(self._disconnect_marker) filelen = os.stat(self.finalhome)[stat.ST_SIZE] self.ss.bucket_writer_closed(self, filelen) self.ss.add_latency("close", time.time() - start) self.ss.count("close") def _disconnected(self): if not self.closed: self._abort() def remote_abort(self): log.msg("storage: aborting sharefile %s" % self.incominghome, facility="tahoe.storage", level=log.UNUSUAL) if not self.closed: self._canary.dontNotifyOnDisconnect(self._disconnect_marker) self._abort() self.ss.count("abort") def _abort(self): if self.closed: return os.remove(self.incominghome) # if we were the last share to be moved, remove the incoming/ # directory that was our parent parentdir = os.path.split(self.incominghome)[0] if not os.listdir(parentdir): os.rmdir(parentdir) self._sharefile = None # We are now considered closed for further writing. We must tell # the storage server about this so that it stops expecting us to # use the space it allocated for us earlier. self.closed = True self.ss.bucket_writer_closed(self, 0) class BucketReader(Referenceable): implements(RIBucketReader) def __init__(self, ss, sharefname, storage_index=None, shnum=None): self.ss = ss self._share_file = ShareFile(sharefname) self.storage_index = storage_index self.shnum = shnum def __repr__(self): return "<%s %s %s>" % (self.__class__.__name__, base32.b2a_l(self.storage_index[:8], 60), self.shnum) def remote_read(self, offset, length): start = time.time() data = self._share_file.read_share_data(offset, length) self.ss.add_latency("read", time.time() - start) self.ss.count("read") return data def remote_advise_corrupt_share(self, reason): return self.ss.remote_advise_corrupt_share("immutable", self.storage_index, self.shnum, reason) tahoe-lafs-1.10.0/src/allmydata/storage/lease.py000066400000000000000000000033321221140116300214520ustar00rootroot00000000000000import struct, time class LeaseInfo: def __init__(self, owner_num=None, renew_secret=None, cancel_secret=None, expiration_time=None, nodeid=None): self.owner_num = owner_num self.renew_secret = renew_secret self.cancel_secret = cancel_secret self.expiration_time = expiration_time if nodeid is not None: assert isinstance(nodeid, str) assert len(nodeid) == 20 self.nodeid = nodeid def get_expiration_time(self): return self.expiration_time def get_grant_renew_time_time(self): # hack, based upon fixed 31day expiration period return self.expiration_time - 31*24*60*60 def get_age(self): return time.time() - self.get_grant_renew_time_time() def from_immutable_data(self, data): (self.owner_num, self.renew_secret, self.cancel_secret, self.expiration_time) = struct.unpack(">L32s32sL", data) self.nodeid = None return self def to_immutable_data(self): return struct.pack(">L32s32sL", self.owner_num, self.renew_secret, self.cancel_secret, int(self.expiration_time)) def to_mutable_data(self): return struct.pack(">LL32s32s20s", self.owner_num, int(self.expiration_time), self.renew_secret, self.cancel_secret, self.nodeid) def from_mutable_data(self, data): (self.owner_num, self.expiration_time, self.renew_secret, self.cancel_secret, self.nodeid) = struct.unpack(">LL32s32s20s", data) return self tahoe-lafs-1.10.0/src/allmydata/storage/mutable.py000066400000000000000000000434601221140116300220200ustar00rootroot00000000000000import os, stat, struct from allmydata.interfaces import BadWriteEnablerError from allmydata.util import idlib, log from allmydata.util.assertutil import precondition from allmydata.util.hashutil 
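# ---------------------------------------------------------------------------
# Illustrative sketch, not from the original source: round-tripping the
# immutable lease record format defined by to_immutable_data() above
# (owner number, renew secret, cancel secret, expiration time).  The 32-byte
# secrets here are fabricated placeholders, not real SHA256 outputs.
import struct, time

record = struct.pack(">L32s32sL",
                     0,                                  # 0 == "no owner"
                     "R" * 32,                           # renew secret
                     "C" * 32,                           # cancel secret
                     int(time.time() + 31*24*60*60))     # expires in 31 days
assert len(record) == 72
owner_num, renew_secret, cancel_secret, expiration_time = \
    struct.unpack(">L32s32sL", record)
assert renew_secret == "R" * 32
# ---------------------------------------------------------------------------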
import constant_time_compare from allmydata.storage.lease import LeaseInfo from allmydata.storage.common import UnknownMutableContainerVersionError, \ DataTooLargeError from allmydata.mutable.layout import MAX_MUTABLE_SHARE_SIZE # the MutableShareFile is like the ShareFile, but used for mutable data. It # has a different layout. See docs/mutable.txt for more details. # # offset size name # 1 0 32 magic verstr "tahoe mutable container v1" plus binary # 2 32 20 write enabler's nodeid # 3 52 32 write enabler # 4 84 8 data size (actual share data present) (a) # 5 92 8 offset of (8) count of extra leases (after data) # 6 100 368 four leases, 92 bytes each # 0 4 ownerid (0 means "no lease here") # 4 4 expiration timestamp # 8 32 renewal token # 40 32 cancel token # 72 20 nodeid which accepted the tokens # 7 468 (a) data # 8 ?? 4 count of extra leases # 9 ?? n*92 extra leases # The struct module doc says that L's are 4 bytes in size., and that Q's are # 8 bytes in size. Since compatibility depends upon this, double-check it. assert struct.calcsize(">L") == 4, struct.calcsize(">L") assert struct.calcsize(">Q") == 8, struct.calcsize(">Q") class MutableShareFile: sharetype = "mutable" DATA_LENGTH_OFFSET = struct.calcsize(">32s20s32s") EXTRA_LEASE_OFFSET = DATA_LENGTH_OFFSET + 8 HEADER_SIZE = struct.calcsize(">32s20s32sQQ") # doesn't include leases LEASE_SIZE = struct.calcsize(">LL32s32s20s") assert LEASE_SIZE == 92 DATA_OFFSET = HEADER_SIZE + 4*LEASE_SIZE assert DATA_OFFSET == 468, DATA_OFFSET # our sharefiles share with a recognizable string, plus some random # binary data to reduce the chance that a regular text file will look # like a sharefile. MAGIC = "Tahoe mutable container v1\n" + "\x75\x09\x44\x03\x8e" assert len(MAGIC) == 32 MAX_SIZE = MAX_MUTABLE_SHARE_SIZE # TODO: decide upon a policy for max share size def __init__(self, filename, parent=None): self.home = filename if os.path.exists(self.home): # we don't cache anything, just check the magic f = open(self.home, 'rb') data = f.read(self.HEADER_SIZE) (magic, write_enabler_nodeid, write_enabler, data_length, extra_least_offset) = \ struct.unpack(">32s20s32sQQ", data) if magic != self.MAGIC: msg = "sharefile %s had magic '%r' but we wanted '%r'" % \ (filename, magic, self.MAGIC) raise UnknownMutableContainerVersionError(msg) self.parent = parent # for logging def log(self, *args, **kwargs): return self.parent.log(*args, **kwargs) def create(self, my_nodeid, write_enabler): assert not os.path.exists(self.home) data_length = 0 extra_lease_offset = (self.HEADER_SIZE + 4 * self.LEASE_SIZE + data_length) assert extra_lease_offset == self.DATA_OFFSET # true at creation num_extra_leases = 0 f = open(self.home, 'wb') header = struct.pack(">32s20s32sQQ", self.MAGIC, my_nodeid, write_enabler, data_length, extra_lease_offset, ) leases = ("\x00"*self.LEASE_SIZE) * 4 f.write(header + leases) # data goes here, empty after creation f.write(struct.pack(">L", num_extra_leases)) # extra leases go here, none at creation f.close() def unlink(self): os.unlink(self.home) def _read_data_length(self, f): f.seek(self.DATA_LENGTH_OFFSET) (data_length,) = struct.unpack(">Q", f.read(8)) return data_length def _write_data_length(self, f, data_length): f.seek(self.DATA_LENGTH_OFFSET) f.write(struct.pack(">Q", data_length)) def _read_share_data(self, f, offset, length): precondition(offset >= 0) data_length = self._read_data_length(f) if offset+length > data_length: # reads beyond the end of the data are truncated. 
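# ---------------------------------------------------------------------------
# Illustrative check, not from the original source, of the mutable container
# offsets documented in the table above, using struct.calcsize the same way
# the class-level asserts do.  The format strings are the ones used by
# MutableShareFile; everything else is a worked example.
import struct

HEADER = ">32s20s32sQQ"     # magic, WE nodeid, write enabler, data size, extra-lease offset
LEASE = ">LL32s32s20s"      # ownerid, expiration, renew token, cancel token, nodeid
assert struct.calcsize(HEADER) == 100
assert struct.calcsize(LEASE) == 92
DATA_OFFSET_SKETCH = struct.calcsize(HEADER) + 4 * struct.calcsize(LEASE)
assert DATA_OFFSET_SKETCH == 468    # row 7 of the table: share data starts here
# ---------------------------------------------------------------------------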
Reads that # start beyond the end of the data return an empty string. length = max(0, data_length-offset) if length == 0: return "" precondition(offset+length <= data_length) f.seek(self.DATA_OFFSET+offset) data = f.read(length) return data def _read_extra_lease_offset(self, f): f.seek(self.EXTRA_LEASE_OFFSET) (extra_lease_offset,) = struct.unpack(">Q", f.read(8)) return extra_lease_offset def _write_extra_lease_offset(self, f, offset): f.seek(self.EXTRA_LEASE_OFFSET) f.write(struct.pack(">Q", offset)) def _read_num_extra_leases(self, f): offset = self._read_extra_lease_offset(f) f.seek(offset) (num_extra_leases,) = struct.unpack(">L", f.read(4)) return num_extra_leases def _write_num_extra_leases(self, f, num_leases): extra_lease_offset = self._read_extra_lease_offset(f) f.seek(extra_lease_offset) f.write(struct.pack(">L", num_leases)) def _change_container_size(self, f, new_container_size): if new_container_size > self.MAX_SIZE: raise DataTooLargeError() old_extra_lease_offset = self._read_extra_lease_offset(f) new_extra_lease_offset = self.DATA_OFFSET + new_container_size if new_extra_lease_offset < old_extra_lease_offset: # TODO: allow containers to shrink. For now they remain large. return num_extra_leases = self._read_num_extra_leases(f) f.seek(old_extra_lease_offset) leases_size = 4 + num_extra_leases * self.LEASE_SIZE extra_lease_data = f.read(leases_size) # Zero out the old lease info (in order to minimize the chance that # it could accidentally be exposed to a reader later, re #1528). f.seek(old_extra_lease_offset) f.write('\x00' * leases_size) f.flush() # An interrupt here will corrupt the leases. f.seek(new_extra_lease_offset) f.write(extra_lease_data) self._write_extra_lease_offset(f, new_extra_lease_offset) def _write_share_data(self, f, offset, data): length = len(data) precondition(offset >= 0) data_length = self._read_data_length(f) extra_lease_offset = self._read_extra_lease_offset(f) if offset+length >= data_length: # They are expanding their data size. if self.DATA_OFFSET+offset+length > extra_lease_offset: # TODO: allow containers to shrink. For now, they remain # large. # Their new data won't fit in the current container, so we # have to move the leases. With luck, they're expanding it # more than the size of the extra lease block, which will # minimize the corrupt-the-share window self._change_container_size(f, offset+length) extra_lease_offset = self._read_extra_lease_offset(f) # an interrupt here is ok.. the container has been enlarged # but the data remains untouched assert self.DATA_OFFSET+offset+length <= extra_lease_offset # Their data now fits in the current container. We must write # their new data and modify the recorded data size. # Fill any newly exposed empty space with 0's. 
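    # Illustrative aside (not part of the original file): a minimal sketch of
    # the zero-fill-then-write pattern that the lines just below apply to the
    # real share file, shown here against a plain seekable file object whose
    # data region starts at byte 0 (the real method offsets everything by
    # DATA_OFFSET). The helper name and standalone framing are hypothetical,
    # for explanation only.
    def _sketch_write_with_zero_fill(f, current_length, offset, data):
        # A write that starts past the current end of the data leaves a hole;
        # the hole is filled with zero bytes so later reads of it see '\x00's.
        if offset > current_length:
            f.seek(current_length)
            f.write('\x00' * (offset - current_length))
        f.seek(offset)
        f.write(data)
        return max(current_length, offset + len(data))
    # e.g. after _sketch_write_with_zero_fill(f, 6, 10, "XY") on a file holding
    # "abcdef", the file holds "abcdef\x00\x00\x00\x00XY" and the call returns 12.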
if offset > data_length: f.seek(self.DATA_OFFSET+data_length) f.write('\x00'*(offset - data_length)) f.flush() new_data_length = offset+length self._write_data_length(f, new_data_length) # an interrupt here will result in a corrupted share # now all that's left to do is write out their data f.seek(self.DATA_OFFSET+offset) f.write(data) return def _write_lease_record(self, f, lease_number, lease_info): extra_lease_offset = self._read_extra_lease_offset(f) num_extra_leases = self._read_num_extra_leases(f) if lease_number < 4: offset = self.HEADER_SIZE + lease_number * self.LEASE_SIZE elif (lease_number-4) < num_extra_leases: offset = (extra_lease_offset + 4 + (lease_number-4)*self.LEASE_SIZE) else: # must add an extra lease record self._write_num_extra_leases(f, num_extra_leases+1) offset = (extra_lease_offset + 4 + (lease_number-4)*self.LEASE_SIZE) f.seek(offset) assert f.tell() == offset f.write(lease_info.to_mutable_data()) def _read_lease_record(self, f, lease_number): # returns a LeaseInfo instance, or None extra_lease_offset = self._read_extra_lease_offset(f) num_extra_leases = self._read_num_extra_leases(f) if lease_number < 4: offset = self.HEADER_SIZE + lease_number * self.LEASE_SIZE elif (lease_number-4) < num_extra_leases: offset = (extra_lease_offset + 4 + (lease_number-4)*self.LEASE_SIZE) else: raise IndexError("No such lease number %d" % lease_number) f.seek(offset) assert f.tell() == offset data = f.read(self.LEASE_SIZE) lease_info = LeaseInfo().from_mutable_data(data) if lease_info.owner_num == 0: return None return lease_info def _get_num_lease_slots(self, f): # how many places do we have allocated for leases? Not all of them # are filled. num_extra_leases = self._read_num_extra_leases(f) return 4+num_extra_leases def _get_first_empty_lease_slot(self, f): # return an int with the index of an empty slot, or None if we do not # currently have an empty slot for i in range(self._get_num_lease_slots(f)): if self._read_lease_record(f, i) is None: return i return None def get_leases(self): """Yields a LeaseInfo instance for all leases.""" f = open(self.home, 'rb') for i, lease in self._enumerate_leases(f): yield lease f.close() def _enumerate_leases(self, f): for i in range(self._get_num_lease_slots(f)): try: data = self._read_lease_record(f, i) if data is not None: yield i,data except IndexError: return def add_lease(self, lease_info): precondition(lease_info.owner_num != 0) # 0 means "no lease here" f = open(self.home, 'rb+') num_lease_slots = self._get_num_lease_slots(f) empty_slot = self._get_first_empty_lease_slot(f) if empty_slot is not None: self._write_lease_record(f, empty_slot, lease_info) else: self._write_lease_record(f, num_lease_slots, lease_info) f.close() def renew_lease(self, renew_secret, new_expire_time): accepting_nodeids = set() f = open(self.home, 'rb+') for (leasenum,lease) in self._enumerate_leases(f): if constant_time_compare(lease.renew_secret, renew_secret): # yup. See if we need to update the owner time. if new_expire_time > lease.expiration_time: # yes lease.expiration_time = new_expire_time self._write_lease_record(f, leasenum, lease) f.close() return accepting_nodeids.add(lease.nodeid) f.close() # Return the accepting_nodeids set, to give the client a chance to # update the leases on a share which has been migrated from its # original server to a new one. msg = ("Unable to renew non-existent lease. I have leases accepted by" " nodeids: ") msg += ",".join([("'%s'" % idlib.nodeid_b2a(anid)) for anid in accepting_nodeids]) msg += " ." 
raise IndexError(msg) def add_or_renew_lease(self, lease_info): precondition(lease_info.owner_num != 0) # 0 means "no lease here" try: self.renew_lease(lease_info.renew_secret, lease_info.expiration_time) except IndexError: self.add_lease(lease_info) def cancel_lease(self, cancel_secret): """Remove any leases with the given cancel_secret. If the last lease is cancelled, the file will be removed. Return the number of bytes that were freed (by truncating the list of leases, and possibly by deleting the file. Raise IndexError if there was no lease with the given cancel_secret.""" accepting_nodeids = set() modified = 0 remaining = 0 blank_lease = LeaseInfo(owner_num=0, renew_secret="\x00"*32, cancel_secret="\x00"*32, expiration_time=0, nodeid="\x00"*20) f = open(self.home, 'rb+') for (leasenum,lease) in self._enumerate_leases(f): accepting_nodeids.add(lease.nodeid) if constant_time_compare(lease.cancel_secret, cancel_secret): self._write_lease_record(f, leasenum, blank_lease) modified += 1 else: remaining += 1 if modified: freed_space = self._pack_leases(f) f.close() if not remaining: freed_space += os.stat(self.home)[stat.ST_SIZE] self.unlink() return freed_space msg = ("Unable to cancel non-existent lease. I have leases " "accepted by nodeids: ") msg += ",".join([("'%s'" % idlib.nodeid_b2a(anid)) for anid in accepting_nodeids]) msg += " ." raise IndexError(msg) def _pack_leases(self, f): # TODO: reclaim space from cancelled leases return 0 def _read_write_enabler_and_nodeid(self, f): f.seek(0) data = f.read(self.HEADER_SIZE) (magic, write_enabler_nodeid, write_enabler, data_length, extra_least_offset) = \ struct.unpack(">32s20s32sQQ", data) assert magic == self.MAGIC return (write_enabler, write_enabler_nodeid) def readv(self, readv): datav = [] f = open(self.home, 'rb') for (offset, length) in readv: datav.append(self._read_share_data(f, offset, length)) f.close() return datav # def remote_get_length(self): # f = open(self.home, 'rb') # data_length = self._read_data_length(f) # f.close() # return data_length def check_write_enabler(self, write_enabler, si_s): f = open(self.home, 'rb+') (real_write_enabler, write_enabler_nodeid) = \ self._read_write_enabler_and_nodeid(f) f.close() # avoid a timing attack #if write_enabler != real_write_enabler: if not constant_time_compare(write_enabler, real_write_enabler): # accomodate share migration by reporting the nodeid used for the # old write enabler. self.log(format="bad write enabler on SI %(si)s," " recorded by nodeid %(nodeid)s", facility="tahoe.storage", level=log.WEIRD, umid="cE1eBQ", si=si_s, nodeid=idlib.nodeid_b2a(write_enabler_nodeid)) msg = "The write enabler was recorded by nodeid '%s'." % \ (idlib.nodeid_b2a(write_enabler_nodeid),) raise BadWriteEnablerError(msg) def check_testv(self, testv): test_good = True f = open(self.home, 'rb+') for (offset, length, operator, specimen) in testv: data = self._read_share_data(f, offset, length) if not testv_compare(data, operator, specimen): test_good = False break f.close() return test_good def writev(self, datav, new_length): f = open(self.home, 'rb+') for (offset, data) in datav: self._write_share_data(f, offset, data) if new_length is not None: cur_length = self._read_data_length(f) if new_length < cur_length: self._write_data_length(f, new_length) # TODO: if we're going to shrink the share file when the # share data has shrunk, then call # self._change_container_size() here. 
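    # Illustrative aside (not part of the original file): a minimal sketch of
    # the writev semantics above, applied to an in-memory buffer instead of the
    # share file. Each (offset, data) pair is written (holes become zero
    # bytes), and a non-None new_length only ever reduces the recorded data
    # size; the real method leaves the container the same size, so truncating
    # the buffer here is just the in-memory approximation. Hypothetical helper,
    # Python 2 byte strings assumed.
    def _sketch_apply_writev(buf, datav, new_length=None):
        buf = bytearray(buf)
        for (offset, data) in datav:
            end = offset + len(data)
            if end > len(buf):
                buf.extend('\x00' * (end - len(buf)))
            buf[offset:end] = data
        if new_length is not None and new_length < len(buf):
            del buf[new_length:]
        return str(buf)
    # e.g. _sketch_apply_writev("abcdef", [(8, "XY")]) -> "abcdef\x00\x00XY"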
f.close() def testv_compare(a, op, b): assert op in ("lt", "le", "eq", "ne", "ge", "gt") if op == "lt": return a < b if op == "le": return a <= b if op == "eq": return a == b if op == "ne": return a != b if op == "ge": return a >= b if op == "gt": return a > b # never reached class EmptyShare: def check_testv(self, testv): test_good = True for (offset, length, operator, specimen) in testv: data = "" if not testv_compare(data, operator, specimen): test_good = False break return test_good def create_mutable_sharefile(filename, my_nodeid, write_enabler, parent): ms = MutableShareFile(filename, parent) ms.create(my_nodeid, write_enabler) del ms return MutableShareFile(filename, parent) tahoe-lafs-1.10.0/src/allmydata/storage/server.py000066400000000000000000000567211221140116300217010ustar00rootroot00000000000000import os, re, weakref, struct, time from foolscap.api import Referenceable from twisted.application import service from zope.interface import implements from allmydata.interfaces import RIStorageServer, IStatsProducer from allmydata.util import fileutil, idlib, log, time_format import allmydata # for __full_version__ from allmydata.storage.common import si_b2a, si_a2b, storage_index_to_dir _pyflakes_hush = [si_b2a, si_a2b, storage_index_to_dir] # re-exported from allmydata.storage.lease import LeaseInfo from allmydata.storage.mutable import MutableShareFile, EmptyShare, \ create_mutable_sharefile from allmydata.mutable.layout import MAX_MUTABLE_SHARE_SIZE from allmydata.storage.immutable import ShareFile, BucketWriter, BucketReader from allmydata.storage.crawler import BucketCountingCrawler from allmydata.storage.expirer import LeaseCheckingCrawler # storage/ # storage/shares/incoming # incoming/ holds temp dirs named $START/$STORAGEINDEX/$SHARENUM which will # be moved to storage/shares/$START/$STORAGEINDEX/$SHARENUM upon success # storage/shares/$START/$STORAGEINDEX # storage/shares/$START/$STORAGEINDEX/$SHARENUM # Where "$START" denotes the first 10 bits worth of $STORAGEINDEX (that's 2 # base-32 chars). 
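# Illustrative aside (not part of the original file): a minimal sketch of the
# $START/$STORAGEINDEX/$SHARENUM layout described in the comments above,
# assuming si_s is the storage index already rendered in base-32 (as si_b2a
# would produce). The helper name is hypothetical, for explanation only.
import os as _os_sketch

def _sketch_share_path(sharedir, si_s, shnum):
    # "$START" is the first two base-32 characters -- the first 10 bits -- of
    # the storage index; it spreads shares across prefix directories.
    return _os_sketch.path.join(sharedir, si_s[:2], si_s, "%d" % shnum)

# e.g. _sketch_share_path("storage/shares", "b32storageindex", 7)
#   -> "storage/shares/b3/b32storageindex/7"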
# $SHARENUM matches this regex: NUM_RE=re.compile("^[0-9]+$") class StorageServer(service.MultiService, Referenceable): implements(RIStorageServer, IStatsProducer) name = 'storage' LeaseCheckerClass = LeaseCheckingCrawler def __init__(self, storedir, nodeid, reserved_space=0, discard_storage=False, readonly_storage=False, stats_provider=None, expiration_enabled=False, expiration_mode="age", expiration_override_lease_duration=None, expiration_cutoff_date=None, expiration_sharetypes=("mutable", "immutable")): service.MultiService.__init__(self) assert isinstance(nodeid, str) assert len(nodeid) == 20 self.my_nodeid = nodeid self.storedir = storedir sharedir = os.path.join(storedir, "shares") fileutil.make_dirs(sharedir) self.sharedir = sharedir # we don't actually create the corruption-advisory dir until necessary self.corruption_advisory_dir = os.path.join(storedir, "corruption-advisories") self.reserved_space = int(reserved_space) self.no_storage = discard_storage self.readonly_storage = readonly_storage self.stats_provider = stats_provider if self.stats_provider: self.stats_provider.register_producer(self) self.incomingdir = os.path.join(sharedir, 'incoming') self._clean_incomplete() fileutil.make_dirs(self.incomingdir) self._active_writers = weakref.WeakKeyDictionary() log.msg("StorageServer created", facility="tahoe.storage") if reserved_space: if self.get_available_space() is None: log.msg("warning: [storage]reserved_space= is set, but this platform does not support an API to get disk statistics (statvfs(2) or GetDiskFreeSpaceEx), so this reservation cannot be honored", umin="0wZ27w", level=log.UNUSUAL) self.latencies = {"allocate": [], # immutable "write": [], "close": [], "read": [], "get": [], "writev": [], # mutable "readv": [], "add-lease": [], # both "renew": [], "cancel": [], } self.add_bucket_counter() statefile = os.path.join(self.storedir, "lease_checker.state") historyfile = os.path.join(self.storedir, "lease_checker.history") klass = self.LeaseCheckerClass self.lease_checker = klass(self, statefile, historyfile, expiration_enabled, expiration_mode, expiration_override_lease_duration, expiration_cutoff_date, expiration_sharetypes) self.lease_checker.setServiceParent(self) def __repr__(self): return "" % (idlib.shortnodeid_b2a(self.my_nodeid),) def have_shares(self): # quick test to decide if we need to commit to an implicit # permutation-seed or if we should use a new one return bool(set(os.listdir(self.sharedir)) - set(["incoming"])) def add_bucket_counter(self): statefile = os.path.join(self.storedir, "bucket_counter.state") self.bucket_counter = BucketCountingCrawler(self, statefile) self.bucket_counter.setServiceParent(self) def count(self, name, delta=1): if self.stats_provider: self.stats_provider.count("storage_server." + name, delta) def add_latency(self, category, latency): a = self.latencies[category] a.append(latency) if len(a) > 1000: self.latencies[category] = a[-1000:] def get_latencies(self): """Return a dict, indexed by category, that contains a dict of latency numbers for each category. If there are sufficient samples for unambiguous interpretation, each dict will contain the following keys: mean, 01_0_percentile, 10_0_percentile, 50_0_percentile (median), 90_0_percentile, 95_0_percentile, 99_0_percentile, 99_9_percentile. If there are insufficient samples for a given percentile to be interpreted unambiguously that percentile will be reported as None. 
If no samples have been collected for the given category, then that category name will not be present in the return value. """ # note that Amazon's Dynamo paper says they use 99.9% percentile. output = {} for category in self.latencies: if not self.latencies[category]: continue stats = {} samples = self.latencies[category][:] count = len(samples) stats["samplesize"] = count samples.sort() if count > 1: stats["mean"] = sum(samples) / count else: stats["mean"] = None orderstatlist = [(0.01, "01_0_percentile", 100), (0.1, "10_0_percentile", 10),\ (0.50, "50_0_percentile", 10), (0.90, "90_0_percentile", 10),\ (0.95, "95_0_percentile", 20), (0.99, "99_0_percentile", 100),\ (0.999, "99_9_percentile", 1000)] for percentile, percentilestring, minnumtoobserve in orderstatlist: if count >= minnumtoobserve: stats[percentilestring] = samples[int(percentile*count)] else: stats[percentilestring] = None output[category] = stats return output def log(self, *args, **kwargs): if "facility" not in kwargs: kwargs["facility"] = "tahoe.storage" return log.msg(*args, **kwargs) def _clean_incomplete(self): fileutil.rm_dir(self.incomingdir) def get_stats(self): # remember: RIStatsProvider requires that our return dict # contains numeric values. stats = { 'storage_server.allocated': self.allocated_size(), } stats['storage_server.reserved_space'] = self.reserved_space for category,ld in self.get_latencies().items(): for name,v in ld.items(): stats['storage_server.latencies.%s.%s' % (category, name)] = v try: disk = fileutil.get_disk_stats(self.sharedir, self.reserved_space) writeable = disk['avail'] > 0 # spacetime predictors should use disk_avail / (d(disk_used)/dt) stats['storage_server.disk_total'] = disk['total'] stats['storage_server.disk_used'] = disk['used'] stats['storage_server.disk_free_for_root'] = disk['free_for_root'] stats['storage_server.disk_free_for_nonroot'] = disk['free_for_nonroot'] stats['storage_server.disk_avail'] = disk['avail'] except AttributeError: writeable = True except EnvironmentError: log.msg("OS call to get disk statistics failed", level=log.UNUSUAL) writeable = False if self.readonly_storage: stats['storage_server.disk_avail'] = 0 writeable = False stats['storage_server.accepting_immutable_shares'] = int(writeable) s = self.bucket_counter.get_state() bucket_count = s.get("last-complete-bucket-count") if bucket_count: stats['storage_server.total_bucket_count'] = bucket_count return stats def get_available_space(self): """Returns available space for share storage in bytes, or None if no API to get this information is available.""" if self.readonly_storage: return 0 return fileutil.get_available_space(self.sharedir, self.reserved_space) def allocated_size(self): space = 0 for bw in self._active_writers: space += bw.allocated_size() return space def remote_get_version(self): remaining_space = self.get_available_space() if remaining_space is None: # We're on a platform that has no API to get disk stats. 
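        # Illustrative aside (not part of the original file): a minimal sketch
        # of the order-statistic reporting done by get_latencies() above,
        # where 'samples' is the list of recorded latencies for one category
        # and min_samples_to_observe plays the role of the minnumtoobserve
        # column in orderstatlist. Hypothetical helper, for explanation only.
        def _sketch_percentile(samples, percentile, min_samples_to_observe):
            # with too few samples the percentile is ambiguous, so report None
            if len(samples) < min_samples_to_observe:
                return None
            ordered = sorted(samples)
            return ordered[int(percentile * len(ordered))]
        # e.g. _sketch_percentile(range(10), 0.5, 2) -> 5
        #      _sketch_percentile([3, 1, 2], 0.5, 10) -> None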
remaining_space = 2**64 version = { "http://allmydata.org/tahoe/protocols/storage/v1" : { "maximum-immutable-share-size": remaining_space, "maximum-mutable-share-size": MAX_MUTABLE_SHARE_SIZE, "tolerates-immutable-read-overrun": True, "delete-mutable-shares-with-zero-length-writev": True, "fills-holes-with-zero-bytes": True, "prevents-read-past-end-of-share-data": True, }, "application-version": str(allmydata.__full_version__), } return version def remote_allocate_buckets(self, storage_index, renew_secret, cancel_secret, sharenums, allocated_size, canary, owner_num=0): # owner_num is not for clients to set, but rather it should be # curried into the PersonalStorageServer instance that is dedicated # to a particular owner. start = time.time() self.count("allocate") alreadygot = set() bucketwriters = {} # k: shnum, v: BucketWriter si_dir = storage_index_to_dir(storage_index) si_s = si_b2a(storage_index) log.msg("storage: allocate_buckets %s" % si_s) # in this implementation, the lease information (including secrets) # goes into the share files themselves. It could also be put into a # separate database. Note that the lease should not be added until # the BucketWriter has been closed. expire_time = time.time() + 31*24*60*60 lease_info = LeaseInfo(owner_num, renew_secret, cancel_secret, expire_time, self.my_nodeid) max_space_per_bucket = allocated_size remaining_space = self.get_available_space() limited = remaining_space is not None if limited: # this is a bit conservative, since some of this allocated_size() # has already been written to disk, where it will show up in # get_available_space. remaining_space -= self.allocated_size() # self.readonly_storage causes remaining_space <= 0 # fill alreadygot with all shares that we have, not just the ones # they asked about: this will save them a lot of work. Add or update # leases for all of them: if they want us to hold shares for this # file, they'll want us to hold leases for this file. for (shnum, fn) in self._get_bucket_shares(storage_index): alreadygot.add(shnum) sf = ShareFile(fn) sf.add_or_renew_lease(lease_info) for shnum in sharenums: incominghome = os.path.join(self.incomingdir, si_dir, "%d" % shnum) finalhome = os.path.join(self.sharedir, si_dir, "%d" % shnum) if os.path.exists(finalhome): # great! we already have it. easy. pass elif os.path.exists(incominghome): # Note that we don't create BucketWriters for shnums that # have a partial share (in incoming/), so if a second upload # occurs while the first is still in progress, the second # uploader will use different storage servers. pass elif (not limited) or (remaining_space >= max_space_per_bucket): # ok! we need to create the new share file. bw = BucketWriter(self, incominghome, finalhome, max_space_per_bucket, lease_info, canary) if self.no_storage: bw.throw_out_all_data = True bucketwriters[shnum] = bw self._active_writers[bw] = 1 if limited: remaining_space -= max_space_per_bucket else: # bummer! not enough space to accept this bucket pass if bucketwriters: fileutil.make_dirs(os.path.join(self.sharedir, si_dir)) self.add_latency("allocate", time.time() - start) return alreadygot, bucketwriters def _iter_share_files(self, storage_index): for shnum, filename in self._get_bucket_shares(storage_index): f = open(filename, 'rb') header = f.read(32) f.close() if header[:32] == MutableShareFile.MAGIC: sf = MutableShareFile(filename, self) # note: if the share has been migrated, the renew_lease() # call will throw an exception, with information to help the # client update the lease. 
elif header[:4] == struct.pack(">L", 1): sf = ShareFile(filename) else: continue # non-sharefile yield sf def remote_add_lease(self, storage_index, renew_secret, cancel_secret, owner_num=1): start = time.time() self.count("add-lease") new_expire_time = time.time() + 31*24*60*60 lease_info = LeaseInfo(owner_num, renew_secret, cancel_secret, new_expire_time, self.my_nodeid) for sf in self._iter_share_files(storage_index): sf.add_or_renew_lease(lease_info) self.add_latency("add-lease", time.time() - start) return None def remote_renew_lease(self, storage_index, renew_secret): start = time.time() self.count("renew") new_expire_time = time.time() + 31*24*60*60 found_buckets = False for sf in self._iter_share_files(storage_index): found_buckets = True sf.renew_lease(renew_secret, new_expire_time) self.add_latency("renew", time.time() - start) if not found_buckets: raise IndexError("no such lease to renew") def bucket_writer_closed(self, bw, consumed_size): if self.stats_provider: self.stats_provider.count('storage_server.bytes_added', consumed_size) del self._active_writers[bw] def _get_bucket_shares(self, storage_index): """Return a list of (shnum, pathname) tuples for files that hold shares for this storage_index. In each tuple, 'shnum' will always be the integer form of the last component of 'pathname'.""" storagedir = os.path.join(self.sharedir, storage_index_to_dir(storage_index)) try: for f in os.listdir(storagedir): if NUM_RE.match(f): filename = os.path.join(storagedir, f) yield (int(f), filename) except OSError: # Commonly caused by there being no buckets at all. pass def remote_get_buckets(self, storage_index): start = time.time() self.count("get") si_s = si_b2a(storage_index) log.msg("storage: get_buckets %s" % si_s) bucketreaders = {} # k: sharenum, v: BucketReader for shnum, filename in self._get_bucket_shares(storage_index): bucketreaders[shnum] = BucketReader(self, filename, storage_index, shnum) self.add_latency("get", time.time() - start) return bucketreaders def get_leases(self, storage_index): """Provide an iterator that yields all of the leases attached to this bucket. Each lease is returned as a LeaseInfo instance. This method is not for client use. """ # since all shares get the same lease data, we just grab the leases # from the first share try: shnum, filename = self._get_bucket_shares(storage_index).next() sf = ShareFile(filename) return sf.get_leases() except StopIteration: return iter([]) def remote_slot_testv_and_readv_and_writev(self, storage_index, secrets, test_and_write_vectors, read_vector): start = time.time() self.count("writev") si_s = si_b2a(storage_index) log.msg("storage: slot_writev %s" % si_s) si_dir = storage_index_to_dir(storage_index) (write_enabler, renew_secret, cancel_secret) = secrets # shares exist if there is a file for them bucketdir = os.path.join(self.sharedir, si_dir) shares = {} if os.path.isdir(bucketdir): for sharenum_s in os.listdir(bucketdir): try: sharenum = int(sharenum_s) except ValueError: continue filename = os.path.join(bucketdir, sharenum_s) msf = MutableShareFile(filename, self) msf.check_write_enabler(write_enabler, si_s) shares[sharenum] = msf # write_enabler is good for all existing shares. # Now evaluate test vectors. 
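        # Illustrative aside (not part of the original file): a minimal sketch
        # of how one test vector (offset, length, operator, specimen) is
        # checked against the current share contents, mirroring
        # check_testv()/testv_compare() in mutable.py above; 'share_data'
        # stands in for the on-disk data and the helper name is hypothetical.
        import operator as _operator_mod
        _SKETCH_TESTV_OPS = {"lt": _operator_mod.lt, "le": _operator_mod.le,
                             "eq": _operator_mod.eq, "ne": _operator_mod.ne,
                             "ge": _operator_mod.ge, "gt": _operator_mod.gt}
        def _sketch_check_one_testv(share_data, testv_entry):
            (offset, length, op, specimen) = testv_entry
            # reads past the end of the data are truncated, as in
            # _read_share_data()
            data = share_data[offset:offset+length]
            return _SKETCH_TESTV_OPS[op](data, specimen)
        # e.g. _sketch_check_one_testv("hello world", (0, 5, "eq", "hello")) -> True
        #      _sketch_check_one_testv("", (0, 5, "eq", "hello"))            -> False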
testv_is_good = True for sharenum in test_and_write_vectors: (testv, datav, new_length) = test_and_write_vectors[sharenum] if sharenum in shares: if not shares[sharenum].check_testv(testv): self.log("testv failed: [%d]: %r" % (sharenum, testv)) testv_is_good = False break else: # compare the vectors against an empty share, in which all # reads return empty strings. if not EmptyShare().check_testv(testv): self.log("testv failed (empty): [%d] %r" % (sharenum, testv)) testv_is_good = False break # now gather the read vectors, before we do any writes read_data = {} for sharenum, share in shares.items(): read_data[sharenum] = share.readv(read_vector) ownerid = 1 # TODO expire_time = time.time() + 31*24*60*60 # one month lease_info = LeaseInfo(ownerid, renew_secret, cancel_secret, expire_time, self.my_nodeid) if testv_is_good: # now apply the write vectors for sharenum in test_and_write_vectors: (testv, datav, new_length) = test_and_write_vectors[sharenum] if new_length == 0: if sharenum in shares: shares[sharenum].unlink() else: if sharenum not in shares: # allocate a new share allocated_size = 2000 # arbitrary, really share = self._allocate_slot_share(bucketdir, secrets, sharenum, allocated_size, owner_num=0) shares[sharenum] = share shares[sharenum].writev(datav, new_length) # and update the lease shares[sharenum].add_or_renew_lease(lease_info) if new_length == 0: # delete empty bucket directories if not os.listdir(bucketdir): os.rmdir(bucketdir) # all done self.add_latency("writev", time.time() - start) return (testv_is_good, read_data) def _allocate_slot_share(self, bucketdir, secrets, sharenum, allocated_size, owner_num=0): (write_enabler, renew_secret, cancel_secret) = secrets my_nodeid = self.my_nodeid fileutil.make_dirs(bucketdir) filename = os.path.join(bucketdir, "%d" % sharenum) share = create_mutable_sharefile(filename, my_nodeid, write_enabler, self) return share def remote_slot_readv(self, storage_index, shares, readv): start = time.time() self.count("readv") si_s = si_b2a(storage_index) lp = log.msg("storage: slot_readv %s %s" % (si_s, shares), facility="tahoe.storage", level=log.OPERATIONAL) si_dir = storage_index_to_dir(storage_index) # shares exist if there is a file for them bucketdir = os.path.join(self.sharedir, si_dir) if not os.path.isdir(bucketdir): self.add_latency("readv", time.time() - start) return {} datavs = {} for sharenum_s in os.listdir(bucketdir): try: sharenum = int(sharenum_s) except ValueError: continue if sharenum in shares or not shares: filename = os.path.join(bucketdir, sharenum_s) msf = MutableShareFile(filename, self) datavs[sharenum] = msf.readv(readv) log.msg("returning shares %s" % (datavs.keys(),), facility="tahoe.storage", level=log.NOISY, parent=lp) self.add_latency("readv", time.time() - start) return datavs def remote_advise_corrupt_share(self, share_type, storage_index, shnum, reason): fileutil.make_dirs(self.corruption_advisory_dir) now = time_format.iso_utc(sep="T") si_s = si_b2a(storage_index) # windows can't handle colons in the filename fn = os.path.join(self.corruption_advisory_dir, "%s--%s-%d" % (now, si_s, shnum)).replace(":","") f = open(fn, "w") f.write("report: Share Corruption\n") f.write("type: %s\n" % share_type) f.write("storage_index: %s\n" % si_s) f.write("share_number: %d\n" % shnum) f.write("\n") f.write(reason) f.write("\n") f.close() log.msg(format=("client claims corruption in (%(share_type)s) " + "%(si)s-%(shnum)d: %(reason)s"), share_type=share_type, si=si_s, shnum=shnum, reason=reason, level=log.SCARY, umid="SGx2fA") 
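        # Illustrative aside (not part of the original file): a minimal sketch
        # of the advisory-filename rule used just above, assuming now_iso is
        # the ISO-8601 timestamp and si_s the base-32 storage index.
        # Hypothetical helper, for explanation only.
        def _sketch_advisory_filename(advisory_dir, now_iso, si_s, shnum):
            import os
            fn = os.path.join(advisory_dir, "%s--%s-%d" % (now_iso, si_s, shnum))
            # colons are stripped because Windows cannot handle them in filenames
            return fn.replace(":", "")
        # e.g. _sketch_advisory_filename("corruption-advisories",
        #                                "2013-09-01T12:00:00", "b32si", 2)
        #   -> "corruption-advisories/2013-09-01T120000--b32si-2"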
return None tahoe-lafs-1.10.0/src/allmydata/storage/shares.py000066400000000000000000000005701221140116300216470ustar00rootroot00000000000000#! /usr/bin/python from allmydata.storage.mutable import MutableShareFile from allmydata.storage.immutable import ShareFile def get_share_file(filename): f = open(filename, "rb") prefix = f.read(32) f.close() if prefix == MutableShareFile.MAGIC: return MutableShareFile(filename) # otherwise assume it's immutable return ShareFile(filename) tahoe-lafs-1.10.0/src/allmydata/storage_client.py000066400000000000000000000306771221140116300217330ustar00rootroot00000000000000 """ I contain the client-side code which speaks to storage servers, in particular the foolscap-based server implemented in src/allmydata/storage/*.py . """ # roadmap: # # 1: implement StorageFarmBroker (i.e. "storage broker"), change Client to # create it, change uploader/servermap to get rrefs from it. ServerFarm calls # IntroducerClient.subscribe_to . ServerFarm hides descriptors, passes rrefs # to clients. webapi status pages call broker.get_info_about_serverid. # # 2: move get_info methods to the descriptor, webapi status pages call # broker.get_descriptor_for_serverid().get_info # # 3?later?: store descriptors in UploadResults/etc instead of serverids, # webapi status pages call descriptor.get_info and don't use storage_broker # or Client # # 4: enable static config: tahoe.cfg can add descriptors. Make the introducer # optional. This closes #467 # # 5: implement NativeStorageClient, pass it to Tahoe2PeerSelector and other # clients. Clients stop doing callRemote(), use NativeStorageClient methods # instead (which might do something else, i.e. http or whatever). The # introducer and tahoe.cfg only create NativeStorageClients for now. # # 6: implement other sorts of IStorageClient classes: S3, etc import re, time from zope.interface import implements from foolscap.api import eventually from allmydata.interfaces import IStorageBroker, IDisplayableServer, IServer from allmydata.util import log, base32 from allmydata.util.assertutil import precondition from allmydata.util.rrefutil import add_version_to_remote_reference from allmydata.util.hashutil import sha1 # who is responsible for de-duplication? # both? # IC remembers the unpacked announcements it receives, to provide for late # subscribers and to remove duplicates # if a client subscribes after startup, will they receive old announcements? # yes # who will be responsible for signature checking? # make it be IntroducerClient, so they can push the filter outwards and # reduce inbound network traffic # what should the interface between StorageFarmBroker and IntroducerClient # look like? # don't pass signatures: only pass validated blessed-objects class StorageFarmBroker: implements(IStorageBroker) """I live on the client, and know about storage servers. For each server that is participating in a grid, I either maintain a connection to it or remember enough information to establish a connection to it on demand. I'm also responsible for subscribing to the IntroducerClient to find out about new servers as they are announced by the Introducer. """ def __init__(self, tub, permute_peers): self.tub = tub assert permute_peers # False not implemented yet self.permute_peers = permute_peers # self.servers maps serverid -> IServer, and keeps track of all the # storage servers that we've heard about. Each descriptor manages its # own Reconnector, and will give us a RemoteReference when we ask # them for it. 
self.servers = {} self.introducer_client = None # these two are used in unit tests def test_add_rref(self, serverid, rref, ann): s = NativeStorageServer(serverid, ann.copy()) s.rref = rref s._is_connected = True self.servers[serverid] = s def test_add_server(self, serverid, s): self.servers[serverid] = s def use_introducer(self, introducer_client): self.introducer_client = ic = introducer_client ic.subscribe_to("storage", self._got_announcement) def _got_announcement(self, key_s, ann): if key_s is not None: precondition(isinstance(key_s, str), key_s) precondition(key_s.startswith("v0-"), key_s) assert ann["service-name"] == "storage" s = NativeStorageServer(key_s, ann) serverid = s.get_serverid() old = self.servers.get(serverid) if old: if old.get_announcement() == ann: return # duplicate # replacement del self.servers[serverid] old.stop_connecting() # now we forget about them and start using the new one self.servers[serverid] = s s.start_connecting(self.tub, self._trigger_connections) # the descriptor will manage their own Reconnector, and each time we # need servers, we'll ask them if they're connected or not. def _trigger_connections(self): # when one connection is established, reset the timers on all others, # to trigger a reconnection attempt in one second. This is intended # to accelerate server connections when we've been offline for a # while. The goal is to avoid hanging out for a long time with # connections to only a subset of the servers, which would increase # the chances that we'll put shares in weird places (and not update # existing shares of mutable files). See #374 for more details. for dsc in self.servers.values(): dsc.try_to_connect() def get_servers_for_psi(self, peer_selection_index): # return a list of server objects (IServers) assert self.permute_peers == True def _permuted(server): seed = server.get_permutation_seed() return sha1(peer_selection_index + seed).digest() return sorted(self.get_connected_servers(), key=_permuted) def get_all_serverids(self): return frozenset(self.servers.keys()) def get_connected_servers(self): return frozenset([s for s in self.servers.values() if s.is_connected()]) def get_known_servers(self): return frozenset(self.servers.values()) def get_nickname_for_serverid(self, serverid): if serverid in self.servers: return self.servers[serverid].get_nickname() return None def get_stub_server(self, serverid): if serverid in self.servers: return self.servers[serverid] return StubServer(serverid) class StubServer: implements(IDisplayableServer) def __init__(self, serverid): self.serverid = serverid # binary tubid def get_serverid(self): return self.serverid def get_name(self): return base32.b2a(self.serverid)[:8] def get_longname(self): return base32.b2a(self.serverid) def get_nickname(self): return "?" class NativeStorageServer: """I hold information about a storage server that we want to connect to. If we are connected, I hold the RemoteReference, their host address, and the their version information. I remember information about when we were last connected too, even if we aren't currently connected. 
@ivar announcement_time: when we first heard about this service @ivar last_connect_time: when we last established a connection @ivar last_loss_time: when we last lost a connection @ivar version: the server's versiondict, from the most recent announcement @ivar nickname: the server's self-reported nickname (unicode), same @ivar rref: the RemoteReference, if connected, otherwise None @ivar remote_host: the IAddress, if connected, otherwise None """ implements(IServer) VERSION_DEFAULTS = { "http://allmydata.org/tahoe/protocols/storage/v1" : { "maximum-immutable-share-size": 2**32 - 1, "maximum-mutable-share-size": 2*1000*1000*1000, # maximum prior to v1.9.2 "tolerates-immutable-read-overrun": False, "delete-mutable-shares-with-zero-length-writev": False, }, "application-version": "unknown: no get_version()", } def __init__(self, key_s, ann, min_shares=1): self.key_s = key_s self.announcement = ann self.min_shares = min_shares assert "anonymous-storage-FURL" in ann, ann furl = str(ann["anonymous-storage-FURL"]) m = re.match(r'pb://(\w+)@', furl) assert m, furl tubid_s = m.group(1).lower() self._tubid = base32.a2b(tubid_s) assert "permutation-seed-base32" in ann, ann ps = base32.a2b(str(ann["permutation-seed-base32"])) self._permutation_seed = ps if key_s: self._long_description = key_s if key_s.startswith("v0-"): # remove v0- prefix from abbreviated name self._short_description = key_s[3:3+8] else: self._short_description = key_s[:8] else: self._long_description = tubid_s self._short_description = tubid_s[:6] self.announcement_time = time.time() self.last_connect_time = None self.last_loss_time = None self.remote_host = None self.rref = None self._is_connected = False self._reconnector = None self._trigger_cb = None # Special methods used by copy.copy() and copy.deepcopy(). When those are # used in allmydata.immutable.filenode to copy CheckResults during # repair, we want it to treat the IServer instances as singletons, and # not attempt to duplicate them.. def __copy__(self): return self def __deepcopy__(self, memodict): return self def __repr__(self): return "" % self.get_name() def get_serverid(self): return self._tubid # XXX replace with self.key_s def get_permutation_seed(self): return self._permutation_seed def get_version(self): if self.rref: return self.rref.version return None def get_name(self): # keep methodname short # TODO: decide who adds [] in the short description. It should # probably be the output side, not here. 
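        # Illustrative aside (not part of the original file): a minimal sketch
        # of the long/short description rules from __init__ above, where key_s
        # is either None or the server's announcement key string (which starts
        # with "v0-") and tubid_s is the lowercase tub id parsed from the FURL.
        # Hypothetical helper, for explanation only.
        def _sketch_server_names(key_s, tubid_s):
            if key_s:
                longname = key_s
                if key_s.startswith("v0-"):
                    shortname = key_s[3:3+8]   # drop the "v0-" prefix, keep 8 chars
                else:
                    shortname = key_s[:8]
            else:
                longname = tubid_s
                shortname = tubid_s[:6]
            return (longname, shortname)
        # e.g. _sketch_server_names("v0-abcdefghijkl", "ib3pmoexample")
        #   -> ("v0-abcdefghijkl", "abcdefgh")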
return self._short_description def get_longname(self): return self._long_description def get_lease_seed(self): return self._tubid def get_foolscap_write_enabler_seed(self): return self._tubid def get_nickname(self): return self.announcement["nickname"] def get_announcement(self): return self.announcement def get_remote_host(self): return self.remote_host def is_connected(self): return self._is_connected def get_last_connect_time(self): return self.last_connect_time def get_last_loss_time(self): return self.last_loss_time def get_announcement_time(self): return self.announcement_time def start_connecting(self, tub, trigger_cb): furl = str(self.announcement["anonymous-storage-FURL"]) self._trigger_cb = trigger_cb self._reconnector = tub.connectTo(furl, self._got_connection) def _got_connection(self, rref): lp = log.msg(format="got connection to %(name)s, getting versions", name=self.get_name(), facility="tahoe.storage_broker", umid="coUECQ") if self._trigger_cb: eventually(self._trigger_cb) default = self.VERSION_DEFAULTS d = add_version_to_remote_reference(rref, default) d.addCallback(self._got_versioned_service, lp) d.addErrback(log.err, format="storageclient._got_connection", name=self.get_name(), umid="Sdq3pg") def _got_versioned_service(self, rref, lp): log.msg(format="%(name)s provided version info %(version)s", name=self.get_name(), version=rref.version, facility="tahoe.storage_broker", umid="SWmJYg", level=log.NOISY, parent=lp) self.last_connect_time = time.time() self.remote_host = rref.getPeer() self.rref = rref self._is_connected = True rref.notifyOnDisconnect(self._lost) def get_rref(self): return self.rref def _lost(self): log.msg(format="lost connection to %(name)s", name=self.get_name(), facility="tahoe.storage_broker", umid="zbRllw") self.last_loss_time = time.time() # self.rref is now stale: all callRemote()s will get a # DeadReferenceError. We leave the stale reference in place so that # uploader/downloader code (which received this IServer through # get_connected_servers() or get_servers_for_psi()) can continue to # use s.get_rref().callRemote() and not worry about it being None. self._is_connected = False self.remote_host = None def stop_connecting(self): # used when this descriptor has been superceded by another self._reconnector.stopConnecting() def try_to_connect(self): # used when the broker wants us to hurry up self._reconnector.reset() class UnknownServerTypeError(Exception): pass tahoe-lafs-1.10.0/src/allmydata/test/000077500000000000000000000000001221140116300173215ustar00rootroot00000000000000tahoe-lafs-1.10.0/src/allmydata/test/__init__.py000066400000000000000000000023451221140116300214360ustar00rootroot00000000000000 from foolscap.logging.incident import IncidentQualifier class NonQualifier(IncidentQualifier): def check_event(self, ev): return False def disable_foolscap_incidents(): # Foolscap-0.2.9 (at least) uses "trailing delay" in its default incident # reporter: after a severe log event is recorded (thus triggering an # "incident" in which recent events are dumped to a file), a few seconds # of subsequent events are also recorded in the incident file. The timer # that this leaves running will cause "Unclean Reactor" unit test # failures. The simplest workaround is to disable this timer. Note that # this disables the timer for the entire process: do not call this from # regular runtime code; only use it for unit tests that are running under # Trial. #IncidentReporter.TRAILING_DELAY = None # # Also, using Incidents more than doubles the test time. 
So we just # disable them entirely. from foolscap.logging.log import theLogger iq = NonQualifier() theLogger.setIncidentQualifier(iq) # we disable incident reporting for all unit tests. disable_foolscap_incidents() import sys if sys.platform == "win32": from allmydata.windows.fixups import initialize initialize() tahoe-lafs-1.10.0/src/allmydata/test/bench_dirnode.py000066400000000000000000000117121221140116300224600ustar00rootroot00000000000000import hotshot.stats, os, random, sys from pyutil import benchutil, randutil # http://tahoe-lafs.org/trac/pyutil from zope.interface import implements from allmydata import dirnode, uri from allmydata.interfaces import IFileNode from allmydata.mutable.filenode import MutableFileNode from allmydata.immutable.filenode import ImmutableFileNode class ContainerNode: implements(IFileNode) # dirnodes sit on top of a "container" filenode, from which it extracts a # writekey def __init__(self): self._writekey = randutil.insecurerandstr(16) self._fingerprint = randutil.insecurerandstr(32) self._cap = uri.WriteableSSKFileURI(self._writekey, self._fingerprint) def get_writekey(self): return self._writekey def get_cap(self): return self._cap def get_uri(self): return self._cap.to_string() def is_readonly(self): return False def is_mutable(self): return True class FakeNode: def raise_error(self): return None class FakeNodeMaker: def create_from_cap(self, writecap, readcap=None, deep_immutable=False, name=''): return FakeNode() def random_unicode(n=140, b=3, codec='utf-8'): l = [] while len(l) < n: try: u = os.urandom(b).decode(codec)[0] except UnicodeDecodeError: pass else: l.append(u) return u''.join(l) encoding_parameters = {"k": 3, "n": 10} def random_metadata(): d = {} d['tahoe'] = {} d['tahoe']['linkcrtime'] = random.random() d['tahoe']['linkmotime'] = random.random() return d PROF_FILE_NAME="bench_dirnode.prof" class B(object): def __init__(self): self.children = [] # tuples of (k, v) (suitable for passing to dict()) self.packstr = None self.nodemaker = FakeNodeMaker() self.testdirnode = dirnode.DirectoryNode(ContainerNode(), self.nodemaker, uploader=None) def random_fsnode(self): coin = random.randrange(0, 3) if coin == 0: cap = uri.CHKFileURI(randutil.insecurerandstr(16), randutil.insecurerandstr(32), random.randrange(1, 5), random.randrange(6, 15), random.randrange(99, 1000000000000)) return ImmutableFileNode(cap, None, None, None, None, None) elif coin == 1: cap = uri.WriteableSSKFileURI(randutil.insecurerandstr(16), randutil.insecurerandstr(32)) n = MutableFileNode(None, None, encoding_parameters, None) return n.init_from_cap(cap) else: assert coin == 2 cap = uri.WriteableSSKFileURI(randutil.insecurerandstr(16), randutil.insecurerandstr(32)) n = MutableFileNode(None, None, encoding_parameters, None) n.init_from_cap(cap) return dirnode.DirectoryNode(n, self.nodemaker, uploader=None) def random_child(self): return self.random_fsnode(), random_metadata() def init_for_pack(self, N): for i in xrange(len(self.children), N): name = random_unicode(random.randrange(0, 10)) self.children.append( (name, self.random_child()) ) def init_for_unpack(self, N): self.init_for_pack(N) self.packstr = self.pack(N) def pack(self, N): return self.testdirnode._pack_contents(dict(self.children[:N])) def unpack(self, N): return self.testdirnode._unpack_contents(self.packstr) def unpack_and_repack(self, N): return self.testdirnode._pack_contents(self.testdirnode._unpack_contents(self.packstr)) def run_benchmarks(self, profile=False): for (initfunc, func) in 
[(self.init_for_unpack, self.unpack), (self.init_for_pack, self.pack), (self.init_for_unpack, self.unpack_and_repack)]: print "benchmarking %s" % (func,) for N in 16, 512, 2048, 16384: print "%5d" % N, benchutil.rep_bench(func, N, initfunc=initfunc, MAXREPS=20, UNITS_PER_SECOND=1000) benchutil.print_bench_footer(UNITS_PER_SECOND=1000) print "(milliseconds)" def prof_benchmarks(self): # This requires pyutil >= v1.3.34. self.run_benchmarks(profile=True) def print_stats(self): s = hotshot.stats.load(PROF_FILE_NAME) s.strip_dirs().sort_stats("time").print_stats(32) if __name__ == "__main__": if '--profile' in sys.argv: if os.path.exists(PROF_FILE_NAME): print "WARNING: profiling results file '%s' already exists -- the profiling results from this run will be added into the profiling results stored in that file and then the sum of them will be printed out after this run." % (PROF_FILE_NAME,) b = B() b.prof_benchmarks() b.print_stats() else: b = B() b.run_benchmarks() tahoe-lafs-1.10.0/src/allmydata/test/check_grid.py000066400000000000000000000202301221140116300217520ustar00rootroot00000000000000""" Test an existing Tahoe grid, both to see if the grid is still running and to see if the client is still compatible with it. This script is suitable for running from a periodic monitoring script, perhaps by an hourly cronjob. This script uses a pre-established client node (configured to connect to the grid being tested) and a pre-established directory (stored as the 'testgrid:' alias in that client node's aliases file). It then performs a number of uploads and downloads to exercise compatibility in various directions (new client vs old data). All operations are performed by invoking various CLI commands through bin/tahoe . The script must be given two arguments: the client node directory, and the location of the bin/tahoe executable. Note that this script does not import anything from tahoe directly, so it doesn't matter what its PYTHONPATH is, as long as the bin/tahoe that it uses is functional. This script expects that the client node will be not running when the script starts, but it will forcibly shut down the node just to be sure. It will shut down the node after the test finishes. 
To set up the client node, do the following: tahoe create-client DIR populate DIR/introducer.furl tahoe start DIR tahoe add-alias -d DIR testgrid `tahoe mkdir -d DIR` pick a 10kB-ish test file, compute its md5sum tahoe put -d DIR FILE testgrid:old.MD5SUM tahoe put -d DIR FILE testgrid:recent.MD5SUM tahoe put -d DIR FILE testgrid:recentdir/recent.MD5SUM echo "" | tahoe put -d DIR --mutable testgrid:log echo "" | tahoe put -d DIR --mutable testgrid:recentlog This script will perform the following steps (the kind of compatibility that is being tested is in [brackets]): read old.* and check the md5sums [confirm that new code can read old files] read all recent.* files and check md5sums [read recent files] delete all recent.* files and verify they're gone [modify an old directory] read recentdir/recent.* files and check [read recent directory] delete recentdir/recent.* and verify [modify recent directory] delete recentdir and verify (keep the directory from growing unboundedly) mkdir recentdir upload random 10kB file to recentdir/recent.MD5SUM (prepare for next time) upload random 10kB file to recent.MD5SUM [new code can upload to old servers] append one-line timestamp to log [read/write old mutable files] append one-line timestamp to recentlog [read/write recent mutable files] delete recentlog upload small header to new mutable recentlog [create mutable files] This script will also keep track of speeds and latencies and will write them in a machine-readable logfile. """ import time, subprocess, md5, os.path, random from twisted.python import usage class GridTesterOptions(usage.Options): optFlags = [ ("no", "n", "Dry run: do not run any commands, just print them."), ] def parseArgs(self, nodedir, tahoe): # Note: does not support Unicode arguments. self.nodedir = os.path.expanduser(nodedir) self.tahoe = os.path.abspath(os.path.expanduser(tahoe)) class CommandFailed(Exception): pass class GridTester: def __init__(self, config): self.config = config self.tahoe = config.tahoe self.nodedir = config.nodedir def command(self, *cmd, **kwargs): expected_rc = kwargs.get("expected_rc", 0) stdin = kwargs.get("stdin", None) if self.config["no"]: return if stdin is not None: p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) (stdout,stderr) = p.communicate(stdin) else: p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) (stdout,stderr) = p.communicate() rc = p.returncode if expected_rc != None and rc != expected_rc: if stderr: print "STDERR:" print stderr raise CommandFailed("command '%s' failed: rc=%d" % (cmd, rc)) return stdout, stderr def cli(self, cmd, *args, **kwargs): print "tahoe", cmd, " ".join(args) stdout, stderr = self.command(self.tahoe, cmd, "-d", self.nodedir, *args, **kwargs) if not kwargs.get("ignore_stderr", False) and stderr != "": raise CommandFailed("command '%s' had stderr: %s" % (" ".join(args), stderr)) return stdout def stop_old_node(self): print "tahoe stop", self.nodedir, "(force)" self.command(self.tahoe, "stop", self.nodedir, expected_rc=None) def start_node(self): print "tahoe start", self.nodedir self.command(self.tahoe, "start", self.nodedir) time.sleep(5) def stop_node(self): print "tahoe stop", self.nodedir self.command(self.tahoe, "stop", self.nodedir) def read_and_check(self, f): expected_md5_s = f[f.find(".")+1:] out = self.cli("get", "testgrid:" + f) got_md5_s = md5.new(out).hexdigest() if got_md5_s != expected_md5_s: raise CommandFailed("%s had md5sum of %s" % (f, got_md5_s)) def delete_and_check(self, 
dirname, f): oldfiles = self.listdir(dirname) if dirname: absfilename = "testgrid:" + dirname + "/" + f else: absfilename = "testgrid:" + f if f not in oldfiles: raise CommandFailed("um, '%s' was supposed to already be in %s" % (f, dirname)) self.cli("rm", absfilename) newfiles = self.listdir(dirname) if f in newfiles: raise CommandFailed("failed to remove '%s' from %s" % (f, dirname)) def listdir(self, dirname): out = self.cli("ls", "testgrid:"+dirname).strip().split("\n") files = [f.strip() for f in out] print " ", files return files def do_test(self): files = self.listdir("") for f in files: if f.startswith("old.") or f.startswith("recent."): self.read_and_check("" + f) for f in files: if f.startswith("recent."): self.delete_and_check("", f) files = self.listdir("recentdir") for f in files: if f.startswith("old.") or f.startswith("recent."): self.read_and_check("recentdir/" + f) for f in files: if f.startswith("recent."): self.delete_and_check("recentdir", f) self.delete_and_check("", "recentdir") self.cli("mkdir", "testgrid:recentdir") fn, data = self.makefile("recent") self.put("recentdir/"+fn, data) files = self.listdir("recentdir") if fn not in files: raise CommandFailed("failed to put %s in recentdir/" % fn) fn, data = self.makefile("recent") self.put(fn, data) files = self.listdir("") if fn not in files: raise CommandFailed("failed to put %s in testgrid:" % fn) self.update("log") self.update("recentlog") self.delete_and_check("", "recentlog") self.put_mutable("recentlog", "Recent Mutable Log Header\n\n") def put(self, fn, data): self.cli("put", "-", "testgrid:"+fn, stdin=data, ignore_stderr=True) def put_mutable(self, fn, data): self.cli("put", "--mutable", "-", "testgrid:"+fn, stdin=data, ignore_stderr=True) def update(self, fn): old = self.cli("get", "testgrid:"+fn) new = old + time.ctime() + "\n" self.put(fn, new) def makefile(self, prefix): size = random.randint(10001, 10100) data = os.urandom(size) md5sum = md5.new(data).hexdigest() fn = prefix + "." + md5sum return fn, data def run(self): self.stop_old_node() self.start_node() try: self.do_test() finally: self.stop_node() def main(): config = GridTesterOptions() config.parseOptions() gt = GridTester(config) gt.run() if __name__ == "__main__": main() tahoe-lafs-1.10.0/src/allmydata/test/check_load.py000066400000000000000000000210451221140116300217510ustar00rootroot00000000000000""" this is a load-generating client program. It does all of its work through a given tahoe node (specified by URL), and performs random reads and writes to the target. Run this in a directory with the following files: server-URLs : a list of tahoe node URLs (one per line). Each operation will use a randomly-selected server. root.cap: (string) the top-level directory rwcap to use delay: (float) seconds to delay between operations operation-mix: "R/W": two ints, relative frequency of read and write ops #size:? Set argv[1] to a per-client stats-NN.out file. This will will be updated with running totals of bytes-per-second and operations-per-second. The stats from multiple clients can be totalled together and averaged over time to compute the traffic being accepted by the grid. Each time a 'read' operation is performed, the client will begin at the root and randomly choose a child. If the child is a directory, the client will recurse. If the child is a file, the client will read the contents of the file. Each time a 'write' operation is performed, the client will generate a target filename (a random string). 
90% of the time, the file will be written into the same directory that was used last time (starting at the root). 10% of the time, a new directory is created by assembling 1 to 5 pathnames chosen at random. The client then writes a certain number of zero bytes to this file. The filesize is determined with something like a power-law distribution, with a mean of 10kB and a max of 100MB, so filesize=min(int(1.0/random(.0002)),1e8) """ import os, sys, httplib, binascii import urllib, simplejson, random, time, urlparse if sys.argv[1] == "--stats": statsfiles = sys.argv[2:] # gather stats every 10 seconds, do a moving-window average of the last # 60 seconds DELAY = 10 MAXSAMPLES = 6 totals = [] last_stats = {} while True: stats = {} for sf in statsfiles: for line in open(sf, "r").readlines(): name, value = line.split(":") value = int(value.strip()) if name not in stats: stats[name] = 0 stats[name] += float(value) if last_stats: delta = dict( [ (name,stats[name]-last_stats[name]) for name in stats ] ) print "THIS SAMPLE:" for name in sorted(delta.keys()): avg = float(delta[name]) / float(DELAY) print "%20s: %0.2f per second" % (name, avg) totals.append(delta) while len(totals) > MAXSAMPLES: totals.pop(0) # now compute average print print "MOVING WINDOW AVERAGE:" for name in sorted(delta.keys()): avg = sum([ s[name] for s in totals]) / (DELAY*len(totals)) print "%20s %0.2f per second" % (name, avg) last_stats = stats print print time.sleep(DELAY) stats_out = sys.argv[1] server_urls = [] for url in open("server-URLs", "r").readlines(): url = url.strip() if url: server_urls.append(url) root = open("root.cap", "r").read().strip() delay = float(open("delay", "r").read().strip()) readfreq, writefreq = ( [int(x) for x in open("operation-mix", "r").read().strip().split("/")]) files_uploaded = 0 files_downloaded = 0 bytes_uploaded = 0 bytes_downloaded = 0 directories_read = 0 directories_written = 0 def listdir(nodeurl, root, remote_pathname): if nodeurl[-1] != "/": nodeurl += "/" url = nodeurl + "uri/%s/" % urllib.quote(root) if remote_pathname: url += urllib.quote(remote_pathname) url += "?t=json" data = urllib.urlopen(url).read() try: parsed = simplejson.loads(data) except ValueError: print "URL was", url print "DATA was", data raise nodetype, d = parsed assert nodetype == "dirnode" global directories_read directories_read += 1 children = dict( [(unicode(name),value) for (name,value) in d["children"].iteritems()] ) return children def choose_random_descendant(server_url, root, pathname=""): children = listdir(server_url, root, pathname) name = random.choice(children.keys()) child = children[name] if pathname: new_pathname = pathname + "/" + name else: new_pathname = name if child[0] == "filenode": return new_pathname return choose_random_descendant(server_url, root, new_pathname) def read_and_discard(nodeurl, root, pathname): if nodeurl[-1] != "/": nodeurl += "/" url = nodeurl + "uri/%s/" % urllib.quote(root) if pathname: url += urllib.quote(pathname) f = urllib.urlopen(url) global bytes_downloaded while True: data = f.read(4096) if not data: break bytes_downloaded += len(data) directories = [ "dreamland/disengaging/hucksters", "dreamland/disengaging/klondikes", "dreamland/disengaging/neatly", "dreamland/cottages/richmond", "dreamland/cottages/perhaps", "dreamland/cottages/spies", "dreamland/finder/diversion", "dreamland/finder/cigarette", "dreamland/finder/album", "hazing/licences/comedian", "hazing/licences/goat", "hazing/licences/shopkeeper", "hazing/regiment/frigate", "hazing/regiment/quackery", 
"hazing/regiment/centerpiece", "hazing/disassociate/mob", "hazing/disassociate/nihilistic", "hazing/disassociate/bilbo", ] def create_random_directory(): d = random.choice(directories) pieces = d.split("/") numsegs = random.randint(1, len(pieces)) return "/".join(pieces[0:numsegs]) def generate_filename(): fn = binascii.hexlify(os.urandom(4)) return fn def choose_size(): mean = 10e3 size = random.expovariate(1.0 / mean) return int(min(size, 100e6)) # copied from twisted/web/client.py def parse_url(url, defaultPort=None): url = url.strip() parsed = urlparse.urlparse(url) scheme = parsed[0] path = urlparse.urlunparse(('','')+parsed[2:]) if defaultPort is None: if scheme == 'https': defaultPort = 443 else: defaultPort = 80 host, port = parsed[1], defaultPort if ':' in host: host, port = host.split(':') port = int(port) if path == "": path = "/" return scheme, host, port, path def generate_and_put(nodeurl, root, remote_filename, size): if nodeurl[-1] != "/": nodeurl += "/" url = nodeurl + "uri/%s/" % urllib.quote(root) url += urllib.quote(remote_filename) scheme, host, port, path = parse_url(url) if scheme == "http": c = httplib.HTTPConnection(host, port) elif scheme == "https": c = httplib.HTTPSConnection(host, port) else: raise ValueError("unknown scheme '%s', need http or https" % scheme) c.putrequest("PUT", path) c.putheader("Hostname", host) c.putheader("User-Agent", "tahoe-check-load") c.putheader("Connection", "close") c.putheader("Content-Length", "%d" % size) c.endheaders() global bytes_uploaded while size: chunksize = min(size, 4096) size -= chunksize c.send("\x00" * chunksize) bytes_uploaded += chunksize return c.getresponse() current_writedir = "" while True: time.sleep(delay) if random.uniform(0, readfreq+writefreq) < readfreq: op = "read" else: op = "write" print "OP:", op server = random.choice(server_urls) if op == "read": pathname = choose_random_descendant(server, root) print " reading", pathname read_and_discard(server, root, pathname) files_downloaded += 1 elif op == "write": if random.uniform(0, 100) < 10: current_writedir = create_random_directory() filename = generate_filename() if current_writedir: pathname = current_writedir + "/" + filename else: pathname = filename print " writing", pathname size = choose_size() print " size", size generate_and_put(server, root, pathname, size) files_uploaded += 1 f = open(stats_out+".tmp", "w") f.write("files-uploaded: %d\n" % files_uploaded) f.write("files-downloaded: %d\n" % files_downloaded) f.write("bytes-uploaded: %d\n" % bytes_uploaded) f.write("bytes-downloaded: %d\n" % bytes_downloaded) f.write("directories-read: %d\n" % directories_read) f.write("directories-written: %d\n" % directories_written) f.close() os.rename(stats_out+".tmp", stats_out) tahoe-lafs-1.10.0/src/allmydata/test/check_memory.py000066400000000000000000000472331221140116300223510ustar00rootroot00000000000000import os, shutil, sys, urllib, time, stat from cStringIO import StringIO from twisted.internet import defer, reactor, protocol, error from twisted.application import service, internet from twisted.web import client as tw_client from allmydata import client, introducer from allmydata.immutable import upload from allmydata.scripts import create_node from allmydata.util import fileutil, pollmixin from allmydata.util.fileutil import abspath_expanduser_unicode from allmydata.util.encodingutil import get_filesystem_encoding from foolscap.api import Tub, fireEventually, flushEventualQueue from twisted.python import log class 
StallableHTTPGetterDiscarder(tw_client.HTTPPageGetter): full_speed_ahead = False _bytes_so_far = 0 stalled = None def handleResponsePart(self, data): self._bytes_so_far += len(data) if not self.factory.do_stall: return if self.full_speed_ahead: return if self._bytes_so_far > 1e6+100: if not self.stalled: print "STALLING" self.transport.pauseProducing() self.stalled = reactor.callLater(10.0, self._resume_speed) def _resume_speed(self): print "RESUME SPEED" self.stalled = None self.full_speed_ahead = True self.transport.resumeProducing() def handleResponseEnd(self): if self.stalled: print "CANCEL" self.stalled.cancel() self.stalled = None return tw_client.HTTPPageGetter.handleResponseEnd(self) class StallableDiscardingHTTPClientFactory(tw_client.HTTPClientFactory): protocol = StallableHTTPGetterDiscarder def discardPage(url, stall=False, *args, **kwargs): """Start fetching the URL, but stall our pipe after the first 1MB. Wait 10 seconds, then resume downloading (and discarding) everything. """ # adapted from twisted.web.client.getPage . We can't just wrap or # subclass because it provides no way to override the HTTPClientFactory # that it creates. scheme, host, port, path = tw_client._parse(url) factory = StallableDiscardingHTTPClientFactory(url, *args, **kwargs) factory.do_stall = stall assert scheme == 'http' reactor.connectTCP(host, port, factory) return factory.deferred class ChildDidNotStartError(Exception): pass class SystemFramework(pollmixin.PollMixin): numnodes = 7 def __init__(self, basedir, mode): self.basedir = basedir = abspath_expanduser_unicode(unicode(basedir)) if not (basedir + os.path.sep).startswith(abspath_expanduser_unicode(u".") + os.path.sep): raise AssertionError("safety issue: basedir must be a subdir") self.testdir = testdir = os.path.join(basedir, "test") if os.path.exists(testdir): shutil.rmtree(testdir) fileutil.make_dirs(testdir) self.sparent = service.MultiService() self.sparent.startService() self.proc = None self.tub = Tub() self.tub.setOption("expose-remote-exception-types", False) self.tub.setServiceParent(self.sparent) self.mode = mode self.failed = False self.keepalive_file = None def run(self): framelog = os.path.join(self.basedir, "driver.log") log.startLogging(open(framelog, "a"), setStdout=False) log.msg("CHECK_MEMORY(mode=%s) STARTING" % self.mode) #logfile = open(os.path.join(self.testdir, "log"), "w") #flo = log.FileLogObserver(logfile) #log.startLoggingWithObserver(flo.emit, setStdout=False) d = fireEventually() d.addCallback(lambda res: self.setUp()) d.addCallback(lambda res: self.record_initial_memusage()) d.addCallback(lambda res: self.make_nodes()) d.addCallback(lambda res: self.wait_for_client_connected()) d.addCallback(lambda res: self.do_test()) d.addBoth(self.tearDown) def _err(err): self.failed = err log.err(err) print err d.addErrback(_err) def _done(res): reactor.stop() return res d.addBoth(_done) reactor.run() if self.failed: # raiseException doesn't work for CopiedFailures self.failed.raiseException() def setUp(self): #print "STARTING" self.stats = {} self.statsfile = open(os.path.join(self.basedir, "stats.out"), "a") d = self.make_introducer() def _more(res): return self.start_client() d.addCallback(_more) def _record_control_furl(control_furl): self.control_furl = control_furl #print "OBTAINING '%s'" % (control_furl,) return self.tub.getReference(self.control_furl) d.addCallback(_record_control_furl) def _record_control(control_rref): self.control_rref = control_rref d.addCallback(_record_control) def _ready(res): #print "CLIENT 
READY" pass d.addCallback(_ready) return d def record_initial_memusage(self): print print "Client started (no connections yet)" d = self._print_usage() d.addCallback(self.stash_stats, "init") return d def wait_for_client_connected(self): print print "Client connecting to other nodes.." return self.control_rref.callRemote("wait_for_client_connections", self.numnodes+1) def tearDown(self, passthrough): # the client node will shut down in a few seconds #os.remove(os.path.join(self.clientdir, "suicide_prevention_hotline")) log.msg("shutting down SystemTest services") if self.keepalive_file and os.path.exists(self.keepalive_file): age = time.time() - os.stat(self.keepalive_file)[stat.ST_MTIME] log.msg("keepalive file at shutdown was %ds old" % age) d = defer.succeed(None) if self.proc: d.addCallback(lambda res: self.kill_client()) d.addCallback(lambda res: self.sparent.stopService()) d.addCallback(lambda res: flushEventualQueue()) def _close_statsfile(res): self.statsfile.close() d.addCallback(_close_statsfile) d.addCallback(lambda res: passthrough) return d def add_service(self, s): s.setServiceParent(self.sparent) return s def make_introducer(self): iv_basedir = os.path.join(self.testdir, "introducer") os.mkdir(iv_basedir) iv = introducer.IntroducerNode(basedir=iv_basedir) self.introducer = self.add_service(iv) d = self.introducer.when_tub_ready() def _introducer_ready(res): q = self.introducer self.introducer_furl = q.introducer_url d.addCallback(_introducer_ready) return d def make_nodes(self): self.nodes = [] for i in range(self.numnodes): nodedir = os.path.join(self.testdir, "node%d" % i) os.mkdir(nodedir) f = open(os.path.join(nodedir, "tahoe.cfg"), "w") f.write("[client]\n" "introducer.furl = %s\n" "shares.happy = 1\n" "[storage]\n" % (self.introducer_furl,)) # the only tests for which we want the internal nodes to actually # retain shares are the ones where somebody's going to download # them. if self.mode in ("download", "download-GET", "download-GET-slow"): # retain shares pass else: # for these tests, we tell the storage servers to pretend to # accept shares, but really just throw them out, since we're # only testing upload and not download. f.write("debug_discard = true\n") if self.mode in ("receive",): # for this mode, the client-under-test gets all the shares, # so our internal nodes can refuse requests f.write("readonly = true\n") f.close() c = self.add_service(client.Client(basedir=nodedir)) self.nodes.append(c) # the peers will start running, eventually they will connect to each # other and the introducer def touch_keepalive(self): if os.path.exists(self.keepalive_file): age = time.time() - os.stat(self.keepalive_file)[stat.ST_MTIME] log.msg("touching keepalive file, was %ds old" % age) f = open(self.keepalive_file, "w") f.write("""\ If the node notices this file at startup, it will poll every 5 seconds and terminate if the file is more than 10 seconds old, or if it has been deleted. If the test harness has an internal failure and neglects to kill off the node itself, this helps to avoid leaving processes lying around. The contents of this file are ignored. 
""") f.close() def start_client(self): # this returns a Deferred that fires with the client's control.furl log.msg("MAKING CLIENT") # self.testdir is an absolute Unicode path clientdir = self.clientdir = os.path.join(self.testdir, u"client") clientdir_str = clientdir.encode(get_filesystem_encoding()) quiet = StringIO() create_node.create_node({'basedir': clientdir}, out=quiet) log.msg("DONE MAKING CLIENT") # now replace tahoe.cfg # set webport=0 and then ask the node what port it picked. f = open(os.path.join(clientdir, "tahoe.cfg"), "w") f.write("[node]\n" "web.port = tcp:0:interface=127.0.0.1\n" "[client]\n" "introducer.furl = %s\n" "shares.happy = 1\n" "[storage]\n" % (self.introducer_furl,)) if self.mode in ("upload-self", "receive"): # accept and store shares, to trigger the memory consumption bugs pass else: # don't accept any shares f.write("readonly = true\n") ## also, if we do receive any shares, throw them away #f.write("debug_discard = true") if self.mode == "upload-self": pass f.close() self.keepalive_file = os.path.join(clientdir, "suicide_prevention_hotline") # now start updating the mtime. self.touch_keepalive() ts = internet.TimerService(1.0, self.touch_keepalive) ts.setServiceParent(self.sparent) pp = ClientWatcher() self.proc_done = pp.d = defer.Deferred() logfile = os.path.join(self.basedir, "client.log") cmd = ["twistd", "-n", "-y", "tahoe-client.tac", "-l", logfile] env = os.environ.copy() self.proc = reactor.spawnProcess(pp, cmd[0], cmd, env, path=clientdir_str) log.msg("CLIENT STARTED") # now we wait for the client to get started. we're looking for the # control.furl file to appear. furl_file = os.path.join(clientdir, "private", "control.furl") url_file = os.path.join(clientdir, "node.url") def _check(): if pp.ended and pp.ended.value.status != 0: # the twistd process ends normally (with rc=0) if the child # is successfully launched. It ends abnormally (with rc!=0) # if the child cannot be launched. raise ChildDidNotStartError("process ended while waiting for startup") return os.path.exists(furl_file) d = self.poll(_check, 0.1) # once it exists, wait a moment before we read from it, just in case # it hasn't finished writing the whole thing. Ideally control.furl # would be created in some atomic fashion, or made non-readable until # it's ready, but I can't think of an easy way to do that, and I # think the chances that we'll observe a half-write are pretty low. def _stall(res): d2 = defer.Deferred() reactor.callLater(0.1, d2.callback, None) return d2 d.addCallback(_stall) def _read(res): # read the node's URL self.webish_url = open(url_file, "r").read().strip() if self.webish_url[-1] == "/": # trim trailing slash, since the rest of the code wants it gone self.webish_url = self.webish_url[:-1] f = open(furl_file, "r") furl = f.read() return furl.strip() d.addCallback(_read) return d def kill_client(self): # returns a Deferred that fires when the process exits. This may only # be called once. 
try: self.proc.signalProcess("INT") except error.ProcessExitedAlready: pass return self.proc_done def create_data(self, name, size): filename = os.path.join(self.testdir, name + ".data") f = open(filename, "wb") block = "a" * 8192 while size > 0: l = min(size, 8192) f.write(block[:l]) size -= l return filename def stash_stats(self, stats, name): self.statsfile.write("%s %s: %d\n" % (self.mode, name, stats['VmPeak'])) self.statsfile.flush() self.stats[name] = stats['VmPeak'] def POST(self, urlpath, **fields): url = self.webish_url + urlpath sepbase = "boogabooga" sep = "--" + sepbase form = [] form.append(sep) form.append('Content-Disposition: form-data; name="_charset"') form.append('') form.append('UTF-8') form.append(sep) for name, value in fields.iteritems(): if isinstance(value, tuple): filename, value = value form.append('Content-Disposition: form-data; name="%s"; ' 'filename="%s"' % (name, filename)) else: form.append('Content-Disposition: form-data; name="%s"' % name) form.append('') form.append(value) form.append(sep) form[-1] += "--" body = "\r\n".join(form) + "\r\n" headers = {"content-type": "multipart/form-data; boundary=%s" % sepbase, } return tw_client.getPage(url, method="POST", postdata=body, headers=headers, followRedirect=False) def GET_discard(self, urlpath, stall): url = self.webish_url + urlpath + "?filename=dummy-get.out" return discardPage(url, stall) def _print_usage(self, res=None): d = self.control_rref.callRemote("get_memory_usage") def _print(stats): print "VmSize: %9d VmPeak: %9d" % (stats["VmSize"], stats["VmPeak"]) return stats d.addCallback(_print) return d def _do_upload(self, res, size, files, uris): name = '%d' % size print print "uploading %s" % name if self.mode in ("upload", "upload-self"): files[name] = self.create_data(name, size) d = self.control_rref.callRemote("upload_from_file_to_uri", files[name].encode("utf-8"), convergence="check-memory") def _done(uri): os.remove(files[name]) del files[name] return uri d.addCallback(_done) elif self.mode == "upload-POST": data = "a" * size url = "/uri" d = self.POST(url, t="upload", file=("%d.data" % size, data)) elif self.mode in ("receive", "download", "download-GET", "download-GET-slow"): # mode=receive: upload the data from a local peer, so that the # client-under-test receives and stores the shares # # mode=download*: upload the data from a local peer, then have # the client-under-test download it. # # we need to wait until the uploading node has connected to all # peers, since the wait_for_client_connections() above doesn't # pay attention to our self.nodes[] and their connections. 
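# The POST() helper above assembles a multipart/form-data body by hand.  As a
# rough standalone illustration of the wire format it produces, here is the
# same construction for one ordinary field plus one file field, using a fixed
# boundary.  All names and values are made up.
boundary = "boogabooga"
sep = "--" + boundary
parts = [
    sep,
    'Content-Disposition: form-data; name="t"',
    '',
    'upload',
    sep,
    'Content-Disposition: form-data; name="file"; filename="10000.data"',
    '',
    'a' * 16,           # the file payload goes here verbatim
    sep + "--",         # the final boundary is terminated with "--"
]
body = "\r\n".join(parts) + "\r\n"
headers = {"content-type": "multipart/form-data; boundary=" + boundary}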
files[name] = self.create_data(name, size) u = self.nodes[0].getServiceNamed("uploader") d = self.nodes[0].debug_wait_for_client_connections(self.numnodes+1) d.addCallback(lambda res: u.upload(upload.FileName(files[name], convergence="check-memory"))) d.addCallback(lambda results: results.get_uri()) else: raise ValueError("unknown mode=%s" % self.mode) def _complete(uri): uris[name] = uri print "uploaded %s" % name d.addCallback(_complete) return d def _do_download(self, res, size, uris): if self.mode not in ("download", "download-GET", "download-GET-slow"): return name = '%d' % size print "downloading %s" % name uri = uris[name] if self.mode == "download": d = self.control_rref.callRemote("download_from_uri_to_file", uri, "dummy.out") elif self.mode == "download-GET": url = "/uri/%s" % uri d = self.GET_discard(urllib.quote(url), stall=False) elif self.mode == "download-GET-slow": url = "/uri/%s" % uri d = self.GET_discard(urllib.quote(url), stall=True) def _complete(res): print "downloaded %s" % name return res d.addCallback(_complete) return d def do_test(self): #print "CLIENT STARTED" #print "FURL", self.control_furl #print "RREF", self.control_rref #print kB = 1000; MB = 1000*1000 files = {} uris = {} d = self._print_usage() d.addCallback(self.stash_stats, "0B") for i in range(10): d.addCallback(self._do_upload, 10*kB+i, files, uris) d.addCallback(self._do_download, 10*kB+i, uris) d.addCallback(self._print_usage) d.addCallback(self.stash_stats, "10kB") for i in range(3): d.addCallback(self._do_upload, 10*MB+i, files, uris) d.addCallback(self._do_download, 10*MB+i, uris) d.addCallback(self._print_usage) d.addCallback(self.stash_stats, "10MB") for i in range(1): d.addCallback(self._do_upload, 50*MB+i, files, uris) d.addCallback(self._do_download, 50*MB+i, uris) d.addCallback(self._print_usage) d.addCallback(self.stash_stats, "50MB") #for i in range(1): # d.addCallback(self._do_upload, 100*MB+i, files, uris) # d.addCallback(self._do_download, 100*MB+i, uris) # d.addCallback(self._print_usage) #d.addCallback(self.stash_stats, "100MB") #d.addCallback(self.stall) def _done(res): print "FINISHING" d.addCallback(_done) return d def stall(self, res): d = defer.Deferred() reactor.callLater(5, d.callback, None) return d class ClientWatcher(protocol.ProcessProtocol): ended = False def outReceived(self, data): print "OUT:", data def errReceived(self, data): print "ERR:", data def processEnded(self, reason): self.ended = reason self.d.callback(None) if __name__ == '__main__': mode = "upload" if len(sys.argv) > 1: mode = sys.argv[1] # put the logfile and stats.out in _test_memory/ . These stick around. # put the nodes and other files in _test_memory/test/ . These are # removed each time we run. 
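# _print_usage() and stash_stats() above record the "VmSize" and "VmPeak"
# figures reported by the client node's control port.  Those names match the
# fields Linux exposes in /proc/<pid>/status, so, as an assumption about
# where the numbers come from rather than a quote of tahoe's implementation,
# here is a standalone Linux-only way to read the same counters for the
# current process.  The kernel reports the values in kB.
def read_vm_stats():
    stats = {}
    with open("/proc/self/status") as f:
        for line in f:
            if line.startswith(("VmSize:", "VmPeak:")):
                name, value = line.split(":", 1)
                stats[name] = int(value.split()[0])   # value looks like "123456 kB"
    return stats

# e.g. read_vm_stats() -> {'VmPeak': 123456, 'VmSize': 120000}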
sf = SystemFramework("_test_memory", mode) sf.run() tahoe-lafs-1.10.0/src/allmydata/test/check_speed.py000066400000000000000000000216611221140116300221360ustar00rootroot00000000000000import os, sys from twisted.internet import reactor, defer from twisted.python import log from twisted.application import service from foolscap.api import Tub, fireEventually MB = 1000000 class SpeedTest: DO_IMMUTABLE = True DO_MUTABLE_CREATE = True DO_MUTABLE = True def __init__(self, test_client_dir): #self.real_stderr = sys.stderr log.startLogging(open("st.log", "a"), setStdout=False) f = open(os.path.join(test_client_dir, "private", "control.furl"), "r") self.control_furl = f.read().strip() f.close() self.base_service = service.MultiService() self.failed = None self.upload_times = {} self.download_times = {} def run(self): print "STARTING" d = fireEventually() d.addCallback(lambda res: self.setUp()) d.addCallback(lambda res: self.do_test()) d.addBoth(self.tearDown) def _err(err): self.failed = err log.err(err) print err d.addErrback(_err) def _done(res): reactor.stop() return res d.addBoth(_done) reactor.run() if self.failed: print "EXCEPTION" print self.failed sys.exit(1) def setUp(self): self.base_service.startService() self.tub = Tub() self.tub.setOption("expose-remote-exception-types", False) self.tub.setServiceParent(self.base_service) d = self.tub.getReference(self.control_furl) def _gotref(rref): self.client_rref = rref print "Got Client Control reference" return self.stall(5) d.addCallback(_gotref) return d def stall(self, delay, result=None): d = defer.Deferred() reactor.callLater(delay, d.callback, result) return d def record_times(self, times, key): print "TIME (%s): %s up, %s down" % (key, times[0], times[1]) self.upload_times[key], self.download_times[key] = times def one_test(self, res, name, count, size, mutable): # values for 'mutable': # False (upload a different CHK file for each 'count') # "create" (upload different contents into a new SSK file) # "upload" (upload different contents into the same SSK file. 
The # time consumed does not include the creation of the file) d = self.client_rref.callRemote("speed_test", count, size, mutable) d.addCallback(self.record_times, name) return d def measure_rtt(self, res): # use RIClient.get_nodeid() to measure the foolscap-level RTT d = self.client_rref.callRemote("measure_peer_response_time") def _got(res): assert len(res) # need at least one peer times = res.values() self.total_rtt = sum(times) self.average_rtt = sum(times) / len(times) self.max_rtt = max(times) print "num-peers: %d" % len(times) print "total-RTT: %f" % self.total_rtt print "average-RTT: %f" % self.average_rtt print "max-RTT: %f" % self.max_rtt d.addCallback(_got) return d def do_test(self): print "doing test" d = defer.succeed(None) d.addCallback(self.one_test, "startup", 1, 1000, False) #ignore this one d.addCallback(self.measure_rtt) if self.DO_IMMUTABLE: # immutable files d.addCallback(self.one_test, "1x 200B", 1, 200, False) d.addCallback(self.one_test, "10x 200B", 10, 200, False) def _maybe_do_100x_200B(res): if self.upload_times["10x 200B"] < 5: print "10x 200B test went too fast, doing 100x 200B test" return self.one_test(None, "100x 200B", 100, 200, False) return d.addCallback(_maybe_do_100x_200B) d.addCallback(self.one_test, "1MB", 1, 1*MB, False) d.addCallback(self.one_test, "10MB", 1, 10*MB, False) def _maybe_do_100MB(res): if self.upload_times["10MB"] > 30: print "10MB test took too long, skipping 100MB test" return return self.one_test(None, "100MB", 1, 100*MB, False) d.addCallback(_maybe_do_100MB) if self.DO_MUTABLE_CREATE: # mutable file creation d.addCallback(self.one_test, "10x 200B SSK creation", 10, 200, "create") if self.DO_MUTABLE: # mutable file upload/download d.addCallback(self.one_test, "10x 200B SSK", 10, 200, "upload") def _maybe_do_100x_200B_SSK(res): if self.upload_times["10x 200B SSK"] < 5: print "10x 200B SSK test went too fast, doing 100x 200B SSK" return self.one_test(None, "100x 200B SSK", 100, 200, "upload") return d.addCallback(_maybe_do_100x_200B_SSK) d.addCallback(self.one_test, "1MB SSK", 1, 1*MB, "upload") d.addCallback(self.calculate_speeds) return d def calculate_speeds(self, res): # time = A*size+B # we assume that A*200bytes is negligible if self.DO_IMMUTABLE: # upload if "100x 200B" in self.upload_times: B = self.upload_times["100x 200B"] / 100 else: B = self.upload_times["10x 200B"] / 10 print "upload per-file time: %.3fs" % B print "upload per-file times-avg-RTT: %f" % (B / self.average_rtt) print "upload per-file times-total-RTT: %f" % (B / self.total_rtt) A1 = 1*MB / (self.upload_times["1MB"] - B) # in bytes per second print "upload speed (1MB):", self.number(A1, "Bps") A2 = 10*MB / (self.upload_times["10MB"] - B) print "upload speed (10MB):", self.number(A2, "Bps") if "100MB" in self.upload_times: A3 = 100*MB / (self.upload_times["100MB"] - B) print "upload speed (100MB):", self.number(A3, "Bps") # download if "100x 200B" in self.download_times: B = self.download_times["100x 200B"] / 100 else: B = self.download_times["10x 200B"] / 10 print "download per-file time: %.3fs" % B print "download per-file times-avg-RTT: %f" % (B / self.average_rtt) print "download per-file times-total-RTT: %f" % (B / self.total_rtt) A1 = 1*MB / (self.download_times["1MB"] - B) # in bytes per second print "download speed (1MB):", self.number(A1, "Bps") A2 = 10*MB / (self.download_times["10MB"] - B) print "download speed (10MB):", self.number(A2, "Bps") if "100MB" in self.download_times: A3 = 100*MB / (self.download_times["100MB"] - B) print "download speed 
(100MB):", self.number(A3, "Bps") if self.DO_MUTABLE_CREATE: # SSK creation B = self.upload_times["10x 200B SSK creation"] / 10 print "create per-file time SSK: %.3fs" % B if self.DO_MUTABLE: # upload SSK if "100x 200B SSK" in self.upload_times: B = self.upload_times["100x 200B SSK"] / 100 else: B = self.upload_times["10x 200B SSK"] / 10 print "upload per-file time SSK: %.3fs" % B A1 = 1*MB / (self.upload_times["1MB SSK"] - B) # in bytes per second print "upload speed SSK (1MB):", self.number(A1, "Bps") # download SSK if "100x 200B SSK" in self.download_times: B = self.download_times["100x 200B SSK"] / 100 else: B = self.download_times["10x 200B SSK"] / 10 print "download per-file time SSK: %.3fs" % B A1 = 1*MB / (self.download_times["1MB SSK"] - B) # in bytes per # second print "download speed SSK (1MB):", self.number(A1, "Bps") def number(self, value, suffix=""): scaling = 1 if value < 1: fmt = "%1.2g%s" elif value < 100: fmt = "%.1f%s" elif value < 1000: fmt = "%d%s" elif value < 1e6: fmt = "%.2fk%s"; scaling = 1e3 elif value < 1e9: fmt = "%.2fM%s"; scaling = 1e6 elif value < 1e12: fmt = "%.2fG%s"; scaling = 1e9 elif value < 1e15: fmt = "%.2fT%s"; scaling = 1e12 elif value < 1e18: fmt = "%.2fP%s"; scaling = 1e15 else: fmt = "huge! %g%s" return fmt % (value / scaling, suffix) def tearDown(self, res): d = self.base_service.stopService() d.addCallback(lambda ignored: res) return d if __name__ == '__main__': test_client_dir = sys.argv[1] st = SpeedTest(test_client_dir) st.run() tahoe-lafs-1.10.0/src/allmydata/test/common.py000066400000000000000000001752401221140116300211740ustar00rootroot00000000000000import os, random, struct from zope.interface import implements from twisted.internet import defer from twisted.internet.interfaces import IPullProducer from twisted.python import failure from twisted.application import service from twisted.web.error import Error as WebError from foolscap.api import flushEventualQueue, fireEventually from allmydata import uri, client from allmydata.introducer.server import IntroducerNode from allmydata.interfaces import IMutableFileNode, IImmutableFileNode,\ NotEnoughSharesError, ICheckable, \ IMutableUploadable, SDMF_VERSION, \ MDMF_VERSION from allmydata.check_results import CheckResults, CheckAndRepairResults, \ DeepCheckResults, DeepCheckAndRepairResults from allmydata.storage_client import StubServer from allmydata.mutable.layout import unpack_header from allmydata.mutable.publish import MutableData from allmydata.storage.mutable import MutableShareFile from allmydata.util import hashutil, log, fileutil, pollmixin from allmydata.util.assertutil import precondition from allmydata.util.consumer import download_to_data from allmydata.stats import StatsGathererService from allmydata.key_generator import KeyGeneratorService import allmydata.test.common_util as testutil from allmydata import immutable TEST_RSA_KEY_SIZE = 522 def flush_but_dont_ignore(res): d = flushEventualQueue() def _done(ignored): return res d.addCallback(_done) return d class DummyProducer: implements(IPullProducer) def resumeProducing(self): pass class FakeCHKFileNode: """I provide IImmutableFileNode, but all of my data is stored in a class-level dictionary.""" implements(IImmutableFileNode) def __init__(self, filecap, all_contents): precondition(isinstance(filecap, (uri.CHKFileURI, uri.LiteralFileURI)), filecap) self.all_contents = all_contents self.my_uri = filecap self.storage_index = self.my_uri.get_storage_index() def get_uri(self): return self.my_uri.to_string() def 
get_write_uri(self): return None def get_readonly_uri(self): return self.my_uri.to_string() def get_cap(self): return self.my_uri def get_verify_cap(self): return self.my_uri.get_verify_cap() def get_repair_cap(self): return self.my_uri.get_verify_cap() def get_storage_index(self): return self.storage_index def check(self, monitor, verify=False, add_lease=False): s = StubServer("\x00"*20) r = CheckResults(self.my_uri, self.storage_index, healthy=True, recoverable=True, needs_rebalancing=False, count_shares_needed=3, count_shares_expected=10, count_shares_good=10, count_good_share_hosts=10, count_recoverable_versions=1, count_unrecoverable_versions=0, servers_responding=[s], sharemap={1: [s]}, count_wrong_shares=0, list_corrupt_shares=[], count_corrupt_shares=0, list_incompatible_shares=[], count_incompatible_shares=0, summary="", report=[], share_problems=[], servermap=None) return defer.succeed(r) def check_and_repair(self, monitor, verify=False, add_lease=False): d = self.check(verify) def _got(cr): r = CheckAndRepairResults(self.storage_index) r.pre_repair_results = r.post_repair_results = cr return r d.addCallback(_got) return d def is_mutable(self): return False def is_readonly(self): return True def is_unknown(self): return False def is_allowed_in_immutable_directory(self): return True def raise_error(self): pass def get_size(self): if isinstance(self.my_uri, uri.LiteralFileURI): return self.my_uri.get_size() try: data = self.all_contents[self.my_uri.to_string()] except KeyError, le: raise NotEnoughSharesError(le, 0, 3) return len(data) def get_current_size(self): return defer.succeed(self.get_size()) def read(self, consumer, offset=0, size=None): # we don't bother to call registerProducer/unregisterProducer, # because it's a hassle to write a dummy Producer that does the right # thing (we have to make sure that DummyProducer.resumeProducing # writes the data into the consumer immediately, otherwise it will # loop forever). 
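# FakeCHKFileNode.read() here simply slices the stored contents and pushes
# the bytes into whatever "consumer" it was handed, skipping the producer
# registration dance for the reason given in the comment above.  An object
# satisfying that side of the contract only needs a write() method; this
# standalone sketch shows the shape of the interaction (the class and
# function names are made up, not tahoe interfaces).
class ListConsumer:
    def __init__(self):
        self.chunks = []
    def write(self, data):
        self.chunks.append(data)
    def value(self):
        return "".join(self.chunks)

def read_slice(data, consumer, offset=0, size=None):
    end = len(data) if size is None else offset + size
    consumer.write(data[offset:end])
    return consumer

c = read_slice("abcdefgh", ListConsumer(), offset=2, size=3)
assert c.value() == "cde"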
d = defer.succeed(None) d.addCallback(self._read, consumer, offset, size) return d def _read(self, ignored, consumer, offset, size): if isinstance(self.my_uri, uri.LiteralFileURI): data = self.my_uri.data else: if self.my_uri.to_string() not in self.all_contents: raise NotEnoughSharesError(None, 0, 3) data = self.all_contents[self.my_uri.to_string()] start = offset if size is not None: end = offset + size else: end = len(data) consumer.write(data[start:end]) return consumer def get_best_readable_version(self): return defer.succeed(self) def download_to_data(self): return download_to_data(self) download_best_version = download_to_data def get_size_of_best_version(self): return defer.succeed(self.get_size) def make_chk_file_cap(size): return uri.CHKFileURI(key=os.urandom(16), uri_extension_hash=os.urandom(32), needed_shares=3, total_shares=10, size=size) def make_chk_file_uri(size): return make_chk_file_cap(size).to_string() def create_chk_filenode(contents, all_contents): filecap = make_chk_file_cap(len(contents)) n = FakeCHKFileNode(filecap, all_contents) all_contents[filecap.to_string()] = contents return n class FakeMutableFileNode: """I provide IMutableFileNode, but all of my data is stored in a class-level dictionary.""" implements(IMutableFileNode, ICheckable) MUTABLE_SIZELIMIT = 10000 def __init__(self, storage_broker, secret_holder, default_encoding_parameters, history, all_contents): self.all_contents = all_contents self.file_types = {} # storage index => MDMF_VERSION or SDMF_VERSION self.init_from_cap(make_mutable_file_cap()) self._k = default_encoding_parameters['k'] self._segsize = default_encoding_parameters['max_segment_size'] def create(self, contents, key_generator=None, keysize=None, version=SDMF_VERSION): if version == MDMF_VERSION and \ isinstance(self.my_uri, (uri.ReadonlySSKFileURI, uri.WriteableSSKFileURI)): self.init_from_cap(make_mdmf_mutable_file_cap()) self.file_types[self.storage_index] = version initial_contents = self._get_initial_contents(contents) data = initial_contents.read(initial_contents.get_size()) data = "".join(data) self.all_contents[self.storage_index] = data return defer.succeed(self) def _get_initial_contents(self, contents): if contents is None: return MutableData("") if IMutableUploadable.providedBy(contents): return contents assert callable(contents), "%s should be callable, not %s" % \ (contents, type(contents)) return contents(self) def init_from_cap(self, filecap): assert isinstance(filecap, (uri.WriteableSSKFileURI, uri.ReadonlySSKFileURI, uri.WriteableMDMFFileURI, uri.ReadonlyMDMFFileURI)) self.my_uri = filecap self.storage_index = self.my_uri.get_storage_index() if isinstance(filecap, (uri.WriteableMDMFFileURI, uri.ReadonlyMDMFFileURI)): self.file_types[self.storage_index] = MDMF_VERSION else: self.file_types[self.storage_index] = SDMF_VERSION return self def get_cap(self): return self.my_uri def get_readcap(self): return self.my_uri.get_readonly() def get_uri(self): return self.my_uri.to_string() def get_write_uri(self): if self.is_readonly(): return None return self.my_uri.to_string() def get_readonly(self): return self.my_uri.get_readonly() def get_readonly_uri(self): return self.my_uri.get_readonly().to_string() def get_verify_cap(self): return self.my_uri.get_verify_cap() def get_repair_cap(self): if self.my_uri.is_readonly(): return None return self.my_uri def is_readonly(self): return self.my_uri.is_readonly() def is_mutable(self): return self.my_uri.is_mutable() def is_unknown(self): return False def 
is_allowed_in_immutable_directory(self): return not self.my_uri.is_mutable() def raise_error(self): pass def get_writekey(self): return "\x00"*16 def get_size(self): return len(self.all_contents[self.storage_index]) def get_current_size(self): return self.get_size_of_best_version() def get_size_of_best_version(self): return defer.succeed(len(self.all_contents[self.storage_index])) def get_storage_index(self): return self.storage_index def get_servermap(self, mode): return defer.succeed(None) def get_version(self): assert self.storage_index in self.file_types return self.file_types[self.storage_index] def check(self, monitor, verify=False, add_lease=False): s = StubServer("\x00"*20) r = CheckResults(self.my_uri, self.storage_index, healthy=True, recoverable=True, needs_rebalancing=False, count_shares_needed=3, count_shares_expected=10, count_shares_good=10, count_good_share_hosts=10, count_recoverable_versions=1, count_unrecoverable_versions=0, servers_responding=[s], sharemap={"seq1-abcd-sh0": [s]}, count_wrong_shares=0, list_corrupt_shares=[], count_corrupt_shares=0, list_incompatible_shares=[], count_incompatible_shares=0, summary="", report=[], share_problems=[], servermap=None) return defer.succeed(r) def check_and_repair(self, monitor, verify=False, add_lease=False): d = self.check(verify) def _got(cr): r = CheckAndRepairResults(self.storage_index) r.pre_repair_results = r.post_repair_results = cr return r d.addCallback(_got) return d def deep_check(self, verify=False, add_lease=False): d = self.check(verify) def _done(r): dr = DeepCheckResults(self.storage_index) dr.add_check(r, []) return dr d.addCallback(_done) return d def deep_check_and_repair(self, verify=False, add_lease=False): d = self.check_and_repair(verify) def _done(r): dr = DeepCheckAndRepairResults(self.storage_index) dr.add_check(r, []) return dr d.addCallback(_done) return d def download_best_version(self): return defer.succeed(self._download_best_version()) def _download_best_version(self, ignored=None): if isinstance(self.my_uri, uri.LiteralFileURI): return self.my_uri.data if self.storage_index not in self.all_contents: raise NotEnoughSharesError(None, 0, 3) return self.all_contents[self.storage_index] def overwrite(self, new_contents): assert not self.is_readonly() new_data = new_contents.read(new_contents.get_size()) new_data = "".join(new_data) self.all_contents[self.storage_index] = new_data return defer.succeed(None) def modify(self, modifier): # this does not implement FileTooLargeError, but the real one does return defer.maybeDeferred(self._modify, modifier) def _modify(self, modifier): assert not self.is_readonly() old_contents = self.all_contents[self.storage_index] new_data = modifier(old_contents, None, True) self.all_contents[self.storage_index] = new_data return None # As actually implemented, MutableFilenode and MutableFileVersion # are distinct. However, nothing in the webapi uses (yet) that # distinction -- it just uses the unified download interface # provided by get_best_readable_version and read. When we start # doing cooler things like LDMF, we will want to revise this code to # be less simplistic. def get_best_readable_version(self): return defer.succeed(self) def get_best_mutable_version(self): return defer.succeed(self) # Ditto for this, which is an implementation of IWriteable. # XXX: Declare that the same is implemented. 
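# modify() above drives a user-supplied "modifier" callable with the
# signature modifier(old_contents, servermap, first_time) and stores whatever
# it returns.  A sketch of one such callable, in the spirit of the periodic
# log-append done by check_grid.py (the function name is illustrative):
import time

def append_timestamp(old_contents, servermap, first_time):
    # ignore servermap/first_time; just extend the old contents
    return old_contents + time.ctime() + "\n"

# usage with the fake node:  d = node.modify(append_timestamp)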
def update(self, data, offset): assert not self.is_readonly() def modifier(old, servermap, first_time): new = old[:offset] + "".join(data.read(data.get_size())) new += old[len(new):] return new return self.modify(modifier) def read(self, consumer, offset=0, size=None): data = self._download_best_version() if size: data = data[offset:offset+size] consumer.write(data) return defer.succeed(consumer) def make_mutable_file_cap(): return uri.WriteableSSKFileURI(writekey=os.urandom(16), fingerprint=os.urandom(32)) def make_mdmf_mutable_file_cap(): return uri.WriteableMDMFFileURI(writekey=os.urandom(16), fingerprint=os.urandom(32)) def make_mutable_file_uri(mdmf=False): if mdmf: uri = make_mdmf_mutable_file_cap() else: uri = make_mutable_file_cap() return uri.to_string() def make_verifier_uri(): return uri.SSKVerifierURI(storage_index=os.urandom(16), fingerprint=os.urandom(32)).to_string() def create_mutable_filenode(contents, mdmf=False, all_contents=None): # XXX: All of these arguments are kind of stupid. if mdmf: cap = make_mdmf_mutable_file_cap() else: cap = make_mutable_file_cap() encoding_params = {} encoding_params['k'] = 3 encoding_params['max_segment_size'] = 128*1024 filenode = FakeMutableFileNode(None, None, encoding_params, None, all_contents) filenode.init_from_cap(cap) if mdmf: filenode.create(MutableData(contents), version=MDMF_VERSION) else: filenode.create(MutableData(contents), version=SDMF_VERSION) return filenode class LoggingServiceParent(service.MultiService): def log(self, *args, **kwargs): return log.msg(*args, **kwargs) class SystemTestMixin(pollmixin.PollMixin, testutil.StallMixin): # SystemTestMixin tests tend to be a lot of work, and we have a few # buildslaves that are pretty slow, and every once in a while these tests # run up against the default 120 second timeout. So increase the default # timeout. Individual test cases can override this, of course. 
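# FakeMutableFileNode.update(data, offset) earlier in this file splices the
# new bytes over the old contents at the given offset and keeps whatever tail
# the new data does not cover: new = old[:offset] + data; new += old[len(new):].
# Two concrete cases of that rule, checked standalone:
def splice(old, data, offset):
    new = old[:offset] + data
    new += old[len(new):]
    return new

assert splice("ABCDEFGH", "xyz", 2) == "ABxyzFGH"    # overwrite in the middle
assert splice("ABCDEFGH", "xyz", 6) == "ABCDEFxyz"   # extends past the end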
timeout = 300 def setUp(self): self.sparent = service.MultiService() self.sparent.startService() self.stats_gatherer = None self.stats_gatherer_furl = None self.key_generator_svc = None self.key_generator_furl = None def tearDown(self): log.msg("shutting down SystemTest services") d = self.sparent.stopService() d.addBoth(flush_but_dont_ignore) return d def getdir(self, subdir): return os.path.join(self.basedir, subdir) def add_service(self, s): s.setServiceParent(self.sparent) return s def set_up_nodes(self, NUMCLIENTS=5, use_stats_gatherer=False, use_key_generator=False): self.numclients = NUMCLIENTS iv_dir = self.getdir("introducer") if not os.path.isdir(iv_dir): fileutil.make_dirs(iv_dir) fileutil.write(os.path.join(iv_dir, 'tahoe.cfg'), "[node]\n" + u"nickname = introducer \u263A\n".encode('utf-8') + "web.port = tcp:0:interface=127.0.0.1\n") if SYSTEM_TEST_CERTS: os.mkdir(os.path.join(iv_dir, "private")) f = open(os.path.join(iv_dir, "private", "node.pem"), "w") f.write(SYSTEM_TEST_CERTS[0]) f.close() iv = IntroducerNode(basedir=iv_dir) self.introducer = self.add_service(iv) d = self.introducer.when_tub_ready() d.addCallback(self._get_introducer_web) if use_stats_gatherer: d.addCallback(self._set_up_stats_gatherer) if use_key_generator: d.addCallback(self._set_up_key_generator) d.addCallback(self._set_up_nodes_2) if use_stats_gatherer: d.addCallback(self._grab_stats) return d def _get_introducer_web(self, res): f = open(os.path.join(self.getdir("introducer"), "node.url"), "r") self.introweb_url = f.read().strip() f.close() def _set_up_stats_gatherer(self, res): statsdir = self.getdir("stats_gatherer") fileutil.make_dirs(statsdir) self.stats_gatherer_svc = StatsGathererService(statsdir) self.stats_gatherer = self.stats_gatherer_svc.stats_gatherer self.add_service(self.stats_gatherer_svc) d = fireEventually() sgf = os.path.join(statsdir, 'stats_gatherer.furl') def check_for_furl(): return os.path.exists(sgf) d.addCallback(lambda junk: self.poll(check_for_furl, timeout=30)) def get_furl(junk): self.stats_gatherer_furl = file(sgf, 'rb').read().strip() d.addCallback(get_furl) return d def _set_up_key_generator(self, res): kgsdir = self.getdir("key_generator") fileutil.make_dirs(kgsdir) self.key_generator_svc = KeyGeneratorService(kgsdir, display_furl=False, default_key_size=TEST_RSA_KEY_SIZE) self.key_generator_svc.key_generator.pool_size = 4 self.key_generator_svc.key_generator.pool_refresh_delay = 60 self.add_service(self.key_generator_svc) d = fireEventually() def check_for_furl(): return os.path.exists(os.path.join(kgsdir, 'key_generator.furl')) d.addCallback(lambda junk: self.poll(check_for_furl, timeout=30)) def get_furl(junk): kgf = os.path.join(kgsdir, 'key_generator.furl') self.key_generator_furl = file(kgf, 'rb').read().strip() d.addCallback(get_furl) return d def _set_up_nodes_2(self, res): q = self.introducer self.introducer_furl = q.introducer_url self.clients = [] basedirs = [] for i in range(self.numclients): basedir = self.getdir("client%d" % i) basedirs.append(basedir) fileutil.make_dirs(os.path.join(basedir, "private")) if len(SYSTEM_TEST_CERTS) > (i+1): f = open(os.path.join(basedir, "private", "node.pem"), "w") f.write(SYSTEM_TEST_CERTS[i+1]) f.close() config = "[client]\n" config += "introducer.furl = %s\n" % self.introducer_furl if self.stats_gatherer_furl: config += "stats_gatherer.furl = %s\n" % self.stats_gatherer_furl nodeconfig = "[node]\n" nodeconfig += (u"nickname = client %d \u263A\n" % (i,)).encode('utf-8') if i == 0: # clients[0] runs a webserver and a 
helper, no key_generator config += nodeconfig config += "web.port = tcp:0:interface=127.0.0.1\n" config += "timeout.keepalive = 600\n" config += "[helper]\n" config += "enabled = True\n" elif i == 3: # clients[3] runs a webserver and uses a helper, uses # key_generator if self.key_generator_furl: config += "key_generator.furl = %s\n" % self.key_generator_furl config += nodeconfig config += "web.port = tcp:0:interface=127.0.0.1\n" config += "timeout.disconnect = 1800\n" else: config += nodeconfig fileutil.write(os.path.join(basedir, 'tahoe.cfg'), config) # give subclasses a chance to append lines to the node's tahoe.cfg # files before they are launched. self._set_up_nodes_extra_config() # start clients[0], wait for it's tub to be ready (at which point it # will have registered the helper furl). c = self.add_service(client.Client(basedir=basedirs[0])) self.clients.append(c) c.set_default_mutable_keysize(TEST_RSA_KEY_SIZE) d = c.when_tub_ready() def _ready(res): f = open(os.path.join(basedirs[0],"private","helper.furl"), "r") helper_furl = f.read() f.close() self.helper_furl = helper_furl if self.numclients >= 4: f = open(os.path.join(basedirs[3], 'tahoe.cfg'), 'ab+') f.write( "[client]\n" "helper.furl = %s\n" % helper_furl) f.close() # this starts the rest of the clients for i in range(1, self.numclients): c = self.add_service(client.Client(basedir=basedirs[i])) self.clients.append(c) c.set_default_mutable_keysize(TEST_RSA_KEY_SIZE) log.msg("STARTING") return self.wait_for_connections() d.addCallback(_ready) def _connected(res): log.msg("CONNECTED") # now find out where the web port was self.webish_url = self.clients[0].getServiceNamed("webish").getURL() if self.numclients >=4: # and the helper-using webport self.helper_webish_url = self.clients[3].getServiceNamed("webish").getURL() d.addCallback(_connected) return d def _set_up_nodes_extra_config(self): # for overriding by subclasses pass def _grab_stats(self, res): d = self.stats_gatherer.poll() return d def bounce_client(self, num): c = self.clients[num] d = c.disownServiceParent() # I think windows requires a moment to let the connection really stop # and the port number made available for re-use. TODO: examine the # behavior, see if this is really the problem, see if we can do # better than blindly waiting for a second. 
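# The harness above leans on PollMixin.poll(check_fn, timeout=...) to wait
# for asynchronous conditions (a furl file appearing, every client seeing
# every server).  As a rough synchronous analogue of that pattern, and not
# the Twisted-based implementation tahoe actually uses, a plain
# poll-until-true-or-deadline loop looks like this:
import time

def poll_until(check, timeout=30.0, interval=0.5):
    deadline = time.time() + timeout
    while time.time() < deadline:
        if check():
            return True
        time.sleep(interval)
    raise RuntimeError("condition not met within %s seconds" % timeout)

# usage: poll_until(lambda: os.path.exists(sgf), timeout=30)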
d.addCallback(self.stall, 1.0) def _stopped(res): new_c = client.Client(basedir=self.getdir("client%d" % num)) self.clients[num] = new_c new_c.set_default_mutable_keysize(TEST_RSA_KEY_SIZE) self.add_service(new_c) return new_c.when_tub_ready() d.addCallback(_stopped) d.addCallback(lambda res: self.wait_for_connections()) def _maybe_get_webport(res): if num == 0: # now find out where the web port was self.webish_url = self.clients[0].getServiceNamed("webish").getURL() d.addCallback(_maybe_get_webport) return d def add_extra_node(self, client_num, helper_furl=None, add_to_sparent=False): # usually this node is *not* parented to our self.sparent, so we can # shut it down separately from the rest, to exercise the # connection-lost code basedir = self.getdir("client%d" % client_num) if not os.path.isdir(basedir): fileutil.make_dirs(basedir) config = "[client]\n" config += "introducer.furl = %s\n" % self.introducer_furl if helper_furl: config += "helper.furl = %s\n" % helper_furl fileutil.write(os.path.join(basedir, 'tahoe.cfg'), config) c = client.Client(basedir=basedir) self.clients.append(c) c.set_default_mutable_keysize(TEST_RSA_KEY_SIZE) self.numclients += 1 if add_to_sparent: c.setServiceParent(self.sparent) else: c.startService() d = self.wait_for_connections() d.addCallback(lambda res: c) return d def _check_connections(self): for c in self.clients: if not c.connected_to_introducer(): return False sb = c.get_storage_broker() if len(sb.get_connected_servers()) != self.numclients: return False up = c.getServiceNamed("uploader") if up._helper_furl and not up._helper: return False return True def wait_for_connections(self, ignored=None): return self.poll(self._check_connections, timeout=200) # our system test uses the same Tub certificates each time, to avoid the # overhead of key generation SYSTEM_TEST_CERTS = [ """-----BEGIN CERTIFICATE----- MIIBnjCCAQcCAgCEMA0GCSqGSIb3DQEBBAUAMBcxFTATBgNVBAMUDG5ld3BiX3Ro aW5neTAeFw0wODA3MjUyMjQyMDVaFw0wOTA3MjUyMjQyMDVaMBcxFTATBgNVBAMU DG5ld3BiX3RoaW5neTCBnzANBgkqhkiG9w0BAQEFAAOBjQAwgYkCgYEAxHCWajrR 2h/iurw8k93m8WUdE3xypJiiAITw7GkKlKbCLD+dEce2MXwVVYca0n/MZZsj89Cu Ko0lLjksMseoSDoj98iEmVpaY5mc2ntpQ+FXdoEmPP234XRWEg2HQ+EaK6+WkGQg DDXQvFJCVCQk/n1MdAwZZ6vqf2ITzSuD44kCAwEAATANBgkqhkiG9w0BAQQFAAOB gQBn6qPKGdFjWJy7sOOTUFfm/THhHQqAh1pBDLkjR+OtzuobCoP8n8J1LNG3Yxds Jj7NWQL7X5TfOlfoi7e9jK0ujGgWh3yYU6PnHzJLkDiDT3LCSywQuGXCjh0tOStS 2gaCmmAK2cfxSStKzNcewl2Zs8wHMygq8TLFoZ6ozN1+xQ== -----END CERTIFICATE----- -----BEGIN RSA PRIVATE KEY----- MIICXQIBAAKBgQDEcJZqOtHaH+K6vDyT3ebxZR0TfHKkmKIAhPDsaQqUpsIsP50R x7YxfBVVhxrSf8xlmyPz0K4qjSUuOSwyx6hIOiP3yISZWlpjmZzae2lD4Vd2gSY8 /bfhdFYSDYdD4Rorr5aQZCAMNdC8UkJUJCT+fUx0DBlnq+p/YhPNK4PjiQIDAQAB AoGAZyDMdrymiyMOPwavrtlicvyohSBid3MCKc+hRBvpSB0790r2RO1aAySndp1V QYmCXx1RhKDbrs8m49t0Dryu5T+sQrFl0E3usAP3vvXWeh4jwJ9GyiRWy4xOEuEQ 3ewjbEItHqA/bRJF0TNtbOmZTDC7v9FRPf2bTAyFfTZep5kCQQD33q1RA8WUYtmQ IArgHqt69i421lpXlOgqotFHwTx4FiGgVzDQCDuXU6txB9EeKRM340poissav/n6 bkLZ7/VDAkEAyuIPkeI59sE5NnmW+N47NbCfdM1Smy1YxZpv942EmP9Veub5N0dw iK5bLAgEguUIjpTsh3BRmsE9Xd+ItmnRQwJBAMZhbg19G1EbnE0BmDKv2UbcaThy bnPSNc6J6T2opqDl9ZvCrMqTDD6dNIWOYAvni/4a556sFsoeBBAu10peBskCQE6S cB86cuJagLLVMh/dySaI6ahNoFFSpY+ZuQUxfInYUR2Q+DFtbGqyw8JwtHaRBthZ WqU1XZVGg2KooISsxIsCQQD1PS7//xHLumBb0jnpL7n6W8gmiTyzblT+0otaCisP fN6rTlwV1o8VsOUAz0rmKO5RArCbkmb01WtMgPCDBYkk -----END RSA PRIVATE KEY----- """, # 0 """-----BEGIN CERTIFICATE----- MIIBnjCCAQcCAgCEMA0GCSqGSIb3DQEBBAUAMBcxFTATBgNVBAMUDG5ld3BiX3Ro aW5neTAeFw0wODA3MjUyMjQyMDVaFw0wOTA3MjUyMjQyMDVaMBcxFTATBgNVBAMU 
DG5ld3BiX3RoaW5neTCBnzANBgkqhkiG9w0BAQEFAAOBjQAwgYkCgYEAs9CALdmW kJ6r0KPSLdGCA8rzQKxWayrMckT22ZtbRv3aw6VA96dWclpY+T2maV0LrAzmMSL8 n61ydJHM33iYDOyWbwHWN45XCjY/e20PL54XUl/DmbBHEhQVQLIfCldcRcnWEfoO iOhDJfWpDO1dmP/aOYLdkZCZvBtPAfyUqRcCAwEAATANBgkqhkiG9w0BAQQFAAOB gQAN9eaCREkzzk4yPIaWYkWHg3Igs1vnOR/iDw3OjyxO/xJFP2lkA2WtrwL2RTRq dxA8gwdPyrWgdiZElwZH8mzTJ4OdUXLSMclLOg9kvH6gtSvhLztfEDwDP1wRhikh OeWWu2GIC+uqFCI1ftoGgU+aIa6yrHswf66rrQvBSSvJPQ== -----END CERTIFICATE----- -----BEGIN RSA PRIVATE KEY----- MIICXQIBAAKBgQCz0IAt2ZaQnqvQo9It0YIDyvNArFZrKsxyRPbZm1tG/drDpUD3 p1ZyWlj5PaZpXQusDOYxIvyfrXJ0kczfeJgM7JZvAdY3jlcKNj97bQ8vnhdSX8OZ sEcSFBVAsh8KV1xFydYR+g6I6EMl9akM7V2Y/9o5gt2RkJm8G08B/JSpFwIDAQAB AoGBAIUy5zCPpSP+FeJY6CG+t6Pdm/IFd4KtUoM3KPCrT6M3+uzApm6Ny9Crsor2 qyYTocjSSVaOxzn1fvpw4qWLrH1veUf8ozMs8Z0VuPHD1GYUGjOXaBPXb5o1fQL9 h7pS5/HrDDPN6wwDNTsxRf/fP58CnfwQUhwdoxcx8TnVmDQxAkEA6N3jBXt/Lh0z UbXHhv3QBOcqLZA2I4tY7wQzvUvKvVmCJoW1tfhBdYQWeQv0jzjL5PzrrNY8hC4l 8+sFM3h5TwJBAMWtbFIEZfRSG1JhHK3evYHDTZnr/j+CdoWuhzP5RkjkIKsiLEH7 2ZhA7CdFQLZF14oXy+g1uVCzzfB2WELtUbkCQQDKrb1XWzrBlzbAipfkXWs9qTmj uJ32Z+V6+0xRGPOXxJ0sDDqw7CeFMfchWg98zLFiV+SEZV78qPHtkAPR3ayvAkB+ hUMhM4N13t9x2IoclsXAOhp++9bdG0l0woHyuAdOPATUw6iECwf4NQVxFRgYEZek 4Ro3Y7taddrHn1dabr6xAkAic47OoLOROYLpljmJJO0eRe3Z5IFe+0D2LfhAW3LQ JU+oGq5pCjfnoaDElRRZn0+GmunnWeQEYKoflTi/lI9d -----END RSA PRIVATE KEY----- """, # 1 """-----BEGIN CERTIFICATE----- MIIBnjCCAQcCAgCEMA0GCSqGSIb3DQEBBAUAMBcxFTATBgNVBAMUDG5ld3BiX3Ro aW5neTAeFw0wODA3MjUyMjQyMDZaFw0wOTA3MjUyMjQyMDZaMBcxFTATBgNVBAMU DG5ld3BiX3RoaW5neTCBnzANBgkqhkiG9w0BAQEFAAOBjQAwgYkCgYEAsxG7LTrz DF+9wegOR/BRJhjSumPUbYQnNAUKtPraFsGjAJILP44AHdnHt1MONLgTeX1ynapo q6O/q5cdKtBB7uEh7FpkLCCwpZt/m0y79cynn8AmWoQVgl8oS0567UmPeJnTzFPv dmT5dlaQALeX5YGceAsEvhmAsdOMttaor38CAwEAATANBgkqhkiG9w0BAQQFAAOB gQA345rxotfvh2kfgrmRzAyGewVBV4r23Go30GSZir8X2GoH3qKNwO4SekAohuSw AiXzLUbwIdSRSqaLFxSC7Duqc9eIeFDAWjeEmpfFLBNiw3K8SLA00QrHCUXnECTD b/Kk6OGuvPOiuuONVjEuEcRdCH3/Li30D0AhJaMynjhQJQ== -----END CERTIFICATE----- -----BEGIN RSA PRIVATE KEY----- MIICXQIBAAKBgQCzEbstOvMMX73B6A5H8FEmGNK6Y9RthCc0BQq0+toWwaMAkgs/ jgAd2ce3Uw40uBN5fXKdqmiro7+rlx0q0EHu4SHsWmQsILClm3+bTLv1zKefwCZa hBWCXyhLTnrtSY94mdPMU+92ZPl2VpAAt5flgZx4CwS+GYCx04y21qivfwIDAQAB AoGBAIlhFg/aRPL+VM9539LzHN60dp8GzceDdqwjHhbAySZiQlLCuJx2rcI4/U65 CpIJku9G/fLV9N2RkA/trDPXeGyqCTJfnNzyZcvvMscRMFqSGyc21Y0a+GS8bIxt 1R2B18epSVMsWSWWMypeEgsfv29LV7oSWG8UKaqQ9+0h63DhAkEA4i2L/rori/Fb wpIBfA+xbXL/GmWR7xPW+3nG3LdLQpVzxz4rIsmtO9hIXzvYpcufQbwgVACyMmRf TMABeSDM7wJBAMquEdTaVXjGfH0EJ7z95Ys2rYTiCXjBfyEOi6RXXReqV9SXNKlN aKsO22zYecpkAjY1EdUdXWP/mNVEybjpZnECQQCcuh0JPS5RwcTo9c2rjyBOjGIz g3B1b5UIG2FurmCrWe6pgO3ZJFEzZ/L2cvz0Hj5UCa2JKBZTDvRutZoPumfnAkAb nSW+y1Rz1Q8m9Ub4v9rjYbq4bRd/RVWtyk6KQIDldYbr5wH8wxgsniSVKtVFFuUa P5bDY3HS6wMGo42cTOhxAkAcdweQSQ3j7mfc5vh71HeAC1v/VAKGehGOUdeEIQNl Sb2WuzpZkbfsrVzW6MdlgY6eE7ufRswhDPLWPC8MP0d1 -----END RSA PRIVATE KEY----- """, # 2 """-----BEGIN CERTIFICATE----- MIIBnjCCAQcCAgCEMA0GCSqGSIb3DQEBBAUAMBcxFTATBgNVBAMUDG5ld3BiX3Ro aW5neTAeFw0wODA3MjUyMjQyMDZaFw0wOTA3MjUyMjQyMDZaMBcxFTATBgNVBAMU DG5ld3BiX3RoaW5neTCBnzANBgkqhkiG9w0BAQEFAAOBjQAwgYkCgYEAxnH+pbOS qlJlsHpKUQtV0oN1Mv+ESG+yUDxStFFGjkJv/UIRzpxqFqY/6nJ3D03kZsDdcXyi CfV9hPYQaVNMn6z+puPmIagfBQ0aOyuI+nUhCttZIYD9071BjW5bCMX5NZWL/CZm E0HdAZ77H6UrRckJ7VR8wAFpihBxD5WliZcCAwEAATANBgkqhkiG9w0BAQQFAAOB gQAwXqY1Sjvp9JSTHKklu7s0T6YmH/BKSXrHpS2xO69svK+ze5/+5td3jPn4Qe50 xwRNZSFmSLuJLfCO32QJSJTB7Vs5D3dNTZ2i8umsaodm97t8hit7L75nXRGHKH// xDVWAFB9sSgCQyPMRkL4wB4YSfRhoSKVwMvaz+XRZDUU0A== -----END CERTIFICATE----- -----BEGIN RSA PRIVATE KEY----- 
MIICXAIBAAKBgQDGcf6ls5KqUmWwekpRC1XSg3Uy/4RIb7JQPFK0UUaOQm/9QhHO nGoWpj/qcncPTeRmwN1xfKIJ9X2E9hBpU0yfrP6m4+YhqB8FDRo7K4j6dSEK21kh gP3TvUGNblsIxfk1lYv8JmYTQd0BnvsfpStFyQntVHzAAWmKEHEPlaWJlwIDAQAB AoGAdHNMlXwtItm7ZrY8ihZ2xFP0IHsk60TwhHkBp2LSXoTKJvnwbSgIcUYZ18BX 8Zkp4MpoqEIU7HcssyuaMdR572huV2w0D/2gYJQLQ5JapaR3hMox3YG4wjXasN1U 1iZt7JkhKlOy+ElL5T9mKTE1jDsX2RAv4WALzMpYFo7vs4ECQQDxqrPaqRQ5uYS/ ejmIk05nM3Q1zmoLtMDrfRqrjBhaf/W3hqGihiqN2kL3PIIYcxSRWiyNlYXjElsR 2sllBTe3AkEA0jcMHVThwKt1+Ce5VcE7N6hFfbsgISTjfJ+Q3K2NkvJkmtE8ZRX5 XprssnPN8owkfF5yuKbcSZL3uvaaSGN9IQJAfTVnN9wwOXQwHhDSbDt9/KRBCnum n+gHqDrKLaVJHOJ9SZf8eLswoww5c+UqtkYxmtlwie61Tp+9BXQosilQ4wJBAIZ1 XVNZmriBM4jR59L5MOZtxF0ilu98R+HLsn3kqLyIPF9mXCoQPxwLHkEan213xFKk mt6PJDIPRlOZLqAEuuECQFQMCrn0VUwPg8E40pxMwgMETvVflPs/oZK1Iu+b7+WY vBptAyhMu31fHQFnJpiUOyHqSZnOZyEn1Qu2lszNvUg= -----END RSA PRIVATE KEY----- """, # 3 """-----BEGIN CERTIFICATE----- MIIBnjCCAQcCAgCEMA0GCSqGSIb3DQEBBAUAMBcxFTATBgNVBAMUDG5ld3BiX3Ro aW5neTAeFw0wODA3MjUyMjQyMDZaFw0wOTA3MjUyMjQyMDZaMBcxFTATBgNVBAMU DG5ld3BiX3RoaW5neTCBnzANBgkqhkiG9w0BAQEFAAOBjQAwgYkCgYEAnjiOwipn jigDuNMfNG/tBJhPwYUHhSbQdvrTubhsxw1oOq5XpNqUwRtC8hktOKM3hghyqExP 62EOi0aJBkRhtwtPSLBCINptArZLfkog/nTIqVv4eLEzJ19nTi/llHHWKcgA6XTI sU/snUhGlySA3RpETvXqIJTauQRZz0kToSUCAwEAATANBgkqhkiG9w0BAQQFAAOB gQCQ+u/CsX5WC5m0cLrpyIS6qZa62lrB3mj9H1aIQhisT5kRsMz3FJ1aOaS8zPRz w0jhyRmamCcSsWf5WK539iOtsXbKMdAyjNtkQO3g+fnsLgmznAjjst24jfr+XU59 0amiy1U6TY93gtEBZHtiLldPdUMsTuFbBlqbcMBQ50x9rA== -----END CERTIFICATE----- -----BEGIN RSA PRIVATE KEY----- MIICXAIBAAKBgQCeOI7CKmeOKAO40x80b+0EmE/BhQeFJtB2+tO5uGzHDWg6rlek 2pTBG0LyGS04ozeGCHKoTE/rYQ6LRokGRGG3C09IsEIg2m0Ctkt+SiD+dMipW/h4 sTMnX2dOL+WUcdYpyADpdMixT+ydSEaXJIDdGkRO9eoglNq5BFnPSROhJQIDAQAB AoGAAPrst3s3xQOucjismtCOsVaYN+SxFTwWUoZfRWlFEz6cBLELzfOktEWM9p79 TrqEH4px22UNobGqO2amdql5yXwEFVhYQkRB8uDA8uVaqpL8NLWTGPRXxZ2DSU+n 7/FLf/TWT3ti/ZtXaPVRj6E2/Mq9AVEVOjUYzkNjM02OxcECQQDKEqmPbdZq2URU 7RbUxkq5aTp8nzAgbpUsgBGQ9PDAymhj60BDEP0q28Ssa7tU70pRnQ3AZs9txgmL kK2g97FNAkEAyHH9cIb6qXOAJPIr/xamFGr5uuYw9TJPz/hfVkVimW/aZnBB+e6Q oALJBDKJWeYPzdNbouJYg8MeU0qWdZ5DOQJADUk+1sxc/bd9U6wnBSRog1pU2x7I VkmPC1b8ULCaJ8LnLDKqjf5O9wNuIfwPXB1DoKwX3F+mIcyUkhWYJO5EPQJAUj5D KMqZSrGzYHVlC/M1Daee88rDR7fu+3wDUhiCDkbQq7tftrbl7GF4LRq3NIWq8l7I eJq6isWiSbaO6Y+YMQJBAJFBpVhlY5Px2BX5+Hsfq6dSP3sVVc0eHkdsoZFFxq37 fksL/q2vlPczvBihgcxt+UzW/UrNkelOuX3i57PDvFs= -----END RSA PRIVATE KEY----- """, # 4 """-----BEGIN CERTIFICATE----- MIIBnjCCAQcCAgCEMA0GCSqGSIb3DQEBBAUAMBcxFTATBgNVBAMUDG5ld3BiX3Ro aW5neTAeFw0wODA3MjUyMjQyMDZaFw0wOTA3MjUyMjQyMDZaMBcxFTATBgNVBAMU DG5ld3BiX3RoaW5neTCBnzANBgkqhkiG9w0BAQEFAAOBjQAwgYkCgYEAsCQuudDF zgmY5tDpT0TkUo8fpJ5JcvgCkLFpSDD8REpXhLFkHWhTmTj3CAxfv4lA3sQzHZxe 4S9YCb5c/VTbFEdgwc/wlxMmJiz2jYghdmWPBb8pBEk31YihIhC+u4kex6gJBH5y ixiZ3PPRRMaOBBo+ZfM50XIyWbFOOM/7FwcCAwEAATANBgkqhkiG9w0BAQQFAAOB gQB4cFURaiiUx6n8eS4j4Vxrii5PtsaNEI4acANFSYknGd0xTP4vnmoivNmo5fWE Q4hYtGezNu4a9MnNhcQmI20KzXmvhLJtkwWCgGOVJtMem8hDWXSALV1Ih8hmVkGS CI1elfr9eyguunGp9eMMQfKhWH52WHFA0NYa0Kpv5BY33A== -----END CERTIFICATE----- -----BEGIN RSA PRIVATE KEY----- MIICWwIBAAKBgQCwJC650MXOCZjm0OlPRORSjx+knkly+AKQsWlIMPxESleEsWQd aFOZOPcIDF+/iUDexDMdnF7hL1gJvlz9VNsUR2DBz/CXEyYmLPaNiCF2ZY8FvykE STfViKEiEL67iR7HqAkEfnKLGJnc89FExo4EGj5l8znRcjJZsU44z/sXBwIDAQAB AoGABA7xXKqoxBSIh1js5zypHhXaHsre2l1Igdj0mgs25MPpvE7yBZNvyan8Vx0h 36Hj8r4Gh3og3YNfvem67sNTwNwONY0ep+Xho/3vG0jFATGduSXdcT04DusgZNqg UJqW75cqxrD6o/nya5wUoN9NL5pcd5AgVMdOYvJGbrwQuaECQQDiCs/5dsUkUkeC Tlur1wh0wJpW4Y2ctO3ncRdnAoAA9y8dELHXMqwKE4HtlyzHY7Bxds/BDh373EVK 
rsdl+v9JAkEAx3xTmsOQvWa1tf/O30sdItVpGogKDvYqkLCNthUzPaL85BWB03E2 xunHcVVlqAOE5tFuw0/UEyEkOaGlNTJTzwJAPIVel9FoCUiKYuYt/z1swy3KZRaw /tMmm4AZHvh5Y0jLcYHFy/OCQpRkhkOitqQHWunPyEXKW2PnnY5cTv68GQJAHG7H B88KCUTjb25nkQIGxBlA4swzCtDhXkAb4rEA3a8mdmfuWjHPyeg2ShwO4jSmM7P0 Iph1NMjLff9hKcTjlwJARpItOFkYEdtSODC7FMm7KRKQnNB27gFAizsOYWD4D2b7 w1FTEZ/kSA9wSNhyNGt7dgUo6zFhm2u973HBCUb3dg== -----END RSA PRIVATE KEY----- """, # 5 """-----BEGIN CERTIFICATE----- MIIBnjCCAQcCAgCEMA0GCSqGSIb3DQEBBAUAMBcxFTATBgNVBAMUDG5ld3BiX3Ro aW5neTAeFw0wODA3MjUyMjQ3NThaFw0wOTA3MjUyMjQ3NThaMBcxFTATBgNVBAMU DG5ld3BiX3RoaW5neTCBnzANBgkqhkiG9w0BAQEFAAOBjQAwgYkCgYEAvhTRj1dA NOfse/UBeTfMekZKxZHsNPr+qBYaveWAHDded/BMyMgaMV2n6HQdiDaRjJkzjHCF 3xBtpIJeEGUqfrF0ob8BIZXy3qk68eX/0CVUbgmjSBN44ahlo63NshyXmZtEAkRV VE/+cRKw3N2wtuTed5xwfNcL6dg4KTOEYEkCAwEAATANBgkqhkiG9w0BAQQFAAOB gQCN+CLuVwLeWjSdVbdizYyrOVckqtwiIHG9BbGMlcIdm0qpvD7V7/sN2csk5LaT BNiHi1t5628/4UHqqodYmFw8ri8ItFwB+MmTJi11CX6dIP9OUhS0qO8Z/BKtot7H j04oNwl+WqZZfHIYwTIEL0HBn60nOvCQPDtnWG2BhpUxMA== -----END CERTIFICATE----- -----BEGIN RSA PRIVATE KEY----- MIICXQIBAAKBgQC+FNGPV0A05+x79QF5N8x6RkrFkew0+v6oFhq95YAcN1538EzI yBoxXafodB2INpGMmTOMcIXfEG2kgl4QZSp+sXShvwEhlfLeqTrx5f/QJVRuCaNI E3jhqGWjrc2yHJeZm0QCRFVUT/5xErDc3bC25N53nHB81wvp2DgpM4RgSQIDAQAB AoGALl2BqIdN4Bnac3oV++2CcSkIQB0SEvJOf820hDGhCEDxSCxTbn5w9S21MVxx f7Jf2n3cNxuTbA/jzscGDtW+gXCs+WAbAr5aOqHLUPGEobhKQrQT2hrxQHyv3UFp 0tIl9eXFknOyVAaUJ3athK5tyjSiCZQQHLGzeLaDSKVAPqECQQD1GK7DkTcLaSvw hoTJ3dBK3JoKT2HHLitfEE0QV58mkqFMjofpe+nyeKWvEb/oB4WBp/cfTvtf7DJK zl1OSf11AkEAxomWmJeub0xpqksCmnVI1Jt1mvmcE4xpIcXq8sxzLHRc2QOv0kTw IcFl4QcN6EQBmE+8kl7Tx8SPAVKfJMoZBQJAGsUFYYrczjxAdlba7glyFJsfn/yn m0+poQpwwFYxpc7iGzB+G7xTAw62WfbAVSFtLYog7aR8xC9SFuWPP1vJeQJBAILo xBj3ovgWTXIRJbVM8mnl28UFI0msgsHXK9VOw/6i93nMuYkPFbtcN14KdbwZ42dX 5EIrLr+BNr4riW4LqDUCQQCbsEEpTmj3upKUOONPt+6CH/OOMjazUzYHZ/3ORHGp Q3Wt+I4IrR/OsiACSIQAhS4kBfk/LGggnj56DrWt+oBl -----END RSA PRIVATE KEY----- """, #6 """-----BEGIN CERTIFICATE----- MIIBnjCCAQcCAgCEMA0GCSqGSIb3DQEBBAUAMBcxFTATBgNVBAMUDG5ld3BiX3Ro aW5neTAeFw0wODA3MjUyMjQ3NThaFw0wOTA3MjUyMjQ3NThaMBcxFTATBgNVBAMU DG5ld3BiX3RoaW5neTCBnzANBgkqhkiG9w0BAQEFAAOBjQAwgYkCgYEAtKhx6sEA jn6HWc6T2klwlPn0quyHtATIw8V3ezP46v6g2rRS7dTywo4GTP4vX58l+sC9z9Je qhQ1rWSwMK4FmnDMZCu7AVO7oMIXpXdSz7l0bgCnNjvbpkA2pOfbB1Z8oj8iebff J33ID5DdkmCzqYVtKpII1o/5z7Jo292JYy8CAwEAATANBgkqhkiG9w0BAQQFAAOB gQA0PYMA07wo9kEH4fv9TCfo+zz42Px6lUxrQBPxBvDiGYhk2kME/wX0IcoZPKTV WyBGmDAYWvFaHWbrbbTOfzlLWfYrDD913hCi9cO8iF8oBqRjIlkKcxAoe7vVg5Az ydVcrY+zqULJovWwyNmH1QNIQfMat0rj7fylwjiS1y/YsA== -----END CERTIFICATE----- -----BEGIN RSA PRIVATE KEY----- MIICXAIBAAKBgQC0qHHqwQCOfodZzpPaSXCU+fSq7Ie0BMjDxXd7M/jq/qDatFLt 1PLCjgZM/i9fnyX6wL3P0l6qFDWtZLAwrgWacMxkK7sBU7ugwheld1LPuXRuAKc2 O9umQDak59sHVnyiPyJ5t98nfcgPkN2SYLOphW0qkgjWj/nPsmjb3YljLwIDAQAB AoGAU4CYRv22mCZ7wVLunDLdyr5ODMMPZnHfqj2XoGbBYz0WdIBs5GlNXAfxeZzz oKsbDvAPzANcphh5RxAHMDj/dT8rZOez+eJrs1GEV+crl1T9p83iUkAuOJFtgUgf TtQBL9vHaj7DfvCEXcBPmN/teDFmAAOyUNbtuhTkRa3PbuECQQDwaqZ45Kr0natH V312dqlf9ms8I6e873pAu+RvA3BAWczk65eGcRjEBxVpTvNEcYKFrV8O5ZYtolrr VJl97AfdAkEAwF4w4KJ32fLPVoPnrYlgLw86NejMpAkixblm8cn51avPQmwbtahb BZUuca22IpgDpjeEk5SpEMixKe/UjzxMewJBALy4q2cY8U3F+u6sshLtAPYQZIs3 3fNE9W2dUKsIQvRwyZMlkLN7UhqHCPq6e+HNTM0MlCMIfAPkf4Rdy4N6ZY0CQCKE BAMaQ6TwgzFDw5sIjiCDe+9WUPmRxhJyHL1/fvtOs4Z4fVRP290ZklbFU2vLmMQH LBuKzfb7+4XJyXrV1+cCQBqfPFQQZLr5UgccABYQ2jnWVbJPISJ5h2b0cwXt+pz/ 8ODEYLjqWr9K8dtbgwdpzwbkaGhQYpyvsguMvNPMohs= -----END RSA PRIVATE KEY----- """, #7 """-----BEGIN CERTIFICATE----- MIIBnjCCAQcCAgCEMA0GCSqGSIb3DQEBBAUAMBcxFTATBgNVBAMUDG5ld3BiX3Ro 
aW5neTAeFw0wODA3MjUyMjQ3NThaFw0wOTA3MjUyMjQ3NThaMBcxFTATBgNVBAMU DG5ld3BiX3RoaW5neTCBnzANBgkqhkiG9w0BAQEFAAOBjQAwgYkCgYEAnBfNHycn 5RnYzDN4EWTk2q1BBxA6ZYtlG1WPkj5iKeaYKzUk58zBL7mNOA0ucq+yTwh9C4IC EutWPaKBSKY5XI+Rdebh+Efq+urtOLgfJHlfcCraEx7hYN+tqqMVgEgnO/MqIsn1 I1Fvnp89mSYbQ9tmvhSH4Hm+nbeK6iL2tIsCAwEAATANBgkqhkiG9w0BAQQFAAOB gQBt9zxfsKWoyyV764rRb6XThuTDMNSDaVofqePEWjudAbDu6tp0pHcrL0XpIrnT 3iPgD47pdlwQNbGJ7xXwZu2QTOq+Lv62E6PCL8FljDVoYqR3WwJFFUigNvBT2Zzu Pxx7KUfOlm/M4XUSMu31sNJ0kQniBwpkW43YmHVNFb/R7g== -----END CERTIFICATE----- -----BEGIN RSA PRIVATE KEY----- MIICXQIBAAKBgQCcF80fJyflGdjMM3gRZOTarUEHEDpli2UbVY+SPmIp5pgrNSTn zMEvuY04DS5yr7JPCH0LggIS61Y9ooFIpjlcj5F15uH4R+r66u04uB8keV9wKtoT HuFg362qoxWASCc78yoiyfUjUW+enz2ZJhtD22a+FIfgeb6dt4rqIva0iwIDAQAB AoGBAIHstcnWd7iUeQYPWUNxLaRvTY8pjNH04yWLZEOgNWkXDVX5mExw++RTmB4t qpm/cLWkJSEtB7jjthb7ao0j/t2ljqfr6kAbClDv3zByAEDhOu8xB/5ne6Ioo+k2 dygC+GcVcobhv8qRU+z0fpeXSP8yS1bQQHOaa17bSGsncvHRAkEAzwsn8jBTOqaW 6Iymvr7Aql++LiwEBrqMMRVyBZlkux4hiKa2P7XXEL6/mOPR0aI2LuCqE2COrO7R 0wAFZ54bjwJBAMEAe6cs0zI3p3STHwA3LoSZB81lzLhGUnYBvOq1yoDSlJCOYpld YM1y3eC0vwiOnEu3GG1bhkW+h6Kx0I/qyUUCQBiH9NqwORxI4rZ4+8S76y4EnA7y biOx9KxYIyNgslutTUHYpt1TmUDFqQPfclvJQWw6eExFc4Iv5bJ/XSSSyicCQGyY 5PrwEfYTsrm5fpwUcKxTnzxHp6WYjBWybKZ0m/lYhBfCxmAdVrbDh21Exqj99Zv0 7l26PhdIWfGFtCEGrzECQQCtPyXa3ostSceR7zEKxyn9QBCNXKARfNNTBja6+VRE qDC6jLqzu/SoOYaqa13QzCsttO2iZk8Ygfy3Yz0n37GE -----END RSA PRIVATE KEY----- """, #8 """-----BEGIN CERTIFICATE----- MIIBnjCCAQcCAgCEMA0GCSqGSIb3DQEBBAUAMBcxFTATBgNVBAMUDG5ld3BiX3Ro aW5neTAeFw0wODA3MjUyMjQ3NThaFw0wOTA3MjUyMjQ3NThaMBcxFTATBgNVBAMU DG5ld3BiX3RoaW5neTCBnzANBgkqhkiG9w0BAQEFAAOBjQAwgYkCgYEA4mnLf+x0 CWKDKP5PLZ87t2ReSDE/J5QoI5VhE0bXaahdhPrQTC2wvOpT+N9nzEpI9ASh/ejV kYGlc03nNKRL7zyVM1UyGduEwsRssFMqfyJhI1p+VmxDMWNplex7mIAheAdskPj3 pwi2CP4VIMjOj368AXvXItPzeCfAhYhEVaMCAwEAATANBgkqhkiG9w0BAQQFAAOB gQAEzmwq5JFI5Z0dX20m9rq7NKgwRyAH3h5aE8bdjO8nEc69qscfDRx79Lws3kK8 A0LG0DhxKB8cTNu3u+jy81tjcC4pLNQ5IKap9ksmP7RtIHfTA55G8M3fPl2ZgDYQ ZzsWAZvTNXd/eme0SgOzD10rfntA6ZIgJTWHx3E0RkdwKw== -----END CERTIFICATE----- -----BEGIN RSA PRIVATE KEY----- MIICXQIBAAKBgQDiact/7HQJYoMo/k8tnzu3ZF5IMT8nlCgjlWETRtdpqF2E+tBM LbC86lP432fMSkj0BKH96NWRgaVzTec0pEvvPJUzVTIZ24TCxGywUyp/ImEjWn5W bEMxY2mV7HuYgCF4B2yQ+PenCLYI/hUgyM6PfrwBe9ci0/N4J8CFiERVowIDAQAB AoGAQYTl+8XcKl8Un4dAOG6M5FwqIHAH25c3Klzu85obehrbvUCriG/sZi7VT/6u VeLlS6APlJ+NNgczbrOLhaNJyYzjICSt8BI96PldFUzCEkVlgE+29pO7RNoZmDYB dSGyIDrWdVYfdzpir6kC0KDcrpA16Sc+/bK6Q8ALLRpC7QECQQD7F7fhIQ03CKSk lS4mgDuBQrB/52jXgBumtjp71ANNeaWR6+06KDPTLysM+olsh97Q7YOGORbrBnBg Y2HPnOgjAkEA5taZaMfdFa8V1SPcX7mgCLykYIujqss0AmauZN/24oLdNE8HtTBF OLaxE6PnQ0JWfx9KGIy3E0V3aFk5FWb0gQJBAO4KFEaXgOG1jfCBhNj3JHJseMso 5Nm4F366r0MJQYBHXNGzqphB2K/Svat2MKX1QSUspk2u/a0d05dtYCLki6UCQHWS sChyQ+UbfF9HGKOZBC3vBzo1ZXNEdIUUj5bJjBHq3YgbCK38nAU66A482TmkvDGb Wj4OzeB+7Ua0yyJfggECQQDVlAa8HqdAcrbEwI/YfPydFsavBJ0KtcIGK2owQ+dk dhlDnpXDud/AtX4Ft2LaquQ15fteRrYjjwI9SFGytjtp -----END RSA PRIVATE KEY----- """, #9 """-----BEGIN CERTIFICATE----- MIIBnjCCAQcCAgCEMA0GCSqGSIb3DQEBBAUAMBcxFTATBgNVBAMUDG5ld3BiX3Ro aW5neTAeFw0wODA3MjUyMjQ3NThaFw0wOTA3MjUyMjQ3NThaMBcxFTATBgNVBAMU DG5ld3BiX3RoaW5neTCBnzANBgkqhkiG9w0BAQEFAAOBjQAwgYkCgYEAueLfowPT kXXtHeU2FZSz2mJhHmjqeyI1oMoyyggonccx65vMxaRfljnz2dOjVVYpCOn/LrdP wVxHO8KNDsmQeWPRjnnBa2dFqqOnp/8gEJFJBW7K/gI9se6o+xe9QIWBq6d/fKVR BURJe5TycLogzZuxQn1xHHILa3XleYuHAbMCAwEAATANBgkqhkiG9w0BAQQFAAOB gQBEC1lfC3XK0galQC96B7faLpnQmhn5lX2FUUoFIQQtBTetoE+gTqnLSOIZcOK4 pkT3YvxUvgOV0LOLClryo2IknMMGWRSAcXtVUBBLRHVTSSuVUyyLr5kdRU7B4E+l OU0j8Md/dzlkm//K1bzLyUaPq204ofH8su2IEX4b3IGmAQ== -----END 
CERTIFICATE----- -----BEGIN RSA PRIVATE KEY----- MIICWwIBAAKBgQC54t+jA9ORde0d5TYVlLPaYmEeaOp7IjWgyjLKCCidxzHrm8zF pF+WOfPZ06NVVikI6f8ut0/BXEc7wo0OyZB5Y9GOecFrZ0Wqo6en/yAQkUkFbsr+ Aj2x7qj7F71AhYGrp398pVEFREl7lPJwuiDNm7FCfXEccgtrdeV5i4cBswIDAQAB AoGAO4PnJHNaLs16AMNdgKVevEIZZDolMQ1v7C4w+ryH/JRFaHE2q+UH8bpWV9zK A82VT9RTrqpkb71S1VBiB2UDyz263XdAI/N2HcIVMmfKb72oV4gCI1KOv4DfFwZv tVVcIdVEDBOZ2TgqK4opGOgWMDqgIAl2z3PbsIoNylZHEJECQQDtQeJFhEJGH4Qz BGpdND0j2nnnJyhOFHJqikJNdul3uBwmxTK8FPEUUH/rtpyUan3VMOyDx3kX4OQg GDNSb32rAkEAyJIZIJ0EMRHVedyWsfqR0zTGKRQ+qsc3sCfyUhFksWms9jsSS0DT tVeTdC3F6EIAdpKOGhSyfBTU4jxwbFc0GQJADI4L9znEeAl66Wg2aLA2/Aq3oK/F xjv2wgSG9apxOFCZzMNqp+FD0Jth6YtEReZMuldYbLDFi6nu6HPfY2Fa+QJAdpm1 lAxk6yMxiZK/5VRWoH6HYske2Vtd+aNVbePtF992ME/z3F3kEkpL3hom+dT1cyfs MU3l0Ot8ip7Ul6vlGQJAegNzpcfl2GFSdWQMxQ+nN3woKnPqpR1M3jgnqvo7L4Xe JW3vRxvfdrUuzdlvZ/Pbsu/vOd+cuIa4h0yD5q3N+g== -----END RSA PRIVATE KEY----- """, #10 """-----BEGIN CERTIFICATE----- MIIBnjCCAQcCAgCEMA0GCSqGSIb3DQEBBAUAMBcxFTATBgNVBAMUDG5ld3BiX3Ro aW5neTAeFw0wODA3MjUyMjQ3NThaFw0wOTA3MjUyMjQ3NThaMBcxFTATBgNVBAMU DG5ld3BiX3RoaW5neTCBnzANBgkqhkiG9w0BAQEFAAOBjQAwgYkCgYEAruBhwk+J XdlwfKXXN8K+43JyEYCV7Fp7ZiES4t4AEJuQuBqJVMxpzeZzu2t/vVb59ThaxxtY NGD3Xy6Og5dTv//ztWng8P7HwwvfbrUICU6zo6JAhg7kfaNa116krCYOkC/cdJWt o5W+zsDmI1jUVGH0D73h29atc1gn6wLpAsMCAwEAATANBgkqhkiG9w0BAQQFAAOB gQAEJ/ITGJ9lK/rk0yHcenW8SHsaSTlZMuJ4yEiIgrJ2t71Rd6mtCC/ljx9USvvK bF500whTiZlnWgKi02boBEKa44z/DytF6pljeNPefBQSqZyUByGEb/8Mn58Idyls q4/d9iKXMPvbpQdcesOzgOffFZevLQSWyPRaIdYBOOiYUA== -----END CERTIFICATE----- -----BEGIN RSA PRIVATE KEY----- MIICXQIBAAKBgQCu4GHCT4ld2XB8pdc3wr7jcnIRgJXsWntmIRLi3gAQm5C4GolU zGnN5nO7a3+9Vvn1OFrHG1g0YPdfLo6Dl1O///O1aeDw/sfDC99utQgJTrOjokCG DuR9o1rXXqSsJg6QL9x0la2jlb7OwOYjWNRUYfQPveHb1q1zWCfrAukCwwIDAQAB AoGAcZAXC/dYrlBpIxkTRQu7qLqGZuVI9t7fabgqqpceFargdR4Odrn0L5jrKRer MYrM8bjyAoC4a/NYUUBLnhrkcCQWO9q5fSQuFKFVWHY53SM63Qdqk8Y9Fmy/h/4c UtwZ5BWkUWItvnTMgb9bFcvSiIhEcNQauypnMpgNknopu7kCQQDlSQT10LkX2IGT bTUhPcManx92gucaKsPONKq2mP+1sIciThevRTZWZsxyIuoBBY43NcKKi8NlZCtj hhSbtzYdAkEAw0B93CXfso8g2QIMj/HJJz/wNTLtg+rriXp6jh5HWe6lKWRVrce+ 1w8Qz6OI/ZP6xuQ9HNeZxJ/W6rZPW6BGXwJAHcTuRPA1p/fvUvHh7Q/0zfcNAbkb QlV9GL/TzmNtB+0EjpqvDo2g8XTlZIhN85YCEf8D5DMjSn3H+GMHN/SArQJBAJlW MIGPjNoh5V4Hae4xqBOW9wIQeM880rUo5s5toQNTk4mqLk9Hquwh/MXUXGUora08 2XGpMC1midXSTwhaGmkCQQCdivptFEYl33PrVbxY9nzHynpp4Mi89vQF0cjCmaYY N8L+bvLd4BU9g6hRS8b59lQ6GNjryx2bUnCVtLcey4Jd -----END RSA PRIVATE KEY----- """, #11 ] # To disable the pre-computed tub certs, uncomment this line. #SYSTEM_TEST_CERTS = [] TEST_DATA="\x02"*(immutable.upload.Uploader.URI_LIT_SIZE_THRESHOLD+1) class ShouldFailMixin: def shouldFail(self, expected_failure, which, substring, callable, *args, **kwargs): """Assert that a function call raises some exception. This is a Deferred-friendly version of TestCase.assertRaises() . Suppose you want to verify the following function: def broken(a, b, c): if a < 0: raise TypeError('a must not be negative') return defer.succeed(b+c) You can use: d = self.shouldFail(TypeError, 'test name', 'a must not be negative', broken, -4, 5, c=12) in your test method. The 'test name' string will be included in the error message, if any, because Deferred chains frequently make it difficult to tell which assertion was tripped. The substring= argument, if not None, must appear in the 'repr' of the message wrapped by this Failure, or the test will fail. 
""" assert substring is None or isinstance(substring, str) d = defer.maybeDeferred(callable, *args, **kwargs) def done(res): if isinstance(res, failure.Failure): res.trap(expected_failure) if substring: message = repr(res.value.args[0]) self.failUnless(substring in message, "%s: substring '%s' not in '%s'" % (which, substring, message)) else: self.fail("%s was supposed to raise %s, not get '%s'" % (which, expected_failure, res)) d.addBoth(done) return d class WebErrorMixin: def explain_web_error(self, f): # an error on the server side causes the client-side getPage() to # return a failure(t.web.error.Error), and its str() doesn't show the # response body, which is where the useful information lives. Attach # this method as an errback handler, and it will reveal the hidden # message. f.trap(WebError) print "Web Error:", f.value, ":", f.value.response return f def _shouldHTTPError(self, res, which, validator): if isinstance(res, failure.Failure): res.trap(WebError) return validator(res) else: self.fail("%s was supposed to Error, not get '%s'" % (which, res)) def shouldHTTPError(self, which, code=None, substring=None, response_substring=None, callable=None, *args, **kwargs): # returns a Deferred with the response body assert substring is None or isinstance(substring, str) assert callable def _validate(f): if code is not None: self.failUnlessEqual(f.value.status, str(code), which) if substring: code_string = str(f) self.failUnless(substring in code_string, "%s: substring '%s' not in '%s'" % (which, substring, code_string)) response_body = f.value.response if response_substring: self.failUnless(response_substring in response_body, "%s: response substring '%s' not in '%s'" % (which, response_substring, response_body)) return response_body d = defer.maybeDeferred(callable, *args, **kwargs) d.addBoth(self._shouldHTTPError, which, _validate) return d class ErrorMixin(WebErrorMixin): def explain_error(self, f): if f.check(defer.FirstError): print "First Error:", f.value.subFailure return f def corrupt_field(data, offset, size, debug=False): if random.random() < 0.5: newdata = testutil.flip_one_bit(data, offset, size) if debug: log.msg("testing: corrupting offset %d, size %d flipping one bit orig: %r, newdata: %r" % (offset, size, data[offset:offset+size], newdata[offset:offset+size])) return newdata else: newval = testutil.insecurerandstr(size) if debug: log.msg("testing: corrupting offset %d, size %d randomizing field, orig: %r, newval: %r" % (offset, size, data[offset:offset+size], newval)) return data[:offset]+newval+data[offset+size:] def _corrupt_nothing(data, debug=False): """Leave the data pristine. """ return data def _corrupt_file_version_number(data, debug=False): """Scramble the file data -- the share file version number have one bit flipped or else will be changed to a random value.""" return corrupt_field(data, 0x00, 4) def _corrupt_size_of_file_data(data, debug=False): """Scramble the file data -- the field showing the size of the share data within the file will be set to one smaller.""" return corrupt_field(data, 0x04, 4) def _corrupt_sharedata_version_number(data, debug=False): """Scramble the file data -- the share data version number will have one bit flipped or else will be changed to a random value, but not 1 or 2.""" return corrupt_field(data, 0x0c, 4) sharevernum = struct.unpack(">L", data[0x0c:0x0c+4])[0] assert sharevernum in (1, 2), "This test is designed to corrupt immutable shares of v1 or v2 in specific ways." 
newsharevernum = sharevernum while newsharevernum in (1, 2): newsharevernum = random.randrange(0, 2**32) newsharevernumbytes = struct.pack(">L", newsharevernum) return data[:0x0c] + newsharevernumbytes + data[0x0c+4:] def _corrupt_sharedata_version_number_to_plausible_version(data, debug=False): """Scramble the file data -- the share data version number will be changed to 2 if it is 1 or else to 1 if it is 2.""" sharevernum = struct.unpack(">L", data[0x0c:0x0c+4])[0] assert sharevernum in (1, 2), "This test is designed to corrupt immutable shares of v1 or v2 in specific ways." if sharevernum == 1: newsharevernum = 2 else: newsharevernum = 1 newsharevernumbytes = struct.pack(">L", newsharevernum) return data[:0x0c] + newsharevernumbytes + data[0x0c+4:] def _corrupt_segment_size(data, debug=False): """Scramble the file data -- the field showing the size of the segment will have one bit flipped or else be changed to a random value.""" sharevernum = struct.unpack(">L", data[0x0c:0x0c+4])[0] assert sharevernum in (1, 2), "This test is designed to corrupt immutable shares of v1 or v2 in specific ways." if sharevernum == 1: return corrupt_field(data, 0x0c+0x04, 4, debug=False) else: return corrupt_field(data, 0x0c+0x04, 8, debug=False) def _corrupt_size_of_sharedata(data, debug=False): """Scramble the file data -- the field showing the size of the data within the share data will have one bit flipped or else will be changed to a random value.""" sharevernum = struct.unpack(">L", data[0x0c:0x0c+4])[0] assert sharevernum in (1, 2), "This test is designed to corrupt immutable shares of v1 or v2 in specific ways." if sharevernum == 1: return corrupt_field(data, 0x0c+0x08, 4) else: return corrupt_field(data, 0x0c+0x0c, 8) def _corrupt_offset_of_sharedata(data, debug=False): """Scramble the file data -- the field showing the offset of the data within the share data will have one bit flipped or else be changed to a random value.""" sharevernum = struct.unpack(">L", data[0x0c:0x0c+4])[0] assert sharevernum in (1, 2), "This test is designed to corrupt immutable shares of v1 or v2 in specific ways." if sharevernum == 1: return corrupt_field(data, 0x0c+0x0c, 4) else: return corrupt_field(data, 0x0c+0x14, 8) def _corrupt_offset_of_ciphertext_hash_tree(data, debug=False): """Scramble the file data -- the field showing the offset of the ciphertext hash tree within the share data will have one bit flipped or else be changed to a random value. """ sharevernum = struct.unpack(">L", data[0x0c:0x0c+4])[0] assert sharevernum in (1, 2), "This test is designed to corrupt immutable shares of v1 or v2 in specific ways." if sharevernum == 1: return corrupt_field(data, 0x0c+0x14, 4, debug=False) else: return corrupt_field(data, 0x0c+0x24, 8, debug=False) def _corrupt_offset_of_block_hashes(data, debug=False): """Scramble the file data -- the field showing the offset of the block hash tree within the share data will have one bit flipped or else will be changed to a random value.""" sharevernum = struct.unpack(">L", data[0x0c:0x0c+4])[0] assert sharevernum in (1, 2), "This test is designed to corrupt immutable shares of v1 or v2 in specific ways." 
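    # For reference, the offsets used by these _corrupt_* helpers come from the
    # immutable share layout implied by the code in this file (the 0x0c
    # container header written by the storage server comes first, then the
    # share itself, whose version number sits at share offset 0x00):
    #   v1 (4-byte fields): 0x04 segment size, 0x08 data size, 0x0c data
    #       offset, 0x14 crypttext-hash-tree offset, 0x18 block-hashes offset,
    #       0x1c share-hashes offset, 0x20 URI-extension offset; share data
    #       begins at 0x24
    #   v2 (8-byte fields): 0x04 segment size, 0x0c data size, 0x14 data
    #       offset, 0x24 crypttext-hash-tree offset, 0x2c block-hashes offset,
    #       0x34 share-hashes offset, 0x3c URI-extension offset; share data
    #       begins at 0x44
    # e.g. the v1 block-hashes offset field corrupted just below lives at file
    # offset 0x0c + 0x18.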
if sharevernum == 1: return corrupt_field(data, 0x0c+0x18, 4) else: return corrupt_field(data, 0x0c+0x2c, 8) def _corrupt_offset_of_block_hashes_to_truncate_crypttext_hashes(data, debug=False): """Scramble the file data -- the field showing the offset of the block hash tree within the share data will have a multiple of hash size subtracted from it, thus causing the downloader to download an incomplete crypttext hash tree.""" sharevernum = struct.unpack(">L", data[0x0c:0x0c+4])[0] assert sharevernum in (1, 2), "This test is designed to corrupt immutable shares of v1 or v2 in specific ways." if sharevernum == 1: curval = struct.unpack(">L", data[0x0c+0x18:0x0c+0x18+4])[0] newval = random.randrange(0, max(1, (curval/hashutil.CRYPTO_VAL_SIZE)/2))*hashutil.CRYPTO_VAL_SIZE newvalstr = struct.pack(">L", newval) return data[:0x0c+0x18]+newvalstr+data[0x0c+0x18+4:] else: curval = struct.unpack(">Q", data[0x0c+0x2c:0x0c+0x2c+8])[0] newval = random.randrange(0, max(1, (curval/hashutil.CRYPTO_VAL_SIZE)/2))*hashutil.CRYPTO_VAL_SIZE newvalstr = struct.pack(">Q", newval) return data[:0x0c+0x2c]+newvalstr+data[0x0c+0x2c+8:] def _corrupt_offset_of_share_hashes(data, debug=False): """Scramble the file data -- the field showing the offset of the share hash tree within the share data will have one bit flipped or else will be changed to a random value.""" sharevernum = struct.unpack(">L", data[0x0c:0x0c+4])[0] assert sharevernum in (1, 2), "This test is designed to corrupt immutable shares of v1 or v2 in specific ways." if sharevernum == 1: return corrupt_field(data, 0x0c+0x1c, 4) else: return corrupt_field(data, 0x0c+0x34, 8) def _corrupt_offset_of_uri_extension(data, debug=False): """Scramble the file data -- the field showing the offset of the uri extension will have one bit flipped or else will be changed to a random value.""" sharevernum = struct.unpack(">L", data[0x0c:0x0c+4])[0] assert sharevernum in (1, 2), "This test is designed to corrupt immutable shares of v1 or v2 in specific ways." if sharevernum == 1: return corrupt_field(data, 0x0c+0x20, 4) else: return corrupt_field(data, 0x0c+0x3c, 8) def _corrupt_offset_of_uri_extension_to_force_short_read(data, debug=False): """Scramble the file data -- the field showing the offset of the uri extension will be set to the size of the file minus 3. This means when the client tries to read the length field from that location it will get a short read -- the result string will be only 3 bytes long, not the 4 or 8 bytes necessary to do a successful struct.unpack.""" sharevernum = struct.unpack(">L", data[0x0c:0x0c+4])[0] assert sharevernum in (1, 2), "This test is designed to corrupt immutable shares of v1 or v2 in specific ways." # The "-0x0c" in here is to skip the server-side header in the share # file, which the client doesn't see when seeking and reading. 
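    # For example (illustrative numbers only): with a 1000-byte share file on
    # disk, the client-visible share is 1000-0x0c = 988 bytes long. Writing
    # 1000-0x0c-3 = 985 as the URI-extension offset means the client's
    # subsequent read of the 4- (or 8-) byte length field can only return the
    # final 3 bytes of the share, which is the short read this corruptor is
    # trying to provoke.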
if sharevernum == 1: if debug: log.msg("testing: corrupting offset %d, size %d, changing %d to %d (len(data) == %d)" % (0x2c, 4, struct.unpack(">L", data[0x2c:0x2c+4])[0], len(data)-0x0c-3, len(data))) return data[:0x2c] + struct.pack(">L", len(data)-0x0c-3) + data[0x2c+4:] else: if debug: log.msg("testing: corrupting offset %d, size %d, changing %d to %d (len(data) == %d)" % (0x48, 8, struct.unpack(">Q", data[0x48:0x48+8])[0], len(data)-0x0c-3, len(data))) return data[:0x48] + struct.pack(">Q", len(data)-0x0c-3) + data[0x48+8:] def _corrupt_mutable_share_data(data, debug=False): prefix = data[:32] assert prefix == MutableShareFile.MAGIC, "This function is designed to corrupt mutable shares of v1, and the magic number doesn't look right: %r vs %r" % (prefix, MutableShareFile.MAGIC) data_offset = MutableShareFile.DATA_OFFSET sharetype = data[data_offset:data_offset+1] assert sharetype == "\x00", "non-SDMF mutable shares not supported" (version, ig_seqnum, ig_roothash, ig_IV, ig_k, ig_N, ig_segsize, ig_datalen, offsets) = unpack_header(data[data_offset:]) assert version == 0, "this function only handles v0 SDMF files" start = data_offset + offsets["share_data"] length = data_offset + offsets["enc_privkey"] - start return corrupt_field(data, start, length) def _corrupt_share_data(data, debug=False): """Scramble the file data -- the field containing the share data itself will have one bit flipped or else will be changed to a random value.""" sharevernum = struct.unpack(">L", data[0x0c:0x0c+4])[0] assert sharevernum in (1, 2), "This test is designed to corrupt immutable shares of v1 or v2 in specific ways, not v%d." % sharevernum if sharevernum == 1: sharedatasize = struct.unpack(">L", data[0x0c+0x08:0x0c+0x08+4])[0] return corrupt_field(data, 0x0c+0x24, sharedatasize) else: sharedatasize = struct.unpack(">Q", data[0x0c+0x08:0x0c+0x0c+8])[0] return corrupt_field(data, 0x0c+0x44, sharedatasize) def _corrupt_share_data_last_byte(data, debug=False): """Scramble the file data -- flip all bits of the last byte.""" sharevernum = struct.unpack(">L", data[0x0c:0x0c+4])[0] assert sharevernum in (1, 2), "This test is designed to corrupt immutable shares of v1 or v2 in specific ways, not v%d." % sharevernum if sharevernum == 1: sharedatasize = struct.unpack(">L", data[0x0c+0x08:0x0c+0x08+4])[0] offset = 0x0c+0x24+sharedatasize-1 else: sharedatasize = struct.unpack(">Q", data[0x0c+0x08:0x0c+0x0c+8])[0] offset = 0x0c+0x44+sharedatasize-1 newdata = data[:offset] + chr(ord(data[offset])^0xFF) + data[offset+1:] if debug: log.msg("testing: flipping all bits of byte at offset %d: %r, newdata: %r" % (offset, data[offset], newdata[offset])) return newdata def _corrupt_crypttext_hash_tree(data, debug=False): """Scramble the file data -- the field containing the crypttext hash tree will have one bit flipped or else will be changed to a random value. """ sharevernum = struct.unpack(">L", data[0x0c:0x0c+4])[0] assert sharevernum in (1, 2), "This test is designed to corrupt immutable shares of v1 or v2 in specific ways." 
if sharevernum == 1: crypttexthashtreeoffset = struct.unpack(">L", data[0x0c+0x14:0x0c+0x14+4])[0] blockhashesoffset = struct.unpack(">L", data[0x0c+0x18:0x0c+0x18+4])[0] else: crypttexthashtreeoffset = struct.unpack(">Q", data[0x0c+0x24:0x0c+0x24+8])[0] blockhashesoffset = struct.unpack(">Q", data[0x0c+0x2c:0x0c+0x2c+8])[0] return corrupt_field(data, 0x0c+crypttexthashtreeoffset, blockhashesoffset-crypttexthashtreeoffset, debug=debug) def _corrupt_crypttext_hash_tree_byte_x221(data, debug=False): """Scramble the file data -- the byte at offset 0x221 will have its 7th (b1) bit flipped. """ sharevernum = struct.unpack(">L", data[0x0c:0x0c+4])[0] assert sharevernum in (1, 2), "This test is designed to corrupt immutable shares of v1 or v2 in specific ways." if debug: log.msg("original data: %r" % (data,)) return data[:0x0c+0x221] + chr(ord(data[0x0c+0x221])^0x02) + data[0x0c+0x2210+1:] def _corrupt_block_hashes(data, debug=False): """Scramble the file data -- the field containing the block hash tree will have one bit flipped or else will be changed to a random value. """ sharevernum = struct.unpack(">L", data[0x0c:0x0c+4])[0] assert sharevernum in (1, 2), "This test is designed to corrupt immutable shares of v1 or v2 in specific ways." if sharevernum == 1: blockhashesoffset = struct.unpack(">L", data[0x0c+0x18:0x0c+0x18+4])[0] sharehashesoffset = struct.unpack(">L", data[0x0c+0x1c:0x0c+0x1c+4])[0] else: blockhashesoffset = struct.unpack(">Q", data[0x0c+0x2c:0x0c+0x2c+8])[0] sharehashesoffset = struct.unpack(">Q", data[0x0c+0x34:0x0c+0x34+8])[0] return corrupt_field(data, 0x0c+blockhashesoffset, sharehashesoffset-blockhashesoffset) def _corrupt_share_hashes(data, debug=False): """Scramble the file data -- the field containing the share hash chain will have one bit flipped or else will be changed to a random value. """ sharevernum = struct.unpack(">L", data[0x0c:0x0c+4])[0] assert sharevernum in (1, 2), "This test is designed to corrupt immutable shares of v1 or v2 in specific ways." if sharevernum == 1: sharehashesoffset = struct.unpack(">L", data[0x0c+0x1c:0x0c+0x1c+4])[0] uriextoffset = struct.unpack(">L", data[0x0c+0x20:0x0c+0x20+4])[0] else: sharehashesoffset = struct.unpack(">Q", data[0x0c+0x34:0x0c+0x34+8])[0] uriextoffset = struct.unpack(">Q", data[0x0c+0x3c:0x0c+0x3c+8])[0] return corrupt_field(data, 0x0c+sharehashesoffset, uriextoffset-sharehashesoffset) def _corrupt_length_of_uri_extension(data, debug=False): """Scramble the file data -- the field showing the length of the uri extension will have one bit flipped or else will be changed to a random value.""" sharevernum = struct.unpack(">L", data[0x0c:0x0c+4])[0] assert sharevernum in (1, 2), "This test is designed to corrupt immutable shares of v1 or v2 in specific ways." if sharevernum == 1: uriextoffset = struct.unpack(">L", data[0x0c+0x20:0x0c+0x20+4])[0] return corrupt_field(data, uriextoffset, 4) else: uriextoffset = struct.unpack(">Q", data[0x0c+0x3c:0x0c+0x3c+8])[0] return corrupt_field(data, 0x0c+uriextoffset, 8) def _corrupt_uri_extension(data, debug=False): """Scramble the file data -- the field containing the uri extension will have one bit flipped or else will be changed to a random value.""" sharevernum = struct.unpack(">L", data[0x0c:0x0c+4])[0] assert sharevernum in (1, 2), "This test is designed to corrupt immutable shares of v1 or v2 in specific ways." 
if sharevernum == 1: uriextoffset = struct.unpack(">L", data[0x0c+0x20:0x0c+0x20+4])[0] uriextlen = struct.unpack(">L", data[0x0c+uriextoffset:0x0c+uriextoffset+4])[0] else: uriextoffset = struct.unpack(">Q", data[0x0c+0x3c:0x0c+0x3c+8])[0] uriextlen = struct.unpack(">Q", data[0x0c+uriextoffset:0x0c+uriextoffset+8])[0] return corrupt_field(data, 0x0c+uriextoffset, uriextlen) tahoe-lafs-1.10.0/src/allmydata/test/common_util.py000066400000000000000000000170151221140116300222240ustar00rootroot00000000000000import os, signal, sys, time from random import randrange from twisted.internet import reactor, defer from twisted.python import failure from allmydata.util import fileutil, log from allmydata.util.encodingutil import unicode_platform, get_filesystem_encoding def insecurerandstr(n): return ''.join(map(chr, map(randrange, [0]*n, [256]*n))) def flip_bit(good, which): # flip the low-order bit of good[which] if which == -1: pieces = good[:which], good[-1:], "" else: pieces = good[:which], good[which:which+1], good[which+1:] return pieces[0] + chr(ord(pieces[1]) ^ 0x01) + pieces[2] def flip_one_bit(s, offset=0, size=None): """ flip one random bit of the string s, in a byte greater than or equal to offset and less than offset+size. """ if size is None: size=len(s)-offset i = randrange(offset, offset+size) result = s[:i] + chr(ord(s[i])^(0x01<]*>', ' ', s) s = re.sub(r'\s+', ' ', s) return s class MyGetter(client.HTTPPageGetter): handleStatus_206 = lambda self: self.handleStatus_200() # PARTIAL_CONTENT handleStatus_304 = lambda self: self.handleStatus_200() # NOT_MODIFIED class HTTPClientHEADFactory(client.HTTPClientFactory): protocol = MyGetter def noPage(self, reason): # Twisted-2.5.0 and earlier had a bug, in which they would raise an # exception when the response to a HEAD request had no body (when in # fact they are defined to never have a body). This was fixed in # Twisted-8.0 . To work around this, we catch the # PartialDownloadError and make it disappear. if (reason.check(client.PartialDownloadError) and self.method.upper() == "HEAD"): self.page("") return return client.HTTPClientFactory.noPage(self, reason) class HTTPClientGETFactory(client.HTTPClientFactory): protocol = MyGetter tahoe-lafs-1.10.0/src/allmydata/test/no_network.py000066400000000000000000000412511221140116300220630ustar00rootroot00000000000000 # This contains a test harness that creates a full Tahoe grid in a single # process (actually in a single MultiService) which does not use the network. # It does not use an Introducer, and there are no foolscap Tubs. Each storage # server puts real shares on disk, but is accessed through loopback # RemoteReferences instead of over serialized SSL. It is not as complete as # the common.SystemTestMixin framework (which does use the network), but # should be considerably faster: on my laptop, it takes 50-80ms to start up, # whereas SystemTestMixin takes close to 2s. # This should be useful for tests which want to examine and/or manipulate the # uploaded shares, checker/verifier/repairer tests, etc. The clients have no # Tubs, so it is not useful for tests that involve a Helper, a KeyGenerator, # or the control.furl . 
import os.path from zope.interface import implements from twisted.application import service from twisted.internet import defer, reactor from twisted.python.failure import Failure from foolscap.api import Referenceable, fireEventually, RemoteException from base64 import b32encode from allmydata import uri as tahoe_uri from allmydata.client import Client from allmydata.storage.server import StorageServer, storage_index_to_dir from allmydata.util import fileutil, idlib, hashutil from allmydata.util.hashutil import sha1 from allmydata.test.common_web import HTTPClientGETFactory from allmydata.interfaces import IStorageBroker, IServer from allmydata.test.common import TEST_RSA_KEY_SIZE class IntentionalError(Exception): pass class Marker: pass class LocalWrapper: def __init__(self, original): self.original = original self.broken = False self.hung_until = None self.post_call_notifier = None self.disconnectors = {} self.counter_by_methname = {} def _clear_counters(self): self.counter_by_methname = {} def callRemoteOnly(self, methname, *args, **kwargs): d = self.callRemote(methname, *args, **kwargs) del d # explicitly ignored return None def callRemote(self, methname, *args, **kwargs): # this is ideally a Membrane, but that's too hard. We do a shallow # wrapping of inbound arguments, and per-methodname wrapping of # selected return values. def wrap(a): if isinstance(a, Referenceable): return LocalWrapper(a) else: return a args = tuple([wrap(a) for a in args]) kwargs = dict([(k,wrap(kwargs[k])) for k in kwargs]) def _really_call(): def incr(d, k): d[k] = d.setdefault(k, 0) + 1 incr(self.counter_by_methname, methname) meth = getattr(self.original, "remote_" + methname) return meth(*args, **kwargs) def _call(): if self.broken: if self.broken is not True: # a counter, not boolean self.broken -= 1 raise IntentionalError("I was asked to break") if self.hung_until: d2 = defer.Deferred() self.hung_until.addCallback(lambda ign: _really_call()) self.hung_until.addCallback(lambda res: d2.callback(res)) def _err(res): d2.errback(res) return res self.hung_until.addErrback(_err) return d2 return _really_call() d = fireEventually() d.addCallback(lambda res: _call()) def _wrap_exception(f): return Failure(RemoteException(f)) d.addErrback(_wrap_exception) def _return_membrane(res): # rather than complete the difficult task of building a # fully-general Membrane (which would locate all Referenceable # objects that cross the simulated wire and replace them with # wrappers), we special-case certain methods that we happen to # know will return Referenceables. if methname == "allocate_buckets": (alreadygot, allocated) = res for shnum in allocated: allocated[shnum] = LocalWrapper(allocated[shnum]) if methname == "get_buckets": for shnum in res: res[shnum] = LocalWrapper(res[shnum]) return res d.addCallback(_return_membrane) if self.post_call_notifier: d.addCallback(self.post_call_notifier, self, methname) return d def notifyOnDisconnect(self, f, *args, **kwargs): m = Marker() self.disconnectors[m] = (f, args, kwargs) return m def dontNotifyOnDisconnect(self, marker): del self.disconnectors[marker] def wrap_storage_server(original): # Much of the upload/download code uses rref.version (which normally # comes from rrefutil.add_version_to_remote_reference). To avoid using a # network, we want a LocalWrapper here. Try to satisfy all these # constraints at the same time. 
wrapper = LocalWrapper(original) wrapper.version = original.remote_get_version() return wrapper class NoNetworkServer: implements(IServer) def __init__(self, serverid, rref): self.serverid = serverid self.rref = rref def __repr__(self): return "" % self.get_name() # Special method used by copy.copy() and copy.deepcopy(). When those are # used in allmydata.immutable.filenode to copy CheckResults during # repair, we want it to treat the IServer instances as singletons. def __copy__(self): return self def __deepcopy__(self, memodict): return self def get_serverid(self): return self.serverid def get_permutation_seed(self): return self.serverid def get_lease_seed(self): return self.serverid def get_foolscap_write_enabler_seed(self): return self.serverid def get_name(self): return idlib.shortnodeid_b2a(self.serverid) def get_longname(self): return idlib.nodeid_b2a(self.serverid) def get_nickname(self): return "nickname" def get_rref(self): return self.rref def get_version(self): return self.rref.version class NoNetworkStorageBroker: implements(IStorageBroker) def get_servers_for_psi(self, peer_selection_index): def _permuted(server): seed = server.get_permutation_seed() return sha1(peer_selection_index + seed).digest() return sorted(self.get_connected_servers(), key=_permuted) def get_connected_servers(self): return self.client._servers def get_nickname_for_serverid(self, serverid): return None class NoNetworkClient(Client): def create_tub(self): pass def init_introducer_client(self): pass def setup_logging(self): pass def startService(self): service.MultiService.startService(self) def stopService(self): service.MultiService.stopService(self) def when_tub_ready(self): raise NotImplementedError("NoNetworkClient has no Tub") def init_control(self): pass def init_helper(self): pass def init_key_gen(self): pass def init_storage(self): pass def init_client_storage_broker(self): self.storage_broker = NoNetworkStorageBroker() self.storage_broker.client = self def init_stub_client(self): pass #._servers will be set by the NoNetworkGrid which creates us class SimpleStats: def __init__(self): self.counters = {} self.stats_producers = [] def count(self, name, delta=1): val = self.counters.setdefault(name, 0) self.counters[name] = val + delta def register_producer(self, stats_producer): self.stats_producers.append(stats_producer) def get_stats(self): stats = {} for sp in self.stats_producers: stats.update(sp.get_stats()) ret = { 'counters': self.counters, 'stats': stats } return ret class NoNetworkGrid(service.MultiService): def __init__(self, basedir, num_clients=1, num_servers=10, client_config_hooks={}): service.MultiService.__init__(self) self.basedir = basedir fileutil.make_dirs(basedir) self.servers_by_number = {} # maps to StorageServer instance self.wrappers_by_id = {} # maps to wrapped StorageServer instance self.proxies_by_id = {} # maps to IServer on which .rref is a wrapped # StorageServer self.clients = [] for i in range(num_servers): ss = self.make_server(i) self.add_server(i, ss) self.rebuild_serverlist() for i in range(num_clients): clientid = hashutil.tagged_hash("clientid", str(i))[:20] clientdir = os.path.join(basedir, "clients", idlib.shortnodeid_b2a(clientid)) fileutil.make_dirs(clientdir) f = open(os.path.join(clientdir, "tahoe.cfg"), "w") f.write("[node]\n") f.write("nickname = client-%d\n" % i) f.write("web.port = tcp:0:interface=127.0.0.1\n") f.write("[storage]\n") f.write("enabled = false\n") f.close() c = None if i in client_config_hooks: # this hook can either modify tahoe.cfg, 
or return an # entirely new Client instance c = client_config_hooks[i](clientdir) if not c: c = NoNetworkClient(clientdir) c.set_default_mutable_keysize(TEST_RSA_KEY_SIZE) c.nodeid = clientid c.short_nodeid = b32encode(clientid).lower()[:8] c._servers = self.all_servers # can be updated later c.setServiceParent(self) self.clients.append(c) def make_server(self, i, readonly=False): serverid = hashutil.tagged_hash("serverid", str(i))[:20] serverdir = os.path.join(self.basedir, "servers", idlib.shortnodeid_b2a(serverid), "storage") fileutil.make_dirs(serverdir) ss = StorageServer(serverdir, serverid, stats_provider=SimpleStats(), readonly_storage=readonly) ss._no_network_server_number = i return ss def add_server(self, i, ss): # to deal with the fact that all StorageServers are named 'storage', # we interpose a middleman middleman = service.MultiService() middleman.setServiceParent(self) ss.setServiceParent(middleman) serverid = ss.my_nodeid self.servers_by_number[i] = ss wrapper = wrap_storage_server(ss) self.wrappers_by_id[serverid] = wrapper self.proxies_by_id[serverid] = NoNetworkServer(serverid, wrapper) self.rebuild_serverlist() def get_all_serverids(self): return self.proxies_by_id.keys() def rebuild_serverlist(self): self.all_servers = frozenset(self.proxies_by_id.values()) for c in self.clients: c._servers = self.all_servers def remove_server(self, serverid): # it's enough to remove the server from c._servers (we don't actually # have to detach and stopService it) for i,ss in self.servers_by_number.items(): if ss.my_nodeid == serverid: del self.servers_by_number[i] break del self.wrappers_by_id[serverid] del self.proxies_by_id[serverid] self.rebuild_serverlist() return ss def break_server(self, serverid, count=True): # mark the given server as broken, so it will throw exceptions when # asked to hold a share or serve a share. If count= is a number, # throw that many exceptions before starting to work again. self.wrappers_by_id[serverid].broken = count def hang_server(self, serverid): # hang the given server ss = self.wrappers_by_id[serverid] assert ss.hung_until is None ss.hung_until = defer.Deferred() def unhang_server(self, serverid): # unhang the given server ss = self.wrappers_by_id[serverid] assert ss.hung_until is not None ss.hung_until.callback(None) ss.hung_until = None def nuke_from_orbit(self): """ Empty all share directories in this grid. 
It's the only way to be sure ;-) """ for server in self.servers_by_number.values(): for prefixdir in os.listdir(server.sharedir): if prefixdir != 'incoming': fileutil.rm_dir(os.path.join(server.sharedir, prefixdir)) class GridTestMixin: def setUp(self): self.s = service.MultiService() self.s.startService() def tearDown(self): return self.s.stopService() def set_up_grid(self, num_clients=1, num_servers=10, client_config_hooks={}): # self.basedir must be set self.g = NoNetworkGrid(self.basedir, num_clients=num_clients, num_servers=num_servers, client_config_hooks=client_config_hooks) self.g.setServiceParent(self.s) self.client_webports = [c.getServiceNamed("webish").getPortnum() for c in self.g.clients] self.client_baseurls = [c.getServiceNamed("webish").getURL() for c in self.g.clients] def get_clientdir(self, i=0): return self.g.clients[i].basedir def get_serverdir(self, i): return self.g.servers_by_number[i].storedir def iterate_servers(self): for i in sorted(self.g.servers_by_number.keys()): ss = self.g.servers_by_number[i] yield (i, ss, ss.storedir) def find_uri_shares(self, uri): si = tahoe_uri.from_string(uri).get_storage_index() prefixdir = storage_index_to_dir(si) shares = [] for i,ss in self.g.servers_by_number.items(): serverid = ss.my_nodeid basedir = os.path.join(ss.sharedir, prefixdir) if not os.path.exists(basedir): continue for f in os.listdir(basedir): try: shnum = int(f) shares.append((shnum, serverid, os.path.join(basedir, f))) except ValueError: pass return sorted(shares) def copy_shares(self, uri): shares = {} for (shnum, serverid, sharefile) in self.find_uri_shares(uri): shares[sharefile] = open(sharefile, "rb").read() return shares def restore_all_shares(self, shares): for sharefile, data in shares.items(): open(sharefile, "wb").write(data) def delete_share(self, (shnum, serverid, sharefile)): os.unlink(sharefile) def delete_shares_numbered(self, uri, shnums): for (i_shnum, i_serverid, i_sharefile) in self.find_uri_shares(uri): if i_shnum in shnums: os.unlink(i_sharefile) def corrupt_share(self, (shnum, serverid, sharefile), corruptor_function): sharedata = open(sharefile, "rb").read() corruptdata = corruptor_function(sharedata) open(sharefile, "wb").write(corruptdata) def corrupt_shares_numbered(self, uri, shnums, corruptor, debug=False): for (i_shnum, i_serverid, i_sharefile) in self.find_uri_shares(uri): if i_shnum in shnums: sharedata = open(i_sharefile, "rb").read() corruptdata = corruptor(sharedata, debug=debug) open(i_sharefile, "wb").write(corruptdata) def corrupt_all_shares(self, uri, corruptor, debug=False): for (i_shnum, i_serverid, i_sharefile) in self.find_uri_shares(uri): sharedata = open(i_sharefile, "rb").read() corruptdata = corruptor(sharedata, debug=debug) open(i_sharefile, "wb").write(corruptdata) def GET(self, urlpath, followRedirect=False, return_response=False, method="GET", clientnum=0, **kwargs): # if return_response=True, this fires with (data, statuscode, # respheaders) instead of just data. 
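        # A typical (hypothetical) call from a subclass, just to illustrate
        # the shape of the result:
        #   d = self.GET("/uri/%s?t=json" % some_filecap, return_response=True)
        #   def _check((body, statuscode, respheaders)):
        #       self.failUnlessEqual(statuscode, "200")
        #   d.addCallback(_check)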
assert not isinstance(urlpath, unicode) url = self.client_baseurls[clientnum] + urlpath factory = HTTPClientGETFactory(url, method=method, followRedirect=followRedirect, **kwargs) reactor.connectTCP("localhost", self.client_webports[clientnum],factory) d = factory.deferred def _got_data(data): return (data, factory.status, factory.response_headers) if return_response: d.addCallback(_got_data) return factory.deferred def PUT(self, urlpath, **kwargs): return self.GET(urlpath, method="PUT", **kwargs) tahoe-lafs-1.10.0/src/allmydata/test/test_backupdb.py000066400000000000000000000226721221140116300225160ustar00rootroot00000000000000 import os.path, time from StringIO import StringIO from twisted.trial import unittest from allmydata.util import fileutil from allmydata.util.encodingutil import listdir_unicode, get_filesystem_encoding, unicode_platform from allmydata.util.assertutil import precondition from allmydata.scripts import backupdb class BackupDB(unittest.TestCase): def create(self, dbfile): stderr = StringIO() bdb = backupdb.get_backupdb(dbfile, stderr=stderr) self.failUnless(bdb, "unable to create backupdb from %r" % (dbfile,)) return bdb def skip_if_cannot_represent_filename(self, u): precondition(isinstance(u, unicode)) enc = get_filesystem_encoding() if not unicode_platform(): try: u.encode(enc) except UnicodeEncodeError: raise unittest.SkipTest("A non-ASCII filename could not be encoded on this platform.") def test_basic(self): self.basedir = basedir = os.path.join("backupdb", "create") fileutil.make_dirs(basedir) dbfile = os.path.join(basedir, "dbfile") bdb = self.create(dbfile) self.failUnlessEqual(bdb.VERSION, 2) def test_upgrade_v1_v2(self): self.basedir = basedir = os.path.join("backupdb", "upgrade_v1_v2") fileutil.make_dirs(basedir) dbfile = os.path.join(basedir, "dbfile") stderr = StringIO() created = backupdb.get_backupdb(dbfile, stderr=stderr, create_version=(backupdb.SCHEMA_v1, 1), just_create=True) self.failUnless(created, "unable to create v1 backupdb") # now we should have a v1 database on disk bdb = self.create(dbfile) self.failUnlessEqual(bdb.VERSION, 2) def test_fail(self): self.basedir = basedir = os.path.join("backupdb", "fail") fileutil.make_dirs(basedir) # put a non-DB file in the way not_a_db = ("I do not look like a sqlite database\n" + "I'M NOT" * 1000) # OS-X sqlite-2.3.2 takes some convincing self.writeto("not-a-database", not_a_db) stderr_f = StringIO() bdb = backupdb.get_backupdb(os.path.join(basedir, "not-a-database"), stderr_f) self.failUnlessEqual(bdb, None) stderr = stderr_f.getvalue() self.failUnlessIn("backupdb file is unusable", stderr) self.failUnlessIn("file is encrypted or is not a database", stderr) # put a directory in the way, to exercise a different error path where = os.path.join(basedir, "roadblock-dir") fileutil.make_dirs(where) stderr_f = StringIO() bdb = backupdb.get_backupdb(where, stderr_f) self.failUnlessEqual(bdb, None) stderr = stderr_f.getvalue() self.failUnlessIn("Unable to create/open backupdb file %s" % (where,), stderr) self.failUnlessIn("unable to open database file", stderr) def writeto(self, filename, data): fn = os.path.join(self.basedir, unicode(filename)) parentdir = os.path.dirname(fn) fileutil.make_dirs(parentdir) fileutil.write(fn, data) return fn def test_check(self): self.basedir = basedir = os.path.join("backupdb", "check") fileutil.make_dirs(basedir) dbfile = os.path.join(basedir, "dbfile") bdb = self.create(dbfile) foo_fn = self.writeto("foo.txt", "foo.txt") blah_fn = self.writeto("bar/blah.txt", "blah.txt") r = 
bdb.check_file(foo_fn) self.failUnlessEqual(r.was_uploaded(), False) r.did_upload("foo-cap") r = bdb.check_file(blah_fn) self.failUnlessEqual(r.was_uploaded(), False) r.did_upload("blah-cap") r = bdb.check_file(foo_fn) self.failUnlessEqual(r.was_uploaded(), "foo-cap") self.failUnlessEqual(type(r.was_uploaded()), str) self.failUnlessEqual(r.should_check(), False) time.sleep(1.0) # make sure the timestamp changes self.writeto("foo.txt", "NEW") r = bdb.check_file(foo_fn) self.failUnlessEqual(r.was_uploaded(), False) r.did_upload("new-cap") r = bdb.check_file(foo_fn) self.failUnlessEqual(r.was_uploaded(), "new-cap") self.failUnlessEqual(r.should_check(), False) # if we spontaneously decide to upload it anyways, nothing should # break r.did_upload("new-cap") r = bdb.check_file(foo_fn, use_timestamps=False) self.failUnlessEqual(r.was_uploaded(), False) r.did_upload("new-cap") r = bdb.check_file(foo_fn) self.failUnlessEqual(r.was_uploaded(), "new-cap") self.failUnlessEqual(r.should_check(), False) bdb.NO_CHECK_BEFORE = 0 bdb.ALWAYS_CHECK_AFTER = 0.1 r = bdb.check_file(blah_fn) self.failUnlessEqual(r.was_uploaded(), "blah-cap") self.failUnlessEqual(r.should_check(), True) r.did_check_healthy("results") # we know they're ignored for now bdb.NO_CHECK_BEFORE = 200 bdb.ALWAYS_CHECK_AFTER = 400 r = bdb.check_file(blah_fn) self.failUnlessEqual(r.was_uploaded(), "blah-cap") self.failUnlessEqual(r.should_check(), False) os.unlink(os.path.join(basedir, "foo.txt")) fileutil.make_dirs(os.path.join(basedir, "foo.txt")) # file becomes dir r = bdb.check_file(foo_fn) self.failUnlessEqual(r.was_uploaded(), False) def test_wrong_version(self): self.basedir = basedir = os.path.join("backupdb", "wrong_version") fileutil.make_dirs(basedir) where = os.path.join(basedir, "tooold.db") bdb = self.create(where) # reach into the DB and make it old bdb.cursor.execute("UPDATE version SET version=0") bdb.connection.commit() # now the next time we open the database, it should be an unusable # version stderr_f = StringIO() bdb = backupdb.get_backupdb(where, stderr_f) self.failUnlessEqual(bdb, None) stderr = stderr_f.getvalue() self.failUnlessEqual(stderr.strip(), "Unable to handle backupdb version 0") def test_directory(self): self.basedir = basedir = os.path.join("backupdb", "directory") fileutil.make_dirs(basedir) dbfile = os.path.join(basedir, "dbfile") bdb = self.create(dbfile) contents = {u"file1": "URI:CHK:blah1", u"file2": "URI:CHK:blah2", u"dir1": "URI:DIR2-CHK:baz2"} r = bdb.check_directory(contents) self.failUnless(isinstance(r, backupdb.DirectoryResult)) self.failIf(r.was_created()) dircap = "URI:DIR2-CHK:foo1" r.did_create(dircap) r = bdb.check_directory(contents) self.failUnless(r.was_created()) self.failUnlessEqual(r.was_created(), dircap) self.failUnlessEqual(r.should_check(), False) # if we spontaneously decide to upload it anyways, nothing should # break r.did_create(dircap) r = bdb.check_directory(contents) self.failUnless(r.was_created()) self.failUnlessEqual(r.was_created(), dircap) self.failUnlessEqual(type(r.was_created()), str) self.failUnlessEqual(r.should_check(), False) bdb.NO_CHECK_BEFORE = 0 bdb.ALWAYS_CHECK_AFTER = 0.1 time.sleep(1.0) r = bdb.check_directory(contents) self.failUnless(r.was_created()) self.failUnlessEqual(r.was_created(), dircap) self.failUnlessEqual(r.should_check(), True) r.did_check_healthy("results") bdb.NO_CHECK_BEFORE = 200 bdb.ALWAYS_CHECK_AFTER = 400 r = bdb.check_directory(contents) self.failUnless(r.was_created()) self.failUnlessEqual(r.was_created(), dircap) 
self.failUnlessEqual(r.should_check(), False) contents2 = {u"file1": "URI:CHK:blah1", u"dir1": "URI:DIR2-CHK:baz2"} r = bdb.check_directory(contents2) self.failIf(r.was_created()) contents3 = {u"file1": "URI:CHK:blah1", u"file2": "URI:CHK:blah3", u"dir1": "URI:DIR2-CHK:baz2"} r = bdb.check_directory(contents3) self.failIf(r.was_created()) def test_unicode(self): self.skip_if_cannot_represent_filename(u"f\u00f6\u00f6.txt") self.skip_if_cannot_represent_filename(u"b\u00e5r.txt") self.basedir = basedir = os.path.join("backupdb", "unicode") fileutil.make_dirs(basedir) dbfile = os.path.join(basedir, "dbfile") bdb = self.create(dbfile) self.writeto(u"f\u00f6\u00f6.txt", "foo.txt") files = [fn for fn in listdir_unicode(unicode(basedir)) if fn.endswith(".txt")] self.failUnlessEqual(len(files), 1) foo_fn = os.path.join(basedir, files[0]) #print foo_fn, type(foo_fn) r = bdb.check_file(foo_fn) self.failUnlessEqual(r.was_uploaded(), False) r.did_upload("foo-cap") r = bdb.check_file(foo_fn) self.failUnlessEqual(r.was_uploaded(), "foo-cap") self.failUnlessEqual(r.should_check(), False) bar_fn = self.writeto(u"b\u00e5r.txt", "bar.txt") #print bar_fn, type(bar_fn) r = bdb.check_file(bar_fn) self.failUnlessEqual(r.was_uploaded(), False) r.did_upload("bar-cap") r = bdb.check_file(bar_fn) self.failUnlessEqual(r.was_uploaded(), "bar-cap") self.failUnlessEqual(r.should_check(), False) tahoe-lafs-1.10.0/src/allmydata/test/test_base62.py000066400000000000000000000046261221140116300220240ustar00rootroot00000000000000import random, unittest from allmydata.util import base62, mathutil def insecurerandstr(n): return ''.join(map(chr, map(random.randrange, [0]*n, [256]*n))) class T(unittest.TestCase): def _test_num_octets_that_encode_to_this_many_chars(self, chars, octets): assert base62.num_octets_that_encode_to_this_many_chars(chars) == octets, "%s != %s <- %s" % (octets, base62.num_octets_that_encode_to_this_many_chars(chars), chars) def _test_ende(self, bs): ascii=base62.b2a(bs) bs2=base62.a2b(ascii) assert bs2 == bs, "bs2: %s:%s, bs: %s:%s, ascii: %s:%s" % (len(bs2), `bs2`, len(bs), `bs`, len(ascii), `ascii`) def test_num_octets_that_encode_to_this_many_chars(self): return self._test_num_octets_that_encode_to_this_many_chars(2, 1) return self._test_num_octets_that_encode_to_this_many_chars(3, 2) return self._test_num_octets_that_encode_to_this_many_chars(5, 3) return self._test_num_octets_that_encode_to_this_many_chars(6, 4) def test_ende_0x00(self): return self._test_ende('\x00') def test_ende_0x01(self): return self._test_ende('\x01') def test_ende_0x0100(self): return self._test_ende('\x01\x00') def test_ende_0x000000(self): return self._test_ende('\x00\x00\x00') def test_ende_0x010000(self): return self._test_ende('\x01\x00\x00') def test_ende_randstr(self): return self._test_ende(insecurerandstr(2**4)) def test_ende_longrandstr(self): return self._test_ende(insecurerandstr(random.randrange(0, 2**10))) def test_odd_sizes(self): for j in range(2**6): lib = random.randrange(1, 2**8) numos = mathutil.div_ceil(lib, 8) bs = insecurerandstr(numos) # zero-out unused least-sig bits if lib%8: b=ord(bs[-1]) b = b >> (8 - (lib%8)) b = b << (8 - (lib%8)) bs = bs[:-1] + chr(b) asl = base62.b2a_l(bs, lib) assert len(asl) == base62.num_chars_that_this_many_octets_encode_to(numos) # the size of the base-62 encoding must be just right bs2l = base62.a2b_l(asl, lib) assert len(bs2l) == numos # the size of the result must be just right assert bs == bs2l def suite(): suite = unittest.makeSuite(T, 'test') return suite if __name__ 
== "__main__": unittest.main() tahoe-lafs-1.10.0/src/allmydata/test/test_checker.py000066400000000000000000000600551221140116300223440ustar00rootroot00000000000000 import simplejson import os.path, shutil from twisted.trial import unittest from twisted.internet import defer from allmydata import check_results, uri from allmydata import uri as tahoe_uri from allmydata.util import base32 from allmydata.web import check_results as web_check_results from allmydata.storage_client import StorageFarmBroker, NativeStorageServer from allmydata.storage.server import storage_index_to_dir from allmydata.monitor import Monitor from allmydata.test.no_network import GridTestMixin from allmydata.immutable.upload import Data from allmydata.test.common_web import WebRenderingMixin from allmydata.mutable.publish import MutableData class FakeClient: def get_storage_broker(self): return self.storage_broker class WebResultsRendering(unittest.TestCase, WebRenderingMixin): def create_fake_client(self): sb = StorageFarmBroker(None, True) # s.get_name() (the "short description") will be "v0-00000000". # s.get_longname() will include the -long suffix. # s.get_peerid() (i.e. tubid) will be "aaa.." or "777.." or "ceir.." servers = [("v0-00000000-long", "\x00"*20, "peer-0"), ("v0-ffffffff-long", "\xff"*20, "peer-f"), ("v0-11111111-long", "\x11"*20, "peer-11")] for (key_s, peerid, nickname) in servers: tubid_b32 = base32.b2a(peerid) furl = "pb://%s@nowhere/fake" % tubid_b32 ann = { "version": 0, "service-name": "storage", "anonymous-storage-FURL": furl, "permutation-seed-base32": "", "nickname": unicode(nickname), "app-versions": {}, # need #466 and v2 introducer "my-version": "ver", "oldest-supported": "oldest", } s = NativeStorageServer(key_s, ann) sb.test_add_server(peerid, s) # XXX: maybe use key_s? 
c = FakeClient() c.storage_broker = sb return c def render_json(self, page): d = self.render1(page, args={"output": ["json"]}) return d def test_literal(self): c = self.create_fake_client() lcr = web_check_results.LiteralCheckResultsRenderer(c) d = self.render1(lcr) def _check(html): s = self.remove_tags(html) self.failUnlessIn("Literal files are always healthy", s) d.addCallback(_check) d.addCallback(lambda ignored: self.render1(lcr, args={"return_to": ["FOOURL"]})) def _check_return_to(html): s = self.remove_tags(html) self.failUnlessIn("Literal files are always healthy", s) self.failUnlessIn('Return to file.', html) d.addCallback(_check_return_to) d.addCallback(lambda ignored: self.render_json(lcr)) def _check_json(json): j = simplejson.loads(json) self.failUnlessEqual(j["storage-index"], "") self.failUnlessEqual(j["results"]["healthy"], True) d.addCallback(_check_json) return d def test_check(self): c = self.create_fake_client() sb = c.storage_broker serverid_1 = "\x00"*20 serverid_f = "\xff"*20 server_1 = sb.get_stub_server(serverid_1) server_f = sb.get_stub_server(serverid_f) u = uri.CHKFileURI("\x00"*16, "\x00"*32, 3, 10, 1234) data = { "count_shares_needed": 3, "count_shares_expected": 9, "count_shares_good": 10, "count_good_share_hosts": 11, "count_recoverable_versions": 1, "count_unrecoverable_versions": 0, "servers_responding": [], "sharemap": {"shareid1": [server_1, server_f]}, "count_wrong_shares": 0, "list_corrupt_shares": [], "count_corrupt_shares": 0, "list_incompatible_shares": [], "count_incompatible_shares": 0, "report": [], "share_problems": [], "servermap": None, } cr = check_results.CheckResults(u, u.get_storage_index(), healthy=True, recoverable=True, needs_rebalancing=False, summary="groovy", **data) w = web_check_results.CheckResultsRenderer(c, cr) html = self.render2(w) s = self.remove_tags(html) self.failUnlessIn("File Check Results for SI=2k6avp", s) # abbreviated self.failUnlessIn("Healthy : groovy", s) self.failUnlessIn("Share Counts: need 3-of-9, have 10", s) self.failUnlessIn("Hosts with good shares: 11", s) self.failUnlessIn("Corrupt shares: none", s) self.failUnlessIn("Wrong Shares: 0", s) self.failUnlessIn("Recoverable Versions: 1", s) self.failUnlessIn("Unrecoverable Versions: 0", s) self.failUnlessIn("Good Shares (sorted in share order): Share ID Nickname Node ID shareid1 peer-0 00000000 peer-f ffffffff", s) cr = check_results.CheckResults(u, u.get_storage_index(), healthy=False, recoverable=True, needs_rebalancing=False, summary="ungroovy", **data) w = web_check_results.CheckResultsRenderer(c, cr) html = self.render2(w) s = self.remove_tags(html) self.failUnlessIn("File Check Results for SI=2k6avp", s) # abbreviated self.failUnlessIn("Not Healthy! : ungroovy", s) data["count_corrupt_shares"] = 1 data["list_corrupt_shares"] = [(server_1, u.get_storage_index(), 2)] cr = check_results.CheckResults(u, u.get_storage_index(), healthy=False, recoverable=False, needs_rebalancing=False, summary="rather dead", **data) w = web_check_results.CheckResultsRenderer(c, cr) html = self.render2(w) s = self.remove_tags(html) self.failUnlessIn("File Check Results for SI=2k6avp", s) # abbreviated self.failUnlessIn("Not Recoverable! : rather dead", s) self.failUnlessIn("Corrupt shares: Share ID Nickname Node ID sh#2 peer-0 00000000", s) html = self.render2(w) s = self.remove_tags(html) self.failUnlessIn("File Check Results for SI=2k6avp", s) # abbreviated self.failUnlessIn("Not Recoverable! 
: rather dead", s) html = self.render2(w, args={"return_to": ["FOOURL"]}) self.failUnlessIn('Return to file/directory.', html) d = self.render_json(w) def _check_json(jdata): j = simplejson.loads(jdata) self.failUnlessEqual(j["summary"], "rather dead") self.failUnlessEqual(j["storage-index"], "2k6avpjga3dho3zsjo6nnkt7n4") expected = {'needs-rebalancing': False, 'count-shares-expected': 9, 'healthy': False, 'count-unrecoverable-versions': 0, 'count-shares-needed': 3, 'sharemap': {"shareid1": ["v0-00000000-long", "v0-ffffffff-long"]}, 'count-recoverable-versions': 1, 'list-corrupt-shares': [["v0-00000000-long", "2k6avpjga3dho3zsjo6nnkt7n4", 2]], 'count-good-share-hosts': 11, 'count-wrong-shares': 0, 'count-shares-good': 10, 'count-corrupt-shares': 1, 'servers-responding': [], 'recoverable': False, } self.failUnlessEqual(j["results"], expected) d.addCallback(_check_json) d.addCallback(lambda ignored: self.render1(w)) def _check(html): s = self.remove_tags(html) self.failUnlessIn("File Check Results for SI=2k6avp", s) self.failUnlessIn("Not Recoverable! : rather dead", s) d.addCallback(_check) return d def test_check_and_repair(self): c = self.create_fake_client() sb = c.storage_broker serverid_1 = "\x00"*20 serverid_f = "\xff"*20 u = uri.CHKFileURI("\x00"*16, "\x00"*32, 3, 10, 1234) data = { "count_shares_needed": 3, "count_shares_expected": 10, "count_shares_good": 6, "count_good_share_hosts": 7, "count_recoverable_versions": 1, "count_unrecoverable_versions": 0, "servers_responding": [], "sharemap": {"shareid1": [sb.get_stub_server(serverid_1), sb.get_stub_server(serverid_f)]}, "count_wrong_shares": 0, "list_corrupt_shares": [], "count_corrupt_shares": 0, "list_incompatible_shares": [], "count_incompatible_shares": 0, "report": [], "share_problems": [], "servermap": None, } pre_cr = check_results.CheckResults(u, u.get_storage_index(), healthy=False, recoverable=True, needs_rebalancing=False, summary="illing", **data) data = { "count_shares_needed": 3, "count_shares_expected": 10, "count_shares_good": 10, "count_good_share_hosts": 11, "count_recoverable_versions": 1, "count_unrecoverable_versions": 0, "servers_responding": [], "sharemap": {"shareid1": [sb.get_stub_server(serverid_1), sb.get_stub_server(serverid_f)]}, "count_wrong_shares": 0, "count_corrupt_shares": 0, "list_corrupt_shares": [], "list_incompatible_shares": [], "count_incompatible_shares": 0, "report": [], "share_problems": [], "servermap": None, } post_cr = check_results.CheckResults(u, u.get_storage_index(), healthy=True, recoverable=True, needs_rebalancing=False, summary="groovy", **data) crr = check_results.CheckAndRepairResults(u.get_storage_index()) crr.pre_repair_results = pre_cr crr.post_repair_results = post_cr crr.repair_attempted = False w = web_check_results.CheckAndRepairResultsRenderer(c, crr) html = self.render2(w) s = self.remove_tags(html) self.failUnlessIn("File Check-And-Repair Results for SI=2k6avp", s) self.failUnlessIn("Healthy : groovy", s) self.failUnlessIn("No repair necessary", s) self.failUnlessIn("Post-Repair Checker Results:", s) self.failUnlessIn("Share Counts: need 3-of-10, have 10", s) crr.repair_attempted = True crr.repair_successful = True html = self.render2(w) s = self.remove_tags(html) self.failUnlessIn("File Check-And-Repair Results for SI=2k6avp", s) self.failUnlessIn("Healthy : groovy", s) self.failUnlessIn("Repair successful", s) self.failUnlessIn("Post-Repair Checker Results:", s) crr.repair_attempted = True crr.repair_successful = False post_cr = check_results.CheckResults(u, 
u.get_storage_index(), healthy=False, recoverable=True, needs_rebalancing=False, summary="better", **data) crr.post_repair_results = post_cr html = self.render2(w) s = self.remove_tags(html) self.failUnlessIn("File Check-And-Repair Results for SI=2k6avp", s) self.failUnlessIn("Not Healthy! : better", s) self.failUnlessIn("Repair unsuccessful", s) self.failUnlessIn("Post-Repair Checker Results:", s) crr.repair_attempted = True crr.repair_successful = False post_cr = check_results.CheckResults(u, u.get_storage_index(), healthy=False, recoverable=False, needs_rebalancing=False, summary="worse", **data) crr.post_repair_results = post_cr html = self.render2(w) s = self.remove_tags(html) self.failUnlessIn("File Check-And-Repair Results for SI=2k6avp", s) self.failUnlessIn("Not Recoverable! : worse", s) self.failUnlessIn("Repair unsuccessful", s) self.failUnlessIn("Post-Repair Checker Results:", s) d = self.render_json(w) def _got_json(data): j = simplejson.loads(data) self.failUnlessEqual(j["repair-attempted"], True) self.failUnlessEqual(j["storage-index"], "2k6avpjga3dho3zsjo6nnkt7n4") self.failUnlessEqual(j["pre-repair-results"]["summary"], "illing") self.failUnlessEqual(j["post-repair-results"]["summary"], "worse") d.addCallback(_got_json) w2 = web_check_results.CheckAndRepairResultsRenderer(c, None) d.addCallback(lambda ignored: self.render_json(w2)) def _got_lit_results(data): j = simplejson.loads(data) self.failUnlessEqual(j["repair-attempted"], False) self.failUnlessEqual(j["storage-index"], "") d.addCallback(_got_lit_results) return d class BalancingAct(GridTestMixin, unittest.TestCase): # test for #1115 regarding the 'count-good-share-hosts' metric def add_server(self, server_number, readonly=False): assert self.g, "I tried to find a grid at self.g, but failed" ss = self.g.make_server(server_number, readonly) #log.msg("just created a server, number: %s => %s" % (server_number, ss,)) self.g.add_server(server_number, ss) def add_server_with_share(self, server_number, uri, share_number=None, readonly=False): self.add_server(server_number, readonly) if share_number is not None: self.copy_share_to_server(uri, share_number, server_number) def copy_share_to_server(self, uri, share_number, server_number): ss = self.g.servers_by_number[server_number] # Copy share i from the directory associated with the first # storage server to the directory associated with this one. assert self.g, "I tried to find a grid at self.g, but failed" assert self.shares, "I tried to find shares at self.shares, but failed" old_share_location = self.shares[share_number][2] new_share_location = os.path.join(ss.storedir, "shares") si = tahoe_uri.from_string(self.uri).get_storage_index() new_share_location = os.path.join(new_share_location, storage_index_to_dir(si)) if not os.path.exists(new_share_location): os.makedirs(new_share_location) new_share_location = os.path.join(new_share_location, str(share_number)) if old_share_location != new_share_location: shutil.copy(old_share_location, new_share_location) shares = self.find_uri_shares(uri) # Make sure that the storage server has the share. 
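        # (find_uri_shares() yields (shnum, serverid, sharefile_path) tuples,
        # so asserting that this exact triple is present proves the copy landed
        # where this server expects it. The same tuples feed the
        # 'count-good-share-hosts' metric this class exercises: shares are
        # counted per share number, hosts per distinct serverid, roughly
        #   good_share_hosts = len(set(serverid for _, serverid, _ in shares))
        # -- an illustrative sketch, not code taken from the checker itself.)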
self.failUnless((share_number, ss.my_nodeid, new_share_location) in shares) def _pretty_shares_chart(self, uri): # Servers are labeled A-Z, shares are labeled 0-9 letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' assert len(self.g.servers_by_number) < len(letters), \ "This little printing function is only meant for < 26 servers" shares_chart = {} names = dict(zip([ss.my_nodeid for _,ss in self.g.servers_by_number.iteritems()], letters)) for shnum, serverid, _ in self.find_uri_shares(uri): shares_chart.setdefault(shnum, []).append(names[serverid]) return shares_chart def test_good_share_hosts(self): self.basedir = "checker/BalancingAct/1115" self.set_up_grid(num_servers=1) c0 = self.g.clients[0] c0.DEFAULT_ENCODING_PARAMETERS['happy'] = 1 c0.DEFAULT_ENCODING_PARAMETERS['n'] = 4 c0.DEFAULT_ENCODING_PARAMETERS['k'] = 3 DATA = "data" * 100 d = c0.upload(Data(DATA, convergence="")) def _stash_immutable(ur): self.imm = c0.create_node_from_uri(ur.get_uri()) self.uri = self.imm.get_uri() d.addCallback(_stash_immutable) d.addCallback(lambda ign: self.find_uri_shares(self.uri)) def _store_shares(shares): self.shares = shares d.addCallback(_store_shares) def add_three(_, i): # Add a new server with just share 3 self.add_server_with_share(i, self.uri, 3) #print self._pretty_shares_chart(self.uri) for i in range(1,5): d.addCallback(add_three, i) def _check_and_repair(_): return self.imm.check_and_repair(Monitor()) def _check_counts(crr, shares_good, good_share_hosts): prr = crr.get_post_repair_results() #print self._pretty_shares_chart(self.uri) self.failUnlessEqual(prr.get_share_counter_good(), shares_good) self.failUnlessEqual(prr.get_host_counter_good_shares(), good_share_hosts) """ Initial sharemap: 0:[A] 1:[A] 2:[A] 3:[A,B,C,D,E] 4 good shares, but 5 good hosts After deleting all instances of share #3 and repairing: 0:[A,B], 1:[A,C], 2:[A,D], 3:[E] Still 4 good shares and 5 good hosts """ d.addCallback(_check_and_repair) d.addCallback(_check_counts, 4, 5) d.addCallback(lambda _: self.delete_shares_numbered(self.uri, [3])) d.addCallback(_check_and_repair) d.addCallback(_check_counts, 4, 5) d.addCallback(lambda _: [self.g.break_server(sid) for sid in self.g.get_all_serverids()]) d.addCallback(_check_and_repair) d.addCallback(_check_counts, 0, 0) return d class AddLease(GridTestMixin, unittest.TestCase): # test for #875, in which failures in the add-lease call cause # false-negatives in the checker def test_875(self): self.basedir = "checker/AddLease/875" self.set_up_grid(num_servers=1) c0 = self.g.clients[0] c0.DEFAULT_ENCODING_PARAMETERS['happy'] = 1 self.uris = {} DATA = "data" * 100 d = c0.upload(Data(DATA, convergence="")) def _stash_immutable(ur): self.imm = c0.create_node_from_uri(ur.get_uri()) d.addCallback(_stash_immutable) d.addCallback(lambda ign: c0.create_mutable_file(MutableData("contents"))) def _stash_mutable(node): self.mut = node d.addCallback(_stash_mutable) def _check_cr(cr, which): self.failUnless(cr.is_healthy(), which) # these two should work normally d.addCallback(lambda ign: self.imm.check(Monitor(), add_lease=True)) d.addCallback(_check_cr, "immutable-normal") d.addCallback(lambda ign: self.mut.check(Monitor(), add_lease=True)) d.addCallback(_check_cr, "mutable-normal") really_did_break = [] # now break the server's remote_add_lease call def _break_add_lease(ign): def broken_add_lease(*args, **kwargs): really_did_break.append(1) raise KeyError("intentional failure, should be ignored") assert self.g.servers_by_number[0].remote_add_lease self.g.servers_by_number[0].remote_add_lease = 
                broken_add_lease
        d.addCallback(_break_add_lease)

        # and confirm that the files still look healthy
        d.addCallback(lambda ign: self.mut.check(Monitor(), add_lease=True))
        d.addCallback(_check_cr, "mutable-broken")
        d.addCallback(lambda ign: self.imm.check(Monitor(), add_lease=True))
        d.addCallback(_check_cr, "immutable-broken")

        d.addCallback(lambda ign: self.failUnless(really_did_break))
        return d


class CounterHolder(object):
    def __init__(self):
        self._num_active_block_fetches = 0
        self._max_active_block_fetches = 0

from allmydata.immutable.checker import ValidatedReadBucketProxy
class MockVRBP(ValidatedReadBucketProxy):
    def __init__(self, sharenum, bucket, share_hash_tree, num_blocks,
                 block_size, share_size, counterholder):
        ValidatedReadBucketProxy.__init__(self, sharenum, bucket,
                                          share_hash_tree, num_blocks,
                                          block_size, share_size)
        self.counterholder = counterholder

    def get_block(self, blocknum):
        self.counterholder._num_active_block_fetches += 1
        if self.counterholder._num_active_block_fetches > self.counterholder._max_active_block_fetches:
            self.counterholder._max_active_block_fetches = self.counterholder._num_active_block_fetches
        d = ValidatedReadBucketProxy.get_block(self, blocknum)
        def _mark_no_longer_active(res):
            self.counterholder._num_active_block_fetches -= 1
            return res
        d.addBoth(_mark_no_longer_active)
        return d

class TooParallel(GridTestMixin, unittest.TestCase):
    # bug #1395: immutable verifier was aggressively parallelized, checking
    # all blocks of all shares at the same time, blowing our memory budget
    # and crashing with MemoryErrors on >1GB files.

    def test_immutable(self):
        import allmydata.immutable.checker
        origVRBP = allmydata.immutable.checker.ValidatedReadBucketProxy

        self.basedir = "checker/TooParallel/immutable"

        # If any code asks to instantiate a ValidatedReadBucketProxy,
        # we give them a MockVRBP which is configured to use our
        # CounterHolder.
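        # (The pattern here -- swap in a counting wrapper, run the check, then
        # restore the original class in an addBoth cleanup -- measures peak
        # concurrency without modifying the verifier. A minimal sketch of the
        # counting idea, independent of any tahoe class:
        #
        #   active = peak = 0
        #   active += 1; peak = max(peak, active)   # on entering a fetch
        #   ...perform the fetch...
        #   active -= 1                             # on leaving it
        #
        # so 'peak' records the maximum number of fetches in flight.)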
        counterholder = CounterHolder()
        def make_mock_VRBP(*args, **kwargs):
            return MockVRBP(counterholder=counterholder, *args, **kwargs)
        allmydata.immutable.checker.ValidatedReadBucketProxy = make_mock_VRBP

        d = defer.succeed(None)
        def _start(ign):
            self.set_up_grid(num_servers=4)
            self.c0 = self.g.clients[0]
            self.c0.DEFAULT_ENCODING_PARAMETERS = { "k": 1, "happy": 4,
                                                    "n": 4,
                                                    "max_segment_size": 5, }
            self.uris = {}
            DATA = "data" * 100 # 400/5 = 80 blocks
            return self.c0.upload(Data(DATA, convergence=""))
        d.addCallback(_start)
        def _do_check(ur):
            n = self.c0.create_node_from_uri(ur.get_uri())
            return n.check(Monitor(), verify=True)
        d.addCallback(_do_check)
        def _check(cr):
            # the verifier works on all 4 shares in parallel, but only
            # fetches one block from each share at a time, so we expect to
            # see 4 parallel fetches
            self.failUnlessEqual(counterholder._max_active_block_fetches, 4)
        d.addCallback(_check)

        def _clean_up(res):
            allmydata.immutable.checker.ValidatedReadBucketProxy = origVRBP
            return res
        d.addBoth(_clean_up)
        return d
    test_immutable.timeout = 80

tahoe-lafs-1.10.0/src/allmydata/test/test_cli.py

import os.path
from twisted.trial import unittest
from cStringIO import StringIO
import urllib, re, sys
import simplejson

from mock import patch, Mock, call

from allmydata.util import fileutil, hashutil, base32, keyutil
from allmydata import uri
from allmydata.immutable import upload
from allmydata.interfaces import MDMF_VERSION, SDMF_VERSION
from allmydata.mutable.publish import MutableData
from allmydata.dirnode import normalize
from allmydata.scripts.common_http import socket_error
import allmydata.scripts.common_http
from pycryptopp.publickey import ed25519

# Test that the scripts can be imported.
from allmydata.scripts import create_node, debug, keygen, startstop_node, \
    tahoe_add_alias, tahoe_backup, tahoe_check, tahoe_cp, tahoe_get, tahoe_ls, \
    tahoe_manifest, tahoe_mkdir, tahoe_mv, tahoe_put, tahoe_unlink, tahoe_webopen
_hush_pyflakes = [create_node, debug, keygen, startstop_node,
    tahoe_add_alias, tahoe_backup, tahoe_check, tahoe_cp, tahoe_get, tahoe_ls,
    tahoe_manifest, tahoe_mkdir, tahoe_mv, tahoe_put, tahoe_unlink, tahoe_webopen]

from allmydata.scripts import common
from allmydata.scripts.common import DEFAULT_ALIAS, get_aliases, get_alias, \
     DefaultAliasMarker

from allmydata.scripts import cli, debug, runner, backupdb
from allmydata.test.common_util import StallMixin, ReallyEqualMixin
from allmydata.test.no_network import GridTestMixin
from twisted.internet import threads # CLI tests use deferToThread
from twisted.internet import defer # List uses a DeferredList in one place.
from twisted.python import usage from allmydata.util.assertutil import precondition from allmydata.util.encodingutil import listdir_unicode, unicode_platform, \ quote_output, get_io_encoding, get_filesystem_encoding, \ unicode_to_output, unicode_to_argv, to_str from allmydata.util.fileutil import abspath_expanduser_unicode timeout = 480 # deep_check takes 360s on Zandr's linksys box, others take > 240s def parse_options(basedir, command, args): o = runner.Options() o.parseOptions(["--node-directory", basedir, command] + args) while hasattr(o, "subOptions"): o = o.subOptions return o class CLITestMixin(ReallyEqualMixin): def do_cli(self, verb, *args, **kwargs): nodeargs = [ "--node-directory", self.get_clientdir(), ] argv = nodeargs + [verb] + list(args) stdin = kwargs.get("stdin", "") stdout, stderr = StringIO(), StringIO() d = threads.deferToThread(runner.runner, argv, run_by_human=False, stdin=StringIO(stdin), stdout=stdout, stderr=stderr) def _done(rc): return rc, stdout.getvalue(), stderr.getvalue() d.addCallback(_done) return d def skip_if_cannot_represent_filename(self, u): precondition(isinstance(u, unicode)) enc = get_filesystem_encoding() if not unicode_platform(): try: u.encode(enc) except UnicodeEncodeError: raise unittest.SkipTest("A non-ASCII filename could not be encoded on this platform.") class CLI(CLITestMixin, unittest.TestCase): def _dump_cap(self, *args): config = debug.DumpCapOptions() config.stdout,config.stderr = StringIO(), StringIO() config.parseOptions(args) debug.dump_cap(config) self.failIf(config.stderr.getvalue()) output = config.stdout.getvalue() return output def test_dump_cap_chk(self): key = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" uri_extension_hash = hashutil.uri_extension_hash("stuff") needed_shares = 25 total_shares = 100 size = 1234 u = uri.CHKFileURI(key=key, uri_extension_hash=uri_extension_hash, needed_shares=needed_shares, total_shares=total_shares, size=size) output = self._dump_cap(u.to_string()) self.failUnless("CHK File:" in output, output) self.failUnless("key: aaaqeayeaudaocajbifqydiob4" in output, output) self.failUnless("UEB hash: nf3nimquen7aeqm36ekgxomalstenpkvsdmf6fplj7swdatbv5oa" in output, output) self.failUnless("size: 1234" in output, output) self.failUnless("k/N: 25/100" in output, output) self.failUnless("storage index: hdis5iaveku6lnlaiccydyid7q" in output, output) output = self._dump_cap("--client-secret", "5s33nk3qpvnj2fw3z4mnm2y6fa", u.to_string()) self.failUnless("client renewal secret: znxmki5zdibb5qlt46xbdvk2t55j7hibejq3i5ijyurkr6m6jkhq" in output, output) output = self._dump_cap(u.get_verify_cap().to_string()) self.failIf("key: " in output, output) self.failUnless("UEB hash: nf3nimquen7aeqm36ekgxomalstenpkvsdmf6fplj7swdatbv5oa" in output, output) self.failUnless("size: 1234" in output, output) self.failUnless("k/N: 25/100" in output, output) self.failUnless("storage index: hdis5iaveku6lnlaiccydyid7q" in output, output) prefixed_u = "http://127.0.0.1/uri/%s" % urllib.quote(u.to_string()) output = self._dump_cap(prefixed_u) self.failUnless("CHK File:" in output, output) self.failUnless("key: aaaqeayeaudaocajbifqydiob4" in output, output) self.failUnless("UEB hash: nf3nimquen7aeqm36ekgxomalstenpkvsdmf6fplj7swdatbv5oa" in output, output) self.failUnless("size: 1234" in output, output) self.failUnless("k/N: 25/100" in output, output) self.failUnless("storage index: hdis5iaveku6lnlaiccydyid7q" in output, output) def test_dump_cap_lit(self): u = uri.LiteralFileURI("this is some data") output = 
self._dump_cap(u.to_string()) self.failUnless("Literal File URI:" in output, output) self.failUnless("data: 'this is some data'" in output, output) def test_dump_cap_sdmf(self): writekey = "\x01" * 16 fingerprint = "\xfe" * 32 u = uri.WriteableSSKFileURI(writekey, fingerprint) output = self._dump_cap(u.to_string()) self.failUnless("SDMF Writeable URI:" in output, output) self.failUnless("writekey: aeaqcaibaeaqcaibaeaqcaibae" in output, output) self.failUnless("readkey: nvgh5vj2ekzzkim5fgtb4gey5y" in output, output) self.failUnless("storage index: nt4fwemuw7flestsezvo2eveke" in output, output) self.failUnless("fingerprint: 737p57x6737p57x6737p57x6737p57x6737p57x6737p57x6737a" in output, output) output = self._dump_cap("--client-secret", "5s33nk3qpvnj2fw3z4mnm2y6fa", u.to_string()) self.failUnless("file renewal secret: arpszxzc2t6kb4okkg7sp765xgkni5z7caavj7lta73vmtymjlxq" in output, output) fileutil.make_dirs("cli/test_dump_cap/private") fileutil.write("cli/test_dump_cap/private/secret", "5s33nk3qpvnj2fw3z4mnm2y6fa\n") output = self._dump_cap("--client-dir", "cli/test_dump_cap", u.to_string()) self.failUnless("file renewal secret: arpszxzc2t6kb4okkg7sp765xgkni5z7caavj7lta73vmtymjlxq" in output, output) output = self._dump_cap("--client-dir", "cli/test_dump_cap_BOGUS", u.to_string()) self.failIf("file renewal secret:" in output, output) output = self._dump_cap("--nodeid", "tqc35esocrvejvg4mablt6aowg6tl43j", u.to_string()) self.failUnless("write_enabler: mgcavriox2wlb5eer26unwy5cw56elh3sjweffckkmivvsxtaknq" in output, output) self.failIf("file renewal secret:" in output, output) output = self._dump_cap("--nodeid", "tqc35esocrvejvg4mablt6aowg6tl43j", "--client-secret", "5s33nk3qpvnj2fw3z4mnm2y6fa", u.to_string()) self.failUnless("write_enabler: mgcavriox2wlb5eer26unwy5cw56elh3sjweffckkmivvsxtaknq" in output, output) self.failUnless("file renewal secret: arpszxzc2t6kb4okkg7sp765xgkni5z7caavj7lta73vmtymjlxq" in output, output) self.failUnless("lease renewal secret: 7pjtaumrb7znzkkbvekkmuwpqfjyfyamznfz4bwwvmh4nw33lorq" in output, output) u = u.get_readonly() output = self._dump_cap(u.to_string()) self.failUnless("SDMF Read-only URI:" in output, output) self.failUnless("readkey: nvgh5vj2ekzzkim5fgtb4gey5y" in output, output) self.failUnless("storage index: nt4fwemuw7flestsezvo2eveke" in output, output) self.failUnless("fingerprint: 737p57x6737p57x6737p57x6737p57x6737p57x6737p57x6737a" in output, output) u = u.get_verify_cap() output = self._dump_cap(u.to_string()) self.failUnless("SDMF Verifier URI:" in output, output) self.failUnless("storage index: nt4fwemuw7flestsezvo2eveke" in output, output) self.failUnless("fingerprint: 737p57x6737p57x6737p57x6737p57x6737p57x6737p57x6737a" in output, output) def test_dump_cap_mdmf(self): writekey = "\x01" * 16 fingerprint = "\xfe" * 32 u = uri.WriteableMDMFFileURI(writekey, fingerprint) output = self._dump_cap(u.to_string()) self.failUnless("MDMF Writeable URI:" in output, output) self.failUnless("writekey: aeaqcaibaeaqcaibaeaqcaibae" in output, output) self.failUnless("readkey: nvgh5vj2ekzzkim5fgtb4gey5y" in output, output) self.failUnless("storage index: nt4fwemuw7flestsezvo2eveke" in output, output) self.failUnless("fingerprint: 737p57x6737p57x6737p57x6737p57x6737p57x6737p57x6737a" in output, output) output = self._dump_cap("--client-secret", "5s33nk3qpvnj2fw3z4mnm2y6fa", u.to_string()) self.failUnless("file renewal secret: arpszxzc2t6kb4okkg7sp765xgkni5z7caavj7lta73vmtymjlxq" in output, output) fileutil.make_dirs("cli/test_dump_cap/private") 
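        # (As in the SDMF case above: pointing --client-dir at a node
        # directory lets dump-cap pick up private/secret from it, so the
        # output below is expected to carry the same "file renewal secret"
        # as the explicit --client-secret invocation.)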
fileutil.write("cli/test_dump_cap/private/secret", "5s33nk3qpvnj2fw3z4mnm2y6fa\n") output = self._dump_cap("--client-dir", "cli/test_dump_cap", u.to_string()) self.failUnless("file renewal secret: arpszxzc2t6kb4okkg7sp765xgkni5z7caavj7lta73vmtymjlxq" in output, output) output = self._dump_cap("--client-dir", "cli/test_dump_cap_BOGUS", u.to_string()) self.failIf("file renewal secret:" in output, output) output = self._dump_cap("--nodeid", "tqc35esocrvejvg4mablt6aowg6tl43j", u.to_string()) self.failUnless("write_enabler: mgcavriox2wlb5eer26unwy5cw56elh3sjweffckkmivvsxtaknq" in output, output) self.failIf("file renewal secret:" in output, output) output = self._dump_cap("--nodeid", "tqc35esocrvejvg4mablt6aowg6tl43j", "--client-secret", "5s33nk3qpvnj2fw3z4mnm2y6fa", u.to_string()) self.failUnless("write_enabler: mgcavriox2wlb5eer26unwy5cw56elh3sjweffckkmivvsxtaknq" in output, output) self.failUnless("file renewal secret: arpszxzc2t6kb4okkg7sp765xgkni5z7caavj7lta73vmtymjlxq" in output, output) self.failUnless("lease renewal secret: 7pjtaumrb7znzkkbvekkmuwpqfjyfyamznfz4bwwvmh4nw33lorq" in output, output) u = u.get_readonly() output = self._dump_cap(u.to_string()) self.failUnless("MDMF Read-only URI:" in output, output) self.failUnless("readkey: nvgh5vj2ekzzkim5fgtb4gey5y" in output, output) self.failUnless("storage index: nt4fwemuw7flestsezvo2eveke" in output, output) self.failUnless("fingerprint: 737p57x6737p57x6737p57x6737p57x6737p57x6737p57x6737a" in output, output) u = u.get_verify_cap() output = self._dump_cap(u.to_string()) self.failUnless("MDMF Verifier URI:" in output, output) self.failUnless("storage index: nt4fwemuw7flestsezvo2eveke" in output, output) self.failUnless("fingerprint: 737p57x6737p57x6737p57x6737p57x6737p57x6737p57x6737a" in output, output) def test_dump_cap_chk_directory(self): key = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" uri_extension_hash = hashutil.uri_extension_hash("stuff") needed_shares = 25 total_shares = 100 size = 1234 u1 = uri.CHKFileURI(key=key, uri_extension_hash=uri_extension_hash, needed_shares=needed_shares, total_shares=total_shares, size=size) u = uri.ImmutableDirectoryURI(u1) output = self._dump_cap(u.to_string()) self.failUnless("CHK Directory URI:" in output, output) self.failUnless("key: aaaqeayeaudaocajbifqydiob4" in output, output) self.failUnless("UEB hash: nf3nimquen7aeqm36ekgxomalstenpkvsdmf6fplj7swdatbv5oa" in output, output) self.failUnless("size: 1234" in output, output) self.failUnless("k/N: 25/100" in output, output) self.failUnless("storage index: hdis5iaveku6lnlaiccydyid7q" in output, output) output = self._dump_cap("--client-secret", "5s33nk3qpvnj2fw3z4mnm2y6fa", u.to_string()) self.failUnless("file renewal secret: csrvkjgomkyyyil5yo4yk5np37p6oa2ve2hg6xmk2dy7kaxsu6xq" in output, output) u = u.get_verify_cap() output = self._dump_cap(u.to_string()) self.failUnless("CHK Directory Verifier URI:" in output, output) self.failIf("key: " in output, output) self.failUnless("UEB hash: nf3nimquen7aeqm36ekgxomalstenpkvsdmf6fplj7swdatbv5oa" in output, output) self.failUnless("size: 1234" in output, output) self.failUnless("k/N: 25/100" in output, output) self.failUnless("storage index: hdis5iaveku6lnlaiccydyid7q" in output, output) def test_dump_cap_sdmf_directory(self): writekey = "\x01" * 16 fingerprint = "\xfe" * 32 u1 = uri.WriteableSSKFileURI(writekey, fingerprint) u = uri.DirectoryURI(u1) output = self._dump_cap(u.to_string()) self.failUnless("Directory Writeable URI:" in output, output) self.failUnless("writekey: 
aeaqcaibaeaqcaibaeaqcaibae" in output, output) self.failUnless("readkey: nvgh5vj2ekzzkim5fgtb4gey5y" in output, output) self.failUnless("storage index: nt4fwemuw7flestsezvo2eveke" in output, output) self.failUnless("fingerprint: 737p57x6737p57x6737p57x6737p57x6737p57x6737p57x6737a" in output, output) output = self._dump_cap("--client-secret", "5s33nk3qpvnj2fw3z4mnm2y6fa", u.to_string()) self.failUnless("file renewal secret: arpszxzc2t6kb4okkg7sp765xgkni5z7caavj7lta73vmtymjlxq" in output, output) output = self._dump_cap("--nodeid", "tqc35esocrvejvg4mablt6aowg6tl43j", u.to_string()) self.failUnless("write_enabler: mgcavriox2wlb5eer26unwy5cw56elh3sjweffckkmivvsxtaknq" in output, output) self.failIf("file renewal secret:" in output, output) output = self._dump_cap("--nodeid", "tqc35esocrvejvg4mablt6aowg6tl43j", "--client-secret", "5s33nk3qpvnj2fw3z4mnm2y6fa", u.to_string()) self.failUnless("write_enabler: mgcavriox2wlb5eer26unwy5cw56elh3sjweffckkmivvsxtaknq" in output, output) self.failUnless("file renewal secret: arpszxzc2t6kb4okkg7sp765xgkni5z7caavj7lta73vmtymjlxq" in output, output) self.failUnless("lease renewal secret: 7pjtaumrb7znzkkbvekkmuwpqfjyfyamznfz4bwwvmh4nw33lorq" in output, output) u = u.get_readonly() output = self._dump_cap(u.to_string()) self.failUnless("Directory Read-only URI:" in output, output) self.failUnless("readkey: nvgh5vj2ekzzkim5fgtb4gey5y" in output, output) self.failUnless("storage index: nt4fwemuw7flestsezvo2eveke" in output, output) self.failUnless("fingerprint: 737p57x6737p57x6737p57x6737p57x6737p57x6737p57x6737a" in output, output) u = u.get_verify_cap() output = self._dump_cap(u.to_string()) self.failUnless("Directory Verifier URI:" in output, output) self.failUnless("storage index: nt4fwemuw7flestsezvo2eveke" in output, output) self.failUnless("fingerprint: 737p57x6737p57x6737p57x6737p57x6737p57x6737p57x6737a" in output, output) def test_dump_cap_mdmf_directory(self): writekey = "\x01" * 16 fingerprint = "\xfe" * 32 u1 = uri.WriteableMDMFFileURI(writekey, fingerprint) u = uri.MDMFDirectoryURI(u1) output = self._dump_cap(u.to_string()) self.failUnless("Directory Writeable URI:" in output, output) self.failUnless("writekey: aeaqcaibaeaqcaibaeaqcaibae" in output, output) self.failUnless("readkey: nvgh5vj2ekzzkim5fgtb4gey5y" in output, output) self.failUnless("storage index: nt4fwemuw7flestsezvo2eveke" in output, output) self.failUnless("fingerprint: 737p57x6737p57x6737p57x6737p57x6737p57x6737p57x6737a" in output, output) output = self._dump_cap("--client-secret", "5s33nk3qpvnj2fw3z4mnm2y6fa", u.to_string()) self.failUnless("file renewal secret: arpszxzc2t6kb4okkg7sp765xgkni5z7caavj7lta73vmtymjlxq" in output, output) output = self._dump_cap("--nodeid", "tqc35esocrvejvg4mablt6aowg6tl43j", u.to_string()) self.failUnless("write_enabler: mgcavriox2wlb5eer26unwy5cw56elh3sjweffckkmivvsxtaknq" in output, output) self.failIf("file renewal secret:" in output, output) output = self._dump_cap("--nodeid", "tqc35esocrvejvg4mablt6aowg6tl43j", "--client-secret", "5s33nk3qpvnj2fw3z4mnm2y6fa", u.to_string()) self.failUnless("write_enabler: mgcavriox2wlb5eer26unwy5cw56elh3sjweffckkmivvsxtaknq" in output, output) self.failUnless("file renewal secret: arpszxzc2t6kb4okkg7sp765xgkni5z7caavj7lta73vmtymjlxq" in output, output) self.failUnless("lease renewal secret: 7pjtaumrb7znzkkbvekkmuwpqfjyfyamznfz4bwwvmh4nw33lorq" in output, output) u = u.get_readonly() output = self._dump_cap(u.to_string()) self.failUnless("Directory Read-only URI:" in output, output) self.failUnless("readkey: 
nvgh5vj2ekzzkim5fgtb4gey5y" in output, output) self.failUnless("storage index: nt4fwemuw7flestsezvo2eveke" in output, output) self.failUnless("fingerprint: 737p57x6737p57x6737p57x6737p57x6737p57x6737p57x6737a" in output, output) u = u.get_verify_cap() output = self._dump_cap(u.to_string()) self.failUnless("Directory Verifier URI:" in output, output) self.failUnless("storage index: nt4fwemuw7flestsezvo2eveke" in output, output) self.failUnless("fingerprint: 737p57x6737p57x6737p57x6737p57x6737p57x6737p57x6737a" in output, output) def _catalog_shares(self, *basedirs): o = debug.CatalogSharesOptions() o.stdout,o.stderr = StringIO(), StringIO() args = list(basedirs) o.parseOptions(args) debug.catalog_shares(o) out = o.stdout.getvalue() err = o.stderr.getvalue() return out, err def test_catalog_shares_error(self): nodedir1 = "cli/test_catalog_shares/node1" sharedir = os.path.join(nodedir1, "storage", "shares", "mq", "mqfblse6m5a6dh45isu2cg7oji") fileutil.make_dirs(sharedir) fileutil.write("cli/test_catalog_shares/node1/storage/shares/mq/not-a-dir", "") # write a bogus share that looks a little bit like CHK fileutil.write(os.path.join(sharedir, "8"), "\x00\x00\x00\x01" + "\xff" * 200) # this triggers an assert nodedir2 = "cli/test_catalog_shares/node2" fileutil.make_dirs(nodedir2) fileutil.write("cli/test_catalog_shares/node1/storage/shares/not-a-dir", "") # now make sure that the 'catalog-shares' commands survives the error out, err = self._catalog_shares(nodedir1, nodedir2) self.failUnlessReallyEqual(out, "", out) self.failUnless("Error processing " in err, "didn't see 'error processing' in '%s'" % err) #self.failUnless(nodedir1 in err, # "didn't see '%s' in '%s'" % (nodedir1, err)) # windows mangles the path, and os.path.join isn't enough to make # up for it, so just look for individual strings self.failUnless("node1" in err, "didn't see 'node1' in '%s'" % err) self.failUnless("mqfblse6m5a6dh45isu2cg7oji" in err, "didn't see 'mqfblse6m5a6dh45isu2cg7oji' in '%s'" % err) def test_alias(self): def s128(c): return base32.b2a(c*(128/8)) def s256(c): return base32.b2a(c*(256/8)) TA = "URI:DIR2:%s:%s" % (s128("T"), s256("T")) WA = "URI:DIR2:%s:%s" % (s128("W"), s256("W")) CA = "URI:DIR2:%s:%s" % (s128("C"), s256("C")) aliases = {"tahoe": TA, "work": WA, "c": CA} def ga1(path): return get_alias(aliases, path, u"tahoe") uses_lettercolon = common.platform_uses_lettercolon_drivename() self.failUnlessReallyEqual(ga1(u"bare"), (TA, "bare")) self.failUnlessReallyEqual(ga1(u"baredir/file"), (TA, "baredir/file")) self.failUnlessReallyEqual(ga1(u"baredir/file:7"), (TA, "baredir/file:7")) self.failUnlessReallyEqual(ga1(u"tahoe:"), (TA, "")) self.failUnlessReallyEqual(ga1(u"tahoe:file"), (TA, "file")) self.failUnlessReallyEqual(ga1(u"tahoe:dir/file"), (TA, "dir/file")) self.failUnlessReallyEqual(ga1(u"work:"), (WA, "")) self.failUnlessReallyEqual(ga1(u"work:file"), (WA, "file")) self.failUnlessReallyEqual(ga1(u"work:dir/file"), (WA, "dir/file")) # default != None means we really expect a tahoe path, regardless of # whether we're on windows or not. This is what 'tahoe get' uses. 
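        # (Illustrative contrast with the default=None cases below: with a
        # default alias in force, u"c:file" resolves to the "c" alias, while
        # with default=None on a platform that uses drive letters it is left
        # alone as the local path "c:file".)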
self.failUnlessReallyEqual(ga1(u"c:"), (CA, "")) self.failUnlessReallyEqual(ga1(u"c:file"), (CA, "file")) self.failUnlessReallyEqual(ga1(u"c:dir/file"), (CA, "dir/file")) self.failUnlessReallyEqual(ga1(u"URI:stuff"), ("URI:stuff", "")) self.failUnlessReallyEqual(ga1(u"URI:stuff/file"), ("URI:stuff", "file")) self.failUnlessReallyEqual(ga1(u"URI:stuff:./file"), ("URI:stuff", "file")) self.failUnlessReallyEqual(ga1(u"URI:stuff/dir/file"), ("URI:stuff", "dir/file")) self.failUnlessReallyEqual(ga1(u"URI:stuff:./dir/file"), ("URI:stuff", "dir/file")) self.failUnlessRaises(common.UnknownAliasError, ga1, u"missing:") self.failUnlessRaises(common.UnknownAliasError, ga1, u"missing:dir") self.failUnlessRaises(common.UnknownAliasError, ga1, u"missing:dir/file") def ga2(path): return get_alias(aliases, path, None) self.failUnlessReallyEqual(ga2(u"bare"), (DefaultAliasMarker, "bare")) self.failUnlessReallyEqual(ga2(u"baredir/file"), (DefaultAliasMarker, "baredir/file")) self.failUnlessReallyEqual(ga2(u"baredir/file:7"), (DefaultAliasMarker, "baredir/file:7")) self.failUnlessReallyEqual(ga2(u"baredir/sub:1/file:7"), (DefaultAliasMarker, "baredir/sub:1/file:7")) self.failUnlessReallyEqual(ga2(u"tahoe:"), (TA, "")) self.failUnlessReallyEqual(ga2(u"tahoe:file"), (TA, "file")) self.failUnlessReallyEqual(ga2(u"tahoe:dir/file"), (TA, "dir/file")) # on windows, we really want c:foo to indicate a local file. # default==None is what 'tahoe cp' uses. if uses_lettercolon: self.failUnlessReallyEqual(ga2(u"c:"), (DefaultAliasMarker, "c:")) self.failUnlessReallyEqual(ga2(u"c:file"), (DefaultAliasMarker, "c:file")) self.failUnlessReallyEqual(ga2(u"c:dir/file"), (DefaultAliasMarker, "c:dir/file")) else: self.failUnlessReallyEqual(ga2(u"c:"), (CA, "")) self.failUnlessReallyEqual(ga2(u"c:file"), (CA, "file")) self.failUnlessReallyEqual(ga2(u"c:dir/file"), (CA, "dir/file")) self.failUnlessReallyEqual(ga2(u"work:"), (WA, "")) self.failUnlessReallyEqual(ga2(u"work:file"), (WA, "file")) self.failUnlessReallyEqual(ga2(u"work:dir/file"), (WA, "dir/file")) self.failUnlessReallyEqual(ga2(u"URI:stuff"), ("URI:stuff", "")) self.failUnlessReallyEqual(ga2(u"URI:stuff/file"), ("URI:stuff", "file")) self.failUnlessReallyEqual(ga2(u"URI:stuff:./file"), ("URI:stuff", "file")) self.failUnlessReallyEqual(ga2(u"URI:stuff/dir/file"), ("URI:stuff", "dir/file")) self.failUnlessReallyEqual(ga2(u"URI:stuff:./dir/file"), ("URI:stuff", "dir/file")) self.failUnlessRaises(common.UnknownAliasError, ga2, u"missing:") self.failUnlessRaises(common.UnknownAliasError, ga2, u"missing:dir") self.failUnlessRaises(common.UnknownAliasError, ga2, u"missing:dir/file") def ga3(path): old = common.pretend_platform_uses_lettercolon try: common.pretend_platform_uses_lettercolon = True retval = get_alias(aliases, path, None) finally: common.pretend_platform_uses_lettercolon = old return retval self.failUnlessReallyEqual(ga3(u"bare"), (DefaultAliasMarker, "bare")) self.failUnlessReallyEqual(ga3(u"baredir/file"), (DefaultAliasMarker, "baredir/file")) self.failUnlessReallyEqual(ga3(u"baredir/file:7"), (DefaultAliasMarker, "baredir/file:7")) self.failUnlessReallyEqual(ga3(u"baredir/sub:1/file:7"), (DefaultAliasMarker, "baredir/sub:1/file:7")) self.failUnlessReallyEqual(ga3(u"tahoe:"), (TA, "")) self.failUnlessReallyEqual(ga3(u"tahoe:file"), (TA, "file")) self.failUnlessReallyEqual(ga3(u"tahoe:dir/file"), (TA, "dir/file")) self.failUnlessReallyEqual(ga3(u"c:"), (DefaultAliasMarker, "c:")) self.failUnlessReallyEqual(ga3(u"c:file"), (DefaultAliasMarker, "c:file")) 
self.failUnlessReallyEqual(ga3(u"c:dir/file"), (DefaultAliasMarker, "c:dir/file")) self.failUnlessReallyEqual(ga3(u"work:"), (WA, "")) self.failUnlessReallyEqual(ga3(u"work:file"), (WA, "file")) self.failUnlessReallyEqual(ga3(u"work:dir/file"), (WA, "dir/file")) self.failUnlessReallyEqual(ga3(u"URI:stuff"), ("URI:stuff", "")) self.failUnlessReallyEqual(ga3(u"URI:stuff:./file"), ("URI:stuff", "file")) self.failUnlessReallyEqual(ga3(u"URI:stuff:./dir/file"), ("URI:stuff", "dir/file")) self.failUnlessRaises(common.UnknownAliasError, ga3, u"missing:") self.failUnlessRaises(common.UnknownAliasError, ga3, u"missing:dir") self.failUnlessRaises(common.UnknownAliasError, ga3, u"missing:dir/file") # calling get_alias with a path that doesn't include an alias and # default set to something that isn't in the aliases argument should # raise an UnknownAliasError. def ga4(path): return get_alias(aliases, path, u"badddefault:") self.failUnlessRaises(common.UnknownAliasError, ga4, u"afile") self.failUnlessRaises(common.UnknownAliasError, ga4, u"a/dir/path/") def ga5(path): old = common.pretend_platform_uses_lettercolon try: common.pretend_platform_uses_lettercolon = True retval = get_alias(aliases, path, u"baddefault:") finally: common.pretend_platform_uses_lettercolon = old return retval self.failUnlessRaises(common.UnknownAliasError, ga5, u"C:\\Windows") def test_alias_tolerance(self): def s128(c): return base32.b2a(c*(128/8)) def s256(c): return base32.b2a(c*(256/8)) TA = "URI:DIR2:%s:%s" % (s128("T"), s256("T")) aliases = {"present": TA, "future": "URI-FROM-FUTURE:ooh:aah"} def ga1(path): return get_alias(aliases, path, u"tahoe") self.failUnlessReallyEqual(ga1(u"present:file"), (TA, "file")) # this throws, via assert IDirnodeURI.providedBy(), since get_alias() # wants a dirnode, and the future cap gives us UnknownURI instead. 
self.failUnlessRaises(AssertionError, ga1, u"future:stuff") def test_listdir_unicode_good(self): filenames = [u'L\u00F4zane', u'Bern', u'Gen\u00E8ve'] # must be NFC for name in filenames: self.skip_if_cannot_represent_filename(name) basedir = "cli/common/listdir_unicode_good" fileutil.make_dirs(basedir) for name in filenames: open(os.path.join(unicode(basedir), name), "wb").close() for file in listdir_unicode(unicode(basedir)): self.failUnlessIn(normalize(file), filenames) def test_exception_catcher(self): self.basedir = "cli/exception_catcher" runner_mock = Mock() sys_exit_mock = Mock() stderr = StringIO() self.patch(sys, "argv", ["tahoe"]) self.patch(runner, "runner", runner_mock) self.patch(sys, "exit", sys_exit_mock) self.patch(sys, "stderr", stderr) exc = Exception("canary") def call_runner(args, install_node_control=True): raise exc runner_mock.side_effect = call_runner runner.run() self.failUnlessEqual(runner_mock.call_args_list, [call([], install_node_control=True)]) self.failUnlessEqual(sys_exit_mock.call_args_list, [call(1)]) self.failUnlessIn(str(exc), stderr.getvalue()) class Help(unittest.TestCase): def test_get(self): help = str(cli.GetOptions()) self.failUnlessIn(" [global-opts] get [options] REMOTE_FILE LOCAL_FILE", help) self.failUnlessIn("% tahoe get FOO |less", help) def test_put(self): help = str(cli.PutOptions()) self.failUnlessIn(" [global-opts] put [options] LOCAL_FILE REMOTE_FILE", help) self.failUnlessIn("% cat FILE | tahoe put", help) def test_ls(self): help = str(cli.ListOptions()) self.failUnlessIn(" [global-opts] ls [options] [PATH]", help) def test_unlink(self): help = str(cli.UnlinkOptions()) self.failUnlessIn(" [global-opts] unlink [options] REMOTE_FILE", help) def test_rm(self): help = str(cli.RmOptions()) self.failUnlessIn(" [global-opts] rm [options] REMOTE_FILE", help) def test_mv(self): help = str(cli.MvOptions()) self.failUnlessIn(" [global-opts] mv [options] FROM TO", help) self.failUnlessIn("Use 'tahoe mv' to move files", help) def test_cp(self): help = str(cli.CpOptions()) self.failUnlessIn(" [global-opts] cp [options] FROM.. 
TO", help) self.failUnlessIn("Use 'tahoe cp' to copy files", help) def test_ln(self): help = str(cli.LnOptions()) self.failUnlessIn(" [global-opts] ln [options] FROM_LINK TO_LINK", help) self.failUnlessIn("Use 'tahoe ln' to duplicate a link", help) def test_mkdir(self): help = str(cli.MakeDirectoryOptions()) self.failUnlessIn(" [global-opts] mkdir [options] [REMOTE_DIR]", help) self.failUnlessIn("Create a new directory", help) def test_backup(self): help = str(cli.BackupOptions()) self.failUnlessIn(" [global-opts] backup [options] FROM ALIAS:TO", help) def test_webopen(self): help = str(cli.WebopenOptions()) self.failUnlessIn(" [global-opts] webopen [options] [ALIAS:PATH]", help) def test_manifest(self): help = str(cli.ManifestOptions()) self.failUnlessIn(" [global-opts] manifest [options] [ALIAS:PATH]", help) def test_stats(self): help = str(cli.StatsOptions()) self.failUnlessIn(" [global-opts] stats [options] [ALIAS:PATH]", help) def test_check(self): help = str(cli.CheckOptions()) self.failUnlessIn(" [global-opts] check [options] [ALIAS:PATH]", help) def test_deep_check(self): help = str(cli.DeepCheckOptions()) self.failUnlessIn(" [global-opts] deep-check [options] [ALIAS:PATH]", help) def test_create_alias(self): help = str(cli.CreateAliasOptions()) self.failUnlessIn(" [global-opts] create-alias [options] ALIAS[:]", help) def test_add_alias(self): help = str(cli.AddAliasOptions()) self.failUnlessIn(" [global-opts] add-alias [options] ALIAS[:] DIRCAP", help) def test_list_aliases(self): help = str(cli.ListAliasesOptions()) self.failUnlessIn(" [global-opts] list-aliases [options]", help) def test_start(self): help = str(startstop_node.StartOptions()) self.failUnlessIn(" [global-opts] start [options] [NODEDIR]", help) def test_stop(self): help = str(startstop_node.StopOptions()) self.failUnlessIn(" [global-opts] stop [options] [NODEDIR]", help) def test_restart(self): help = str(startstop_node.RestartOptions()) self.failUnlessIn(" [global-opts] restart [options] [NODEDIR]", help) def test_run(self): help = str(startstop_node.RunOptions()) self.failUnlessIn(" [global-opts] run [options] [NODEDIR]", help) def test_create_client(self): help = str(create_node.CreateClientOptions()) self.failUnlessIn(" [global-opts] create-client [options] [NODEDIR]", help) def test_create_node(self): help = str(create_node.CreateNodeOptions()) self.failUnlessIn(" [global-opts] create-node [options] [NODEDIR]", help) def test_create_introducer(self): help = str(create_node.CreateIntroducerOptions()) self.failUnlessIn(" [global-opts] create-introducer [options] NODEDIR", help) def test_debug_trial(self): help = str(debug.TrialOptions()) self.failUnlessIn(" [global-opts] debug trial [options] [[file|package|module|TestCase|testmethod]...]", help) self.failUnlessIn("The 'tahoe debug trial' command uses the correct imports", help) def test_debug_flogtool(self): options = debug.FlogtoolOptions() help = str(options) self.failUnlessIn(" [global-opts] debug flogtool ", help) self.failUnlessIn("The 'tahoe debug flogtool' command uses the correct imports", help) for (option, shortcut, oClass, desc) in options.subCommands: subhelp = str(oClass()) self.failUnlessIn(" [global-opts] debug flogtool %s " % (option,), subhelp) class CreateAlias(GridTestMixin, CLITestMixin, unittest.TestCase): def _test_webopen(self, args, expected_url): o = runner.Options() o.parseOptions(["--node-directory", self.get_clientdir(), "webopen"] + list(args)) urls = [] rc = cli.webopen(o, urls.append) self.failUnlessReallyEqual(rc, 0) 
self.failUnlessReallyEqual(len(urls), 1) self.failUnlessReallyEqual(urls[0], expected_url) def test_create(self): self.basedir = "cli/CreateAlias/create" self.set_up_grid() aliasfile = os.path.join(self.get_clientdir(), "private", "aliases") d = self.do_cli("create-alias", "tahoe") def _done((rc,stdout,stderr)): self.failUnless("Alias 'tahoe' created" in stdout) self.failIf(stderr) aliases = get_aliases(self.get_clientdir()) self.failUnless("tahoe" in aliases) self.failUnless(aliases["tahoe"].startswith("URI:DIR2:")) d.addCallback(_done) d.addCallback(lambda res: self.do_cli("create-alias", "two:")) def _stash_urls(res): aliases = get_aliases(self.get_clientdir()) node_url_file = os.path.join(self.get_clientdir(), "node.url") nodeurl = fileutil.read(node_url_file).strip() self.welcome_url = nodeurl uribase = nodeurl + "uri/" self.tahoe_url = uribase + urllib.quote(aliases["tahoe"]) self.tahoe_subdir_url = self.tahoe_url + "/subdir" self.two_url = uribase + urllib.quote(aliases["two"]) self.two_uri = aliases["two"] d.addCallback(_stash_urls) d.addCallback(lambda res: self.do_cli("create-alias", "two")) # dup def _check_create_duplicate((rc,stdout,stderr)): self.failIfEqual(rc, 0) self.failUnless("Alias 'two' already exists!" in stderr) aliases = get_aliases(self.get_clientdir()) self.failUnlessReallyEqual(aliases["two"], self.two_uri) d.addCallback(_check_create_duplicate) d.addCallback(lambda res: self.do_cli("add-alias", "added", self.two_uri)) def _check_add((rc,stdout,stderr)): self.failUnlessReallyEqual(rc, 0) self.failUnless("Alias 'added' added" in stdout) d.addCallback(_check_add) # check add-alias with a duplicate d.addCallback(lambda res: self.do_cli("add-alias", "two", self.two_uri)) def _check_add_duplicate((rc,stdout,stderr)): self.failIfEqual(rc, 0) self.failUnless("Alias 'two' already exists!" in stderr) aliases = get_aliases(self.get_clientdir()) self.failUnlessReallyEqual(aliases["two"], self.two_uri) d.addCallback(_check_add_duplicate) # check create-alias and add-alias with invalid aliases def _check_invalid((rc,stdout,stderr)): self.failIfEqual(rc, 0) self.failUnlessIn("cannot contain", stderr) for invalid in ['foo:bar', 'foo bar', 'foobar::']: d.addCallback(lambda res, invalid=invalid: self.do_cli("create-alias", invalid)) d.addCallback(_check_invalid) d.addCallback(lambda res, invalid=invalid: self.do_cli("add-alias", invalid, self.two_uri)) d.addCallback(_check_invalid) def _test_urls(junk): self._test_webopen([], self.welcome_url) self._test_webopen(["/"], self.tahoe_url) self._test_webopen(["tahoe:"], self.tahoe_url) self._test_webopen(["tahoe:/"], self.tahoe_url) self._test_webopen(["tahoe:subdir"], self.tahoe_subdir_url) self._test_webopen(["-i", "tahoe:subdir"], self.tahoe_subdir_url+"?t=info") self._test_webopen(["tahoe:subdir/"], self.tahoe_subdir_url + '/') self._test_webopen(["tahoe:subdir/file"], self.tahoe_subdir_url + '/file') self._test_webopen(["--info", "tahoe:subdir/file"], self.tahoe_subdir_url + '/file?t=info') # if "file" is indeed a file, then the url produced by webopen in # this case is disallowed by the webui. 
but by design, webopen # passes through the mistake from the user to the resultant # webopened url self._test_webopen(["tahoe:subdir/file/"], self.tahoe_subdir_url + '/file/') self._test_webopen(["two:"], self.two_url) d.addCallback(_test_urls) def _remove_trailing_newline_and_create_alias(ign): # ticket #741 is about a manually-edited alias file (which # doesn't end in a newline) being corrupted by a subsequent # "tahoe create-alias" old = fileutil.read(aliasfile) fileutil.write(aliasfile, old.rstrip()) return self.do_cli("create-alias", "un-corrupted1") d.addCallback(_remove_trailing_newline_and_create_alias) def _check_not_corrupted1((rc,stdout,stderr)): self.failUnless("Alias 'un-corrupted1' created" in stdout, stdout) self.failIf(stderr) # the old behavior was to simply append the new record, causing a # line that looked like "NAME1: CAP1NAME2: CAP2". This won't look # like a valid dircap, so get_aliases() will raise an exception. aliases = get_aliases(self.get_clientdir()) self.failUnless("added" in aliases) self.failUnless(aliases["added"].startswith("URI:DIR2:")) # to be safe, let's confirm that we don't see "NAME2:" in CAP1. # No chance of a false-negative, because the hyphen in # "un-corrupted1" is not a valid base32 character. self.failIfIn("un-corrupted1:", aliases["added"]) self.failUnless("un-corrupted1" in aliases) self.failUnless(aliases["un-corrupted1"].startswith("URI:DIR2:")) d.addCallback(_check_not_corrupted1) def _remove_trailing_newline_and_add_alias(ign): # same thing, but for "tahoe add-alias" old = fileutil.read(aliasfile) fileutil.write(aliasfile, old.rstrip()) return self.do_cli("add-alias", "un-corrupted2", self.two_uri) d.addCallback(_remove_trailing_newline_and_add_alias) def _check_not_corrupted((rc,stdout,stderr)): self.failUnless("Alias 'un-corrupted2' added" in stdout, stdout) self.failIf(stderr) aliases = get_aliases(self.get_clientdir()) self.failUnless("un-corrupted1" in aliases) self.failUnless(aliases["un-corrupted1"].startswith("URI:DIR2:")) self.failIfIn("un-corrupted2:", aliases["un-corrupted1"]) self.failUnless("un-corrupted2" in aliases) self.failUnless(aliases["un-corrupted2"].startswith("URI:DIR2:")) d.addCallback(_check_not_corrupted) def test_create_unicode(self): self.basedir = "cli/CreateAlias/create_unicode" self.set_up_grid() try: etudes_arg = u"\u00E9tudes".encode(get_io_encoding()) lumiere_arg = u"lumi\u00E8re.txt".encode(get_io_encoding()) except UnicodeEncodeError: raise unittest.SkipTest("A non-ASCII command argument could not be encoded on this platform.") d = self.do_cli("create-alias", etudes_arg) def _check_create_unicode((rc, out, err)): self.failUnlessReallyEqual(rc, 0) self.failUnlessReallyEqual(err, "") self.failUnlessIn("Alias %s created" % quote_output(u"\u00E9tudes"), out) aliases = get_aliases(self.get_clientdir()) self.failUnless(aliases[u"\u00E9tudes"].startswith("URI:DIR2:")) d.addCallback(_check_create_unicode) d.addCallback(lambda res: self.do_cli("ls", etudes_arg + ":")) def _check_ls1((rc, out, err)): self.failUnlessReallyEqual(rc, 0) self.failUnlessReallyEqual(err, "") self.failUnlessReallyEqual(out, "") d.addCallback(_check_ls1) d.addCallback(lambda res: self.do_cli("put", "-", etudes_arg + ":uploaded.txt", stdin="Blah blah blah")) d.addCallback(lambda res: self.do_cli("ls", etudes_arg + ":")) def _check_ls2((rc, out, err)): self.failUnlessReallyEqual(rc, 0) self.failUnlessReallyEqual(err, "") self.failUnlessReallyEqual(out, "uploaded.txt\n") d.addCallback(_check_ls2) d.addCallback(lambda res: self.do_cli("get", 
etudes_arg + ":uploaded.txt")) def _check_get((rc, out, err)): self.failUnlessReallyEqual(rc, 0) self.failUnlessReallyEqual(err, "") self.failUnlessReallyEqual(out, "Blah blah blah") d.addCallback(_check_get) # Ensure that an Unicode filename in an Unicode alias works as expected d.addCallback(lambda res: self.do_cli("put", "-", etudes_arg + ":" + lumiere_arg, stdin="Let the sunshine In!")) d.addCallback(lambda res: self.do_cli("get", get_aliases(self.get_clientdir())[u"\u00E9tudes"] + "/" + lumiere_arg)) def _check_get2((rc, out, err)): self.failUnlessReallyEqual(rc, 0) self.failUnlessReallyEqual(err, "") self.failUnlessReallyEqual(out, "Let the sunshine In!") d.addCallback(_check_get2) return d # TODO: test list-aliases, including Unicode class Ln(GridTestMixin, CLITestMixin, unittest.TestCase): def _create_test_file(self): data = "puppies" * 1000 path = os.path.join(self.basedir, "datafile") fileutil.write(path, data) self.datafile = path def test_ln_without_alias(self): # if invoked without an alias when the 'tahoe' alias doesn't # exist, 'tahoe ln' should output a useful error message and not # a stack trace self.basedir = "cli/Ln/ln_without_alias" self.set_up_grid() d = self.do_cli("ln", "from", "to") def _check((rc, out, err)): self.failUnlessReallyEqual(rc, 1) self.failUnlessIn("error:", err) self.failUnlessReallyEqual(out, "") d.addCallback(_check) # Make sure that validation extends to the "to" parameter d.addCallback(lambda ign: self.do_cli("create-alias", "havasu")) d.addCallback(lambda ign: self._create_test_file()) d.addCallback(lambda ign: self.do_cli("put", self.datafile, "havasu:from")) d.addCallback(lambda ign: self.do_cli("ln", "havasu:from", "to")) d.addCallback(_check) return d def test_ln_with_nonexistent_alias(self): # If invoked with aliases that don't exist, 'tahoe ln' should # output a useful error message and not a stack trace. self.basedir = "cli/Ln/ln_with_nonexistent_alias" self.set_up_grid() d = self.do_cli("ln", "havasu:from", "havasu:to") def _check((rc, out, err)): self.failUnlessReallyEqual(rc, 1) self.failUnlessIn("error:", err) d.addCallback(_check) # Make sure that validation occurs on the to parameter if the # from parameter passes. 
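        # (The callbacks below create the "havasu" alias and a real source
        # file first, so when _check runs again the only remaining error can
        # come from the nonexistent "huron" alias on the "to" side.)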
d.addCallback(lambda ign: self.do_cli("create-alias", "havasu")) d.addCallback(lambda ign: self._create_test_file()) d.addCallback(lambda ign: self.do_cli("put", self.datafile, "havasu:from")) d.addCallback(lambda ign: self.do_cli("ln", "havasu:from", "huron:to")) d.addCallback(_check) return d class Put(GridTestMixin, CLITestMixin, unittest.TestCase): def test_unlinked_immutable_stdin(self): # tahoe get `echo DATA | tahoe put` # tahoe get `echo DATA | tahoe put -` self.basedir = "cli/Put/unlinked_immutable_stdin" DATA = "data" * 100 self.set_up_grid() d = self.do_cli("put", stdin=DATA) def _uploaded(res): (rc, out, err) = res self.failUnlessIn("waiting for file data on stdin..", err) self.failUnlessIn("200 OK", err) self.readcap = out self.failUnless(self.readcap.startswith("URI:CHK:")) d.addCallback(_uploaded) d.addCallback(lambda res: self.do_cli("get", self.readcap)) def _downloaded(res): (rc, out, err) = res self.failUnlessReallyEqual(err, "") self.failUnlessReallyEqual(out, DATA) d.addCallback(_downloaded) d.addCallback(lambda res: self.do_cli("put", "-", stdin=DATA)) d.addCallback(lambda (rc, out, err): self.failUnlessReallyEqual(out, self.readcap)) return d def test_unlinked_immutable_from_file(self): # tahoe put file.txt # tahoe put ./file.txt # tahoe put /tmp/file.txt # tahoe put ~/file.txt self.basedir = "cli/Put/unlinked_immutable_from_file" self.set_up_grid() rel_fn = os.path.join(self.basedir, "DATAFILE") abs_fn = unicode_to_argv(abspath_expanduser_unicode(unicode(rel_fn))) # we make the file small enough to fit in a LIT file, for speed fileutil.write(rel_fn, "short file") d = self.do_cli("put", rel_fn) def _uploaded((rc, out, err)): readcap = out self.failUnless(readcap.startswith("URI:LIT:"), readcap) self.readcap = readcap d.addCallback(_uploaded) d.addCallback(lambda res: self.do_cli("put", "./" + rel_fn)) d.addCallback(lambda (rc,stdout,stderr): self.failUnlessReallyEqual(stdout, self.readcap)) d.addCallback(lambda res: self.do_cli("put", abs_fn)) d.addCallback(lambda (rc,stdout,stderr): self.failUnlessReallyEqual(stdout, self.readcap)) # we just have to assume that ~ is handled properly return d def test_immutable_from_file(self): # tahoe put file.txt uploaded.txt # tahoe - uploaded.txt # tahoe put file.txt subdir/uploaded.txt # tahoe put file.txt tahoe:uploaded.txt # tahoe put file.txt tahoe:subdir/uploaded.txt # tahoe put file.txt DIRCAP:./uploaded.txt # tahoe put file.txt DIRCAP:./subdir/uploaded.txt self.basedir = "cli/Put/immutable_from_file" self.set_up_grid() rel_fn = os.path.join(self.basedir, "DATAFILE") # we make the file small enough to fit in a LIT file, for speed DATA = "short file" DATA2 = "short file two" fileutil.write(rel_fn, DATA) d = self.do_cli("create-alias", "tahoe") d.addCallback(lambda res: self.do_cli("put", rel_fn, "uploaded.txt")) def _uploaded((rc, out, err)): readcap = out.strip() self.failUnless(readcap.startswith("URI:LIT:"), readcap) self.failUnlessIn("201 Created", err) self.readcap = readcap d.addCallback(_uploaded) d.addCallback(lambda res: self.do_cli("get", "tahoe:uploaded.txt")) d.addCallback(lambda (rc,stdout,stderr): self.failUnlessReallyEqual(stdout, DATA)) d.addCallback(lambda res: self.do_cli("put", "-", "uploaded.txt", stdin=DATA2)) def _replaced((rc, out, err)): readcap = out.strip() self.failUnless(readcap.startswith("URI:LIT:"), readcap) self.failUnlessIn("200 OK", err) d.addCallback(_replaced) d.addCallback(lambda res: self.do_cli("put", rel_fn, "subdir/uploaded2.txt")) d.addCallback(lambda res: self.do_cli("get", 
"subdir/uploaded2.txt")) d.addCallback(lambda (rc,stdout,stderr): self.failUnlessReallyEqual(stdout, DATA)) d.addCallback(lambda res: self.do_cli("put", rel_fn, "tahoe:uploaded3.txt")) d.addCallback(lambda res: self.do_cli("get", "tahoe:uploaded3.txt")) d.addCallback(lambda (rc,stdout,stderr): self.failUnlessReallyEqual(stdout, DATA)) d.addCallback(lambda res: self.do_cli("put", rel_fn, "tahoe:subdir/uploaded4.txt")) d.addCallback(lambda res: self.do_cli("get", "tahoe:subdir/uploaded4.txt")) d.addCallback(lambda (rc,stdout,stderr): self.failUnlessReallyEqual(stdout, DATA)) def _get_dircap(res): self.dircap = get_aliases(self.get_clientdir())["tahoe"] d.addCallback(_get_dircap) d.addCallback(lambda res: self.do_cli("put", rel_fn, self.dircap+":./uploaded5.txt")) d.addCallback(lambda res: self.do_cli("get", "tahoe:uploaded5.txt")) d.addCallback(lambda (rc,stdout,stderr): self.failUnlessReallyEqual(stdout, DATA)) d.addCallback(lambda res: self.do_cli("put", rel_fn, self.dircap+":./subdir/uploaded6.txt")) d.addCallback(lambda res: self.do_cli("get", "tahoe:subdir/uploaded6.txt")) d.addCallback(lambda (rc,stdout,stderr): self.failUnlessReallyEqual(stdout, DATA)) return d def test_mutable_unlinked(self): # FILECAP = `echo DATA | tahoe put --mutable` # tahoe get FILECAP, compare against DATA # echo DATA2 | tahoe put - FILECAP # tahoe get FILECAP, compare against DATA2 # tahoe put file.txt FILECAP self.basedir = "cli/Put/mutable_unlinked" self.set_up_grid() DATA = "data" * 100 DATA2 = "two" * 100 rel_fn = os.path.join(self.basedir, "DATAFILE") DATA3 = "three" * 100 fileutil.write(rel_fn, DATA3) d = self.do_cli("put", "--mutable", stdin=DATA) def _created(res): (rc, out, err) = res self.failUnlessIn("waiting for file data on stdin..", err) self.failUnlessIn("200 OK", err) self.filecap = out self.failUnless(self.filecap.startswith("URI:SSK:"), self.filecap) d.addCallback(_created) d.addCallback(lambda res: self.do_cli("get", self.filecap)) d.addCallback(lambda (rc,out,err): self.failUnlessReallyEqual(out, DATA)) d.addCallback(lambda res: self.do_cli("put", "-", self.filecap, stdin=DATA2)) def _replaced(res): (rc, out, err) = res self.failUnlessIn("waiting for file data on stdin..", err) self.failUnlessIn("200 OK", err) self.failUnlessReallyEqual(self.filecap, out) d.addCallback(_replaced) d.addCallback(lambda res: self.do_cli("get", self.filecap)) d.addCallback(lambda (rc,out,err): self.failUnlessReallyEqual(out, DATA2)) d.addCallback(lambda res: self.do_cli("put", rel_fn, self.filecap)) def _replaced2(res): (rc, out, err) = res self.failUnlessIn("200 OK", err) self.failUnlessReallyEqual(self.filecap, out) d.addCallback(_replaced2) d.addCallback(lambda res: self.do_cli("get", self.filecap)) d.addCallback(lambda (rc,out,err): self.failUnlessReallyEqual(out, DATA3)) return d def test_mutable(self): # echo DATA1 | tahoe put --mutable - uploaded.txt # echo DATA2 | tahoe put - uploaded.txt # should modify-in-place # tahoe get uploaded.txt, compare against DATA2 self.basedir = "cli/Put/mutable" self.set_up_grid() DATA1 = "data" * 100 fn1 = os.path.join(self.basedir, "DATA1") fileutil.write(fn1, DATA1) DATA2 = "two" * 100 fn2 = os.path.join(self.basedir, "DATA2") fileutil.write(fn2, DATA2) d = self.do_cli("create-alias", "tahoe") d.addCallback(lambda res: self.do_cli("put", "--mutable", fn1, "tahoe:uploaded.txt")) def _check(res): (rc, out, err) = res self.failUnlessEqual(rc, 0, str(res)) self.failUnlessEqual(err.strip(), "201 Created", str(res)) self.uri = out d.addCallback(_check) d.addCallback(lambda 
res: self.do_cli("put", fn2, "tahoe:uploaded.txt")) def _check2(res): (rc, out, err) = res self.failUnlessEqual(rc, 0, str(res)) self.failUnlessEqual(err.strip(), "200 OK", str(res)) self.failUnlessEqual(out, self.uri, str(res)) d.addCallback(_check2) d.addCallback(lambda res: self.do_cli("get", "tahoe:uploaded.txt")) d.addCallback(lambda (rc,out,err): self.failUnlessReallyEqual(out, DATA2)) return d def _check_mdmf_json(self, (rc, json, err)): self.failUnlessEqual(rc, 0) self.failUnlessEqual(err, "") self.failUnlessIn('"format": "MDMF"', json) # We also want a valid MDMF cap to be in the json. self.failUnlessIn("URI:MDMF", json) self.failUnlessIn("URI:MDMF-RO", json) self.failUnlessIn("URI:MDMF-Verifier", json) def _check_sdmf_json(self, (rc, json, err)): self.failUnlessEqual(rc, 0) self.failUnlessEqual(err, "") self.failUnlessIn('"format": "SDMF"', json) # We also want to see the appropriate SDMF caps. self.failUnlessIn("URI:SSK", json) self.failUnlessIn("URI:SSK-RO", json) self.failUnlessIn("URI:SSK-Verifier", json) def _check_chk_json(self, (rc, json, err)): self.failUnlessEqual(rc, 0) self.failUnlessEqual(err, "") self.failUnlessIn('"format": "CHK"', json) # We also want to see the appropriate CHK caps. self.failUnlessIn("URI:CHK", json) self.failUnlessIn("URI:CHK-Verifier", json) def test_format(self): self.basedir = "cli/Put/format" self.set_up_grid() data = "data" * 40000 # 160kB total, two segments fn1 = os.path.join(self.basedir, "data") fileutil.write(fn1, data) d = self.do_cli("create-alias", "tahoe") def _put_and_ls(ign, cmdargs, expected, filename=None): if filename: args = ["put"] + cmdargs + [fn1, filename] else: # unlinked args = ["put"] + cmdargs + [fn1] d2 = self.do_cli(*args) def _list((rc, out, err)): self.failUnlessEqual(rc, 0) # don't allow failure if filename: return self.do_cli("ls", "--json", filename) else: cap = out.strip() return self.do_cli("ls", "--json", cap) d2.addCallback(_list) return d2 # 'tahoe put' to a directory d.addCallback(_put_and_ls, ["--mutable"], "SDMF", "tahoe:s1.txt") d.addCallback(self._check_sdmf_json) # backwards-compatibility d.addCallback(_put_and_ls, ["--format=SDMF"], "SDMF", "tahoe:s2.txt") d.addCallback(self._check_sdmf_json) d.addCallback(_put_and_ls, ["--format=sdmf"], "SDMF", "tahoe:s3.txt") d.addCallback(self._check_sdmf_json) d.addCallback(_put_and_ls, ["--mutable", "--format=SDMF"], "SDMF", "tahoe:s4.txt") d.addCallback(self._check_sdmf_json) d.addCallback(_put_and_ls, ["--format=MDMF"], "MDMF", "tahoe:m1.txt") d.addCallback(self._check_mdmf_json) d.addCallback(_put_and_ls, ["--mutable", "--format=MDMF"], "MDMF", "tahoe:m2.txt") d.addCallback(self._check_mdmf_json) d.addCallback(_put_and_ls, ["--format=CHK"], "CHK", "tahoe:c1.txt") d.addCallback(self._check_chk_json) d.addCallback(_put_and_ls, [], "CHK", "tahoe:c1.txt") d.addCallback(self._check_chk_json) # 'tahoe put' unlinked d.addCallback(_put_and_ls, ["--mutable"], "SDMF") d.addCallback(self._check_sdmf_json) # backwards-compatibility d.addCallback(_put_and_ls, ["--format=SDMF"], "SDMF") d.addCallback(self._check_sdmf_json) d.addCallback(_put_and_ls, ["--format=sdmf"], "SDMF") d.addCallback(self._check_sdmf_json) d.addCallback(_put_and_ls, ["--mutable", "--format=SDMF"], "SDMF") d.addCallback(self._check_sdmf_json) d.addCallback(_put_and_ls, ["--format=MDMF"], "MDMF") d.addCallback(self._check_mdmf_json) d.addCallback(_put_and_ls, ["--mutable", "--format=MDMF"], "MDMF") d.addCallback(self._check_mdmf_json) d.addCallback(_put_and_ls, ["--format=CHK"], "CHK") 
d.addCallback(self._check_chk_json) d.addCallback(_put_and_ls, [], "CHK") d.addCallback(self._check_chk_json) return d def test_put_to_mdmf_cap(self): self.basedir = "cli/Put/put_to_mdmf_cap" self.set_up_grid() data = "data" * 100000 fn1 = os.path.join(self.basedir, "data") fileutil.write(fn1, data) d = self.do_cli("put", "--format=MDMF", fn1) def _got_cap((rc, out, err)): self.failUnlessEqual(rc, 0) self.cap = out.strip() d.addCallback(_got_cap) # Now try to write something to the cap using put. data2 = "data2" * 100000 fn2 = os.path.join(self.basedir, "data2") fileutil.write(fn2, data2) d.addCallback(lambda ignored: self.do_cli("put", fn2, self.cap)) def _got_put((rc, out, err)): self.failUnlessEqual(rc, 0) self.failUnlessIn(self.cap, out) d.addCallback(_got_put) # Now get the cap. We should see the data we just put there. d.addCallback(lambda ignored: self.do_cli("get", self.cap)) def _got_data((rc, out, err)): self.failUnlessEqual(rc, 0) self.failUnlessEqual(out, data2) d.addCallback(_got_data) # add some extension information to the cap and try to put something # to it. def _make_extended_cap(ignored): self.cap = self.cap + ":Extension-Stuff" d.addCallback(_make_extended_cap) data3 = "data3" * 100000 fn3 = os.path.join(self.basedir, "data3") fileutil.write(fn3, data3) d.addCallback(lambda ignored: self.do_cli("put", fn3, self.cap)) d.addCallback(lambda ignored: self.do_cli("get", self.cap)) def _got_data3((rc, out, err)): self.failUnlessEqual(rc, 0) self.failUnlessEqual(out, data3) d.addCallback(_got_data3) return d def test_put_to_sdmf_cap(self): self.basedir = "cli/Put/put_to_sdmf_cap" self.set_up_grid() data = "data" * 100000 fn1 = os.path.join(self.basedir, "data") fileutil.write(fn1, data) d = self.do_cli("put", "--format=SDMF", fn1) def _got_cap((rc, out, err)): self.failUnlessEqual(rc, 0) self.cap = out.strip() d.addCallback(_got_cap) # Now try to write something to the cap using put. data2 = "data2" * 100000 fn2 = os.path.join(self.basedir, "data2") fileutil.write(fn2, data2) d.addCallback(lambda ignored: self.do_cli("put", fn2, self.cap)) def _got_put((rc, out, err)): self.failUnlessEqual(rc, 0) self.failUnlessIn(self.cap, out) d.addCallback(_got_put) # Now get the cap. We should see the data we just put there. 
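        # In shell terms, the sequence being exercised here is roughly
        # (sketch only; the cap value is whatever the first put printed):
        #   CAP=$(tahoe put --format=SDMF data)
        #   tahoe put data2 $CAP     # overwrite the mutable file in place
        #   tahoe get $CAP           # should now return the data2 contents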
d.addCallback(lambda ignored: self.do_cli("get", self.cap)) def _got_data((rc, out, err)): self.failUnlessEqual(rc, 0) self.failUnlessEqual(out, data2) d.addCallback(_got_data) return d def test_mutable_type_invalid_format(self): o = cli.PutOptions() self.failUnlessRaises(usage.UsageError, o.parseOptions, ["--format=LDMF"]) def test_put_with_nonexistent_alias(self): # when invoked with an alias that doesn't exist, 'tahoe put' # should output a useful error message, not a stack trace self.basedir = "cli/Put/put_with_nonexistent_alias" self.set_up_grid() d = self.do_cli("put", "somefile", "fake:afile") def _check((rc, out, err)): self.failUnlessReallyEqual(rc, 1) self.failUnlessIn("error:", err) self.failUnlessReallyEqual(out, "") d.addCallback(_check) return d def test_immutable_from_file_unicode(self): # tahoe put "\u00E0 trier.txt" "\u00E0 trier.txt" try: a_trier_arg = u"\u00E0 trier.txt".encode(get_io_encoding()) except UnicodeEncodeError: raise unittest.SkipTest("A non-ASCII command argument could not be encoded on this platform.") self.skip_if_cannot_represent_filename(u"\u00E0 trier.txt") self.basedir = "cli/Put/immutable_from_file_unicode" self.set_up_grid() rel_fn = os.path.join(unicode(self.basedir), u"\u00E0 trier.txt") # we make the file small enough to fit in a LIT file, for speed DATA = "short file" fileutil.write(rel_fn, DATA) d = self.do_cli("create-alias", "tahoe") d.addCallback(lambda res: self.do_cli("put", rel_fn.encode(get_io_encoding()), a_trier_arg)) def _uploaded((rc, out, err)): readcap = out.strip() self.failUnless(readcap.startswith("URI:LIT:"), readcap) self.failUnlessIn("201 Created", err) self.readcap = readcap d.addCallback(_uploaded) d.addCallback(lambda res: self.do_cli("get", "tahoe:" + a_trier_arg)) d.addCallback(lambda (rc, out, err): self.failUnlessReallyEqual(out, DATA)) return d class Admin(unittest.TestCase): def do_cli(self, *args, **kwargs): argv = list(args) stdin = kwargs.get("stdin", "") stdout, stderr = StringIO(), StringIO() d = threads.deferToThread(runner.runner, argv, run_by_human=False, stdin=StringIO(stdin), stdout=stdout, stderr=stderr) def _done(res): return stdout.getvalue(), stderr.getvalue() d.addCallback(_done) return d def test_generate_keypair(self): d = self.do_cli("admin", "generate-keypair") def _done( (stdout, stderr) ): lines = [line.strip() for line in stdout.splitlines()] privkey_bits = lines[0].split() pubkey_bits = lines[1].split() sk_header = "private:" vk_header = "public:" self.failUnlessEqual(privkey_bits[0], sk_header, lines[0]) self.failUnlessEqual(pubkey_bits[0], vk_header, lines[1]) self.failUnless(privkey_bits[1].startswith("priv-v0-"), lines[0]) self.failUnless(pubkey_bits[1].startswith("pub-v0-"), lines[1]) sk_bytes = base32.a2b(keyutil.remove_prefix(privkey_bits[1], "priv-v0-")) sk = ed25519.SigningKey(sk_bytes) vk_bytes = base32.a2b(keyutil.remove_prefix(pubkey_bits[1], "pub-v0-")) self.failUnlessEqual(sk.get_verifying_key_bytes(), vk_bytes) d.addCallback(_done) return d def test_derive_pubkey(self): priv1,pub1 = keyutil.make_keypair() d = self.do_cli("admin", "derive-pubkey", priv1) def _done( (stdout, stderr) ): lines = stdout.split("\n") privkey_line = lines[0].strip() pubkey_line = lines[1].strip() sk_header = "private: priv-v0-" vk_header = "public: pub-v0-" self.failUnless(privkey_line.startswith(sk_header), privkey_line) self.failUnless(pubkey_line.startswith(vk_header), pubkey_line) pub2 = pubkey_line[len(vk_header):] self.failUnlessEqual("pub-v0-"+pub2, pub1) d.addCallback(_done) return d class 
List(GridTestMixin, CLITestMixin, unittest.TestCase): def test_list(self): self.basedir = "cli/List/list" self.set_up_grid() c0 = self.g.clients[0] small = "small" # u"g\u00F6\u00F6d" might not be representable in the argv and/or output encodings. # It is initially included in the directory in any case. try: good_arg = u"g\u00F6\u00F6d".encode(get_io_encoding()) except UnicodeEncodeError: good_arg = None try: good_out = u"g\u00F6\u00F6d".encode(get_io_encoding()) except UnicodeEncodeError: good_out = None d = c0.create_dirnode() def _stash_root_and_create_file(n): self.rootnode = n self.rooturi = n.get_uri() return n.add_file(u"g\u00F6\u00F6d", upload.Data(small, convergence="")) d.addCallback(_stash_root_and_create_file) def _stash_goodcap(n): self.goodcap = n.get_uri() d.addCallback(_stash_goodcap) d.addCallback(lambda ign: self.rootnode.create_subdirectory(u"1share")) d.addCallback(lambda n: self.delete_shares_numbered(n.get_uri(), range(1,10))) d.addCallback(lambda ign: self.rootnode.create_subdirectory(u"0share")) d.addCallback(lambda n: self.delete_shares_numbered(n.get_uri(), range(0,10))) d.addCallback(lambda ign: self.do_cli("add-alias", "tahoe", self.rooturi)) d.addCallback(lambda ign: self.do_cli("ls")) def _check1((rc,out,err)): if good_out is None: self.failUnlessReallyEqual(rc, 1) self.failUnlessIn("files whose names could not be converted", err) self.failUnlessIn(quote_output(u"g\u00F6\u00F6d"), err) self.failUnlessReallyEqual(sorted(out.splitlines()), sorted(["0share", "1share"])) else: self.failUnlessReallyEqual(rc, 0) self.failUnlessReallyEqual(err, "") self.failUnlessReallyEqual(sorted(out.splitlines()), sorted(["0share", "1share", good_out])) d.addCallback(_check1) d.addCallback(lambda ign: self.do_cli("ls", "missing")) def _check2((rc,out,err)): self.failIfEqual(rc, 0) self.failUnlessReallyEqual(err.strip(), "No such file or directory") self.failUnlessReallyEqual(out, "") d.addCallback(_check2) d.addCallback(lambda ign: self.do_cli("ls", "1share")) def _check3((rc,out,err)): self.failIfEqual(rc, 0) self.failUnlessIn("Error during GET: 410 Gone", err) self.failUnlessIn("UnrecoverableFileError:", err) self.failUnlessIn("could not be retrieved, because there were " "insufficient good shares.", err) self.failUnlessReallyEqual(out, "") d.addCallback(_check3) d.addCallback(lambda ign: self.do_cli("ls", "0share")) d.addCallback(_check3) def _check4((rc, out, err)): if good_out is None: self.failUnlessReallyEqual(rc, 1) self.failUnlessIn("files whose names could not be converted", err) self.failUnlessIn(quote_output(u"g\u00F6\u00F6d"), err) self.failUnlessReallyEqual(out, "") else: # listing a file (as dir/filename) should have the edge metadata, # including the filename self.failUnlessReallyEqual(rc, 0) self.failUnlessIn(good_out, out) self.failIfIn("-r-- %d -" % len(small), out, "trailing hyphen means unknown date") if good_arg is not None: d.addCallback(lambda ign: self.do_cli("ls", "-l", good_arg)) d.addCallback(_check4) # listing a file as $DIRCAP/filename should work just like dir/filename d.addCallback(lambda ign: self.do_cli("ls", "-l", self.rooturi + "/" + good_arg)) d.addCallback(_check4) # and similarly for $DIRCAP:./filename d.addCallback(lambda ign: self.do_cli("ls", "-l", self.rooturi + ":./" + good_arg)) d.addCallback(_check4) def _check5((rc, out, err)): # listing a raw filecap should not explode, but it will have no # metadata, just the size self.failUnlessReallyEqual(rc, 0) self.failUnlessReallyEqual("-r-- %d -" % len(small), out.strip()) d.addCallback(lambda 
ign: self.do_cli("ls", "-l", self.goodcap)) d.addCallback(_check5) # Now rename 'g\u00F6\u00F6d' to 'good' and repeat the tests that might have been skipped due # to encoding problems. d.addCallback(lambda ign: self.rootnode.move_child_to(u"g\u00F6\u00F6d", self.rootnode, u"good")) d.addCallback(lambda ign: self.do_cli("ls")) def _check1_ascii((rc,out,err)): self.failUnlessReallyEqual(rc, 0) self.failUnlessReallyEqual(err, "") self.failUnlessReallyEqual(sorted(out.splitlines()), sorted(["0share", "1share", "good"])) d.addCallback(_check1_ascii) def _check4_ascii((rc, out, err)): # listing a file (as dir/filename) should have the edge metadata, # including the filename self.failUnlessReallyEqual(rc, 0) self.failUnlessIn("good", out) self.failIfIn("-r-- %d -" % len(small), out, "trailing hyphen means unknown date") d.addCallback(lambda ign: self.do_cli("ls", "-l", "good")) d.addCallback(_check4_ascii) # listing a file as $DIRCAP/filename should work just like dir/filename d.addCallback(lambda ign: self.do_cli("ls", "-l", self.rooturi + "/good")) d.addCallback(_check4_ascii) # and similarly for $DIRCAP:./filename d.addCallback(lambda ign: self.do_cli("ls", "-l", self.rooturi + ":./good")) d.addCallback(_check4_ascii) unknown_immcap = "imm.URI:unknown" def _create_unknown(ign): nm = c0.nodemaker kids = {u"unknownchild-imm": (nm.create_from_cap(unknown_immcap), {})} return self.rootnode.create_subdirectory(u"unknown", initial_children=kids, mutable=False) d.addCallback(_create_unknown) def _check6((rc, out, err)): # listing a directory referencing an unknown object should print # an extra message to stderr self.failUnlessReallyEqual(rc, 0) self.failUnlessIn("?r-- ? - unknownchild-imm\n", out) self.failUnlessIn("included unknown objects", err) d.addCallback(lambda ign: self.do_cli("ls", "-l", "unknown")) d.addCallback(_check6) def _check7((rc, out, err)): # listing an unknown cap directly should print an extra message # to stderr (currently this only works if the URI starts with 'URI:' # after any 'ro.' or 'imm.' prefix, otherwise it will be confused # with an alias). self.failUnlessReallyEqual(rc, 0) self.failUnlessIn("?r-- ? -\n", out) self.failUnlessIn("included unknown objects", err) d.addCallback(lambda ign: self.do_cli("ls", "-l", unknown_immcap)) d.addCallback(_check7) return d def test_list_without_alias(self): # doing just 'tahoe ls' without specifying an alias or first # doing 'tahoe create-alias tahoe' should fail gracefully. self.basedir = "cli/List/list_without_alias" self.set_up_grid() d = self.do_cli("ls") def _check((rc, out, err)): self.failUnlessReallyEqual(rc, 1) self.failUnlessIn("error:", err) self.failUnlessReallyEqual(out, "") d.addCallback(_check) return d def test_list_with_nonexistent_alias(self): # doing 'tahoe ls' while specifying an alias that doesn't already # exist should fail with an informative error message self.basedir = "cli/List/list_with_nonexistent_alias" self.set_up_grid() d = self.do_cli("ls", "nonexistent:") def _check((rc, out, err)): self.failUnlessReallyEqual(rc, 1) self.failUnlessIn("error:", err) self.failUnlessIn("nonexistent", err) self.failUnlessReallyEqual(out, "") d.addCallback(_check) return d def _create_directory_structure(self): # Create a simple directory structure that we can use for MDMF, # SDMF, and immutable testing. assert self.g client = self.g.clients[0] # Create a dirnode d = client.create_dirnode() def _got_rootnode(n): # Add a few nodes. 
            self._dircap = n.get_uri()
            nm = n._nodemaker
            # The uploaders may run at the same time, so we need two
            # MutableData instances or they'll fight over offsets &c and
            # break.
            mutable_data = MutableData("data" * 100000)
            mutable_data2 = MutableData("data" * 100000)
            # Add both kinds of mutable node.
            d1 = nm.create_mutable_file(mutable_data,
                                        version=MDMF_VERSION)
            d2 = nm.create_mutable_file(mutable_data2,
                                        version=SDMF_VERSION)
            # Add an immutable node. We do this through the directory,
            # with add_file.
            immutable_data = upload.Data("immutable data" * 100000,
                                         convergence="")
            d3 = n.add_file(u"immutable", immutable_data)
            ds = [d1, d2, d3]
            dl = defer.DeferredList(ds)
            def _made_files((r1, r2, r3)):
                self.failUnless(r1[0])
                self.failUnless(r2[0])
                self.failUnless(r3[0])
                # r1, r2, and r3 contain nodes.
                mdmf_node = r1[1]
                sdmf_node = r2[1]
                imm_node = r3[1]
                self._mdmf_uri = mdmf_node.get_uri()
                self._mdmf_readonly_uri = mdmf_node.get_readonly_uri()
                self._sdmf_uri = sdmf_node.get_uri()
                self._sdmf_readonly_uri = sdmf_node.get_readonly_uri()
                self._imm_uri = imm_node.get_uri()
                d1 = n.set_node(u"mdmf", mdmf_node)
                d2 = n.set_node(u"sdmf", sdmf_node)
                return defer.DeferredList([d1, d2])
            # We can now list the directory by listing self._dircap.
            dl.addCallback(_made_files)
            return dl
        d.addCallback(_got_rootnode)
        return d

    def test_list_mdmf(self):
        # 'tahoe ls' should include MDMF files.
        self.basedir = "cli/List/list_mdmf"
        self.set_up_grid()
        d = self._create_directory_structure()
        d.addCallback(lambda ignored:
            self.do_cli("ls", self._dircap))
        def _got_ls((rc, out, err)):
            self.failUnlessEqual(rc, 0)
            self.failUnlessEqual(err, "")
            self.failUnlessIn("immutable", out)
            self.failUnlessIn("mdmf", out)
            self.failUnlessIn("sdmf", out)
        d.addCallback(_got_ls)
        return d

    def test_list_mdmf_json(self):
        # 'tahoe ls' should include MDMF caps when invoked with MDMF
        # caps.
        self.basedir = "cli/List/list_mdmf_json"
        self.set_up_grid()
        d = self._create_directory_structure()
        d.addCallback(lambda ignored:
            self.do_cli("ls", "--json", self._dircap))
        def _got_json((rc, out, err)):
            self.failUnlessEqual(rc, 0)
            self.failUnlessEqual(err, "")
            self.failUnlessIn(self._mdmf_uri, out)
            self.failUnlessIn(self._mdmf_readonly_uri, out)
            self.failUnlessIn(self._sdmf_uri, out)
            self.failUnlessIn(self._sdmf_readonly_uri, out)
            self.failUnlessIn(self._imm_uri, out)
            self.failUnlessIn('"format": "SDMF"', out)
            self.failUnlessIn('"format": "MDMF"', out)
        d.addCallback(_got_json)
        return d


class Mv(GridTestMixin, CLITestMixin, unittest.TestCase):
    def test_mv_behavior(self):
        self.basedir = "cli/Mv/mv_behavior"
        self.set_up_grid()
        fn1 = os.path.join(self.basedir, "file1")
        DATA1 = "Nuclear launch codes"
        fileutil.write(fn1, DATA1)
        fn2 = os.path.join(self.basedir, "file2")
        DATA2 = "UML diagrams"
        fileutil.write(fn2, DATA2)
        # copy both files to the grid
        d = self.do_cli("create-alias", "tahoe")
        d.addCallback(lambda res:
            self.do_cli("cp", fn1, "tahoe:"))
        d.addCallback(lambda res:
            self.do_cli("cp", fn2, "tahoe:"))

        # do mv file1 file3
        # (we should be able to rename files)
        d.addCallback(lambda res:
            self.do_cli("mv", "tahoe:file1", "tahoe:file3"))
        d.addCallback(lambda (rc, out, err):
            self.failUnlessIn("OK", out, "mv didn't rename a file"))

        # do mv file3 file2
        # (This should succeed without issue)
        d.addCallback(lambda res:
            self.do_cli("mv", "tahoe:file3", "tahoe:file2"))
        # Out should contain "OK" to show that the transfer worked.
        d.addCallback(lambda (rc,out,err):
            self.failUnlessIn("OK", out, "mv didn't output OK after mving"))

        # Next, make a remote directory.
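        # The next few steps check how 'tahoe mv' treats directories,
        # roughly equivalent to (sketch only):
        #   tahoe mkdir tahoe:directory
        #   tahoe mv tahoe:file2 tahoe:directory    # refused: would clobber a dir
        #   tahoe mv tahoe:file2 tahoe:directory/   # OK: moves file2 into it
        # after which tahoe:directory/file2 is fetchable and tahoe:file2 is gone.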
d.addCallback(lambda res: self.do_cli("mkdir", "tahoe:directory")) # mv file2 directory # (should fail with a descriptive error message; the CLI mv # client should support this) d.addCallback(lambda res: self.do_cli("mv", "tahoe:file2", "tahoe:directory")) d.addCallback(lambda (rc, out, err): self.failUnlessIn( "Error: You can't overwrite a directory with a file", err, "mv shouldn't overwrite directories" )) # mv file2 directory/ # (should succeed by making file2 a child node of directory) d.addCallback(lambda res: self.do_cli("mv", "tahoe:file2", "tahoe:directory/")) # We should see an "OK"... d.addCallback(lambda (rc, out, err): self.failUnlessIn("OK", out, "mv didn't mv a file into a directory")) # ... and be able to GET the file d.addCallback(lambda res: self.do_cli("get", "tahoe:directory/file2", self.basedir + "new")) d.addCallback(lambda (rc, out, err): self.failUnless(os.path.exists(self.basedir + "new"), "mv didn't write the destination file")) # ... and not find the file where it was before. d.addCallback(lambda res: self.do_cli("get", "tahoe:file2", "file2")) d.addCallback(lambda (rc, out, err): self.failUnlessIn("404", err, "mv left the source file intact")) # Let's build: # directory/directory2/some_file # directory3 d.addCallback(lambda res: self.do_cli("mkdir", "tahoe:directory/directory2")) d.addCallback(lambda res: self.do_cli("cp", fn2, "tahoe:directory/directory2/some_file")) d.addCallback(lambda res: self.do_cli("mkdir", "tahoe:directory3")) # Let's now try to mv directory/directory2/some_file to # directory3/some_file d.addCallback(lambda res: self.do_cli("mv", "tahoe:directory/directory2/some_file", "tahoe:directory3/")) # We should have just some_file in tahoe:directory3 d.addCallback(lambda res: self.do_cli("get", "tahoe:directory3/some_file", "some_file")) d.addCallback(lambda (rc, out, err): self.failUnless("404" not in err, "mv didn't handle nested directories correctly")) d.addCallback(lambda res: self.do_cli("get", "tahoe:directory3/directory", "directory")) d.addCallback(lambda (rc, out, err): self.failUnlessIn("404", err, "mv moved the wrong thing")) return d def test_mv_error_if_DELETE_fails(self): self.basedir = "cli/Mv/mv_error_if_DELETE_fails" self.set_up_grid() fn1 = os.path.join(self.basedir, "file1") DATA1 = "Nuclear launch codes" fileutil.write(fn1, DATA1) original_do_http = tahoe_mv.do_http def mock_do_http(method, url, body=""): if method == "DELETE": class FakeResponse: def read(self): return "response" resp = FakeResponse() resp.status = '500 Something Went Wrong' resp.reason = '*shrug*' return resp else: return original_do_http(method, url, body=body) tahoe_mv.do_http = mock_do_http # copy file to the grid d = self.do_cli("create-alias", "tahoe") d.addCallback(lambda res: self.do_cli("cp", fn1, "tahoe:")) # do mv file1 file2 d.addCallback(lambda res: self.do_cli("mv", "tahoe:file1", "tahoe:file2")) def _check( (rc, out, err) ): self.failIfIn("OK", out, "mv printed 'OK' even though the DELETE failed") self.failUnlessEqual(rc, 2) d.addCallback(_check) def _restore_do_http(res): tahoe_mv.do_http = original_do_http return res d.addBoth(_restore_do_http) return d def test_mv_without_alias(self): # doing 'tahoe mv' without explicitly specifying an alias or # creating the default 'tahoe' alias should fail with a useful # error message. 
self.basedir = "cli/Mv/mv_without_alias" self.set_up_grid() d = self.do_cli("mv", "afile", "anotherfile") def _check((rc, out, err)): self.failUnlessReallyEqual(rc, 1) self.failUnlessIn("error:", err) self.failUnlessReallyEqual(out, "") d.addCallback(_check) # check to see that the validation extends to the # target argument by making an alias that will work with the first # one. d.addCallback(lambda ign: self.do_cli("create-alias", "havasu")) def _create_a_test_file(ign): self.test_file_path = os.path.join(self.basedir, "afile") fileutil.write(self.test_file_path, "puppies" * 100) d.addCallback(_create_a_test_file) d.addCallback(lambda ign: self.do_cli("put", self.test_file_path, "havasu:afile")) d.addCallback(lambda ign: self.do_cli("mv", "havasu:afile", "anotherfile")) d.addCallback(_check) return d def test_mv_with_nonexistent_alias(self): # doing 'tahoe mv' with an alias that doesn't exist should fail # with an informative error message. self.basedir = "cli/Mv/mv_with_nonexistent_alias" self.set_up_grid() d = self.do_cli("mv", "fake:afile", "fake:anotherfile") def _check((rc, out, err)): self.failUnlessReallyEqual(rc, 1) self.failUnlessIn("error:", err) self.failUnlessIn("fake", err) self.failUnlessReallyEqual(out, "") d.addCallback(_check) # check to see that the validation extends to the # target argument by making an alias that will work with the first # one. d.addCallback(lambda ign: self.do_cli("create-alias", "havasu")) def _create_a_test_file(ign): self.test_file_path = os.path.join(self.basedir, "afile") fileutil.write(self.test_file_path, "puppies" * 100) d.addCallback(_create_a_test_file) d.addCallback(lambda ign: self.do_cli("put", self.test_file_path, "havasu:afile")) d.addCallback(lambda ign: self.do_cli("mv", "havasu:afile", "fake:anotherfile")) d.addCallback(_check) return d class Cp(GridTestMixin, CLITestMixin, unittest.TestCase): def test_not_enough_args(self): o = cli.CpOptions() self.failUnlessRaises(usage.UsageError, o.parseOptions, ["onearg"]) def test_unicode_filename(self): self.basedir = "cli/Cp/unicode_filename" fn1 = os.path.join(unicode(self.basedir), u"\u00C4rtonwall") try: fn1_arg = fn1.encode(get_io_encoding()) artonwall_arg = u"\u00C4rtonwall".encode(get_io_encoding()) except UnicodeEncodeError: raise unittest.SkipTest("A non-ASCII command argument could not be encoded on this platform.") self.skip_if_cannot_represent_filename(fn1) self.set_up_grid() DATA1 = "unicode file content" fileutil.write(fn1, DATA1) fn2 = os.path.join(self.basedir, "Metallica") DATA2 = "non-unicode file content" fileutil.write(fn2, DATA2) d = self.do_cli("create-alias", "tahoe") d.addCallback(lambda res: self.do_cli("cp", fn1_arg, "tahoe:")) d.addCallback(lambda res: self.do_cli("get", "tahoe:" + artonwall_arg)) d.addCallback(lambda (rc,out,err): self.failUnlessReallyEqual(out, DATA1)) d.addCallback(lambda res: self.do_cli("cp", fn2, "tahoe:")) d.addCallback(lambda res: self.do_cli("get", "tahoe:Metallica")) d.addCallback(lambda (rc,out,err): self.failUnlessReallyEqual(out, DATA2)) d.addCallback(lambda res: self.do_cli("ls", "tahoe:")) def _check((rc, out, err)): try: unicode_to_output(u"\u00C4rtonwall") except UnicodeEncodeError: self.failUnlessReallyEqual(rc, 1) self.failUnlessReallyEqual(out, "Metallica\n") self.failUnlessIn(quote_output(u"\u00C4rtonwall"), err) self.failUnlessIn("files whose names could not be converted", err) else: self.failUnlessReallyEqual(rc, 0) self.failUnlessReallyEqual(out.decode(get_io_encoding()), u"Metallica\n\u00C4rtonwall\n") 
self.failUnlessReallyEqual(err, "") d.addCallback(_check) return d def test_dangling_symlink_vs_recursion(self): if not hasattr(os, 'symlink'): raise unittest.SkipTest("Symlinks are not supported by Python on this platform.") # cp -r on a directory containing a dangling symlink shouldn't assert self.basedir = "cli/Cp/dangling_symlink_vs_recursion" self.set_up_grid() dn = os.path.join(self.basedir, "dir") os.mkdir(dn) fn = os.path.join(dn, "Fakebandica") ln = os.path.join(dn, "link") os.symlink(fn, ln) d = self.do_cli("create-alias", "tahoe") d.addCallback(lambda res: self.do_cli("cp", "--recursive", dn, "tahoe:")) return d def test_copy_using_filecap(self): self.basedir = "cli/Cp/test_copy_using_filecap" self.set_up_grid() outdir = os.path.join(self.basedir, "outdir") os.mkdir(outdir) fn1 = os.path.join(self.basedir, "Metallica") fn2 = os.path.join(outdir, "Not Metallica") fn3 = os.path.join(outdir, "test2") DATA1 = "puppies" * 10000 fileutil.write(fn1, DATA1) d = self.do_cli("create-alias", "tahoe") d.addCallback(lambda ign: self.do_cli("put", fn1)) def _put_file((rc, out, err)): self.failUnlessReallyEqual(rc, 0) self.failUnlessIn("200 OK", err) # keep track of the filecap self.filecap = out.strip() d.addCallback(_put_file) # Let's try copying this to the disk using the filecap # cp FILECAP filename d.addCallback(lambda ign: self.do_cli("cp", self.filecap, fn2)) def _copy_file((rc, out, err)): self.failUnlessReallyEqual(rc, 0) results = fileutil.read(fn2) self.failUnlessReallyEqual(results, DATA1) d.addCallback(_copy_file) # Test with ./ (see #761) # cp FILECAP localdir d.addCallback(lambda ign: self.do_cli("cp", self.filecap, outdir)) def _resp((rc, out, err)): self.failUnlessReallyEqual(rc, 1) self.failUnlessIn("error: you must specify a destination filename", err) self.failUnlessReallyEqual(out, "") d.addCallback(_resp) # Create a directory, linked at tahoe:test d.addCallback(lambda ign: self.do_cli("mkdir", "tahoe:test")) def _get_dir((rc, out, err)): self.failUnlessReallyEqual(rc, 0) self.dircap = out.strip() d.addCallback(_get_dir) # Upload a file to the directory d.addCallback(lambda ign: self.do_cli("put", fn1, "tahoe:test/test_file")) d.addCallback(lambda (rc, out, err): self.failUnlessReallyEqual(rc, 0)) # cp DIRCAP/filename localdir d.addCallback(lambda ign: self.do_cli("cp", self.dircap + "/test_file", outdir)) def _get_resp((rc, out, err)): self.failUnlessReallyEqual(rc, 0) results = fileutil.read(os.path.join(outdir, "test_file")) self.failUnlessReallyEqual(results, DATA1) d.addCallback(_get_resp) # cp -r DIRCAP/filename filename2 d.addCallback(lambda ign: self.do_cli("cp", self.dircap + "/test_file", fn3)) def _get_resp2((rc, out, err)): self.failUnlessReallyEqual(rc, 0) results = fileutil.read(fn3) self.failUnlessReallyEqual(results, DATA1) d.addCallback(_get_resp2) # cp --verbose filename3 dircap:test_file d.addCallback(lambda ign: self.do_cli("cp", "--verbose", '--recursive', self.basedir, self.dircap)) def _test_for_wrong_indices((rc, out, err)): self.failUnless('examining 1 of 1\n' in err) d.addCallback(_test_for_wrong_indices) return d def test_cp_with_nonexistent_alias(self): # when invoked with an alias or aliases that don't exist, 'tahoe cp' # should output a sensible error message rather than a stack trace. 
self.basedir = "cli/Cp/cp_with_nonexistent_alias" self.set_up_grid() d = self.do_cli("cp", "fake:file1", "fake:file2") def _check((rc, out, err)): self.failUnlessReallyEqual(rc, 1) self.failUnlessIn("error:", err) d.addCallback(_check) # 'tahoe cp' actually processes the target argument first, so we need # to check to make sure that validation extends to the source # argument. d.addCallback(lambda ign: self.do_cli("create-alias", "tahoe")) d.addCallback(lambda ign: self.do_cli("cp", "fake:file1", "tahoe:file2")) d.addCallback(_check) return d def test_unicode_dirnames(self): self.basedir = "cli/Cp/unicode_dirnames" fn1 = os.path.join(unicode(self.basedir), u"\u00C4rtonwall") try: fn1_arg = fn1.encode(get_io_encoding()) del fn1_arg # hush pyflakes artonwall_arg = u"\u00C4rtonwall".encode(get_io_encoding()) except UnicodeEncodeError: raise unittest.SkipTest("A non-ASCII command argument could not be encoded on this platform.") self.skip_if_cannot_represent_filename(fn1) self.set_up_grid() d = self.do_cli("create-alias", "tahoe") d.addCallback(lambda res: self.do_cli("mkdir", "tahoe:test/" + artonwall_arg)) d.addCallback(lambda res: self.do_cli("cp", "-r", "tahoe:test", "tahoe:test2")) d.addCallback(lambda res: self.do_cli("ls", "tahoe:test2")) def _check((rc, out, err)): try: unicode_to_output(u"\u00C4rtonwall") except UnicodeEncodeError: self.failUnlessReallyEqual(rc, 1) self.failUnlessReallyEqual(out, "") self.failUnlessIn(quote_output(u"\u00C4rtonwall"), err) self.failUnlessIn("files whose names could not be converted", err) else: self.failUnlessReallyEqual(rc, 0) self.failUnlessReallyEqual(out.decode(get_io_encoding()), u"\u00C4rtonwall\n") self.failUnlessReallyEqual(err, "") d.addCallback(_check) return d def test_cp_replaces_mutable_file_contents(self): self.basedir = "cli/Cp/cp_replaces_mutable_file_contents" self.set_up_grid() # Write a test file, which we'll copy to the grid. test_txt_path = os.path.join(self.basedir, "test.txt") test_txt_contents = "foo bar baz" f = open(test_txt_path, "w") f.write(test_txt_contents) f.close() d = self.do_cli("create-alias", "tahoe") d.addCallback(lambda ignored: self.do_cli("mkdir", "tahoe:test")) # We have to use 'tahoe put' here because 'tahoe cp' doesn't # know how to make mutable files at the destination. d.addCallback(lambda ignored: self.do_cli("put", "--mutable", test_txt_path, "tahoe:test/test.txt")) d.addCallback(lambda ignored: self.do_cli("get", "tahoe:test/test.txt")) def _check((rc, out, err)): self.failUnlessEqual(rc, 0) self.failUnlessEqual(out, test_txt_contents) d.addCallback(_check) # We'll do ls --json to get the read uri and write uri for the # file we've just uploaded. d.addCallback(lambda ignored: self.do_cli("ls", "--json", "tahoe:test/test.txt")) def _get_test_txt_uris((rc, out, err)): self.failUnlessEqual(rc, 0) filetype, data = simplejson.loads(out) self.failUnlessEqual(filetype, "filenode") self.failUnless(data['mutable']) self.failUnlessIn("rw_uri", data) self.rw_uri = to_str(data["rw_uri"]) self.failUnlessIn("ro_uri", data) self.ro_uri = to_str(data["ro_uri"]) d.addCallback(_get_test_txt_uris) # Now make a new file to copy in place of test.txt. new_txt_path = os.path.join(self.basedir, "new.txt") new_txt_contents = "baz bar foo" * 100000 f = open(new_txt_path, "w") f.write(new_txt_contents) f.close() # Copy the new file on top of the old file. d.addCallback(lambda ignored: self.do_cli("cp", new_txt_path, "tahoe:test/test.txt")) # If we get test.txt now, we should see the new data. 
d.addCallback(lambda ignored: self.do_cli("get", "tahoe:test/test.txt")) d.addCallback(lambda (rc, out, err): self.failUnlessEqual(out, new_txt_contents)) # If we get the json of the new file, we should see that the old # uri is there d.addCallback(lambda ignored: self.do_cli("ls", "--json", "tahoe:test/test.txt")) def _check_json((rc, out, err)): self.failUnlessEqual(rc, 0) filetype, data = simplejson.loads(out) self.failUnlessEqual(filetype, "filenode") self.failUnless(data['mutable']) self.failUnlessIn("ro_uri", data) self.failUnlessEqual(to_str(data["ro_uri"]), self.ro_uri) self.failUnlessIn("rw_uri", data) self.failUnlessEqual(to_str(data["rw_uri"]), self.rw_uri) d.addCallback(_check_json) # and, finally, doing a GET directly on one of the old uris # should give us the new contents. d.addCallback(lambda ignored: self.do_cli("get", self.rw_uri)) d.addCallback(lambda (rc, out, err): self.failUnlessEqual(out, new_txt_contents)) # Now copy the old test.txt without an explicit destination # file. tahoe cp will match it to the existing file and # overwrite it appropriately. d.addCallback(lambda ignored: self.do_cli("cp", test_txt_path, "tahoe:test")) d.addCallback(lambda ignored: self.do_cli("get", "tahoe:test/test.txt")) d.addCallback(lambda (rc, out, err): self.failUnlessEqual(out, test_txt_contents)) d.addCallback(lambda ignored: self.do_cli("ls", "--json", "tahoe:test/test.txt")) d.addCallback(_check_json) d.addCallback(lambda ignored: self.do_cli("get", self.rw_uri)) d.addCallback(lambda (rc, out, err): self.failUnlessEqual(out, test_txt_contents)) # Now we'll make a more complicated directory structure. # test2/ # test2/mutable1 # test2/mutable2 # test2/imm1 # test2/imm2 imm_test_txt_path = os.path.join(self.basedir, "imm_test.txt") imm_test_txt_contents = test_txt_contents * 10000 fileutil.write(imm_test_txt_path, imm_test_txt_contents) d.addCallback(lambda ignored: self.do_cli("mkdir", "tahoe:test2")) d.addCallback(lambda ignored: self.do_cli("put", "--mutable", new_txt_path, "tahoe:test2/mutable1")) d.addCallback(lambda ignored: self.do_cli("put", "--mutable", new_txt_path, "tahoe:test2/mutable2")) d.addCallback(lambda ignored: self.do_cli('put', new_txt_path, "tahoe:test2/imm1")) d.addCallback(lambda ignored: self.do_cli("put", imm_test_txt_path, "tahoe:test2/imm2")) d.addCallback(lambda ignored: self.do_cli("ls", "--json", "tahoe:test2")) def _process_directory_json((rc, out, err)): self.failUnlessEqual(rc, 0) filetype, data = simplejson.loads(out) self.failUnlessEqual(filetype, "dirnode") self.failUnless(data['mutable']) self.failUnlessIn("children", data) children = data['children'] # Store the URIs for later use. 
self.childuris = {} for k in ["mutable1", "mutable2", "imm1", "imm2"]: self.failUnlessIn(k, children) childtype, childdata = children[k] self.failUnlessEqual(childtype, "filenode") if "mutable" in k: self.failUnless(childdata['mutable']) self.failUnlessIn("rw_uri", childdata) uri_key = "rw_uri" else: self.failIf(childdata['mutable']) self.failUnlessIn("ro_uri", childdata) uri_key = "ro_uri" self.childuris[k] = to_str(childdata[uri_key]) d.addCallback(_process_directory_json) # Now build a local directory to copy into place, like the following: # source1/ # source1/mutable1 # source1/mutable2 # source1/imm1 # source1/imm3 def _build_local_directory(ignored): source1_path = os.path.join(self.basedir, "source1") fileutil.make_dirs(source1_path) for fn in ("mutable1", "mutable2", "imm1", "imm3"): fileutil.write(os.path.join(source1_path, fn), fn * 1000) self.source1_path = source1_path d.addCallback(_build_local_directory) d.addCallback(lambda ignored: self.do_cli("cp", "-r", self.source1_path, "tahoe:test2")) # We expect that mutable1 and mutable2 are overwritten in-place, # so they'll retain their URIs but have different content. def _process_file_json((rc, out, err), fn): self.failUnlessEqual(rc, 0) filetype, data = simplejson.loads(out) self.failUnlessEqual(filetype, "filenode") if "mutable" in fn: self.failUnless(data['mutable']) self.failUnlessIn("rw_uri", data) self.failUnlessEqual(to_str(data["rw_uri"]), self.childuris[fn]) else: self.failIf(data['mutable']) self.failUnlessIn("ro_uri", data) self.failIfEqual(to_str(data["ro_uri"]), self.childuris[fn]) for fn in ("mutable1", "mutable2"): d.addCallback(lambda ignored, fn=fn: self.do_cli("get", "tahoe:test2/%s" % fn)) d.addCallback(lambda (rc, out, err), fn=fn: self.failUnlessEqual(out, fn * 1000)) d.addCallback(lambda ignored, fn=fn: self.do_cli("ls", "--json", "tahoe:test2/%s" % fn)) d.addCallback(_process_file_json, fn=fn) # imm1 should have been replaced, so both its uri and content # should be different. d.addCallback(lambda ignored: self.do_cli("get", "tahoe:test2/imm1")) d.addCallback(lambda (rc, out, err): self.failUnlessEqual(out, "imm1" * 1000)) d.addCallback(lambda ignored: self.do_cli("ls", "--json", "tahoe:test2/imm1")) d.addCallback(_process_file_json, fn="imm1") # imm3 should have been created. d.addCallback(lambda ignored: self.do_cli("get", "tahoe:test2/imm3")) d.addCallback(lambda (rc, out, err): self.failUnlessEqual(out, "imm3" * 1000)) # imm2 should be exactly as we left it, since our newly-copied # directory didn't contain an imm2 entry. d.addCallback(lambda ignored: self.do_cli("get", "tahoe:test2/imm2")) d.addCallback(lambda (rc, out, err): self.failUnlessEqual(out, imm_test_txt_contents)) d.addCallback(lambda ignored: self.do_cli("ls", "--json", "tahoe:test2/imm2")) def _process_imm2_json((rc, out, err)): self.failUnlessEqual(rc, 0) filetype, data = simplejson.loads(out) self.failUnlessEqual(filetype, "filenode") self.failIf(data['mutable']) self.failUnlessIn("ro_uri", data) self.failUnlessEqual(to_str(data["ro_uri"]), self.childuris["imm2"]) d.addCallback(_process_imm2_json) return d def test_cp_overwrite_readonly_mutable_file(self): # tahoe cp should print an error when asked to overwrite a # mutable file that it can't overwrite. self.basedir = "cli/Cp/overwrite_readonly_mutable_file" self.set_up_grid() # This is our initial file. We'll link its readcap into the # tahoe: alias. test_file_path = os.path.join(self.basedir, "test_file.txt") test_file_contents = "This is a test file." 
fileutil.write(test_file_path, test_file_contents) # This is our replacement file. We'll try and fail to upload it # over the readcap that we linked into the tahoe: alias. replacement_file_path = os.path.join(self.basedir, "replacement.txt") replacement_file_contents = "These are new contents." fileutil.write(replacement_file_path, replacement_file_contents) d = self.do_cli("create-alias", "tahoe:") d.addCallback(lambda ignored: self.do_cli("put", "--mutable", test_file_path)) def _get_test_uri((rc, out, err)): self.failUnlessEqual(rc, 0) # this should be a write uri self._test_write_uri = out d.addCallback(_get_test_uri) d.addCallback(lambda ignored: self.do_cli("ls", "--json", self._test_write_uri)) def _process_test_json((rc, out, err)): self.failUnlessEqual(rc, 0) filetype, data = simplejson.loads(out) self.failUnlessEqual(filetype, "filenode") self.failUnless(data['mutable']) self.failUnlessIn("ro_uri", data) self._test_read_uri = to_str(data["ro_uri"]) d.addCallback(_process_test_json) # Now we'll link the readonly URI into the tahoe: alias. d.addCallback(lambda ignored: self.do_cli("ln", self._test_read_uri, "tahoe:test_file.txt")) d.addCallback(lambda (rc, out, err): self.failUnlessEqual(rc, 0)) # Let's grab the json of that to make sure that we did it right. d.addCallback(lambda ignored: self.do_cli("ls", "--json", "tahoe:")) def _process_tahoe_json((rc, out, err)): self.failUnlessEqual(rc, 0) filetype, data = simplejson.loads(out) self.failUnlessEqual(filetype, "dirnode") self.failUnlessIn("children", data) kiddata = data['children'] self.failUnlessIn("test_file.txt", kiddata) testtype, testdata = kiddata['test_file.txt'] self.failUnlessEqual(testtype, "filenode") self.failUnless(testdata['mutable']) self.failUnlessIn("ro_uri", testdata) self.failUnlessEqual(to_str(testdata["ro_uri"]), self._test_read_uri) self.failIfIn("rw_uri", testdata) d.addCallback(_process_tahoe_json) # Okay, now we're going to try uploading another mutable file in # place of that one. We should get an error. d.addCallback(lambda ignored: self.do_cli("cp", replacement_file_path, "tahoe:test_file.txt")) def _check_error_message((rc, out, err)): self.failUnlessEqual(rc, 1) self.failUnlessIn("replace or update requested with read-only cap", err) d.addCallback(_check_error_message) # Make extra sure that that didn't work. d.addCallback(lambda ignored: self.do_cli("get", "tahoe:test_file.txt")) d.addCallback(lambda (rc, out, err): self.failUnlessEqual(out, test_file_contents)) d.addCallback(lambda ignored: self.do_cli("get", self._test_read_uri)) d.addCallback(lambda (rc, out, err): self.failUnlessEqual(out, test_file_contents)) # Now we'll do it without an explicit destination. d.addCallback(lambda ignored: self.do_cli("cp", test_file_path, "tahoe:")) d.addCallback(_check_error_message) d.addCallback(lambda ignored: self.do_cli("get", "tahoe:test_file.txt")) d.addCallback(lambda (rc, out, err): self.failUnlessEqual(out, test_file_contents)) d.addCallback(lambda ignored: self.do_cli("get", self._test_read_uri)) d.addCallback(lambda (rc, out, err): self.failUnlessEqual(out, test_file_contents)) # Now we'll link a readonly file into a subdirectory. 
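        # Setup for the recursive case: link the read-only cap as
        # tahoe:test/file2.txt, build a local "test" directory containing
        # file1.txt and file2.txt, and then 'tahoe cp -r test tahoe:test'.
        # The copy should fail with the same read-only-cap error, and the
        # linked child should still carry the original ro_uri afterwards.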
d.addCallback(lambda ignored: self.do_cli("mkdir", "tahoe:testdir")) d.addCallback(lambda (rc, out, err): self.failUnlessEqual(rc, 0)) d.addCallback(lambda ignored: self.do_cli("ln", self._test_read_uri, "tahoe:test/file2.txt")) d.addCallback(lambda (rc, out, err): self.failUnlessEqual(rc, 0)) test_dir_path = os.path.join(self.basedir, "test") fileutil.make_dirs(test_dir_path) for f in ("file1.txt", "file2.txt"): fileutil.write(os.path.join(test_dir_path, f), f * 10000) d.addCallback(lambda ignored: self.do_cli("cp", "-r", test_dir_path, "tahoe:test")) d.addCallback(_check_error_message) d.addCallback(lambda ignored: self.do_cli("ls", "--json", "tahoe:test")) def _got_testdir_json((rc, out, err)): self.failUnlessEqual(rc, 0) filetype, data = simplejson.loads(out) self.failUnlessEqual(filetype, "dirnode") self.failUnlessIn("children", data) childdata = data['children'] self.failUnlessIn("file2.txt", childdata) file2type, file2data = childdata['file2.txt'] self.failUnlessEqual(file2type, "filenode") self.failUnless(file2data['mutable']) self.failUnlessIn("ro_uri", file2data) self.failUnlessEqual(to_str(file2data["ro_uri"]), self._test_read_uri) self.failIfIn("rw_uri", file2data) d.addCallback(_got_testdir_json) return d def test_cp_verbose(self): self.basedir = "cli/Cp/cp_verbose" self.set_up_grid() # Write two test files, which we'll copy to the grid. test1_path = os.path.join(self.basedir, "test1") test2_path = os.path.join(self.basedir, "test2") fileutil.write(test1_path, "test1") fileutil.write(test2_path, "test2") d = self.do_cli("create-alias", "tahoe") d.addCallback(lambda ign: self.do_cli("cp", "--verbose", test1_path, test2_path, "tahoe:")) def _check(res): (rc, out, err) = res self.failUnlessEqual(rc, 0, str(res)) self.failUnlessIn("Success: files copied", out, str(res)) self.failUnlessEqual(err, """\ attaching sources to targets, 2 files / 0 dirs in root targets assigned, 1 dirs, 2 files starting copy, 2 files, 1 directories 1/2 files, 0/1 directories 2/2 files, 0/1 directories 1/1 directories """, str(res)) d.addCallback(_check) return d class Backup(GridTestMixin, CLITestMixin, StallMixin, unittest.TestCase): def writeto(self, path, data): full_path = os.path.join(self.basedir, "home", path) fileutil.make_dirs(os.path.dirname(full_path)) fileutil.write(full_path, data) def count_output(self, out): mo = re.search(r"(\d)+ files uploaded \((\d+) reused\), " "(\d)+ files skipped, " "(\d+) directories created \((\d+) reused\), " "(\d+) directories skipped", out) return [int(s) for s in mo.groups()] def count_output2(self, out): mo = re.search(r"(\d)+ files checked, (\d+) directories checked", out) return [int(s) for s in mo.groups()] def test_backup(self): self.basedir = "cli/Backup/backup" self.set_up_grid() # is the backupdb available? If so, we test that a second backup does # not create new directories. 
hush = StringIO() bdb = backupdb.get_backupdb(os.path.join(self.basedir, "dbtest"), hush) self.failUnless(bdb) # create a small local directory with a couple of files source = os.path.join(self.basedir, "home") fileutil.make_dirs(os.path.join(source, "empty")) self.writeto("parent/subdir/foo.txt", "foo") self.writeto("parent/subdir/bar.txt", "bar\n" * 1000) self.writeto("parent/blah.txt", "blah") def do_backup(verbose=False): cmd = ["backup"] if verbose: cmd.append("--verbose") cmd.append(source) cmd.append("tahoe:backups") return self.do_cli(*cmd) d = self.do_cli("create-alias", "tahoe") d.addCallback(lambda res: do_backup()) def _check0((rc, out, err)): self.failUnlessReallyEqual(err, "") self.failUnlessReallyEqual(rc, 0) fu, fr, fs, dc, dr, ds = self.count_output(out) # foo.txt, bar.txt, blah.txt self.failUnlessReallyEqual(fu, 3) self.failUnlessReallyEqual(fr, 0) self.failUnlessReallyEqual(fs, 0) # empty, home, home/parent, home/parent/subdir self.failUnlessReallyEqual(dc, 4) self.failUnlessReallyEqual(dr, 0) self.failUnlessReallyEqual(ds, 0) d.addCallback(_check0) d.addCallback(lambda res: self.do_cli("ls", "--uri", "tahoe:backups")) def _check1((rc, out, err)): self.failUnlessReallyEqual(err, "") self.failUnlessReallyEqual(rc, 0) lines = out.split("\n") children = dict([line.split() for line in lines if line]) latest_uri = children["Latest"] self.failUnless(latest_uri.startswith("URI:DIR2-CHK:"), latest_uri) childnames = children.keys() self.failUnlessReallyEqual(sorted(childnames), ["Archives", "Latest"]) d.addCallback(_check1) d.addCallback(lambda res: self.do_cli("ls", "tahoe:backups/Latest")) def _check2((rc, out, err)): self.failUnlessReallyEqual(err, "") self.failUnlessReallyEqual(rc, 0) self.failUnlessReallyEqual(sorted(out.split()), ["empty", "parent"]) d.addCallback(_check2) d.addCallback(lambda res: self.do_cli("ls", "tahoe:backups/Latest/empty")) def _check2a((rc, out, err)): self.failUnlessReallyEqual(err, "") self.failUnlessReallyEqual(rc, 0) self.failUnlessReallyEqual(out.strip(), "") d.addCallback(_check2a) d.addCallback(lambda res: self.do_cli("get", "tahoe:backups/Latest/parent/subdir/foo.txt")) def _check3((rc, out, err)): self.failUnlessReallyEqual(err, "") self.failUnlessReallyEqual(rc, 0) self.failUnlessReallyEqual(out, "foo") d.addCallback(_check3) d.addCallback(lambda res: self.do_cli("ls", "tahoe:backups/Archives")) def _check4((rc, out, err)): self.failUnlessReallyEqual(err, "") self.failUnlessReallyEqual(rc, 0) self.old_archives = out.split() self.failUnlessReallyEqual(len(self.old_archives), 1) d.addCallback(_check4) d.addCallback(self.stall, 1.1) d.addCallback(lambda res: do_backup()) def _check4a((rc, out, err)): # second backup should reuse everything, if the backupdb is # available self.failUnlessReallyEqual(err, "") self.failUnlessReallyEqual(rc, 0) fu, fr, fs, dc, dr, ds = self.count_output(out) # foo.txt, bar.txt, blah.txt self.failUnlessReallyEqual(fu, 0) self.failUnlessReallyEqual(fr, 3) self.failUnlessReallyEqual(fs, 0) # empty, home, home/parent, home/parent/subdir self.failUnlessReallyEqual(dc, 0) self.failUnlessReallyEqual(dr, 4) self.failUnlessReallyEqual(ds, 0) d.addCallback(_check4a) # sneak into the backupdb, crank back the "last checked" # timestamp to force a check on all files def _reset_last_checked(res): dbfile = os.path.join(self.get_clientdir(), "private", "backupdb.sqlite") self.failUnless(os.path.exists(dbfile), dbfile) bdb = backupdb.get_backupdb(dbfile) bdb.cursor.execute("UPDATE last_upload SET last_checked=0") 
bdb.cursor.execute("UPDATE directories SET last_checked=0") bdb.connection.commit() d.addCallback(_reset_last_checked) d.addCallback(self.stall, 1.1) d.addCallback(lambda res: do_backup(verbose=True)) def _check4b((rc, out, err)): # we should check all files, and re-use all of them. None of # the directories should have been changed, so we should # re-use all of them too. self.failUnlessReallyEqual(err, "") self.failUnlessReallyEqual(rc, 0) fu, fr, fs, dc, dr, ds = self.count_output(out) fchecked, dchecked = self.count_output2(out) self.failUnlessReallyEqual(fchecked, 3) self.failUnlessReallyEqual(fu, 0) self.failUnlessReallyEqual(fr, 3) self.failUnlessReallyEqual(fs, 0) self.failUnlessReallyEqual(dchecked, 4) self.failUnlessReallyEqual(dc, 0) self.failUnlessReallyEqual(dr, 4) self.failUnlessReallyEqual(ds, 0) d.addCallback(_check4b) d.addCallback(lambda res: self.do_cli("ls", "tahoe:backups/Archives")) def _check5((rc, out, err)): self.failUnlessReallyEqual(err, "") self.failUnlessReallyEqual(rc, 0) self.new_archives = out.split() self.failUnlessReallyEqual(len(self.new_archives), 3, out) # the original backup should still be the oldest (i.e. sorts # alphabetically towards the beginning) self.failUnlessReallyEqual(sorted(self.new_archives)[0], self.old_archives[0]) d.addCallback(_check5) d.addCallback(self.stall, 1.1) def _modify(res): self.writeto("parent/subdir/foo.txt", "FOOF!") # and turn a file into a directory os.unlink(os.path.join(source, "parent/blah.txt")) os.mkdir(os.path.join(source, "parent/blah.txt")) self.writeto("parent/blah.txt/surprise file", "surprise") self.writeto("parent/blah.txt/surprisedir/subfile", "surprise") # turn a directory into a file os.rmdir(os.path.join(source, "empty")) self.writeto("empty", "imagine nothing being here") return do_backup() d.addCallback(_modify) def _check5a((rc, out, err)): # second backup should reuse bar.txt (if backupdb is available), # and upload the rest. None of the directories can be reused. self.failUnlessReallyEqual(err, "") self.failUnlessReallyEqual(rc, 0) fu, fr, fs, dc, dr, ds = self.count_output(out) # new foo.txt, surprise file, subfile, empty self.failUnlessReallyEqual(fu, 4) # old bar.txt self.failUnlessReallyEqual(fr, 1) self.failUnlessReallyEqual(fs, 0) # home, parent, subdir, blah.txt, surprisedir self.failUnlessReallyEqual(dc, 5) self.failUnlessReallyEqual(dr, 0) self.failUnlessReallyEqual(ds, 0) d.addCallback(_check5a) d.addCallback(lambda res: self.do_cli("ls", "tahoe:backups/Archives")) def _check6((rc, out, err)): self.failUnlessReallyEqual(err, "") self.failUnlessReallyEqual(rc, 0) self.new_archives = out.split() self.failUnlessReallyEqual(len(self.new_archives), 4) self.failUnlessReallyEqual(sorted(self.new_archives)[0], self.old_archives[0]) d.addCallback(_check6) d.addCallback(lambda res: self.do_cli("get", "tahoe:backups/Latest/parent/subdir/foo.txt")) def _check7((rc, out, err)): self.failUnlessReallyEqual(err, "") self.failUnlessReallyEqual(rc, 0) self.failUnlessReallyEqual(out, "FOOF!") # the old snapshot should not be modified return self.do_cli("get", "tahoe:backups/Archives/%s/parent/subdir/foo.txt" % self.old_archives[0]) d.addCallback(_check7) def _check8((rc, out, err)): self.failUnlessReallyEqual(err, "") self.failUnlessReallyEqual(rc, 0) self.failUnlessReallyEqual(out, "foo") d.addCallback(_check8) return d # on our old dapper buildslave, this test takes a long time (usually # 130s), so we have to bump up the default 120s timeout. 
The create-alias # and initial backup alone take 60s, probably because of the handful of # dirnodes being created (RSA key generation). The backup between check4 # and check4a takes 6s, as does the backup before check4b. test_backup.timeout = 3000 def _check_filtering(self, filtered, all, included, excluded): filtered = set(filtered) all = set(all) included = set(included) excluded = set(excluded) self.failUnlessReallyEqual(filtered, included) self.failUnlessReallyEqual(all.difference(filtered), excluded) def test_exclude_options(self): root_listdir = (u'lib.a', u'_darcs', u'subdir', u'nice_doc.lyx') subdir_listdir = (u'another_doc.lyx', u'run_snake_run.py', u'CVS', u'.svn', u'_darcs') basedir = "cli/Backup/exclude_options" fileutil.make_dirs(basedir) nodeurl_path = os.path.join(basedir, 'node.url') fileutil.write(nodeurl_path, 'http://example.net:2357/') def parse(args): return parse_options(basedir, "backup", args) # test simple exclude backup_options = parse(['--exclude', '*lyx', 'from', 'to']) filtered = list(backup_options.filter_listdir(root_listdir)) self._check_filtering(filtered, root_listdir, (u'lib.a', u'_darcs', u'subdir'), (u'nice_doc.lyx',)) # multiple exclude backup_options = parse(['--exclude', '*lyx', '--exclude', 'lib.?', 'from', 'to']) filtered = list(backup_options.filter_listdir(root_listdir)) self._check_filtering(filtered, root_listdir, (u'_darcs', u'subdir'), (u'nice_doc.lyx', u'lib.a')) # vcs metadata exclusion backup_options = parse(['--exclude-vcs', 'from', 'to']) filtered = list(backup_options.filter_listdir(subdir_listdir)) self._check_filtering(filtered, subdir_listdir, (u'another_doc.lyx', u'run_snake_run.py',), (u'CVS', u'.svn', u'_darcs')) # read exclude patterns from file exclusion_string = "_darcs\n*py\n.svn" excl_filepath = os.path.join(basedir, 'exclusion') fileutil.write(excl_filepath, exclusion_string) backup_options = parse(['--exclude-from', excl_filepath, 'from', 'to']) filtered = list(backup_options.filter_listdir(subdir_listdir)) self._check_filtering(filtered, subdir_listdir, (u'another_doc.lyx', u'CVS'), (u'.svn', u'_darcs', u'run_snake_run.py')) # test BackupConfigurationError self.failUnlessRaises(cli.BackupConfigurationError, parse, ['--exclude-from', excl_filepath + '.no', 'from', 'to']) # test that an iterator works too backup_options = parse(['--exclude', '*lyx', 'from', 'to']) filtered = list(backup_options.filter_listdir(iter(root_listdir))) self._check_filtering(filtered, root_listdir, (u'lib.a', u'_darcs', u'subdir'), (u'nice_doc.lyx',)) def test_exclude_options_unicode(self): nice_doc = u"nice_d\u00F8c.lyx" try: doc_pattern_arg = u"*d\u00F8c*".encode(get_io_encoding()) except UnicodeEncodeError: raise unittest.SkipTest("A non-ASCII command argument could not be encoded on this platform.") root_listdir = (u'lib.a', u'_darcs', u'subdir', nice_doc) basedir = "cli/Backup/exclude_options_unicode" fileutil.make_dirs(basedir) nodeurl_path = os.path.join(basedir, 'node.url') fileutil.write(nodeurl_path, 'http://example.net:2357/') def parse(args): return parse_options(basedir, "backup", args) # test simple exclude backup_options = parse(['--exclude', doc_pattern_arg, 'from', 'to']) filtered = list(backup_options.filter_listdir(root_listdir)) self._check_filtering(filtered, root_listdir, (u'lib.a', u'_darcs', u'subdir'), (nice_doc,)) # multiple exclude backup_options = parse(['--exclude', doc_pattern_arg, '--exclude', 'lib.?', 'from', 'to']) filtered = list(backup_options.filter_listdir(root_listdir)) self._check_filtering(filtered, 
root_listdir, (u'_darcs', u'subdir'), (nice_doc, u'lib.a')) # read exclude patterns from file exclusion_string = doc_pattern_arg + "\nlib.?" excl_filepath = os.path.join(basedir, 'exclusion') fileutil.write(excl_filepath, exclusion_string) backup_options = parse(['--exclude-from', excl_filepath, 'from', 'to']) filtered = list(backup_options.filter_listdir(root_listdir)) self._check_filtering(filtered, root_listdir, (u'_darcs', u'subdir'), (nice_doc, u'lib.a')) # test that an iterator works too backup_options = parse(['--exclude', doc_pattern_arg, 'from', 'to']) filtered = list(backup_options.filter_listdir(iter(root_listdir))) self._check_filtering(filtered, root_listdir, (u'lib.a', u'_darcs', u'subdir'), (nice_doc,)) @patch('__builtin__.file') def test_exclude_from_tilde_expansion(self, mock): basedir = "cli/Backup/exclude_from_tilde_expansion" fileutil.make_dirs(basedir) nodeurl_path = os.path.join(basedir, 'node.url') fileutil.write(nodeurl_path, 'http://example.net:2357/') def parse(args): return parse_options(basedir, "backup", args) # ensure that tilde expansion is performed on exclude-from argument exclude_file = u'~/.tahoe/excludes.dummy' mock.return_value = StringIO() parse(['--exclude-from', unicode_to_argv(exclude_file), 'from', 'to']) self.failUnlessIn(((abspath_expanduser_unicode(exclude_file),), {}), mock.call_args_list) def test_ignore_symlinks(self): if not hasattr(os, 'symlink'): raise unittest.SkipTest("Symlinks are not supported by Python on this platform.") self.basedir = os.path.dirname(self.mktemp()) self.set_up_grid() source = os.path.join(self.basedir, "home") self.writeto("foo.txt", "foo") os.symlink(os.path.join(source, "foo.txt"), os.path.join(source, "foo2.txt")) d = self.do_cli("create-alias", "tahoe") d.addCallback(lambda res: self.do_cli("backup", "--verbose", source, "tahoe:test")) def _check((rc, out, err)): self.failUnlessReallyEqual(rc, 2) foo2 = os.path.join(source, "foo2.txt") self.failUnlessReallyEqual(err, "WARNING: cannot backup symlink '%s'\n" % foo2) fu, fr, fs, dc, dr, ds = self.count_output(out) # foo.txt self.failUnlessReallyEqual(fu, 1) self.failUnlessReallyEqual(fr, 0) # foo2.txt self.failUnlessReallyEqual(fs, 1) # home self.failUnlessReallyEqual(dc, 1) self.failUnlessReallyEqual(dr, 0) self.failUnlessReallyEqual(ds, 0) d.addCallback(_check) return d def test_ignore_unreadable_file(self): self.basedir = os.path.dirname(self.mktemp()) self.set_up_grid() source = os.path.join(self.basedir, "home") self.writeto("foo.txt", "foo") os.chmod(os.path.join(source, "foo.txt"), 0000) d = self.do_cli("create-alias", "tahoe") d.addCallback(lambda res: self.do_cli("backup", source, "tahoe:test")) def _check((rc, out, err)): self.failUnlessReallyEqual(rc, 2) self.failUnlessReallyEqual(err, "WARNING: permission denied on file %s\n" % os.path.join(source, "foo.txt")) fu, fr, fs, dc, dr, ds = self.count_output(out) self.failUnlessReallyEqual(fu, 0) self.failUnlessReallyEqual(fr, 0) # foo.txt self.failUnlessReallyEqual(fs, 1) # home self.failUnlessReallyEqual(dc, 1) self.failUnlessReallyEqual(dr, 0) self.failUnlessReallyEqual(ds, 0) d.addCallback(_check) # This is necessary for the temp files to be correctly removed def _cleanup(self): os.chmod(os.path.join(source, "foo.txt"), 0644) d.addCallback(_cleanup) d.addErrback(_cleanup) return d def test_ignore_unreadable_directory(self): self.basedir = os.path.dirname(self.mktemp()) self.set_up_grid() source = os.path.join(self.basedir, "home") os.mkdir(source) os.mkdir(os.path.join(source, "test")) 
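        # The "test" subdirectory is made unreadable (mode 0000) below, so
        # 'tahoe backup' is expected to warn on stderr, exit with rc 2, and
        # count it as a skipped directory; _cleanup then restores the
        # permissions so trial can delete the temporary files.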
os.chmod(os.path.join(source, "test"), 0000) d = self.do_cli("create-alias", "tahoe") d.addCallback(lambda res: self.do_cli("backup", source, "tahoe:test")) def _check((rc, out, err)): self.failUnlessReallyEqual(rc, 2) self.failUnlessReallyEqual(err, "WARNING: permission denied on directory %s\n" % os.path.join(source, "test")) fu, fr, fs, dc, dr, ds = self.count_output(out) self.failUnlessReallyEqual(fu, 0) self.failUnlessReallyEqual(fr, 0) self.failUnlessReallyEqual(fs, 0) # home, test self.failUnlessReallyEqual(dc, 2) self.failUnlessReallyEqual(dr, 0) # test self.failUnlessReallyEqual(ds, 1) d.addCallback(_check) # This is necessary for the temp files to be correctly removed def _cleanup(self): os.chmod(os.path.join(source, "test"), 0655) d.addCallback(_cleanup) d.addErrback(_cleanup) return d def test_backup_without_alias(self): # 'tahoe backup' should output a sensible error message when invoked # without an alias instead of a stack trace. self.basedir = os.path.dirname(self.mktemp()) self.set_up_grid() source = os.path.join(self.basedir, "file1") d = self.do_cli('backup', source, source) def _check((rc, out, err)): self.failUnlessReallyEqual(rc, 1) self.failUnlessIn("error:", err) self.failUnlessReallyEqual(out, "") d.addCallback(_check) return d def test_backup_with_nonexistent_alias(self): # 'tahoe backup' should output a sensible error message when invoked # with a nonexistent alias. self.basedir = os.path.dirname(self.mktemp()) self.set_up_grid() source = os.path.join(self.basedir, "file1") d = self.do_cli("backup", source, "nonexistent:" + source) def _check((rc, out, err)): self.failUnlessReallyEqual(rc, 1) self.failUnlessIn("error:", err) self.failUnlessIn("nonexistent", err) self.failUnlessReallyEqual(out, "") d.addCallback(_check) return d class Check(GridTestMixin, CLITestMixin, unittest.TestCase): def test_check(self): self.basedir = "cli/Check/check" self.set_up_grid() c0 = self.g.clients[0] DATA = "data" * 100 DATA_uploadable = MutableData(DATA) d = c0.create_mutable_file(DATA_uploadable) def _stash_uri(n): self.uri = n.get_uri() d.addCallback(_stash_uri) d.addCallback(lambda ign: self.do_cli("check", self.uri)) def _check1((rc, out, err)): self.failUnlessReallyEqual(err, "") self.failUnlessReallyEqual(rc, 0) lines = out.splitlines() self.failUnless("Summary: Healthy" in lines, out) self.failUnless(" good-shares: 10 (encoding is 3-of-10)" in lines, out) d.addCallback(_check1) d.addCallback(lambda ign: self.do_cli("check", "--raw", self.uri)) def _check2((rc, out, err)): self.failUnlessReallyEqual(err, "") self.failUnlessReallyEqual(rc, 0) data = simplejson.loads(out) self.failUnlessReallyEqual(to_str(data["summary"]), "Healthy") self.failUnlessReallyEqual(data["results"]["healthy"], True) d.addCallback(_check2) d.addCallback(lambda ign: c0.upload(upload.Data("literal", convergence=""))) def _stash_lit_uri(n): self.lit_uri = n.get_uri() d.addCallback(_stash_lit_uri) d.addCallback(lambda ign: self.do_cli("check", self.lit_uri)) def _check_lit((rc, out, err)): self.failUnlessReallyEqual(err, "") self.failUnlessReallyEqual(rc, 0) lines = out.splitlines() self.failUnless("Summary: Healthy (LIT)" in lines, out) d.addCallback(_check_lit) d.addCallback(lambda ign: self.do_cli("check", "--raw", self.lit_uri)) def _check_lit_raw((rc, out, err)): self.failUnlessReallyEqual(err, "") self.failUnlessReallyEqual(rc, 0) data = simplejson.loads(out) self.failUnlessReallyEqual(data["results"]["healthy"], True) d.addCallback(_check_lit_raw) d.addCallback(lambda ign: 
c0.create_immutable_dirnode({}, convergence="")) def _stash_lit_dir_uri(n): self.lit_dir_uri = n.get_uri() d.addCallback(_stash_lit_dir_uri) d.addCallback(lambda ign: self.do_cli("check", self.lit_dir_uri)) d.addCallback(_check_lit) d.addCallback(lambda ign: self.do_cli("check", "--raw", self.lit_uri)) d.addCallback(_check_lit_raw) def _clobber_shares(ignored): # delete one, corrupt a second shares = self.find_uri_shares(self.uri) self.failUnlessReallyEqual(len(shares), 10) os.unlink(shares[0][2]) cso = debug.CorruptShareOptions() cso.stdout = StringIO() cso.parseOptions([shares[1][2]]) storage_index = uri.from_string(self.uri).get_storage_index() self._corrupt_share_line = " server %s, SI %s, shnum %d" % \ (base32.b2a(shares[1][1]), base32.b2a(storage_index), shares[1][0]) debug.corrupt_share(cso) d.addCallback(_clobber_shares) d.addCallback(lambda ign: self.do_cli("check", "--verify", self.uri)) def _check3((rc, out, err)): self.failUnlessReallyEqual(err, "") self.failUnlessReallyEqual(rc, 0) lines = out.splitlines() summary = [l for l in lines if l.startswith("Summary")][0] self.failUnless("Summary: Unhealthy: 8 shares (enc 3-of-10)" in summary, summary) self.failUnless(" good-shares: 8 (encoding is 3-of-10)" in lines, out) self.failUnless(" corrupt shares:" in lines, out) self.failUnless(self._corrupt_share_line in lines, out) d.addCallback(_check3) d.addCallback(lambda ign: self.do_cli("check", "--verify", "--raw", self.uri)) def _check3_raw((rc, out, err)): self.failUnlessReallyEqual(err, "") self.failUnlessReallyEqual(rc, 0) data = simplejson.loads(out) self.failUnlessReallyEqual(data["results"]["healthy"], False) self.failUnlessIn("Unhealthy: 8 shares (enc 3-of-10)", data["summary"]) self.failUnlessReallyEqual(data["results"]["count-shares-good"], 8) self.failUnlessReallyEqual(data["results"]["count-corrupt-shares"], 1) self.failUnlessIn("list-corrupt-shares", data["results"]) d.addCallback(_check3_raw) d.addCallback(lambda ign: self.do_cli("check", "--verify", "--repair", self.uri)) def _check4((rc, out, err)): self.failUnlessReallyEqual(err, "") self.failUnlessReallyEqual(rc, 0) lines = out.splitlines() self.failUnless("Summary: not healthy" in lines, out) self.failUnless(" good-shares: 8 (encoding is 3-of-10)" in lines, out) self.failUnless(" corrupt shares:" in lines, out) self.failUnless(self._corrupt_share_line in lines, out) self.failUnless(" repair successful" in lines, out) d.addCallback(_check4) d.addCallback(lambda ign: self.do_cli("check", "--verify", "--repair", self.uri)) def _check5((rc, out, err)): self.failUnlessReallyEqual(err, "") self.failUnlessReallyEqual(rc, 0) lines = out.splitlines() self.failUnless("Summary: healthy" in lines, out) self.failUnless(" good-shares: 10 (encoding is 3-of-10)" in lines, out) self.failIf(" corrupt shares:" in lines, out) d.addCallback(_check5) return d def test_deep_check(self): self.basedir = "cli/Check/deep_check" self.set_up_grid() c0 = self.g.clients[0] self.uris = {} self.fileurls = {} DATA = "data" * 100 quoted_good = quote_output(u"g\u00F6\u00F6d") d = c0.create_dirnode() def _stash_root_and_create_file(n): self.rootnode = n self.rooturi = n.get_uri() return n.add_file(u"g\u00F6\u00F6d", upload.Data(DATA, convergence="")) d.addCallback(_stash_root_and_create_file) def _stash_uri(fn, which): self.uris[which] = fn.get_uri() return fn d.addCallback(_stash_uri, u"g\u00F6\u00F6d") d.addCallback(lambda ign: self.rootnode.add_file(u"small", upload.Data("literal", convergence=""))) d.addCallback(_stash_uri, "small") 
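        # The deep-check fixture now holds the root directory, the immutable
        # file under the non-ASCII "good" name, and the literal "small" file;
        # a mutable file is added next, giving the four objects that the
        # "done: 4 objects checked" assertions below expect.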
d.addCallback(lambda ign: c0.create_mutable_file(MutableData(DATA+"1"))) d.addCallback(lambda fn: self.rootnode.set_node(u"mutable", fn)) d.addCallback(_stash_uri, "mutable") d.addCallback(lambda ign: self.do_cli("deep-check", self.rooturi)) def _check1((rc, out, err)): self.failUnlessReallyEqual(err, "") self.failUnlessReallyEqual(rc, 0) lines = out.splitlines() self.failUnless("done: 4 objects checked, 4 healthy, 0 unhealthy" in lines, out) d.addCallback(_check1) # root # root/g\u00F6\u00F6d # root/small # root/mutable d.addCallback(lambda ign: self.do_cli("deep-check", "--verbose", self.rooturi)) def _check2((rc, out, err)): self.failUnlessReallyEqual(err, "") self.failUnlessReallyEqual(rc, 0) lines = out.splitlines() self.failUnless("'': Healthy" in lines, out) self.failUnless("'small': Healthy (LIT)" in lines, out) self.failUnless((quoted_good + ": Healthy") in lines, out) self.failUnless("'mutable': Healthy" in lines, out) self.failUnless("done: 4 objects checked, 4 healthy, 0 unhealthy" in lines, out) d.addCallback(_check2) d.addCallback(lambda ign: self.do_cli("stats", self.rooturi)) def _check_stats((rc, out, err)): self.failUnlessReallyEqual(err, "") self.failUnlessReallyEqual(rc, 0) lines = out.splitlines() self.failUnlessIn(" count-immutable-files: 1", lines) self.failUnlessIn(" count-mutable-files: 1", lines) self.failUnlessIn(" count-literal-files: 1", lines) self.failUnlessIn(" count-directories: 1", lines) self.failUnlessIn(" size-immutable-files: 400", lines) self.failUnlessIn("Size Histogram:", lines) self.failUnlessIn(" 4-10 : 1 (10 B, 10 B)", lines) self.failUnlessIn(" 317-1000 : 1 (1000 B, 1000 B)", lines) d.addCallback(_check_stats) def _clobber_shares(ignored): shares = self.find_uri_shares(self.uris[u"g\u00F6\u00F6d"]) self.failUnlessReallyEqual(len(shares), 10) os.unlink(shares[0][2]) shares = self.find_uri_shares(self.uris["mutable"]) cso = debug.CorruptShareOptions() cso.stdout = StringIO() cso.parseOptions([shares[1][2]]) storage_index = uri.from_string(self.uris["mutable"]).get_storage_index() self._corrupt_share_line = " corrupt: server %s, SI %s, shnum %d" % \ (base32.b2a(shares[1][1]), base32.b2a(storage_index), shares[1][0]) debug.corrupt_share(cso) d.addCallback(_clobber_shares) # root # root/g\u00F6\u00F6d [9 shares] # root/small # root/mutable [1 corrupt share] d.addCallback(lambda ign: self.do_cli("deep-check", "--verbose", self.rooturi)) def _check3((rc, out, err)): self.failUnlessReallyEqual(err, "") self.failUnlessReallyEqual(rc, 0) lines = out.splitlines() self.failUnless("'': Healthy" in lines, out) self.failUnless("'small': Healthy (LIT)" in lines, out) self.failUnless("'mutable': Healthy" in lines, out) # needs verifier self.failUnless((quoted_good + ": Not Healthy: 9 shares (enc 3-of-10)") in lines, out) self.failIf(self._corrupt_share_line in lines, out) self.failUnless("done: 4 objects checked, 3 healthy, 1 unhealthy" in lines, out) d.addCallback(_check3) d.addCallback(lambda ign: self.do_cli("deep-check", "--verbose", "--verify", self.rooturi)) def _check4((rc, out, err)): self.failUnlessReallyEqual(err, "") self.failUnlessReallyEqual(rc, 0) lines = out.splitlines() self.failUnless("'': Healthy" in lines, out) self.failUnless("'small': Healthy (LIT)" in lines, out) mutable = [l for l in lines if l.startswith("'mutable'")][0] self.failUnless(mutable.startswith("'mutable': Unhealthy: 9 shares (enc 3-of-10)"), mutable) self.failUnless(self._corrupt_share_line in lines, out) self.failUnless((quoted_good + ": Not Healthy: 9 shares (enc 3-of-10)") 
in lines, out) self.failUnless("done: 4 objects checked, 2 healthy, 2 unhealthy" in lines, out) d.addCallback(_check4) d.addCallback(lambda ign: self.do_cli("deep-check", "--raw", self.rooturi)) def _check5((rc, out, err)): self.failUnlessReallyEqual(err, "") self.failUnlessReallyEqual(rc, 0) lines = out.splitlines() units = [simplejson.loads(line) for line in lines] # root, small, g\u00F6\u00F6d, mutable, stats self.failUnlessReallyEqual(len(units), 4+1) d.addCallback(_check5) d.addCallback(lambda ign: self.do_cli("deep-check", "--verbose", "--verify", "--repair", self.rooturi)) def _check6((rc, out, err)): self.failUnlessReallyEqual(err, "") self.failUnlessReallyEqual(rc, 0) lines = out.splitlines() self.failUnless("'': healthy" in lines, out) self.failUnless("'small': healthy" in lines, out) self.failUnless("'mutable': not healthy" in lines, out) self.failUnless(self._corrupt_share_line in lines, out) self.failUnless((quoted_good + ": not healthy") in lines, out) self.failUnless("done: 4 objects checked" in lines, out) self.failUnless(" pre-repair: 2 healthy, 2 unhealthy" in lines, out) self.failUnless(" 2 repairs attempted, 2 successful, 0 failed" in lines, out) self.failUnless(" post-repair: 4 healthy, 0 unhealthy" in lines,out) d.addCallback(_check6) # now add a subdir, and a file below that, then make the subdir # unrecoverable d.addCallback(lambda ign: self.rootnode.create_subdirectory(u"subdir")) d.addCallback(_stash_uri, "subdir") d.addCallback(lambda fn: fn.add_file(u"subfile", upload.Data(DATA+"2", ""))) d.addCallback(lambda ign: self.delete_shares_numbered(self.uris["subdir"], range(10))) # root # rootg\u00F6\u00F6d/ # root/small # root/mutable # root/subdir [unrecoverable: 0 shares] # root/subfile d.addCallback(lambda ign: self.do_cli("manifest", self.rooturi)) def _manifest_failed((rc, out, err)): self.failIfEqual(rc, 0) self.failUnlessIn("ERROR: UnrecoverableFileError", err) # the fatal directory should still show up, as the last line self.failUnlessIn(" subdir\n", out) d.addCallback(_manifest_failed) d.addCallback(lambda ign: self.do_cli("deep-check", self.rooturi)) def _deep_check_failed((rc, out, err)): self.failIfEqual(rc, 0) self.failUnlessIn("ERROR: UnrecoverableFileError", err) # we want to make sure that the error indication is the last # thing that gets emitted self.failIf("done:" in out, out) d.addCallback(_deep_check_failed) # this test is disabled until the deep-repair response to an # unrepairable directory is fixed. The failure-to-repair should not # throw an exception, but the failure-to-traverse that follows # should throw UnrecoverableFileError. #d.addCallback(lambda ign: # self.do_cli("deep-check", "--repair", self.rooturi)) #def _deep_check_repair_failed((rc, out, err)): # self.failIfEqual(rc, 0) # print err # self.failUnlessIn("ERROR: UnrecoverableFileError", err) # self.failIf("done:" in out, out) #d.addCallback(_deep_check_repair_failed) return d def test_check_without_alias(self): # 'tahoe check' should output a sensible error message if it needs to # find the default alias and can't self.basedir = "cli/Check/check_without_alias" self.set_up_grid() d = self.do_cli("check") def _check((rc, out, err)): self.failUnlessReallyEqual(rc, 1) self.failUnlessIn("error:", err) self.failUnlessReallyEqual(out, "") d.addCallback(_check) d.addCallback(lambda ign: self.do_cli("deep-check")) d.addCallback(_check) return d def test_check_with_nonexistent_alias(self): # 'tahoe check' should output a sensible error message if it needs to # find an alias and can't. 
self.basedir = "cli/Check/check_with_nonexistent_alias" self.set_up_grid() d = self.do_cli("check", "nonexistent:") def _check((rc, out, err)): self.failUnlessReallyEqual(rc, 1) self.failUnlessIn("error:", err) self.failUnlessIn("nonexistent", err) self.failUnlessReallyEqual(out, "") d.addCallback(_check) return d class Errors(GridTestMixin, CLITestMixin, unittest.TestCase): def test_get(self): self.basedir = "cli/Errors/get" self.set_up_grid() c0 = self.g.clients[0] self.fileurls = {} DATA = "data" * 100 d = c0.upload(upload.Data(DATA, convergence="")) def _stash_bad(ur): self.uri_1share = ur.get_uri() self.delete_shares_numbered(ur.get_uri(), range(1,10)) d.addCallback(_stash_bad) # the download is abandoned as soon as it's clear that we won't get # enough shares. The one remaining share might be in either the # COMPLETE or the PENDING state. in_complete_msg = "ran out of shares: complete=sh0 pending= overdue= unused= need 3" in_pending_msg = "ran out of shares: complete= pending=Share(sh0-on-fob7vqgd) overdue= unused= need 3" d.addCallback(lambda ign: self.do_cli("get", self.uri_1share)) def _check1((rc, out, err)): self.failIfEqual(rc, 0) self.failUnless("410 Gone" in err, err) self.failUnlessIn("NotEnoughSharesError: ", err) self.failUnless(in_complete_msg in err or in_pending_msg in err, err) d.addCallback(_check1) targetf = os.path.join(self.basedir, "output") d.addCallback(lambda ign: self.do_cli("get", self.uri_1share, targetf)) def _check2((rc, out, err)): self.failIfEqual(rc, 0) self.failUnless("410 Gone" in err, err) self.failUnlessIn("NotEnoughSharesError: ", err) self.failUnless(in_complete_msg in err or in_pending_msg in err, err) self.failIf(os.path.exists(targetf)) d.addCallback(_check2) return d def test_broken_socket(self): # When the http connection breaks (such as when node.url is overwritten # by a confused user), a user friendly error message should be printed. self.basedir = "cli/Errors/test_broken_socket" self.set_up_grid() # Simulate a connection error def _socket_error(*args, **kwargs): raise socket_error('test error') self.patch(allmydata.scripts.common_http.httplib.HTTPConnection, "endheaders", _socket_error) d = self.do_cli("mkdir") def _check_invalid((rc,stdout,stderr)): self.failIfEqual(rc, 0) self.failUnlessIn("Error trying to connect to http://127.0.0.1", stderr) d.addCallback(_check_invalid) return d class Get(GridTestMixin, CLITestMixin, unittest.TestCase): def test_get_without_alias(self): # 'tahoe get' should output a useful error message when invoked # without an explicit alias and when the default 'tahoe' alias # hasn't been created yet. self.basedir = "cli/Get/get_without_alias" self.set_up_grid() d = self.do_cli('get', 'file') def _check((rc, out, err)): self.failUnlessReallyEqual(rc, 1) self.failUnlessIn("error:", err) self.failUnlessReallyEqual(out, "") d.addCallback(_check) return d def test_get_with_nonexistent_alias(self): # 'tahoe get' should output a useful error message when invoked with # an explicit alias that doesn't exist. 
self.basedir = "cli/Get/get_with_nonexistent_alias" self.set_up_grid() d = self.do_cli("get", "nonexistent:file") def _check((rc, out, err)): self.failUnlessReallyEqual(rc, 1) self.failUnlessIn("error:", err) self.failUnlessIn("nonexistent", err) self.failUnlessReallyEqual(out, "") d.addCallback(_check) return d class Manifest(GridTestMixin, CLITestMixin, unittest.TestCase): def test_manifest_without_alias(self): # 'tahoe manifest' should output a useful error message when invoked # without an explicit alias when the default 'tahoe' alias is # missing. self.basedir = "cli/Manifest/manifest_without_alias" self.set_up_grid() d = self.do_cli("manifest") def _check((rc, out, err)): self.failUnlessReallyEqual(rc, 1) self.failUnlessIn("error:", err) self.failUnlessReallyEqual(out, "") d.addCallback(_check) return d def test_manifest_with_nonexistent_alias(self): # 'tahoe manifest' should output a useful error message when invoked # with an explicit alias that doesn't exist. self.basedir = "cli/Manifest/manifest_with_nonexistent_alias" self.set_up_grid() d = self.do_cli("manifest", "nonexistent:") def _check((rc, out, err)): self.failUnlessReallyEqual(rc, 1) self.failUnlessIn("error:", err) self.failUnlessIn("nonexistent", err) self.failUnlessReallyEqual(out, "") d.addCallback(_check) return d class Mkdir(GridTestMixin, CLITestMixin, unittest.TestCase): def test_mkdir(self): self.basedir = os.path.dirname(self.mktemp()) self.set_up_grid() d = self.do_cli("create-alias", "tahoe") d.addCallback(lambda res: self.do_cli("mkdir", "test")) def _check((rc, out, err)): self.failUnlessReallyEqual(rc, 0) self.failUnlessReallyEqual(err, "") self.failUnlessIn("URI:", out) d.addCallback(_check) return d def test_mkdir_mutable_type(self): self.basedir = os.path.dirname(self.mktemp()) self.set_up_grid() d = self.do_cli("create-alias", "tahoe") def _check((rc, out, err), st): self.failUnlessReallyEqual(rc, 0) self.failUnlessReallyEqual(err, "") self.failUnlessIn(st, out) return out def _mkdir(ign, mutable_type, uri_prefix, dirname): d2 = self.do_cli("mkdir", "--format="+mutable_type, dirname) d2.addCallback(_check, uri_prefix) def _stash_filecap(cap): u = uri.from_string(cap) fn_uri = u.get_filenode_cap() self._filecap = fn_uri.to_string() d2.addCallback(_stash_filecap) d2.addCallback(lambda ign: self.do_cli("ls", "--json", dirname)) d2.addCallback(_check, uri_prefix) d2.addCallback(lambda ign: self.do_cli("ls", "--json", self._filecap)) d2.addCallback(_check, '"format": "%s"' % (mutable_type.upper(),)) return d2 d.addCallback(_mkdir, "sdmf", "URI:DIR2", "tahoe:foo") d.addCallback(_mkdir, "SDMF", "URI:DIR2", "tahoe:foo2") d.addCallback(_mkdir, "mdmf", "URI:DIR2-MDMF", "tahoe:bar") d.addCallback(_mkdir, "MDMF", "URI:DIR2-MDMF", "tahoe:bar2") return d def test_mkdir_mutable_type_unlinked(self): self.basedir = os.path.dirname(self.mktemp()) self.set_up_grid() d = self.do_cli("mkdir", "--format=SDMF") def _check((rc, out, err), st): self.failUnlessReallyEqual(rc, 0) self.failUnlessReallyEqual(err, "") self.failUnlessIn(st, out) return out d.addCallback(_check, "URI:DIR2") def _stash_dircap(cap): self._dircap = cap # Now we're going to feed the cap into uri.from_string... u = uri.from_string(cap) # ...grab the underlying filenode uri. fn_uri = u.get_filenode_cap() # ...and stash that. 
self._filecap = fn_uri.to_string() d.addCallback(_stash_dircap) d.addCallback(lambda res: self.do_cli("ls", "--json", self._filecap)) d.addCallback(_check, '"format": "SDMF"') d.addCallback(lambda res: self.do_cli("mkdir", "--format=MDMF")) d.addCallback(_check, "URI:DIR2-MDMF") d.addCallback(_stash_dircap) d.addCallback(lambda res: self.do_cli("ls", "--json", self._filecap)) d.addCallback(_check, '"format": "MDMF"') return d def test_mkdir_bad_mutable_type(self): o = cli.MakeDirectoryOptions() self.failUnlessRaises(usage.UsageError, o.parseOptions, ["--format=LDMF"]) def test_mkdir_unicode(self): self.basedir = os.path.dirname(self.mktemp()) self.set_up_grid() try: motorhead_arg = u"tahoe:Mot\u00F6rhead".encode(get_io_encoding()) except UnicodeEncodeError: raise unittest.SkipTest("A non-ASCII command argument could not be encoded on this platform.") d = self.do_cli("create-alias", "tahoe") d.addCallback(lambda res: self.do_cli("mkdir", motorhead_arg)) def _check((rc, out, err)): self.failUnlessReallyEqual(rc, 0) self.failUnlessReallyEqual(err, "") self.failUnlessIn("URI:", out) d.addCallback(_check) return d def test_mkdir_with_nonexistent_alias(self): # when invoked with an alias that doesn't exist, 'tahoe mkdir' should # output a sensible error message rather than a stack trace. self.basedir = "cli/Mkdir/mkdir_with_nonexistent_alias" self.set_up_grid() d = self.do_cli("mkdir", "havasu:") def _check((rc, out, err)): self.failUnlessReallyEqual(rc, 1) self.failUnlessIn("error:", err) self.failUnlessReallyEqual(out, "") d.addCallback(_check) return d class Unlink(GridTestMixin, CLITestMixin, unittest.TestCase): command = "unlink" def _create_test_file(self): data = "puppies" * 1000 path = os.path.join(self.basedir, "datafile") fileutil.write(path, data) self.datafile = path def test_unlink_without_alias(self): # 'tahoe unlink' should behave sensibly when invoked without an explicit # alias before the default 'tahoe' alias has been created. self.basedir = "cli/Unlink/%s_without_alias" % (self.command,) self.set_up_grid() d = self.do_cli(self.command, "afile") def _check((rc, out, err)): self.failUnlessReallyEqual(rc, 1) self.failUnlessIn("error:", err) self.failUnlessReallyEqual(out, "") d.addCallback(_check) d.addCallback(lambda ign: self.do_cli(self.command, "afile")) d.addCallback(_check) return d def test_unlink_with_nonexistent_alias(self): # 'tahoe unlink' should behave sensibly when invoked with an explicit # alias that doesn't exist. self.basedir = "cli/Unlink/%s_with_nonexistent_alias" % (self.command,) self.set_up_grid() d = self.do_cli(self.command, "nonexistent:afile") def _check((rc, out, err)): self.failUnlessReallyEqual(rc, 1) self.failUnlessIn("error:", err) self.failUnlessIn("nonexistent", err) self.failUnlessReallyEqual(out, "") d.addCallback(_check) d.addCallback(lambda ign: self.do_cli(self.command, "nonexistent:afile")) d.addCallback(_check) return d def test_unlink_without_path(self): # 'tahoe unlink' should give a sensible error message when invoked without a path. 
self.basedir = "cli/Unlink/%s_without_path" % (self.command,) self.set_up_grid() self._create_test_file() d = self.do_cli("create-alias", "tahoe") d.addCallback(lambda ign: self.do_cli("put", self.datafile, "tahoe:test")) def _do_unlink((rc, out, err)): self.failUnlessReallyEqual(rc, 0) self.failUnless(out.startswith("URI:"), out) return self.do_cli(self.command, out.strip('\n')) d.addCallback(_do_unlink) def _check((rc, out, err)): self.failUnlessReallyEqual(rc, 1) self.failUnlessIn("'tahoe %s'" % (self.command,), err) self.failUnlessIn("path must be given", err) self.failUnlessReallyEqual(out, "") d.addCallback(_check) return d class Rm(Unlink): """Test that 'tahoe rm' behaves in the same way as 'tahoe unlink'.""" command = "rm" class Stats(GridTestMixin, CLITestMixin, unittest.TestCase): def test_empty_directory(self): self.basedir = "cli/Stats/empty_directory" self.set_up_grid() c0 = self.g.clients[0] self.fileurls = {} d = c0.create_dirnode() def _stash_root(n): self.rootnode = n self.rooturi = n.get_uri() d.addCallback(_stash_root) # make sure we can get stats on an empty directory too d.addCallback(lambda ign: self.do_cli("stats", self.rooturi)) def _check_stats((rc, out, err)): self.failUnlessReallyEqual(err, "") self.failUnlessReallyEqual(rc, 0) lines = out.splitlines() self.failUnlessIn(" count-immutable-files: 0", lines) self.failUnlessIn(" count-mutable-files: 0", lines) self.failUnlessIn(" count-literal-files: 0", lines) self.failUnlessIn(" count-directories: 1", lines) self.failUnlessIn(" size-immutable-files: 0", lines) self.failIfIn("Size Histogram:", lines) d.addCallback(_check_stats) return d def test_stats_without_alias(self): # when invoked with no explicit alias and before the default 'tahoe' # alias is created, 'tahoe stats' should output an informative error # message, not a stack trace. self.basedir = "cli/Stats/stats_without_alias" self.set_up_grid() d = self.do_cli("stats") def _check((rc, out, err)): self.failUnlessReallyEqual(rc, 1) self.failUnlessIn("error:", err) self.failUnlessReallyEqual(out, "") d.addCallback(_check) return d def test_stats_with_nonexistent_alias(self): # when invoked with an explicit alias that doesn't exist, # 'tahoe stats' should output a useful error message. self.basedir = "cli/Stats/stats_with_nonexistent_alias" self.set_up_grid() d = self.do_cli("stats", "havasu:") def _check((rc, out, err)): self.failUnlessReallyEqual(rc, 1) self.failUnlessIn("error:", err) self.failUnlessReallyEqual(out, "") d.addCallback(_check) return d class Webopen(GridTestMixin, CLITestMixin, unittest.TestCase): def test_webopen_with_nonexistent_alias(self): # when invoked with an alias that doesn't exist, 'tahoe webopen' # should output an informative error message instead of a stack # trace. self.basedir = "cli/Webopen/webopen_with_nonexistent_alias" self.set_up_grid() d = self.do_cli("webopen", "fake:") def _check((rc, out, err)): self.failUnlessReallyEqual(rc, 1) self.failUnlessIn("error:", err) self.failUnlessReallyEqual(out, "") d.addCallback(_check) return d def test_webopen(self): # TODO: replace with @patch that supports Deferreds. 
import webbrowser def call_webbrowser_open(url): self.failUnlessIn(self.alias_uri.replace(':', '%3A'), url) self.webbrowser_open_called = True def _cleanup(res): webbrowser.open = self.old_webbrowser_open return res self.old_webbrowser_open = webbrowser.open try: webbrowser.open = call_webbrowser_open self.basedir = "cli/Webopen/webopen" self.set_up_grid() d = self.do_cli("create-alias", "alias:") def _check_alias((rc, out, err)): self.failUnlessReallyEqual(rc, 0, repr((rc, out, err))) self.failUnlessIn("Alias 'alias' created", out) self.failUnlessReallyEqual(err, "") self.alias_uri = get_aliases(self.get_clientdir())["alias"] d.addCallback(_check_alias) d.addCallback(lambda res: self.do_cli("webopen", "alias:")) def _check_webopen((rc, out, err)): self.failUnlessReallyEqual(rc, 0, repr((rc, out, err))) self.failUnlessReallyEqual(out, "") self.failUnlessReallyEqual(err, "") self.failUnless(self.webbrowser_open_called) d.addCallback(_check_webopen) d.addBoth(_cleanup) except: _cleanup(None) raise return d class Options(unittest.TestCase): # this test case only looks at argument-processing and simple stuff. def parse(self, args, stdout=None): o = runner.Options() if stdout is not None: o.stdout = stdout o.parseOptions(args) while hasattr(o, "subOptions"): o = o.subOptions return o def test_list(self): fileutil.rm_dir("cli/test_options") fileutil.make_dirs("cli/test_options") fileutil.make_dirs("cli/test_options/private") fileutil.write("cli/test_options/node.url", "http://localhost:8080/\n") filenode_uri = uri.WriteableSSKFileURI(writekey="\x00"*16, fingerprint="\x00"*32) private_uri = uri.DirectoryURI(filenode_uri).to_string() fileutil.write("cli/test_options/private/root_dir.cap", private_uri + "\n") def parse2(args): return parse_options("cli/test_options", "ls", args) o = parse2([]) self.failUnlessEqual(o['node-url'], "http://localhost:8080/") self.failUnlessEqual(o.aliases[DEFAULT_ALIAS], private_uri) self.failUnlessEqual(o.where, u"") o = parse2(["--node-url", "http://example.org:8111/"]) self.failUnlessEqual(o['node-url'], "http://example.org:8111/") self.failUnlessEqual(o.aliases[DEFAULT_ALIAS], private_uri) self.failUnlessEqual(o.where, u"") o = parse2(["--dir-cap", "root"]) self.failUnlessEqual(o['node-url'], "http://localhost:8080/") self.failUnlessEqual(o.aliases[DEFAULT_ALIAS], "root") self.failUnlessEqual(o.where, u"") other_filenode_uri = uri.WriteableSSKFileURI(writekey="\x11"*16, fingerprint="\x11"*32) other_uri = uri.DirectoryURI(other_filenode_uri).to_string() o = parse2(["--dir-cap", other_uri]) self.failUnlessEqual(o['node-url'], "http://localhost:8080/") self.failUnlessEqual(o.aliases[DEFAULT_ALIAS], other_uri) self.failUnlessEqual(o.where, u"") o = parse2(["--dir-cap", other_uri, "subdir"]) self.failUnlessEqual(o['node-url'], "http://localhost:8080/") self.failUnlessEqual(o.aliases[DEFAULT_ALIAS], other_uri) self.failUnlessEqual(o.where, u"subdir") self.failUnlessRaises(usage.UsageError, parse2, ["--node-url", "NOT-A-URL"]) o = parse2(["--node-url", "http://localhost:8080"]) self.failUnlessEqual(o["node-url"], "http://localhost:8080/") o = parse2(["--node-url", "https://localhost/"]) self.failUnlessEqual(o["node-url"], "https://localhost/") def test_version(self): # "tahoe --version" dumps text to stdout and exits stdout = StringIO() self.failUnlessRaises(SystemExit, self.parse, ["--version"], stdout) self.failUnlessIn("allmydata-tahoe", stdout.getvalue()) # but "tahoe SUBCOMMAND --version" should be rejected self.failUnlessRaises(usage.UsageError, self.parse, 
["start", "--version"]) self.failUnlessRaises(usage.UsageError, self.parse, ["start", "--version-and-path"]) def test_quiet(self): # accepted as an overall option, but not on subcommands o = self.parse(["--quiet", "start"]) self.failUnless(o.parent["quiet"]) self.failUnlessRaises(usage.UsageError, self.parse, ["start", "--quiet"]) def test_basedir(self): # accept a --node-directory option before the verb, or a --basedir # option after, or a basedir argument after, but none in the wrong # place, and not more than one of the three. o = self.parse(["start"]) self.failUnlessEqual(o["basedir"], os.path.join(os.path.expanduser("~"), ".tahoe")) o = self.parse(["start", "here"]) self.failUnlessEqual(o["basedir"], os.path.abspath("here")) o = self.parse(["start", "--basedir", "there"]) self.failUnlessEqual(o["basedir"], os.path.abspath("there")) o = self.parse(["--node-directory", "there", "start"]) self.failUnlessEqual(o["basedir"], os.path.abspath("there")) self.failUnlessRaises(usage.UsageError, self.parse, ["--basedir", "there", "start"]) self.failUnlessRaises(usage.UsageError, self.parse, ["start", "--node-directory", "there"]) self.failUnlessRaises(usage.UsageError, self.parse, ["--node-directory=there", "start", "--basedir=here"]) self.failUnlessRaises(usage.UsageError, self.parse, ["start", "--basedir=here", "anywhere"]) self.failUnlessRaises(usage.UsageError, self.parse, ["--node-directory=there", "start", "anywhere"]) self.failUnlessRaises(usage.UsageError, self.parse, ["--node-directory=there", "start", "--basedir=here", "anywhere"]) tahoe-lafs-1.10.0/src/allmydata/test/test_client.py000066400000000000000000000475351221140116300222260ustar00rootroot00000000000000import os from twisted.trial import unittest from twisted.application import service import allmydata from allmydata.node import OldConfigError, OldConfigOptionError, MissingConfigEntry from allmydata import client from allmydata.storage_client import StorageFarmBroker from allmydata.util import base32, fileutil from allmydata.interfaces import IFilesystemNode, IFileNode, \ IImmutableFileNode, IMutableFileNode, IDirectoryNode from foolscap.api import flushEventualQueue import allmydata.test.common_util as testutil import mock BASECONFIG = ("[client]\n" "introducer.furl = \n" ) BASECONFIG_I = ("[client]\n" "introducer.furl = %s\n" ) class Basic(testutil.ReallyEqualMixin, unittest.TestCase): def test_loadable(self): basedir = "test_client.Basic.test_loadable" os.mkdir(basedir) fileutil.write(os.path.join(basedir, "tahoe.cfg"), \ BASECONFIG) client.Client(basedir) @mock.patch('twisted.python.log.msg') def test_error_on_old_config_files(self, mock_log_msg): basedir = "test_client.Basic.test_error_on_old_config_files" os.mkdir(basedir) fileutil.write(os.path.join(basedir, "tahoe.cfg"), BASECONFIG + "[storage]\n" + "enabled = false\n" + "reserved_space = bogus\n") fileutil.write(os.path.join(basedir, "introducer.furl"), "") fileutil.write(os.path.join(basedir, "no_storage"), "") fileutil.write(os.path.join(basedir, "readonly_storage"), "") fileutil.write(os.path.join(basedir, "debug_discard_storage"), "") e = self.failUnlessRaises(OldConfigError, client.Client, basedir) self.failUnlessIn(os.path.abspath(os.path.join(basedir, "introducer.furl")), e.args[0]) self.failUnlessIn(os.path.abspath(os.path.join(basedir, "no_storage")), e.args[0]) self.failUnlessIn(os.path.abspath(os.path.join(basedir, "readonly_storage")), e.args[0]) self.failUnlessIn(os.path.abspath(os.path.join(basedir, "debug_discard_storage")), e.args[0]) for oldfile in 
['introducer.furl', 'no_storage', 'readonly_storage', 'debug_discard_storage']: logged = [ m for m in mock_log_msg.call_args_list if ("Found pre-Tahoe-LAFS-v1.3 configuration file" in str(m[0][0]) and oldfile in str(m[0][0])) ] self.failUnless(logged, (oldfile, mock_log_msg.call_args_list)) for oldfile in [ 'nickname', 'webport', 'keepalive_timeout', 'log_gatherer.furl', 'disconnect_timeout', 'advertised_ip_addresses', 'helper.furl', 'key_generator.furl', 'stats_gatherer.furl', 'sizelimit', 'run_helper']: logged = [ m for m in mock_log_msg.call_args_list if ("Found pre-Tahoe-LAFS-v1.3 configuration file" in str(m[0][0]) and oldfile in str(m[0][0])) ] self.failIf(logged, oldfile) def test_secrets(self): basedir = "test_client.Basic.test_secrets" os.mkdir(basedir) fileutil.write(os.path.join(basedir, "tahoe.cfg"), \ BASECONFIG) c = client.Client(basedir) secret_fname = os.path.join(basedir, "private", "secret") self.failUnless(os.path.exists(secret_fname), secret_fname) renew_secret = c.get_renewal_secret() self.failUnless(base32.b2a(renew_secret)) cancel_secret = c.get_cancel_secret() self.failUnless(base32.b2a(cancel_secret)) def test_nodekey_yes_storage(self): basedir = "test_client.Basic.test_nodekey_yes_storage" os.mkdir(basedir) fileutil.write(os.path.join(basedir, "tahoe.cfg"), BASECONFIG) c = client.Client(basedir) self.failUnless(c.get_long_nodeid().startswith("v0-")) def test_nodekey_no_storage(self): basedir = "test_client.Basic.test_nodekey_no_storage" os.mkdir(basedir) fileutil.write(os.path.join(basedir, "tahoe.cfg"), BASECONFIG + "[storage]\n" + "enabled = false\n") c = client.Client(basedir) self.failUnless(c.get_long_nodeid().startswith("v0-")) def test_reserved_1(self): basedir = "client.Basic.test_reserved_1" os.mkdir(basedir) fileutil.write(os.path.join(basedir, "tahoe.cfg"), \ BASECONFIG + \ "[storage]\n" + \ "enabled = true\n" + \ "reserved_space = 1000\n") c = client.Client(basedir) self.failUnlessEqual(c.getServiceNamed("storage").reserved_space, 1000) def test_reserved_2(self): basedir = "client.Basic.test_reserved_2" os.mkdir(basedir) fileutil.write(os.path.join(basedir, "tahoe.cfg"), \ BASECONFIG + \ "[storage]\n" + \ "enabled = true\n" + \ "reserved_space = 10K\n") c = client.Client(basedir) self.failUnlessEqual(c.getServiceNamed("storage").reserved_space, 10*1000) def test_reserved_3(self): basedir = "client.Basic.test_reserved_3" os.mkdir(basedir) fileutil.write(os.path.join(basedir, "tahoe.cfg"), \ BASECONFIG + \ "[storage]\n" + \ "enabled = true\n" + \ "reserved_space = 5mB\n") c = client.Client(basedir) self.failUnlessEqual(c.getServiceNamed("storage").reserved_space, 5*1000*1000) def test_reserved_4(self): basedir = "client.Basic.test_reserved_4" os.mkdir(basedir) fileutil.write(os.path.join(basedir, "tahoe.cfg"), \ BASECONFIG + \ "[storage]\n" + \ "enabled = true\n" + \ "reserved_space = 78Gb\n") c = client.Client(basedir) self.failUnlessEqual(c.getServiceNamed("storage").reserved_space, 78*1000*1000*1000) def test_reserved_bad(self): basedir = "client.Basic.test_reserved_bad" os.mkdir(basedir) fileutil.write(os.path.join(basedir, "tahoe.cfg"), \ BASECONFIG + \ "[storage]\n" + \ "enabled = true\n" + \ "reserved_space = bogus\n") self.failUnlessRaises(ValueError, client.Client, basedir) def _permute(self, sb, key): return [ s.get_longname() for s in sb.get_servers_for_psi(key) ] def test_permute(self): sb = StorageFarmBroker(None, True) for k in ["%d" % i for i in range(5)]: ann = {"anonymous-storage-FURL": "pb://abcde@nowhere/fake", 
"permutation-seed-base32": base32.b2a(k) } sb.test_add_rref(k, "rref", ann) self.failUnlessReallyEqual(self._permute(sb, "one"), ['3','1','0','4','2']) self.failUnlessReallyEqual(self._permute(sb, "two"), ['0','4','2','1','3']) sb.servers.clear() self.failUnlessReallyEqual(self._permute(sb, "one"), []) def test_versions(self): basedir = "test_client.Basic.test_versions" os.mkdir(basedir) fileutil.write(os.path.join(basedir, "tahoe.cfg"), \ BASECONFIG + \ "[storage]\n" + \ "enabled = true\n") c = client.Client(basedir) ss = c.getServiceNamed("storage") verdict = ss.remote_get_version() self.failUnlessReallyEqual(verdict["application-version"], str(allmydata.__full_version__)) self.failIfEqual(str(allmydata.__version__), "unknown") self.failUnless("." in str(allmydata.__full_version__), "non-numeric version in '%s'" % allmydata.__version__) all_versions = allmydata.get_package_versions_string() self.failUnless(allmydata.__appname__ in all_versions) # also test stats stats = c.get_stats() self.failUnless("node.uptime" in stats) self.failUnless(isinstance(stats["node.uptime"], float)) def test_helper_furl(self): basedir = "test_client.Basic.test_helper_furl" os.mkdir(basedir) def _check(config, expected_furl): fileutil.write(os.path.join(basedir, "tahoe.cfg"), BASECONFIG + config) c = client.Client(basedir) uploader = c.getServiceNamed("uploader") furl, connected = uploader.get_helper_info() self.failUnlessEqual(furl, expected_furl) _check("", None) _check("helper.furl =\n", None) _check("helper.furl = \n", None) _check("helper.furl = None", None) _check("helper.furl = pb://blah\n", "pb://blah") @mock.patch('allmydata.util.log.msg') @mock.patch('allmydata.frontends.drop_upload.DropUploader') def test_create_drop_uploader(self, mock_drop_uploader, mock_log_msg): class MockDropUploader(service.MultiService): name = 'drop-upload' def __init__(self, client, upload_dircap, local_dir_utf8, inotify=None): service.MultiService.__init__(self) self.client = client self.upload_dircap = upload_dircap self.local_dir_utf8 = local_dir_utf8 self.inotify = inotify mock_drop_uploader.side_effect = MockDropUploader upload_dircap = "URI:DIR2:blah" local_dir_utf8 = u"loc\u0101l_dir".encode('utf-8') config = (BASECONFIG + "[storage]\n" + "enabled = false\n" + "[drop_upload]\n" + "enabled = true\n") basedir1 = "test_client.Basic.test_create_drop_uploader1" os.mkdir(basedir1) fileutil.write(os.path.join(basedir1, "tahoe.cfg"), config + "local.directory = " + local_dir_utf8 + "\n") self.failUnlessRaises(MissingConfigEntry, client.Client, basedir1) fileutil.write(os.path.join(basedir1, "tahoe.cfg"), config) fileutil.write(os.path.join(basedir1, "private", "drop_upload_dircap"), "URI:DIR2:blah") self.failUnlessRaises(MissingConfigEntry, client.Client, basedir1) fileutil.write(os.path.join(basedir1, "tahoe.cfg"), config + "upload.dircap = " + upload_dircap + "\n") self.failUnlessRaises(OldConfigOptionError, client.Client, basedir1) fileutil.write(os.path.join(basedir1, "tahoe.cfg"), config + "local.directory = " + local_dir_utf8 + "\n") c1 = client.Client(basedir1) uploader = c1.getServiceNamed('drop-upload') self.failUnless(isinstance(uploader, MockDropUploader), uploader) self.failUnlessReallyEqual(uploader.client, c1) self.failUnlessReallyEqual(uploader.upload_dircap, upload_dircap) self.failUnlessReallyEqual(uploader.local_dir_utf8, local_dir_utf8) self.failUnless(uploader.inotify is None, uploader.inotify) self.failUnless(uploader.running) class Boom(Exception): pass mock_drop_uploader.side_effect = Boom() basedir2 
= "test_client.Basic.test_create_drop_uploader2" os.mkdir(basedir2) os.mkdir(os.path.join(basedir2, "private")) fileutil.write(os.path.join(basedir2, "tahoe.cfg"), BASECONFIG + "[drop_upload]\n" + "enabled = true\n" + "local.directory = " + local_dir_utf8 + "\n") fileutil.write(os.path.join(basedir2, "private", "drop_upload_dircap"), "URI:DIR2:blah") c2 = client.Client(basedir2) self.failUnlessRaises(KeyError, c2.getServiceNamed, 'drop-upload') self.failUnless([True for arg in mock_log_msg.call_args_list if "Boom" in repr(arg)], mock_log_msg.call_args_list) def flush_but_dont_ignore(res): d = flushEventualQueue() def _done(ignored): return res d.addCallback(_done) return d class Run(unittest.TestCase, testutil.StallMixin): def setUp(self): self.sparent = service.MultiService() self.sparent.startService() def tearDown(self): d = self.sparent.stopService() d.addBoth(flush_but_dont_ignore) return d def test_loadable(self): basedir = "test_client.Run.test_loadable" os.mkdir(basedir) dummy = "pb://wl74cyahejagspqgy4x5ukrvfnevlknt@127.0.0.1:58889/bogus" fileutil.write(os.path.join(basedir, "tahoe.cfg"), BASECONFIG_I % dummy) fileutil.write(os.path.join(basedir, "suicide_prevention_hotline"), "") client.Client(basedir) def test_reloadable(self): basedir = "test_client.Run.test_reloadable" os.mkdir(basedir) dummy = "pb://wl74cyahejagspqgy4x5ukrvfnevlknt@127.0.0.1:58889/bogus" fileutil.write(os.path.join(basedir, "tahoe.cfg"), BASECONFIG_I % dummy) c1 = client.Client(basedir) c1.setServiceParent(self.sparent) # delay to let the service start up completely. I'm not entirely sure # this is necessary. d = self.stall(delay=2.0) d.addCallback(lambda res: c1.disownServiceParent()) # the cygwin buildslave seems to need more time to let the old # service completely shut down. When delay=0.1, I saw this test fail, # probably due to the logport trying to reclaim the old socket # number. This suggests that either we're dropping a Deferred # somewhere in the shutdown sequence, or that cygwin is just cranky. d.addCallback(self.stall, delay=2.0) def _restart(res): # TODO: pause for slightly over one second, to let # Client._check_hotline poll the file once. That will exercise # another few lines. Then add another test in which we don't # update the file at all, and watch to see the node shutdown. (to # do this, use a modified node which overrides Node.shutdown(), # also change _check_hotline to use it instead of a raw # reactor.stop, also instrument the shutdown event in an # attribute that we can check) c2 = client.Client(basedir) c2.setServiceParent(self.sparent) return c2.disownServiceParent() d.addCallback(_restart) return d class NodeMaker(testutil.ReallyEqualMixin, unittest.TestCase): def test_maker(self): basedir = "client/NodeMaker/maker" fileutil.make_dirs(basedir) fileutil.write(os.path.join(basedir, "tahoe.cfg"), BASECONFIG) c = client.Client(basedir) n = c.create_node_from_uri("URI:CHK:6nmrpsubgbe57udnexlkiwzmlu:bjt7j6hshrlmadjyr7otq3dc24end5meo5xcr5xe5r663po6itmq:3:10:7277") self.failUnless(IFilesystemNode.providedBy(n)) self.failUnless(IFileNode.providedBy(n)) self.failUnless(IImmutableFileNode.providedBy(n)) self.failIf(IMutableFileNode.providedBy(n)) self.failIf(IDirectoryNode.providedBy(n)) self.failUnless(n.is_readonly()) self.failIf(n.is_mutable()) # Testing #1679. There was a bug that would occur when downloader was # downloading the same readcap more than once concurrently, so the # filenode object was cached, and there was a failure from one of the # servers in one of the download attempts. 
No subsequent download # attempt would attempt to use that server again, which would lead to # the file being undownloadable until the gateway was restarted. The # current fix for this (hopefully to be superceded by a better fix # eventually) is to prevent re-use of filenodes, so the NodeMaker is # hereby required *not* to cache and re-use filenodes for CHKs. other_n = c.create_node_from_uri("URI:CHK:6nmrpsubgbe57udnexlkiwzmlu:bjt7j6hshrlmadjyr7otq3dc24end5meo5xcr5xe5r663po6itmq:3:10:7277") self.failIf(n is other_n, (n, other_n)) n = c.create_node_from_uri("URI:LIT:n5xgk") self.failUnless(IFilesystemNode.providedBy(n)) self.failUnless(IFileNode.providedBy(n)) self.failUnless(IImmutableFileNode.providedBy(n)) self.failIf(IMutableFileNode.providedBy(n)) self.failIf(IDirectoryNode.providedBy(n)) self.failUnless(n.is_readonly()) self.failIf(n.is_mutable()) n = c.create_node_from_uri("URI:SSK:n6x24zd3seu725yluj75q5boaa:mm6yoqjhl6ueh7iereldqxue4nene4wl7rqfjfybqrehdqmqskvq") self.failUnless(IFilesystemNode.providedBy(n)) self.failUnless(IFileNode.providedBy(n)) self.failIf(IImmutableFileNode.providedBy(n)) self.failUnless(IMutableFileNode.providedBy(n)) self.failIf(IDirectoryNode.providedBy(n)) self.failIf(n.is_readonly()) self.failUnless(n.is_mutable()) n = c.create_node_from_uri("URI:SSK-RO:b7sr5qsifnicca7cbk3rhrhbvq:mm6yoqjhl6ueh7iereldqxue4nene4wl7rqfjfybqrehdqmqskvq") self.failUnless(IFilesystemNode.providedBy(n)) self.failUnless(IFileNode.providedBy(n)) self.failIf(IImmutableFileNode.providedBy(n)) self.failUnless(IMutableFileNode.providedBy(n)) self.failIf(IDirectoryNode.providedBy(n)) self.failUnless(n.is_readonly()) self.failUnless(n.is_mutable()) n = c.create_node_from_uri("URI:DIR2:n6x24zd3seu725yluj75q5boaa:mm6yoqjhl6ueh7iereldqxue4nene4wl7rqfjfybqrehdqmqskvq") self.failUnless(IFilesystemNode.providedBy(n)) self.failIf(IFileNode.providedBy(n)) self.failIf(IImmutableFileNode.providedBy(n)) self.failIf(IMutableFileNode.providedBy(n)) self.failUnless(IDirectoryNode.providedBy(n)) self.failIf(n.is_readonly()) self.failUnless(n.is_mutable()) n = c.create_node_from_uri("URI:DIR2-RO:b7sr5qsifnicca7cbk3rhrhbvq:mm6yoqjhl6ueh7iereldqxue4nene4wl7rqfjfybqrehdqmqskvq") self.failUnless(IFilesystemNode.providedBy(n)) self.failIf(IFileNode.providedBy(n)) self.failIf(IImmutableFileNode.providedBy(n)) self.failIf(IMutableFileNode.providedBy(n)) self.failUnless(IDirectoryNode.providedBy(n)) self.failUnless(n.is_readonly()) self.failUnless(n.is_mutable()) unknown_rw = "lafs://from_the_future" unknown_ro = "lafs://readonly_from_the_future" n = c.create_node_from_uri(unknown_rw, unknown_ro) self.failUnless(IFilesystemNode.providedBy(n)) self.failIf(IFileNode.providedBy(n)) self.failIf(IImmutableFileNode.providedBy(n)) self.failIf(IMutableFileNode.providedBy(n)) self.failIf(IDirectoryNode.providedBy(n)) self.failUnless(n.is_unknown()) self.failUnlessReallyEqual(n.get_uri(), unknown_rw) self.failUnlessReallyEqual(n.get_write_uri(), unknown_rw) self.failUnlessReallyEqual(n.get_readonly_uri(), "ro." + unknown_ro) # Note: it isn't that we *intend* to deploy non-ASCII caps in # the future, it is that we want to make sure older Tahoe-LAFS # versions wouldn't choke on them if we were to do so. See # #1051 and wiki:NewCapDesign for details. 
unknown_rw = u"lafs://from_the_future_rw_\u263A".encode('utf-8') unknown_ro = u"lafs://readonly_from_the_future_ro_\u263A".encode('utf-8') n = c.create_node_from_uri(unknown_rw, unknown_ro) self.failUnless(IFilesystemNode.providedBy(n)) self.failIf(IFileNode.providedBy(n)) self.failIf(IImmutableFileNode.providedBy(n)) self.failIf(IMutableFileNode.providedBy(n)) self.failIf(IDirectoryNode.providedBy(n)) self.failUnless(n.is_unknown()) self.failUnlessReallyEqual(n.get_uri(), unknown_rw) self.failUnlessReallyEqual(n.get_write_uri(), unknown_rw) self.failUnlessReallyEqual(n.get_readonly_uri(), "ro." + unknown_ro) tahoe-lafs-1.10.0/src/allmydata/test/test_codec.py000066400000000000000000000077361221140116300220240ustar00rootroot00000000000000 import os from twisted.trial import unittest from twisted.python import log from allmydata.codec import CRSEncoder, CRSDecoder import random from allmydata.util import mathutil class T(unittest.TestCase): def do_test(self, size, required_shares, max_shares, fewer_shares=None): data0s = [os.urandom(mathutil.div_ceil(size, required_shares)) for i in range(required_shares)] enc = CRSEncoder() enc.set_params(size, required_shares, max_shares) params = enc.get_params() assert params == (size, required_shares, max_shares) log.msg("params: %s" % (params,)) d = enc.encode(data0s) def _done_encoding_all((shares, shareids)): self.failUnlessEqual(len(shares), max_shares) self.shares = shares self.shareids = shareids d.addCallback(_done_encoding_all) if fewer_shares is not None: # also validate that the desired_shareids= parameter works desired_shareids = random.sample(range(max_shares), fewer_shares) d.addCallback(lambda res: enc.encode(data0s, desired_shareids)) def _check_fewer_shares((some_shares, their_shareids)): self.failUnlessEqual(tuple(their_shareids), tuple(desired_shareids)) d.addCallback(_check_fewer_shares) def _decode((shares, shareids)): dec = CRSDecoder() dec.set_params(*params) d1 = dec.decode(shares, shareids) return d1 def _check_data(decoded_shares): self.failUnlessEqual(len(''.join(decoded_shares)), len(''.join(data0s))) self.failUnlessEqual(len(decoded_shares), len(data0s)) for (i, (x, y)) in enumerate(zip(data0s, decoded_shares)): self.failUnlessEqual(x, y, "%s: %r != %r.... 
first share was %r" % (str(i), x, y, data0s[0],)) self.failUnless(''.join(decoded_shares) == ''.join(data0s), "%s" % ("???",)) # 0data0sclipped = tuple(data0s) # data0sclipped[-1] = # self.failUnless(tuple(decoded_shares) == tuple(data0s)) def _decode_some(res): log.msg("_decode_some") # decode with a minimal subset of the shares some_shares = self.shares[:required_shares] some_shareids = self.shareids[:required_shares] return _decode((some_shares, some_shareids)) d.addCallback(_decode_some) d.addCallback(_check_data) def _decode_some_random(res): log.msg("_decode_some_random") # use a randomly-selected minimal subset l = random.sample(zip(self.shares, self.shareids), required_shares) some_shares = [ x[0] for x in l ] some_shareids = [ x[1] for x in l ] return _decode((some_shares, some_shareids)) d.addCallback(_decode_some_random) d.addCallback(_check_data) def _decode_multiple(res): log.msg("_decode_multiple") # make sure we can re-use the decoder object shares1 = random.sample(self.shares, required_shares) sharesl1 = random.sample(zip(self.shares, self.shareids), required_shares) shares1 = [ x[0] for x in sharesl1 ] shareids1 = [ x[1] for x in sharesl1 ] sharesl2 = random.sample(zip(self.shares, self.shareids), required_shares) shares2 = [ x[0] for x in sharesl2 ] shareids2 = [ x[1] for x in sharesl2 ] dec = CRSDecoder() dec.set_params(*params) d1 = dec.decode(shares1, shareids1) d1.addCallback(_check_data) d1.addCallback(lambda res: dec.decode(shares2, shareids2)) d1.addCallback(_check_data) return d1 d.addCallback(_decode_multiple) return d def test_encode(self): return self.do_test(1000, 25, 100) def test_encode1(self): return self.do_test(8, 8, 16) def test_encode2(self): return self.do_test(125, 25, 100, 90) tahoe-lafs-1.10.0/src/allmydata/test/test_crawler.py000066400000000000000000000405751221140116300224040ustar00rootroot00000000000000 import time import os.path from twisted.trial import unittest from twisted.application import service from twisted.internet import defer from foolscap.api import eventually, fireEventually from allmydata.util import fileutil, hashutil, pollmixin from allmydata.storage.server import StorageServer, si_b2a from allmydata.storage.crawler import ShareCrawler, TimeSliceExceeded from allmydata.test.test_storage import FakeCanary from allmydata.test.common_util import StallMixin class BucketEnumeratingCrawler(ShareCrawler): cpu_slice = 500 # make sure it can complete in a single slice slow_start = 0 def __init__(self, *args, **kwargs): ShareCrawler.__init__(self, *args, **kwargs) self.all_buckets = [] self.finished_d = defer.Deferred() def process_bucket(self, cycle, prefix, prefixdir, storage_index_b32): self.all_buckets.append(storage_index_b32) def finished_cycle(self, cycle): eventually(self.finished_d.callback, None) class PacedCrawler(ShareCrawler): cpu_slice = 500 # make sure it can complete in a single slice slow_start = 0 def __init__(self, *args, **kwargs): ShareCrawler.__init__(self, *args, **kwargs) self.countdown = 6 self.all_buckets = [] self.finished_d = defer.Deferred() self.yield_cb = None def process_bucket(self, cycle, prefix, prefixdir, storage_index_b32): self.all_buckets.append(storage_index_b32) self.countdown -= 1 if self.countdown == 0: # force a timeout. 
We restore it in yielding() self.cpu_slice = -1.0 def yielding(self, sleep_time): self.cpu_slice = 500 if self.yield_cb: self.yield_cb() def finished_cycle(self, cycle): eventually(self.finished_d.callback, None) class ConsumingCrawler(ShareCrawler): cpu_slice = 0.5 allowed_cpu_percentage = 0.5 minimum_cycle_time = 0 slow_start = 0 def __init__(self, *args, **kwargs): ShareCrawler.__init__(self, *args, **kwargs) self.accumulated = 0.0 self.cycles = 0 self.last_yield = 0.0 def process_bucket(self, cycle, prefix, prefixdir, storage_index_b32): start = time.time() time.sleep(0.05) elapsed = time.time() - start self.accumulated += elapsed self.last_yield += elapsed def finished_cycle(self, cycle): self.cycles += 1 def yielding(self, sleep_time): self.last_yield = 0.0 class OneShotCrawler(ShareCrawler): cpu_slice = 500 # make sure it can complete in a single slice slow_start = 0 def __init__(self, *args, **kwargs): ShareCrawler.__init__(self, *args, **kwargs) self.counter = 0 self.finished_d = defer.Deferred() def process_bucket(self, cycle, prefix, prefixdir, storage_index_b32): self.counter += 1 def finished_cycle(self, cycle): self.finished_d.callback(None) self.disownServiceParent() class Basic(unittest.TestCase, StallMixin, pollmixin.PollMixin): def setUp(self): self.s = service.MultiService() self.s.startService() def tearDown(self): return self.s.stopService() def si(self, i): return hashutil.storage_index_hash(str(i)) def rs(self, i, serverid): return hashutil.bucket_renewal_secret_hash(str(i), serverid) def cs(self, i, serverid): return hashutil.bucket_cancel_secret_hash(str(i), serverid) def write(self, i, ss, serverid, tail=0): si = self.si(i) si = si[:-1] + chr(tail) had,made = ss.remote_allocate_buckets(si, self.rs(i, serverid), self.cs(i, serverid), set([0]), 99, FakeCanary()) made[0].remote_write(0, "data") made[0].remote_close() return si_b2a(si) def test_immediate(self): self.basedir = "crawler/Basic/immediate" fileutil.make_dirs(self.basedir) serverid = "\x00" * 20 ss = StorageServer(self.basedir, serverid) ss.setServiceParent(self.s) sis = [self.write(i, ss, serverid) for i in range(10)] statefile = os.path.join(self.basedir, "statefile") c = BucketEnumeratingCrawler(ss, statefile, allowed_cpu_percentage=.1) c.load_state() c.start_current_prefix(time.time()) self.failUnlessEqual(sorted(sis), sorted(c.all_buckets)) # make sure the statefile has been returned to the starting point c.finished_d = defer.Deferred() c.all_buckets = [] c.start_current_prefix(time.time()) self.failUnlessEqual(sorted(sis), sorted(c.all_buckets)) # check that a new crawler picks up on the state file properly c2 = BucketEnumeratingCrawler(ss, statefile) c2.load_state() c2.start_current_prefix(time.time()) self.failUnlessEqual(sorted(sis), sorted(c2.all_buckets)) def test_service(self): self.basedir = "crawler/Basic/service" fileutil.make_dirs(self.basedir) serverid = "\x00" * 20 ss = StorageServer(self.basedir, serverid) ss.setServiceParent(self.s) sis = [self.write(i, ss, serverid) for i in range(10)] statefile = os.path.join(self.basedir, "statefile") c = BucketEnumeratingCrawler(ss, statefile) c.setServiceParent(self.s) # it should be legal to call get_state() and get_progress() right # away, even before the first tick is performed. No work should have # been done yet. 
s = c.get_state() p = c.get_progress() self.failUnlessEqual(s["last-complete-prefix"], None) self.failUnlessEqual(s["current-cycle"], None) self.failUnlessEqual(p["cycle-in-progress"], False) d = c.finished_d def _check(ignored): self.failUnlessEqual(sorted(sis), sorted(c.all_buckets)) d.addCallback(_check) return d def test_paced(self): self.basedir = "crawler/Basic/paced" fileutil.make_dirs(self.basedir) serverid = "\x00" * 20 ss = StorageServer(self.basedir, serverid) ss.setServiceParent(self.s) # put four buckets in each prefixdir sis = [] for i in range(10): for tail in range(4): sis.append(self.write(i, ss, serverid, tail)) statefile = os.path.join(self.basedir, "statefile") c = PacedCrawler(ss, statefile) c.load_state() try: c.start_current_prefix(time.time()) except TimeSliceExceeded: pass # that should stop in the middle of one of the buckets. Since we # aren't using its normal scheduler, we have to save its state # manually. c.save_state() c.cpu_slice = PacedCrawler.cpu_slice self.failUnlessEqual(len(c.all_buckets), 6) c.start_current_prefix(time.time()) # finish it self.failUnlessEqual(len(sis), len(c.all_buckets)) self.failUnlessEqual(sorted(sis), sorted(c.all_buckets)) # make sure the statefile has been returned to the starting point c.finished_d = defer.Deferred() c.all_buckets = [] c.start_current_prefix(time.time()) self.failUnlessEqual(sorted(sis), sorted(c.all_buckets)) del c # start a new crawler, it should start from the beginning c = PacedCrawler(ss, statefile) c.load_state() try: c.start_current_prefix(time.time()) except TimeSliceExceeded: pass # that should stop in the middle of one of the buckets. Since we # aren't using its normal scheduler, we have to save its state # manually. c.save_state() c.cpu_slice = PacedCrawler.cpu_slice # a third crawler should pick up from where it left off c2 = PacedCrawler(ss, statefile) c2.all_buckets = c.all_buckets[:] c2.load_state() c2.countdown = -1 c2.start_current_prefix(time.time()) self.failUnlessEqual(len(sis), len(c2.all_buckets)) self.failUnlessEqual(sorted(sis), sorted(c2.all_buckets)) del c, c2 # now stop it at the end of a bucket (countdown=4), to exercise a # different place that checks the time c = PacedCrawler(ss, statefile) c.load_state() c.countdown = 4 try: c.start_current_prefix(time.time()) except TimeSliceExceeded: pass # that should stop at the end of one of the buckets. Again we must # save state manually. c.save_state() c.cpu_slice = PacedCrawler.cpu_slice self.failUnlessEqual(len(c.all_buckets), 4) c.start_current_prefix(time.time()) # finish it self.failUnlessEqual(len(sis), len(c.all_buckets)) self.failUnlessEqual(sorted(sis), sorted(c.all_buckets)) del c # stop it again at the end of the bucket, check that a new checker # picks up correctly c = PacedCrawler(ss, statefile) c.load_state() c.countdown = 4 try: c.start_current_prefix(time.time()) except TimeSliceExceeded: pass # that should stop at the end of one of the buckets. 
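        # As before, the state is saved by hand and a second PacedCrawler is
        # handed the partial bucket list, to verify that a replacement crawler
        # resumes from the saved state instead of starting over.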
c.save_state() c2 = PacedCrawler(ss, statefile) c2.all_buckets = c.all_buckets[:] c2.load_state() c2.countdown = -1 c2.start_current_prefix(time.time()) self.failUnlessEqual(len(sis), len(c2.all_buckets)) self.failUnlessEqual(sorted(sis), sorted(c2.all_buckets)) del c, c2 def test_paced_service(self): self.basedir = "crawler/Basic/paced_service" fileutil.make_dirs(self.basedir) serverid = "\x00" * 20 ss = StorageServer(self.basedir, serverid) ss.setServiceParent(self.s) sis = [self.write(i, ss, serverid) for i in range(10)] statefile = os.path.join(self.basedir, "statefile") c = PacedCrawler(ss, statefile) did_check_progress = [False] def check_progress(): c.yield_cb = None try: p = c.get_progress() self.failUnlessEqual(p["cycle-in-progress"], True) pct = p["cycle-complete-percentage"] # after 6 buckets, we happen to be at 76.17% complete. As # long as we create shares in deterministic order, this will # continue to be true. self.failUnlessEqual(int(pct), 76) left = p["remaining-sleep-time"] self.failUnless(isinstance(left, float), left) self.failUnless(left > 0.0, left) except Exception, e: did_check_progress[0] = e else: did_check_progress[0] = True c.yield_cb = check_progress c.setServiceParent(self.s) # that should get through 6 buckets, pause for a little while (and # run check_progress()), then resume d = c.finished_d def _check(ignored): if did_check_progress[0] is not True: raise did_check_progress[0] self.failUnless(did_check_progress[0]) self.failUnlessEqual(sorted(sis), sorted(c.all_buckets)) # at this point, the crawler should be sitting in the inter-cycle # timer, which should be pegged at the minumum cycle time self.failUnless(c.timer) self.failUnless(c.sleeping_between_cycles) self.failUnlessEqual(c.current_sleep_time, c.minimum_cycle_time) p = c.get_progress() self.failUnlessEqual(p["cycle-in-progress"], False) naptime = p["remaining-wait-time"] self.failUnless(isinstance(naptime, float), naptime) # min-cycle-time is 300, so this is basically testing that it took # less than 290s to crawl self.failUnless(naptime > 10.0, naptime) soon = p["next-crawl-time"] - time.time() self.failUnless(soon > 10.0, soon) d.addCallback(_check) return d def OFF_test_cpu_usage(self): # this test can't actually assert anything, because too many # buildslave machines are slow. But on a fast developer machine, it # can produce interesting results. So if you care about how well the # Crawler is accomplishing it's run-slowly goals, re-enable this test # and read the stdout when it runs. self.basedir = "crawler/Basic/cpu_usage" fileutil.make_dirs(self.basedir) serverid = "\x00" * 20 ss = StorageServer(self.basedir, serverid) ss.setServiceParent(self.s) for i in range(10): self.write(i, ss, serverid) statefile = os.path.join(self.basedir, "statefile") c = ConsumingCrawler(ss, statefile) c.setServiceParent(self.s) # this will run as fast as it can, consuming about 50ms per call to # process_bucket(), limited by the Crawler to about 50% cpu. We let # it run for a few seconds, then compare how much time # process_bucket() got vs wallclock time. It should get between 10% # and 70% CPU. This is dicey, there's about 100ms of overhead per # 300ms slice (saving the state file takes about 150-200us, but we do # it 1024 times per cycle, one for each [empty] prefixdir), leaving # 200ms for actual processing, which is enough to get through 4 # buckets each slice, then the crawler sleeps for 300ms/0.5 = 600ms, # giving us 900ms wallclock per slice. 
        # In 4.0 seconds we can do 4.4 slices, giving us about 17 shares, so
        # we merely assert that we've finished at least one cycle in that
        # time.

        # with a short cpu_slice (so we can keep this test down to 4
        # seconds), the overhead is enough to make a nominal 50% usage more
        # like 30%. Forcing sleep_time to 0 only gets us 67% usage.

        start = time.time()
        d = self.stall(delay=4.0)
        def _done(res):
            elapsed = time.time() - start
            percent = 100.0 * c.accumulated / elapsed
            # our buildslaves vary too much in their speeds and load levels,
            # and many of them only manage to hit 7% usage when our target is
            # 50%. So don't assert anything about the results, just log them.
            print
            print "crawler: got %d%% percent when trying for 50%%" % percent
            print "crawler: got %d full cycles" % c.cycles
        d.addCallback(_done)
        return d

    def test_empty_subclass(self):
        self.basedir = "crawler/Basic/empty_subclass"
        fileutil.make_dirs(self.basedir)
        serverid = "\x00" * 20
        ss = StorageServer(self.basedir, serverid)
        ss.setServiceParent(self.s)

        for i in range(10):
            self.write(i, ss, serverid)

        statefile = os.path.join(self.basedir, "statefile")
        c = ShareCrawler(ss, statefile)
        c.slow_start = 0
        c.setServiceParent(self.s)

        # we just let it run for a while, to get figleaf coverage of the
        # empty methods in the base class

        def _check():
            return bool(c.state["last-cycle-finished"] is not None)
        d = self.poll(_check)
        def _done(ignored):
            state = c.get_state()
            self.failUnless(state["last-cycle-finished"] is not None)
        d.addCallback(_done)
        return d

    def test_oneshot(self):
        self.basedir = "crawler/Basic/oneshot"
        fileutil.make_dirs(self.basedir)
        serverid = "\x00" * 20
        ss = StorageServer(self.basedir, serverid)
        ss.setServiceParent(self.s)

        for i in range(30):
            self.write(i, ss, serverid)

        statefile = os.path.join(self.basedir, "statefile")
        c = OneShotCrawler(ss, statefile)
        c.setServiceParent(self.s)

        d = c.finished_d
        def _finished_first_cycle(ignored):
            return fireEventually(c.counter)
        d.addCallback(_finished_first_cycle)
        def _check(old_counter):
            # the crawler shouldn't do any work after it's been stopped
            self.failUnlessEqual(old_counter, c.counter)
            self.failIf(c.running)
            self.failIf(c.timer)
            self.failIf(c.current_sleep_time)
            s = c.get_state()
            self.failUnlessEqual(s["last-cycle-finished"], 0)
            self.failUnlessEqual(s["current-cycle"], None)
        d.addCallback(_check)
        return d

tahoe-lafs-1.10.0/src/allmydata/test/test_deepcheck.py

import os, simplejson, urllib
from cStringIO import StringIO
from twisted.trial import unittest
from twisted.internet import defer
from twisted.internet import threads # CLI tests use deferToThread
from allmydata.immutable import upload
from allmydata.mutable.common import UnrecoverableFileError
from allmydata.mutable.publish import MutableData
from allmydata.util import idlib
from allmydata.util import base32
from allmydata.scripts import runner
from allmydata.interfaces import ICheckResults, ICheckAndRepairResults, \
     IDeepCheckResults, IDeepCheckAndRepairResults
from allmydata.monitor import Monitor, OperationCancelledError
from allmydata.uri import LiteralFileURI
from twisted.web.client import getPage
from allmydata.test.common import ErrorMixin, _corrupt_mutable_share_data, \
     ShouldFailMixin
from allmydata.test.common_util import StallMixin
from allmydata.test.no_network import GridTestMixin

timeout = 2400 # One of these took 1046.091s on Zandr's ARM box.
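# Illustrative sketch, not part of the original test suite: the webapi checks
# exercised below are plain GET/POSTs against /uri/<cap> with query arguments.
# This hypothetical helper only exists to show the query-string conventions
# (t=check, verify=true, repair=true, output=JSON) in one place; the real
# tests build their URLs inline via DeepCheckBase.web() and web_json().
def _example_check_url(base_url, cap, verify=False, repair=False):
    args = [("t", "check"), ("output", "JSON")]
    if verify:
        args.append(("verify", "true"))
    if repair:
        args.append(("repair", "true"))
    # urllib is already imported at the top of this module
    return "%suri/%s?%s" % (base_url, urllib.quote(cap), urllib.urlencode(args))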
class MutableChecker(GridTestMixin, unittest.TestCase, ErrorMixin): def _run_cli(self, argv): stdout, stderr = StringIO(), StringIO() # this can only do synchronous operations assert argv[0] == "debug" runner.runner(argv, run_by_human=False, stdout=stdout, stderr=stderr) return stdout.getvalue() def test_good(self): self.basedir = "deepcheck/MutableChecker/good" self.set_up_grid() CONTENTS = "a little bit of data" CONTENTS_uploadable = MutableData(CONTENTS) d = self.g.clients[0].create_mutable_file(CONTENTS_uploadable) def _created(node): self.node = node self.fileurl = "uri/" + urllib.quote(node.get_uri()) d.addCallback(_created) # now make sure the webapi verifier sees no problems d.addCallback(lambda ign: self.GET(self.fileurl+"?t=check&verify=true", method="POST")) def _got_results(out): self.failUnless("Healthy : Healthy" in out, out) self.failUnless("Recoverable Versions: 10*seq1-" in out, out) self.failIf("Not Healthy!" in out, out) self.failIf("Unhealthy" in out, out) self.failIf("Corrupt Shares" in out, out) d.addCallback(_got_results) d.addErrback(self.explain_web_error) return d def test_corrupt(self): self.basedir = "deepcheck/MutableChecker/corrupt" self.set_up_grid() CONTENTS = "a little bit of data" CONTENTS_uploadable = MutableData(CONTENTS) d = self.g.clients[0].create_mutable_file(CONTENTS_uploadable) def _stash_and_corrupt(node): self.node = node self.fileurl = "uri/" + urllib.quote(node.get_uri()) self.corrupt_shares_numbered(node.get_uri(), [0], _corrupt_mutable_share_data) d.addCallback(_stash_and_corrupt) # now make sure the webapi verifier notices it d.addCallback(lambda ign: self.GET(self.fileurl+"?t=check&verify=true", method="POST")) def _got_results(out): self.failUnless("Not Healthy!" in out, out) self.failUnless("Unhealthy: best version has only 9 shares (encoding is 3-of-10)" in out, out) self.failUnless("Corrupt Shares:" in out, out) d.addCallback(_got_results) # now make sure the webapi repairer can fix it d.addCallback(lambda ign: self.GET(self.fileurl+"?t=check&verify=true&repair=true", method="POST")) def _got_repair_results(out): self.failUnless("
Repair successful
" in out, out) d.addCallback(_got_repair_results) d.addCallback(lambda ign: self.GET(self.fileurl+"?t=check&verify=true", method="POST")) def _got_postrepair_results(out): self.failIf("Not Healthy!" in out, out) self.failUnless("Recoverable Versions: 10*seq" in out, out) d.addCallback(_got_postrepair_results) d.addErrback(self.explain_web_error) return d def test_delete_share(self): self.basedir = "deepcheck/MutableChecker/delete_share" self.set_up_grid() CONTENTS = "a little bit of data" CONTENTS_uploadable = MutableData(CONTENTS) d = self.g.clients[0].create_mutable_file(CONTENTS_uploadable) def _stash_and_delete(node): self.node = node self.fileurl = "uri/" + urllib.quote(node.get_uri()) self.delete_shares_numbered(node.get_uri(), [0]) d.addCallback(_stash_and_delete) # now make sure the webapi checker notices it d.addCallback(lambda ign: self.GET(self.fileurl+"?t=check&verify=false", method="POST")) def _got_results(out): self.failUnless("Not Healthy!" in out, out) self.failUnless("Unhealthy: best version has only 9 shares (encoding is 3-of-10)" in out, out) self.failIf("Corrupt Shares" in out, out) d.addCallback(_got_results) # now make sure the webapi repairer can fix it d.addCallback(lambda ign: self.GET(self.fileurl+"?t=check&verify=false&repair=true", method="POST")) def _got_repair_results(out): self.failUnless("Repair successful" in out) d.addCallback(_got_repair_results) d.addCallback(lambda ign: self.GET(self.fileurl+"?t=check&verify=false", method="POST")) def _got_postrepair_results(out): self.failIf("Not Healthy!" in out, out) self.failUnless("Recoverable Versions: 10*seq" in out) d.addCallback(_got_postrepair_results) d.addErrback(self.explain_web_error) return d class DeepCheckBase(GridTestMixin, ErrorMixin, StallMixin, ShouldFailMixin): def web_json(self, n, **kwargs): kwargs["output"] = "json" d = self.web(n, "POST", **kwargs) d.addCallback(self.decode_json) return d def decode_json(self, (s,url)): try: data = simplejson.loads(s) except ValueError: self.fail("%s: not JSON: '%s'" % (url, s)) return data def parse_streamed_json(self, s): for unit in s.split("\n"): if not unit: # stream should end with a newline, so split returns "" continue try: yield simplejson.loads(unit) except ValueError, le: le.args = tuple(le.args + (unit,)) raise def web(self, n, method="GET", **kwargs): # returns (data, url) url = (self.client_baseurls[0] + "uri/%s" % urllib.quote(n.get_uri()) + "?" 
+ "&".join(["%s=%s" % (k,v) for (k,v) in kwargs.items()])) d = getPage(url, method=method) d.addCallback(lambda data: (data,url)) return d def wait_for_operation(self, ignored, ophandle): url = self.client_baseurls[0] + "operations/" + ophandle url += "?t=status&output=JSON" d = getPage(url) def _got(res): try: data = simplejson.loads(res) except ValueError: self.fail("%s: not JSON: '%s'" % (url, res)) if not data["finished"]: d = self.stall(delay=1.0) d.addCallback(self.wait_for_operation, ophandle) return d return data d.addCallback(_got) return d def get_operation_results(self, ignored, ophandle, output=None): url = self.client_baseurls[0] + "operations/" + ophandle url += "?t=status" if output: url += "&output=" + output d = getPage(url) def _got(res): if output and output.lower() == "json": try: return simplejson.loads(res) except ValueError: self.fail("%s: not JSON: '%s'" % (url, res)) return res d.addCallback(_got) return d def slow_web(self, n, output=None, **kwargs): # use ophandle= handle = base32.b2a(os.urandom(4)) d = self.web(n, "POST", ophandle=handle, **kwargs) d.addCallback(self.wait_for_operation, handle) d.addCallback(self.get_operation_results, handle, output=output) return d class DeepCheckWebGood(DeepCheckBase, unittest.TestCase): # construct a small directory tree (with one dir, one immutable file, one # mutable file, two LIT files, one DIR2:LIT empty dir, one DIR2:LIT tiny # dir, and a loop), and then check/examine it in various ways. def set_up_tree(self): # 2.9s c0 = self.g.clients[0] d = c0.create_dirnode() def _created_root(n): self.root = n self.root_uri = n.get_uri() d.addCallback(_created_root) d.addCallback(lambda ign: c0.create_mutable_file(MutableData("mutable file contents"))) d.addCallback(lambda n: self.root.set_node(u"mutable", n)) def _created_mutable(n): self.mutable = n self.mutable_uri = n.get_uri() d.addCallback(_created_mutable) large = upload.Data("Lots of data\n" * 1000, None) d.addCallback(lambda ign: self.root.add_file(u"large", large)) def _created_large(n): self.large = n self.large_uri = n.get_uri() d.addCallback(_created_large) small = upload.Data("Small enough for a LIT", None) d.addCallback(lambda ign: self.root.add_file(u"small", small)) def _created_small(n): self.small = n self.small_uri = n.get_uri() d.addCallback(_created_small) small2 = upload.Data("Small enough for a LIT too", None) d.addCallback(lambda ign: self.root.add_file(u"small2", small2)) def _created_small2(n): self.small2 = n self.small2_uri = n.get_uri() d.addCallback(_created_small2) empty_litdir_uri = "URI:DIR2-LIT:" tiny_litdir_uri = "URI:DIR2-LIT:gqytunj2onug64tufqzdcosvkjetutcjkq5gw4tvm5vwszdgnz5hgyzufqydulbshj5x2lbm" # contains one child which is itself also LIT d.addCallback(lambda ign: self.root._create_and_validate_node(None, empty_litdir_uri, name=u"test_deepcheck empty_lit_dir")) def _created_empty_lit_dir(n): self.empty_lit_dir = n self.empty_lit_dir_uri = n.get_uri() self.root.set_node(u"empty_lit_dir", n) d.addCallback(_created_empty_lit_dir) d.addCallback(lambda ign: self.root._create_and_validate_node(None, tiny_litdir_uri, name=u"test_deepcheck tiny_lit_dir")) def _created_tiny_lit_dir(n): self.tiny_lit_dir = n self.tiny_lit_dir_uri = n.get_uri() self.root.set_node(u"tiny_lit_dir", n) d.addCallback(_created_tiny_lit_dir) d.addCallback(lambda ign: self.root.set_node(u"loop", self.root)) return d def check_is_healthy(self, cr, n, where, incomplete=False): self.failUnless(ICheckResults.providedBy(cr), where) self.failUnless(cr.is_healthy(), where) 
self.failUnlessEqual(cr.get_storage_index(), n.get_storage_index(), where) self.failUnlessEqual(cr.get_storage_index_string(), base32.b2a(n.get_storage_index()), where) num_servers = len(self.g.all_servers) needs_rebalancing = bool( num_servers < 10 ) if not incomplete: self.failUnlessEqual(cr.needs_rebalancing(), needs_rebalancing, str((where, cr, cr.as_dict()))) self.failUnlessEqual(cr.get_share_counter_good(), 10, where) self.failUnlessEqual(cr.get_encoding_needed(), 3, where) self.failUnlessEqual(cr.get_encoding_expected(), 10, where) if not incomplete: self.failUnlessEqual(cr.get_host_counter_good_shares(), num_servers, where) self.failUnlessEqual(cr.get_corrupt_shares(), [], where) if not incomplete: self.failUnlessEqual(sorted([s.get_serverid() for s in cr.get_servers_responding()]), sorted(self.g.get_all_serverids()), where) all_serverids = set() for (shareid, servers) in cr.get_sharemap().items(): all_serverids.update([s.get_serverid() for s in servers]) self.failUnlessEqual(sorted(all_serverids), sorted(self.g.get_all_serverids()), where) self.failUnlessEqual(cr.get_share_counter_wrong(), 0, where) self.failUnlessEqual(cr.get_version_counter_recoverable(), 1, where) self.failUnlessEqual(cr.get_version_counter_unrecoverable(), 0, where) def check_and_repair_is_healthy(self, cr, n, where, incomplete=False): self.failUnless(ICheckAndRepairResults.providedBy(cr), (where, cr)) self.failUnless(cr.get_pre_repair_results().is_healthy(), where) self.check_is_healthy(cr.get_pre_repair_results(), n, where, incomplete) self.failUnless(cr.get_post_repair_results().is_healthy(), where) self.check_is_healthy(cr.get_post_repair_results(), n, where, incomplete) self.failIf(cr.get_repair_attempted(), where) def deep_check_is_healthy(self, cr, num_healthy, where): self.failUnless(IDeepCheckResults.providedBy(cr)) self.failUnlessEqual(cr.get_counters()["count-objects-healthy"], num_healthy, where) def deep_check_and_repair_is_healthy(self, cr, num_healthy, where): self.failUnless(IDeepCheckAndRepairResults.providedBy(cr), where) c = cr.get_counters() self.failUnlessEqual(c["count-objects-healthy-pre-repair"], num_healthy, where) self.failUnlessEqual(c["count-objects-healthy-post-repair"], num_healthy, where) self.failUnlessEqual(c["count-repairs-attempted"], 0, where) def test_good(self): self.basedir = "deepcheck/DeepCheckWebGood/good" self.set_up_grid() d = self.set_up_tree() d.addCallback(self.do_stats) d.addCallback(self.do_web_stream_manifest) d.addCallback(self.do_web_stream_check) d.addCallback(self.do_test_check_good) d.addCallback(self.do_test_web_good) d.addCallback(self.do_test_cli_good) d.addErrback(self.explain_web_error) d.addErrback(self.explain_error) return d def do_stats(self, ignored): d = defer.succeed(None) d.addCallback(lambda ign: self.root.start_deep_stats().when_done()) d.addCallback(self.check_stats_good) return d def check_stats_good(self, s): self.failUnlessEqual(s["count-directories"], 3) self.failUnlessEqual(s["count-files"], 5) self.failUnlessEqual(s["count-immutable-files"], 1) self.failUnlessEqual(s["count-literal-files"], 3) self.failUnlessEqual(s["count-mutable-files"], 1) # don't check directories: their size will vary # s["largest-directory"] # s["size-directories"] self.failUnlessEqual(s["largest-directory-children"], 7) self.failUnlessEqual(s["largest-immutable-file"], 13000) # to re-use this function for both the local # dirnode.start_deep_stats() and the webapi t=start-deep-stats, we # coerce the result into a list of tuples. 
dirnode.start_deep_stats() # returns a list of tuples, but JSON only knows about lists., so # t=start-deep-stats returns a list of lists. histogram = [tuple(stuff) for stuff in s["size-files-histogram"]] self.failUnlessEqual(histogram, [(4, 10, 1), (11, 31, 2), (10001, 31622, 1), ]) self.failUnlessEqual(s["size-immutable-files"], 13000) self.failUnlessEqual(s["size-literal-files"], 56) def do_web_stream_manifest(self, ignored): d = self.web(self.root, method="POST", t="stream-manifest") d.addCallback(lambda (output,url): self._check_streamed_manifest(output)) return d def _check_streamed_manifest(self, output): units = list(self.parse_streamed_json(output)) files = [u for u in units if u["type"] in ("file", "directory")] assert units[-1]["type"] == "stats" stats = units[-1]["stats"] self.failUnlessEqual(len(files), 8) # [root,mutable,large] are distributed, [small,small2,empty_litdir,tiny_litdir] are not self.failUnlessEqual(len([f for f in files if f["verifycap"] != ""]), 3) self.failUnlessEqual(len([f for f in files if f["verifycap"] == ""]), 5) self.failUnlessEqual(len([f for f in files if f["repaircap"] != ""]), 3) self.failUnlessEqual(len([f for f in files if f["repaircap"] == ""]), 5) self.failUnlessEqual(len([f for f in files if f["storage-index"] != ""]), 3) self.failUnlessEqual(len([f for f in files if f["storage-index"] == ""]), 5) # make sure that a mutable file has filecap==repaircap!=verifycap mutable = [f for f in files if f["cap"] is not None and f["cap"].startswith("URI:SSK:")][0] self.failUnlessEqual(mutable["cap"], self.mutable_uri) self.failIfEqual(mutable["cap"], mutable["verifycap"]) self.failUnlessEqual(mutable["cap"], mutable["repaircap"]) # for immutable file, verifycap==repaircap!=filecap large = [f for f in files if f["cap"] is not None and f["cap"].startswith("URI:CHK:")][0] self.failUnlessEqual(large["cap"], self.large_uri) self.failIfEqual(large["cap"], large["verifycap"]) self.failUnlessEqual(large["verifycap"], large["repaircap"]) self.check_stats_good(stats) def do_web_stream_check(self, ignored): # TODO return d = self.web(self.root, t="stream-deep-check") def _check(res): units = list(self.parse_streamed_json(res)) #files = [u for u in units if u["type"] in ("file", "directory")] assert units[-1]["type"] == "stats" #stats = units[-1]["stats"] # ... 
d.addCallback(_check) return d def do_test_check_good(self, ignored): d = defer.succeed(None) # check the individual items d.addCallback(lambda ign: self.root.check(Monitor())) d.addCallback(self.check_is_healthy, self.root, "root") d.addCallback(lambda ign: self.mutable.check(Monitor())) d.addCallback(self.check_is_healthy, self.mutable, "mutable") d.addCallback(lambda ign: self.large.check(Monitor())) d.addCallback(self.check_is_healthy, self.large, "large") d.addCallback(lambda ign: self.small.check(Monitor())) d.addCallback(self.failUnlessEqual, None, "small") d.addCallback(lambda ign: self.small2.check(Monitor())) d.addCallback(self.failUnlessEqual, None, "small2") d.addCallback(lambda ign: self.empty_lit_dir.check(Monitor())) d.addCallback(self.failUnlessEqual, None, "empty_lit_dir") d.addCallback(lambda ign: self.tiny_lit_dir.check(Monitor())) d.addCallback(self.failUnlessEqual, None, "tiny_lit_dir") # and again with verify=True d.addCallback(lambda ign: self.root.check(Monitor(), verify=True)) d.addCallback(self.check_is_healthy, self.root, "root") d.addCallback(lambda ign: self.mutable.check(Monitor(), verify=True)) d.addCallback(self.check_is_healthy, self.mutable, "mutable") d.addCallback(lambda ign: self.large.check(Monitor(), verify=True)) d.addCallback(self.check_is_healthy, self.large, "large", incomplete=True) d.addCallback(lambda ign: self.small.check(Monitor(), verify=True)) d.addCallback(self.failUnlessEqual, None, "small") d.addCallback(lambda ign: self.small2.check(Monitor(), verify=True)) d.addCallback(self.failUnlessEqual, None, "small2") d.addCallback(lambda ign: self.empty_lit_dir.check(Monitor(), verify=True)) d.addCallback(self.failUnlessEqual, None, "empty_lit_dir") d.addCallback(lambda ign: self.tiny_lit_dir.check(Monitor(), verify=True)) d.addCallback(self.failUnlessEqual, None, "tiny_lit_dir") # and check_and_repair(), which should be a nop d.addCallback(lambda ign: self.root.check_and_repair(Monitor())) d.addCallback(self.check_and_repair_is_healthy, self.root, "root") d.addCallback(lambda ign: self.mutable.check_and_repair(Monitor())) d.addCallback(self.check_and_repair_is_healthy, self.mutable, "mutable") d.addCallback(lambda ign: self.large.check_and_repair(Monitor())) d.addCallback(self.check_and_repair_is_healthy, self.large, "large") d.addCallback(lambda ign: self.small.check_and_repair(Monitor())) d.addCallback(self.failUnlessEqual, None, "small") d.addCallback(lambda ign: self.small2.check_and_repair(Monitor())) d.addCallback(self.failUnlessEqual, None, "small2") d.addCallback(lambda ign: self.empty_lit_dir.check_and_repair(Monitor())) d.addCallback(self.failUnlessEqual, None, "empty_lit_dir") d.addCallback(lambda ign: self.tiny_lit_dir.check_and_repair(Monitor())) # check_and_repair(verify=True) d.addCallback(lambda ign: self.root.check_and_repair(Monitor(), verify=True)) d.addCallback(self.check_and_repair_is_healthy, self.root, "root") d.addCallback(lambda ign: self.mutable.check_and_repair(Monitor(), verify=True)) d.addCallback(self.check_and_repair_is_healthy, self.mutable, "mutable") d.addCallback(lambda ign: self.large.check_and_repair(Monitor(), verify=True)) d.addCallback(self.check_and_repair_is_healthy, self.large, "large", incomplete=True) d.addCallback(lambda ign: self.small.check_and_repair(Monitor(), verify=True)) d.addCallback(self.failUnlessEqual, None, "small") d.addCallback(lambda ign: self.small2.check_and_repair(Monitor(), verify=True)) d.addCallback(self.failUnlessEqual, None, "small2") d.addCallback(self.failUnlessEqual, None, 
"small2") d.addCallback(lambda ign: self.empty_lit_dir.check_and_repair(Monitor(), verify=True)) d.addCallback(self.failUnlessEqual, None, "empty_lit_dir") d.addCallback(lambda ign: self.tiny_lit_dir.check_and_repair(Monitor(), verify=True)) # now deep-check the root, with various verify= and repair= options d.addCallback(lambda ign: self.root.start_deep_check().when_done()) d.addCallback(self.deep_check_is_healthy, 3, "root") d.addCallback(lambda ign: self.root.start_deep_check(verify=True).when_done()) d.addCallback(self.deep_check_is_healthy, 3, "root") d.addCallback(lambda ign: self.root.start_deep_check_and_repair().when_done()) d.addCallback(self.deep_check_and_repair_is_healthy, 3, "root") d.addCallback(lambda ign: self.root.start_deep_check_and_repair(verify=True).when_done()) d.addCallback(self.deep_check_and_repair_is_healthy, 3, "root") # and finally, start a deep-check, but then cancel it. d.addCallback(lambda ign: self.root.start_deep_check()) def _checking(monitor): monitor.cancel() d = monitor.when_done() # this should fire as soon as the next dirnode.list finishes. # TODO: add a counter to measure how many list() calls are made, # assert that no more than one gets to run before the cancel() # takes effect. def _finished_normally(res): self.fail("this was supposed to fail, not finish normally") def _cancelled(f): f.trap(OperationCancelledError) d.addCallbacks(_finished_normally, _cancelled) return d d.addCallback(_checking) return d def json_check_is_healthy(self, data, n, where, incomplete=False): self.failUnlessEqual(data["storage-index"], base32.b2a(n.get_storage_index()), where) self.failUnless("summary" in data, (where, data)) self.failUnlessEqual(data["summary"].lower(), "healthy", "%s: '%s'" % (where, data["summary"])) r = data["results"] self.failUnlessEqual(r["healthy"], True, where) num_servers = len(self.g.all_servers) needs_rebalancing = bool( num_servers < 10 ) if not incomplete: self.failUnlessEqual(r["needs-rebalancing"], needs_rebalancing, where) self.failUnlessEqual(r["count-shares-good"], 10, where) self.failUnlessEqual(r["count-shares-needed"], 3, where) self.failUnlessEqual(r["count-shares-expected"], 10, where) if not incomplete: self.failUnlessEqual(r["count-good-share-hosts"], num_servers, where) self.failUnlessEqual(r["count-corrupt-shares"], 0, where) self.failUnlessEqual(r["list-corrupt-shares"], [], where) if not incomplete: self.failUnlessEqual(sorted(r["servers-responding"]), sorted([idlib.nodeid_b2a(sid) for sid in self.g.get_all_serverids()]), where) self.failUnless("sharemap" in r, where) all_serverids = set() for (shareid, serverids_s) in r["sharemap"].items(): all_serverids.update(serverids_s) self.failUnlessEqual(sorted(all_serverids), sorted([idlib.nodeid_b2a(sid) for sid in self.g.get_all_serverids()]), where) self.failUnlessEqual(r["count-wrong-shares"], 0, where) self.failUnlessEqual(r["count-recoverable-versions"], 1, where) self.failUnlessEqual(r["count-unrecoverable-versions"], 0, where) def json_check_and_repair_is_healthy(self, data, n, where, incomplete=False): self.failUnlessEqual(data["storage-index"], base32.b2a(n.get_storage_index()), where) self.failUnlessEqual(data["repair-attempted"], False, where) self.json_check_is_healthy(data["pre-repair-results"], n, where, incomplete) self.json_check_is_healthy(data["post-repair-results"], n, where, incomplete) def json_full_deepcheck_is_healthy(self, data, n, where): self.failUnlessEqual(data["root-storage-index"], base32.b2a(n.get_storage_index()), where) 
self.failUnlessEqual(data["count-objects-checked"], 3, where) self.failUnlessEqual(data["count-objects-healthy"], 3, where) self.failUnlessEqual(data["count-objects-unhealthy"], 0, where) self.failUnlessEqual(data["count-corrupt-shares"], 0, where) self.failUnlessEqual(data["list-corrupt-shares"], [], where) self.failUnlessEqual(data["list-unhealthy-files"], [], where) self.json_check_stats_good(data["stats"], where) def json_full_deepcheck_and_repair_is_healthy(self, data, n, where): self.failUnlessEqual(data["root-storage-index"], base32.b2a(n.get_storage_index()), where) self.failUnlessEqual(data["count-objects-checked"], 3, where) self.failUnlessEqual(data["count-objects-healthy-pre-repair"], 3, where) self.failUnlessEqual(data["count-objects-unhealthy-pre-repair"], 0, where) self.failUnlessEqual(data["count-corrupt-shares-pre-repair"], 0, where) self.failUnlessEqual(data["count-objects-healthy-post-repair"], 3, where) self.failUnlessEqual(data["count-objects-unhealthy-post-repair"], 0, where) self.failUnlessEqual(data["count-corrupt-shares-post-repair"], 0, where) self.failUnlessEqual(data["list-corrupt-shares"], [], where) self.failUnlessEqual(data["list-remaining-corrupt-shares"], [], where) self.failUnlessEqual(data["list-unhealthy-files"], [], where) self.failUnlessEqual(data["count-repairs-attempted"], 0, where) self.failUnlessEqual(data["count-repairs-successful"], 0, where) self.failUnlessEqual(data["count-repairs-unsuccessful"], 0, where) def json_check_lit(self, data, n, where): self.failUnlessEqual(data["storage-index"], "", where) self.failUnlessEqual(data["results"]["healthy"], True, where) def json_check_stats_good(self, data, where): self.check_stats_good(data) def do_test_web_good(self, ignored): d = defer.succeed(None) # stats d.addCallback(lambda ign: self.slow_web(self.root, t="start-deep-stats", output="json")) d.addCallback(self.json_check_stats_good, "deep-stats") # check, no verify d.addCallback(lambda ign: self.web_json(self.root, t="check")) d.addCallback(self.json_check_is_healthy, self.root, "root") d.addCallback(lambda ign: self.web_json(self.mutable, t="check")) d.addCallback(self.json_check_is_healthy, self.mutable, "mutable") d.addCallback(lambda ign: self.web_json(self.large, t="check")) d.addCallback(self.json_check_is_healthy, self.large, "large") d.addCallback(lambda ign: self.web_json(self.small, t="check")) d.addCallback(self.json_check_lit, self.small, "small") d.addCallback(lambda ign: self.web_json(self.small2, t="check")) d.addCallback(self.json_check_lit, self.small2, "small2") d.addCallback(lambda ign: self.web_json(self.empty_lit_dir, t="check")) d.addCallback(self.json_check_lit, self.empty_lit_dir, "empty_lit_dir") d.addCallback(lambda ign: self.web_json(self.tiny_lit_dir, t="check")) d.addCallback(self.json_check_lit, self.tiny_lit_dir, "tiny_lit_dir") # check and verify d.addCallback(lambda ign: self.web_json(self.root, t="check", verify="true")) d.addCallback(self.json_check_is_healthy, self.root, "root+v") d.addCallback(lambda ign: self.web_json(self.mutable, t="check", verify="true")) d.addCallback(self.json_check_is_healthy, self.mutable, "mutable+v") d.addCallback(lambda ign: self.web_json(self.large, t="check", verify="true")) d.addCallback(self.json_check_is_healthy, self.large, "large+v", incomplete=True) d.addCallback(lambda ign: self.web_json(self.small, t="check", verify="true")) d.addCallback(self.json_check_lit, self.small, "small+v") d.addCallback(lambda ign: self.web_json(self.small2, t="check", verify="true")) 
d.addCallback(self.json_check_lit, self.small2, "small2+v") d.addCallback(lambda ign: self.web_json(self.empty_lit_dir, t="check", verify="true")) d.addCallback(self.json_check_lit, self.empty_lit_dir, "empty_lit_dir+v") d.addCallback(lambda ign: self.web_json(self.tiny_lit_dir, t="check", verify="true")) d.addCallback(self.json_check_lit, self.tiny_lit_dir, "tiny_lit_dir+v") # check and repair, no verify d.addCallback(lambda ign: self.web_json(self.root, t="check", repair="true")) d.addCallback(self.json_check_and_repair_is_healthy, self.root, "root+r") d.addCallback(lambda ign: self.web_json(self.mutable, t="check", repair="true")) d.addCallback(self.json_check_and_repair_is_healthy, self.mutable, "mutable+r") d.addCallback(lambda ign: self.web_json(self.large, t="check", repair="true")) d.addCallback(self.json_check_and_repair_is_healthy, self.large, "large+r") d.addCallback(lambda ign: self.web_json(self.small, t="check", repair="true")) d.addCallback(self.json_check_lit, self.small, "small+r") d.addCallback(lambda ign: self.web_json(self.small2, t="check", repair="true")) d.addCallback(self.json_check_lit, self.small2, "small2+r") d.addCallback(lambda ign: self.web_json(self.empty_lit_dir, t="check", repair="true")) d.addCallback(self.json_check_lit, self.empty_lit_dir, "empty_lit_dir+r") d.addCallback(lambda ign: self.web_json(self.tiny_lit_dir, t="check", repair="true")) d.addCallback(self.json_check_lit, self.tiny_lit_dir, "tiny_lit_dir+r") # check+verify+repair d.addCallback(lambda ign: self.web_json(self.root, t="check", repair="true", verify="true")) d.addCallback(self.json_check_and_repair_is_healthy, self.root, "root+vr") d.addCallback(lambda ign: self.web_json(self.mutable, t="check", repair="true", verify="true")) d.addCallback(self.json_check_and_repair_is_healthy, self.mutable, "mutable+vr") d.addCallback(lambda ign: self.web_json(self.large, t="check", repair="true", verify="true")) d.addCallback(self.json_check_and_repair_is_healthy, self.large, "large+vr", incomplete=True) d.addCallback(lambda ign: self.web_json(self.small, t="check", repair="true", verify="true")) d.addCallback(self.json_check_lit, self.small, "small+vr") d.addCallback(lambda ign: self.web_json(self.small2, t="check", repair="true", verify="true")) d.addCallback(self.json_check_lit, self.small2, "small2+vr") d.addCallback(lambda ign: self.web_json(self.empty_lit_dir, t="check", repair="true", verify=True)) d.addCallback(self.json_check_lit, self.empty_lit_dir, "empty_lit_dir+vr") d.addCallback(lambda ign: self.web_json(self.tiny_lit_dir, t="check", repair="true", verify=True)) d.addCallback(self.json_check_lit, self.tiny_lit_dir, "tiny_lit_dir+vr") # now run a deep-check, with various verify= and repair= flags d.addCallback(lambda ign: self.slow_web(self.root, t="start-deep-check", output="json")) d.addCallback(self.json_full_deepcheck_is_healthy, self.root, "root+d") d.addCallback(lambda ign: self.slow_web(self.root, t="start-deep-check", verify="true", output="json")) d.addCallback(self.json_full_deepcheck_is_healthy, self.root, "root+dv") d.addCallback(lambda ign: self.slow_web(self.root, t="start-deep-check", repair="true", output="json")) d.addCallback(self.json_full_deepcheck_and_repair_is_healthy, self.root, "root+dr") d.addCallback(lambda ign: self.slow_web(self.root, t="start-deep-check", verify="true", repair="true", output="json")) d.addCallback(self.json_full_deepcheck_and_repair_is_healthy, self.root, "root+dvr") # now look at t=info d.addCallback(lambda ign: self.web(self.root, t="info")) 
# TODO: examine the output d.addCallback(lambda ign: self.web(self.mutable, t="info")) d.addCallback(lambda ign: self.web(self.large, t="info")) d.addCallback(lambda ign: self.web(self.small, t="info")) d.addCallback(lambda ign: self.web(self.small2, t="info")) d.addCallback(lambda ign: self.web(self.empty_lit_dir, t="info")) d.addCallback(lambda ign: self.web(self.tiny_lit_dir, t="info")) return d def _run_cli(self, argv, stdin=""): #print "CLI:", argv stdout, stderr = StringIO(), StringIO() d = threads.deferToThread(runner.runner, argv, run_by_human=False, stdin=StringIO(stdin), stdout=stdout, stderr=stderr) def _done(res): return stdout.getvalue(), stderr.getvalue() d.addCallback(_done) return d def do_test_cli_good(self, ignored): d = defer.succeed(None) d.addCallback(lambda ign: self.do_cli_manifest_stream1()) d.addCallback(lambda ign: self.do_cli_manifest_stream2()) d.addCallback(lambda ign: self.do_cli_manifest_stream3()) d.addCallback(lambda ign: self.do_cli_manifest_stream4()) d.addCallback(lambda ign: self.do_cli_manifest_stream5()) d.addCallback(lambda ign: self.do_cli_stats1()) d.addCallback(lambda ign: self.do_cli_stats2()) return d def _check_manifest_storage_index(self, out): lines = [l for l in out.split("\n") if l] self.failUnlessEqual(len(lines), 3) self.failUnless(base32.b2a(self.root.get_storage_index()) in lines) self.failUnless(base32.b2a(self.mutable.get_storage_index()) in lines) self.failUnless(base32.b2a(self.large.get_storage_index()) in lines) def do_cli_manifest_stream1(self): basedir = self.get_clientdir(0) d = self._run_cli(["--node-directory", basedir, "manifest", self.root_uri]) def _check((out,err)): self.failUnlessEqual(err, "") lines = [l for l in out.split("\n") if l] self.failUnlessEqual(len(lines), 8) caps = {} for l in lines: try: cap, path = l.split(None, 1) except ValueError: cap = l.strip() path = "" caps[cap] = path self.failUnless(self.root.get_uri() in caps) self.failUnlessEqual(caps[self.root.get_uri()], "") self.failUnlessEqual(caps[self.mutable.get_uri()], "mutable") self.failUnlessEqual(caps[self.large.get_uri()], "large") self.failUnlessEqual(caps[self.small.get_uri()], "small") self.failUnlessEqual(caps[self.small2.get_uri()], "small2") self.failUnlessEqual(caps[self.empty_lit_dir.get_uri()], "empty_lit_dir") self.failUnlessEqual(caps[self.tiny_lit_dir.get_uri()], "tiny_lit_dir") d.addCallback(_check) return d def do_cli_manifest_stream2(self): basedir = self.get_clientdir(0) d = self._run_cli(["--node-directory", basedir, "manifest", "--raw", self.root_uri]) def _check((out,err)): self.failUnlessEqual(err, "") # this should be the same as the POST t=stream-manifest output self._check_streamed_manifest(out) d.addCallback(_check) return d def do_cli_manifest_stream3(self): basedir = self.get_clientdir(0) d = self._run_cli(["--node-directory", basedir, "manifest", "--storage-index", self.root_uri]) def _check((out,err)): self.failUnlessEqual(err, "") self._check_manifest_storage_index(out) d.addCallback(_check) return d def do_cli_manifest_stream4(self): basedir = self.get_clientdir(0) d = self._run_cli(["--node-directory", basedir, "manifest", "--verify-cap", self.root_uri]) def _check((out,err)): self.failUnlessEqual(err, "") lines = [l for l in out.split("\n") if l] self.failUnlessEqual(len(lines), 3) self.failUnless(self.root.get_verify_cap().to_string() in lines) self.failUnless(self.mutable.get_verify_cap().to_string() in lines) self.failUnless(self.large.get_verify_cap().to_string() in lines) d.addCallback(_check) return d def 
do_cli_manifest_stream5(self): basedir = self.get_clientdir(0) d = self._run_cli(["--node-directory", basedir, "manifest", "--repair-cap", self.root_uri]) def _check((out,err)): self.failUnlessEqual(err, "") lines = [l for l in out.split("\n") if l] self.failUnlessEqual(len(lines), 3) self.failUnless(self.root.get_repair_cap().to_string() in lines) self.failUnless(self.mutable.get_repair_cap().to_string() in lines) self.failUnless(self.large.get_repair_cap().to_string() in lines) d.addCallback(_check) return d def do_cli_stats1(self): basedir = self.get_clientdir(0) d = self._run_cli(["--node-directory", basedir, "stats", self.root_uri]) def _check3((out,err)): lines = [l.strip() for l in out.split("\n") if l] self.failUnless("count-immutable-files: 1" in lines) self.failUnless("count-mutable-files: 1" in lines) self.failUnless("count-literal-files: 3" in lines) self.failUnless("count-files: 5" in lines) self.failUnless("count-directories: 3" in lines) self.failUnless("size-immutable-files: 13000 (13.00 kB, 12.70 kiB)" in lines, lines) self.failUnless("size-literal-files: 56" in lines, lines) self.failUnless(" 4-10 : 1 (10 B, 10 B)".strip() in lines, lines) self.failUnless(" 11-31 : 2 (31 B, 31 B)".strip() in lines, lines) self.failUnless("10001-31622 : 1 (31.62 kB, 30.88 kiB)".strip() in lines, lines) d.addCallback(_check3) return d def do_cli_stats2(self): basedir = self.get_clientdir(0) d = self._run_cli(["--node-directory", basedir, "stats", "--raw", self.root_uri]) def _check4((out,err)): data = simplejson.loads(out) self.failUnlessEqual(data["count-immutable-files"], 1) self.failUnlessEqual(data["count-immutable-files"], 1) self.failUnlessEqual(data["count-mutable-files"], 1) self.failUnlessEqual(data["count-literal-files"], 3) self.failUnlessEqual(data["count-files"], 5) self.failUnlessEqual(data["count-directories"], 3) self.failUnlessEqual(data["size-immutable-files"], 13000) self.failUnlessEqual(data["size-literal-files"], 56) self.failUnless([4,10,1] in data["size-files-histogram"]) self.failUnless([11,31,2] in data["size-files-histogram"]) self.failUnless([10001,31622,1] in data["size-files-histogram"]) d.addCallback(_check4) return d class DeepCheckWebBad(DeepCheckBase, unittest.TestCase): def test_bad(self): self.basedir = "deepcheck/DeepCheckWebBad/bad" self.set_up_grid() d = self.set_up_damaged_tree() d.addCallback(self.do_check) d.addCallback(self.do_deepcheck) d.addCallback(self.do_deepcheck_broken) d.addCallback(self.do_test_web_bad) d.addErrback(self.explain_web_error) d.addErrback(self.explain_error) return d def set_up_damaged_tree(self): # 6.4s # root # mutable-good # mutable-missing-shares # mutable-corrupt-shares # mutable-unrecoverable # large-good # large-missing-shares # large-corrupt-shares # large-unrecoverable # broken # large1-good # subdir-good # large2-good # subdir-unrecoverable # large3-good self.nodes = {} c0 = self.g.clients[0] d = c0.create_dirnode() def _created_root(n): self.root = n self.root_uri = n.get_uri() d.addCallback(_created_root) d.addCallback(self.create_mangled, "mutable-good") d.addCallback(self.create_mangled, "mutable-missing-shares") d.addCallback(self.create_mangled, "mutable-corrupt-shares") d.addCallback(self.create_mangled, "mutable-unrecoverable") d.addCallback(self.create_mangled, "large-good") d.addCallback(self.create_mangled, "large-missing-shares") d.addCallback(self.create_mangled, "large-corrupt-shares") d.addCallback(self.create_mangled, "large-unrecoverable") d.addCallback(lambda ignored: c0.create_dirnode()) 
d.addCallback(self._stash_node, "broken") large1 = upload.Data("Lots of data\n" * 1000 + "large1" + "\n", None) d.addCallback(lambda ignored: self.nodes["broken"].add_file(u"large1", large1)) d.addCallback(lambda ignored: self.nodes["broken"].create_subdirectory(u"subdir-good")) large2 = upload.Data("Lots of data\n" * 1000 + "large2" + "\n", None) d.addCallback(lambda subdir: subdir.add_file(u"large2-good", large2)) d.addCallback(lambda ignored: self.nodes["broken"].create_subdirectory(u"subdir-unrecoverable")) d.addCallback(self._stash_node, "subdir-unrecoverable") large3 = upload.Data("Lots of data\n" * 1000 + "large3" + "\n", None) d.addCallback(lambda subdir: subdir.add_file(u"large3-good", large3)) d.addCallback(lambda ignored: self._delete_most_shares(self.nodes["broken"])) return d def _stash_node(self, node, name): self.nodes[name] = node return node def create_mangled(self, ignored, name): nodetype, mangletype = name.split("-", 1) if nodetype == "mutable": mutable_uploadable = MutableData("mutable file contents") d = self.g.clients[0].create_mutable_file(mutable_uploadable) d.addCallback(lambda n: self.root.set_node(unicode(name), n)) elif nodetype == "large": large = upload.Data("Lots of data\n" * 1000 + name + "\n", None) d = self.root.add_file(unicode(name), large) elif nodetype == "small": small = upload.Data("Small enough for a LIT", None) d = self.root.add_file(unicode(name), small) d.addCallback(self._stash_node, name) if mangletype == "good": pass elif mangletype == "missing-shares": d.addCallback(self._delete_some_shares) elif mangletype == "corrupt-shares": d.addCallback(self._corrupt_some_shares) else: assert mangletype == "unrecoverable" d.addCallback(self._delete_most_shares) return d def _run_cli(self, argv): stdout, stderr = StringIO(), StringIO() # this can only do synchronous operations assert argv[0] == "debug" runner.runner(argv, run_by_human=False, stdout=stdout, stderr=stderr) return stdout.getvalue() def _delete_some_shares(self, node): self.delete_shares_numbered(node.get_uri(), [0,1]) def _corrupt_some_shares(self, node): for (shnum, serverid, sharefile) in self.find_uri_shares(node.get_uri()): if shnum in (0,1): self._run_cli(["debug", "corrupt-share", sharefile]) def _delete_most_shares(self, node): self.delete_shares_numbered(node.get_uri(), range(1,10)) def check_is_healthy(self, cr, where): try: self.failUnless(ICheckResults.providedBy(cr), (cr, type(cr), where)) self.failUnless(cr.is_healthy(), (cr.get_report(), cr.is_healthy(), cr.get_summary(), where)) self.failUnless(cr.is_recoverable(), where) self.failUnlessEqual(cr.get_version_counter_recoverable(), 1, where) self.failUnlessEqual(cr.get_version_counter_unrecoverable(), 0, where) return cr except Exception, le: le.args = tuple(le.args + (where,)) raise def check_is_missing_shares(self, cr, where): self.failUnless(ICheckResults.providedBy(cr), where) self.failIf(cr.is_healthy(), where) self.failUnless(cr.is_recoverable(), where) self.failUnlessEqual(cr.get_version_counter_recoverable(), 1, where) self.failUnlessEqual(cr.get_version_counter_unrecoverable(), 0, where) return cr def check_has_corrupt_shares(self, cr, where): # by "corrupt-shares" we mean the file is still recoverable self.failUnless(ICheckResults.providedBy(cr), where) self.failIf(cr.is_healthy(), (where, cr)) self.failUnless(cr.is_recoverable(), where) self.failUnless(cr.get_share_counter_good() < 10, where) self.failUnless(cr.get_corrupt_shares(), where) return cr def check_is_unrecoverable(self, cr, where): 
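        # "unrecoverable" means fewer good shares remain than the k needed to
        # decode, so the checker reports no recoverable versions at all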
self.failUnless(ICheckResults.providedBy(cr), where) self.failIf(cr.is_healthy(), where) self.failIf(cr.is_recoverable(), where) self.failUnless(cr.get_share_counter_good() < cr.get_encoding_needed(), (cr.get_share_counter_good(), cr.get_encoding_needed(), where)) self.failUnlessEqual(cr.get_version_counter_recoverable(), 0, where) self.failUnlessEqual(cr.get_version_counter_unrecoverable(), 1, where) return cr def do_check(self, ignored): d = defer.succeed(None) # check the individual items, without verification. This will not # detect corrupt shares. def _check(which, checker): d = self.nodes[which].check(Monitor()) d.addCallback(checker, which + "--check") return d d.addCallback(lambda ign: _check("mutable-good", self.check_is_healthy)) d.addCallback(lambda ign: _check("mutable-missing-shares", self.check_is_missing_shares)) d.addCallback(lambda ign: _check("mutable-corrupt-shares", self.check_is_healthy)) d.addCallback(lambda ign: _check("mutable-unrecoverable", self.check_is_unrecoverable)) d.addCallback(lambda ign: _check("large-good", self.check_is_healthy)) d.addCallback(lambda ign: _check("large-missing-shares", self.check_is_missing_shares)) d.addCallback(lambda ign: _check("large-corrupt-shares", self.check_is_healthy)) d.addCallback(lambda ign: _check("large-unrecoverable", self.check_is_unrecoverable)) # and again with verify=True, which *does* detect corrupt shares. def _checkv(which, checker): d = self.nodes[which].check(Monitor(), verify=True) d.addCallback(checker, which + "--check-and-verify") return d d.addCallback(lambda ign: _checkv("mutable-good", self.check_is_healthy)) d.addCallback(lambda ign: _checkv("mutable-missing-shares", self.check_is_missing_shares)) d.addCallback(lambda ign: _checkv("mutable-corrupt-shares", self.check_has_corrupt_shares)) d.addCallback(lambda ign: _checkv("mutable-unrecoverable", self.check_is_unrecoverable)) d.addCallback(lambda ign: _checkv("large-good", self.check_is_healthy)) d.addCallback(lambda ign: _checkv("large-missing-shares", self.check_is_missing_shares)) d.addCallback(lambda ign: _checkv("large-corrupt-shares", self.check_has_corrupt_shares)) d.addCallback(lambda ign: _checkv("large-unrecoverable", self.check_is_unrecoverable)) return d def do_deepcheck(self, ignored): d = defer.succeed(None) # now deep-check the root, with various verify= and repair= options d.addCallback(lambda ign: self.root.start_deep_check().when_done()) def _check1(cr): self.failUnless(IDeepCheckResults.providedBy(cr)) c = cr.get_counters() self.failUnlessEqual(c["count-objects-checked"], 9) self.failUnlessEqual(c["count-objects-healthy"], 5) self.failUnlessEqual(c["count-objects-unhealthy"], 4) self.failUnlessEqual(c["count-objects-unrecoverable"], 2) d.addCallback(_check1) d.addCallback(lambda ign: self.root.start_deep_check(verify=True).when_done()) def _check2(cr): self.failUnless(IDeepCheckResults.providedBy(cr)) c = cr.get_counters() self.failUnlessEqual(c["count-objects-checked"], 9) self.failUnlessEqual(c["count-objects-healthy"], 3) self.failUnlessEqual(c["count-objects-unhealthy"], 6) self.failUnlessEqual(c["count-objects-healthy"], 3) # root, mutable good, large good self.failUnlessEqual(c["count-objects-unrecoverable"], 2) # mutable unrecoverable, large unrecoverable d.addCallback(_check2) return d def do_deepcheck_broken(self, ignored): # deep-check on the broken directory should fail, because of the # untraversable subdir def _do_deep_check(): return self.nodes["broken"].start_deep_check().when_done() d = 
self.shouldFail(UnrecoverableFileError, "do_deep_check", "no recoverable versions", _do_deep_check) return d def json_is_healthy(self, data, where): r = data["results"] self.failUnless(r["healthy"], where) self.failUnless(r["recoverable"], where) self.failUnlessEqual(r["count-recoverable-versions"], 1, where) self.failUnlessEqual(r["count-unrecoverable-versions"], 0, where) def json_is_missing_shares(self, data, where): r = data["results"] self.failIf(r["healthy"], where) self.failUnless(r["recoverable"], where) self.failUnlessEqual(r["count-recoverable-versions"], 1, where) self.failUnlessEqual(r["count-unrecoverable-versions"], 0, where) def json_has_corrupt_shares(self, data, where): # by "corrupt-shares" we mean the file is still recoverable r = data["results"] self.failIf(r["healthy"], where) self.failUnless(r["recoverable"], where) self.failUnless(r["count-shares-good"] < 10, where) self.failUnless(r["count-corrupt-shares"], where) self.failUnless(r["list-corrupt-shares"], where) def json_is_unrecoverable(self, data, where): r = data["results"] self.failIf(r["healthy"], where) self.failIf(r["recoverable"], where) self.failUnless(r["count-shares-good"] < r["count-shares-needed"], where) self.failUnlessEqual(r["count-recoverable-versions"], 0, where) self.failUnlessEqual(r["count-unrecoverable-versions"], 1, where) def do_test_web_bad(self, ignored): d = defer.succeed(None) # check, no verify def _check(which, checker): d = self.web_json(self.nodes[which], t="check") d.addCallback(checker, which + "--webcheck") return d d.addCallback(lambda ign: _check("mutable-good", self.json_is_healthy)) d.addCallback(lambda ign: _check("mutable-missing-shares", self.json_is_missing_shares)) d.addCallback(lambda ign: _check("mutable-corrupt-shares", self.json_is_healthy)) d.addCallback(lambda ign: _check("mutable-unrecoverable", self.json_is_unrecoverable)) d.addCallback(lambda ign: _check("large-good", self.json_is_healthy)) d.addCallback(lambda ign: _check("large-missing-shares", self.json_is_missing_shares)) d.addCallback(lambda ign: _check("large-corrupt-shares", self.json_is_healthy)) d.addCallback(lambda ign: _check("large-unrecoverable", self.json_is_unrecoverable)) # check and verify def _checkv(which, checker): d = self.web_json(self.nodes[which], t="check", verify="true") d.addCallback(checker, which + "--webcheck-and-verify") return d d.addCallback(lambda ign: _checkv("mutable-good", self.json_is_healthy)) d.addCallback(lambda ign: _checkv("mutable-missing-shares", self.json_is_missing_shares)) d.addCallback(lambda ign: _checkv("mutable-corrupt-shares", self.json_has_corrupt_shares)) d.addCallback(lambda ign: _checkv("mutable-unrecoverable", self.json_is_unrecoverable)) d.addCallback(lambda ign: _checkv("large-good", self.json_is_healthy)) d.addCallback(lambda ign: _checkv("large-missing-shares", self.json_is_missing_shares)) d.addCallback(lambda ign: _checkv("large-corrupt-shares", self.json_has_corrupt_shares)) d.addCallback(lambda ign: _checkv("large-unrecoverable", self.json_is_unrecoverable)) return d class Large(DeepCheckBase, unittest.TestCase): def test_lots_of_lits(self): self.basedir = "deepcheck/Large/lots_of_lits" self.set_up_grid() # create the following directory structure: # root/ # subdir/ # 000-large (CHK) # 001-small (LIT) # 002-small # ... 
# 399-small # then do a deepcheck and make sure it doesn't cause a # Deferred-tail-recursion stack overflow COUNT = 400 c0 = self.g.clients[0] d = c0.create_dirnode() self.stash = {} def _created_root(n): self.root = n return n d.addCallback(_created_root) d.addCallback(lambda root: root.create_subdirectory(u"subdir")) def _add_children(subdir_node): self.subdir_node = subdir_node kids = {} for i in range(1, COUNT): litcap = LiteralFileURI("%03d-data" % i).to_string() kids[u"%03d-small" % i] = (litcap, litcap) return subdir_node.set_children(kids) d.addCallback(_add_children) up = upload.Data("large enough for CHK" * 100, "") d.addCallback(lambda ign: self.subdir_node.add_file(u"0000-large", up)) def _start_deepcheck(ignored): return self.web(self.root, method="POST", t="stream-deep-check") d.addCallback(_start_deepcheck) def _check( (output, url) ): units = list(self.parse_streamed_json(output)) self.failUnlessEqual(len(units), 2+COUNT+1) d.addCallback(_check) return d tahoe-lafs-1.10.0/src/allmydata/test/test_dirnode.py000066400000000000000000002767261221140116300224020ustar00rootroot00000000000000import time import unicodedata from zope.interface import implements from twisted.trial import unittest from twisted.internet import defer from twisted.internet.interfaces import IConsumer from allmydata import uri, dirnode from allmydata.client import Client from allmydata.immutable import upload from allmydata.interfaces import IImmutableFileNode, IMutableFileNode, \ ExistingChildError, NoSuchChildError, MustNotBeUnknownRWError, \ MustBeDeepImmutableError, MustBeReadonlyError, \ IDeepCheckResults, IDeepCheckAndRepairResults, \ MDMF_VERSION, SDMF_VERSION from allmydata.mutable.filenode import MutableFileNode from allmydata.mutable.common import UncoordinatedWriteError from allmydata.util import hashutil, base32 from allmydata.util.netstring import split_netstring from allmydata.monitor import Monitor from allmydata.test.common import make_chk_file_uri, make_mutable_file_uri, \ ErrorMixin from allmydata.test.no_network import GridTestMixin from allmydata.unknown import UnknownNode, strip_prefix_for_ro from allmydata.nodemaker import NodeMaker from base64 import b32decode import allmydata.test.common_util as testutil class MemAccum: implements(IConsumer) def registerProducer(self, producer, streaming): self.producer = producer self.producer.resumeProducing() pass def unregisterProducer(self): pass def write(self, data): assert not hasattr(self, 'data') self.data = data self.producer.resumeProducing() setup_py_uri = "URI:CHK:n7r3m6wmomelk4sep3kw5cvduq:os7ijw5c3maek7pg65e5254k2fzjflavtpejjyhshpsxuqzhcwwq:3:20:14861" one_uri = "URI:LIT:n5xgk" # LIT for "one" mut_write_uri = "URI:SSK:vfvcbdfbszyrsaxchgevhmmlii:euw4iw7bbnkrrwpzuburbhppuxhc3gwxv26f6imekhz7zyw2ojnq" mdmf_write_uri = "URI:MDMF:x533rhbm6kiehzl5kj3s44n5ie:4gif5rhneyd763ouo5qjrgnsoa3bg43xycy4robj2rf3tvmhdl3a" empty_litdir_uri = "URI:DIR2-LIT:" tiny_litdir_uri = "URI:DIR2-LIT:gqytunj2onug64tufqzdcosvkjetutcjkq5gw4tvm5vwszdgnz5hgyzufqydulbshj5x2lbm" # contains one child which is itself also LIT mut_read_uri = "URI:SSK-RO:jf6wkflosyvntwxqcdo7a54jvm:euw4iw7bbnkrrwpzuburbhppuxhc3gwxv26f6imekhz7zyw2ojnq" mdmf_read_uri = "URI:MDMF-RO:d4cydxselputycfzkw6qgz4zv4:4gif5rhneyd763ouo5qjrgnsoa3bg43xycy4robj2rf3tvmhdl3a" future_write_uri = "x-tahoe-crazy://I_am_from_the_future." future_read_uri = "x-tahoe-crazy-readonly://I_am_from_the_future." 
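# Background on these fixture caps: LIT caps carry the whole file body,
# base32-encoded, inside the URI itself (base32.a2b("n5xgk") decodes to
# "one"), and DIR2-LIT caps do the same for an entire small directory. The
# "x-tahoe-crazy" URIs stand in for cap formats from a hypothetical future
# version and are used to exercise how dirnodes handle unknown caps (see
# UnknownNode).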
future_nonascii_write_uri = u"x-tahoe-even-more-crazy://I_am_from_the_future_rw_\u263A".encode('utf-8') future_nonascii_read_uri = u"x-tahoe-even-more-crazy-readonly://I_am_from_the_future_ro_\u263A".encode('utf-8') # 'o' 'n' 'e-macron' one_nfc = u"on\u0113" one_nfd = u"one\u0304" class Dirnode(GridTestMixin, unittest.TestCase, testutil.ReallyEqualMixin, testutil.ShouldFailMixin, testutil.StallMixin, ErrorMixin): timeout = 480 # It occasionally takes longer than 240 seconds on Francois's arm box. def _do_create_test(self, mdmf=False): c = self.g.clients[0] self.expected_manifest = [] self.expected_verifycaps = set() self.expected_storage_indexes = set() d = None if mdmf: d = c.create_dirnode(version=MDMF_VERSION) else: d = c.create_dirnode() def _then(n): # / self.rootnode = n backing_node = n._node if mdmf: self.failUnlessEqual(backing_node.get_version(), MDMF_VERSION) else: self.failUnlessEqual(backing_node.get_version(), SDMF_VERSION) self.failUnless(n.is_mutable()) u = n.get_uri() self.failUnless(u) cap_formats = [] if mdmf: cap_formats = ["URI:DIR2-MDMF:", "URI:DIR2-MDMF-RO:", "URI:DIR2-MDMF-Verifier:"] else: cap_formats = ["URI:DIR2:", "URI:DIR2-RO", "URI:DIR2-Verifier:"] rw, ro, v = cap_formats self.failUnless(u.startswith(rw), u) u_ro = n.get_readonly_uri() self.failUnless(u_ro.startswith(ro), u_ro) u_v = n.get_verify_cap().to_string() self.failUnless(u_v.startswith(v), u_v) u_r = n.get_repair_cap().to_string() self.failUnlessReallyEqual(u_r, u) self.expected_manifest.append( ((), u) ) self.expected_verifycaps.add(u_v) si = n.get_storage_index() self.expected_storage_indexes.add(base32.b2a(si)) expected_si = n._uri.get_storage_index() self.failUnlessReallyEqual(si, expected_si) d = n.list() d.addCallback(lambda res: self.failUnlessEqual(res, {})) d.addCallback(lambda res: n.has_child(u"missing")) d.addCallback(lambda res: self.failIf(res)) fake_file_uri = make_mutable_file_uri() other_file_uri = make_mutable_file_uri() m = c.nodemaker.create_from_cap(fake_file_uri) ffu_v = m.get_verify_cap().to_string() self.expected_manifest.append( ((u"child",) , m.get_uri()) ) self.expected_verifycaps.add(ffu_v) self.expected_storage_indexes.add(base32.b2a(m.get_storage_index())) d.addCallback(lambda res: n.set_uri(u"child", fake_file_uri, fake_file_uri)) d.addCallback(lambda res: self.shouldFail(ExistingChildError, "set_uri-no", "child 'child' already exists", n.set_uri, u"child", other_file_uri, other_file_uri, overwrite=False)) # / # /child = mutable d.addCallback(lambda res: n.create_subdirectory(u"subdir")) # / # /child = mutable # /subdir = directory def _created(subdir): self.failUnless(isinstance(subdir, dirnode.DirectoryNode)) self.subdir = subdir new_v = subdir.get_verify_cap().to_string() assert isinstance(new_v, str) self.expected_manifest.append( ((u"subdir",), subdir.get_uri()) ) self.expected_verifycaps.add(new_v) si = subdir.get_storage_index() self.expected_storage_indexes.add(base32.b2a(si)) d.addCallback(_created) d.addCallback(lambda res: self.shouldFail(ExistingChildError, "mkdir-no", "child 'subdir' already exists", n.create_subdirectory, u"subdir", overwrite=False)) d.addCallback(lambda res: n.list()) d.addCallback(lambda children: self.failUnlessReallyEqual(set(children.keys()), set([u"child", u"subdir"]))) d.addCallback(lambda res: n.start_deep_stats().when_done()) def _check_deepstats(stats): self.failUnless(isinstance(stats, dict)) expected = {"count-immutable-files": 0, "count-mutable-files": 1, "count-literal-files": 0, "count-files": 1, "count-directories": 2, 
"size-immutable-files": 0, "size-literal-files": 0, #"size-directories": 616, # varies #"largest-directory": 616, "largest-directory-children": 2, "largest-immutable-file": 0, } for k,v in expected.iteritems(): self.failUnlessReallyEqual(stats[k], v, "stats[%s] was %s, not %s" % (k, stats[k], v)) self.failUnless(stats["size-directories"] > 500, stats["size-directories"]) self.failUnless(stats["largest-directory"] > 500, stats["largest-directory"]) self.failUnlessReallyEqual(stats["size-files-histogram"], []) d.addCallback(_check_deepstats) d.addCallback(lambda res: n.build_manifest().when_done()) def _check_manifest(res): manifest = res["manifest"] self.failUnlessReallyEqual(sorted(manifest), sorted(self.expected_manifest)) stats = res["stats"] _check_deepstats(stats) self.failUnlessReallyEqual(self.expected_verifycaps, res["verifycaps"]) self.failUnlessReallyEqual(self.expected_storage_indexes, res["storage-index"]) d.addCallback(_check_manifest) def _add_subsubdir(res): return self.subdir.create_subdirectory(u"subsubdir") d.addCallback(_add_subsubdir) # / # /child = mutable # /subdir = directory # /subdir/subsubdir = directory d.addCallback(lambda res: n.get_child_at_path(u"subdir/subsubdir")) d.addCallback(lambda subsubdir: self.failUnless(isinstance(subsubdir, dirnode.DirectoryNode))) d.addCallback(lambda res: n.get_child_at_path(u"")) d.addCallback(lambda res: self.failUnlessReallyEqual(res.get_uri(), n.get_uri())) d.addCallback(lambda res: n.get_metadata_for(u"child")) d.addCallback(lambda metadata: self.failUnlessEqual(set(metadata.keys()), set(["tahoe"]))) d.addCallback(lambda res: self.shouldFail(NoSuchChildError, "gcamap-no", "nope", n.get_child_and_metadata_at_path, u"subdir/nope")) d.addCallback(lambda res: n.get_child_and_metadata_at_path(u"")) def _check_child_and_metadata1(res): child, metadata = res self.failUnless(isinstance(child, dirnode.DirectoryNode)) # edge-metadata needs at least one path segment self.failUnlessEqual(set(metadata.keys()), set([])) d.addCallback(_check_child_and_metadata1) d.addCallback(lambda res: n.get_child_and_metadata_at_path(u"child")) def _check_child_and_metadata2(res): child, metadata = res self.failUnlessReallyEqual(child.get_uri(), fake_file_uri) self.failUnlessEqual(set(metadata.keys()), set(["tahoe"])) d.addCallback(_check_child_and_metadata2) d.addCallback(lambda res: n.get_child_and_metadata_at_path(u"subdir/subsubdir")) def _check_child_and_metadata3(res): child, metadata = res self.failUnless(isinstance(child, dirnode.DirectoryNode)) self.failUnlessEqual(set(metadata.keys()), set(["tahoe"])) d.addCallback(_check_child_and_metadata3) # set_uri + metadata # it should be possible to add a child without any metadata d.addCallback(lambda res: n.set_uri(u"c2", fake_file_uri, fake_file_uri, {})) d.addCallback(lambda res: n.get_metadata_for(u"c2")) d.addCallback(lambda metadata: self.failUnlessEqual(set(metadata.keys()), set(["tahoe"]))) # You can't override the link timestamps. 
d.addCallback(lambda res: n.set_uri(u"c2", fake_file_uri, fake_file_uri, { 'tahoe': {'linkcrtime': "bogus"}})) d.addCallback(lambda res: n.get_metadata_for(u"c2")) def _has_good_linkcrtime(metadata): self.failUnless(metadata.has_key('tahoe')) self.failUnless(metadata['tahoe'].has_key('linkcrtime')) self.failIfEqual(metadata['tahoe']['linkcrtime'], 'bogus') d.addCallback(_has_good_linkcrtime) # if we don't set any defaults, the child should get timestamps d.addCallback(lambda res: n.set_uri(u"c3", fake_file_uri, fake_file_uri)) d.addCallback(lambda res: n.get_metadata_for(u"c3")) d.addCallback(lambda metadata: self.failUnlessEqual(set(metadata.keys()), set(["tahoe"]))) # we can also add specific metadata at set_uri() time d.addCallback(lambda res: n.set_uri(u"c4", fake_file_uri, fake_file_uri, {"key": "value"})) d.addCallback(lambda res: n.get_metadata_for(u"c4")) d.addCallback(lambda metadata: self.failUnless((set(metadata.keys()) == set(["key", "tahoe"])) and (metadata['key'] == "value"), metadata)) d.addCallback(lambda res: n.delete(u"c2")) d.addCallback(lambda res: n.delete(u"c3")) d.addCallback(lambda res: n.delete(u"c4")) # set_node + metadata # it should be possible to add a child without any metadata except for timestamps d.addCallback(lambda res: n.set_node(u"d2", n, {})) d.addCallback(lambda res: c.create_dirnode()) d.addCallback(lambda n2: self.shouldFail(ExistingChildError, "set_node-no", "child 'd2' already exists", n.set_node, u"d2", n2, overwrite=False)) d.addCallback(lambda res: n.get_metadata_for(u"d2")) d.addCallback(lambda metadata: self.failUnlessEqual(set(metadata.keys()), set(["tahoe"]))) # if we don't set any defaults, the child should get timestamps d.addCallback(lambda res: n.set_node(u"d3", n)) d.addCallback(lambda res: n.get_metadata_for(u"d3")) d.addCallback(lambda metadata: self.failUnlessEqual(set(metadata.keys()), set(["tahoe"]))) # we can also add specific metadata at set_node() time d.addCallback(lambda res: n.set_node(u"d4", n, {"key": "value"})) d.addCallback(lambda res: n.get_metadata_for(u"d4")) d.addCallback(lambda metadata: self.failUnless((set(metadata.keys()) == set(["key", "tahoe"])) and (metadata["key"] == "value"), metadata)) d.addCallback(lambda res: n.delete(u"d2")) d.addCallback(lambda res: n.delete(u"d3")) d.addCallback(lambda res: n.delete(u"d4")) # metadata through set_children() d.addCallback(lambda res: n.set_children({ u"e1": (fake_file_uri, fake_file_uri), u"e2": (fake_file_uri, fake_file_uri, {}), u"e3": (fake_file_uri, fake_file_uri, {"key": "value"}), })) d.addCallback(lambda n2: self.failUnlessIdentical(n2, n)) d.addCallback(lambda res: self.shouldFail(ExistingChildError, "set_children-no", "child 'e1' already exists", n.set_children, { u"e1": (other_file_uri, other_file_uri), u"new": (other_file_uri, other_file_uri), }, overwrite=False)) # and 'new' should not have been created d.addCallback(lambda res: n.list()) d.addCallback(lambda children: self.failIf(u"new" in children)) d.addCallback(lambda res: n.get_metadata_for(u"e1")) d.addCallback(lambda metadata: self.failUnlessEqual(set(metadata.keys()), set(["tahoe"]))) d.addCallback(lambda res: n.get_metadata_for(u"e2")) d.addCallback(lambda metadata: self.failUnlessEqual(set(metadata.keys()), set(["tahoe"]))) d.addCallback(lambda res: n.get_metadata_for(u"e3")) d.addCallback(lambda metadata: self.failUnless((set(metadata.keys()) == set(["key", "tahoe"])) and (metadata["key"] == "value"), metadata)) d.addCallback(lambda res: n.delete(u"e1")) d.addCallback(lambda res: n.delete(u"e2")) 
d.addCallback(lambda res: n.delete(u"e3")) # metadata through set_nodes() d.addCallback(lambda res: n.set_nodes({ u"f1": (n, None), u"f2": (n, {}), u"f3": (n, {"key": "value"}), })) d.addCallback(lambda n2: self.failUnlessIdentical(n2, n)) d.addCallback(lambda res: self.shouldFail(ExistingChildError, "set_nodes-no", "child 'f1' already exists", n.set_nodes, { u"f1": (n, None), u"new": (n, None), }, overwrite=False)) # and 'new' should not have been created d.addCallback(lambda res: n.list()) d.addCallback(lambda children: self.failIf(u"new" in children)) d.addCallback(lambda res: n.get_metadata_for(u"f1")) d.addCallback(lambda metadata: self.failUnlessEqual(set(metadata.keys()), set(["tahoe"]))) d.addCallback(lambda res: n.get_metadata_for(u"f2")) d.addCallback(lambda metadata: self.failUnlessEqual(set(metadata.keys()), set(["tahoe"]))) d.addCallback(lambda res: n.get_metadata_for(u"f3")) d.addCallback(lambda metadata: self.failUnless((set(metadata.keys()) == set(["key", "tahoe"])) and (metadata["key"] == "value"), metadata)) d.addCallback(lambda res: n.delete(u"f1")) d.addCallback(lambda res: n.delete(u"f2")) d.addCallback(lambda res: n.delete(u"f3")) d.addCallback(lambda res: n.set_metadata_for(u"child", {"tags": ["web2.0-compatible"], "tahoe": {"bad": "mojo"}})) d.addCallback(lambda n1: n1.get_metadata_for(u"child")) d.addCallback(lambda metadata: self.failUnless((set(metadata.keys()) == set(["tags", "tahoe"])) and metadata["tags"] == ["web2.0-compatible"] and "bad" not in metadata["tahoe"], metadata)) d.addCallback(lambda res: self.shouldFail(NoSuchChildError, "set_metadata_for-nosuch", "", n.set_metadata_for, u"nosuch", {})) def _start(res): self._start_timestamp = time.time() d.addCallback(_start) # simplejson-1.7.1 (as shipped on Ubuntu 'gutsy') rounds all # floats to hundredeths (it uses str(num) instead of repr(num)). # simplejson-1.7.3 does not have this bug. To prevent this bug # from causing the test to fail, stall for more than a few # hundrededths of a second. 
d.addCallback(self.stall, 0.1) d.addCallback(lambda res: n.add_file(u"timestamps", upload.Data("stamp me", convergence="some convergence string"))) d.addCallback(self.stall, 0.1) def _stop(res): self._stop_timestamp = time.time() d.addCallback(_stop) d.addCallback(lambda res: n.get_metadata_for(u"timestamps")) def _check_timestamp1(metadata): self.failUnlessEqual(set(metadata.keys()), set(["tahoe"])) tahoe_md = metadata["tahoe"] self.failUnlessEqual(set(tahoe_md.keys()), set(["linkcrtime", "linkmotime"])) self.failUnlessGreaterOrEqualThan(tahoe_md["linkcrtime"], self._start_timestamp) self.failUnlessGreaterOrEqualThan(self._stop_timestamp, tahoe_md["linkcrtime"]) self.failUnlessGreaterOrEqualThan(tahoe_md["linkmotime"], self._start_timestamp) self.failUnlessGreaterOrEqualThan(self._stop_timestamp, tahoe_md["linkmotime"]) # Our current timestamp rules say that replacing an existing # child should preserve the 'linkcrtime' but update the # 'linkmotime' self._old_linkcrtime = tahoe_md["linkcrtime"] self._old_linkmotime = tahoe_md["linkmotime"] d.addCallback(_check_timestamp1) d.addCallback(self.stall, 2.0) # accomodate low-res timestamps d.addCallback(lambda res: n.set_node(u"timestamps", n)) d.addCallback(lambda res: n.get_metadata_for(u"timestamps")) def _check_timestamp2(metadata): self.failUnlessIn("tahoe", metadata) tahoe_md = metadata["tahoe"] self.failUnlessEqual(set(tahoe_md.keys()), set(["linkcrtime", "linkmotime"])) self.failUnlessReallyEqual(tahoe_md["linkcrtime"], self._old_linkcrtime) self.failUnlessGreaterThan(tahoe_md["linkmotime"], self._old_linkmotime) return n.delete(u"timestamps") d.addCallback(_check_timestamp2) d.addCallback(lambda res: n.delete(u"subdir")) d.addCallback(lambda old_child: self.failUnlessReallyEqual(old_child.get_uri(), self.subdir.get_uri())) d.addCallback(lambda res: n.list()) d.addCallback(lambda children: self.failUnlessReallyEqual(set(children.keys()), set([u"child"]))) uploadable1 = upload.Data("some data", convergence="converge") d.addCallback(lambda res: n.add_file(u"newfile", uploadable1)) d.addCallback(lambda newnode: self.failUnless(IImmutableFileNode.providedBy(newnode))) uploadable2 = upload.Data("some data", convergence="stuff") d.addCallback(lambda res: self.shouldFail(ExistingChildError, "add_file-no", "child 'newfile' already exists", n.add_file, u"newfile", uploadable2, overwrite=False)) d.addCallback(lambda res: n.list()) d.addCallback(lambda children: self.failUnlessReallyEqual(set(children.keys()), set([u"child", u"newfile"]))) d.addCallback(lambda res: n.get_metadata_for(u"newfile")) d.addCallback(lambda metadata: self.failUnlessEqual(set(metadata.keys()), set(["tahoe"]))) uploadable3 = upload.Data("some data", convergence="converge") d.addCallback(lambda res: n.add_file(u"newfile-metadata", uploadable3, {"key": "value"})) d.addCallback(lambda newnode: self.failUnless(IImmutableFileNode.providedBy(newnode))) d.addCallback(lambda res: n.get_metadata_for(u"newfile-metadata")) d.addCallback(lambda metadata: self.failUnless((set(metadata.keys()) == set(["key", "tahoe"])) and (metadata['key'] == "value"), metadata)) d.addCallback(lambda res: n.delete(u"newfile-metadata")) d.addCallback(lambda res: n.create_subdirectory(u"subdir2")) def _created2(subdir2): self.subdir2 = subdir2 # put something in the way, to make sure it gets overwritten return subdir2.add_file(u"child", upload.Data("overwrite me", "converge")) d.addCallback(_created2) d.addCallback(lambda res: n.move_child_to(u"child", self.subdir2)) d.addCallback(lambda res: n.list()) 
d.addCallback(lambda children: self.failUnlessReallyEqual(set(children.keys()), set([u"newfile", u"subdir2"]))) d.addCallback(lambda res: self.subdir2.list()) d.addCallback(lambda children: self.failUnlessReallyEqual(set(children.keys()), set([u"child"]))) d.addCallback(lambda res: self.subdir2.get(u"child")) d.addCallback(lambda child: self.failUnlessReallyEqual(child.get_uri(), fake_file_uri)) # move it back, using new_child_name= d.addCallback(lambda res: self.subdir2.move_child_to(u"child", n, u"newchild")) d.addCallback(lambda res: n.list()) d.addCallback(lambda children: self.failUnlessReallyEqual(set(children.keys()), set([u"newchild", u"newfile", u"subdir2"]))) d.addCallback(lambda res: self.subdir2.list()) d.addCallback(lambda children: self.failUnlessReallyEqual(set(children.keys()), set([]))) # now make sure that we honor overwrite=False d.addCallback(lambda res: self.subdir2.set_uri(u"newchild", other_file_uri, other_file_uri)) d.addCallback(lambda res: self.shouldFail(ExistingChildError, "move_child_to-no", "child 'newchild' already exists", n.move_child_to, u"newchild", self.subdir2, overwrite=False)) d.addCallback(lambda res: self.subdir2.get(u"newchild")) d.addCallback(lambda child: self.failUnlessReallyEqual(child.get_uri(), other_file_uri)) # Setting the no-write field should diminish a mutable cap to read-only # (for both files and directories). d.addCallback(lambda ign: n.set_uri(u"mutable", other_file_uri, other_file_uri)) d.addCallback(lambda ign: n.get(u"mutable")) d.addCallback(lambda mutable: self.failIf(mutable.is_readonly(), mutable)) d.addCallback(lambda ign: n.set_metadata_for(u"mutable", {"no-write": True})) d.addCallback(lambda ign: n.get(u"mutable")) d.addCallback(lambda mutable: self.failUnless(mutable.is_readonly(), mutable)) d.addCallback(lambda ign: n.set_metadata_for(u"mutable", {"no-write": True})) d.addCallback(lambda ign: n.get(u"mutable")) d.addCallback(lambda mutable: self.failUnless(mutable.is_readonly(), mutable)) d.addCallback(lambda ign: n.get(u"subdir2")) d.addCallback(lambda subdir2: self.failIf(subdir2.is_readonly())) d.addCallback(lambda ign: n.set_metadata_for(u"subdir2", {"no-write": True})) d.addCallback(lambda ign: n.get(u"subdir2")) d.addCallback(lambda subdir2: self.failUnless(subdir2.is_readonly(), subdir2)) d.addCallback(lambda ign: n.set_uri(u"mutable_ro", other_file_uri, other_file_uri, metadata={"no-write": True})) d.addCallback(lambda ign: n.get(u"mutable_ro")) d.addCallback(lambda mutable_ro: self.failUnless(mutable_ro.is_readonly(), mutable_ro)) d.addCallback(lambda ign: n.create_subdirectory(u"subdir_ro", metadata={"no-write": True})) d.addCallback(lambda ign: n.get(u"subdir_ro")) d.addCallback(lambda subdir_ro: self.failUnless(subdir_ro.is_readonly(), subdir_ro)) return d d.addCallback(_then) d.addErrback(self.explain_error) return d def _do_initial_children_test(self, mdmf=False): c = self.g.clients[0] nm = c.nodemaker kids = {one_nfd: (nm.create_from_cap(one_uri), {}), u"two": (nm.create_from_cap(setup_py_uri), {"metakey": "metavalue"}), u"mut": (nm.create_from_cap(mut_write_uri, mut_read_uri), {}), u"mdmf": (nm.create_from_cap(mdmf_write_uri, mdmf_read_uri), {}), u"fut": (nm.create_from_cap(future_write_uri, future_read_uri), {}), u"fro": (nm.create_from_cap(None, future_read_uri), {}), u"fut-unic": (nm.create_from_cap(future_nonascii_write_uri, future_nonascii_read_uri), {}), u"fro-unic": (nm.create_from_cap(None, future_nonascii_read_uri), {}), u"empty_litdir": (nm.create_from_cap(empty_litdir_uri), {}), u"tiny_litdir": 
(nm.create_from_cap(tiny_litdir_uri), {}), } if mdmf: d = c.create_dirnode(kids, version=MDMF_VERSION) else: d = c.create_dirnode(kids) def _created(dn): self.failUnless(isinstance(dn, dirnode.DirectoryNode)) backing_node = dn._node if mdmf: self.failUnlessEqual(backing_node.get_version(), MDMF_VERSION) else: self.failUnlessEqual(backing_node.get_version(), SDMF_VERSION) self.failUnless(dn.is_mutable()) self.failIf(dn.is_readonly()) self.failIf(dn.is_unknown()) self.failIf(dn.is_allowed_in_immutable_directory()) dn.raise_error() rep = str(dn) self.failUnless("RW-MUT" in rep) return dn.list() d.addCallback(_created) def _check_kids(children): self.failUnlessReallyEqual(set(children.keys()), set([one_nfc, u"two", u"mut", u"mdmf", u"fut", u"fro", u"fut-unic", u"fro-unic", u"empty_litdir", u"tiny_litdir"])) one_node, one_metadata = children[one_nfc] two_node, two_metadata = children[u"two"] mut_node, mut_metadata = children[u"mut"] mdmf_node, mdmf_metadata = children[u"mdmf"] fut_node, fut_metadata = children[u"fut"] fro_node, fro_metadata = children[u"fro"] futna_node, futna_metadata = children[u"fut-unic"] frona_node, frona_metadata = children[u"fro-unic"] emptylit_node, emptylit_metadata = children[u"empty_litdir"] tinylit_node, tinylit_metadata = children[u"tiny_litdir"] self.failUnlessReallyEqual(one_node.get_size(), 3) self.failUnlessReallyEqual(one_node.get_uri(), one_uri) self.failUnlessReallyEqual(one_node.get_readonly_uri(), one_uri) self.failUnless(isinstance(one_metadata, dict), one_metadata) self.failUnlessReallyEqual(two_node.get_size(), 14861) self.failUnlessReallyEqual(two_node.get_uri(), setup_py_uri) self.failUnlessReallyEqual(two_node.get_readonly_uri(), setup_py_uri) self.failUnlessEqual(two_metadata["metakey"], "metavalue") self.failUnlessReallyEqual(mut_node.get_uri(), mut_write_uri) self.failUnlessReallyEqual(mut_node.get_readonly_uri(), mut_read_uri) self.failUnless(isinstance(mut_metadata, dict), mut_metadata) self.failUnlessReallyEqual(mdmf_node.get_uri(), mdmf_write_uri) self.failUnlessReallyEqual(mdmf_node.get_readonly_uri(), mdmf_read_uri) self.failUnless(isinstance(mdmf_metadata, dict), mdmf_metadata) self.failUnless(fut_node.is_unknown()) self.failUnlessReallyEqual(fut_node.get_uri(), future_write_uri) self.failUnlessReallyEqual(fut_node.get_readonly_uri(), "ro." + future_read_uri) self.failUnless(isinstance(fut_metadata, dict), fut_metadata) self.failUnless(futna_node.is_unknown()) self.failUnlessReallyEqual(futna_node.get_uri(), future_nonascii_write_uri) self.failUnlessReallyEqual(futna_node.get_readonly_uri(), "ro." + future_nonascii_read_uri) self.failUnless(isinstance(futna_metadata, dict), futna_metadata) self.failUnless(fro_node.is_unknown()) self.failUnlessReallyEqual(fro_node.get_uri(), "ro." + future_read_uri) self.failUnlessReallyEqual(fut_node.get_readonly_uri(), "ro." + future_read_uri) self.failUnless(isinstance(fro_metadata, dict), fro_metadata) self.failUnless(frona_node.is_unknown()) self.failUnlessReallyEqual(frona_node.get_uri(), "ro." + future_nonascii_read_uri) self.failUnlessReallyEqual(futna_node.get_readonly_uri(), "ro." 
+ future_nonascii_read_uri) self.failUnless(isinstance(frona_metadata, dict), frona_metadata) self.failIf(emptylit_node.is_unknown()) self.failUnlessReallyEqual(emptylit_node.get_storage_index(), None) self.failIf(tinylit_node.is_unknown()) self.failUnlessReallyEqual(tinylit_node.get_storage_index(), None) d2 = defer.succeed(None) d2.addCallback(lambda ignored: emptylit_node.list()) d2.addCallback(lambda children: self.failUnlessEqual(children, {})) d2.addCallback(lambda ignored: tinylit_node.list()) d2.addCallback(lambda children: self.failUnlessReallyEqual(set(children.keys()), set([u"short"]))) d2.addCallback(lambda ignored: tinylit_node.list()) d2.addCallback(lambda children: children[u"short"][0].read(MemAccum())) d2.addCallback(lambda accum: self.failUnlessReallyEqual(accum.data, "The end.")) return d2 d.addCallback(_check_kids) d.addCallback(lambda ign: nm.create_new_mutable_directory(kids)) d.addCallback(lambda dn: dn.list()) d.addCallback(_check_kids) bad_future_node = UnknownNode(future_write_uri, None) bad_kids1 = {one_nfd: (bad_future_node, {})} # This should fail because we don't know how to diminish the future_write_uri # cap (given in a write slot and not prefixed with "ro." or "imm.") to a readcap. d.addCallback(lambda ign: self.shouldFail(MustNotBeUnknownRWError, "bad_kids1", "cannot attach unknown", nm.create_new_mutable_directory, bad_kids1)) bad_kids2 = {one_nfd: (nm.create_from_cap(one_uri), None)} d.addCallback(lambda ign: self.shouldFail(AssertionError, "bad_kids2", "requires metadata to be a dict", nm.create_new_mutable_directory, bad_kids2)) return d def _do_basic_test(self, mdmf=False): c = self.g.clients[0] d = None if mdmf: d = c.create_dirnode(version=MDMF_VERSION) else: d = c.create_dirnode() def _done(res): self.failUnless(isinstance(res, dirnode.DirectoryNode)) self.failUnless(res.is_mutable()) self.failIf(res.is_readonly()) self.failIf(res.is_unknown()) self.failIf(res.is_allowed_in_immutable_directory()) res.raise_error() rep = str(res) self.failUnless("RW-MUT" in rep) d.addCallback(_done) return d def test_basic(self): self.basedir = "dirnode/Dirnode/test_basic" self.set_up_grid() return self._do_basic_test() def test_basic_mdmf(self): self.basedir = "dirnode/Dirnode/test_basic_mdmf" self.set_up_grid() return self._do_basic_test(mdmf=True) def test_initial_children(self): self.basedir = "dirnode/Dirnode/test_initial_children" self.set_up_grid() return self._do_initial_children_test() def test_immutable(self): self.basedir = "dirnode/Dirnode/test_immutable" self.set_up_grid() c = self.g.clients[0] nm = c.nodemaker kids = {one_nfd: (nm.create_from_cap(one_uri), {}), u"two": (nm.create_from_cap(setup_py_uri), {"metakey": "metavalue"}), u"fut": (nm.create_from_cap(None, future_read_uri), {}), u"futna": (nm.create_from_cap(None, future_nonascii_read_uri), {}), u"empty_litdir": (nm.create_from_cap(empty_litdir_uri), {}), u"tiny_litdir": (nm.create_from_cap(tiny_litdir_uri), {}), } d = c.create_immutable_dirnode(kids) def _created(dn): self.failUnless(isinstance(dn, dirnode.DirectoryNode)) self.failIf(dn.is_mutable()) self.failUnless(dn.is_readonly()) self.failIf(dn.is_unknown()) self.failUnless(dn.is_allowed_in_immutable_directory()) dn.raise_error() rep = str(dn) self.failUnless("RO-IMM" in rep) cap = dn.get_cap() self.failUnlessIn("CHK", cap.to_string()) self.cap = cap return dn.list() d.addCallback(_created) def _check_kids(children): self.failUnlessReallyEqual(set(children.keys()), set([one_nfc, u"two", u"fut", u"futna", u"empty_litdir", u"tiny_litdir"])) 
one_node, one_metadata = children[one_nfc] two_node, two_metadata = children[u"two"] fut_node, fut_metadata = children[u"fut"] futna_node, futna_metadata = children[u"futna"] emptylit_node, emptylit_metadata = children[u"empty_litdir"] tinylit_node, tinylit_metadata = children[u"tiny_litdir"] self.failUnlessReallyEqual(one_node.get_size(), 3) self.failUnlessReallyEqual(one_node.get_uri(), one_uri) self.failUnlessReallyEqual(one_node.get_readonly_uri(), one_uri) self.failUnless(isinstance(one_metadata, dict), one_metadata) self.failUnlessReallyEqual(two_node.get_size(), 14861) self.failUnlessReallyEqual(two_node.get_uri(), setup_py_uri) self.failUnlessReallyEqual(two_node.get_readonly_uri(), setup_py_uri) self.failUnlessEqual(two_metadata["metakey"], "metavalue") self.failUnless(fut_node.is_unknown()) self.failUnlessReallyEqual(fut_node.get_uri(), "imm." + future_read_uri) self.failUnlessReallyEqual(fut_node.get_readonly_uri(), "imm." + future_read_uri) self.failUnless(isinstance(fut_metadata, dict), fut_metadata) self.failUnless(futna_node.is_unknown()) self.failUnlessReallyEqual(futna_node.get_uri(), "imm." + future_nonascii_read_uri) self.failUnlessReallyEqual(futna_node.get_readonly_uri(), "imm." + future_nonascii_read_uri) self.failUnless(isinstance(futna_metadata, dict), futna_metadata) self.failIf(emptylit_node.is_unknown()) self.failUnlessReallyEqual(emptylit_node.get_storage_index(), None) self.failIf(tinylit_node.is_unknown()) self.failUnlessReallyEqual(tinylit_node.get_storage_index(), None) d2 = defer.succeed(None) d2.addCallback(lambda ignored: emptylit_node.list()) d2.addCallback(lambda children: self.failUnlessEqual(children, {})) d2.addCallback(lambda ignored: tinylit_node.list()) d2.addCallback(lambda children: self.failUnlessReallyEqual(set(children.keys()), set([u"short"]))) d2.addCallback(lambda ignored: tinylit_node.list()) d2.addCallback(lambda children: children[u"short"][0].read(MemAccum())) d2.addCallback(lambda accum: self.failUnlessReallyEqual(accum.data, "The end.")) return d2 d.addCallback(_check_kids) d.addCallback(lambda ign: nm.create_from_cap(self.cap.to_string())) d.addCallback(lambda dn: dn.list()) d.addCallback(_check_kids) bad_future_node1 = UnknownNode(future_write_uri, None) bad_kids1 = {one_nfd: (bad_future_node1, {})} d.addCallback(lambda ign: self.shouldFail(MustNotBeUnknownRWError, "bad_kids1", "cannot attach unknown", c.create_immutable_dirnode, bad_kids1)) bad_future_node2 = UnknownNode(future_write_uri, future_read_uri) bad_kids2 = {one_nfd: (bad_future_node2, {})} d.addCallback(lambda ign: self.shouldFail(MustBeDeepImmutableError, "bad_kids2", "is not allowed in an immutable directory", c.create_immutable_dirnode, bad_kids2)) bad_kids3 = {one_nfd: (nm.create_from_cap(one_uri), None)} d.addCallback(lambda ign: self.shouldFail(AssertionError, "bad_kids3", "requires metadata to be a dict", c.create_immutable_dirnode, bad_kids3)) bad_kids4 = {one_nfd: (nm.create_from_cap(mut_write_uri), {})} d.addCallback(lambda ign: self.shouldFail(MustBeDeepImmutableError, "bad_kids4", "is not allowed in an immutable directory", c.create_immutable_dirnode, bad_kids4)) bad_kids5 = {one_nfd: (nm.create_from_cap(mut_read_uri), {})} d.addCallback(lambda ign: self.shouldFail(MustBeDeepImmutableError, "bad_kids5", "is not allowed in an immutable directory", c.create_immutable_dirnode, bad_kids5)) bad_kids6 = {one_nfd: (nm.create_from_cap(mdmf_write_uri), {})} d.addCallback(lambda ign: self.shouldFail(MustBeDeepImmutableError, "bad_kids6", "is not allowed in an 
immutable directory", c.create_immutable_dirnode, bad_kids6)) bad_kids7 = {one_nfd: (nm.create_from_cap(mdmf_read_uri), {})} d.addCallback(lambda ign: self.shouldFail(MustBeDeepImmutableError, "bad_kids7", "is not allowed in an immutable directory", c.create_immutable_dirnode, bad_kids7)) d.addCallback(lambda ign: c.create_immutable_dirnode({})) def _created_empty(dn): self.failUnless(isinstance(dn, dirnode.DirectoryNode)) self.failIf(dn.is_mutable()) self.failUnless(dn.is_readonly()) self.failIf(dn.is_unknown()) self.failUnless(dn.is_allowed_in_immutable_directory()) dn.raise_error() rep = str(dn) self.failUnless("RO-IMM" in rep) cap = dn.get_cap() self.failUnlessIn("LIT", cap.to_string()) self.failUnlessReallyEqual(cap.to_string(), "URI:DIR2-LIT:") self.cap = cap return dn.list() d.addCallback(_created_empty) d.addCallback(lambda kids: self.failUnlessEqual(kids, {})) smallkids = {u"o": (nm.create_from_cap(one_uri), {})} d.addCallback(lambda ign: c.create_immutable_dirnode(smallkids)) def _created_small(dn): self.failUnless(isinstance(dn, dirnode.DirectoryNode)) self.failIf(dn.is_mutable()) self.failUnless(dn.is_readonly()) self.failIf(dn.is_unknown()) self.failUnless(dn.is_allowed_in_immutable_directory()) dn.raise_error() rep = str(dn) self.failUnless("RO-IMM" in rep) cap = dn.get_cap() self.failUnlessIn("LIT", cap.to_string()) self.failUnlessReallyEqual(cap.to_string(), "URI:DIR2-LIT:gi4tumj2n4wdcmz2kvjesosmjfkdu3rvpbtwwlbqhiwdeot3puwcy") self.cap = cap return dn.list() d.addCallback(_created_small) d.addCallback(lambda kids: self.failUnlessReallyEqual(kids.keys(), [u"o"])) # now test n.create_subdirectory(mutable=False) d.addCallback(lambda ign: c.create_dirnode()) def _made_parent(n): d = n.create_subdirectory(u"subdir", kids, mutable=False) d.addCallback(lambda sd: sd.list()) d.addCallback(_check_kids) d.addCallback(lambda ign: n.list()) d.addCallback(lambda children: self.failUnlessReallyEqual(children.keys(), [u"subdir"])) d.addCallback(lambda ign: n.get(u"subdir")) d.addCallback(lambda sd: sd.list()) d.addCallback(_check_kids) d.addCallback(lambda ign: n.get(u"subdir")) d.addCallback(lambda sd: self.failIf(sd.is_mutable())) bad_kids8 = {one_nfd: (nm.create_from_cap(mut_write_uri), {})} d.addCallback(lambda ign: self.shouldFail(MustBeDeepImmutableError, "bad_kids8", "is not allowed in an immutable directory", n.create_subdirectory, u"sub2", bad_kids8, mutable=False)) bad_kids9 = {one_nfd: (nm.create_from_cap(mdmf_write_uri), {})} d.addCallback(lambda ign: self.shouldFail(MustBeDeepImmutableError, "bad_kids9", "is not allowed in an immutable directory", n.create_subdirectory, u"sub2", bad_kids9, mutable=False)) return d d.addCallback(_made_parent) return d def test_directory_representation(self): self.basedir = "dirnode/Dirnode/test_directory_representation" self.set_up_grid() c = self.g.clients[0] nm = c.nodemaker # This test checks that any trailing spaces in URIs are retained in the # encoded directory, but stripped when we get them out of the directory. # See ticket #925 for why we want that. # It also tests that we store child names as UTF-8 NFC, and normalize # them again when retrieving them. 
stripped_write_uri = "lafs://from_the_future\t" stripped_read_uri = "lafs://readonly_from_the_future\t" spacedout_write_uri = stripped_write_uri + " " spacedout_read_uri = stripped_read_uri + " " child = nm.create_from_cap(spacedout_write_uri, spacedout_read_uri) self.failUnlessReallyEqual(child.get_write_uri(), spacedout_write_uri) self.failUnlessReallyEqual(child.get_readonly_uri(), "ro." + spacedout_read_uri) child_dottedi = u"ch\u0131\u0307ld" kids_in = {child_dottedi: (child, {}), one_nfd: (child, {})} kids_out = {child_dottedi: (child, {}), one_nfc: (child, {})} kids_norm = {u"child": (child, {}), one_nfc: (child, {})} d = c.create_dirnode(kids_in) def _created(dn): self.failUnless(isinstance(dn, dirnode.DirectoryNode)) self.failUnless(dn.is_mutable()) self.failIf(dn.is_readonly()) dn.raise_error() self.cap = dn.get_cap() self.rootnode = dn return dn._node.download_best_version() d.addCallback(_created) def _check_data(data): # Decode the netstring representation of the directory to check that the # spaces are retained when the URIs are stored, and that the names are stored # as NFC. position = 0 numkids = 0 while position < len(data): entries, position = split_netstring(data, 1, position) entry = entries[0] (name_utf8, ro_uri, rwcapdata, metadata_s), subpos = split_netstring(entry, 4) name = name_utf8.decode("utf-8") rw_uri = self.rootnode._decrypt_rwcapdata(rwcapdata) self.failUnlessIn(name, kids_out) (expected_child, ign) = kids_out[name] self.failUnlessReallyEqual(rw_uri, expected_child.get_write_uri()) self.failUnlessReallyEqual("ro." + ro_uri, expected_child.get_readonly_uri()) numkids += 1 self.failUnlessReallyEqual(numkids, len(kids_out)) return self.rootnode d.addCallback(_check_data) # Mock up a hypothetical future version of Unicode that adds a canonical equivalence # between dotless-i + dot-above, and 'i'. That would actually be prohibited by the # stability rules, but similar additions involving currently-unassigned characters # would not be. old_normalize = unicodedata.normalize def future_normalize(form, s): assert form == 'NFC', form return old_normalize(form, s).replace(u"\u0131\u0307", u"i") def _list(node): unicodedata.normalize = future_normalize d2 = node.list() def _undo_mock(res): unicodedata.normalize = old_normalize return res d2.addBoth(_undo_mock) return d2 d.addCallback(_list) def _check_kids(children): # Now when we use the real directory listing code, the trailing spaces # should have been stripped (and "ro." should have been prepended to the # ro_uri, since it's unknown). Also the dotless-i + dot-above should have been # normalized to 'i'. self.failUnlessReallyEqual(set(children.keys()), set(kids_norm.keys())) child_node, child_metadata = children[u"child"] self.failUnlessReallyEqual(child_node.get_write_uri(), stripped_write_uri) self.failUnlessReallyEqual(child_node.get_readonly_uri(), "ro." 
+ stripped_read_uri) d.addCallback(_check_kids) d.addCallback(lambda ign: nm.create_from_cap(self.cap.to_string())) d.addCallback(_list) d.addCallback(_check_kids) # again with dirnode recreated from cap return d def test_check(self): self.basedir = "dirnode/Dirnode/test_check" self.set_up_grid() c = self.g.clients[0] d = c.create_dirnode() d.addCallback(lambda dn: dn.check(Monitor())) def _done(res): self.failUnless(res.is_healthy()) d.addCallback(_done) return d def _test_deepcheck_create(self, version=SDMF_VERSION): # create a small tree with a loop, and some non-directories # root/ # root/subdir/ # root/subdir/file1 # root/subdir/link -> root # root/rodir c = self.g.clients[0] d = c.create_dirnode(version=version) def _created_root(rootnode): self._rootnode = rootnode self.failUnlessEqual(rootnode._node.get_version(), version) return rootnode.create_subdirectory(u"subdir") d.addCallback(_created_root) def _created_subdir(subdir): self._subdir = subdir d = subdir.add_file(u"file1", upload.Data("data"*100, None)) d.addCallback(lambda res: subdir.set_node(u"link", self._rootnode)) d.addCallback(lambda res: c.create_dirnode()) d.addCallback(lambda dn: self._rootnode.set_uri(u"rodir", dn.get_uri(), dn.get_readonly_uri())) return d d.addCallback(_created_subdir) def _done(res): return self._rootnode d.addCallback(_done) return d def test_deepcheck(self): self.basedir = "dirnode/Dirnode/test_deepcheck" self.set_up_grid() d = self._test_deepcheck_create() d.addCallback(lambda rootnode: rootnode.start_deep_check().when_done()) def _check_results(r): self.failUnless(IDeepCheckResults.providedBy(r)) c = r.get_counters() self.failUnlessReallyEqual(c, {"count-objects-checked": 4, "count-objects-healthy": 4, "count-objects-unhealthy": 0, "count-objects-unrecoverable": 0, "count-corrupt-shares": 0, }) self.failIf(r.get_corrupt_shares()) self.failUnlessReallyEqual(len(r.get_all_results()), 4) d.addCallback(_check_results) return d def test_deepcheck_cachemisses(self): self.basedir = "dirnode/Dirnode/test_mdmf_cachemisses" self.set_up_grid() d = self._test_deepcheck_create() # Clear the counters and set the rootnode d.addCallback(lambda rootnode: not [ss._clear_counters() for ss in self.g.wrappers_by_id.values()] or rootnode) d.addCallback(lambda rootnode: rootnode.start_deep_check().when_done()) def _check(ign): count = sum([ss.counter_by_methname['slot_readv'] for ss in self.g.wrappers_by_id.values()]) self.failIf(count > 60, 'Expected only 60 cache misses,' 'unfortunately there were %d' % (count,)) d.addCallback(_check) return d def test_deepcheck_mdmf(self): self.basedir = "dirnode/Dirnode/test_deepcheck_mdmf" self.set_up_grid() d = self._test_deepcheck_create(MDMF_VERSION) d.addCallback(lambda rootnode: rootnode.start_deep_check().when_done()) def _check_results(r): self.failUnless(IDeepCheckResults.providedBy(r)) c = r.get_counters() self.failUnlessReallyEqual(c, {"count-objects-checked": 4, "count-objects-healthy": 4, "count-objects-unhealthy": 0, "count-objects-unrecoverable": 0, "count-corrupt-shares": 0, }) self.failIf(r.get_corrupt_shares()) self.failUnlessReallyEqual(len(r.get_all_results()), 4) d.addCallback(_check_results) return d def test_deepcheck_and_repair(self): self.basedir = "dirnode/Dirnode/test_deepcheck_and_repair" self.set_up_grid() d = self._test_deepcheck_create() d.addCallback(lambda rootnode: rootnode.start_deep_check_and_repair().when_done()) def _check_results(r): self.failUnless(IDeepCheckAndRepairResults.providedBy(r)) c = r.get_counters() self.failUnlessReallyEqual(c, 
{"count-objects-checked": 4, "count-objects-healthy-pre-repair": 4, "count-objects-unhealthy-pre-repair": 0, "count-objects-unrecoverable-pre-repair": 0, "count-corrupt-shares-pre-repair": 0, "count-objects-healthy-post-repair": 4, "count-objects-unhealthy-post-repair": 0, "count-objects-unrecoverable-post-repair": 0, "count-corrupt-shares-post-repair": 0, "count-repairs-attempted": 0, "count-repairs-successful": 0, "count-repairs-unsuccessful": 0, }) self.failIf(r.get_corrupt_shares()) self.failIf(r.get_remaining_corrupt_shares()) self.failUnlessReallyEqual(len(r.get_all_results()), 4) d.addCallback(_check_results) return d def test_deepcheck_and_repair_mdmf(self): self.basedir = "dirnode/Dirnode/test_deepcheck_and_repair_mdmf" self.set_up_grid() d = self._test_deepcheck_create(version=MDMF_VERSION) d.addCallback(lambda rootnode: rootnode.start_deep_check_and_repair().when_done()) def _check_results(r): self.failUnless(IDeepCheckAndRepairResults.providedBy(r)) c = r.get_counters() self.failUnlessReallyEqual(c, {"count-objects-checked": 4, "count-objects-healthy-pre-repair": 4, "count-objects-unhealthy-pre-repair": 0, "count-objects-unrecoverable-pre-repair": 0, "count-corrupt-shares-pre-repair": 0, "count-objects-healthy-post-repair": 4, "count-objects-unhealthy-post-repair": 0, "count-objects-unrecoverable-post-repair": 0, "count-corrupt-shares-post-repair": 0, "count-repairs-attempted": 0, "count-repairs-successful": 0, "count-repairs-unsuccessful": 0, }) self.failIf(r.get_corrupt_shares()) self.failIf(r.get_remaining_corrupt_shares()) self.failUnlessReallyEqual(len(r.get_all_results()), 4) d.addCallback(_check_results) return d def _mark_file_bad(self, rootnode): self.delete_shares_numbered(rootnode.get_uri(), [0]) return rootnode def test_deepcheck_problems(self): self.basedir = "dirnode/Dirnode/test_deepcheck_problems" self.set_up_grid() d = self._test_deepcheck_create() d.addCallback(lambda rootnode: self._mark_file_bad(rootnode)) d.addCallback(lambda rootnode: rootnode.start_deep_check().when_done()) def _check_results(r): c = r.get_counters() self.failUnlessReallyEqual(c, {"count-objects-checked": 4, "count-objects-healthy": 3, "count-objects-unhealthy": 1, "count-objects-unrecoverable": 0, "count-corrupt-shares": 0, }) #self.failUnlessReallyEqual(len(r.get_problems()), 1) # TODO d.addCallback(_check_results) return d def test_deepcheck_problems_mdmf(self): self.basedir = "dirnode/Dirnode/test_deepcheck_problems_mdmf" self.set_up_grid() d = self._test_deepcheck_create(version=MDMF_VERSION) d.addCallback(lambda rootnode: self._mark_file_bad(rootnode)) d.addCallback(lambda rootnode: rootnode.start_deep_check().when_done()) def _check_results(r): c = r.get_counters() self.failUnlessReallyEqual(c, {"count-objects-checked": 4, "count-objects-healthy": 3, "count-objects-unhealthy": 1, "count-objects-unrecoverable": 0, "count-corrupt-shares": 0, }) #self.failUnlessReallyEqual(len(r.get_problems()), 1) # TODO d.addCallback(_check_results) return d def _do_readonly_test(self, version=SDMF_VERSION): c = self.g.clients[0] nm = c.nodemaker filecap = make_chk_file_uri(1234) filenode = nm.create_from_cap(filecap) uploadable = upload.Data("some data", convergence="some convergence string") d = c.create_dirnode(version=version) def _created(rw_dn): backing_node = rw_dn._node self.failUnlessEqual(backing_node.get_version(), version) d2 = rw_dn.set_uri(u"child", filecap, filecap) d2.addCallback(lambda res: rw_dn) return d2 d.addCallback(_created) def _ready(rw_dn): ro_uri = rw_dn.get_readonly_uri() 
ro_dn = c.create_node_from_uri(ro_uri) self.failUnless(ro_dn.is_readonly()) self.failUnless(ro_dn.is_mutable()) self.failIf(ro_dn.is_unknown()) self.failIf(ro_dn.is_allowed_in_immutable_directory()) ro_dn.raise_error() self.shouldFail(dirnode.NotWriteableError, "set_uri ro", None, ro_dn.set_uri, u"newchild", filecap, filecap) self.shouldFail(dirnode.NotWriteableError, "set_uri ro", None, ro_dn.set_node, u"newchild", filenode) self.shouldFail(dirnode.NotWriteableError, "set_nodes ro", None, ro_dn.set_nodes, { u"newchild": (filenode, None) }) self.shouldFail(dirnode.NotWriteableError, "set_uri ro", None, ro_dn.add_file, u"newchild", uploadable) self.shouldFail(dirnode.NotWriteableError, "set_uri ro", None, ro_dn.delete, u"child") self.shouldFail(dirnode.NotWriteableError, "set_uri ro", None, ro_dn.create_subdirectory, u"newchild") self.shouldFail(dirnode.NotWriteableError, "set_metadata_for ro", None, ro_dn.set_metadata_for, u"child", {}) self.shouldFail(dirnode.NotWriteableError, "set_uri ro", None, ro_dn.move_child_to, u"child", rw_dn) self.shouldFail(dirnode.NotWriteableError, "set_uri ro", None, rw_dn.move_child_to, u"child", ro_dn) return ro_dn.list() d.addCallback(_ready) def _listed(children): self.failUnless(u"child" in children) d.addCallback(_listed) return d def test_readonly(self): self.basedir = "dirnode/Dirnode/test_readonly" self.set_up_grid() return self._do_readonly_test() def test_readonly_mdmf(self): self.basedir = "dirnode/Dirnode/test_readonly_mdmf" self.set_up_grid() return self._do_readonly_test(version=MDMF_VERSION) def failUnlessGreaterThan(self, a, b): self.failUnless(a > b, "%r should be > %r" % (a, b)) def failUnlessGreaterOrEqualThan(self, a, b): self.failUnless(a >= b, "%r should be >= %r" % (a, b)) def test_create(self): self.basedir = "dirnode/Dirnode/test_create" self.set_up_grid() return self._do_create_test() def test_update_metadata(self): (t1, t2, t3) = (626644800.0, 634745640.0, 892226160.0) md1 = dirnode.update_metadata({"ctime": t1}, {}, t2) self.failUnlessEqual(md1, {"tahoe":{"linkcrtime": t1, "linkmotime": t2}}) md2 = dirnode.update_metadata(md1, {"key": "value", "tahoe": {"bad": "mojo"}}, t3) self.failUnlessEqual(md2, {"key": "value", "tahoe":{"linkcrtime": t1, "linkmotime": t3}}) md3 = dirnode.update_metadata({}, None, t3) self.failUnlessEqual(md3, {"tahoe":{"linkcrtime": t3, "linkmotime": t3}}) md4 = dirnode.update_metadata({}, {"bool": True, "number": 42}, t1) self.failUnlessEqual(md4, {"bool": True, "number": 42, "tahoe":{"linkcrtime": t1, "linkmotime": t1}}) def _do_create_subdirectory_test(self, version=SDMF_VERSION): c = self.g.clients[0] nm = c.nodemaker d = c.create_dirnode(version=version) def _then(n): # / self.rootnode = n fake_file_uri = make_mutable_file_uri() other_file_uri = make_mutable_file_uri() md = {"metakey": "metavalue"} kids = {u"kid1": (nm.create_from_cap(fake_file_uri), {}), u"kid2": (nm.create_from_cap(other_file_uri), md), } d = n.create_subdirectory(u"subdir", kids, mutable_version=version) def _check(sub): d = n.get_child_at_path(u"subdir") d.addCallback(lambda sub2: self.failUnlessReallyEqual(sub2.get_uri(), sub.get_uri())) d.addCallback(lambda ign: sub.list()) return d d.addCallback(_check) def _check_kids(kids2): self.failUnlessEqual(set(kids.keys()), set(kids2.keys())) self.failUnlessEqual(kids2[u"kid2"][1]["metakey"], "metavalue") d.addCallback(_check_kids) return d d.addCallback(_then) return d def test_create_subdirectory(self): self.basedir = "dirnode/Dirnode/test_create_subdirectory" self.set_up_grid() return 
self._do_create_subdirectory_test() def test_create_subdirectory_mdmf(self): self.basedir = "dirnode/Dirnode/test_create_subdirectory_mdmf" self.set_up_grid() return self._do_create_subdirectory_test(version=MDMF_VERSION) def test_create_mdmf(self): self.basedir = "dirnode/Dirnode/test_mdmf" self.set_up_grid() return self._do_create_test(mdmf=True) def test_mdmf_initial_children(self): self.basedir = "dirnode/Dirnode/test_mdmf" self.set_up_grid() return self._do_initial_children_test(mdmf=True) class MinimalFakeMutableFile: def get_writekey(self): return "writekey" class Packing(testutil.ReallyEqualMixin, unittest.TestCase): # This is a base32-encoded representation of the directory tree # root/file1 # root/file2 # root/file3 # as represented after being fed to _pack_contents. # We have it here so we can decode it, feed it to # _unpack_contents, and verify that _unpack_contents # works correctly. known_tree = "GM4TOORVHJTGS3DFGEWDSNJ2KVJESOSDJBFTU33MPB2GS3LZNVYG6N3GGI3WU5TIORTXC3DOMJ2G4NB2MVWXUZDONBVTE5LNGRZWK2LYN55GY23XGNYXQMTOMZUWU5TENN4DG23ZG5UTO2L2NQ2DO6LFMRWDMZJWGRQTUMZ2GEYDUMJQFQYTIMZ22XZKZORX5XS7CAQCSK3URR6QOHISHRCMGER5LRFSZRNAS5ZSALCS6TWFQAE754IVOIKJVK73WZPP3VUUEDTX3WHTBBZ5YX3CEKHCPG3ZWQLYA4QM6LDRCF7TJQYWLIZHKGN5ROA3AUZPXESBNLQQ6JTC2DBJU2D47IZJTLR3PKZ4RVF57XLPWY7FX7SZV3T6IJ3ORFW37FXUPGOE3ROPFNUX5DCGMAQJ3PGGULBRGM3TU6ZCMN2GS3LFEI5CAMJSGQ3DMNRTHA4TOLRUGI3TKNRWGEWCAITUMFUG6ZJCHIQHWITMNFXGW3LPORUW2ZJCHIQDCMRUGY3DMMZYHE3S4NBSG42TMNRRFQQCE3DJNZVWG4TUNFWWKIR2EAYTENBWGY3DGOBZG4XDIMRXGU3DMML5FQQCE3LUNFWWKIR2EAYTENBWGY3DGOBZG4XDIMRXGU3DMML5FQWDGOJRHI2TUZTJNRSTELBZGQ5FKUSJHJBUQSZ2MFYGKZ3SOBSWQ43IO52WO23CNAZWU3DUGVSWSNTIOE5DK33POVTW4ZLNMNWDK6DHPA2GS2THNF2W25DEN5VGY2LQNFRGG5DKNNRHO5TZPFTWI6LNMRYGQ2LCGJTHM4J2GM5DCMB2GQWDCNBSHKVVQBGRYMACKJ27CVQ6O6B4QPR72RFVTGOZUI76XUSWAX73JRV5PYRHMIFYZIA25MXDPGUGML6M2NMRSG4YD4W4K37ZDYSXHMJ3IUVT4F64YTQQVBJFFFOUC7J7LAB2VFCL5UKKGMR2D3F4EPOYC7UYWQZNR5KXHBSNXLCNBX2SNF22DCXJIHSMEKWEWOG5XCJEVVZ7UW5IB6I64XXQSJ34B5CAYZGZIIMR6LBRGMZTU6ZCMN2GS3LFEI5CAMJSGQ3DMNRTHA4TOLRUGMYDEMJYFQQCE5DBNBXWKIR2EB5SE3DJNZVW233UNFWWKIR2EAYTENBWGY3DGOBZG4XDIMZQGIYTQLBAEJWGS3TLMNZHI2LNMURDUIBRGI2DMNRWGM4DSNZOGQZTAMRRHB6SYIBCNV2GS3LFEI5CAMJSGQ3DMNRTHA4TOLRUGMYDEMJYPUWCYMZZGU5DKOTGNFWGKMZMHE2DUVKSJE5EGSCLHJRW25DDPBYTO2DXPB3GM6DBNYZTI6LJMV3DM2LWNB4TU4LWMNSWW3LKORXWK5DEMN3TI23NNE3WEM3SORRGY5THPA3TKNBUMNZG453BOF2GSZLXMVWWI3DJOFZW623RHIZTUMJQHI2SYMJUGI5BOSHWDPG3WKPAVXCF3XMKA7QVIWPRMWJHDTQHD27AHDCPJWDQENQ5H5ZZILTXQNIXXCIW4LKQABU2GCFRG5FHQN7CHD7HF4EKNRZFIV2ZYQIBM7IQU7F4RGB3XCX3FREPBKQ7UCICHVWPCYFGA6OLH3J45LXQ6GWWICJ3PGWJNLZ7PCRNLAPNYUGU6BENS7OXMBEOOFRIZV3PF2FFWZ5WHDPKXERYP7GNHKRMGEZTOOT3EJRXI2LNMURDUIBRGI2DMNRWGM4DSNZOGQZTGNRSGY4SYIBCORQWQ33FEI5CA6ZCNRUW423NN52GS3LFEI5CAMJSGQ3DMNRTHA4TOLRUGMZTMMRWHEWCAITMNFXGWY3SORUW2ZJCHIQDCMRUGY3DMMZYHE3S4NBTGM3DENRZPUWCAITNORUW2ZJCHIQDCMRUGY3DMMZYHE3S4NBTGM3DENRZPUWCY===" def test_unpack_and_pack_behavior(self): known_tree = b32decode(self.known_tree) nodemaker = NodeMaker(None, None, None, None, None, {"k": 3, "n": 10}, None, None) write_uri = "URI:SSK-RO:e3mdrzfwhoq42hy5ubcz6rp3o4:ybyibhnp3vvwuq2vaw2ckjmesgkklfs6ghxleztqidihjyofgw7q" filenode = nodemaker.create_from_cap(write_uri) node = dirnode.DirectoryNode(filenode, nodemaker, None) children = node._unpack_contents(known_tree) self._check_children(children) packed_children = node._pack_contents(children) children = node._unpack_contents(packed_children) self._check_children(children) def _check_children(self, children): # Are all the expected child nodes there? 
self.failUnless(children.has_key(u'file1')) self.failUnless(children.has_key(u'file2')) self.failUnless(children.has_key(u'file3')) # Are the metadata for child 3 right? file3_rocap = "URI:CHK:cmtcxq7hwxvfxan34yiev6ivhy:qvcekmjtoetdcw4kmi7b3rtblvgx7544crnwaqtiewemdliqsokq:3:10:5" file3_rwcap = "URI:CHK:cmtcxq7hwxvfxan34yiev6ivhy:qvcekmjtoetdcw4kmi7b3rtblvgx7544crnwaqtiewemdliqsokq:3:10:5" file3_metadata = {'ctime': 1246663897.4336269, 'tahoe': {'linkmotime': 1246663897.4336269, 'linkcrtime': 1246663897.4336269}, 'mtime': 1246663897.4336269} self.failUnlessEqual(file3_metadata, children[u'file3'][1]) self.failUnlessReallyEqual(file3_rocap, children[u'file3'][0].get_readonly_uri()) self.failUnlessReallyEqual(file3_rwcap, children[u'file3'][0].get_uri()) # Are the metadata for child 2 right? file2_rocap = "URI:CHK:apegrpehshwugkbh3jlt5ei6hq:5oougnemcl5xgx4ijgiumtdojlipibctjkbwvyygdymdphib2fvq:3:10:4" file2_rwcap = "URI:CHK:apegrpehshwugkbh3jlt5ei6hq:5oougnemcl5xgx4ijgiumtdojlipibctjkbwvyygdymdphib2fvq:3:10:4" file2_metadata = {'ctime': 1246663897.430218, 'tahoe': {'linkmotime': 1246663897.430218, 'linkcrtime': 1246663897.430218}, 'mtime': 1246663897.430218} self.failUnlessEqual(file2_metadata, children[u'file2'][1]) self.failUnlessReallyEqual(file2_rocap, children[u'file2'][0].get_readonly_uri()) self.failUnlessReallyEqual(file2_rwcap, children[u'file2'][0].get_uri()) # Are the metadata for child 1 right? file1_rocap = "URI:CHK:olxtimympo7f27jvhtgqlnbtn4:emzdnhk2um4seixozlkw3qx2nfijvdkx3ky7i7izl47yedl6e64a:3:10:10" file1_rwcap = "URI:CHK:olxtimympo7f27jvhtgqlnbtn4:emzdnhk2um4seixozlkw3qx2nfijvdkx3ky7i7izl47yedl6e64a:3:10:10" file1_metadata = {'ctime': 1246663897.4275661, 'tahoe': {'linkmotime': 1246663897.4275661, 'linkcrtime': 1246663897.4275661}, 'mtime': 1246663897.4275661} self.failUnlessEqual(file1_metadata, children[u'file1'][1]) self.failUnlessReallyEqual(file1_rocap, children[u'file1'][0].get_readonly_uri()) self.failUnlessReallyEqual(file1_rwcap, children[u'file1'][0].get_uri()) def _make_kids(self, nm, which): caps = {"imm": "URI:CHK:n7r3m6wmomelk4sep3kw5cvduq:os7ijw5c3maek7pg65e5254k2fzjflavtpejjyhshpsxuqzhcwwq:3:20:14861", "lit": "URI:LIT:n5xgk", # LIT for "one" "write": "URI:SSK:vfvcbdfbszyrsaxchgevhmmlii:euw4iw7bbnkrrwpzuburbhppuxhc3gwxv26f6imekhz7zyw2ojnq", "read": "URI:SSK-RO:e3mdrzfwhoq42hy5ubcz6rp3o4:ybyibhnp3vvwuq2vaw2ckjmesgkklfs6ghxleztqidihjyofgw7q", "dirwrite": "URI:DIR2:n6x24zd3seu725yluj75q5boaa:mm6yoqjhl6ueh7iereldqxue4nene4wl7rqfjfybqrehdqmqskvq", "dirread": "URI:DIR2-RO:b7sr5qsifnicca7cbk3rhrhbvq:mm6yoqjhl6ueh7iereldqxue4nene4wl7rqfjfybqrehdqmqskvq", } kids = {} for name in which: kids[unicode(name)] = (nm.create_from_cap(caps[name]), {}) return kids def test_deep_immutable(self): nm = NodeMaker(None, None, None, None, None, {"k": 3, "n": 10}, None, None) fn = MinimalFakeMutableFile() kids = self._make_kids(nm, ["imm", "lit", "write", "read", "dirwrite", "dirread"]) packed = dirnode.pack_children(kids, fn.get_writekey(), deep_immutable=False) self.failUnlessIn("lit", packed) kids = self._make_kids(nm, ["imm", "lit"]) packed = dirnode.pack_children(kids, fn.get_writekey(), deep_immutable=True) self.failUnlessIn("lit", packed) kids = self._make_kids(nm, ["imm", "lit", "write"]) self.failUnlessRaises(dirnode.MustBeDeepImmutableError, dirnode.pack_children, kids, fn.get_writekey(), deep_immutable=True) # read-only is not enough: all children must be immutable kids = self._make_kids(nm, ["imm", "lit", "read"]) self.failUnlessRaises(dirnode.MustBeDeepImmutableError, 
dirnode.pack_children, kids, fn.get_writekey(), deep_immutable=True) kids = self._make_kids(nm, ["imm", "lit", "dirwrite"]) self.failUnlessRaises(dirnode.MustBeDeepImmutableError, dirnode.pack_children, kids, fn.get_writekey(), deep_immutable=True) kids = self._make_kids(nm, ["imm", "lit", "dirread"]) self.failUnlessRaises(dirnode.MustBeDeepImmutableError, dirnode.pack_children, kids, fn.get_writekey(), deep_immutable=True) class FakeMutableFile: implements(IMutableFileNode) counter = 0 def __init__(self, initial_contents=""): data = self._get_initial_contents(initial_contents) self.data = data.read(data.get_size()) self.data = "".join(self.data) counter = FakeMutableFile.counter FakeMutableFile.counter += 1 writekey = hashutil.ssk_writekey_hash(str(counter)) fingerprint = hashutil.ssk_pubkey_fingerprint_hash(str(counter)) self.uri = uri.WriteableSSKFileURI(writekey, fingerprint) def _get_initial_contents(self, contents): if isinstance(contents, str): return contents if contents is None: return "" assert callable(contents), "%s should be callable, not %s" % \ (contents, type(contents)) return contents(self) def get_cap(self): return self.uri def get_uri(self): return self.uri.to_string() def get_write_uri(self): return self.uri.to_string() def download_best_version(self): return defer.succeed(self.data) def get_writekey(self): return "writekey" def is_readonly(self): return False def is_mutable(self): return True def is_unknown(self): return False def is_allowed_in_immutable_directory(self): return False def raise_error(self): pass def modify(self, modifier): data = modifier(self.data, None, True) self.data = data return defer.succeed(None) class FakeNodeMaker(NodeMaker): def create_mutable_file(self, contents="", keysize=None, version=None): return defer.succeed(FakeMutableFile(contents)) class FakeClient2(Client): def __init__(self): self.nodemaker = FakeNodeMaker(None, None, None, None, None, {"k":3,"n":10}, None, None) def create_node_from_uri(self, rwcap, rocap): return self.nodemaker.create_from_cap(rwcap, rocap) class Dirnode2(testutil.ReallyEqualMixin, testutil.ShouldFailMixin, unittest.TestCase): def setUp(self): client = FakeClient2() self.nodemaker = client.nodemaker def test_from_future(self): # Create a mutable directory that contains unknown URI types, and make sure # we tolerate them properly. d = self.nodemaker.create_new_mutable_directory() future_write_uri = u"x-tahoe-crazy://I_am_from_the_future_rw_\u263A".encode('utf-8') future_read_uri = u"x-tahoe-crazy-readonly://I_am_from_the_future_ro_\u263A".encode('utf-8') future_imm_uri = u"x-tahoe-crazy-immutable://I_am_from_the_future_imm_\u263A".encode('utf-8') future_node = UnknownNode(future_write_uri, future_read_uri) def _then(n): self._node = n return n.set_node(u"future", future_node) d.addCallback(_then) # We should be prohibited from adding an unknown URI to a directory # just in the rw_uri slot, since we don't know how to diminish the cap # to a readcap (for the ro_uri slot). d.addCallback(lambda ign: self.shouldFail(MustNotBeUnknownRWError, "copy unknown", "cannot attach unknown rw cap as child", self._node.set_uri, u"add", future_write_uri, None)) # However, we should be able to add both rw_uri and ro_uri as a pair of # unknown URIs. d.addCallback(lambda ign: self._node.set_uri(u"add-pair", future_write_uri, future_read_uri)) # and to add an URI prefixed with "ro." or "imm." when it is given in a # write slot (or URL parameter). d.addCallback(lambda ign: self._node.set_uri(u"add-ro", "ro." 
+ future_read_uri, None)) d.addCallback(lambda ign: self._node.set_uri(u"add-imm", "imm." + future_imm_uri, None)) d.addCallback(lambda ign: self._node.list()) def _check(children): self.failUnlessReallyEqual(len(children), 4) (fn, metadata) = children[u"future"] self.failUnless(isinstance(fn, UnknownNode), fn) self.failUnlessReallyEqual(fn.get_uri(), future_write_uri) self.failUnlessReallyEqual(fn.get_write_uri(), future_write_uri) self.failUnlessReallyEqual(fn.get_readonly_uri(), "ro." + future_read_uri) (fn2, metadata2) = children[u"add-pair"] self.failUnless(isinstance(fn2, UnknownNode), fn2) self.failUnlessReallyEqual(fn2.get_uri(), future_write_uri) self.failUnlessReallyEqual(fn2.get_write_uri(), future_write_uri) self.failUnlessReallyEqual(fn2.get_readonly_uri(), "ro." + future_read_uri) (fn3, metadata3) = children[u"add-ro"] self.failUnless(isinstance(fn3, UnknownNode), fn3) self.failUnlessReallyEqual(fn3.get_uri(), "ro." + future_read_uri) self.failUnlessReallyEqual(fn3.get_write_uri(), None) self.failUnlessReallyEqual(fn3.get_readonly_uri(), "ro." + future_read_uri) (fn4, metadata4) = children[u"add-imm"] self.failUnless(isinstance(fn4, UnknownNode), fn4) self.failUnlessReallyEqual(fn4.get_uri(), "imm." + future_imm_uri) self.failUnlessReallyEqual(fn4.get_write_uri(), None) self.failUnlessReallyEqual(fn4.get_readonly_uri(), "imm." + future_imm_uri) # We should also be allowed to copy the "future" UnknownNode, because # it contains all the information that was in the original directory # (readcap and writecap), so we're preserving everything. return self._node.set_node(u"copy", fn) d.addCallback(_check) d.addCallback(lambda ign: self._node.list()) def _check2(children): self.failUnlessReallyEqual(len(children), 5) (fn, metadata) = children[u"copy"] self.failUnless(isinstance(fn, UnknownNode), fn) self.failUnlessReallyEqual(fn.get_uri(), future_write_uri) self.failUnlessReallyEqual(fn.get_write_uri(), future_write_uri) self.failUnlessReallyEqual(fn.get_readonly_uri(), "ro." + future_read_uri) d.addCallback(_check2) return d def test_unknown_strip_prefix_for_ro(self): self.failUnlessReallyEqual(strip_prefix_for_ro("foo", False), "foo") self.failUnlessReallyEqual(strip_prefix_for_ro("ro.foo", False), "foo") self.failUnlessReallyEqual(strip_prefix_for_ro("imm.foo", False), "imm.foo") self.failUnlessReallyEqual(strip_prefix_for_ro("foo", True), "foo") self.failUnlessReallyEqual(strip_prefix_for_ro("ro.foo", True), "foo") self.failUnlessReallyEqual(strip_prefix_for_ro("imm.foo", True), "foo") def test_unknownnode(self): lit_uri = one_uri # This does not attempt to be exhaustive. no_no = [# Opaque node, but not an error. ( 0, UnknownNode(None, None)), ( 1, UnknownNode(None, None, deep_immutable=True)), ] unknown_rw = [# These are errors because we're only given a rw_uri, and we can't # diminish it. ( 2, UnknownNode("foo", None)), ( 3, UnknownNode("foo", None, deep_immutable=True)), ( 4, UnknownNode("ro.foo", None, deep_immutable=True)), ( 5, UnknownNode("ro." + mut_read_uri, None, deep_immutable=True)), ( 5.1, UnknownNode("ro." + mdmf_read_uri, None, deep_immutable=True)), ( 6, UnknownNode("URI:SSK-RO:foo", None, deep_immutable=True)), ( 7, UnknownNode("URI:SSK:foo", None)), ] must_be_ro = [# These are errors because a readonly constraint is not met. ( 8, UnknownNode("ro." + mut_write_uri, None)), ( 8.1, UnknownNode("ro." + mdmf_write_uri, None)), ( 9, UnknownNode(None, "ro." + mut_write_uri)), ( 9.1, UnknownNode(None, "ro." 
+ mdmf_write_uri)), ] must_be_imm = [# These are errors because an immutable constraint is not met. (10, UnknownNode(None, "ro.URI:SSK-RO:foo", deep_immutable=True)), (11, UnknownNode(None, "imm.URI:SSK:foo")), (12, UnknownNode(None, "imm.URI:SSK-RO:foo")), (13, UnknownNode("bar", "ro.foo", deep_immutable=True)), (14, UnknownNode("bar", "imm.foo", deep_immutable=True)), (15, UnknownNode("bar", "imm." + lit_uri, deep_immutable=True)), (16, UnknownNode("imm." + mut_write_uri, None)), (16.1, UnknownNode("imm." + mdmf_write_uri, None)), (17, UnknownNode("imm." + mut_read_uri, None)), (17.1, UnknownNode("imm." + mdmf_read_uri, None)), (18, UnknownNode("bar", "imm.foo")), ] bad_uri = [# These are errors because the URI is bad once we've stripped the prefix. (19, UnknownNode("ro.URI:SSK-RO:foo", None)), (20, UnknownNode("imm.URI:CHK:foo", None, deep_immutable=True)), (21, UnknownNode(None, "URI:CHK:foo")), (22, UnknownNode(None, "URI:CHK:foo", deep_immutable=True)), ] ro_prefixed = [# These are valid, and the readcap should end up with a ro. prefix. (23, UnknownNode(None, "foo")), (24, UnknownNode(None, "ro.foo")), (25, UnknownNode(None, "ro." + lit_uri)), (26, UnknownNode("bar", "foo")), (27, UnknownNode("bar", "ro.foo")), (28, UnknownNode("bar", "ro." + lit_uri)), (29, UnknownNode("ro.foo", None)), (30, UnknownNode("ro." + lit_uri, None)), ] imm_prefixed = [# These are valid, and the readcap should end up with an imm. prefix. (31, UnknownNode(None, "foo", deep_immutable=True)), (32, UnknownNode(None, "ro.foo", deep_immutable=True)), (33, UnknownNode(None, "imm.foo")), (34, UnknownNode(None, "imm.foo", deep_immutable=True)), (35, UnknownNode("imm." + lit_uri, None)), (36, UnknownNode("imm." + lit_uri, None, deep_immutable=True)), (37, UnknownNode(None, "imm." + lit_uri)), (38, UnknownNode(None, "imm." + lit_uri, deep_immutable=True)), ] error = unknown_rw + must_be_ro + must_be_imm + bad_uri ok = ro_prefixed + imm_prefixed for (i, n) in no_no + error + ok: self.failUnless(n.is_unknown(), i) for (i, n) in no_no + error: self.failUnless(n.get_uri() is None, i) self.failUnless(n.get_write_uri() is None, i) self.failUnless(n.get_readonly_uri() is None, i) for (i, n) in no_no + ok: n.raise_error() for (i, n) in unknown_rw: self.failUnlessRaises(MustNotBeUnknownRWError, lambda n=n: n.raise_error()) for (i, n) in must_be_ro: self.failUnlessRaises(MustBeReadonlyError, lambda n=n: n.raise_error()) for (i, n) in must_be_imm: self.failUnlessRaises(MustBeDeepImmutableError, lambda n=n: n.raise_error()) for (i, n) in bad_uri: self.failUnlessRaises(uri.BadURIError, lambda n=n: n.raise_error()) for (i, n) in ok: self.failIf(n.get_readonly_uri() is None, i) for (i, n) in ro_prefixed: self.failUnless(n.get_readonly_uri().startswith("ro."), i) for (i, n) in imm_prefixed: self.failUnless(n.get_readonly_uri().startswith("imm."), i) class DeepStats(testutil.ReallyEqualMixin, unittest.TestCase): timeout = 240 # It takes longer than 120 seconds on Francois's arm box. 
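    # (Clarifying comment, not in the original file: the size-files-histogram
    # assertions in test_stats below expect bucket edges at roughly half-order-of-
    # magnitude boundaries -- (1,3), (4,10), (11,31), (32,100), (101,316), (317,1000),
    # (1001,3162), ... up to (3162277660169, 10000000000000) for the 4 TB sample --
    # i.e. each bucket ends near 10**(k/2).)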
def test_stats(self): ds = dirnode.DeepStats(None) ds.add("count-files") ds.add("size-immutable-files", 123) ds.histogram("size-files-histogram", 123) ds.max("largest-directory", 444) s = ds.get_results() self.failUnlessReallyEqual(s["count-files"], 1) self.failUnlessReallyEqual(s["size-immutable-files"], 123) self.failUnlessReallyEqual(s["largest-directory"], 444) self.failUnlessReallyEqual(s["count-literal-files"], 0) ds.add("count-files") ds.add("size-immutable-files", 321) ds.histogram("size-files-histogram", 321) ds.max("largest-directory", 2) s = ds.get_results() self.failUnlessReallyEqual(s["count-files"], 2) self.failUnlessReallyEqual(s["size-immutable-files"], 444) self.failUnlessReallyEqual(s["largest-directory"], 444) self.failUnlessReallyEqual(s["count-literal-files"], 0) self.failUnlessReallyEqual(s["size-files-histogram"], [ (101, 316, 1), (317, 1000, 1) ]) ds = dirnode.DeepStats(None) for i in range(1, 1100): ds.histogram("size-files-histogram", i) ds.histogram("size-files-histogram", 4*1000*1000*1000*1000) # 4TB s = ds.get_results() self.failUnlessReallyEqual(s["size-files-histogram"], [ (1, 3, 3), (4, 10, 7), (11, 31, 21), (32, 100, 69), (101, 316, 216), (317, 1000, 684), (1001, 3162, 99), (3162277660169L, 10000000000000L, 1), ]) class UCWEingMutableFileNode(MutableFileNode): please_ucwe_after_next_upload = False def _upload(self, new_contents, servermap): d = MutableFileNode._upload(self, new_contents, servermap) def _ucwe(res): if self.please_ucwe_after_next_upload: self.please_ucwe_after_next_upload = False raise UncoordinatedWriteError() return res d.addCallback(_ucwe) return d class UCWEingNodeMaker(NodeMaker): def _create_mutable(self, cap): n = UCWEingMutableFileNode(self.storage_broker, self.secret_holder, self.default_encoding_parameters, self.history) return n.init_from_cap(cap) class Deleter(GridTestMixin, testutil.ReallyEqualMixin, unittest.TestCase): timeout = 3600 # It takes longer than 433 seconds on Zandr's ARM box. def test_retry(self): # ticket #550, a dirnode.delete which experiences an # UncoordinatedWriteError will fail with an incorrect "you're # deleting something which isn't there" NoSuchChildError exception. # to trigger this, we start by creating a directory with a single # file in it. Then we create a special dirnode that uses a modified # MutableFileNode which will raise UncoordinatedWriteError once on # demand. We then call dirnode.delete, which ought to retry and # succeed. self.basedir = self.mktemp() self.set_up_grid() c0 = self.g.clients[0] d = c0.create_dirnode() small = upload.Data("Small enough for a LIT", None) def _created_dir(dn): self.root = dn self.root_uri = dn.get_uri() return dn.add_file(u"file", small) d.addCallback(_created_dir) def _do_delete(ignored): nm = UCWEingNodeMaker(c0.storage_broker, c0._secret_holder, c0.get_history(), c0.getServiceNamed("uploader"), c0.terminator, c0.get_encoding_parameters(), c0.mutable_file_default, c0._key_generator) n = nm.create_from_cap(self.root_uri) assert n._node.please_ucwe_after_next_upload == False n._node.please_ucwe_after_next_upload = True # This should succeed, not raise an exception return n.delete(u"file") d.addCallback(_do_delete) return d class Adder(GridTestMixin, unittest.TestCase, testutil.ShouldFailMixin): def test_overwrite(self): # note: This functionality could be tested without actually creating # several RSA keys. 
        # It would be faster without the GridTestMixin: use
        # dn.set_node(nodemaker.create_from_cap(make_chk_file_uri())) instead
        # of dn.add_file, and use a special NodeMaker that creates fake
        # mutable files.
        self.basedir = "dirnode/Adder/test_overwrite"
        self.set_up_grid()
        c = self.g.clients[0]
        fileuri = make_chk_file_uri(1234)
        filenode = c.nodemaker.create_from_cap(fileuri)
        d = c.create_dirnode()

        def _create_directory_tree(root_node):
            # Build
            # root/file1
            # root/file2
            # root/dir1
            d = root_node.add_file(u'file1', upload.Data("Important Things",
                                                         None))
            d.addCallback(lambda res:
                root_node.add_file(u'file2', upload.Data("Sekrit Codes", None)))
            d.addCallback(lambda res:
                root_node.create_subdirectory(u"dir1"))
            d.addCallback(lambda res: root_node)
            return d

        d.addCallback(_create_directory_tree)

        def _test_adder(root_node):
            d = root_node.set_node(u'file1', filenode)
            # We've overwritten file1. Let's try it with a directory
            d.addCallback(lambda res:
                root_node.create_subdirectory(u'dir2'))
            d.addCallback(lambda res:
                root_node.set_node(u'dir2', filenode))
            # We try overwriting a file with a child while also specifying
            # overwrite=False. We should receive an ExistingChildError
            # when we do this.
            d.addCallback(lambda res:
                self.shouldFail(ExistingChildError, "set_node",
                                "child 'file1' already exists",
                                root_node.set_node, u"file1",
                                filenode, overwrite=False))
            # If we try with a directory, we should see the same thing
            d.addCallback(lambda res:
                self.shouldFail(ExistingChildError, "set_node",
                                "child 'dir1' already exists",
                                root_node.set_node, u'dir1', filenode,
                                overwrite=False))
            d.addCallback(lambda res:
                root_node.set_node(u'file1', filenode,
                                   overwrite="only-files"))
            d.addCallback(lambda res:
                self.shouldFail(ExistingChildError, "set_node",
                                "child 'dir1' already exists",
                                root_node.set_node, u'dir1', filenode,
                                overwrite="only-files"))
            return d

        d.addCallback(_test_adder)
        return d
tahoe-lafs-1.10.0/src/allmydata/test/test_download.py000066400000000000000000004052011221140116300225430ustar00rootroot00000000000000
# system-level upload+download roundtrip test, but using shares created from
# a previous run. This asserts that the current code is capable of decoding
# shares from a previous version.

import os
from twisted.trial import unittest
from twisted.internet import defer, reactor
from allmydata import uri
from allmydata.storage.server import storage_index_to_dir
from allmydata.util import base32, fileutil, spans, log, hashutil
from allmydata.util.consumer import download_to_data, MemoryConsumer
from allmydata.immutable import upload, layout
from allmydata.test.no_network import GridTestMixin, NoNetworkServer
from allmydata.test.common import ShouldFailMixin
from allmydata.interfaces import NotEnoughSharesError, NoSharesError, \
     DownloadStopped
from allmydata.immutable.downloader.common import BadSegmentNumberError, \
     BadCiphertextHashError, COMPLETE, OVERDUE, DEAD
from allmydata.immutable.downloader.status import DownloadStatus
from allmydata.immutable.downloader.fetcher import SegmentFetcher
from allmydata.codec import CRSDecoder
from foolscap.eventual import eventually, fireEventually, flushEventualQueue

plaintext = "This is a moderate-sized file.\n" * 10
mutable_plaintext = "This is a moderate-sized mutable file.\n" * 10

# this chunk was generated by create_share(), written to disk, then pasted
# into this file. These shares were created by 1.2.0-r3247, a version that's
# probably fairly close to 1.3.0 .
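
# A minimal sketch (an assumption added for clarity, not the loader actually
# used by the tests below) of how share blobs like the ones in the
# `immutable_shares` dict that follows could be written back into a
# no-network grid, so that a later download exercises the
# backwards-compatibility path. It assumes a GridTestMixin-style accessor
# such as testcase.get_serverdir(i); adjust to the real helpers in this file
# if they differ.
def _sketch_write_stored_shares(testcase, uri_s, shares_by_client):
    si = uri.from_string(uri_s).get_storage_index()
    si_dir = storage_index_to_dir(si)
    for clientnum, shares in shares_by_client.items():
        for shnum, sharedata in shares.items():
            # each share goes into <serverdir>/shares/<si_dir>/<shnum>
            sharedir = os.path.join(testcase.get_serverdir(clientnum),
                                    "shares", si_dir)
            fileutil.make_dirs(sharedir)
            fileutil.write(os.path.join(sharedir, str(shnum)), sharedata)
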
#--------- BEGIN stored_shares.py -------------- immutable_uri = "URI:CHK:g4i6qkk7mlj4vkl5ncg6dwo73i:qcas2ebousfk3q5rkl2ncayeku52kpyse76v5yeel2t2eaa4f6ha:3:10:310" immutable_shares = { 0: { # client[0] 0: base32.a2b("aaaaaaiaaacyeaaaaaaqaaaaaeaaaadiaaaaa2aaaaaciaaaacgaaaaavqaaaagmaaaab3aaaaazmksehmgmlmmeqkbxbljh5qnfq36b7h5ukgqccmy3665khphcxihkce7jukeuegdxtn26p353ork6qihitbshwucpopzvdnpkflg6vbvko7ohcmxjywpdkvjmuzq6hysxfl74mamn224nrsyl7czmvtwtss6kkzljridkffeaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaactwxn7tsxj2nh6skwbghycguqfj7xrpegeporex5ebctynbgbl7y5y3kjcfritduzdk5rvwqs4lwzvb7fgvljgozbbtamhoriuzaeruaaqt2fbbxr5yv4vqeabkjqow6sd73dfqab3qban3htx6rn2y6mujdwaacbpvbyim4ewanv2vku44tunk7vdjkty2wkfm3jg67pqmm2newyib4aafazigyt6kxmirnlio5sdvbkvh43rwpctm6coigl64chn6z7w45rcaaccvmfgplu4kz5erphnx3xhzclypawi2j5zsvewmn4s2wbba4k2ktaab45y3kjcfritduzdk5rvwqs4lwzvb7fgvljgozbbtamhoriuzaeruaaaae4gg33emvrv63tbnvstumz2mnzhglddn5sgky27obqxeylnom5dqortgezc2mzngeycyy3spfyhi5dfpb2f62dbonudumzshkl7fjw5sp7x3yw4sdhze6qf7zgsjocpqtwl2gj5o6vufvixto3u2lddoj4xa5dumv4hix3sn5xxix3imfzwqortgi5fhno37hfotu2p5evmcmpqenjakt7pc6imi65cjp2icfhq2cmcx7rmnzswkzdfmrpxg2dbojsxgorrhizsy3tvnvpxgzlhnvsw45dthiytumjmonswo3lfnz2f643jpjstumz2gmyteldtnbqxezk7ojxw65c7nbqxg2b2gmzdubzmius26hljzu4j7gq5hdshwueqcfjc2bmveiyqbdxgyejetzovfrzws6tfhiztumzrgawhiyljnrpwg33emvrv64dbojqw24z2ha5dgmjsfuzs2mjqfr2g65dbnrpxg2dbojsxgorshiytalaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabj5uiln36za2n4oyier7k5e4sx6newmmflfqhj7xffy32p5iohlyf33bdx5dafkfwr7rxwxjcsg3ljflkaae537llwnnykgf36h52dojfplbwi"), 5: 
base32.a2b("aaaaaaiaaacyeaaaaaaqaaaaaeaaaadiaaaaa2aaaaaciaaaacgaaaaavqaaaagmaaaab3aaaaazmsdsvwbnfx2rnh7dusqniqomsdeetuafps6cawyb4pzxpkzal7w5ufaknxfnqw2qywv4c3a2zlumb2x2rx5osbxd3kqmebjndqf7zihbtagqczgwrka5rnywtsaeyijyh26okua2u7loep2nzo5etirjrxmp3yxpb4pheusaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaactwxn7tsxj2nh6skwbghycguqfj7xrpegeporex5ebctynbgbl7zs3zcg7igd2xoa4eu3lffqginpmoxrshqe6n3hzpocihgeu4vvymaadjz54nelgyi47767pkbsjwdjgsv7uyd5ntrztw6juavj7sd7wx7aaacx7wxlycyjniwxvby4ar546ncb4d3jnbhssnq4n4l4xeajurmn5diabgxwi6i5d2ysny3vavrm3a5lsuvng5mhbzk7axesyeddzw6uzmnluaakglpei35aypk5ydqstnmuwazbv5r26gi6atzxm7f5yja4ystswxbqaakbsqnrh4voyrc2wq53ehkcvkpzxdm6fgz4e4qmx5yeo35t7nz3ceaaaae4gg33emvrv63tbnvstumz2mnzhglddn5sgky27obqxeylnom5dqortgezc2mzngeycyy3spfyhi5dfpb2f62dbonudumzshkl7fjw5sp7x3yw4sdhze6qf7zgsjocpqtwl2gj5o6vufvixto3u2lddoj4xa5dumv4hix3sn5xxix3imfzwqortgi5fhno37hfotu2p5evmcmpqenjakt7pc6imi65cjp2icfhq2cmcx7rmnzswkzdfmrpxg2dbojsxgorrhizsy3tvnvpxgzlhnvsw45dthiytumjmonswo3lfnz2f643jpjstumz2gmyteldtnbqxezk7ojxw65c7nbqxg2b2gmzdubzmius26hljzu4j7gq5hdshwueqcfjc2bmveiyqbdxgyejetzovfrzws6tfhiztumzrgawhiyljnrpwg33emvrv64dbojqw24z2ha5dgmjsfuzs2mjqfr2g65dbnrpxg2dbojsxgorshiytalaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabj5uiln36za2n4oyier7k5e4sx6newmmflfqhj7xffy32p5iohlyf33bdx5dafkfwr7rxwxjcsg3ljflkaae537llwnnykgf36h52dojfplbwi"), }, 1: { # client[1] 2: 
base32.a2b("aaaaaaiaaacyeaaaaaaqaaaaaeaaaadiaaaaa2aaaaaciaaaacgaaaaavqaaaagmaaaab3aaaaazmj7um4zfgqo35m62ln6has6xz43klzjphj5eg46mb5x2jzgr6x6zb4voveo5uef53xbjbktr5rlupomy7x5b34amqeeg4r6obt6kpo2x4s3m3cwoo54oijyqfms3n3fethykhtglc47r4ci7ugqgz5d5fap3xzyhm4ehaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaactwxn7tsxj2nh6skwbghycguqfj7xrpegeporex5ebctynbgbl7zqkzg32wa74epeppqwneujs6tjptlm4qw75hoafobsoif3ok5odkaarmcwjw6vqh7bdzd34ftjfcmxu2l423hefx7j3qblqmtsbo3sxlq2qaewyffwgzojfi4uj2praj5azehnr4fhan5kdyewhtfncrqzoe42ijeaaikvquz5otrlhusf45w7o47ejpb4czdjhxgkuszrxslkyeedrljkmaabigkbwe7sv3celk2dxmq5ikvj7g4ntyu3hqtsbs7xar3pwp5xhmiqaa6k7uub7uqlamlqi2oduautemch242scu7cfor6kedxs6mm3uwjsmaaaae4gg33emvrv63tbnvstumz2mnzhglddn5sgky27obqxeylnom5dqortgezc2mzngeycyy3spfyhi5dfpb2f62dbonudumzshkl7fjw5sp7x3yw4sdhze6qf7zgsjocpqtwl2gj5o6vufvixto3u2lddoj4xa5dumv4hix3sn5xxix3imfzwqortgi5fhno37hfotu2p5evmcmpqenjakt7pc6imi65cjp2icfhq2cmcx7rmnzswkzdfmrpxg2dbojsxgorrhizsy3tvnvpxgzlhnvsw45dthiytumjmonswo3lfnz2f643jpjstumz2gmyteldtnbqxezk7ojxw65c7nbqxg2b2gmzdubzmius26hljzu4j7gq5hdshwueqcfjc2bmveiyqbdxgyejetzovfrzws6tfhiztumzrgawhiyljnrpwg33emvrv64dbojqw24z2ha5dgmjsfuzs2mjqfr2g65dbnrpxg2dbojsxgorshiytalaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabdp37hh2k4ys4d7qusb5e3dakjntythtcwcwfok7e52pu64zn4wrwbtlkzxzntwuwemi6e6mek5n4i7h3bw7nkat2zmqieftinxgzl2jfplbwi"), 7: 
base32.a2b("aaaaaaiaaacyeaaaaaaqaaaaaeaaaadiaaaaa2aaaaaciaaaacgaaaaavqaaaagmaaaab3aaaaaznhsh2frhzxbutelvddtbuf3tfilhcj2zi3cxjyzy7pg7ewamazcblv76mvey54fxmch64chqfi24jmondc4uzitby3wjeui4nfp7kv6ufo67exptkvwk7cnbouvjiapyqzrps4r6ise4jhlr7mtp2tlizb5hyaqm3fhsvrmqaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaactwxn7tsxj2nh6skwbghycguqfj7xrpegeporex5ebctynbgbl72g6h2oewtfcwgupjbjnh4k5k6d3k2fpi2q6nyidh3yo5ui6cslreaajms7f3pcsywhjbgrybzp64jzlsyjqbu7h4hvdlwf77ar6l63imdeqaaudfa3cpzk5rcfvnb3wioufku7togz4kntyjzazp3qi5x3h63tweiaagtt3y2iwnqrz77566udetmgsnfl7jqh23hdthn4tibkt7eh7np6aaakvpbzjdki64qaigkdj2bven3uigxbpurpwtrkjs4b6habv2ls7zqaac2g6h2oewtfcwgupjbjnh4k5k6d3k2fpi2q6nyidh3yo5ui6cslreaaaae4gg33emvrv63tbnvstumz2mnzhglddn5sgky27obqxeylnom5dqortgezc2mzngeycyy3spfyhi5dfpb2f62dbonudumzshkl7fjw5sp7x3yw4sdhze6qf7zgsjocpqtwl2gj5o6vufvixto3u2lddoj4xa5dumv4hix3sn5xxix3imfzwqortgi5fhno37hfotu2p5evmcmpqenjakt7pc6imi65cjp2icfhq2cmcx7rmnzswkzdfmrpxg2dbojsxgorrhizsy3tvnvpxgzlhnvsw45dthiytumjmonswo3lfnz2f643jpjstumz2gmyteldtnbqxezk7ojxw65c7nbqxg2b2gmzdubzmius26hljzu4j7gq5hdshwueqcfjc2bmveiyqbdxgyejetzovfrzws6tfhiztumzrgawhiyljnrpwg33emvrv64dbojqw24z2ha5dgmjsfuzs2mjqfr2g65dbnrpxg2dbojsxgorshiytalaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabdp37hh2k4ys4d7qusb5e3dakjntythtcwcwfok7e52pu64zn4wrwbtlkzxzntwuwemi6e6mek5n4i7h3bw7nkat2zmqieftinxgzl2jfplbwi"), }, 2: { # client[2] 1: 
base32.a2b("aaaaaaiaaacyeaaaaaaqaaaaaeaaaadiaaaaa2aaaaaciaaaacgaaaaavqaaaagmaaaab3aaaaazmkrwrt6figauxkgqyk3nggp5eeoeq5htt7tke4gfqj2u5roieslao4fldcwlq4btzk4brhkaerqiih6mhudotttrb6xzmvnqgg33fjcqeuw6teb3gml2pmhsezisa5svnzlvqnbaz6kzdmhisbwgu6ocexf2ge2rvc67gneqaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaactwxn7tsxj2nh6skwbghycguqfj7xrpegeporex5ebctynbgbl72piueg6hxcxswaqafjgb232ip7mmwaahoaebxm6o72fxldzsreoyaaif6uhbbtqsybwxkvkttsorvl6unfkpdkzivtne3356brtjus3bahqaee6riin4pofpfmbaaksmdvxuq76yzmaao4aidoz457ulowhtfci5qaafazigyt6kxmirnlio5sdvbkvh43rwpctm6coigl64chn6z7w45rcaaccvmfgplu4kz5erphnx3xhzclypawi2j5zsvewmn4s2wbba4k2ktaab45y3kjcfritduzdk5rvwqs4lwzvb7fgvljgozbbtamhoriuzaeruaaaae4gg33emvrv63tbnvstumz2mnzhglddn5sgky27obqxeylnom5dqortgezc2mzngeycyy3spfyhi5dfpb2f62dbonudumzshkl7fjw5sp7x3yw4sdhze6qf7zgsjocpqtwl2gj5o6vufvixto3u2lddoj4xa5dumv4hix3sn5xxix3imfzwqortgi5fhno37hfotu2p5evmcmpqenjakt7pc6imi65cjp2icfhq2cmcx7rmnzswkzdfmrpxg2dbojsxgorrhizsy3tvnvpxgzlhnvsw45dthiytumjmonswo3lfnz2f643jpjstumz2gmyteldtnbqxezk7ojxw65c7nbqxg2b2gmzdubzmius26hljzu4j7gq5hdshwueqcfjc2bmveiyqbdxgyejetzovfrzws6tfhiztumzrgawhiyljnrpwg33emvrv64dbojqw24z2ha5dgmjsfuzs2mjqfr2g65dbnrpxg2dbojsxgorshiytalaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabaugotrr7enihxy2b2nwodhxabihaf3ewc2hmcdjsqx5hi4h3rn7gnvpt3lzzo5qgbnlp4dybwr7dn7vu5hsiyo5pedlqcasb7csiuojfplbwi"), 6: 
base32.a2b("aaaaaaiaaacyeaaaaaaqaaaaaeaaaadiaaaaa2aaaaaciaaaacgaaaaavqaaaagmaaaab3aaaaazm34cgyp37ou5ohrofmk6bf5gcppxeb2njwmiwasn3uh4ykeocvq4vydsw36ksh63fcil3o257zupffrruiuqlwjvbdcdjiuqrojiromunzxxc34io7zlfafprzlvmztph4qsp67ozxmwvivqwtvu6ckr7pffsikgi2supviqaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaactwxn7tsxj2nh6skwbghycguqfj7xrpegeporex5ebctynbgbl7zlyoki2shxeacbsq2oqnjdo5cbvyl5el5u4ksmxapryanos4x6maaajms7f3pcsywhjbgrybzp64jzlsyjqbu7h4hvdlwf77ar6l63imdeqaaudfa3cpzk5rcfvnb3wioufku7togz4kntyjzazp3qi5x3h63tweiaagtt3y2iwnqrz77566udetmgsnfl7jqh23hdthn4tibkt7eh7np6aaakvpbzjdki64qaigkdj2bven3uigxbpurpwtrkjs4b6habv2ls7zqaac2g6h2oewtfcwgupjbjnh4k5k6d3k2fpi2q6nyidh3yo5ui6cslreaaaae4gg33emvrv63tbnvstumz2mnzhglddn5sgky27obqxeylnom5dqortgezc2mzngeycyy3spfyhi5dfpb2f62dbonudumzshkl7fjw5sp7x3yw4sdhze6qf7zgsjocpqtwl2gj5o6vufvixto3u2lddoj4xa5dumv4hix3sn5xxix3imfzwqortgi5fhno37hfotu2p5evmcmpqenjakt7pc6imi65cjp2icfhq2cmcx7rmnzswkzdfmrpxg2dbojsxgorrhizsy3tvnvpxgzlhnvsw45dthiytumjmonswo3lfnz2f643jpjstumz2gmyteldtnbqxezk7ojxw65c7nbqxg2b2gmzdubzmius26hljzu4j7gq5hdshwueqcfjc2bmveiyqbdxgyejetzovfrzws6tfhiztumzrgawhiyljnrpwg33emvrv64dbojqw24z2ha5dgmjsfuzs2mjqfr2g65dbnrpxg2dbojsxgorshiytalaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabaugotrr7enihxy2b2nwodhxabihaf3ewc2hmcdjsqx5hi4h3rn7gnvpt3lzzo5qgbnlp4dybwr7dn7vu5hsiyo5pedlqcasb7csiuojfplbwi"), }, 3: { # client[3] 4: 
base32.a2b("aaaaaaiaaacyeaaaaaaqaaaaaeaaaadiaaaaa2aaaaaciaaaacgaaaaavqaaaagmaaaab3aaaaaznjqn7ehmj6f4p3fjyliuvwnfothumsfhs7ienw4uln6joaxopqlmcy5daa4njrkgj7nqm6tpnmz2dci2b356pljv4zjj5ayzfihi4g26qdei7kjtegjuv4d3k3t4orpufnft3edbondkpj5etjczwhyulukzuy5socyivdfqaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaactwxn7tsxj2nh6skwbghycguqfj7xrpegeporex5ebctynbgbl7zpmr4r2hvre3rxkblczwb2xfjk2n2yodsv6bojfqightn5jsy2xiaatl3epeor5mjg4n2qkywnqovzkkwtowdq4vpqlsjmcbr43pkmwgv2aacx7wxlycyjniwxvby4ar546ncb4d3jnbhssnq4n4l4xeajurmn5diaagtt3y2iwnqrz77566udetmgsnfl7jqh23hdthn4tibkt7eh7np6aaakglpei35aypk5ydqstnmuwazbv5r26gi6atzxm7f5yja4ystswxbqaakbsqnrh4voyrc2wq53ehkcvkpzxdm6fgz4e4qmx5yeo35t7nz3ceaaaae4gg33emvrv63tbnvstumz2mnzhglddn5sgky27obqxeylnom5dqortgezc2mzngeycyy3spfyhi5dfpb2f62dbonudumzshkl7fjw5sp7x3yw4sdhze6qf7zgsjocpqtwl2gj5o6vufvixto3u2lddoj4xa5dumv4hix3sn5xxix3imfzwqortgi5fhno37hfotu2p5evmcmpqenjakt7pc6imi65cjp2icfhq2cmcx7rmnzswkzdfmrpxg2dbojsxgorrhizsy3tvnvpxgzlhnvsw45dthiytumjmonswo3lfnz2f643jpjstumz2gmyteldtnbqxezk7ojxw65c7nbqxg2b2gmzdubzmius26hljzu4j7gq5hdshwueqcfjc2bmveiyqbdxgyejetzovfrzws6tfhiztumzrgawhiyljnrpwg33emvrv64dbojqw24z2ha5dgmjsfuzs2mjqfr2g65dbnrpxg2dbojsxgorshiytalaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaacifoqlus3puiqkekp6g6fdecjcx2bak27angodamzoxugovlhtcj5xbly7teqwmf73fqk3clyfvs6hdauq5qnqahlxlmp2vrmnneedgjfplbwi"), 9: 
base32.a2b("aaaaaaiaaacyeaaaaaaqaaaaaeaaaadiaaaaa2aaaaaciaaaacgaaaaavqaaaagmaaaab3aaaaazn2tz3qt62bgsdnvksvdegsylb2kbltouheryflpho7hugme7svk7so2v7hmcgc43tcyugybuqzgifvkllikfiiezvml7ilolb7ivwvrv4d4t2gbywa44ibqwogmjtffta4b2sfwqebfg7pptergeqm5wo3tndtf7p3vftabqaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaactwxn7tsxj2nh6skwbghycguqfj7xrpegeporex5ebctynbgbl7y3m26swfhsb66ze4cmyhohaksid7fyljgkhag32ibc7vx2yj4j5saayg3gxuvrj4qpxwjhatgb3rycusa7zoc2jsrybw6saix5n6wcpcpmqaamxjsc6bwv4w4or2oylltmsbfbobvmenj3sa6lnq6iy4tugsnv72eaaybvqu3gmlomi3dnf2tum3hoseavpesyia2i2wqgwbmbtrgmotu6oaadirzs2idl54toffh4a2hehvg2e3zoed4dr6pcdpuqpnz2knte7gqqac6kfatp33ianoqvg6mdd4vaxa27lo6vpugbcvanhskaqq2kewn6kwaaaae4gg33emvrv63tbnvstumz2mnzhglddn5sgky27obqxeylnom5dqortgezc2mzngeycyy3spfyhi5dfpb2f62dbonudumzshkl7fjw5sp7x3yw4sdhze6qf7zgsjocpqtwl2gj5o6vufvixto3u2lddoj4xa5dumv4hix3sn5xxix3imfzwqortgi5fhno37hfotu2p5evmcmpqenjakt7pc6imi65cjp2icfhq2cmcx7rmnzswkzdfmrpxg2dbojsxgorrhizsy3tvnvpxgzlhnvsw45dthiytumjmonswo3lfnz2f643jpjstumz2gmyteldtnbqxezk7ojxw65c7nbqxg2b2gmzdubzmius26hljzu4j7gq5hdshwueqcfjc2bmveiyqbdxgyejetzovfrzws6tfhiztumzrgawhiyljnrpwg33emvrv64dbojqw24z2ha5dgmjsfuzs2mjqfr2g65dbnrpxg2dbojsxgorshiytalaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaacifoqlus3puiqkekp6g6fdecjcx2bak27angodamzoxugovlhtcj5xbly7teqwmf73fqk3clyfvs6hdauq5qnqahlxlmp2vrmnneedgjfplbwi"), }, 4: { # client[4] 3: 
base32.a2b("aaaaaaiaaacyeaaaaaaqaaaaaeaaaadiaaaaa2aaaaaciaaaacgaaaaavqaaaagmaaaab3aaaaazmbduh5nwvcvpgrihhnjxacz2jvzu3prrdqewo3vmxkhu5yd3fa3eil56fyh5l7ojimghwbf2o6ri7cmppr34qflr5o4w6s5fekxhdt3qvlgsw5yp5wrmjjffhph5czd5kzoo7yyg5x3wgxxzdvwtuom2c5olao62ep77b7wqaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaactwxn7tsxj2nh6skwbghycguqfj7xrpegeporex5ebctynbgbl73mcs3dmxesuoke5hyqe6qmsdwy6ctqg6vb4cldzswriymxconeesaarmcwjw6vqh7bdzd34ftjfcmxu2l423hefx7j3qblqmtsbo3sxlq2qaaudfa3cpzk5rcfvnb3wioufku7togz4kntyjzazp3qi5x3h63tweiaaikvquz5otrlhusf45w7o47ejpb4czdjhxgkuszrxslkyeedrljkmaajnqklmns4skrzitu7cat2bsio3dykoa32uhqjmpgk2fdbs4jzuqsiaa6k7uub7uqlamlqi2oduautemch242scu7cfor6kedxs6mm3uwjsmaaaae4gg33emvrv63tbnvstumz2mnzhglddn5sgky27obqxeylnom5dqortgezc2mzngeycyy3spfyhi5dfpb2f62dbonudumzshkl7fjw5sp7x3yw4sdhze6qf7zgsjocpqtwl2gj5o6vufvixto3u2lddoj4xa5dumv4hix3sn5xxix3imfzwqortgi5fhno37hfotu2p5evmcmpqenjakt7pc6imi65cjp2icfhq2cmcx7rmnzswkzdfmrpxg2dbojsxgorrhizsy3tvnvpxgzlhnvsw45dthiytumjmonswo3lfnz2f643jpjstumz2gmyteldtnbqxezk7ojxw65c7nbqxg2b2gmzdubzmius26hljzu4j7gq5hdshwueqcfjc2bmveiyqbdxgyejetzovfrzws6tfhiztumzrgawhiyljnrpwg33emvrv64dbojqw24z2ha5dgmjsfuzs2mjqfr2g65dbnrpxg2dbojsxgorshiytalaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaadusyxmwhtnfdeewwgxd25fwixycfdcy46ifqv4dhga23fko6dbl4ywo2d27n3zh3wd6zumhupvmtgspqrh6t7wbsghruzqd3imbo2tojfplbwi"), 8: 
base32.a2b("aaaaaaiaaacyeaaaaaaqaaaaaeaaaadiaaaaa2aaaaaciaaaacgaaaaavqaaaagmaaaab3aaaaaznjzqcxwyhgwlcpzvfb2berhoyw47h72gkzofwgksryqd4r6xlyougvyg4p3wkz7u37zllskeswuuh4w2rylbxecomnmqfv7n5ex3thjzq7ykr7gjkvq3kmrlhmxu3wnsr4ipsdn546btavjzc6yppoii2mxgnnk4qbxqrltaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaactwxn7tsxj2nh6skwbghycguqfj7xrpegeporex5ebctynbgbl72kfatp33ianoqvg6mdd4vaxa27lo6vpugbcvanhskaqq2kewn6kwaayg3gxuvrj4qpxwjhatgb3rycusa7zoc2jsrybw6saix5n6wcpcpmqaamxjsc6bwv4w4or2oylltmsbfbobvmenj3sa6lnq6iy4tugsnv72eaaybvqu3gmlomi3dnf2tum3hoseavpesyia2i2wqgwbmbtrgmotu6oaadirzs2idl54toffh4a2hehvg2e3zoed4dr6pcdpuqpnz2knte7gqqac6kfatp33ianoqvg6mdd4vaxa27lo6vpugbcvanhskaqq2kewn6kwaaaae4gg33emvrv63tbnvstumz2mnzhglddn5sgky27obqxeylnom5dqortgezc2mzngeycyy3spfyhi5dfpb2f62dbonudumzshkl7fjw5sp7x3yw4sdhze6qf7zgsjocpqtwl2gj5o6vufvixto3u2lddoj4xa5dumv4hix3sn5xxix3imfzwqortgi5fhno37hfotu2p5evmcmpqenjakt7pc6imi65cjp2icfhq2cmcx7rmnzswkzdfmrpxg2dbojsxgorrhizsy3tvnvpxgzlhnvsw45dthiytumjmonswo3lfnz2f643jpjstumz2gmyteldtnbqxezk7ojxw65c7nbqxg2b2gmzdubzmius26hljzu4j7gq5hdshwueqcfjc2bmveiyqbdxgyejetzovfrzws6tfhiztumzrgawhiyljnrpwg33emvrv64dbojqw24z2ha5dgmjsfuzs2mjqfr2g65dbnrpxg2dbojsxgorshiytalaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaadusyxmwhtnfdeewwgxd25fwixycfdcy46ifqv4dhga23fko6dbl4ywo2d27n3zh3wd6zumhupvmtgspqrh6t7wbsghruzqd3imbo2tojfplbwi"), }, } mutable_uri = "URI:SSK:vfvcbdfbszyrsaxchgevhmmlii:euw4iw7bbnkrrwpzuburbhppuxhc3gwxv26f6imekhz7zyw2ojnq" mutable_shares = { 0: { # client[0] 2: 
base32.a2b("krqwq33febwxk5dbmjwgkiddn5xhiyljnzsxeidwgefhkckeaohgreckgxome2uhcps464pzydv5wsywald7wthurw2dp6qxtkeb5vtswoeshuyno24v5oble7xb4j6ij7wwqriaaaaaaaaabb5aaaaaaaaaacsoaaaaaakjl2ynu3wrm2mwdv3syv4r34b5mklbtjuv5i5bzcuiwgfnl4wtpombwn7l7ugdvv2xut7hwbttcjfsacuhc7ipf43gvrgrt5vj7hau52uenoywreckgxome2uhcps464pzydv5wsywaldqaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaarybcjqw7lbehfrbu7pwx26vvv3mbjprnqup7q7wxmdsoyiqaqzgbtv6cplrrgpzbnptsoqz7imnauamfaaaaaaaaaaamgaaaaaaaaaaaymaaaaghqaaacr4aaaayxaaaagnyaaaaaaaaaao4qaaaaaaaaacd2gcbacibqbudaskugjcdpodibaeaqkaadqiaq2abqqiaqqaucaeaqbvkghg3s5ivqiq4uiajdzagl7iqhz2uxwj6aqk47oscyqxyljmxkapldxhxigum2ejxflnps3yrxwwehop2zairilrouid5s6wxgnoqg2vq7rx77icxfnx7mq65niuknra3tezhpndwo7pdxtvtci645p4umyhdh5gp3kbdpypphvouaql662r6ufigp4wwm6emrsavlontn4wttg6lv7bcmq6ojw5utpvz3agoenovrkndncjzxog7sp2w7l6jkmzgfxd2asxos5khkjbxbuhgkd6j5yqlzsmk3kq67engtlgyd7hxk7nedw73bq2bs4353wtler23poucntgve22acfxdbyj2f6q2saj2agu2ptfk364d3zayddffxkcah4olnkczjonselwwrvdcu3vch3licaeirogosgsui3y4ovcyzleurbiunp7fsfk2fgmiek7b3jogvrhj4snvkpqjfh2w2lqnkvqvuoojmgrn6sll354mpomajbtlsv6fuguzgsxsm3l2ehxdxfdv6kcoyogaa5raetooahunf4ltvfdb4d7mwgpj4pg7tmcw55ku5vgx7tjqkdkuyq2uvhavkbsbujsrcfzve5ekuhftxm7nmtomibrblbwyxcr5mcy6qqwx66lrhejmgewrig74nzpriualhw4g22qaw423qeezqn6irea3vlgc3foz4egf6irincownoq7utv643vmtoueebigwrl6nehbos3ipsdx44tmucpvyui2jzgfulb5mrrcjuycmuzfigu6tf25lbysqn7n7smhqniddgctjt573rtd6o63wiaactacm7dw7giohzcgwe4okbijnmzlacetmfjjbasrd6whexjmwkaiaabdm4cpjolak4m47telnokjaxwodj7ont7n7vffnmhkzp3lyshkh3qaaohafr65kctby6wa34bjpnviviiwmwq5mft3yho4tmslaarpcg6biaajlxuwwafut5a6dsr7lq5fkmiik7icppic5ffjjmqaud4y746q2rzd42k42oitzukawdl2fupkoqcztfu7qf2flp55xh4lm6rzpdbb7gtnx4kaffym36rboalf2tbmatt46ra6igvjnvwmig6ivf6gqrhcietf373xrbm3bpeecz7luv7kv76i7pwa5xtubga37vnlu6hspejpsenxiptd23ipri7u5w7lz67mdjfrpahtp5j46obg4ct7c5lelfskzqw5hq7x7kd7pbcgq3gjbv53amzxjelwgxpf6ni74zb6aixhjjllivkthks6df52kvobtcnscytmjrrfbekvwmhtbcke2cgcyaj2cra7xmnd4bw2xe2qki5kycopo45ekfyxwzsmxuyxvjzqklnqjwm3j3gwcm75ftnrsvbj33w6eyr4dbz2tewum7vfsyfbb3ojw5ujtn22jxpr4nkmkqup6z7ukpp4cdxwyge2psc5suaxaltmp23gbawoo3qjeo44hgwgtkge2oowox3fpxwxkckaqgrxilfeyxmjp4cwf2rhpkbwtveqkukchv3u5rfkykwfj5zhleu3obsif6ldfclaef32wjpe5d6ddh2grdx2yt6tuhw53t6zuuumlw6t6i3e2ev7nh36e4shwbn3ew2bbahn6imgb5sismfttf5cdlr5kw6wvjoaqiaiz2onxecfx735dvon7epthnklq67tnqj4fvcwechbvmnkpiwd5fd36dirpshc7i7zj3rcr5y3kzps5nfnfnik27gdgemfn27iblcjh5bpkio6sr375bmxbh6fshbo7cvjzsdsirsafnbjzgl6ycqczwbacj5sxwgrzl6qbdhfbzev5fzutznzbasejqdjm3qxsdcuqvqau3kih2anr2itgmr44wmwtsk6bd42m2j436ptslaugmbbvtbzsukeqytkse7niovlilyfssn3ipzkqtclmetqrxxn7h56xn2ju7pjnuamd6ijfawn2jpjsrmnawaozeniosvhgovftoj24dl77ytdkxdl7ogappnlgkqsjyy43urtnj6tqf2psfptkbzyx4nu3rzgsqqi5ybx3pu6cvt6de67xutdz566wrkp2ymy5n7tqchmw77ss532noqcbfxv6quum6jmeed3exasdapvid6bilwzm5dcnutkcxktmsdryqopw5ntws3tnbd7um27clmxkgl2uinwzvv4tmo4axbj5zmgfd6sy2fiw6efdwjcyj4awdx3peuxcyh3ccee63w2nqaltierdrevl3f3hnjsrdrl4aosd23szhhaimhg2mjuocottcdsraoczh3waoyxx2skunaphe6w5gutu2z7cag3cx4pgsspfmspphuunzx357x6l36hj3tdys727rhawfwc4wc4ytgts4nrxlxl3xxzffunlhjhzj5guxljwxfrxwjfsg5c67pg3js7gvfqmpson6rjgiuwbsklranwhauq74lbesavftdzf7y3x5zwbi4uu6q2vqimbkttm7k6ycttsgknej2ylkwdxgtut7wecpepnb52
7pblj3vuzldjt3whsmstax536plulalxtxmvj6vvg4phofyaidhxhhkl4dfb6oabp3fi55jt77pw3jl55pwbsimjpdanuenll2xxctr6swaimuaqk4wvqa6rbgow3onr74v4alkuukc2tlmorvsjpgaazpun6pbfyorazzarhc2r7fjt55pmosowrqcpdwl2q34hcco2f3icmpktchxdvtpmitufnplqaifbtlktkpo7b22244n6dkmxtgcnxtixsit57uhh7rc5rqezjz7pfd7ojhrui5bcdzb7bo2nbo6o24lpwbg4bmqgbqpbwclq6kglgxefryxlkqydillki3545vcrelfw6reszml6emuyjscx377on2qpq26j5jrh5xmbwmpcyq6sewanlbmwwk2vqhq5zunbcyd6h5z3ms3bgfn7lflvev5vwmjnv5nzbgrmpamy453zuvy6xc6jp7tqgpmrlxup7suptejbacm6rdurdhcaori6i25wylgaikfov4dfgeswxdeerogy2m5tbzsdlr7pfhchd4wnokuipfwjzejxiruj5cljm66hvn47j3eseys3nsi6xdh566jgap5s5e7ytdkkhh5lsuv47oose4luozz427dzk577jccjg3n7b4myd565edmsywol3hgh2i54lcya6saaaaaaa"), 7: base32.a2b("krqwq33febwxk5dbmjwgkiddn5xhiyljnzsxeidwgefhkckeaohgreckgxome2uhcps464pzydv5wsywald7wthurw2dp6qxtkeb5vtswoeshuyno24v5oble7xb4j6ij7wwqriaaaaaaaaabb5aaaaaaaaaacsoaaaaaakjl2ynu3wrm2mwdv3syv4r34b5mklbtjuv5i5bzcuiwgfnl4wtpombwn7l7ugdvv2xut7hwbttcjfsacuhc7ipf43gvrgrt5vj7hau52uenoywreckgxome2uhcps464pzydv5wsywaldqaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaarybcjqw7lbehfrbu7pwx26vvv3mbjprnqup7q7wxmdsoyiqaqzgbtv6cplrrgpzbnptsoqz7imnauamfaaaaaaaaaaamgaaaaaaaaaaaymaaaaghqaaacr4aaaayxaaaagnyaaaaaaaaaao4qaaaaaaaaacd2gcbacibqbudaskugjcdpodibaeaqkaadqiaq2abqqiaqqaucaeaqbvkghg3s5ivqiq4uiajdzagl7iqhz2uxwj6aqk47oscyqxyljmxkapldxhxigum2ejxflnps3yrxwwehop2zairilrouid5s6wxgnoqg2vq7rx77icxfnx7mq65niuknra3tezhpndwo7pdxtvtci645p4umyhdh5gp3kbdpypphvouaql662r6ufigp4wwm6emrsavlontn4wttg6lv7bcmq6ojw5utpvz3agoenovrkndncjzxog7sp2w7l6jkmzgfxd2asxos5khkjbxbuhgkd6j5yqlzsmk3kq67engtlgyd7hxk7nedw73bq2bs4353wtler23poucntgve22acfxdbyj2f6q2saj2agu2ptfk364d3zayddffxkcah4olnkczjonselwwrvdcu3vch3licaeirogosgsui3y4ovcyzleurbiunp7fsfk2fgmiek7b3jogvrhj4snvkpqjfh2w2lqnkvqvuoojmgrn6sll354mpomajbtlsv6fuguzgsxsm3l2ehxdxfdv6kcoyogaa5raetooahunf4ltvfdb4d7mwgpj4pg7tmcw55ku5vgx7tjqkdkuyq2uvhavkbsbujsrcfzve5ekuhftxm7nmtomibrblbwyxcr5mcy6qqwx66lrhejmgewrig74nzpriualhw4g22qaw423qeezqn6irea3vlgc3foz4egf6irincownoq7utv643vmtoueebigwrl6nehbos3ipsdx44tmucpvyui2jzgfulb5mrrcjuycmuzfigu6tf25lbysqn7n7smhqniddgctjt573rtd6o63wiaactacm7dw7giohzcgwe4okbijnmzlacetmfjjbasrd6whexjmwkaiaaazuum3xriq54h5v6afcrrl3kkbd46oizwulc5fbslmblxfc3ldyyqaaszc7rkciv6rhwt5gbgnl5u54ihnqrfyuh7s54r2444mrhcwgizieaak4ap2xhvuz664fw3kayv7z5vawqs7skj6frzp3ihmk7js3tr7cwpnbfwoefuyn6bqkj5kssx3rvvffqgd3mhb7pbtegk6qfvsopvzmsiftabaykw3qitiqcv2wwfvdud5lkbjigatrf4ndeejsij5ab3eyaqqgxfiyxtv674qwltgynickeznu5el6uhs2k75hq2rsxhco2kmxw4didbdjodmjf2nrne63du76fd6laa7ng7zq4i7bx2xtohfrgwlxls6h7ibfsbybdz46sow3tn4vao3ulciz75kfbb62jrz3omvnihr2jwthks6df52kvobtcnscytmjrrfbekvwmhtbcke2cgcyaj2cra7xmnd4bw2xe2qki5kycopo45ekfyxwzsmxuyxvjzqklnqjwm3j3gwcm75ftnrsvbj33w6eyr4dbz2tewum7vfsyfbb3ojw5ujtn22jxpr4nkmkqup6z7ukpp4cdxwyge2psc5suaxaltmp23gbawoo3qjeo44hgwgtkge2oowox3fpxwxkckaqgrxilfeyxmjp4cwf2rhpkbwtveqkukchv3u5rfkykwfj5zhleu3obsif6ldfclaef32wjpe5d6ddh2grdx2yt6tuhw53t6zuuumlw6t6i3e2ev7nh36e4shwbn3ew2bbahn6imgb5sismfttf5cdlr5kw6wvjoaqiaiz2onxecfx735dvon7epthnklq67tnqj4fvcwechbvmnkpiwd5fd36dirpshc7i7zj3rcr5y3kzps5nfnfnik27gdgemfn27iblcjh5bpkio6sr375bmxbh6fshbo7cvjzsdsirsafnbjzgl6ycqczwbacj5sxwgrzl6qbdhfbzev5fzutznzbase
jqdjm3qxsdcuqvqau3kih2anr2itgmr44wmwtsk6bd42m2j436ptslaugmbbvtbzsukeqytkse7niovlilyfssn3ipzkqtclmetqrxxn7h56xn2ju7pjnuamd6ijfawn2jpjsrmnawaozeniosvhgovftoj24dl77ytdkxdl7ogappnlgkqsjyy43urtnj6tqf2psfptkbzyx4nu3rzgsqqi5ybx3pu6cvt6de67xutdz566wrkp2ymy5n7tqchmw77ss532noqcbfxv6quum6jmeed3exasdapvid6bilwzm5dcnutkcxktmsdryqopw5ntws3tnbd7um27clmxkgl2uinwzvv4tmo4axbj5zmgfd6sy2fiw6efdwjcyj4awdx3peuxcyh3ccee63w2nqaltierdrevl3f3hnjsrdrl4aosd23szhhaimhg2mjuocottcdsraoczh3waoyxx2skunaphe6w5gutu2z7cag3cx4pgsspfmspphuunzx357x6l36hj3tdys727rhawfwc4wc4ytgts4nrxlxl3xxzffunlhjhzj5guxljwxfrxwjfsg5c67pg3js7gvfqmpson6rjgiuwbsklranwhauq74lbesavftdzf7y3x5zwbi4uu6q2vqimbkttm7k6ycttsgknej2ylkwdxgtut7wecpepnb527pblj3vuzldjt3whsmstax536plulalxtxmvj6vvg4phofyaidhxhhkl4dfb6oabp3fi55jt77pw3jl55pwbsimjpdanuenll2xxctr6swaimuaqk4wvqa6rbgow3onr74v4alkuukc2tlmorvsjpgaazpun6pbfyorazzarhc2r7fjt55pmosowrqcpdwl2q34hcco2f3icmpktchxdvtpmitufnplqaifbtlktkpo7b22244n6dkmxtgcnxtixsit57uhh7rc5rqezjz7pfd7ojhrui5bcdzb7bo2nbo6o24lpwbg4bmqgbqpbwclq6kglgxefryxlkqydillki3545vcrelfw6reszml6emuyjscx377on2qpq26j5jrh5xmbwmpcyq6sewanlbmwwk2vqhq5zunbcyd6h5z3ms3bgfn7lflvev5vwmjnv5nzbgrmpamy453zuvy6xc6jp7tqgpmrlxup7suptejbacm6rdurdhcaori6i25wylgaikfov4dfgeswxdeerogy2m5tbzsdlr7pfhchd4wnokuipfwjzejxiruj5cljm66hvn47j3eseys3nsi6xdh566jgap5s5e7ytdkkhh5lsuv47oose4luozz427dzk577jccjg3n7b4myd565edmsywol3hgh2i54lcya6saaaaaaa"), }, 1: { # client[1] 3: base32.a2b("krqwq33febwxk5dbmjwgkiddn5xhiyljnzsxeidwgefhkckeaohm5tnwcmfsfmep4exoamss5lqyleq2ehahoduym5vgk37pmxx2xekzrtlzfvhapzb2fe3quv6tv3atr3g6ykqaaaaaaaaabb5aaaaaaaaaacsoaaaaaakjl2ynvx5mk74p2on26ax4rlp5jcoto5jkz3ndmgbmurhez4a5rbuyr55acbwlgbndlebsdyvlt4ttog767zqpoq3n2a4pra5va2o5zvbttlh45tnwcmfsfmep4exoamss5lqyleq2ehaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaarybcjqw7lbehfrbu7pwx26vvv3mbjprnqup7q7wxmdsoyiqaqzgbtv6cplrrgpzbnptsoqz7imnauamfaaaaaaaaaaamgaaaaaaaaaaaymaaaaghqaaacr4aaaayxaaaagnyaaaaaaaaaao4qaaaaaaaaacd2gcbacibqbudaskugjcdpodibaeaqkaadqiaq2abqqiaqqaucaeaqbvkghg3s5ivqiq4uiajdzagl7iqhz2uxwj6aqk47oscyqxyljmxkapldxhxigum2ejxflnps3yrxwwehop2zairilrouid5s6wxgnoqg2vq7rx77icxfnx7mq65niuknra3tezhpndwo7pdxtvtci645p4umyhdh5gp3kbdpypphvouaql662r6ufigp4wwm6emrsavlontn4wttg6lv7bcmq6ojw5utpvz3agoenovrkndncjzxog7sp2w7l6jkmzgfxd2asxos5khkjbxbuhgkd6j5yqlzsmk3kq67engtlgyd7hxk7nedw73bq2bs4353wtler23poucntgve22acfxdbyj2f6q2saj2agu2ptfk364d3zayddffxkcah4olnkczjonselwwrvdcu3vch3licaeirogosgsui3y4ovcyzleurbiunp7fsfk2fgmiek7b3jogvrhj4snvkpqjfh2w2lqnkvqvuoojmgrn6sll354mpomajbtlsv6fuguzgsxsm3l2ehxdxfdv6kcoyogaa5raetooahunf4ltvfdb4d7mwgpj4pg7tmcw55ku5vgx7tjqkdkuyq2uvhavkbsbujsrcfzve5ekuhftxm7nmtomibrblbwyxcr5mcy6qqwx66lrhejmgewrig74nzpriualhw4g22qaw423qeezqn6irea3vlgc3foz4egf6irincownoq7utv643vmtoueebigwrl6nehbos3ipsdx44tmucpvyui2jzgfulb5mrrcjuycmuzfigu6tf25lbysqn7n7smhqniddgctjt573rtd6o63wiaactacm7dw7giohzcgwe4okbijnmzlacetmfjjbasrd6whexjmwkaiaabdm4cpjolak4m47telnokjaxwodj7ont7n7vffnmhkzp3lyshkh3qaaohafr65kctby6wa34bjpnviviiwmwq5mft3yho4tmslaarpcg6biaaixzuvzu4rhtiubmgxuli6u5aftglj7alukw733opywz5ds6gcd6nf32llac2j6qpbzi7vyosvgeefpubhxubossuuwiakb6mp6pini4rja473klkmi52lzfwofja7bb6pixgcxkwdaerc2irfpnrqwh5o2remu3iv3dtib75ku63cb6xz
j4h53nmsguanjpganh3ow5yzovjcsezsj2cunyvlpva63zx5sudxe2zrtcu5zoty2tjzzlhodaz6rxe62ehbiktd4pmaodaz6ajsrohw7tdga2dpaftzbhadsolylgwgtbymenwthks6df52kvobtcnscytmjrrfbekvwmhtbcke2cgcyaj2cra7xmnd4bw2xe2qki5kycopo45ekfyxwzsmxuyxvjzqklnqjwm3j3gwcm75ftnrsvbj33w6eyr4dbz2tewum7vfsyfbb3ojw5ujtn22jxpr4nkmkqup6z7ukpp4cdxwyge2psc5suaxaltmp23gbawoo3qjeo44hgwgtkge2oowox3fpxwxkckaqgrxilfeyxmjp4cwf2rhpkbwtveqkukchv3u5rfkykwfj5zhleu3obsif6ldfclaef32wjpe5d6ddh2grdx2yt6tuhw53t6zuuumlw6t6i3e2ev7nh36e4shwbn3ew2bbahn6imgb5sismfttf5cdlr5kw6wvjoaqiaiz2onxecfx735dvon7epthnklq67tnqj4fvcwechbvmnkpiwd5fd36dirpshc7i7zj3rcr5y3kzps5nfnfnik27gdgemfn27iblcjh5bpkio6sr375bmxbh6fshbo7cvjzsdsirsafnbjzgl6ycqczwbacj5sxwgrzl6qbdhfbzev5fzutznzbasejqdjm3qxsdcuqvqau3kih2anr2itgmr44wmwtsk6bd42m2j436ptslaugmbbvtbzsukeqytkse7niovlilyfssn3ipzkqtclmetqrxxn7h56xn2ju7pjnuamd6ijfawn2jpjsrmnawaozeniosvhgovftoj24dl77ytdkxdl7ogappnlgkqsjyy43urtnj6tqf2psfptkbzyx4nu3rzgsqqi5ybx3pu6cvt6de67xutdz566wrkp2ymy5n7tqchmw77ss532noqcbfxv6quum6jmeed3exasdapvid6bilwzm5dcnutkcxktmsdryqopw5ntws3tnbd7um27clmxkgl2uinwzvv4tmo4axbj5zmgfd6sy2fiw6efdwjcyj4awdx3peuxcyh3ccee63w2nqaltierdrevl3f3hnjsrdrl4aosd23szhhaimhg2mjuocottcdsraoczh3waoyxx2skunaphe6w5gutu2z7cag3cx4pgsspfmspphuunzx357x6l36hj3tdys727rhawfwc4wc4ytgts4nrxlxl3xxzffunlhjhzj5guxljwxfrxwjfsg5c67pg3js7gvfqmpson6rjgiuwbsklranwhauq74lbesavftdzf7y3x5zwbi4uu6q2vqimbkttm7k6ycttsgknej2ylkwdxgtut7wecpepnb527pblj3vuzldjt3whsmstax536plulalxtxmvj6vvg4phofyaidhxhhkl4dfb6oabp3fi55jt77pw3jl55pwbsimjpdanuenll2xxctr6swaimuaqk4wvqa6rbgow3onr74v4alkuukc2tlmorvsjpgaazpun6pbfyorazzarhc2r7fjt55pmosowrqcpdwl2q34hcco2f3icmpktchxdvtpmitufnplqaifbtlktkpo7b22244n6dkmxtgcnxtixsit57uhh7rc5rqezjz7pfd7ojhrui5bcdzb7bo2nbo6o24lpwbg4bmqgbqpbwclq6kglgxefryxlkqydillki3545vcrelfw6reszml6emuyjscx377on2qpq26j5jrh5xmbwmpcyq6sewanlbmwwk2vqhq5zunbcyd6h5z3ms3bgfn7lflvev5vwmjnv5nzbgrmpamy453zuvy6xc6jp7tqgpmrlxup7suptejbacm6rdurdhcaori6i25wylgaikfov4dfgeswxdeerogy2m5tbzsdlr7pfhchd4wnokuipfwjzejxiruj5cljm66hvn47j3eseys3nsi6xdh566jgap5s5e7ytdkkhh5lsuv47oose4luozz427dzk577jccjg3n7b4myd565edmsywol3hgh2i54lcya6saaaaaaa"), 8: 
base32.a2b("krqwq33febwxk5dbmjwgkiddn5xhiyljnzsxeidwgefhkckeaohm5tnwcmfsfmep4exoamss5lqyleq2ehahoduym5vgk37pmxx2xekzrtlzfvhapzb2fe3quv6tv3atr3g6ykqaaaaaaaaabb5aaaaaaaaaacsoaaaaaakjl2ynvx5mk74p2on26ax4rlp5jcoto5jkz3ndmgbmurhez4a5rbuyr55acbwlgbndlebsdyvlt4ttog767zqpoq3n2a4pra5va2o5zvbttlh45tnwcmfsfmep4exoamss5lqyleq2ehaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaarybcjqw7lbehfrbu7pwx26vvv3mbjprnqup7q7wxmdsoyiqaqzgbtv6cplrrgpzbnptsoqz7imnauamfaaaaaaaaaaamgaaaaaaaaaaaymaaaaghqaaacr4aaaayxaaaagnyaaaaaaaaaao4qaaaaaaaaacd2gcbacibqbudaskugjcdpodibaeaqkaadqiaq2abqqiaqqaucaeaqbvkghg3s5ivqiq4uiajdzagl7iqhz2uxwj6aqk47oscyqxyljmxkapldxhxigum2ejxflnps3yrxwwehop2zairilrouid5s6wxgnoqg2vq7rx77icxfnx7mq65niuknra3tezhpndwo7pdxtvtci645p4umyhdh5gp3kbdpypphvouaql662r6ufigp4wwm6emrsavlontn4wttg6lv7bcmq6ojw5utpvz3agoenovrkndncjzxog7sp2w7l6jkmzgfxd2asxos5khkjbxbuhgkd6j5yqlzsmk3kq67engtlgyd7hxk7nedw73bq2bs4353wtler23poucntgve22acfxdbyj2f6q2saj2agu2ptfk364d3zayddffxkcah4olnkczjonselwwrvdcu3vch3licaeirogosgsui3y4ovcyzleurbiunp7fsfk2fgmiek7b3jogvrhj4snvkpqjfh2w2lqnkvqvuoojmgrn6sll354mpomajbtlsv6fuguzgsxsm3l2ehxdxfdv6kcoyogaa5raetooahunf4ltvfdb4d7mwgpj4pg7tmcw55ku5vgx7tjqkdkuyq2uvhavkbsbujsrcfzve5ekuhftxm7nmtomibrblbwyxcr5mcy6qqwx66lrhejmgewrig74nzpriualhw4g22qaw423qeezqn6irea3vlgc3foz4egf6irincownoq7utv643vmtoueebigwrl6nehbos3ipsdx44tmucpvyui2jzgfulb5mrrcjuycmuzfigu6tf25lbysqn7n7smhqniddgctjt573rtd6o63wiaabduzspufh6gomrp7sycuerhgl7ah3x3mpc6watmzlp6y23afmlxcaabui4znebv66jxcst6andsd2tncn4xcb6by7hrbx2ihw45fgzsptiiaaybvqu3gmlomi3dnf2tum3hoseavpesyia2i2wqgwbmbtrgmotu6oaamprqe6ozjrouoeltzhezhntop7wb6bbnnr3ak6x3ihvsjlz77gffkdet4sc63bxykwaikdyxwoehbrggxdu6qcwquzsnaltcgn52nyy4ypqbthfg4txtnznap6dktqtgtmtu7icooojppbwyi5c22uehbveptbuhbi7q3d4wuvsrptnd6wrhxwtlkxe4kurp4fey52p2v6urgephzxmaqfhm7pq3wxbi2uj5ourg65xnhbo4lrp7nzrdmk3svespmmitccvtwom6wtqefpp73j67zybiu4wrjjqt7vhip4ipuaezkmdy7feothks6df52kvobtcnscytmjrrfbekvwmhtbcke2cgcyaj2cra7xmnd4bw2xe2qki5kycopo45ekfyxwzsmxuyxvjzqklnqjwm3j3gwcm75ftnrsvbj33w6eyr4dbz2tewum7vfsyfbb3ojw5ujtn22jxpr4nkmkqup6z7ukpp4cdxwyge2psc5suaxaltmp23gbawoo3qjeo44hgwgtkge2oowox3fpxwxkckaqgrxilfeyxmjp4cwf2rhpkbwtveqkukchv3u5rfkykwfj5zhleu3obsif6ldfclaef32wjpe5d6ddh2grdx2yt6tuhw53t6zuuumlw6t6i3e2ev7nh36e4shwbn3ew2bbahn6imgb5sismfttf5cdlr5kw6wvjoaqiaiz2onxecfx735dvon7epthnklq67tnqj4fvcwechbvmnkpiwd5fd36dirpshc7i7zj3rcr5y3kzps5nfnfnik27gdgemfn27iblcjh5bpkio6sr375bmxbh6fshbo7cvjzsdsirsafnbjzgl6ycqczwbacj5sxwgrzl6qbdhfbzev5fzutznzbasejqdjm3qxsdcuqvqau3kih2anr2itgmr44wmwtsk6bd42m2j436ptslaugmbbvtbzsukeqytkse7niovlilyfssn3ipzkqtclmetqrxxn7h56xn2ju7pjnuamd6ijfawn2jpjsrmnawaozeniosvhgovftoj24dl77ytdkxdl7ogappnlgkqsjyy43urtnj6tqf2psfptkbzyx4nu3rzgsqqi5ybx3pu6cvt6de67xutdz566wrkp2ymy5n7tqchmw77ss532noqcbfxv6quum6jmeed3exasdapvid6bilwzm5dcnutkcxktmsdryqopw5ntws3tnbd7um27clmxkgl2uinwzvv4tmo4axbj5zmgfd6sy2fiw6efdwjcyj4awdx3peuxcyh3ccee63w2nqaltierdrevl3f3hnjsrdrl4aosd23szhhaimhg2mjuocottcdsraoczh3waoyxx2skunaphe6w5gutu2z7cag3cx4pgsspfmspphuunzx357x6l36hj3tdys727rhawfwc4wc4ytgts4nrxlxl3xxzffunlhjhzj5guxljwxfrxwjfsg5c67pg3js7gvfqmpson6rjgiuwbsklranwhauq74lbesavftdzf7y3x5zwbi4uu6q2vqimbkttm7k6ycttsgknej2ylkwdxgtut7wecpepnb52
7pblj3vuzldjt3whsmstax536plulalxtxmvj6vvg4phofyaidhxhhkl4dfb6oabp3fi55jt77pw3jl55pwbsimjpdanuenll2xxctr6swaimuaqk4wvqa6rbgow3onr74v4alkuukc2tlmorvsjpgaazpun6pbfyorazzarhc2r7fjt55pmosowrqcpdwl2q34hcco2f3icmpktchxdvtpmitufnplqaifbtlktkpo7b22244n6dkmxtgcnxtixsit57uhh7rc5rqezjz7pfd7ojhrui5bcdzb7bo2nbo6o24lpwbg4bmqgbqpbwclq6kglgxefryxlkqydillki3545vcrelfw6reszml6emuyjscx377on2qpq26j5jrh5xmbwmpcyq6sewanlbmwwk2vqhq5zunbcyd6h5z3ms3bgfn7lflvev5vwmjnv5nzbgrmpamy453zuvy6xc6jp7tqgpmrlxup7suptejbacm6rdurdhcaori6i25wylgaikfov4dfgeswxdeerogy2m5tbzsdlr7pfhchd4wnokuipfwjzejxiruj5cljm66hvn47j3eseys3nsi6xdh566jgap5s5e7ytdkkhh5lsuv47oose4luozz427dzk577jccjg3n7b4myd565edmsywol3hgh2i54lcya6saaaaaaa"), }, 2: { # client[2] 4: base32.a2b("krqwq33febwxk5dbmjwgkiddn5xhiyljnzsxeidwgefhkckeaohelfyqrvy7pzjh3tqx73xsfkpi3so4qjghlywdkwuioyjvbtgekiulaes4myuxydi2sudi2fkg2q5nkjrt3zaaaaaaaaaabb5aaaaaaaaaacsoaaaaaakjl2ynujj2kh34jfiungka3deihevw7p3mzhj7uobc3qnbfxqp3xfazrsicvtz3enqkn4xxlu5xvxjj2rtlv6j3w3kmpzn2jbrnuoafq2aacoulfyqrvy7pzjh3tqx73xsfkpi3so4qjgaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaarybcjqw7lbehfrbu7pwx26vvv3mbjprnqup7q7wxmdsoyiqaqzgbtv6cplrrgpzbnptsoqz7imnauamfaaaaaaaaaaamgaaaaaaaaaaaymaaaaghqaaacr4aaaayxaaaagnyaaaaaaaaaao4qaaaaaaaaacd2gcbacibqbudaskugjcdpodibaeaqkaadqiaq2abqqiaqqaucaeaqbvkghg3s5ivqiq4uiajdzagl7iqhz2uxwj6aqk47oscyqxyljmxkapldxhxigum2ejxflnps3yrxwwehop2zairilrouid5s6wxgnoqg2vq7rx77icxfnx7mq65niuknra3tezhpndwo7pdxtvtci645p4umyhdh5gp3kbdpypphvouaql662r6ufigp4wwm6emrsavlontn4wttg6lv7bcmq6ojw5utpvz3agoenovrkndncjzxog7sp2w7l6jkmzgfxd2asxos5khkjbxbuhgkd6j5yqlzsmk3kq67engtlgyd7hxk7nedw73bq2bs4353wtler23poucntgve22acfxdbyj2f6q2saj2agu2ptfk364d3zayddffxkcah4olnkczjonselwwrvdcu3vch3licaeirogosgsui3y4ovcyzleurbiunp7fsfk2fgmiek7b3jogvrhj4snvkpqjfh2w2lqnkvqvuoojmgrn6sll354mpomajbtlsv6fuguzgsxsm3l2ehxdxfdv6kcoyogaa5raetooahunf4ltvfdb4d7mwgpj4pg7tmcw55ku5vgx7tjqkdkuyq2uvhavkbsbujsrcfzve5ekuhftxm7nmtomibrblbwyxcr5mcy6qqwx66lrhejmgewrig74nzpriualhw4g22qaw423qeezqn6irea3vlgc3foz4egf6irincownoq7utv643vmtoueebigwrl6nehbos3ipsdx44tmucpvyui2jzgfulb5mrrcjuycmuzfigu6tf25lbysqn7n7smhqniddgctjt573rtd6o63wiaactacm7dw7giohzcgwe4okbijnmzlacetmfjjbasrd6whexjmwkaiaaazuum3xriq54h5v6afcrrl3kkbd46oizwulc5fbslmblxfc3ldyyqaavmjy6g336aewy42vw5rusytyi7vzs6y22c5jhxyt5w6gthcbjp4zaakhlvz26psskxjisz27qlpzw4annhegunhnvlyr35ijotdizegjf4lgx3o4dt3d6d4bjqexz2eu3dprjlmuvlkbfcpmkq2ceydywqqcqdhmdl2nm5ku6z6gnss2bsbn7ycab2ggktr3bjlzaeo5pb4meolrckviwiddsikieo4wyatlxtybmzkoh3fb2vxc34xb47ty2cyi55xjan6m4bbie7muzrzmjmzviwlotk6icove7ydpag6dlrjwu4svgs3y2ln5r463dmflqs3p4aa7dldhjb5kfpxq63tgquunkucsfvlkaiiisgthks6df52kvobtcnscytmjrrfbekvwmhtbcke2cgcyaj2cra7xmnd4bw2xe2qki5kycopo45ekfyxwzsmxuyxvjzqklnqjwm3j3gwcm75ftnrsvbj33w6eyr4dbz2tewum7vfsyfbb3ojw5ujtn22jxpr4nkmkqup6z7ukpp4cdxwyge2psc5suaxaltmp23gbawoo3qjeo44hgwgtkge2oowox3fpxwxkckaqgrxilfeyxmjp4cwf2rhpkbwtveqkukchv3u5rfkykwfj5zhleu3obsif6ldfclaef32wjpe5d6ddh2grdx2yt6tuhw53t6zuuumlw6t6i3e2ev7nh36e4shwbn3ew2bbahn6imgb5sismfttf5cdlr5kw6wvjoaqiaiz2onxecfx735dvon7epthnklq67tnqj4fvcwechbvmnkpiwd5fd36dirpshc7i7zj3rcr5y3kzps5nfnfnik27gdgemfn27iblcjh5bpkio6sr375bmxbh6fshbo7cvjzsdsirsafnbjzgl6ycqczwbacj5sxwgrzl6q
bdhfbzev5fzutznzbasejqdjm3qxsdcuqvqau3kih2anr2itgmr44wmwtsk6bd42m2j436ptslaugmbbvtbzsukeqytkse7niovlilyfssn3ipzkqtclmetqrxxn7h56xn2ju7pjnuamd6ijfawn2jpjsrmnawaozeniosvhgovftoj24dl77ytdkxdl7ogappnlgkqsjyy43urtnj6tqf2psfptkbzyx4nu3rzgsqqi5ybx3pu6cvt6de67xutdz566wrkp2ymy5n7tqchmw77ss532noqcbfxv6quum6jmeed3exasdapvid6bilwzm5dcnutkcxktmsdryqopw5ntws3tnbd7um27clmxkgl2uinwzvv4tmo4axbj5zmgfd6sy2fiw6efdwjcyj4awdx3peuxcyh3ccee63w2nqaltierdrevl3f3hnjsrdrl4aosd23szhhaimhg2mjuocottcdsraoczh3waoyxx2skunaphe6w5gutu2z7cag3cx4pgsspfmspphuunzx357x6l36hj3tdys727rhawfwc4wc4ytgts4nrxlxl3xxzffunlhjhzj5guxljwxfrxwjfsg5c67pg3js7gvfqmpson6rjgiuwbsklranwhauq74lbesavftdzf7y3x5zwbi4uu6q2vqimbkttm7k6ycttsgknej2ylkwdxgtut7wecpepnb527pblj3vuzldjt3whsmstax536plulalxtxmvj6vvg4phofyaidhxhhkl4dfb6oabp3fi55jt77pw3jl55pwbsimjpdanuenll2xxctr6swaimuaqk4wvqa6rbgow3onr74v4alkuukc2tlmorvsjpgaazpun6pbfyorazzarhc2r7fjt55pmosowrqcpdwl2q34hcco2f3icmpktchxdvtpmitufnplqaifbtlktkpo7b22244n6dkmxtgcnxtixsit57uhh7rc5rqezjz7pfd7ojhrui5bcdzb7bo2nbo6o24lpwbg4bmqgbqpbwclq6kglgxefryxlkqydillki3545vcrelfw6reszml6emuyjscx377on2qpq26j5jrh5xmbwmpcyq6sewanlbmwwk2vqhq5zunbcyd6h5z3ms3bgfn7lflvev5vwmjnv5nzbgrmpamy453zuvy6xc6jp7tqgpmrlxup7suptejbacm6rdurdhcaori6i25wylgaikfov4dfgeswxdeerogy2m5tbzsdlr7pfhchd4wnokuipfwjzejxiruj5cljm66hvn47j3eseys3nsi6xdh566jgap5s5e7ytdkkhh5lsuv47oose4luozz427dzk577jccjg3n7b4myd565edmsywol3hgh2i54lcya6saaaaaaa"), 9: base32.a2b("krqwq33febwxk5dbmjwgkiddn5xhiyljnzsxeidwgefhkckeaohelfyqrvy7pzjh3tqx73xsfkpi3so4qjghlywdkwuioyjvbtgekiulaes4myuxydi2sudi2fkg2q5nkjrt3zaaaaaaaaaabb5aaaaaaaaaacsoaaaaaakjl2ynujj2kh34jfiungka3deihevw7p3mzhj7uobc3qnbfxqp3xfazrsicvtz3enqkn4xxlu5xvxjj2rtlv6j3w3kmpzn2jbrnuoafq2aacoulfyqrvy7pzjh3tqx73xsfkpi3so4qjgaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaarybcjqw7lbehfrbu7pwx26vvv3mbjprnqup7q7wxmdsoyiqaqzgbtv6cplrrgpzbnptsoqz7imnauamfaaaaaaaaaaamgaaaaaaaaaaaymaaaaghqaaacr4aaaayxaaaagnyaaaaaaaaaao4qaaaaaaaaacd2gcbacibqbudaskugjcdpodibaeaqkaadqiaq2abqqiaqqaucaeaqbvkghg3s5ivqiq4uiajdzagl7iqhz2uxwj6aqk47oscyqxyljmxkapldxhxigum2ejxflnps3yrxwwehop2zairilrouid5s6wxgnoqg2vq7rx77icxfnx7mq65niuknra3tezhpndwo7pdxtvtci645p4umyhdh5gp3kbdpypphvouaql662r6ufigp4wwm6emrsavlontn4wttg6lv7bcmq6ojw5utpvz3agoenovrkndncjzxog7sp2w7l6jkmzgfxd2asxos5khkjbxbuhgkd6j5yqlzsmk3kq67engtlgyd7hxk7nedw73bq2bs4353wtler23poucntgve22acfxdbyj2f6q2saj2agu2ptfk364d3zayddffxkcah4olnkczjonselwwrvdcu3vch3licaeirogosgsui3y4ovcyzleurbiunp7fsfk2fgmiek7b3jogvrhj4snvkpqjfh2w2lqnkvqvuoojmgrn6sll354mpomajbtlsv6fuguzgsxsm3l2ehxdxfdv6kcoyogaa5raetooahunf4ltvfdb4d7mwgpj4pg7tmcw55ku5vgx7tjqkdkuyq2uvhavkbsbujsrcfzve5ekuhftxm7nmtomibrblbwyxcr5mcy6qqwx66lrhejmgewrig74nzpriualhw4g22qaw423qeezqn6irea3vlgc3foz4egf6irincownoq7utv643vmtoueebigwrl6nehbos3ipsdx44tmucpvyui2jzgfulb5mrrcjuycmuzfigu6tf25lbysqn7n7smhqniddgctjt573rtd6o63wiaabduzspufh6gomrp7sycuerhgl7ah3x3mpc6watmzlp6y23afmlxcaabui4znebv66jxcst6andsd2tncn4xcb6by7hrbx2ihw45fgzsptiiaaybvqu3gmlomi3dnf2tum3hoseavpesyia2i2wqgwbmbtrgmotu6oaalugjhzef5wdpqvmaquhrpm4iodcmnohj5afnbjte2axgem33u3rr7yycphmuyxkhcfz4tsmtwzxh73a7aqwwy5qfpl5ud2zev477tcsviylwmlv6fgp54rk4iwputjkcgegczq6uynbvebu67jf6f2foocphznw7jrdsvphppguypjwmk
khugm6yjnrjka2ycvxsyh5xohn3fvbbhl4tvhedbaix3zlwxeayabnldp3oqnkjger7yrxh44wuv3adb76jh3nl6h45t4ixj77himst5plmpdtexyoozpxzjmedge5leynxhziothks6df52kvobtcnscytmjrrfbekvwmhtbcke2cgcyaj2cra7xmnd4bw2xe2qki5kycopo45ekfyxwzsmxuyxvjzqklnqjwm3j3gwcm75ftnrsvbj33w6eyr4dbz2tewum7vfsyfbb3ojw5ujtn22jxpr4nkmkqup6z7ukpp4cdxwyge2psc5suaxaltmp23gbawoo3qjeo44hgwgtkge2oowox3fpxwxkckaqgrxilfeyxmjp4cwf2rhpkbwtveqkukchv3u5rfkykwfj5zhleu3obsif6ldfclaef32wjpe5d6ddh2grdx2yt6tuhw53t6zuuumlw6t6i3e2ev7nh36e4shwbn3ew2bbahn6imgb5sismfttf5cdlr5kw6wvjoaqiaiz2onxecfx735dvon7epthnklq67tnqj4fvcwechbvmnkpiwd5fd36dirpshc7i7zj3rcr5y3kzps5nfnfnik27gdgemfn27iblcjh5bpkio6sr375bmxbh6fshbo7cvjzsdsirsafnbjzgl6ycqczwbacj5sxwgrzl6qbdhfbzev5fzutznzbasejqdjm3qxsdcuqvqau3kih2anr2itgmr44wmwtsk6bd42m2j436ptslaugmbbvtbzsukeqytkse7niovlilyfssn3ipzkqtclmetqrxxn7h56xn2ju7pjnuamd6ijfawn2jpjsrmnawaozeniosvhgovftoj24dl77ytdkxdl7ogappnlgkqsjyy43urtnj6tqf2psfptkbzyx4nu3rzgsqqi5ybx3pu6cvt6de67xutdz566wrkp2ymy5n7tqchmw77ss532noqcbfxv6quum6jmeed3exasdapvid6bilwzm5dcnutkcxktmsdryqopw5ntws3tnbd7um27clmxkgl2uinwzvv4tmo4axbj5zmgfd6sy2fiw6efdwjcyj4awdx3peuxcyh3ccee63w2nqaltierdrevl3f3hnjsrdrl4aosd23szhhaimhg2mjuocottcdsraoczh3waoyxx2skunaphe6w5gutu2z7cag3cx4pgsspfmspphuunzx357x6l36hj3tdys727rhawfwc4wc4ytgts4nrxlxl3xxzffunlhjhzj5guxljwxfrxwjfsg5c67pg3js7gvfqmpson6rjgiuwbsklranwhauq74lbesavftdzf7y3x5zwbi4uu6q2vqimbkttm7k6ycttsgknej2ylkwdxgtut7wecpepnb527pblj3vuzldjt3whsmstax536plulalxtxmvj6vvg4phofyaidhxhhkl4dfb6oabp3fi55jt77pw3jl55pwbsimjpdanuenll2xxctr6swaimuaqk4wvqa6rbgow3onr74v4alkuukc2tlmorvsjpgaazpun6pbfyorazzarhc2r7fjt55pmosowrqcpdwl2q34hcco2f3icmpktchxdvtpmitufnplqaifbtlktkpo7b22244n6dkmxtgcnxtixsit57uhh7rc5rqezjz7pfd7ojhrui5bcdzb7bo2nbo6o24lpwbg4bmqgbqpbwclq6kglgxefryxlkqydillki3545vcrelfw6reszml6emuyjscx377on2qpq26j5jrh5xmbwmpcyq6sewanlbmwwk2vqhq5zunbcyd6h5z3ms3bgfn7lflvev5vwmjnv5nzbgrmpamy453zuvy6xc6jp7tqgpmrlxup7suptejbacm6rdurdhcaori6i25wylgaikfov4dfgeswxdeerogy2m5tbzsdlr7pfhchd4wnokuipfwjzejxiruj5cljm66hvn47j3eseys3nsi6xdh566jgap5s5e7ytdkkhh5lsuv47oose4luozz427dzk577jccjg3n7b4myd565edmsywol3hgh2i54lcya6saaaaaaa"), }, 3: { # client[3] 1: 
base32.a2b("krqwq33febwxk5dbmjwgkiddn5xhiyljnzsxeidwgefhkckeaohar2c5jzdcrekne6myzpxl2z65d6ufdjuuyhabg2j57ecmy23jyflcp7djzupj4tfr345bkg7cmwxmpmn3h4iaaaaaaaaabb5aaaaaaaaaacsoaaaaaakjl2ynu3sjzjwrfjn4cwfspkueq47j6ej2uodmjsjexyray7dn6ut4nnuftdhhgxo3t3a5eoipsdy5evdihyeigny3c4adtpveplcwt76m7naar2c5jzdcrekne6myzpxl2z65d6ufdjuqaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaarybcjqw7lbehfrbu7pwx26vvv3mbjprnqup7q7wxmdsoyiqaqzgbtv6cplrrgpzbnptsoqz7imnauamfaaaaaaaaaaamgaaaaaaaaaaaymaaaaghqaaacr4aaaayxaaaagnyaaaaaaaaaao4qaaaaaaaaacd2gcbacibqbudaskugjcdpodibaeaqkaadqiaq2abqqiaqqaucaeaqbvkghg3s5ivqiq4uiajdzagl7iqhz2uxwj6aqk47oscyqxyljmxkapldxhxigum2ejxflnps3yrxwwehop2zairilrouid5s6wxgnoqg2vq7rx77icxfnx7mq65niuknra3tezhpndwo7pdxtvtci645p4umyhdh5gp3kbdpypphvouaql662r6ufigp4wwm6emrsavlontn4wttg6lv7bcmq6ojw5utpvz3agoenovrkndncjzxog7sp2w7l6jkmzgfxd2asxos5khkjbxbuhgkd6j5yqlzsmk3kq67engtlgyd7hxk7nedw73bq2bs4353wtler23poucntgve22acfxdbyj2f6q2saj2agu2ptfk364d3zayddffxkcah4olnkczjonselwwrvdcu3vch3licaeirogosgsui3y4ovcyzleurbiunp7fsfk2fgmiek7b3jogvrhj4snvkpqjfh2w2lqnkvqvuoojmgrn6sll354mpomajbtlsv6fuguzgsxsm3l2ehxdxfdv6kcoyogaa5raetooahunf4ltvfdb4d7mwgpj4pg7tmcw55ku5vgx7tjqkdkuyq2uvhavkbsbujsrcfzve5ekuhftxm7nmtomibrblbwyxcr5mcy6qqwx66lrhejmgewrig74nzpriualhw4g22qaw423qeezqn6irea3vlgc3foz4egf6irincownoq7utv643vmtoueebigwrl6nehbos3ipsdx44tmucpvyui2jzgfulb5mrrcjuycmuzfigu6tf25lbysqn7n7smhqniddgctjt573rtd6o63wiaactacm7dw7giohzcgwe4okbijnmzlacetmfjjbasrd6whexjmwkaiaabdm4cpjolak4m47telnokjaxwodj7ont7n7vffnmhkzp3lyshkh3qaarzybn64ru5rss7tmi4ttv26q66ebdvvrtyd3s5t7dmqku3uoefroaahxhmt46bsa3cpmjfwjyw3zijhhbqh3j2dbc42jaqj6wvmxoz7pecirykndmb6dylde5utzkpucky5pk3x4u6dphkq2ycmfuyvpg5lsudusosyofwfnokbe7qmld2xwaxah3qkywarndsfvp3rybq2y7q42silj5cnlbdxnabv2zhhix3h5o5kz2ttqzm34clnbo527obrxvqlxz3sofwcmz2kqs4c3ypj6o4ny4hkh6qu7ljs7xiygzmoojhnaxc6wjbnvnsu2socztfaegy6ft22tgtdudtok4z755vgj3etwmje73af2f2thks6df52kvobtcnscytmjrrfbekvwmhtbcke2cgcyaj2cra7xmnd4bw2xe2qki5kycopo45ekfyxwzsmxuyxvjzqklnqjwm3j3gwcm75ftnrsvbj33w6eyr4dbz2tewum7vfsyfbb3ojw5ujtn22jxpr4nkmkqup6z7ukpp4cdxwyge2psc5suaxaltmp23gbawoo3qjeo44hgwgtkge2oowox3fpxwxkckaqgrxilfeyxmjp4cwf2rhpkbwtveqkukchv3u5rfkykwfj5zhleu3obsif6ldfclaef32wjpe5d6ddh2grdx2yt6tuhw53t6zuuumlw6t6i3e2ev7nh36e4shwbn3ew2bbahn6imgb5sismfttf5cdlr5kw6wvjoaqiaiz2onxecfx735dvon7epthnklq67tnqj4fvcwechbvmnkpiwd5fd36dirpshc7i7zj3rcr5y3kzps5nfnfnik27gdgemfn27iblcjh5bpkio6sr375bmxbh6fshbo7cvjzsdsirsafnbjzgl6ycqczwbacj5sxwgrzl6qbdhfbzev5fzutznzbasejqdjm3qxsdcuqvqau3kih2anr2itgmr44wmwtsk6bd42m2j436ptslaugmbbvtbzsukeqytkse7niovlilyfssn3ipzkqtclmetqrxxn7h56xn2ju7pjnuamd6ijfawn2jpjsrmnawaozeniosvhgovftoj24dl77ytdkxdl7ogappnlgkqsjyy43urtnj6tqf2psfptkbzyx4nu3rzgsqqi5ybx3pu6cvt6de67xutdz566wrkp2ymy5n7tqchmw77ss532noqcbfxv6quum6jmeed3exasdapvid6bilwzm5dcnutkcxktmsdryqopw5ntws3tnbd7um27clmxkgl2uinwzvv4tmo4axbj5zmgfd6sy2fiw6efdwjcyj4awdx3peuxcyh3ccee63w2nqaltierdrevl3f3hnjsrdrl4aosd23szhhaimhg2mjuocottcdsraoczh3waoyxx2skunaphe6w5gutu2z7cag3cx4pgsspfmspphuunzx357x6l36hj3tdys727rhawfwc4wc4ytgts4nrxlxl3xxzffunlhjhzj5guxljwxfrxwjfsg5c67pg3js7gvfqmpson6rjgiuwbsklranwhauq74lbesavftdzf7y3x5zwbi4uu6q2vqimbkttm7k6ycttsgknej2ylkwdxgtut7wecpepnb52
7pblj3vuzldjt3whsmstax536plulalxtxmvj6vvg4phofyaidhxhhkl4dfb6oabp3fi55jt77pw3jl55pwbsimjpdanuenll2xxctr6swaimuaqk4wvqa6rbgow3onr74v4alkuukc2tlmorvsjpgaazpun6pbfyorazzarhc2r7fjt55pmosowrqcpdwl2q34hcco2f3icmpktchxdvtpmitufnplqaifbtlktkpo7b22244n6dkmxtgcnxtixsit57uhh7rc5rqezjz7pfd7ojhrui5bcdzb7bo2nbo6o24lpwbg4bmqgbqpbwclq6kglgxefryxlkqydillki3545vcrelfw6reszml6emuyjscx377on2qpq26j5jrh5xmbwmpcyq6sewanlbmwwk2vqhq5zunbcyd6h5z3ms3bgfn7lflvev5vwmjnv5nzbgrmpamy453zuvy6xc6jp7tqgpmrlxup7suptejbacm6rdurdhcaori6i25wylgaikfov4dfgeswxdeerogy2m5tbzsdlr7pfhchd4wnokuipfwjzejxiruj5cljm66hvn47j3eseys3nsi6xdh566jgap5s5e7ytdkkhh5lsuv47oose4luozz427dzk577jccjg3n7b4myd565edmsywol3hgh2i54lcya6saaaaaaa"), 6: base32.a2b("krqwq33febwxk5dbmjwgkiddn5xhiyljnzsxeidwgefhkckeaohar2c5jzdcrekne6myzpxl2z65d6ufdjuuyhabg2j57ecmy23jyflcp7djzupj4tfr345bkg7cmwxmpmn3h4iaaaaaaaaabb5aaaaaaaaaacsoaaaaaakjl2ynu3sjzjwrfjn4cwfspkueq47j6ej2uodmjsjexyray7dn6ut4nnuftdhhgxo3t3a5eoipsdy5evdihyeigny3c4adtpveplcwt76m7naar2c5jzdcrekne6myzpxl2z65d6ufdjuqaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaarybcjqw7lbehfrbu7pwx26vvv3mbjprnqup7q7wxmdsoyiqaqzgbtv6cplrrgpzbnptsoqz7imnauamfaaaaaaaaaaamgaaaaaaaaaaaymaaaaghqaaacr4aaaayxaaaagnyaaaaaaaaaao4qaaaaaaaaacd2gcbacibqbudaskugjcdpodibaeaqkaadqiaq2abqqiaqqaucaeaqbvkghg3s5ivqiq4uiajdzagl7iqhz2uxwj6aqk47oscyqxyljmxkapldxhxigum2ejxflnps3yrxwwehop2zairilrouid5s6wxgnoqg2vq7rx77icxfnx7mq65niuknra3tezhpndwo7pdxtvtci645p4umyhdh5gp3kbdpypphvouaql662r6ufigp4wwm6emrsavlontn4wttg6lv7bcmq6ojw5utpvz3agoenovrkndncjzxog7sp2w7l6jkmzgfxd2asxos5khkjbxbuhgkd6j5yqlzsmk3kq67engtlgyd7hxk7nedw73bq2bs4353wtler23poucntgve22acfxdbyj2f6q2saj2agu2ptfk364d3zayddffxkcah4olnkczjonselwwrvdcu3vch3licaeirogosgsui3y4ovcyzleurbiunp7fsfk2fgmiek7b3jogvrhj4snvkpqjfh2w2lqnkvqvuoojmgrn6sll354mpomajbtlsv6fuguzgsxsm3l2ehxdxfdv6kcoyogaa5raetooahunf4ltvfdb4d7mwgpj4pg7tmcw55ku5vgx7tjqkdkuyq2uvhavkbsbujsrcfzve5ekuhftxm7nmtomibrblbwyxcr5mcy6qqwx66lrhejmgewrig74nzpriualhw4g22qaw423qeezqn6irea3vlgc3foz4egf6irincownoq7utv643vmtoueebigwrl6nehbos3ipsdx44tmucpvyui2jzgfulb5mrrcjuycmuzfigu6tf25lbysqn7n7smhqniddgctjt573rtd6o63wiaactacm7dw7giohzcgwe4okbijnmzlacetmfjjbasrd6whexjmwkaiaaazuum3xriq54h5v6afcrrl3kkbd46oizwulc5fbslmblxfc3ldyyqaaszc7rkciv6rhwt5gbgnl5u54ihnqrfyuh7s54r2444mrhcwgizieaalkclm4iljq34daut2vffpxdlkklamhwyod66dgimv5alle47lszewah5lt22m7poc3nvamk7462qlijpzfe7cy4x5udwfpuznzy7rlhx7ev5hmvxi5m3nctyofw2axz6a4fttdxoefezaqu7wur2rtcmxx5wxmpdkfflvzvawzr2oecq7yriklbc2nfyk4ezeulmdaktctlwcoz26jt3yx5gg2ez6jnhblc5swn7qbl6t3ebm2fmworvtrpxyqhegsly6xtpbh2yfdu6ww52ypka6cc4crgov33cdnbxyekdmjck2h55ni4othks6df52kvobtcnscytmjrrfbekvwmhtbcke2cgcyaj2cra7xmnd4bw2xe2qki5kycopo45ekfyxwzsmxuyxvjzqklnqjwm3j3gwcm75ftnrsvbj33w6eyr4dbz2tewum7vfsyfbb3ojw5ujtn22jxpr4nkmkqup6z7ukpp4cdxwyge2psc5suaxaltmp23gbawoo3qjeo44hgwgtkge2oowox3fpxwxkckaqgrxilfeyxmjp4cwf2rhpkbwtveqkukchv3u5rfkykwfj5zhleu3obsif6ldfclaef32wjpe5d6ddh2grdx2yt6tuhw53t6zuuumlw6t6i3e2ev7nh36e4shwbn3ew2bbahn6imgb5sismfttf5cdlr5kw6wvjoaqiaiz2onxecfx735dvon7epthnklq67tnqj4fvcwechbvmnkpiwd5fd36dirpshc7i7zj3rcr5y3kzps5nfnfnik27gdgemfn27iblcjh5bpkio6sr375bmxbh6fshbo7cvjzsdsirsafnbjzgl6ycqczwbacj5sxwgrzl6qbdhfbzev5fzutznzbase
jqdjm3qxsdcuqvqau3kih2anr2itgmr44wmwtsk6bd42m2j436ptslaugmbbvtbzsukeqytkse7niovlilyfssn3ipzkqtclmetqrxxn7h56xn2ju7pjnuamd6ijfawn2jpjsrmnawaozeniosvhgovftoj24dl77ytdkxdl7ogappnlgkqsjyy43urtnj6tqf2psfptkbzyx4nu3rzgsqqi5ybx3pu6cvt6de67xutdz566wrkp2ymy5n7tqchmw77ss532noqcbfxv6quum6jmeed3exasdapvid6bilwzm5dcnutkcxktmsdryqopw5ntws3tnbd7um27clmxkgl2uinwzvv4tmo4axbj5zmgfd6sy2fiw6efdwjcyj4awdx3peuxcyh3ccee63w2nqaltierdrevl3f3hnjsrdrl4aosd23szhhaimhg2mjuocottcdsraoczh3waoyxx2skunaphe6w5gutu2z7cag3cx4pgsspfmspphuunzx357x6l36hj3tdys727rhawfwc4wc4ytgts4nrxlxl3xxzffunlhjhzj5guxljwxfrxwjfsg5c67pg3js7gvfqmpson6rjgiuwbsklranwhauq74lbesavftdzf7y3x5zwbi4uu6q2vqimbkttm7k6ycttsgknej2ylkwdxgtut7wecpepnb527pblj3vuzldjt3whsmstax536plulalxtxmvj6vvg4phofyaidhxhhkl4dfb6oabp3fi55jt77pw3jl55pwbsimjpdanuenll2xxctr6swaimuaqk4wvqa6rbgow3onr74v4alkuukc2tlmorvsjpgaazpun6pbfyorazzarhc2r7fjt55pmosowrqcpdwl2q34hcco2f3icmpktchxdvtpmitufnplqaifbtlktkpo7b22244n6dkmxtgcnxtixsit57uhh7rc5rqezjz7pfd7ojhrui5bcdzb7bo2nbo6o24lpwbg4bmqgbqpbwclq6kglgxefryxlkqydillki3545vcrelfw6reszml6emuyjscx377on2qpq26j5jrh5xmbwmpcyq6sewanlbmwwk2vqhq5zunbcyd6h5z3ms3bgfn7lflvev5vwmjnv5nzbgrmpamy453zuvy6xc6jp7tqgpmrlxup7suptejbacm6rdurdhcaori6i25wylgaikfov4dfgeswxdeerogy2m5tbzsdlr7pfhchd4wnokuipfwjzejxiruj5cljm66hvn47j3eseys3nsi6xdh566jgap5s5e7ytdkkhh5lsuv47oose4luozz427dzk577jccjg3n7b4myd565edmsywol3hgh2i54lcya6saaaaaaa"), }, 4: { # client[4] 0: base32.a2b("krqwq33febwxk5dbmjwgkiddn5xhiyljnzsxeidwgefhkckeaohanguihdeqshi3vbil354mnoip7yzj3rpsvjbydjlngiqocl2s6dja4dqjzuaghaekxoithualnjp6artv6laaaaaaaaaabb5aaaaaaaaaacsoaaaaaakjl2ynvzguwqjavmynllmjm66qaqz4uh4dinujrxcaafvp5vvzrgueu3fxwkppvopapdw3p4hjezva23vxif5rzgacysmyo7tr4tjd44nnqpsanguihdeqshi3vbil354mnoip7yzj3rpqaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaarybcjqw7lbehfrbu7pwx26vvv3mbjprnqup7q7wxmdsoyiqaqzgbtv6cplrrgpzbnptsoqz7imnauamfaaaaaaaaaaamgaaaaaaaaaaaymaaaaghqaaacr4aaaayxaaaagnyaaaaaaaaaao4qaaaaaaaaacd2gcbacibqbudaskugjcdpodibaeaqkaadqiaq2abqqiaqqaucaeaqbvkghg3s5ivqiq4uiajdzagl7iqhz2uxwj6aqk47oscyqxyljmxkapldxhxigum2ejxflnps3yrxwwehop2zairilrouid5s6wxgnoqg2vq7rx77icxfnx7mq65niuknra3tezhpndwo7pdxtvtci645p4umyhdh5gp3kbdpypphvouaql662r6ufigp4wwm6emrsavlontn4wttg6lv7bcmq6ojw5utpvz3agoenovrkndncjzxog7sp2w7l6jkmzgfxd2asxos5khkjbxbuhgkd6j5yqlzsmk3kq67engtlgyd7hxk7nedw73bq2bs4353wtler23poucntgve22acfxdbyj2f6q2saj2agu2ptfk364d3zayddffxkcah4olnkczjonselwwrvdcu3vch3licaeirogosgsui3y4ovcyzleurbiunp7fsfk2fgmiek7b3jogvrhj4snvkpqjfh2w2lqnkvqvuoojmgrn6sll354mpomajbtlsv6fuguzgsxsm3l2ehxdxfdv6kcoyogaa5raetooahunf4ltvfdb4d7mwgpj4pg7tmcw55ku5vgx7tjqkdkuyq2uvhavkbsbujsrcfzve5ekuhftxm7nmtomibrblbwyxcr5mcy6qqwx66lrhejmgewrig74nzpriualhw4g22qaw423qeezqn6irea3vlgc3foz4egf6irincownoq7utv643vmtoueebigwrl6nehbos3ipsdx44tmucpvyui2jzgfulb5mrrcjuycmuzfigu6tf25lbysqn7n7smhqniddgctjt573rtd6o63wiaactacm7dw7giohzcgwe4okbijnmzlacetmfjjbasrd6whexjmwkaiaabdm4cpjolak4m47telnokjaxwodj7ont7n7vffnmhkzp3lyshkh3qaarzybn64ru5rss7tmi4ttv26q66ebdvvrtyd3s5t7dmqku3uoefroaaibdqu2gyd4hqwgj3jhsu7ievr26vxpzj4g6ovbvqeyljrk6n2xfidtwj6pazanrhwes3e4ln4uettqyd5u5bqroneqie7lkwlxm7xsbg4zhnlc2fybonhlpcatwlgdvk3jpn7sge4qnod2ufxgxc7rphbnunb52xrgmdgpojqhyfajxea
lxwdddlhhbttphrgv5zrub5mggbcec3honrtuuv3epex3s5yvkt2zmsaxfeu34psjwjltm4ys5qa72ryrmgjtmtu3i34jfmachhmgul2j2sddwydgvtpqnatglb3ejlhukxp3isthks6df52kvobtcnscytmjrrfbekvwmhtbcke2cgcyaj2cra7xmnd4bw2xe2qki5kycopo45ekfyxwzsmxuyxvjzqklnqjwm3j3gwcm75ftnrsvbj33w6eyr4dbz2tewum7vfsyfbb3ojw5ujtn22jxpr4nkmkqup6z7ukpp4cdxwyge2psc5suaxaltmp23gbawoo3qjeo44hgwgtkge2oowox3fpxwxkckaqgrxilfeyxmjp4cwf2rhpkbwtveqkukchv3u5rfkykwfj5zhleu3obsif6ldfclaef32wjpe5d6ddh2grdx2yt6tuhw53t6zuuumlw6t6i3e2ev7nh36e4shwbn3ew2bbahn6imgb5sismfttf5cdlr5kw6wvjoaqiaiz2onxecfx735dvon7epthnklq67tnqj4fvcwechbvmnkpiwd5fd36dirpshc7i7zj3rcr5y3kzps5nfnfnik27gdgemfn27iblcjh5bpkio6sr375bmxbh6fshbo7cvjzsdsirsafnbjzgl6ycqczwbacj5sxwgrzl6qbdhfbzev5fzutznzbasejqdjm3qxsdcuqvqau3kih2anr2itgmr44wmwtsk6bd42m2j436ptslaugmbbvtbzsukeqytkse7niovlilyfssn3ipzkqtclmetqrxxn7h56xn2ju7pjnuamd6ijfawn2jpjsrmnawaozeniosvhgovftoj24dl77ytdkxdl7ogappnlgkqsjyy43urtnj6tqf2psfptkbzyx4nu3rzgsqqi5ybx3pu6cvt6de67xutdz566wrkp2ymy5n7tqchmw77ss532noqcbfxv6quum6jmeed3exasdapvid6bilwzm5dcnutkcxktmsdryqopw5ntws3tnbd7um27clmxkgl2uinwzvv4tmo4axbj5zmgfd6sy2fiw6efdwjcyj4awdx3peuxcyh3ccee63w2nqaltierdrevl3f3hnjsrdrl4aosd23szhhaimhg2mjuocottcdsraoczh3waoyxx2skunaphe6w5gutu2z7cag3cx4pgsspfmspphuunzx357x6l36hj3tdys727rhawfwc4wc4ytgts4nrxlxl3xxzffunlhjhzj5guxljwxfrxwjfsg5c67pg3js7gvfqmpson6rjgiuwbsklranwhauq74lbesavftdzf7y3x5zwbi4uu6q2vqimbkttm7k6ycttsgknej2ylkwdxgtut7wecpepnb527pblj3vuzldjt3whsmstax536plulalxtxmvj6vvg4phofyaidhxhhkl4dfb6oabp3fi55jt77pw3jl55pwbsimjpdanuenll2xxctr6swaimuaqk4wvqa6rbgow3onr74v4alkuukc2tlmorvsjpgaazpun6pbfyorazzarhc2r7fjt55pmosowrqcpdwl2q34hcco2f3icmpktchxdvtpmitufnplqaifbtlktkpo7b22244n6dkmxtgcnxtixsit57uhh7rc5rqezjz7pfd7ojhrui5bcdzb7bo2nbo6o24lpwbg4bmqgbqpbwclq6kglgxefryxlkqydillki3545vcrelfw6reszml6emuyjscx377on2qpq26j5jrh5xmbwmpcyq6sewanlbmwwk2vqhq5zunbcyd6h5z3ms3bgfn7lflvev5vwmjnv5nzbgrmpamy453zuvy6xc6jp7tqgpmrlxup7suptejbacm6rdurdhcaori6i25wylgaikfov4dfgeswxdeerogy2m5tbzsdlr7pfhchd4wnokuipfwjzejxiruj5cljm66hvn47j3eseys3nsi6xdh566jgap5s5e7ytdkkhh5lsuv47oose4luozz427dzk577jccjg3n7b4myd565edmsywol3hgh2i54lcya6saaaaaaa"), 5: 
base32.a2b("krqwq33febwxk5dbmjwgkiddn5xhiyljnzsxeidwgefhkckeaohanguihdeqshi3vbil354mnoip7yzj3rpsvjbydjlngiqocl2s6dja4dqjzuaghaekxoithualnjp6artv6laaaaaaaaaabb5aaaaaaaaaacsoaaaaaakjl2ynvzguwqjavmynllmjm66qaqz4uh4dinujrxcaafvp5vvzrgueu3fxwkppvopapdw3p4hjezva23vxif5rzgacysmyo7tr4tjd44nnqpsanguihdeqshi3vbil354mnoip7yzj3rpqaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaarybcjqw7lbehfrbu7pwx26vvv3mbjprnqup7q7wxmdsoyiqaqzgbtv6cplrrgpzbnptsoqz7imnauamfaaaaaaaaaaamgaaaaaaaaaaaymaaaaghqaaacr4aaaayxaaaagnyaaaaaaaaaao4qaaaaaaaaacd2gcbacibqbudaskugjcdpodibaeaqkaadqiaq2abqqiaqqaucaeaqbvkghg3s5ivqiq4uiajdzagl7iqhz2uxwj6aqk47oscyqxyljmxkapldxhxigum2ejxflnps3yrxwwehop2zairilrouid5s6wxgnoqg2vq7rx77icxfnx7mq65niuknra3tezhpndwo7pdxtvtci645p4umyhdh5gp3kbdpypphvouaql662r6ufigp4wwm6emrsavlontn4wttg6lv7bcmq6ojw5utpvz3agoenovrkndncjzxog7sp2w7l6jkmzgfxd2asxos5khkjbxbuhgkd6j5yqlzsmk3kq67engtlgyd7hxk7nedw73bq2bs4353wtler23poucntgve22acfxdbyj2f6q2saj2agu2ptfk364d3zayddffxkcah4olnkczjonselwwrvdcu3vch3licaeirogosgsui3y4ovcyzleurbiunp7fsfk2fgmiek7b3jogvrhj4snvkpqjfh2w2lqnkvqvuoojmgrn6sll354mpomajbtlsv6fuguzgsxsm3l2ehxdxfdv6kcoyogaa5raetooahunf4ltvfdb4d7mwgpj4pg7tmcw55ku5vgx7tjqkdkuyq2uvhavkbsbujsrcfzve5ekuhftxm7nmtomibrblbwyxcr5mcy6qqwx66lrhejmgewrig74nzpriualhw4g22qaw423qeezqn6irea3vlgc3foz4egf6irincownoq7utv643vmtoueebigwrl6nehbos3ipsdx44tmucpvyui2jzgfulb5mrrcjuycmuzfigu6tf25lbysqn7n7smhqniddgctjt573rtd6o63wiaactacm7dw7giohzcgwe4okbijnmzlacetmfjjbasrd6whexjmwkaiaaazuum3xriq54h5v6afcrrl3kkbd46oizwulc5fbslmblxfc3ldyyqaavmjy6g336aewy42vw5rusytyi7vzs6y22c5jhxyt5w6gthcbjp4zaajwnpw5yhhwh4hyctajptujjwg7cswzjkwucke6yvbuejqhrnbafadv245phzjfluujm5pyfx43oagwtsdkgtw2v4i56uexjrumsdes6go7556an26wmzpbskyrsx4qbzqcedilovhlkrlnhvsfr4mjwkw62mkf4kde7jgesu4ztbzc7xmuobydnxk5hdyyly6n7socvrsqw6z56v6osxr2vgxpz6jor7ciyclkungeaayume5hdrm6cbnvwgua4gc2fcpixfdbkiijnmlicribyoinnpu6zdce4mdfqyl4qzup3kkk5qju2wthks6df52kvobtcnscytmjrrfbekvwmhtbcke2cgcyaj2cra7xmnd4bw2xe2qki5kycopo45ekfyxwzsmxuyxvjzqklnqjwm3j3gwcm75ftnrsvbj33w6eyr4dbz2tewum7vfsyfbb3ojw5ujtn22jxpr4nkmkqup6z7ukpp4cdxwyge2psc5suaxaltmp23gbawoo3qjeo44hgwgtkge2oowox3fpxwxkckaqgrxilfeyxmjp4cwf2rhpkbwtveqkukchv3u5rfkykwfj5zhleu3obsif6ldfclaef32wjpe5d6ddh2grdx2yt6tuhw53t6zuuumlw6t6i3e2ev7nh36e4shwbn3ew2bbahn6imgb5sismfttf5cdlr5kw6wvjoaqiaiz2onxecfx735dvon7epthnklq67tnqj4fvcwechbvmnkpiwd5fd36dirpshc7i7zj3rcr5y3kzps5nfnfnik27gdgemfn27iblcjh5bpkio6sr375bmxbh6fshbo7cvjzsdsirsafnbjzgl6ycqczwbacj5sxwgrzl6qbdhfbzev5fzutznzbasejqdjm3qxsdcuqvqau3kih2anr2itgmr44wmwtsk6bd42m2j436ptslaugmbbvtbzsukeqytkse7niovlilyfssn3ipzkqtclmetqrxxn7h56xn2ju7pjnuamd6ijfawn2jpjsrmnawaozeniosvhgovftoj24dl77ytdkxdl7ogappnlgkqsjyy43urtnj6tqf2psfptkbzyx4nu3rzgsqqi5ybx3pu6cvt6de67xutdz566wrkp2ymy5n7tqchmw77ss532noqcbfxv6quum6jmeed3exasdapvid6bilwzm5dcnutkcxktmsdryqopw5ntws3tnbd7um27clmxkgl2uinwzvv4tmo4axbj5zmgfd6sy2fiw6efdwjcyj4awdx3peuxcyh3ccee63w2nqaltierdrevl3f3hnjsrdrl4aosd23szhhaimhg2mjuocottcdsraoczh3waoyxx2skunaphe6w5gutu2z7cag3cx4pgsspfmspphuunzx357x6l36hj3tdys727rhawfwc4wc4ytgts4nrxlxl3xxzffunlhjhzj5guxljwxfrxwjfsg5c67pg3js7gvfqmpson6rjgiuwbsklranwhauq74lbesavftdzf7y3x5zwbi4uu6q2vqimbkttm7k6ycttsgknej2ylkwdxgtut7wecpepnb52
7pblj3vuzldjt3whsmstax536plulalxtxmvj6vvg4phofyaidhxhhkl4dfb6oabp3fi55jt77pw3jl55pwbsimjpdanuenll2xxctr6swaimuaqk4wvqa6rbgow3onr74v4alkuukc2tlmorvsjpgaazpun6pbfyorazzarhc2r7fjt55pmosowrqcpdwl2q34hcco2f3icmpktchxdvtpmitufnplqaifbtlktkpo7b22244n6dkmxtgcnxtixsit57uhh7rc5rqezjz7pfd7ojhrui5bcdzb7bo2nbo6o24lpwbg4bmqgbqpbwclq6kglgxefryxlkqydillki3545vcrelfw6reszml6emuyjscx377on2qpq26j5jrh5xmbwmpcyq6sewanlbmwwk2vqhq5zunbcyd6h5z3ms3bgfn7lflvev5vwmjnv5nzbgrmpamy453zuvy6xc6jp7tqgpmrlxup7suptejbacm6rdurdhcaori6i25wylgaikfov4dfgeswxdeerogy2m5tbzsdlr7pfhchd4wnokuipfwjzejxiruj5cljm66hvn47j3eseys3nsi6xdh566jgap5s5e7ytdkkhh5lsuv47oose4luozz427dzk577jccjg3n7b4myd565edmsywol3hgh2i54lcya6saaaaaaa"), }, } #--------- END stored_shares.py ---------------- class _Base(GridTestMixin, ShouldFailMixin): def create_shares(self, ignored=None): u = upload.Data(plaintext, None) d = self.c0.upload(u) f = open("stored_shares.py", "w") def _created_immutable(ur): # write the generated shares and URI to a file, which can then be # incorporated into this one next time. f.write('immutable_uri = "%s"\n' % ur.get_uri()) f.write('immutable_shares = {\n') si = uri.from_string(ur.get_uri()).get_storage_index() si_dir = storage_index_to_dir(si) for (i,ss,ssdir) in self.iterate_servers(): sharedir = os.path.join(ssdir, "shares", si_dir) shares = {} for fn in os.listdir(sharedir): shnum = int(fn) sharedata = open(os.path.join(sharedir, fn), "rb").read() shares[shnum] = sharedata fileutil.rm_dir(sharedir) if shares: f.write(' %d: { # client[%d]\n' % (i, i)) for shnum in sorted(shares.keys()): f.write(' %d: base32.a2b("%s"),\n' % (shnum, base32.b2a(shares[shnum]))) f.write(' },\n') f.write('}\n') f.write('\n') d.addCallback(_created_immutable) d.addCallback(lambda ignored: self.c0.create_mutable_file(mutable_plaintext)) def _created_mutable(n): f.write('mutable_uri = "%s"\n' % n.get_uri()) f.write('mutable_shares = {\n') si = uri.from_string(n.get_uri()).get_storage_index() si_dir = storage_index_to_dir(si) for (i,ss,ssdir) in self.iterate_servers(): sharedir = os.path.join(ssdir, "shares", si_dir) shares = {} for fn in os.listdir(sharedir): shnum = int(fn) sharedata = open(os.path.join(sharedir, fn), "rb").read() shares[shnum] = sharedata fileutil.rm_dir(sharedir) if shares: f.write(' %d: { # client[%d]\n' % (i, i)) for shnum in sorted(shares.keys()): f.write(' %d: base32.a2b("%s"),\n' % (shnum, base32.b2a(shares[shnum]))) f.write(' },\n') f.write('}\n') f.close() d.addCallback(_created_mutable) def _done(ignored): f.close() d.addCallback(_done) return d def load_shares(self, ignored=None): # this uses the data generated by create_shares() to populate the # storage servers with pre-generated shares si = uri.from_string(immutable_uri).get_storage_index() si_dir = storage_index_to_dir(si) for i in immutable_shares: shares = immutable_shares[i] for shnum in shares: dn = os.path.join(self.get_serverdir(i), "shares", si_dir) fileutil.make_dirs(dn) fn = os.path.join(dn, str(shnum)) f = open(fn, "wb") f.write(shares[shnum]) f.close() si = uri.from_string(mutable_uri).get_storage_index() si_dir = storage_index_to_dir(si) for i in mutable_shares: shares = mutable_shares[i] for shnum in shares: dn = os.path.join(self.get_serverdir(i), "shares", si_dir) fileutil.make_dirs(dn) fn = os.path.join(dn, str(shnum)) f = open(fn, "wb") f.write(shares[shnum]) f.close() def download_immutable(self, ignored=None): n = self.c0.create_node_from_uri(immutable_uri) d = download_to_data(n) def _got_data(data): self.failUnlessEqual(data, plaintext) 
d.addCallback(_got_data) # make sure we can use the same node twice d.addCallback(lambda ign: download_to_data(n)) d.addCallback(_got_data) return d def download_mutable(self, ignored=None): n = self.c0.create_node_from_uri(mutable_uri) d = n.download_best_version() def _got_data(data): self.failUnlessEqual(data, mutable_plaintext) d.addCallback(_got_data) return d class DownloadTest(_Base, unittest.TestCase): timeout = 2400 # It takes longer than 240 seconds on Zandr's ARM box. def test_download(self): self.basedir = self.mktemp() self.set_up_grid() self.c0 = self.g.clients[0] # do this to create the shares #return self.create_shares() self.load_shares() d = self.download_immutable() d.addCallback(self.download_mutable) return d def test_download_failover(self): self.basedir = self.mktemp() self.set_up_grid() self.c0 = self.g.clients[0] self.load_shares() si = uri.from_string(immutable_uri).get_storage_index() si_dir = storage_index_to_dir(si) n = self.c0.create_node_from_uri(immutable_uri) d = download_to_data(n) def _got_data(data): self.failUnlessEqual(data, plaintext) d.addCallback(_got_data) def _clobber_some_shares(ign): # find the three shares that were used, and delete them. Then # download again, forcing the downloader to fail over to other # shares for s in n._cnode._node._shares: for clientnum in immutable_shares: for shnum in immutable_shares[clientnum]: if s._shnum == shnum: fn = os.path.join(self.get_serverdir(clientnum), "shares", si_dir, str(shnum)) os.unlink(fn) d.addCallback(_clobber_some_shares) d.addCallback(lambda ign: download_to_data(n)) d.addCallback(_got_data) def _clobber_most_shares(ign): # delete all but one of the shares that are still alive live_shares = [s for s in n._cnode._node._shares if s.is_alive()] save_me = live_shares[0]._shnum for clientnum in immutable_shares: for shnum in immutable_shares[clientnum]: if shnum == save_me: continue fn = os.path.join(self.get_serverdir(clientnum), "shares", si_dir, str(shnum)) if os.path.exists(fn): os.unlink(fn) # now the download should fail with NotEnoughSharesError return self.shouldFail(NotEnoughSharesError, "1shares", None, download_to_data, n) d.addCallback(_clobber_most_shares) def _clobber_all_shares(ign): # delete the last remaining share for clientnum in immutable_shares: for shnum in immutable_shares[clientnum]: fn = os.path.join(self.get_serverdir(clientnum), "shares", si_dir, str(shnum)) if os.path.exists(fn): os.unlink(fn) # now a new download should fail with NoSharesError. We want a # new ImmutableFileNode so it will forget about the old shares. # If we merely called create_node_from_uri() without first # dereferencing the original node, the NodeMaker's _node_cache # would give us back the old one. n = None n = self.c0.create_node_from_uri(immutable_uri) return self.shouldFail(NoSharesError, "0shares", None, download_to_data, n) d.addCallback(_clobber_all_shares) return d def test_lost_servers(self): # while downloading a file (after seg[0], before seg[1]), lose the # three servers that we were using. The download should switch over # to other servers. self.basedir = self.mktemp() self.set_up_grid() self.c0 = self.g.clients[0] # upload a file with multiple segments, so we can catch the download # in the middle. 
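        # Illustrative only: a hypothetical helper sketching why
        # max_segment_size=70 is described as "5 segs" for the roughly
        # 310-byte plaintext used in this file -- the segment count is
        # approximately the ceiling of plaintext length over segment size.
        def _example_segment_count(data_len, max_segment_size):
            # e.g. _example_segment_count(310, 70) == 5
            return (data_len + max_segment_size - 1) // max_segment_size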
u = upload.Data(plaintext, None) u.max_segment_size = 70 # 5 segs d = self.c0.upload(u) def _uploaded(ur): self.uri = ur.get_uri() self.n = self.c0.create_node_from_uri(self.uri) return download_to_data(self.n) d.addCallback(_uploaded) def _got_data(data): self.failUnlessEqual(data, plaintext) d.addCallback(_got_data) def _kill_some_shares(): # find the shares that were used and delete them shares = self.n._cnode._node._shares shnums = sorted([s._shnum for s in shares]) self.failUnlessEqual(shnums, [0,1,2,3]) # break the RIBucketReader references # (we don't break the RIStorageServer references, because that # isn't needed to test the current downloader implementation) for s in shares: s._rref.broken = True def _download_again(ign): # download again, deleting some shares after the first write # to the consumer c = StallingConsumer(_kill_some_shares) return self.n.read(c) d.addCallback(_download_again) def _check_failover(c): self.failUnlessEqual("".join(c.chunks), plaintext) shares = self.n._cnode._node._shares shnums = sorted([s._shnum for s in shares]) self.failIfEqual(shnums, [0,1,2,3]) d.addCallback(_check_failover) return d def test_long_offset(self): # bug #1154: mplayer doing a seek-to-end results in an offset of type # 'long', rather than 'int', and apparently __len__ is required to # return an int. Rewrote Spans/DataSpans to provide s.len() instead # of len(s) . self.basedir = self.mktemp() self.set_up_grid() self.c0 = self.g.clients[0] self.load_shares() n = self.c0.create_node_from_uri(immutable_uri) c = MemoryConsumer() d = n.read(c, 0L, 10L) d.addCallback(lambda c: len("".join(c.chunks))) d.addCallback(lambda size: self.failUnlessEqual(size, 10)) return d def test_badguess(self): self.basedir = self.mktemp() self.set_up_grid() self.c0 = self.g.clients[0] self.load_shares() n = self.c0.create_node_from_uri(immutable_uri) # Cause the downloader to guess a segsize that's too low, so it will # ask for a segment number that's too high (beyond the end of the # real list, causing BadSegmentNumberError), to exercise # Segmentation._retry_bad_segment n._cnode._maybe_create_download_node() n._cnode._node._build_guessed_tables(90) con1 = MemoryConsumer() # plaintext size of 310 bytes, wrong-segsize of 90 bytes, will make # us think that file[180:200] is in the third segment (segnum=2), but # really there's only one segment d = n.read(con1, 180, 20) def _done(res): self.failUnlessEqual("".join(con1.chunks), plaintext[180:200]) d.addCallback(_done) return d def test_simultaneous_badguess(self): self.basedir = self.mktemp() self.set_up_grid() self.c0 = self.g.clients[0] # upload a file with multiple segments, and a non-default segsize, to # exercise the offset-guessing code. Because we don't tell the # downloader about the unusual segsize, it will guess wrong, and have # to do extra roundtrips to get the correct data. 
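        # Illustrative only: a hypothetical helper showing the guess that
        # goes wrong here. The downloader maps a byte offset to a segment
        # number using its guessed segment size; with a guessed segsize of
        # 90, offset 180 maps to segnum 2 even when the real file has only
        # one segment, so it must retry after BadSegmentNumberError.
        def _example_guess_segnum(offset, guessed_segsize):
            return offset // guessed_segsize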
u = upload.Data(plaintext, None) u.max_segment_size = 70 # 5 segs, 8-wide hashtree con1 = MemoryConsumer() con2 = MemoryConsumer() d = self.c0.upload(u) def _uploaded(ur): n = self.c0.create_node_from_uri(ur.get_uri()) d1 = n.read(con1, 70, 20) d2 = n.read(con2, 140, 20) return defer.gatherResults([d1,d2]) d.addCallback(_uploaded) def _done(res): self.failUnlessEqual("".join(con1.chunks), plaintext[70:90]) self.failUnlessEqual("".join(con2.chunks), plaintext[140:160]) d.addCallback(_done) return d def test_simultaneous_goodguess(self): self.basedir = self.mktemp() self.set_up_grid() self.c0 = self.g.clients[0] # upload a file with multiple segments, and a non-default segsize, to # exercise the offset-guessing code. This time we *do* tell the # downloader about the unusual segsize, so it can guess right. u = upload.Data(plaintext, None) u.max_segment_size = 70 # 5 segs, 8-wide hashtree con1 = MemoryConsumer() con2 = MemoryConsumer() d = self.c0.upload(u) def _uploaded(ur): n = self.c0.create_node_from_uri(ur.get_uri()) n._cnode._maybe_create_download_node() n._cnode._node._build_guessed_tables(u.max_segment_size) d1 = n.read(con1, 70, 20) d2 = n.read(con2, 140, 20) return defer.gatherResults([d1,d2]) d.addCallback(_uploaded) def _done(res): self.failUnlessEqual("".join(con1.chunks), plaintext[70:90]) self.failUnlessEqual("".join(con2.chunks), plaintext[140:160]) d.addCallback(_done) return d def test_sequential_goodguess(self): self.basedir = self.mktemp() self.set_up_grid() self.c0 = self.g.clients[0] data = (plaintext*100)[:30000] # multiple of k # upload a file with multiple segments, and a non-default segsize, to # exercise the offset-guessing code. This time we *do* tell the # downloader about the unusual segsize, so it can guess right. u = upload.Data(data, None) u.max_segment_size = 6000 # 5 segs, 8-wide hashtree con1 = MemoryConsumer() con2 = MemoryConsumer() d = self.c0.upload(u) def _uploaded(ur): n = self.c0.create_node_from_uri(ur.get_uri()) n._cnode._maybe_create_download_node() n._cnode._node._build_guessed_tables(u.max_segment_size) d = n.read(con1, 12000, 20) def _read1(ign): self.failUnlessEqual("".join(con1.chunks), data[12000:12020]) return n.read(con2, 24000, 20) d.addCallback(_read1) def _read2(ign): self.failUnlessEqual("".join(con2.chunks), data[24000:24020]) d.addCallback(_read2) return d d.addCallback(_uploaded) return d def test_simultaneous_get_blocks(self): self.basedir = self.mktemp() self.set_up_grid() self.c0 = self.g.clients[0] self.load_shares() stay_empty = [] n = self.c0.create_node_from_uri(immutable_uri) d = download_to_data(n) def _use_shares(ign): shares = list(n._cnode._node._shares) s0 = shares[0] # make sure .cancel works too o0 = s0.get_block(0) o0.subscribe(lambda **kwargs: stay_empty.append(kwargs)) o1 = s0.get_block(0) o2 = s0.get_block(0) o0.cancel() o3 = s0.get_block(1) # state=BADSEGNUM d1 = defer.Deferred() d2 = defer.Deferred() d3 = defer.Deferred() o1.subscribe(lambda **kwargs: d1.callback(kwargs)) o2.subscribe(lambda **kwargs: d2.callback(kwargs)) o3.subscribe(lambda **kwargs: d3.callback(kwargs)) return defer.gatherResults([d1,d2,d3]) d.addCallback(_use_shares) def _done(res): r1,r2,r3 = res self.failUnlessEqual(r1["state"], "COMPLETE") self.failUnlessEqual(r2["state"], "COMPLETE") self.failUnlessEqual(r3["state"], "BADSEGNUM") self.failUnless("block" in r1) self.failUnless("block" in r2) self.failIf(stay_empty) d.addCallback(_done) return d def test_simultaneous_onefails_onecancelled(self): # This exercises an mplayer behavior in ticket 
#1154. I believe that # mplayer made two simultaneous webapi GET requests: first one for an # index region at the end of the (mp3/video) file, then one for the # first block of the file (the order doesn't really matter). All GETs # failed (NoSharesError) because of the type(__len__)==long bug. Each # GET submitted a DownloadNode.get_segment() request, which was # queued by the DN (DN._segment_requests), so the second one was # blocked waiting on the first one. When the first one failed, # DN.fetch_failed() was invoked, which errbacks the first GET, but # left the other one hanging (the lost-progress bug mentioned in # #1154 comment 10) # # Then mplayer sees that the index region GET failed, so it cancels # the first-block GET (by closing the HTTP request), triggering # stopProducer. The second GET was waiting in the Deferred (between # n.get_segment() and self._request_retired), so its # _cancel_segment_request was active, so was invoked. However, # DN._active_segment was None since it was not working on any segment # at that time, hence the error in #1154. self.basedir = self.mktemp() self.set_up_grid() self.c0 = self.g.clients[0] # upload a file with multiple segments, so we can catch the download # in the middle. Tell the downloader, so it can guess correctly. u = upload.Data(plaintext, None) u.max_segment_size = 70 # 5 segs d = self.c0.upload(u) def _uploaded(ur): # corrupt all the shares so the download will fail def _corruptor(s, debug=False): which = 48 # first byte of block0 return s[:which] + chr(ord(s[which])^0x01) + s[which+1:] self.corrupt_all_shares(ur.get_uri(), _corruptor) n = self.c0.create_node_from_uri(ur.get_uri()) n._cnode._maybe_create_download_node() n._cnode._node._build_guessed_tables(u.max_segment_size) con1 = MemoryConsumer() con2 = MemoryConsumer() d = n.read(con1, 0L, 20) d2 = n.read(con2, 140L, 20) # con2 will be cancelled, so d2 should fail with DownloadStopped def _con2_should_not_succeed(res): self.fail("the second read should not have succeeded") def _con2_failed(f): self.failUnless(f.check(DownloadStopped)) d2.addCallbacks(_con2_should_not_succeed, _con2_failed) def _con1_should_not_succeed(res): self.fail("the first read should not have succeeded") def _con1_failed(f): self.failUnless(f.check(NoSharesError)) con2.producer.stopProducing() return d2 d.addCallbacks(_con1_should_not_succeed, _con1_failed) return d d.addCallback(_uploaded) return d def test_simultaneous_onefails(self): self.basedir = self.mktemp() self.set_up_grid() self.c0 = self.g.clients[0] # upload a file with multiple segments, so we can catch the download # in the middle. Tell the downloader, so it can guess correctly. u = upload.Data(plaintext, None) u.max_segment_size = 70 # 5 segs d = self.c0.upload(u) def _uploaded(ur): # corrupt all the shares so the download will fail def _corruptor(s, debug=False): which = 48 # first byte of block0 return s[:which] + chr(ord(s[which])^0x01) + s[which+1:] self.corrupt_all_shares(ur.get_uri(), _corruptor) n = self.c0.create_node_from_uri(ur.get_uri()) n._cnode._maybe_create_download_node() n._cnode._node._build_guessed_tables(u.max_segment_size) con1 = MemoryConsumer() con2 = MemoryConsumer() d = n.read(con1, 0L, 20) d2 = n.read(con2, 140L, 20) # con2 should wait for con1 to fail and then con2 should succeed. # In particular, we should not lose progress. If this test fails, # it will fail with a timeout error. def _con2_should_succeed(res): # this should succeed because we only corrupted the first # segment of each share. 
The segment that holds [140:160] is # fine, as are the hash chains and UEB. self.failUnlessEqual("".join(con2.chunks), plaintext[140:160]) d2.addCallback(_con2_should_succeed) def _con1_should_not_succeed(res): self.fail("the first read should not have succeeded") def _con1_failed(f): self.failUnless(f.check(NoSharesError)) # we *don't* cancel the second one here: this exercises a # lost-progress bug from #1154. We just wait for it to # succeed. return d2 d.addCallbacks(_con1_should_not_succeed, _con1_failed) return d d.addCallback(_uploaded) return d def test_download_no_overrun(self): self.basedir = self.mktemp() self.set_up_grid() self.c0 = self.g.clients[0] self.load_shares() # tweak the client's copies of server-version data, so it believes # that they're old and can't handle reads that overrun the length of # the share. This exercises a different code path. for s in self.c0.storage_broker.get_connected_servers(): rref = s.get_rref() v1 = rref.version["http://allmydata.org/tahoe/protocols/storage/v1"] v1["tolerates-immutable-read-overrun"] = False n = self.c0.create_node_from_uri(immutable_uri) d = download_to_data(n) def _got_data(data): self.failUnlessEqual(data, plaintext) d.addCallback(_got_data) return d def test_download_segment(self): self.basedir = self.mktemp() self.set_up_grid() self.c0 = self.g.clients[0] self.load_shares() n = self.c0.create_node_from_uri(immutable_uri) cn = n._cnode (d,c) = cn.get_segment(0) def _got_segment((offset,data,decodetime)): self.failUnlessEqual(offset, 0) self.failUnlessEqual(len(data), len(plaintext)) d.addCallback(_got_segment) return d def test_download_segment_cancel(self): self.basedir = self.mktemp() self.set_up_grid() self.c0 = self.g.clients[0] self.load_shares() n = self.c0.create_node_from_uri(immutable_uri) cn = n._cnode (d,c) = cn.get_segment(0) fired = [] d.addCallback(fired.append) c.cancel() d = fireEventually() d.addCallback(flushEventualQueue) def _check(ign): self.failUnlessEqual(fired, []) d.addCallback(_check) return d def test_download_bad_segment(self): self.basedir = self.mktemp() self.set_up_grid() self.c0 = self.g.clients[0] self.load_shares() n = self.c0.create_node_from_uri(immutable_uri) cn = n._cnode def _try_download(): (d,c) = cn.get_segment(1) return d d = self.shouldFail(BadSegmentNumberError, "badseg", "segnum=1, numsegs=1", _try_download) return d def test_download_segment_terminate(self): self.basedir = self.mktemp() self.set_up_grid() self.c0 = self.g.clients[0] self.load_shares() n = self.c0.create_node_from_uri(immutable_uri) cn = n._cnode (d,c) = cn.get_segment(0) fired = [] d.addCallback(fired.append) self.c0.terminator.disownServiceParent() d = fireEventually() d.addCallback(flushEventualQueue) def _check(ign): self.failUnlessEqual(fired, []) d.addCallback(_check) return d def test_pause(self): self.basedir = self.mktemp() self.set_up_grid() self.c0 = self.g.clients[0] self.load_shares() n = self.c0.create_node_from_uri(immutable_uri) c = PausingConsumer() d = n.read(c) def _downloaded(mc): newdata = "".join(mc.chunks) self.failUnlessEqual(newdata, plaintext) d.addCallback(_downloaded) return d def test_pause_then_stop(self): self.basedir = self.mktemp() self.set_up_grid() self.c0 = self.g.clients[0] self.load_shares() n = self.c0.create_node_from_uri(immutable_uri) c = PausingAndStoppingConsumer() d = self.shouldFail(DownloadStopped, "test_pause_then_stop", "our Consumer called stopProducing()", n.read, c) return d def test_stop(self): # use a download target that stops after the first segment (#473) 
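        # Illustrative only: an assumed minimal shape of the Twisted-style
        # consumer these tests drive (the real tests use the MemoryConsumer
        # subclasses defined near the end of this file). The downloader
        # calls registerProducer(), then write() per chunk; a consumer that
        # calls stopProducing() makes the read fail with DownloadStopped.
        class _ExampleStopOnFirstWrite(object):
            def registerProducer(self, producer, streaming):
                self.producer = producer
            def write(self, data):
                # abort the download as soon as the first chunk arrives
                self.producer.stopProducing()
            def unregisterProducer(self):
                pass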
self.basedir = self.mktemp() self.set_up_grid() self.c0 = self.g.clients[0] self.load_shares() n = self.c0.create_node_from_uri(immutable_uri) c = StoppingConsumer() d = self.shouldFail(DownloadStopped, "test_stop", "our Consumer called stopProducing()", n.read, c) return d def test_stop_immediately(self): # and a target that stops right after registerProducer (maybe #1154) self.basedir = self.mktemp() self.set_up_grid() self.c0 = self.g.clients[0] self.load_shares() n = self.c0.create_node_from_uri(immutable_uri) c = ImmediatelyStoppingConsumer() # stops after registerProducer d = self.shouldFail(DownloadStopped, "test_stop_immediately", "our Consumer called stopProducing()", n.read, c) return d def test_stop_immediately2(self): # and a target that stops right after registerProducer (maybe #1154) self.basedir = self.mktemp() self.set_up_grid() self.c0 = self.g.clients[0] self.load_shares() n = self.c0.create_node_from_uri(immutable_uri) c = MemoryConsumer() d0 = n.read(c) c.producer.stopProducing() d = self.shouldFail(DownloadStopped, "test_stop_immediately", "our Consumer called stopProducing()", lambda: d0) return d def test_download_segment_bad_ciphertext_hash(self): # The crypttext_hash_tree asserts the integrity of the decoded # ciphertext, and exists to detect two sorts of problems. The first # is a bug in zfec decode. The second is the "two-sided t-shirt" # attack (found by Christian Grothoff), in which a malicious uploader # creates two sets of shares (one for file A, second for file B), # uploads a combination of them (shares 0-4 of A, 5-9 of B), and then # builds an otherwise normal UEB around those shares: their goal is # to give their victim a filecap which sometimes downloads the good A # contents, and sometimes the bad B contents, depending upon which # servers/shares they can get to. Having a hash of the ciphertext # forces them to commit to exactly one version. (Christian's prize # for finding this problem was a t-shirt with two sides: the shares # of file A on the front, B on the back). # creating a set of shares with this property is too hard, although # it'd be nice to do so and confirm our fix. (it requires a lot of # tampering with the uploader). So instead, we just damage the # decoder. The tail decoder is rebuilt each time, so we need to use a # file with multiple segments. self.basedir = self.mktemp() self.set_up_grid() self.c0 = self.g.clients[0] u = upload.Data(plaintext, None) u.max_segment_size = 60 # 6 segs d = self.c0.upload(u) def _uploaded(ur): n = self.c0.create_node_from_uri(ur.get_uri()) n._cnode._maybe_create_download_node() n._cnode._node._build_guessed_tables(u.max_segment_size) d = download_to_data(n) def _break_codec(data): # the codec isn't created until the UEB is retrieved node = n._cnode._node vcap = node._verifycap k, N = vcap.needed_shares, vcap.total_shares bad_codec = BrokenDecoder() bad_codec.set_params(node.segment_size, k, N) node._codec = bad_codec d.addCallback(_break_codec) # now try to download it again. The broken codec will provide # ciphertext that fails the hash test. d.addCallback(lambda ign: self.shouldFail(BadCiphertextHashError, "badhash", "hash failure in " "ciphertext_hash_tree: segnum=0", download_to_data, n)) return d d.addCallback(_uploaded) return d def OFFtest_download_segment_XXX(self): self.basedir = self.mktemp() self.set_up_grid() self.c0 = self.g.clients[0] # upload a file with multiple segments, and a non-default segsize, to # exercise the offset-guessing code. 
This time we *do* tell the # downloader about the unusual segsize, so it can guess right. u = upload.Data(plaintext, None) u.max_segment_size = 70 # 5 segs, 8-wide hashtree con1 = MemoryConsumer() con2 = MemoryConsumer() d = self.c0.upload(u) def _uploaded(ur): n = self.c0.create_node_from_uri(ur.get_uri()) n._cnode._maybe_create_download_node() n._cnode._node._build_guessed_tables(u.max_segment_size) d1 = n.read(con1, 70, 20) #d2 = n.read(con2, 140, 20) d2 = defer.succeed(None) return defer.gatherResults([d1,d2]) d.addCallback(_uploaded) def _done(res): self.failUnlessEqual("".join(con1.chunks), plaintext[70:90]) self.failUnlessEqual("".join(con2.chunks), plaintext[140:160]) #d.addCallback(_done) return d def test_duplicate_shares(self): self.basedir = self.mktemp() self.set_up_grid() self.c0 = self.g.clients[0] self.load_shares() # make sure everybody has a copy of sh0. The second server contacted # will report two shares, and the ShareFinder will handle the # duplicate by attaching both to the same CommonShare instance. si = uri.from_string(immutable_uri).get_storage_index() si_dir = storage_index_to_dir(si) sh0_file = [sharefile for (shnum, serverid, sharefile) in self.find_uri_shares(immutable_uri) if shnum == 0][0] sh0_data = open(sh0_file, "rb").read() for clientnum in immutable_shares: if 0 in immutable_shares[clientnum]: continue cdir = self.get_serverdir(clientnum) target = os.path.join(cdir, "shares", si_dir, "0") outf = open(target, "wb") outf.write(sh0_data) outf.close() d = self.download_immutable() return d def test_verifycap(self): self.basedir = self.mktemp() self.set_up_grid() self.c0 = self.g.clients[0] self.load_shares() n = self.c0.create_node_from_uri(immutable_uri) vcap = n.get_verify_cap().to_string() vn = self.c0.create_node_from_uri(vcap) d = download_to_data(vn) def _got_ciphertext(ciphertext): self.failUnlessEqual(len(ciphertext), len(plaintext)) self.failIfEqual(ciphertext, plaintext) d.addCallback(_got_ciphertext) return d class BrokenDecoder(CRSDecoder): def decode(self, shares, shareids): d = CRSDecoder.decode(self, shares, shareids) def _decoded(buffers): def _corruptor(s, which): return s[:which] + chr(ord(s[which])^0x01) + s[which+1:] buffers[0] = _corruptor(buffers[0], 0) # flip lsb of first byte return buffers d.addCallback(_decoded) return d class PausingConsumer(MemoryConsumer): def __init__(self): MemoryConsumer.__init__(self) self.size = 0 self.writes = 0 def write(self, data): self.size += len(data) self.writes += 1 if self.writes <= 2: # we happen to use 4 segments, and want to avoid pausing on the # last one (since then the _unpause timer will still be running) self.producer.pauseProducing() reactor.callLater(0.1, self._unpause) return MemoryConsumer.write(self, data) def _unpause(self): self.producer.resumeProducing() class PausingAndStoppingConsumer(PausingConsumer): debug_stopped = False def write(self, data): if self.debug_stopped: raise Exception("I'm stopped, don't write to me") self.producer.pauseProducing() eventually(self._stop) def _stop(self): self.debug_stopped = True self.producer.stopProducing() class StoppingConsumer(PausingConsumer): def write(self, data): self.producer.stopProducing() class ImmediatelyStoppingConsumer(MemoryConsumer): def registerProducer(self, p, streaming): MemoryConsumer.registerProducer(self, p, streaming) self.producer.stopProducing() class StallingConsumer(MemoryConsumer): def __init__(self, halfway_cb): MemoryConsumer.__init__(self) self.halfway_cb = halfway_cb self.writes = 0 def write(self, data): 
self.writes += 1 if self.writes == 1: self.halfway_cb() return MemoryConsumer.write(self, data) class Corruption(_Base, unittest.TestCase): def _corrupt_flip(self, ign, imm_uri, which): log.msg("corrupt %d" % which) def _corruptor(s, debug=False): return s[:which] + chr(ord(s[which])^0x01) + s[which+1:] self.corrupt_shares_numbered(imm_uri, [0], _corruptor) def _corrupt_set(self, ign, imm_uri, which, newvalue): log.msg("corrupt %d" % which) def _corruptor(s, debug=False): return s[:which] + chr(newvalue) + s[which+1:] self.corrupt_shares_numbered(imm_uri, [0], _corruptor) def test_each_byte(self): # Setting catalog_detection=True performs an exhaustive test of the # Downloader's response to corruption in the lsb of each byte of the # 2070-byte share, with two goals: make sure we tolerate all forms of # corruption (i.e. don't hang or return bad data), and make a list of # which bytes can be corrupted without influencing the download # (since we don't need every byte of the share). That takes 50s to # run on my laptop and doesn't have any actual asserts, so we don't # normally do that. self.catalog_detection = False self.basedir = "download/Corruption/each_byte" self.set_up_grid() self.c0 = self.g.clients[0] # to exercise the block-hash-tree code properly, we need to have # multiple segments. We don't tell the downloader about the different # segsize, so it guesses wrong and must do extra roundtrips. u = upload.Data(plaintext, None) u.max_segment_size = 120 # 3 segs, 4-wide hashtree if self.catalog_detection: undetected = spans.Spans() def _download(ign, imm_uri, which, expected): n = self.c0.create_node_from_uri(imm_uri) n._cnode._maybe_create_download_node() # for this test to work, we need to have a new Node each time. # Make sure the NodeMaker's weakcache hasn't interfered. assert not n._cnode._node._shares d = download_to_data(n) def _got_data(data): self.failUnlessEqual(data, plaintext) shnums = sorted([s._shnum for s in n._cnode._node._shares]) no_sh0 = bool(0 not in shnums) sh0 = [s for s in n._cnode._node._shares if s._shnum == 0] sh0_had_corruption = False if sh0 and sh0[0].had_corruption: sh0_had_corruption = True num_needed = len(n._cnode._node._shares) if self.catalog_detection: detected = no_sh0 or sh0_had_corruption or (num_needed!=3) if not detected: undetected.add(which, 1) if expected == "no-sh0": self.failIfIn(0, shnums) elif expected == "0bad-need-3": self.failIf(no_sh0) self.failUnless(sh0[0].had_corruption) self.failUnlessEqual(num_needed, 3) elif expected == "need-4th": self.failIf(no_sh0) self.failUnless(sh0[0].had_corruption) self.failIfEqual(num_needed, 3) d.addCallback(_got_data) return d d = self.c0.upload(u) def _uploaded(ur): imm_uri = ur.get_uri() self.shares = self.copy_shares(imm_uri) d = defer.succeed(None) # 'victims' is a list of corruption tests to run. Each one flips # the low-order bit of the specified offset in the share file (so # offset=0 is the MSB of the container version, offset=15 is the # LSB of the share version, offset=24 is the MSB of the # data-block-offset, and offset=48 is the first byte of the first # data-block). Each one also specifies what sort of corruption # we're expecting to see. 
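            # Illustrative only: a hypothetical helper equivalent to the
            # _corruptor closures used in this class -- each victim offset
            # has just the low-order bit of a single byte flipped, so every
            # detection below is of a one-bit error.
            def _example_flip_lsb(share_bytes, which):
                return (share_bytes[:which] +
                        chr(ord(share_bytes[which]) ^ 0x01) +
                        share_bytes[which+1:])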
no_sh0_victims = [0,1,2,3] # container version need3_victims = [ ] # none currently in this category # when the offsets are corrupted, the Share will be unable to # retrieve the data it wants (because it thinks that data lives # off in the weeds somewhere), and Share treats DataUnavailable # as abandon-this-share, so in general we'll be forced to look # for a 4th share. need_4th_victims = [12,13,14,15, # share version 24,25,26,27, # offset[data] 32,33,34,35, # offset[crypttext_hash_tree] 36,37,38,39, # offset[block_hashes] 44,45,46,47, # offset[UEB] ] need_4th_victims.append(48) # block data # when corrupting hash trees, we must corrupt a value that isn't # directly set from somewhere else. Since we download data from # seg0, corrupt something on its hash chain, like [2] (the # right-hand child of the root) need_4th_victims.append(600+2*32) # block_hashes[2] # Share.loop is pretty conservative: it abandons the share at the # first sign of corruption. It doesn't strictly need to be this # way: if the UEB were corrupt, we could still get good block # data from that share, as long as there was a good copy of the # UEB elsewhere. If this behavior is relaxed, then corruption in # the following fields (which are present in multiple shares) # should fall into the "need3_victims" case instead of the # "need_4th_victims" case. need_4th_victims.append(376+2*32) # crypttext_hash_tree[2] need_4th_victims.append(824) # share_hashes need_4th_victims.append(994) # UEB length need_4th_victims.append(998) # UEB corrupt_me = ([(i,"no-sh0") for i in no_sh0_victims] + [(i, "0bad-need-3") for i in need3_victims] + [(i, "need-4th") for i in need_4th_victims]) if self.catalog_detection: corrupt_me = [(i, "") for i in range(len(self.sh0_orig))] for i,expected in corrupt_me: # All these tests result in a successful download. What we're # measuring is how many shares the downloader had to use. d.addCallback(self._corrupt_flip, imm_uri, i) d.addCallback(_download, imm_uri, i, expected) d.addCallback(lambda ign: self.restore_all_shares(self.shares)) d.addCallback(fireEventually) corrupt_values = [(3, 2, "no-sh0"), (15, 2, "need-4th"), # share looks v2 ] for i,newvalue,expected in corrupt_values: d.addCallback(self._corrupt_set, imm_uri, i, newvalue) d.addCallback(_download, imm_uri, i, expected) d.addCallback(lambda ign: self.restore_all_shares(self.shares)) d.addCallback(fireEventually) return d d.addCallback(_uploaded) def _show_results(ign): print print ("of [0:%d], corruption ignored in %s" % (len(self.sh0_orig), undetected.dump())) if self.catalog_detection: d.addCallback(_show_results) # of [0:2070], corruption ignored in len=1133: # [4-11],[16-23],[28-31],[152-439],[600-663],[1309-2069] # [4-11]: container sizes # [16-23]: share block/data sizes # [152-375]: plaintext hash tree # [376-408]: crypttext_hash_tree[0] (root) # [408-439]: crypttext_hash_tree[1] (computed) # [600-631]: block hash tree[0] (root) # [632-663]: block hash tree[1] (computed) # [1309-]: reserved+unused UEB space return d def test_failure(self): # this test corrupts all shares in the same way, and asserts that the # download fails. self.basedir = "download/Corruption/failure" self.set_up_grid() self.c0 = self.g.clients[0] # to exercise the block-hash-tree code properly, we need to have # multiple segments. We don't tell the downloader about the different # segsize, so it guesses wrong and must do extra roundtrips. 
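        # For reference, an illustrative summary (field names and offsets
        # taken from the comments and victim lists in this class, for the
        # small immutable share these tests generate) of where the corrupted
        # bytes land:
        _example_victim_offsets = {
            0:          "container version",
            12:         "share version",
            24:         "offset[data]",
            32:         "offset[crypttext_hash_tree]",
            36:         "offset[block_hashes]",
            44:         "offset[UEB]",
            48:         "first byte of block data",
            376 + 2*32: "crypttext_hash_tree[2]",
            600 + 2*32: "block_hashes[2]",
            824:        "share_hashes",
        }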
u = upload.Data(plaintext, None) u.max_segment_size = 120 # 3 segs, 4-wide hashtree d = self.c0.upload(u) def _uploaded(ur): imm_uri = ur.get_uri() self.shares = self.copy_shares(imm_uri) corrupt_me = [(48, "block data", "Last failure: None"), (600+2*32, "block_hashes[2]", "BadHashError"), (376+2*32, "crypttext_hash_tree[2]", "BadHashError"), (824, "share_hashes", "BadHashError"), ] def _download(imm_uri): n = self.c0.create_node_from_uri(imm_uri) n._cnode._maybe_create_download_node() # for this test to work, we need to have a new Node each time. # Make sure the NodeMaker's weakcache hasn't interfered. assert not n._cnode._node._shares return download_to_data(n) d = defer.succeed(None) for i,which,substring in corrupt_me: # All these tests result in a failed download. d.addCallback(self._corrupt_flip_all, imm_uri, i) d.addCallback(lambda ign, which=which, substring=substring: self.shouldFail(NoSharesError, which, substring, _download, imm_uri)) d.addCallback(lambda ign: self.restore_all_shares(self.shares)) d.addCallback(fireEventually) return d d.addCallback(_uploaded) return d def _corrupt_flip_all(self, ign, imm_uri, which): def _corruptor(s, debug=False): return s[:which] + chr(ord(s[which])^0x01) + s[which+1:] self.corrupt_all_shares(imm_uri, _corruptor) class DownloadV2(_Base, unittest.TestCase): # tests which exercise v2-share code. They first upload a file with # FORCE_V2 set. def setUp(self): d = defer.maybeDeferred(_Base.setUp, self) def _set_force_v2(ign): self.old_force_v2 = layout.FORCE_V2 layout.FORCE_V2 = True d.addCallback(_set_force_v2) return d def tearDown(self): layout.FORCE_V2 = self.old_force_v2 return _Base.tearDown(self) def test_download(self): self.basedir = self.mktemp() self.set_up_grid() self.c0 = self.g.clients[0] # upload a file u = upload.Data(plaintext, None) d = self.c0.upload(u) def _uploaded(ur): imm_uri = ur.get_uri() n = self.c0.create_node_from_uri(imm_uri) return download_to_data(n) d.addCallback(_uploaded) return d def test_download_no_overrun(self): self.basedir = self.mktemp() self.set_up_grid() self.c0 = self.g.clients[0] # tweak the client's copies of server-version data, so it believes # that they're old and can't handle reads that overrun the length of # the share. This exercises a different code path. 
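        # Illustrative only: a rough sketch (not the client's actual code) of
        # the clamping a reader must do when a server does not tolerate read
        # overrun -- the requested length is trimmed so that offset+length
        # never extends past the known share size.
        def _example_clamp_read(offset, length, share_size):
            return (offset, max(0, min(length, share_size - offset)))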
for s in self.c0.storage_broker.get_connected_servers(): rref = s.get_rref() v1 = rref.version["http://allmydata.org/tahoe/protocols/storage/v1"] v1["tolerates-immutable-read-overrun"] = False # upload a file u = upload.Data(plaintext, None) d = self.c0.upload(u) def _uploaded(ur): imm_uri = ur.get_uri() n = self.c0.create_node_from_uri(imm_uri) return download_to_data(n) d.addCallback(_uploaded) return d def OFF_test_no_overrun_corrupt_shver(self): # unnecessary self.basedir = self.mktemp() self.set_up_grid() self.c0 = self.g.clients[0] for s in self.c0.storage_broker.get_connected_servers(): rref = s.get_rref() v1 = rref.version["http://allmydata.org/tahoe/protocols/storage/v1"] v1["tolerates-immutable-read-overrun"] = False # upload a file u = upload.Data(plaintext, None) d = self.c0.upload(u) def _uploaded(ur): imm_uri = ur.get_uri() def _do_corrupt(which, newvalue): def _corruptor(s, debug=False): return s[:which] + chr(newvalue) + s[which+1:] self.corrupt_shares_numbered(imm_uri, [0], _corruptor) _do_corrupt(12+3, 0x00) n = self.c0.create_node_from_uri(imm_uri) d = download_to_data(n) def _got_data(data): self.failUnlessEqual(data, plaintext) d.addCallback(_got_data) return d d.addCallback(_uploaded) return d class Status(unittest.TestCase): def test_status(self): now = 12345.1 ds = DownloadStatus("si-1", 123) self.failUnlessEqual(ds.get_status(), "idle") ev0 = ds.add_segment_request(0, now) self.failUnlessEqual(ds.get_status(), "fetching segment 0") ev0.activate(now+0.5) ev0.deliver(now+1, 0, 1000, 2.0) self.failUnlessEqual(ds.get_status(), "idle") ev2 = ds.add_segment_request(2, now+2) del ev2 # hush pyflakes ev1 = ds.add_segment_request(1, now+2) self.failUnlessEqual(ds.get_status(), "fetching segments 1,2") ev1.error(now+3) self.failUnlessEqual(ds.get_status(), "fetching segment 2; errors on segment 1") def test_progress(self): now = 12345.1 ds = DownloadStatus("si-1", 123) self.failUnlessEqual(ds.get_progress(), 0.0) e = ds.add_read_event(0, 1000, now) self.failUnlessEqual(ds.get_progress(), 0.0) e.update(500, 2.0, 2.0) self.failUnlessEqual(ds.get_progress(), 0.5) e.finished(now+2) self.failUnlessEqual(ds.get_progress(), 1.0) e1 = ds.add_read_event(1000, 2000, now+3) e2 = ds.add_read_event(4000, 2000, now+3) self.failUnlessEqual(ds.get_progress(), 0.0) e1.update(1000, 2.0, 2.0) self.failUnlessEqual(ds.get_progress(), 0.25) e2.update(1000, 2.0, 2.0) self.failUnlessEqual(ds.get_progress(), 0.5) e1.update(1000, 2.0, 2.0) e1.finished(now+4) # now there is only one outstanding read, and it is 50% done self.failUnlessEqual(ds.get_progress(), 0.5) e2.update(1000, 2.0, 2.0) e2.finished(now+5) self.failUnlessEqual(ds.get_progress(), 1.0) def test_active(self): now = 12345.1 ds = DownloadStatus("si-1", 123) self.failUnlessEqual(ds.get_active(), False) e1 = ds.add_read_event(0, 1000, now) self.failUnlessEqual(ds.get_active(), True) e2 = ds.add_read_event(1, 1000, now+1) self.failUnlessEqual(ds.get_active(), True) e1.finished(now+2) self.failUnlessEqual(ds.get_active(), True) e2.finished(now+3) self.failUnlessEqual(ds.get_active(), False) def make_server(clientid): tubid = hashutil.tagged_hash("clientid", clientid)[:20] return NoNetworkServer(tubid, None) def make_servers(clientids): servers = {} for clientid in clientids: servers[clientid] = make_server(clientid) return servers class MyShare: def __init__(self, shnum, server, rtt): self._shnum = shnum self._server = server self._dyhb_rtt = rtt def __repr__(self): return "sh%d-on-%s" % (self._shnum, self._server.get_name()) class 
MySegmentFetcher(SegmentFetcher): def __init__(self, *args, **kwargs): SegmentFetcher.__init__(self, *args, **kwargs) self._test_start_shares = [] def _start_share(self, share, shnum): self._test_start_shares.append(share) class FakeNode: def __init__(self): self.want_more = 0 self.failed = None self.processed = None self._si_prefix = "si_prefix" def want_more_shares(self): self.want_more += 1 def fetch_failed(self, fetcher, f): self.failed = f def process_blocks(self, segnum, blocks): self.processed = (segnum, blocks) def get_num_segments(self): return 1, True class Selection(unittest.TestCase): def test_no_shares(self): node = FakeNode() sf = SegmentFetcher(node, 0, 3, None) sf.add_shares([]) d = flushEventualQueue() def _check1(ign): self.failUnlessEqual(node.want_more, 1) self.failUnlessEqual(node.failed, None) sf.no_more_shares() return flushEventualQueue() d.addCallback(_check1) def _check2(ign): self.failUnless(node.failed) self.failUnless(node.failed.check(NoSharesError)) d.addCallback(_check2) return d def test_only_one_share(self): node = FakeNode() sf = MySegmentFetcher(node, 0, 3, None) serverA = make_server("peer-A") shares = [MyShare(0, serverA, 0.0)] sf.add_shares(shares) d = flushEventualQueue() def _check1(ign): self.failUnlessEqual(node.want_more, 1) self.failUnlessEqual(node.failed, None) sf.no_more_shares() return flushEventualQueue() d.addCallback(_check1) def _check2(ign): self.failUnless(node.failed) self.failUnless(node.failed.check(NotEnoughSharesError)) sname = serverA.get_name() self.failUnlessIn("complete= pending=sh0-on-%s overdue= unused=" % sname, str(node.failed)) d.addCallback(_check2) return d def test_good_diversity_early(self): node = FakeNode() sf = MySegmentFetcher(node, 0, 3, None) shares = [MyShare(i, make_server("peer-%d" % i), i) for i in range(10)] sf.add_shares(shares) d = flushEventualQueue() def _check1(ign): self.failUnlessEqual(node.want_more, 0) self.failUnlessEqual(sf._test_start_shares, shares[:3]) for sh in sf._test_start_shares: sf._block_request_activity(sh, sh._shnum, COMPLETE, "block-%d" % sh._shnum) return flushEventualQueue() d.addCallback(_check1) def _check2(ign): self.failIfEqual(node.processed, None) self.failUnlessEqual(node.processed, (0, {0: "block-0", 1: "block-1", 2: "block-2"}) ) d.addCallback(_check2) return d def test_good_diversity_late(self): node = FakeNode() sf = MySegmentFetcher(node, 0, 3, None) shares = [MyShare(i, make_server("peer-%d" % i), i) for i in range(10)] sf.add_shares([]) d = flushEventualQueue() def _check1(ign): self.failUnlessEqual(node.want_more, 1) sf.add_shares(shares) return flushEventualQueue() d.addCallback(_check1) def _check2(ign): self.failUnlessEqual(sf._test_start_shares, shares[:3]) for sh in sf._test_start_shares: sf._block_request_activity(sh, sh._shnum, COMPLETE, "block-%d" % sh._shnum) return flushEventualQueue() d.addCallback(_check2) def _check3(ign): self.failIfEqual(node.processed, None) self.failUnlessEqual(node.processed, (0, {0: "block-0", 1: "block-1", 2: "block-2"}) ) d.addCallback(_check3) return d def test_avoid_bad_diversity_late(self): node = FakeNode() sf = MySegmentFetcher(node, 0, 3, None) # we could satisfy the read entirely from the first server, but we'd # prefer not to. 
        # Instead, we expect to only pull one share from the
        # first server
        servers = make_servers(["peer-A", "peer-B", "peer-C"])
        shares = [MyShare(0, servers["peer-A"], 0.0),
                  MyShare(1, servers["peer-A"], 0.0),
                  MyShare(2, servers["peer-A"], 0.0),
                  MyShare(3, servers["peer-B"], 1.0),
                  MyShare(4, servers["peer-C"], 2.0),
                  ]
        sf.add_shares([])
        d = flushEventualQueue()
        def _check1(ign):
            self.failUnlessEqual(node.want_more, 1)
            sf.add_shares(shares)
            return flushEventualQueue()
        d.addCallback(_check1)
        def _check2(ign):
            self.failUnlessEqual(sf._test_start_shares,
                                 [shares[0], shares[3], shares[4]])
            for sh in sf._test_start_shares:
                sf._block_request_activity(sh, sh._shnum, COMPLETE,
                                           "block-%d" % sh._shnum)
            return flushEventualQueue()
        d.addCallback(_check2)
        def _check3(ign):
            self.failIfEqual(node.processed, None)
            self.failUnlessEqual(node.processed, (0, {0: "block-0",
                                                      3: "block-3",
                                                      4: "block-4"}) )
        d.addCallback(_check3)
        return d

    def test_suffer_bad_diversity_late(self):
        node = FakeNode()
        sf = MySegmentFetcher(node, 0, 3, None)
        # we satisfy the read entirely from the first server because we don't
        # have any other choice.
        serverA = make_server("peer-A")
        shares = [MyShare(0, serverA, 0.0),
                  MyShare(1, serverA, 0.0),
                  MyShare(2, serverA, 0.0),
                  MyShare(3, serverA, 0.0),
                  MyShare(4, serverA, 0.0),
                  ]
        sf.add_shares([])
        d = flushEventualQueue()
        def _check1(ign):
            self.failUnlessEqual(node.want_more, 1)
            sf.add_shares(shares)
            return flushEventualQueue()
        d.addCallback(_check1)
        def _check2(ign):
            self.failUnlessEqual(node.want_more, 3)
            self.failUnlessEqual(sf._test_start_shares,
                                 [shares[0], shares[1], shares[2]])
            for sh in sf._test_start_shares:
                sf._block_request_activity(sh, sh._shnum, COMPLETE,
                                           "block-%d" % sh._shnum)
            return flushEventualQueue()
        d.addCallback(_check2)
        def _check3(ign):
            self.failIfEqual(node.processed, None)
            self.failUnlessEqual(node.processed, (0, {0: "block-0",
                                                      1: "block-1",
                                                      2: "block-2"}) )
        d.addCallback(_check3)
        return d

    def test_suffer_bad_diversity_early(self):
        node = FakeNode()
        sf = MySegmentFetcher(node, 0, 3, None)
        # we satisfy the read entirely from the first server because we don't
        # have any other choice.
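        # Illustrative only: a simplified sketch (not SegmentFetcher's real
        # selection logic) of the diversity preference these tests exercise:
        # take at most one share per server first, and fall back to reusing
        # a server only when fewer than k distinct servers are available.
        def _example_pick_shares(shares, k):
            picked, seen_servers = [], set()
            for s in shares:
                if s._server not in seen_servers:
                    picked.append(s)
                    seen_servers.add(s._server)
                if len(picked) == k:
                    return picked
            for s in shares:
                if len(picked) == k:
                    break
                if s not in picked:
                    picked.append(s)
            return picked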
serverA = make_server("peer-A") shares = [MyShare(0, serverA, 0.0), MyShare(1, serverA, 0.0), MyShare(2, serverA, 0.0), MyShare(3, serverA, 0.0), MyShare(4, serverA, 0.0), ] sf.add_shares(shares) d = flushEventualQueue() def _check1(ign): self.failUnlessEqual(node.want_more, 2) self.failUnlessEqual(sf._test_start_shares, [shares[0], shares[1], shares[2]]) for sh in sf._test_start_shares: sf._block_request_activity(sh, sh._shnum, COMPLETE, "block-%d" % sh._shnum) return flushEventualQueue() d.addCallback(_check1) def _check2(ign): self.failIfEqual(node.processed, None) self.failUnlessEqual(node.processed, (0, {0: "block-0", 1: "block-1", 2: "block-2"}) ) d.addCallback(_check2) return d def test_overdue(self): node = FakeNode() sf = MySegmentFetcher(node, 0, 3, None) shares = [MyShare(i, make_server("peer-%d" % i), i) for i in range(10)] sf.add_shares(shares) d = flushEventualQueue() def _check1(ign): self.failUnlessEqual(node.want_more, 0) self.failUnlessEqual(sf._test_start_shares, shares[:3]) for sh in sf._test_start_shares: sf._block_request_activity(sh, sh._shnum, OVERDUE) return flushEventualQueue() d.addCallback(_check1) def _check2(ign): self.failUnlessEqual(sf._test_start_shares, shares[:6]) for sh in sf._test_start_shares[3:]: sf._block_request_activity(sh, sh._shnum, COMPLETE, "block-%d" % sh._shnum) return flushEventualQueue() d.addCallback(_check2) def _check3(ign): self.failIfEqual(node.processed, None) self.failUnlessEqual(node.processed, (0, {3: "block-3", 4: "block-4", 5: "block-5"}) ) d.addCallback(_check3) return d def test_overdue_fails(self): node = FakeNode() sf = MySegmentFetcher(node, 0, 3, None) servers = make_servers(["peer-%d" % i for i in range(6)]) shares = [MyShare(i, servers["peer-%d" % i], i) for i in range(6)] sf.add_shares(shares) sf.no_more_shares() d = flushEventualQueue() def _check1(ign): self.failUnlessEqual(node.want_more, 0) self.failUnlessEqual(sf._test_start_shares, shares[:3]) for sh in sf._test_start_shares: sf._block_request_activity(sh, sh._shnum, OVERDUE) return flushEventualQueue() d.addCallback(_check1) def _check2(ign): self.failUnlessEqual(sf._test_start_shares, shares[:6]) for sh in sf._test_start_shares[3:]: sf._block_request_activity(sh, sh._shnum, DEAD) return flushEventualQueue() d.addCallback(_check2) def _check3(ign): # we're still waiting self.failUnlessEqual(node.processed, None) self.failUnlessEqual(node.failed, None) # now complete one of the overdue ones, and kill one of the other # ones, leaving one hanging. This should trigger a failure, since # we cannot succeed. live = sf._test_start_shares[0] die = sf._test_start_shares[1] sf._block_request_activity(live, live._shnum, COMPLETE, "block") sf._block_request_activity(die, die._shnum, DEAD) return flushEventualQueue() d.addCallback(_check3) def _check4(ign): self.failUnless(node.failed) self.failUnless(node.failed.check(NotEnoughSharesError)) sname = servers["peer-2"].get_name() self.failUnlessIn("complete=sh0 pending= overdue=sh2-on-%s unused=" % sname, str(node.failed)) d.addCallback(_check4) return d def test_avoid_redundancy(self): node = FakeNode() sf = MySegmentFetcher(node, 0, 3, None) # we could satisfy the read entirely from the first server, but we'd # prefer not to. 
Instead, we expect to only pull one share from the # first server servers = make_servers(["peer-A", "peer-B", "peer-C", "peer-D", "peer-E"]) shares = [MyShare(0, servers["peer-A"],0.0), MyShare(1, servers["peer-B"],1.0), MyShare(0, servers["peer-C"],2.0), # this will be skipped MyShare(1, servers["peer-D"],3.0), MyShare(2, servers["peer-E"],4.0), ] sf.add_shares(shares[:3]) d = flushEventualQueue() def _check1(ign): self.failUnlessEqual(node.want_more, 1) self.failUnlessEqual(sf._test_start_shares, [shares[0], shares[1]]) # allow sh1 to retire sf._block_request_activity(shares[1], 1, COMPLETE, "block-1") return flushEventualQueue() d.addCallback(_check1) def _check2(ign): # and then feed in the remaining shares sf.add_shares(shares[3:]) sf.no_more_shares() return flushEventualQueue() d.addCallback(_check2) def _check3(ign): self.failUnlessEqual(sf._test_start_shares, [shares[0], shares[1], shares[4]]) sf._block_request_activity(shares[0], 0, COMPLETE, "block-0") sf._block_request_activity(shares[4], 2, COMPLETE, "block-2") return flushEventualQueue() d.addCallback(_check3) def _check4(ign): self.failIfEqual(node.processed, None) self.failUnlessEqual(node.processed, (0, {0: "block-0", 1: "block-1", 2: "block-2"}) ) d.addCallback(_check4) return d tahoe-lafs-1.10.0/src/allmydata/test/test_drop_upload.py000066400000000000000000000200311221140116300232360ustar00rootroot00000000000000 import os, sys from twisted.trial import unittest from twisted.python import filepath, runtime from twisted.internet import defer from allmydata.interfaces import IDirectoryNode, NoSuchChildError from allmydata.util import fake_inotify from allmydata.util.encodingutil import get_filesystem_encoding from allmydata.util.consumer import download_to_data from allmydata.test.no_network import GridTestMixin from allmydata.test.common_util import ReallyEqualMixin, NonASCIIPathMixin from allmydata.test.common import ShouldFailMixin from allmydata.frontends.drop_upload import DropUploader class DropUploadTestMixin(GridTestMixin, ShouldFailMixin, ReallyEqualMixin, NonASCIIPathMixin): """ These tests will be run both with a mock notifier, and (on platforms that support it) with the real INotify. """ def _get_count(self, name): return self.stats_provider.get_stats()["counters"].get(name, 0) def _test(self): self.uploader = None self.set_up_grid() self.local_dir = os.path.join(self.basedir, self.unicode_or_fallback(u"loc\u0101l_dir", u"local_dir")) self.mkdir_nonascii(self.local_dir) self.client = self.g.clients[0] self.stats_provider = self.client.stats_provider d = self.client.create_dirnode() def _made_upload_dir(n): self.failUnless(IDirectoryNode.providedBy(n)) self.upload_dirnode = n self.upload_dircap = n.get_uri() self.uploader = DropUploader(self.client, self.upload_dircap, self.local_dir.encode('utf-8'), inotify=self.inotify) return self.uploader.startService() d.addCallback(_made_upload_dir) # Write something short enough for a LIT file. d.addCallback(lambda ign: self._test_file(u"short", "test")) # Write to the same file again with different data. d.addCallback(lambda ign: self._test_file(u"short", "different")) # Test that temporary files are not uploaded. d.addCallback(lambda ign: self._test_file(u"tempfile", "test", temporary=True)) # Test that we tolerate creation of a subdirectory. d.addCallback(lambda ign: os.mkdir(os.path.join(self.local_dir, u"directory"))) # Write something longer, and also try to test a Unicode name if the fs can represent it. 
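# --- illustrative aside, not part of the original test module ---
# The drop-upload test here only tries a non-ASCII filename when the
# local filesystem encoding can represent it (see the
# unicode_or_fallback() call just below), falling back to a plain
# ASCII name otherwise.  A minimal sketch of that decision; the
# function name below is hypothetical and this is not the
# implementation of the test's unicode_or_fallback() helper, which
# lives elsewhere:

import sys

def unicode_or_ascii_fallback(preferred_u, fallback_u, encoding=None):
    """Return preferred_u if it is representable, else fallback_u."""
    if encoding is None:
        encoding = sys.getfilesystemencoding() or "ascii"
    try:
        preferred_u.encode(encoding)
        return preferred_u
    except (UnicodeEncodeError, LookupError):
        return fallback_u

# e.g. unicode_or_ascii_fallback(u"l\u00F8ng", u"long") keeps the
# accented name on a UTF-8 filesystem and returns u"long" on an
# ASCII-only one.
# --- end aside ---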
name_u = self.unicode_or_fallback(u"l\u00F8ng", u"long") d.addCallback(lambda ign: self._test_file(name_u, "test"*100)) # TODO: test that causes an upload failure. d.addCallback(lambda ign: self.failUnlessReallyEqual(self._get_count('drop_upload.files_failed'), 0)) # Prevent unclean reactor errors. def _cleanup(res): d = defer.succeed(None) if self.uploader is not None: d.addCallback(lambda ign: self.uploader.finish(for_tests=True)) d.addCallback(lambda ign: res) return d d.addBoth(_cleanup) return d def _test_file(self, name_u, data, temporary=False): previously_uploaded = self._get_count('drop_upload.files_uploaded') previously_disappeared = self._get_count('drop_upload.files_disappeared') d = defer.Deferred() # Note: this relies on the fact that we only get one IN_CLOSE_WRITE notification per file # (otherwise we would get a defer.AlreadyCalledError). Should we be relying on that? self.uploader.set_uploaded_callback(d.callback) path_u = os.path.join(self.local_dir, name_u) if sys.platform == "win32": path = filepath.FilePath(path_u) else: path = filepath.FilePath(path_u.encode(get_filesystem_encoding())) # We don't use FilePath.setContent() here because it creates a temporary file that # is renamed into place, which causes events that the test is not expecting. f = open(path.path, "wb") try: if temporary and sys.platform != "win32": os.unlink(path.path) f.write(data) finally: f.close() if temporary and sys.platform == "win32": os.unlink(path.path) self.notify_close_write(path) if temporary: d.addCallback(lambda ign: self.shouldFail(NoSuchChildError, 'temp file not uploaded', None, self.upload_dirnode.get, name_u)) d.addCallback(lambda ign: self.failUnlessReallyEqual(self._get_count('drop_upload.files_disappeared'), previously_disappeared + 1)) else: d.addCallback(lambda ign: self.upload_dirnode.get(name_u)) d.addCallback(download_to_data) d.addCallback(lambda actual_data: self.failUnlessReallyEqual(actual_data, data)) d.addCallback(lambda ign: self.failUnlessReallyEqual(self._get_count('drop_upload.files_uploaded'), previously_uploaded + 1)) d.addCallback(lambda ign: self.failUnlessReallyEqual(self._get_count('drop_upload.files_queued'), 0)) return d class MockTest(DropUploadTestMixin, unittest.TestCase): """This can run on any platform, and even if twisted.internet.inotify can't be imported.""" def test_errors(self): self.basedir = "drop_upload.MockTest.test_errors" self.set_up_grid() errors_dir = os.path.join(self.basedir, "errors_dir") os.mkdir(errors_dir) client = self.g.clients[0] d = client.create_dirnode() def _made_upload_dir(n): self.failUnless(IDirectoryNode.providedBy(n)) upload_dircap = n.get_uri() readonly_dircap = n.get_readonly_uri() self.shouldFail(AssertionError, 'invalid local.directory', 'could not be represented', DropUploader, client, upload_dircap, '\xFF', inotify=fake_inotify) self.shouldFail(AssertionError, 'nonexistent local.directory', 'there is no directory', DropUploader, client, upload_dircap, os.path.join(self.basedir, "Laputa"), inotify=fake_inotify) fp = filepath.FilePath(self.basedir).child('NOT_A_DIR') fp.touch() self.shouldFail(AssertionError, 'non-directory local.directory', 'is not a directory', DropUploader, client, upload_dircap, fp.path, inotify=fake_inotify) self.shouldFail(AssertionError, 'bad upload.dircap', 'does not refer to a directory', DropUploader, client, 'bad', errors_dir, inotify=fake_inotify) self.shouldFail(AssertionError, 'non-directory upload.dircap', 'does not refer to a directory', DropUploader, client, 'URI:LIT:foo', errors_dir, 
inotify=fake_inotify) self.shouldFail(AssertionError, 'readonly upload.dircap', 'is not a writecap to a directory', DropUploader, client, readonly_dircap, errors_dir, inotify=fake_inotify) d.addCallback(_made_upload_dir) return d def test_drop_upload(self): self.inotify = fake_inotify self.basedir = "drop_upload.MockTest.test_drop_upload" return self._test() def notify_close_write(self, path): self.uploader._notifier.event(path, self.inotify.IN_CLOSE_WRITE) class RealTest(DropUploadTestMixin, unittest.TestCase): """This is skipped unless both Twisted and the platform support inotify.""" def test_drop_upload(self): # We should always have runtime.platform.supportsINotify, because we're using # Twisted >= 10.1. if not runtime.platform.supportsINotify(): raise unittest.SkipTest("Drop-upload support can only be tested for-real on an OS that supports inotify or equivalent.") self.inotify = None # use the appropriate inotify for the platform self.basedir = "drop_upload.RealTest.test_drop_upload" return self._test() def notify_close_write(self, path): # Writing to the file causes the notification. pass tahoe-lafs-1.10.0/src/allmydata/test/test_encode.py000066400000000000000000000367201221140116300221770ustar00rootroot00000000000000from zope.interface import implements from twisted.trial import unittest from twisted.internet import defer from twisted.python.failure import Failure from foolscap.api import fireEventually from allmydata import uri from allmydata.immutable import encode, upload, checker from allmydata.util import hashutil from allmydata.util.assertutil import _assert from allmydata.util.consumer import download_to_data from allmydata.interfaces import IStorageBucketWriter, IStorageBucketReader from allmydata.test.no_network import GridTestMixin class LostPeerError(Exception): pass def flip_bit(good): # flips the last bit return good[:-1] + chr(ord(good[-1]) ^ 0x01) class FakeBucketReaderWriterProxy: implements(IStorageBucketWriter, IStorageBucketReader) # these are used for both reading and writing def __init__(self, mode="good", peerid="peer"): self.mode = mode self.blocks = {} self.plaintext_hashes = [] self.crypttext_hashes = [] self.block_hashes = None self.share_hashes = None self.closed = False self.peerid = peerid def get_peerid(self): return self.peerid def _start(self): if self.mode == "lost-early": f = Failure(LostPeerError("I went away early")) return fireEventually(f) return defer.succeed(self) def put_header(self): return self._start() def put_block(self, segmentnum, data): if self.mode == "lost-early": f = Failure(LostPeerError("I went away early")) return fireEventually(f) def _try(): assert not self.closed assert segmentnum not in self.blocks if self.mode == "lost" and segmentnum >= 1: raise LostPeerError("I'm going away now") self.blocks[segmentnum] = data return defer.maybeDeferred(_try) def put_crypttext_hashes(self, hashes): def _try(): assert not self.closed assert not self.crypttext_hashes self.crypttext_hashes = hashes return defer.maybeDeferred(_try) def put_block_hashes(self, blockhashes): def _try(): assert not self.closed assert self.block_hashes is None self.block_hashes = blockhashes return defer.maybeDeferred(_try) def put_share_hashes(self, sharehashes): def _try(): assert not self.closed assert self.share_hashes is None self.share_hashes = sharehashes return defer.maybeDeferred(_try) def put_uri_extension(self, uri_extension): def _try(): assert not self.closed self.uri_extension = uri_extension return defer.maybeDeferred(_try) def close(self): def 
_try(): assert not self.closed self.closed = True return defer.maybeDeferred(_try) def abort(self): return defer.succeed(None) def get_block_data(self, blocknum, blocksize, size): d = self._start() def _try(unused=None): assert isinstance(blocknum, (int, long)) if self.mode == "bad block": return flip_bit(self.blocks[blocknum]) return self.blocks[blocknum] d.addCallback(_try) return d def get_plaintext_hashes(self): d = self._start() def _try(unused=None): hashes = self.plaintext_hashes[:] return hashes d.addCallback(_try) return d def get_crypttext_hashes(self): d = self._start() def _try(unused=None): hashes = self.crypttext_hashes[:] if self.mode == "bad crypttext hashroot": hashes[0] = flip_bit(hashes[0]) if self.mode == "bad crypttext hash": hashes[1] = flip_bit(hashes[1]) return hashes d.addCallback(_try) return d def get_block_hashes(self, at_least_these=()): d = self._start() def _try(unused=None): if self.mode == "bad blockhash": hashes = self.block_hashes[:] hashes[1] = flip_bit(hashes[1]) return hashes return self.block_hashes d.addCallback(_try) return d def get_share_hashes(self, at_least_these=()): d = self._start() def _try(unused=None): if self.mode == "bad sharehash": hashes = self.share_hashes[:] hashes[1] = (hashes[1][0], flip_bit(hashes[1][1])) return hashes if self.mode == "missing sharehash": # one sneaky attack would be to pretend we don't know our own # sharehash, which could manage to frame someone else. # download.py is supposed to guard against this case. return [] return self.share_hashes d.addCallback(_try) return d def get_uri_extension(self): d = self._start() def _try(unused=None): if self.mode == "bad uri_extension": return flip_bit(self.uri_extension) return self.uri_extension d.addCallback(_try) return d def make_data(length): data = "happy happy joy joy" * 100 assert length <= len(data) return data[:length] class ValidatedExtendedURIProxy(unittest.TestCase): timeout = 240 # It takes longer than 120 seconds on Francois's arm box. 
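# --- illustrative aside, not part of the original test module ---
# The fake bucket proxy above injects corruption by flipping a single
# bit (see flip_bit() near the top of this file); one flipped bit is
# enough to make every downstream hash check fail, which is what the
# "bad block" / "bad crypttext hash" / "bad sharehash" modes rely on.
# A small self-contained check of that property; hashlib stands in for
# allmydata.util.hashutil purely for illustration:

import hashlib

def flip_last_bit(good):
    # same idea as flip_bit() above: XOR the last byte with 0x01
    return good[:-1] + chr(ord(good[-1]) ^ 0x01)

original = "block data" * 10
corrupted = flip_last_bit(original)
assert len(corrupted) == len(original)
assert ord(original[-1]) ^ ord(corrupted[-1]) == 0x01   # exactly one bit differs
assert hashlib.sha256(original).digest() != hashlib.sha256(corrupted).digest()
# --- end aside ---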
K = 4 M = 10 SIZE = 200 SEGSIZE = 72 _TMP = SIZE%SEGSIZE if _TMP == 0: _TMP = SEGSIZE if _TMP % K != 0: _TMP += (K - (_TMP % K)) TAIL_SEGSIZE = _TMP _TMP = SIZE / SEGSIZE if SIZE % SEGSIZE != 0: _TMP += 1 NUM_SEGMENTS = _TMP mindict = { 'segment_size': SEGSIZE, 'crypttext_root_hash': '0'*hashutil.CRYPTO_VAL_SIZE, 'share_root_hash': '1'*hashutil.CRYPTO_VAL_SIZE } optional_consistent = { 'crypttext_hash': '2'*hashutil.CRYPTO_VAL_SIZE, 'codec_name': "crs", 'codec_params': "%d-%d-%d" % (SEGSIZE, K, M), 'tail_codec_params': "%d-%d-%d" % (TAIL_SEGSIZE, K, M), 'num_segments': NUM_SEGMENTS, 'size': SIZE, 'needed_shares': K, 'total_shares': M, 'plaintext_hash': "anything", 'plaintext_root_hash': "anything", } # optional_inconsistent = { 'crypttext_hash': ('2'*(hashutil.CRYPTO_VAL_SIZE-1), "", 77), optional_inconsistent = { 'crypttext_hash': (77,), 'codec_name': ("digital fountain", ""), 'codec_params': ("%d-%d-%d" % (SEGSIZE, K-1, M), "%d-%d-%d" % (SEGSIZE-1, K, M), "%d-%d-%d" % (SEGSIZE, K, M-1)), 'tail_codec_params': ("%d-%d-%d" % (TAIL_SEGSIZE, K-1, M), "%d-%d-%d" % (TAIL_SEGSIZE-1, K, M), "%d-%d-%d" % (TAIL_SEGSIZE, K, M-1)), 'num_segments': (NUM_SEGMENTS-1,), 'size': (SIZE-1,), 'needed_shares': (K-1,), 'total_shares': (M-1,), } def _test(self, uebdict): uebstring = uri.pack_extension(uebdict) uebhash = hashutil.uri_extension_hash(uebstring) fb = FakeBucketReaderWriterProxy() fb.put_uri_extension(uebstring) verifycap = uri.CHKFileVerifierURI(storage_index='x'*16, uri_extension_hash=uebhash, needed_shares=self.K, total_shares=self.M, size=self.SIZE) vup = checker.ValidatedExtendedURIProxy(fb, verifycap) return vup.start() def _test_accept(self, uebdict): return self._test(uebdict) def _should_fail(self, res, expected_failures): if isinstance(res, Failure): res.trap(*expected_failures) else: self.fail("was supposed to raise %s, not get '%s'" % (expected_failures, res)) def _test_reject(self, uebdict): d = self._test(uebdict) d.addBoth(self._should_fail, (KeyError, checker.BadURIExtension)) return d def test_accept_minimal(self): return self._test_accept(self.mindict) def test_reject_insufficient(self): dl = [] for k in self.mindict.iterkeys(): insuffdict = self.mindict.copy() del insuffdict[k] d = self._test_reject(insuffdict) dl.append(d) return defer.DeferredList(dl) def test_accept_optional(self): dl = [] for k in self.optional_consistent.iterkeys(): mydict = self.mindict.copy() mydict[k] = self.optional_consistent[k] d = self._test_accept(mydict) dl.append(d) return defer.DeferredList(dl) def test_reject_optional(self): dl = [] for k in self.optional_inconsistent.iterkeys(): for v in self.optional_inconsistent[k]: mydict = self.mindict.copy() mydict[k] = v d = self._test_reject(mydict) dl.append(d) return defer.DeferredList(dl) class Encode(unittest.TestCase): timeout = 2400 # It takes longer than 240 seconds on Zandr's ARM box. 
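# --- illustrative aside, not part of the original test module ---
# The class-level arithmetic in ValidatedExtendedURIProxy above reads
# as: the tail segment is whatever is left after the full SEGSIZE
# segments, rounded up to a multiple of K, and the segment count is
# the ceiling of SIZE/SEGSIZE.  A standalone restatement of that
# computation (the helper names are illustrative, not codebase APIs):

def tail_segment_size(size, segsize, k):
    tail = size % segsize
    if tail == 0:
        tail = segsize
    if tail % k != 0:
        tail += k - (tail % k)      # round up to a multiple of k
    return tail

def num_segments(size, segsize):
    return (size + segsize - 1) // segsize   # ceiling division

# With the constants used above (SIZE=200, SEGSIZE=72, K=4):
assert tail_segment_size(200, 72, 4) == 56   # 200 % 72 == 56, already a multiple of 4
assert num_segments(200, 72) == 3            # segments of 72, 72 and 56 bytes
# --- end aside ---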
def do_encode(self, max_segment_size, datalen, NUM_SHARES, NUM_SEGMENTS, expected_block_hashes, expected_share_hashes): data = make_data(datalen) # force use of multiple segments e = encode.Encoder() u = upload.Data(data, convergence="some convergence string") u.set_default_encoding_parameters({'max_segment_size': max_segment_size, 'k': 25, 'happy': 75, 'n': 100}) eu = upload.EncryptAnUploadable(u) d = e.set_encrypted_uploadable(eu) all_shareholders = [] def _ready(res): k,happy,n = e.get_param("share_counts") _assert(n == NUM_SHARES) # else we'll be completely confused numsegs = e.get_param("num_segments") _assert(numsegs == NUM_SEGMENTS, numsegs, NUM_SEGMENTS) segsize = e.get_param("segment_size") _assert( (NUM_SEGMENTS-1)*segsize < len(data) <= NUM_SEGMENTS*segsize, NUM_SEGMENTS, segsize, (NUM_SEGMENTS-1)*segsize, len(data), NUM_SEGMENTS*segsize) shareholders = {} servermap = {} for shnum in range(NUM_SHARES): peer = FakeBucketReaderWriterProxy() shareholders[shnum] = peer servermap.setdefault(shnum, set()).add(peer.get_peerid()) all_shareholders.append(peer) e.set_shareholders(shareholders, servermap) return e.start() d.addCallback(_ready) def _check(res): verifycap = res self.failUnless(isinstance(verifycap.uri_extension_hash, str)) self.failUnlessEqual(len(verifycap.uri_extension_hash), 32) for i,peer in enumerate(all_shareholders): self.failUnless(peer.closed) self.failUnlessEqual(len(peer.blocks), NUM_SEGMENTS) # each peer gets a full tree of block hashes. For 3 or 4 # segments, that's 7 hashes. For 5 segments it's 15 hashes. self.failUnlessEqual(len(peer.block_hashes), expected_block_hashes) for h in peer.block_hashes: self.failUnlessEqual(len(h), 32) # each peer also gets their necessary chain of share hashes. # For 100 shares (rounded up to 128 leaves), that's 8 hashes self.failUnlessEqual(len(peer.share_hashes), expected_share_hashes) for (hashnum, h) in peer.share_hashes: self.failUnless(isinstance(hashnum, int)) self.failUnlessEqual(len(h), 32) d.addCallback(_check) return d def test_send_74(self): # 3 segments (25, 25, 24) return self.do_encode(25, 74, 100, 3, 7, 8) def test_send_75(self): # 3 segments (25, 25, 25) return self.do_encode(25, 75, 100, 3, 7, 8) def test_send_51(self): # 3 segments (25, 25, 1) return self.do_encode(25, 51, 100, 3, 7, 8) def test_send_76(self): # encode a 76 byte file (in 4 segments: 25,25,25,1) to 100 shares return self.do_encode(25, 76, 100, 4, 7, 8) def test_send_99(self): # 4 segments: 25,25,25,24 return self.do_encode(25, 99, 100, 4, 7, 8) def test_send_100(self): # 4 segments: 25,25,25,25 return self.do_encode(25, 100, 100, 4, 7, 8) def test_send_124(self): # 5 segments: 25, 25, 25, 25, 24 return self.do_encode(25, 124, 100, 5, 15, 8) def test_send_125(self): # 5 segments: 25, 25, 25, 25, 25 return self.do_encode(25, 125, 100, 5, 15, 8) def test_send_101(self): # 5 segments: 25, 25, 25, 25, 1 return self.do_encode(25, 101, 100, 5, 15, 8) class Roundtrip(GridTestMixin, unittest.TestCase): # a series of 3*3 tests to check out edge conditions. One axis is how the # plaintext is divided into segments: kn+(-1,0,1). Another way to express # this is n%k == -1 or 0 or 1. For example, for 25-byte segments, we # might test 74 bytes, 75 bytes, and 76 bytes. # on the other axis is how many leaves in the block hash tree we wind up # with, relative to a power of 2, so 2^a+(-1,0,1). Each segment turns # into a single leaf. So we'd like to check out, e.g., 3 segments, 4 # segments, and 5 segments. 
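# --- illustrative aside, not part of the original test module ---
# The expected_block_hashes / expected_share_hashes arguments passed to
# do_encode() above follow from two facts about the Merkle trees
# involved: leaves are padded up to a power of two P, a *full* tree
# over P leaves has 2*P - 1 nodes (the block hash tree is sent whole),
# and a leaf-to-root chain that includes the leaf itself holds
# log2(P) + 1 hashes (the share hash chain).  A standalone restatement
# with illustrative helper names:

def next_power_of_two(n):
    p = 1
    while p < n:
        p *= 2
    return p

def full_tree_size(leaves):
    return 2 * next_power_of_two(leaves) - 1

def chain_length(leaves):
    p = next_power_of_two(leaves)
    length = 1                     # the leaf hash itself
    while p > 1:
        length += 1                # one sibling hash per level
        p //= 2
    return length

assert full_tree_size(3) == 7      # 3 or 4 segments -> 7 block hashes
assert full_tree_size(4) == 7
assert full_tree_size(5) == 15     # 5 segments -> 15 block hashes
assert chain_length(100) == 8      # 100 shares -> 128 leaves -> 8-long share hash chain
# --- end aside ---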
# that results in the following series of data lengths: # 3 segs: 74, 75, 51 # 4 segs: 99, 100, 76 # 5 segs: 124, 125, 101 # all tests encode to 100 shares, which means the share hash tree will # have 128 leaves, which means that buckets will be given an 8-long share # hash chain # all 3-segment files will have a 4-leaf blockhashtree, and thus expect # to get 7 blockhashes. 4-segment files will also get 4-leaf block hash # trees and 7 blockhashes. 5-segment files will get 8-leaf block hash # trees, which gets 15 blockhashes. def test_74(self): return self.do_test_size(74) def test_75(self): return self.do_test_size(75) def test_51(self): return self.do_test_size(51) def test_99(self): return self.do_test_size(99) def test_100(self): return self.do_test_size(100) def test_76(self): return self.do_test_size(76) def test_124(self): return self.do_test_size(124) def test_125(self): return self.do_test_size(125) def test_101(self): return self.do_test_size(101) def upload(self, data): u = upload.Data(data, None) u.max_segment_size = 25 u.encoding_param_k = 25 u.encoding_param_happy = 1 u.encoding_param_n = 100 d = self.c0.upload(u) d.addCallback(lambda ur: self.c0.create_node_from_uri(ur.get_uri())) # returns a FileNode return d def do_test_size(self, size): self.basedir = self.mktemp() self.set_up_grid() self.c0 = self.g.clients[0] DATA = "p"*size d = self.upload(DATA) d.addCallback(lambda n: download_to_data(n)) def _downloaded(newdata): self.failUnlessEqual(newdata, DATA) d.addCallback(_downloaded) return d tahoe-lafs-1.10.0/src/allmydata/test/test_encodingutil.py000066400000000000000000000407721221140116300234300ustar00rootroot00000000000000 lumiere_nfc = u"lumi\u00E8re" Artonwall_nfc = u"\u00C4rtonwall.mp3" Artonwall_nfd = u"A\u0308rtonwall.mp3" TEST_FILENAMES = ( Artonwall_nfc, u'test_file', u'Blah blah.txt', ) # The following main helps to generate a test class for other operating # systems. 
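# --- illustrative aside, not part of the original test module ---
# Artonwall_nfc and Artonwall_nfd above are the same filename in two
# Unicode normalization forms: U+00C4 (A with diaeresis) versus
# "A" plus U+0308 (combining diaeresis).  Mac OS X reports names in
# the decomposed form, which is why the listdir tests later in this
# file compare normalized names.  A quick standalone check using the
# stdlib unicodedata module:

import unicodedata

nfc = u"\u00C4rtonwall.mp3"
nfd = u"A\u0308rtonwall.mp3"

assert nfc != nfd                                        # different code points
assert unicodedata.normalize('NFC', nfd) == nfc          # but canonically equivalent
assert unicodedata.normalize('NFD', nfc) == nfd
assert unicodedata.normalize('NFC', nfc) == unicodedata.normalize('NFC', nfd)
# --- end aside ---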
if __name__ == "__main__": import sys, os import tempfile import shutil import platform if len(sys.argv) != 2: print "Usage: %s lumire" % sys.argv[0] sys.exit(1) if sys.platform == "win32": try: from allmydata.windows.fixups import initialize except ImportError: print "set PYTHONPATH to the src directory" sys.exit(1) initialize() print print "class MyWeirdOS(EncodingUtil, unittest.TestCase):" print " uname = '%s'" % ' '.join(platform.uname()) print " argv = %s" % repr(sys.argv[1]) print " platform = '%s'" % sys.platform print " filesystem_encoding = '%s'" % sys.getfilesystemencoding() print " io_encoding = '%s'" % sys.stdout.encoding try: tmpdir = tempfile.mkdtemp() for fname in TEST_FILENAMES: open(os.path.join(tmpdir, fname), 'w').close() # Use Unicode API under Windows or MacOS X if sys.platform in ('win32', 'darwin'): dirlist = os.listdir(unicode(tmpdir)) else: dirlist = os.listdir(tmpdir) print " dirlist = %s" % repr(dirlist) except: print " # Oops, I cannot write filenames containing non-ascii characters" print shutil.rmtree(tmpdir) sys.exit(0) from twisted.trial import unittest from mock import patch import os, sys, locale from allmydata.test.common_util import ReallyEqualMixin from allmydata.util import encodingutil from allmydata.util.encodingutil import argv_to_unicode, unicode_to_url, \ unicode_to_output, quote_output, unicode_platform, listdir_unicode, \ FilenameEncodingError, get_io_encoding, get_filesystem_encoding, _reload from allmydata.dirnode import normalize from twisted.python import usage class EncodingUtilErrors(ReallyEqualMixin, unittest.TestCase): @patch('sys.stdout') def test_get_io_encoding(self, mock_stdout): mock_stdout.encoding = 'UTF-8' _reload() self.failUnlessReallyEqual(get_io_encoding(), 'utf-8') mock_stdout.encoding = 'cp65001' _reload() self.failUnlessReallyEqual(get_io_encoding(), 'utf-8') mock_stdout.encoding = 'koi8-r' expected = sys.platform == "win32" and 'utf-8' or 'koi8-r' _reload() self.failUnlessReallyEqual(get_io_encoding(), expected) mock_stdout.encoding = 'nonexistent_encoding' if sys.platform == "win32": _reload() self.failUnlessReallyEqual(get_io_encoding(), 'utf-8') else: self.failUnlessRaises(AssertionError, _reload) @patch('locale.getpreferredencoding') def test_get_io_encoding_not_from_stdout(self, mock_locale_getpreferredencoding): locale # hush pyflakes mock_locale_getpreferredencoding.return_value = 'koi8-r' class DummyStdout: pass old_stdout = sys.stdout sys.stdout = DummyStdout() try: expected = sys.platform == "win32" and 'utf-8' or 'koi8-r' _reload() self.failUnlessReallyEqual(get_io_encoding(), expected) sys.stdout.encoding = None _reload() self.failUnlessReallyEqual(get_io_encoding(), expected) mock_locale_getpreferredencoding.return_value = None _reload() self.failUnlessReallyEqual(get_io_encoding(), 'utf-8') finally: sys.stdout = old_stdout def test_argv_to_unicode(self): encodingutil.io_encoding = 'utf-8' self.failUnlessRaises(usage.UsageError, argv_to_unicode, lumiere_nfc.encode('latin1')) def test_unicode_to_output(self): encodingutil.io_encoding = 'koi8-r' self.failUnlessRaises(UnicodeEncodeError, unicode_to_output, lumiere_nfc) @patch('os.listdir') def test_no_unicode_normalization(self, mock): # Pretend to run on a Unicode platform. # We normalized to NFC in 1.7beta, but we now don't. 
orig_platform = sys.platform try: sys.platform = 'darwin' mock.return_value = [Artonwall_nfd] _reload() self.failUnlessReallyEqual(listdir_unicode(u'/dummy'), [Artonwall_nfd]) finally: sys.platform = orig_platform # The following tests apply only to platforms that don't store filenames as # Unicode entities on the filesystem. class EncodingUtilNonUnicodePlatform(unittest.TestCase): def setUp(self): # Mock sys.platform because unicode_platform() uses it self.original_platform = sys.platform sys.platform = 'linux' def tearDown(self): sys.platform = self.original_platform _reload() @patch('sys.getfilesystemencoding') @patch('os.listdir') def test_listdir_unicode(self, mock_listdir, mock_getfilesystemencoding): # What happens if latin1-encoded filenames are encountered on an UTF-8 # filesystem? mock_listdir.return_value = [ lumiere_nfc.encode('utf-8'), lumiere_nfc.encode('latin1')] mock_getfilesystemencoding.return_value = 'utf-8' _reload() self.failUnlessRaises(FilenameEncodingError, listdir_unicode, u'/dummy') # We're trying to list a directory whose name cannot be represented in # the filesystem encoding. This should fail. mock_getfilesystemencoding.return_value = 'ascii' _reload() self.failUnlessRaises(FilenameEncodingError, listdir_unicode, u'/' + lumiere_nfc) class EncodingUtil(ReallyEqualMixin): def setUp(self): self.original_platform = sys.platform sys.platform = self.platform def tearDown(self): sys.platform = self.original_platform _reload() @patch('sys.stdout') def test_argv_to_unicode(self, mock): if 'argv' not in dir(self): return mock.encoding = self.io_encoding argu = lumiere_nfc argv = self.argv _reload() self.failUnlessReallyEqual(argv_to_unicode(argv), argu) def test_unicode_to_url(self): self.failUnless(unicode_to_url(lumiere_nfc), "lumi\xc3\xa8re") @patch('sys.stdout') def test_unicode_to_output(self, mock): if 'argv' not in dir(self): return mock.encoding = self.io_encoding _reload() self.failUnlessReallyEqual(unicode_to_output(lumiere_nfc), self.argv) def test_unicode_platform(self): matrix = { 'linux2': False, 'linux3': False, 'openbsd4': False, 'win32': True, 'darwin': True, } _reload() self.failUnlessReallyEqual(unicode_platform(), matrix[self.platform]) @patch('sys.getfilesystemencoding') @patch('os.listdir') def test_listdir_unicode(self, mock_listdir, mock_getfilesystemencoding): if 'dirlist' not in dir(self): return try: u"test".encode(self.filesystem_encoding) except (LookupError, AttributeError): raise unittest.SkipTest("This platform does not support the '%s' filesystem encoding " "that we are testing for the benefit of a different platform." 
% (self.filesystem_encoding,)) mock_listdir.return_value = self.dirlist mock_getfilesystemencoding.return_value = self.filesystem_encoding _reload() filenames = listdir_unicode(u'/dummy') self.failUnlessEqual(set([normalize(fname) for fname in filenames]), set(TEST_FILENAMES)) class StdlibUnicode(unittest.TestCase): """This mainly tests that some of the stdlib functions support Unicode paths, but also that listdir_unicode works for valid filenames.""" def skip_if_cannot_represent_filename(self, u): enc = get_filesystem_encoding() if not unicode_platform(): try: u.encode(enc) except UnicodeEncodeError: raise unittest.SkipTest("A non-ASCII filename could not be encoded on this platform.") def test_mkdir_open_exists_abspath_listdir_expanduser(self): self.skip_if_cannot_represent_filename(lumiere_nfc) try: os.mkdir(lumiere_nfc) except EnvironmentError, e: raise unittest.SkipTest("%r\nIt is possible that the filesystem on which this test is being run " "does not support Unicode, even though the platform does." % (e,)) fn = lumiere_nfc + u'/' + lumiere_nfc + u'.txt' open(fn, 'wb').close() self.failUnless(os.path.exists(fn)) self.failUnless(os.path.exists(os.path.join(os.getcwdu(), fn))) filenames = listdir_unicode(lumiere_nfc) # We only require that the listing includes a filename that is canonically equivalent # to lumiere_nfc (on Mac OS X, it will be the NFD equivalent). self.failUnlessIn(lumiere_nfc + ".txt", set([normalize(fname) for fname in filenames])) expanded = os.path.expanduser("~/" + lumiere_nfc) self.failIfIn("~", expanded) self.failUnless(expanded.endswith(lumiere_nfc), expanded) def test_open_unrepresentable(self): if unicode_platform(): raise unittest.SkipTest("This test is not applicable to platforms that represent filenames as Unicode.") enc = get_filesystem_encoding() fn = u'\u2621.txt' try: fn.encode(enc) raise unittest.SkipTest("This test cannot be run unless we know a filename that is not representable.") except UnicodeEncodeError: self.failUnlessRaises(UnicodeEncodeError, open, fn, 'wb') class QuoteOutput(ReallyEqualMixin, unittest.TestCase): def tearDown(self): _reload() def _check(self, inp, out, enc, optional_quotes, quote_newlines): out2 = out if optional_quotes: out2 = out2[1:-1] self.failUnlessReallyEqual(quote_output(inp, encoding=enc, quote_newlines=quote_newlines), out) self.failUnlessReallyEqual(quote_output(inp, encoding=enc, quotemarks=False, quote_newlines=quote_newlines), out2) if out[0:2] == 'b"': pass elif isinstance(inp, str): self.failUnlessReallyEqual(quote_output(unicode(inp), encoding=enc, quote_newlines=quote_newlines), out) self.failUnlessReallyEqual(quote_output(unicode(inp), encoding=enc, quotemarks=False, quote_newlines=quote_newlines), out2) else: self.failUnlessReallyEqual(quote_output(inp.encode('utf-8'), encoding=enc, quote_newlines=quote_newlines), out) self.failUnlessReallyEqual(quote_output(inp.encode('utf-8'), encoding=enc, quotemarks=False, quote_newlines=quote_newlines), out2) def _test_quote_output_all(self, enc): def check(inp, out, optional_quotes=False, quote_newlines=None): self._check(inp, out, enc, optional_quotes, quote_newlines) # optional single quotes check("foo", "'foo'", True) check("\\", "'\\'", True) check("$\"`", "'$\"`'", True) check("\n", "'\n'", True, quote_newlines=False) # mandatory single quotes check("\"", "'\"'") # double quotes check("'", "\"'\"") check("\n", "\"\\x0a\"", quote_newlines=True) check("\x00", "\"\\x00\"") # invalid Unicode and astral planes check(u"\uFDD0\uFDEF", "\"\\ufdd0\\ufdef\"") 
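# --- illustrative aside, not part of the original test module ---
# Several quote_output() cases in this part of the file feed in UTF-16
# surrogate pairs (u"\uD800\uDC00" and friends) and expect a single
# astral code point such as \U00010000 in the output.  Those expected
# values come straight from the standard surrogate-pair decoding
# formula; a standalone check of that arithmetic (the helper name is
# illustrative only):

def decode_surrogate_pair(hi, lo):
    """Combine a high/low surrogate pair (given as ints) into one code point."""
    assert 0xD800 <= hi <= 0xDBFF and 0xDC00 <= lo <= 0xDFFF
    return 0x10000 + ((hi - 0xD800) << 10) + (lo - 0xDC00)

assert decode_surrogate_pair(0xD800, 0xDC00) == 0x10000
assert decode_surrogate_pair(0xD800, 0xDC01) == 0x10001
assert decode_surrogate_pair(0xD801, 0xDC00) == 0x10400
assert decode_surrogate_pair(0xDBFF, 0xDFFF) == 0x10FFFF
# --- end aside ---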
check(u"\uDC00\uD800", "\"\\udc00\\ud800\"") check(u"\uDC00\uD800\uDC00", "\"\\udc00\\U00010000\"") check(u"\uD800\uDC00", "\"\\U00010000\"") check(u"\uD800\uDC01", "\"\\U00010001\"") check(u"\uD801\uDC00", "\"\\U00010400\"") check(u"\uDBFF\uDFFF", "\"\\U0010ffff\"") check(u"'\uDBFF\uDFFF", "\"'\\U0010ffff\"") check(u"\"\uDBFF\uDFFF", "\"\\\"\\U0010ffff\"") # invalid UTF-8 check("\xFF", "b\"\\xff\"") check("\x00\"$\\`\x80\xFF", "b\"\\x00\\\"\\$\\\\\\`\\x80\\xff\"") def test_quote_output_ascii(self, enc='ascii'): def check(inp, out, optional_quotes=False, quote_newlines=None): self._check(inp, out, enc, optional_quotes, quote_newlines) self._test_quote_output_all(enc) check(u"\u00D7", "\"\\xd7\"") check(u"'\u00D7", "\"'\\xd7\"") check(u"\"\u00D7", "\"\\\"\\xd7\"") check(u"\u2621", "\"\\u2621\"") check(u"'\u2621", "\"'\\u2621\"") check(u"\"\u2621", "\"\\\"\\u2621\"") check(u"\n", "'\n'", True, quote_newlines=False) check(u"\n", "\"\\x0a\"", quote_newlines=True) def test_quote_output_latin1(self, enc='latin1'): def check(inp, out, optional_quotes=False, quote_newlines=None): self._check(inp, out.encode('latin1'), enc, optional_quotes, quote_newlines) self._test_quote_output_all(enc) check(u"\u00D7", u"'\u00D7'", True) check(u"'\u00D7", u"\"'\u00D7\"") check(u"\"\u00D7", u"'\"\u00D7'") check(u"\u00D7\"", u"'\u00D7\"'", True) check(u"\u2621", u"\"\\u2621\"") check(u"'\u2621", u"\"'\\u2621\"") check(u"\"\u2621", u"\"\\\"\\u2621\"") check(u"\n", u"'\n'", True, quote_newlines=False) check(u"\n", u"\"\\x0a\"", quote_newlines=True) def test_quote_output_utf8(self, enc='utf-8'): def check(inp, out, optional_quotes=False, quote_newlines=None): self._check(inp, out.encode('utf-8'), enc, optional_quotes, quote_newlines) self._test_quote_output_all(enc) check(u"\u2621", u"'\u2621'", True) check(u"'\u2621", u"\"'\u2621\"") check(u"\"\u2621", u"'\"\u2621'") check(u"\u2621\"", u"'\u2621\"'", True) check(u"\n", u"'\n'", True, quote_newlines=False) check(u"\n", u"\"\\x0a\"", quote_newlines=True) def test_quote_output_default(self): encodingutil.io_encoding = 'ascii' self.test_quote_output_ascii(None) encodingutil.io_encoding = 'latin1' self.test_quote_output_latin1(None) encodingutil.io_encoding = 'utf-8' self.test_quote_output_utf8(None) class UbuntuKarmicUTF8(EncodingUtil, unittest.TestCase): uname = 'Linux korn 2.6.31-14-generic #48-Ubuntu SMP Fri Oct 16 14:05:01 UTC 2009 x86_64' argv = 'lumi\xc3\xa8re' platform = 'linux2' filesystem_encoding = 'UTF-8' io_encoding = 'UTF-8' dirlist = ['test_file', '\xc3\x84rtonwall.mp3', 'Blah blah.txt'] class UbuntuKarmicLatin1(EncodingUtil, unittest.TestCase): uname = 'Linux korn 2.6.31-14-generic #48-Ubuntu SMP Fri Oct 16 14:05:01 UTC 2009 x86_64' argv = 'lumi\xe8re' platform = 'linux2' filesystem_encoding = 'ISO-8859-1' io_encoding = 'ISO-8859-1' dirlist = ['test_file', 'Blah blah.txt', '\xc4rtonwall.mp3'] class Windows(EncodingUtil, unittest.TestCase): uname = 'Windows XP 5.1.2600 x86 x86 Family 15 Model 75 Step ping 2, AuthenticAMD' argv = 'lumi\xc3\xa8re' platform = 'win32' filesystem_encoding = 'mbcs' io_encoding = 'utf-8' dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3'] class MacOSXLeopard(EncodingUtil, unittest.TestCase): uname = 'Darwin g5.local 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15 16:57:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_PPC Power Macintosh powerpc' output = 'lumi\xc3\xa8re' platform = 'darwin' filesystem_encoding = 'utf-8' io_encoding = 'UTF-8' dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file'] class 
MacOSXLeopard7bit(EncodingUtil, unittest.TestCase): uname = 'Darwin g5.local 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15 16:57:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_PPC Power Macintosh powerpc' platform = 'darwin' filesystem_encoding = 'utf-8' io_encoding = 'US-ASCII' dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file'] class OpenBSD(EncodingUtil, unittest.TestCase): uname = 'OpenBSD 4.1 GENERIC#187 i386 Intel(R) Celeron(R) CPU 2.80GHz ("GenuineIntel" 686-class)' platform = 'openbsd4' filesystem_encoding = '646' io_encoding = '646' # Oops, I cannot write filenames containing non-ascii characters tahoe-lafs-1.10.0/src/allmydata/test/test_filenode.py000066400000000000000000000166151221140116300225300ustar00rootroot00000000000000 from twisted.trial import unittest from allmydata import uri, client from allmydata.monitor import Monitor from allmydata.immutable.literal import LiteralFileNode from allmydata.immutable.filenode import ImmutableFileNode from allmydata.mutable.filenode import MutableFileNode from allmydata.util import hashutil from allmydata.util.consumer import download_to_data class NotANode: pass class FakeClient: # just enough to let the node acquire a downloader (which it won't use), # and to get default encoding parameters def getServiceNamed(self, name): return None def get_encoding_parameters(self): return {"k": 3, "n": 10} def get_storage_broker(self): return None def get_history(self): return None _secret_holder = client.SecretHolder("lease secret", "convergence secret") class Node(unittest.TestCase): def test_chk_filenode(self): u = uri.CHKFileURI(key="\x00"*16, uri_extension_hash="\x00"*32, needed_shares=3, total_shares=10, size=1000) fn1 = ImmutableFileNode(u, None, None, None, None) fn2 = ImmutableFileNode(u, None, None, None, None) self.failUnlessEqual(fn1, fn2) self.failIfEqual(fn1, "I am not a filenode") self.failIfEqual(fn1, NotANode()) self.failUnlessEqual(fn1.get_uri(), u.to_string()) self.failUnlessEqual(fn1.get_cap(), u) self.failUnlessEqual(fn1.get_readcap(), u) self.failUnless(fn1.is_readonly()) self.failIf(fn1.is_mutable()) self.failIf(fn1.is_unknown()) self.failUnless(fn1.is_allowed_in_immutable_directory()) self.failUnlessEqual(fn1.get_write_uri(), None) self.failUnlessEqual(fn1.get_readonly_uri(), u.to_string()) self.failUnlessEqual(fn1.get_size(), 1000) self.failUnlessEqual(fn1.get_storage_index(), u.get_storage_index()) fn1.raise_error() fn2.raise_error() d = {} d[fn1] = 1 # exercise __hash__ v = fn1.get_verify_cap() self.failUnless(isinstance(v, uri.CHKFileVerifierURI)) self.failUnlessEqual(fn1.get_repair_cap(), v) self.failUnless(v.is_readonly()) self.failIf(v.is_mutable()) def test_literal_filenode(self): DATA = "I am a short file." 
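# --- illustrative aside, not part of the original test module ---
# The literal-filenode test that starts here relies on LIT caps
# carrying the whole (small) file body inside the URI itself, encoded
# after the "URI:LIT:" prefix, so there are no shares, no storage
# index and no verify or repair caps.  A tiny round-trip demonstration
# using the same allmydata.uri module the test imports (assuming the
# package is importable, as it is for the rest of this suite):

from allmydata import uri as _uri_demo

_lit = _uri_demo.LiteralFileURI(data="I am a short file.")
_cap = _lit.to_string()
assert _cap.startswith("URI:LIT:")                      # the data lives in the cap string
assert _uri_demo.from_string(_cap).to_string() == _cap  # and round-trips losslessly
# --- end aside ---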
u = uri.LiteralFileURI(data=DATA) fn1 = LiteralFileNode(u) fn2 = LiteralFileNode(u) self.failUnlessEqual(fn1, fn2) self.failIfEqual(fn1, "I am not a filenode") self.failIfEqual(fn1, NotANode()) self.failUnlessEqual(fn1.get_uri(), u.to_string()) self.failUnlessEqual(fn1.get_cap(), u) self.failUnlessEqual(fn1.get_readcap(), u) self.failUnless(fn1.is_readonly()) self.failIf(fn1.is_mutable()) self.failIf(fn1.is_unknown()) self.failUnless(fn1.is_allowed_in_immutable_directory()) self.failUnlessEqual(fn1.get_write_uri(), None) self.failUnlessEqual(fn1.get_readonly_uri(), u.to_string()) self.failUnlessEqual(fn1.get_size(), len(DATA)) self.failUnlessEqual(fn1.get_storage_index(), None) fn1.raise_error() fn2.raise_error() d = {} d[fn1] = 1 # exercise __hash__ v = fn1.get_verify_cap() self.failUnlessEqual(v, None) self.failUnlessEqual(fn1.get_repair_cap(), None) d = download_to_data(fn1) def _check(res): self.failUnlessEqual(res, DATA) d.addCallback(_check) d.addCallback(lambda res: download_to_data(fn1, 1, 5)) def _check_segment(res): self.failUnlessEqual(res, DATA[1:1+5]) d.addCallback(_check_segment) d.addCallback(lambda ignored: fn1.get_best_readable_version()) d.addCallback(lambda fn2: self.failUnlessEqual(fn1, fn2)) d.addCallback(lambda ignored: fn1.get_size_of_best_version()) d.addCallback(lambda size: self.failUnlessEqual(size, len(DATA))) d.addCallback(lambda ignored: fn1.download_to_data()) d.addCallback(lambda data: self.failUnlessEqual(data, DATA)) d.addCallback(lambda ignored: fn1.download_best_version()) d.addCallback(lambda data: self.failUnlessEqual(data, DATA)) return d def test_mutable_filenode(self): client = FakeClient() wk = "\x00"*16 rk = hashutil.ssk_readkey_hash(wk) si = hashutil.ssk_storage_index_hash(rk) u = uri.WriteableSSKFileURI("\x00"*16, "\x00"*32) n = MutableFileNode(None, None, client.get_encoding_parameters(), None).init_from_cap(u) self.failUnlessEqual(n.get_writekey(), wk) self.failUnlessEqual(n.get_readkey(), rk) self.failUnlessEqual(n.get_storage_index(), si) # these items are populated on first read (or create), so until that # happens they'll be None self.failUnlessEqual(n.get_privkey(), None) self.failUnlessEqual(n.get_encprivkey(), None) self.failUnlessEqual(n.get_pubkey(), None) self.failUnlessEqual(n.get_uri(), u.to_string()) self.failUnlessEqual(n.get_write_uri(), u.to_string()) self.failUnlessEqual(n.get_readonly_uri(), u.get_readonly().to_string()) self.failUnlessEqual(n.get_cap(), u) self.failUnlessEqual(n.get_readcap(), u.get_readonly()) self.failUnless(n.is_mutable()) self.failIf(n.is_readonly()) self.failIf(n.is_unknown()) self.failIf(n.is_allowed_in_immutable_directory()) n.raise_error() n2 = MutableFileNode(None, None, client.get_encoding_parameters(), None).init_from_cap(u) self.failUnlessEqual(n, n2) self.failIfEqual(n, "not even the right type") self.failIfEqual(n, u) # not the right class n.raise_error() d = {n: "can these be used as dictionary keys?"} d[n2] = "replace the old one" self.failUnlessEqual(len(d), 1) nro = n.get_readonly() self.failUnless(isinstance(nro, MutableFileNode)) self.failUnlessEqual(nro.get_readonly(), nro) self.failUnlessEqual(nro.get_cap(), u.get_readonly()) self.failUnlessEqual(nro.get_readcap(), u.get_readonly()) self.failUnless(nro.is_mutable()) self.failUnless(nro.is_readonly()) self.failIf(nro.is_unknown()) self.failIf(nro.is_allowed_in_immutable_directory()) nro_u = nro.get_uri() self.failUnlessEqual(nro_u, nro.get_readonly_uri()) self.failUnlessEqual(nro_u, u.get_readonly().to_string()) 
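# --- illustrative aside, not part of the original test module ---
# test_mutable_filenode above exercises the cap-attenuation chain for
# SSK mutable files: the readkey is a tagged hash of the writekey, and
# the storage index is a tagged hash of the readkey, so each step is
# one-way -- a readcap holder can locate and verify shares without
# being able to recover the writekey.  A short determinism check using
# the same hashutil functions the test already imports:

from allmydata.util import hashutil as _hashutil_demo

_wk = "\x00" * 16                                    # writekey, as in the test
_rk = _hashutil_demo.ssk_readkey_hash(_wk)           # derived readkey
_si = _hashutil_demo.ssk_storage_index_hash(_rk)     # derived storage index

assert _rk != _wk and _si != _rk                     # each derivation changes the value
assert _rk == _hashutil_demo.ssk_readkey_hash(_wk)   # and is deterministic
assert _si == _hashutil_demo.ssk_storage_index_hash(_rk)
# --- end aside ---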
self.failUnlessEqual(nro.get_write_uri(), None) self.failUnlessEqual(nro.get_repair_cap(), None) # RSAmut needs writecap nro.raise_error() v = n.get_verify_cap() self.failUnless(isinstance(v, uri.SSKVerifierURI)) self.failUnlessEqual(n.get_repair_cap(), n._uri) # TODO: n.get_uri() class LiteralChecker(unittest.TestCase): def test_literal_filenode(self): DATA = "I am a short file." u = uri.LiteralFileURI(data=DATA) fn1 = LiteralFileNode(u) d = fn1.check(Monitor()) def _check_checker_results(cr): self.failUnlessEqual(cr, None) d.addCallback(_check_checker_results) d.addCallback(lambda res: fn1.check(Monitor(), verify=True)) d.addCallback(_check_checker_results) return d tahoe-lafs-1.10.0/src/allmydata/test/test_ftp.py000066400000000000000000000077321221140116300215340ustar00rootroot00000000000000 from twisted.trial import unittest from allmydata.frontends import ftpd from allmydata.immutable import upload from allmydata.mutable import publish from allmydata.test.no_network import GridTestMixin from allmydata.test.common_util import ReallyEqualMixin class Handler(GridTestMixin, ReallyEqualMixin, unittest.TestCase): """ This is a no-network unit test of ftpd.Handler and the abstractions it uses. """ FALL_OF_BERLIN_WALL = 626644800 TURN_OF_MILLENIUM = 946684800 def _set_up(self, basedir, num_clients=1, num_servers=10): self.basedir = "ftp/" + basedir self.set_up_grid(num_clients=num_clients, num_servers=num_servers) self.client = self.g.clients[0] self.username = "alice" self.convergence = "" d = self.client.create_dirnode() def _created_root(node): self.root = node self.root_uri = node.get_uri() self.handler = ftpd.Handler(self.client, self.root, self.username, self.convergence) d.addCallback(_created_root) return d def _set_metadata(self, name, metadata): """Set metadata for `name', avoiding MetadataSetter's timestamp reset behavior.""" def _modifier(old_contents, servermap, first_time): children = self.root._unpack_contents(old_contents) children[name] = (children[name][0], metadata) return self.root._pack_contents(children) return self.root._node.modify(_modifier) def _set_up_tree(self): # add immutable file at root immutable = upload.Data("immutable file contents", None) d = self.root.add_file(u"immutable", immutable) # `mtime' and `linkmotime' both set md_both = {'mtime': self.FALL_OF_BERLIN_WALL, 'tahoe': {'linkmotime': self.TURN_OF_MILLENIUM}} d.addCallback(lambda _: self._set_metadata(u"immutable", md_both)) # add link to root from root d.addCallback(lambda _: self.root.set_node(u"loop", self.root)) # `mtime' set, but no `linkmotime' md_just_mtime = {'mtime': self.FALL_OF_BERLIN_WALL, 'tahoe': {}} d.addCallback(lambda _: self._set_metadata(u"loop", md_just_mtime)) # add mutable file at root mutable = publish.MutableData("mutable file contents") d.addCallback(lambda _: self.client.create_mutable_file(mutable)) d.addCallback(lambda node: self.root.set_node(u"mutable", node)) # neither `mtime' nor `linkmotime' set d.addCallback(lambda _: self._set_metadata(u"mutable", {})) return d def _compareDirLists(self, actual, expected): actual_list = sorted(actual) expected_list = sorted(expected) self.failUnlessReallyEqual(len(actual_list), len(expected_list), "%r is wrong length, expecting %r" % ( actual_list, expected_list)) for (a, b) in zip(actual_list, expected_list): (name, meta) = a (expected_name, expected_meta) = b self.failUnlessReallyEqual(name, expected_name) self.failUnlessReallyEqual(meta, expected_meta) def test_list(self): keys = ("size", "directory", "permissions", "hardlinks", 
"modified", "owner", "group", "unexpected") d = self._set_up("list") d.addCallback(lambda _: self._set_up_tree()) d.addCallback(lambda _: self.handler.list("", keys=keys)) expected_root = [ ('loop', [0, True, 0600, 1, self.FALL_OF_BERLIN_WALL, 'alice', 'alice', '??']), ('immutable', [23, False, 0600, 1, self.TURN_OF_MILLENIUM, 'alice', 'alice', '??']), ('mutable', # timestamp should be 0 if no timestamp metadata is present [0, False, 0600, 1, 0, 'alice', 'alice', '??'])] d.addCallback(lambda root: self._compareDirLists(root, expected_root)) return d tahoe-lafs-1.10.0/src/allmydata/test/test_hashtree.py000066400000000000000000000214431221140116300225410ustar00rootroot00000000000000# -*- test-case-name: allmydata.test.test_hashtree -*- from twisted.trial import unittest from allmydata.util.hashutil import tagged_hash from allmydata import hashtree def make_tree(numleaves): leaves = ["%d" % i for i in range(numleaves)] leaf_hashes = [tagged_hash("tag", leaf) for leaf in leaves] ht = hashtree.HashTree(leaf_hashes) return ht class Complete(unittest.TestCase): def test_create(self): # try out various sizes, since we pad to a power of two ht = make_tree(6) ht = make_tree(9) ht = make_tree(8) root = ht[0] self.failUnlessEqual(len(root), 32) self.failUnlessEqual(ht.get_leaf(0), tagged_hash("tag", "0")) self.failUnlessRaises(IndexError, ht.get_leaf, 8) self.failUnlessEqual(ht.get_leaf_index(0), 7) self.failUnlessRaises(IndexError, ht.parent, 0) self.failUnlessRaises(IndexError, ht.needed_for, -1) def test_needed_hashes(self): ht = make_tree(8) self.failUnlessEqual(ht.needed_hashes(0), set([8, 4, 2])) self.failUnlessEqual(ht.needed_hashes(0, True), set([7, 8, 4, 2])) self.failUnlessEqual(ht.needed_hashes(1), set([7, 4, 2])) self.failUnlessEqual(ht.needed_hashes(7), set([13, 5, 1])) self.failUnlessEqual(ht.needed_hashes(7, False), set([13, 5, 1])) self.failUnlessEqual(ht.needed_hashes(7, True), set([14, 13, 5, 1])) def test_dump(self): ht = make_tree(6) expected = [(0,0), (1,1), (3,2), (7,3), (8,3), (4,2), (9,3), (10,3), (2,1), (5,2), (11,3), (12,3), (6,2), (13,3), (14,3), ] self.failUnlessEqual(list(ht.depth_first()), expected) d = "\n" + ht.dump() #print d self.failUnless("\n 0:" in d) self.failUnless("\n 1:" in d) self.failUnless("\n 3:" in d) self.failUnless("\n 7:" in d) self.failUnless("\n 8:" in d) self.failUnless("\n 4:" in d) class Incomplete(unittest.TestCase): def test_create(self): ht = hashtree.IncompleteHashTree(6) ht = hashtree.IncompleteHashTree(9) ht = hashtree.IncompleteHashTree(8) self.failUnlessEqual(ht[0], None) self.failUnlessEqual(ht.get_leaf(0), None) self.failUnlessRaises(IndexError, ht.get_leaf, 8) self.failUnlessEqual(ht.get_leaf_index(0), 7) def test_needed_hashes(self): ht = hashtree.IncompleteHashTree(8) self.failUnlessEqual(ht.needed_hashes(0), set([8, 4, 2])) self.failUnlessEqual(ht.needed_hashes(0, True), set([7, 8, 4, 2])) self.failUnlessEqual(ht.needed_hashes(1), set([7, 4, 2])) self.failUnlessEqual(ht.needed_hashes(7), set([13, 5, 1])) self.failUnlessEqual(ht.needed_hashes(7, False), set([13, 5, 1])) self.failUnlessEqual(ht.needed_hashes(7, True), set([14, 13, 5, 1])) ht = hashtree.IncompleteHashTree(1) self.failUnlessEqual(ht.needed_hashes(0), set([])) ht = hashtree.IncompleteHashTree(6) self.failUnlessEqual(ht.needed_hashes(0), set([8, 4, 2])) self.failUnlessEqual(ht.needed_hashes(0, True), set([7, 8, 4, 2])) self.failUnlessEqual(ht.needed_hashes(1), set([7, 4, 2])) self.failUnlessEqual(ht.needed_hashes(5), set([11, 6, 1])) self.failUnlessEqual(ht.needed_hashes(5, 
False), set([11, 6, 1])) self.failUnlessEqual(ht.needed_hashes(5, True), set([12, 11, 6, 1])) def test_depth_of(self): hashtree.IncompleteHashTree(8) self.failUnlessEqual(hashtree.depth_of(0), 0) for i in [1,2]: self.failUnlessEqual(hashtree.depth_of(i), 1, "i=%d"%i) for i in [3,4,5,6]: self.failUnlessEqual(hashtree.depth_of(i), 2, "i=%d"%i) for i in [7,8,9,10,11,12,13,14]: self.failUnlessEqual(hashtree.depth_of(i), 3, "i=%d"%i) def test_large(self): # IncompleteHashTree.set_hashes() used to take O(N**2). This test is # meant to show that it now takes O(N) or maybe O(N*ln(N)). I wish # there were a good way to assert this (like counting VM operations # or something): the problem was inside list.sort(), so there's no # good way to instrument set_hashes() to count what we care about. On # my laptop, 10k leaves takes 1.1s in this fixed version, and 11.6s # in the old broken version. An 80k-leaf test (corresponding to a # 10GB file with a 128KiB segsize) 10s in the fixed version, and # several hours in the broken version, but 10s on my laptop (plus the # 20s of setup code) probably means 200s on our dapper buildslave, # which is painfully long for a unit test. self.do_test_speed(10000) def do_test_speed(self, SIZE): # on my laptop, SIZE=80k (corresponding to a 10GB file with a 128KiB # segsize) takes: # 7s to build the (complete) HashTree # 13s to set up the dictionary # 10s to run set_hashes() ht = make_tree(SIZE) iht = hashtree.IncompleteHashTree(SIZE) needed = set() for i in range(SIZE): needed.update(ht.needed_hashes(i, True)) all = dict([ (i, ht[i]) for i in needed]) iht.set_hashes(hashes=all) def test_check(self): # first create a complete hash tree ht = make_tree(6) # then create a corresponding incomplete tree iht = hashtree.IncompleteHashTree(6) # suppose we wanted to validate leaf[0] # leaf[0] is the same as node[7] self.failUnlessEqual(iht.needed_hashes(0), set([8, 4, 2])) self.failUnlessEqual(iht.needed_hashes(0, True), set([7, 8, 4, 2])) self.failUnlessEqual(iht.needed_hashes(1), set([7, 4, 2])) iht[0] = ht[0] # set the root self.failUnlessEqual(iht.needed_hashes(0), set([8, 4, 2])) self.failUnlessEqual(iht.needed_hashes(1), set([7, 4, 2])) iht[5] = ht[5] self.failUnlessEqual(iht.needed_hashes(0), set([8, 4, 2])) self.failUnlessEqual(iht.needed_hashes(1), set([7, 4, 2])) # reset iht = hashtree.IncompleteHashTree(6) current_hashes = list(iht) # this should fail because there aren't enough hashes known try: iht.set_hashes(leaves={0: tagged_hash("tag", "0")}) except hashtree.NotEnoughHashesError: pass else: self.fail("didn't catch not enough hashes") # and the set of hashes stored in the tree should still be the same self.failUnlessEqual(list(iht), current_hashes) # and we should still need the same self.failUnlessEqual(iht.needed_hashes(0), set([8, 4, 2])) chain = {0: ht[0], 2: ht[2], 4: ht[4], 8: ht[8]} # this should fail because the leaf hash is just plain wrong try: iht.set_hashes(chain, leaves={0: tagged_hash("bad tag", "0")}) except hashtree.BadHashError: pass else: self.fail("didn't catch bad hash") # this should fail because we give it conflicting hashes: one as an # internal node, another as a leaf try: iht.set_hashes(chain, leaves={1: tagged_hash("bad tag", "1")}) except hashtree.BadHashError: pass else: self.fail("didn't catch bad hash") bad_chain = chain.copy() bad_chain[2] = ht[2] + "BOGUS" # this should fail because the internal hash is wrong try: iht.set_hashes(bad_chain, leaves={0: tagged_hash("tag", "0")}) except hashtree.BadHashError: pass else: self.fail("didn't 
catch bad hash") # this should succeed try: iht.set_hashes(chain, leaves={0: tagged_hash("tag", "0")}) except hashtree.BadHashError, e: self.fail("bad hash: %s" % e) self.failUnlessEqual(ht.get_leaf(0), tagged_hash("tag", "0")) self.failUnlessRaises(IndexError, ht.get_leaf, 8) # this should succeed too try: iht.set_hashes(leaves={1: tagged_hash("tag", "1")}) except hashtree.BadHashError: self.fail("bad hash") # this should fail because we give it hashes that conflict with some # that we added successfully before try: iht.set_hashes(leaves={1: tagged_hash("bad tag", "1")}) except hashtree.BadHashError: pass else: self.fail("didn't catch bad hash") # now that leaves 0 and 1 are known, some of the internal nodes are # known self.failUnlessEqual(iht.needed_hashes(4), set([12, 6])) chain = {6: ht[6], 12: ht[12]} # this should succeed try: iht.set_hashes(chain, leaves={4: tagged_hash("tag", "4")}) except hashtree.BadHashError, e: self.fail("bad hash: %s" % e) tahoe-lafs-1.10.0/src/allmydata/test/test_helper.py000066400000000000000000000220511221140116300222110ustar00rootroot00000000000000import os from twisted.internet import defer from twisted.trial import unittest from twisted.application import service from foolscap.api import Tub, fireEventually, flushEventualQueue from allmydata.storage.server import si_b2a from allmydata.storage_client import StorageFarmBroker from allmydata.immutable import offloaded, upload from allmydata import uri, client from allmydata.util import hashutil, fileutil, mathutil from pycryptopp.cipher.aes import AES MiB = 1024*1024 DATA = "I need help\n" * 1000 class CHKUploadHelper_fake(offloaded.CHKUploadHelper): def start_encrypted(self, eu): d = eu.get_size() def _got_size(size): d2 = eu.get_all_encoding_parameters() def _got_parms(parms): # just pretend we did the upload needed_shares, happy, total_shares, segsize = parms ueb_data = {"needed_shares": needed_shares, "total_shares": total_shares, "segment_size": segsize, "size": size, } ueb_hash = "fake" v = uri.CHKFileVerifierURI(self._storage_index, "x"*32, needed_shares, total_shares, size) _UR = upload.UploadResults ur = _UR(file_size=size, ciphertext_fetched=0, preexisting_shares=0, pushed_shares=total_shares, sharemap={}, servermap={}, timings={}, uri_extension_data=ueb_data, uri_extension_hash=ueb_hash, verifycapstr=v.to_string()) self._upload_status.set_results(ur) return ur d2.addCallback(_got_parms) return d2 d.addCallback(_got_size) return d class Helper_fake_upload(offloaded.Helper): def _make_chk_upload_helper(self, storage_index, lp): si_s = si_b2a(storage_index) incoming_file = os.path.join(self._chk_incoming, si_s) encoding_file = os.path.join(self._chk_encoding, si_s) uh = CHKUploadHelper_fake(storage_index, self, self._storage_broker, self._secret_holder, incoming_file, encoding_file, lp) return uh class Helper_already_uploaded(Helper_fake_upload): def _check_chk(self, storage_index, lp): res = upload.HelperUploadResults() res.uri_extension_hash = hashutil.uri_extension_hash("") # we're pretending that the file they're trying to upload was already # present in the grid. We return some information about the file, so # the client can decide if they like the way it looks. The parameters # used here are chosen to match the defaults. 
PARAMS = FakeClient.DEFAULT_ENCODING_PARAMETERS ueb_data = {"needed_shares": PARAMS["k"], "total_shares": PARAMS["n"], "segment_size": min(PARAMS["max_segment_size"], len(DATA)), "size": len(DATA), } res.uri_extension_data = ueb_data return defer.succeed(res) class FakeClient(service.MultiService): DEFAULT_ENCODING_PARAMETERS = {"k":25, "happy": 75, "n": 100, "max_segment_size": 1*MiB, } def get_encoding_parameters(self): return self.DEFAULT_ENCODING_PARAMETERS def get_storage_broker(self): return self.storage_broker def flush_but_dont_ignore(res): d = flushEventualQueue() def _done(ignored): return res d.addCallback(_done) return d def wait_a_few_turns(ignored=None): d = fireEventually() d.addCallback(fireEventually) d.addCallback(fireEventually) d.addCallback(fireEventually) d.addCallback(fireEventually) d.addCallback(fireEventually) return d def upload_data(uploader, data, convergence): u = upload.Data(data, convergence=convergence) return uploader.upload(u) class AssistedUpload(unittest.TestCase): timeout = 240 # It takes longer than 120 seconds on Francois's arm box. def setUp(self): self.s = FakeClient() self.s.storage_broker = StorageFarmBroker(None, True) self.s.secret_holder = client.SecretHolder("lease secret", "converge") self.s.startService() self.tub = t = Tub() t.setOption("expose-remote-exception-types", False) t.setServiceParent(self.s) self.s.tub = t # we never actually use this for network traffic, so it can use a # bogus host/port t.setLocation("bogus:1234") def setUpHelper(self, basedir, helper_class=Helper_fake_upload): fileutil.make_dirs(basedir) self.helper = h = helper_class(basedir, self.s.storage_broker, self.s.secret_holder, None, None) self.helper_furl = self.tub.registerReference(h) def tearDown(self): d = self.s.stopService() d.addCallback(fireEventually) d.addBoth(flush_but_dont_ignore) return d def test_one(self): self.basedir = "helper/AssistedUpload/test_one" self.setUpHelper(self.basedir) u = upload.Uploader(self.helper_furl) u.setServiceParent(self.s) d = wait_a_few_turns() def _ready(res): assert u._helper return upload_data(u, DATA, convergence="some convergence string") d.addCallback(_ready) def _uploaded(results): the_uri = results.get_uri() assert "CHK" in the_uri d.addCallback(_uploaded) def _check_empty(res): files = os.listdir(os.path.join(self.basedir, "CHK_encoding")) self.failUnlessEqual(files, []) files = os.listdir(os.path.join(self.basedir, "CHK_incoming")) self.failUnlessEqual(files, []) d.addCallback(_check_empty) return d def test_previous_upload_failed(self): self.basedir = "helper/AssistedUpload/test_previous_upload_failed" self.setUpHelper(self.basedir) # we want to make sure that an upload which fails (leaving the # ciphertext in the CHK_encoding/ directory) does not prevent a later # attempt to upload that file from working. We simulate this by # populating the directory manually. The hardest part is guessing the # storage index. 
k = FakeClient.DEFAULT_ENCODING_PARAMETERS["k"] n = FakeClient.DEFAULT_ENCODING_PARAMETERS["n"] max_segsize = FakeClient.DEFAULT_ENCODING_PARAMETERS["max_segment_size"] segsize = min(max_segsize, len(DATA)) # this must be a multiple of 'required_shares'==k segsize = mathutil.next_multiple(segsize, k) key = hashutil.convergence_hash(k, n, segsize, DATA, "test convergence string") assert len(key) == 16 encryptor = AES(key) SI = hashutil.storage_index_hash(key) SI_s = si_b2a(SI) encfile = os.path.join(self.basedir, "CHK_encoding", SI_s) f = open(encfile, "wb") f.write(encryptor.process(DATA)) f.close() u = upload.Uploader(self.helper_furl) u.setServiceParent(self.s) d = wait_a_few_turns() def _ready(res): assert u._helper return upload_data(u, DATA, convergence="test convergence string") d.addCallback(_ready) def _uploaded(results): the_uri = results.get_uri() assert "CHK" in the_uri d.addCallback(_uploaded) def _check_empty(res): files = os.listdir(os.path.join(self.basedir, "CHK_encoding")) self.failUnlessEqual(files, []) files = os.listdir(os.path.join(self.basedir, "CHK_incoming")) self.failUnlessEqual(files, []) d.addCallback(_check_empty) return d def test_already_uploaded(self): self.basedir = "helper/AssistedUpload/test_already_uploaded" self.setUpHelper(self.basedir, helper_class=Helper_already_uploaded) u = upload.Uploader(self.helper_furl) u.setServiceParent(self.s) d = wait_a_few_turns() def _ready(res): assert u._helper return upload_data(u, DATA, convergence="some convergence string") d.addCallback(_ready) def _uploaded(results): the_uri = results.get_uri() assert "CHK" in the_uri d.addCallback(_uploaded) def _check_empty(res): files = os.listdir(os.path.join(self.basedir, "CHK_encoding")) self.failUnlessEqual(files, []) files = os.listdir(os.path.join(self.basedir, "CHK_incoming")) self.failUnlessEqual(files, []) d.addCallback(_check_empty) return d tahoe-lafs-1.10.0/src/allmydata/test/test_hung_server.py000066400000000000000000000355711221140116300232740ustar00rootroot00000000000000# -*- coding: utf-8 -*- import os, shutil from twisted.trial import unittest from twisted.internet import defer from allmydata import uri from allmydata.util.consumer import download_to_data from allmydata.immutable import upload from allmydata.mutable.common import UnrecoverableFileError from allmydata.mutable.publish import MutableData from allmydata.storage.common import storage_index_to_dir from allmydata.test.no_network import GridTestMixin from allmydata.test.common import ShouldFailMixin from allmydata.util.pollmixin import PollMixin from allmydata.interfaces import NotEnoughSharesError immutable_plaintext = "data" * 10000 mutable_plaintext = "muta" * 10000 class HungServerDownloadTest(GridTestMixin, ShouldFailMixin, PollMixin, unittest.TestCase): # Many of these tests take around 60 seconds on François's ARM buildslave: # http://tahoe-lafs.org/buildbot/builders/FranXois%20lenny-armv5tel # allmydata.test.test_hung_server.HungServerDownloadTest.test_2_good_8_broken_duplicate_share_fail # once ERRORed after 197 seconds on Midnight Magic's NetBSD buildslave: # http://tahoe-lafs.org/buildbot/builders/MM%20netbsd4%20i386%20warp # MM's buildslave varies a lot in how long it takes to run tests. 
timeout = 240 def _break(self, servers): for (id, ss) in servers: self.g.break_server(id) def _hang(self, servers, **kwargs): for (id, ss) in servers: self.g.hang_server(id, **kwargs) def _unhang(self, servers, **kwargs): for (id, ss) in servers: self.g.unhang_server(id, **kwargs) def _hang_shares(self, shnums, **kwargs): # hang all servers who are holding the given shares hung_serverids = set() for (i_shnum, i_serverid, i_sharefile) in self.shares: if i_shnum in shnums: if i_serverid not in hung_serverids: self.g.hang_server(i_serverid, **kwargs) hung_serverids.add(i_serverid) def _delete_all_shares_from(self, servers): serverids = [id for (id, ss) in servers] for (i_shnum, i_serverid, i_sharefile) in self.shares: if i_serverid in serverids: os.unlink(i_sharefile) def _corrupt_all_shares_in(self, servers, corruptor_func): serverids = [id for (id, ss) in servers] for (i_shnum, i_serverid, i_sharefile) in self.shares: if i_serverid in serverids: self._corrupt_share((i_shnum, i_sharefile), corruptor_func) def _copy_all_shares_from(self, from_servers, to_server): serverids = [id for (id, ss) in from_servers] for (i_shnum, i_serverid, i_sharefile) in self.shares: if i_serverid in serverids: self._copy_share((i_shnum, i_sharefile), to_server) def _copy_share(self, share, to_server): (sharenum, sharefile) = share (id, ss) = to_server shares_dir = os.path.join(ss.original.storedir, "shares") si = uri.from_string(self.uri).get_storage_index() si_dir = os.path.join(shares_dir, storage_index_to_dir(si)) if not os.path.exists(si_dir): os.makedirs(si_dir) new_sharefile = os.path.join(si_dir, str(sharenum)) shutil.copy(sharefile, new_sharefile) self.shares = self.find_uri_shares(self.uri) # Make sure that the storage server has the share. self.failUnless((sharenum, ss.original.my_nodeid, new_sharefile) in self.shares) def _corrupt_share(self, share, corruptor_func): (sharenum, sharefile) = share data = open(sharefile, "rb").read() newdata = corruptor_func(data) os.unlink(sharefile) wf = open(sharefile, "wb") wf.write(newdata) wf.close() def _set_up(self, mutable, testdir, num_clients=1, num_servers=10): self.mutable = mutable if mutable: self.basedir = "hung_server/mutable_" + testdir else: self.basedir = "hung_server/immutable_" + testdir self.set_up_grid(num_clients=num_clients, num_servers=num_servers) self.c0 = self.g.clients[0] nm = self.c0.nodemaker self.servers = sorted([(s.get_serverid(), s.get_rref()) for s in nm.storage_broker.get_connected_servers()]) self.servers = self.servers[5:] + self.servers[:5] if mutable: uploadable = MutableData(mutable_plaintext) d = nm.create_mutable_file(uploadable) def _uploaded_mutable(node): self.uri = node.get_uri() self.shares = self.find_uri_shares(self.uri) d.addCallback(_uploaded_mutable) else: data = upload.Data(immutable_plaintext, convergence="") d = self.c0.upload(data) def _uploaded_immutable(upload_res): self.uri = upload_res.get_uri() self.shares = self.find_uri_shares(self.uri) d.addCallback(_uploaded_immutable) return d def _start_download(self): n = self.c0.create_node_from_uri(self.uri) if self.mutable: d = n.download_best_version() else: d = download_to_data(n) return d def _wait_for_data(self, n): if self.mutable: d = n.download_best_version() else: d = download_to_data(n) return d def _check(self, resultingdata): if self.mutable: self.failUnlessEqual(resultingdata, mutable_plaintext) else: self.failUnlessEqual(resultingdata, immutable_plaintext) def _download_and_check(self): d = self._start_download() d.addCallback(self._check) return d def 
_should_fail_download(self): if self.mutable: return self.shouldFail(UnrecoverableFileError, self.basedir, "no recoverable versions", self._download_and_check) else: return self.shouldFail(NotEnoughSharesError, self.basedir, "ran out of shares", self._download_and_check) def test_10_good_sanity_check(self): d = defer.succeed(None) for mutable in [False, True]: d.addCallback(lambda ign, mutable=mutable: self._set_up(mutable, "test_10_good_sanity_check")) d.addCallback(lambda ign: self._download_and_check()) return d def test_10_good_copied_share(self): d = defer.succeed(None) for mutable in [False, True]: d.addCallback(lambda ign, mutable=mutable: self._set_up(mutable, "test_10_good_copied_share")) d.addCallback(lambda ign: self._copy_all_shares_from(self.servers[2:3], self.servers[0])) d.addCallback(lambda ign: self._download_and_check()) return d def test_3_good_7_noshares(self): d = defer.succeed(None) for mutable in [False, True]: d.addCallback(lambda ign, mutable=mutable: self._set_up(mutable, "test_3_good_7_noshares")) d.addCallback(lambda ign: self._delete_all_shares_from(self.servers[3:])) d.addCallback(lambda ign: self._download_and_check()) return d def test_2_good_8_broken_fail(self): d = defer.succeed(None) for mutable in [False, True]: d.addCallback(lambda ign, mutable=mutable: self._set_up(mutable, "test_2_good_8_broken_fail")) d.addCallback(lambda ign: self._break(self.servers[2:])) d.addCallback(lambda ign: self._should_fail_download()) return d def test_2_good_8_noshares_fail(self): d = defer.succeed(None) for mutable in [False, True]: d.addCallback(lambda ign, mutable=mutable: self._set_up(mutable, "test_2_good_8_noshares_fail")) d.addCallback(lambda ign: self._delete_all_shares_from(self.servers[2:])) d.addCallback(lambda ign: self._should_fail_download()) return d def test_2_good_8_broken_copied_share(self): d = defer.succeed(None) for mutable in [False, True]: d.addCallback(lambda ign, mutable=mutable: self._set_up(mutable, "test_2_good_8_broken_copied_share")) d.addCallback(lambda ign: self._copy_all_shares_from(self.servers[2:3], self.servers[0])) d.addCallback(lambda ign: self._break(self.servers[2:])) d.addCallback(lambda ign: self._download_and_check()) return d def test_2_good_8_broken_duplicate_share_fail(self): d = defer.succeed(None) for mutable in [False, True]: d.addCallback(lambda ign, mutable=mutable: self._set_up(mutable, "test_2_good_8_broken_duplicate_share_fail")) d.addCallback(lambda ign: self._copy_all_shares_from(self.servers[1:2], self.servers[0])) d.addCallback(lambda ign: self._break(self.servers[2:])) d.addCallback(lambda ign: self._should_fail_download()) return d def test_3_good_7_hung_immutable(self): d = defer.succeed(None) d.addCallback(lambda ign: self._set_up(False, "test_3_good_7_hung")) d.addCallback(lambda ign: self._hang(self.servers[3:])) d.addCallback(lambda ign: self._download_and_check()) return d def test_5_overdue_immutable(self): # restrict the ShareFinder to only allow 5 outstanding requests, and # arrange for the first 5 servers to hang. Then trigger the OVERDUE # timers (simulating 10 seconds passed), at which point the # ShareFinder should send additional queries and finish the download # quickly. If we didn't have OVERDUE timers, this test would fail by # timing out. 
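        # A minimal sketch of the bookkeeping rule exercised here, using
        # illustrative names (this is not the actual ShareFinder code): a
        # request marked OVERDUE stays in pending_requests but stops counting
        # against max_outstanding_requests, so the finder may issue another
        # query while still accepting a late answer from the old one.
        class _OverdueSketch:
            def __init__(self, max_outstanding=5):
                self.max_outstanding = max_outstanding
                self.pending_requests = set()
                self.overdue_requests = set()  # always a subset of pending
            def send_query(self, req):
                self.pending_requests.add(req)
            def may_send_query(self):
                live = self.pending_requests - self.overdue_requests
                return len(live) < self.max_outstanding
            def mark_overdue(self, req):   # when its OVERDUE timer fires
                if req in self.pending_requests:
                    self.overdue_requests.add(req)
            def retire(self, req):         # when a response finally arrives
                self.pending_requests.discard(req)
                self.overdue_requests.discard(req)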
done = [] d = self._set_up(False, "test_5_overdue_immutable") def _reduce_max_outstanding_requests_and_download(ign): self._hang_shares(range(5)) n = self.c0.create_node_from_uri(self.uri) n._cnode._maybe_create_download_node() self._sf = n._cnode._node._sharefinder self._sf.max_outstanding_requests = 5 self._sf.OVERDUE_TIMEOUT = 1000.0 d2 = download_to_data(n) # start download, but don't wait for it to complete yet def _done(res): done.append(res) # we will poll for this later d2.addBoth(_done) d.addCallback(_reduce_max_outstanding_requests_and_download) from foolscap.eventual import fireEventually, flushEventualQueue # wait here a while d.addCallback(lambda res: fireEventually(res)) d.addCallback(lambda res: flushEventualQueue()) d.addCallback(lambda ign: self.failIf(done)) def _check_waiting(ign): # all the share requests should now be stuck waiting self.failUnlessEqual(len(self._sf.pending_requests), 5) # but none should be marked as OVERDUE until the timers expire self.failUnlessEqual(len(self._sf.overdue_requests), 0) d.addCallback(_check_waiting) def _mark_overdue(ign): # declare four requests overdue, allowing new requests to take # their place, and leaving one stuck. The finder will keep # sending requests until there are 5 non-overdue ones # outstanding, at which point we'll have 4 OVERDUE, 1 # stuck-but-not-overdue, and 4 live requests. All 4 live requests # will retire before the download is complete and the ShareFinder # is shut off. That will leave 4 OVERDUE and 1 # stuck-but-not-overdue, for a total of 5 requests in in # _sf.pending_requests for t in self._sf.overdue_timers.values()[:4]: t.reset(-1.0) # the timers ought to fire before the eventual-send does return fireEventually() d.addCallback(_mark_overdue) def _we_are_done(): return bool(done) d.addCallback(lambda ign: self.poll(_we_are_done)) def _check_done(ign): self.failUnlessEqual(done, [immutable_plaintext]) self.failUnlessEqual(len(self._sf.pending_requests), 5) self.failUnlessEqual(len(self._sf.overdue_requests), 4) d.addCallback(_check_done) return d def test_2_good_8_hung_then_1_recovers_immutable(self): d = defer.succeed(None) d.addCallback(lambda ign: self._set_up(False, "test_2_good_8_hung_then_1_recovers")) d.addCallback(lambda ign: self._hang(self.servers[2:3])) d.addCallback(lambda ign: self._hang(self.servers[3:])) d.addCallback(lambda ign: self._unhang(self.servers[2:3])) d.addCallback(lambda ign: self._download_and_check()) return d def test_2_good_8_hung_then_1_recovers_with_2_shares_immutable(self): d = defer.succeed(None) d.addCallback(lambda ign: self._set_up(False, "test_2_good_8_hung_then_1_recovers_with_2_shares")) d.addCallback(lambda ign: self._copy_all_shares_from(self.servers[0:1], self.servers[2])) d.addCallback(lambda ign: self._hang(self.servers[2:3])) d.addCallback(lambda ign: self._hang(self.servers[3:])) d.addCallback(lambda ign: self._unhang(self.servers[2:3])) d.addCallback(lambda ign: self._download_and_check()) return d # The tests below do not currently pass for mutable files. 
The # mutable-file downloader does not yet handle hung servers, and the tests # hang forever (hence the use of SkipTest rather than .todo) def test_3_good_7_hung_mutable(self): raise unittest.SkipTest("still broken") d = defer.succeed(None) d.addCallback(lambda ign: self._set_up(True, "test_3_good_7_hung")) d.addCallback(lambda ign: self._hang(self.servers[3:])) d.addCallback(lambda ign: self._download_and_check()) return d def test_2_good_8_hung_then_1_recovers_mutable(self): raise unittest.SkipTest("still broken") d = defer.succeed(None) d.addCallback(lambda ign: self._set_up(True, "test_2_good_8_hung_then_1_recovers")) d.addCallback(lambda ign: self._hang(self.servers[2:3])) d.addCallback(lambda ign: self._hang(self.servers[3:])) d.addCallback(lambda ign: self._unhang(self.servers[2:3])) d.addCallback(lambda ign: self._download_and_check()) return d def test_2_good_8_hung_then_1_recovers_with_2_shares_mutable(self): raise unittest.SkipTest("still broken") d = defer.succeed(None) d.addCallback(lambda ign: self._set_up(True, "test_2_good_8_hung_then_1_recovers_with_2_shares")) d.addCallback(lambda ign: self._copy_all_shares_from(self.servers[0:1], self.servers[2])) d.addCallback(lambda ign: self._hang(self.servers[2:3])) d.addCallback(lambda ign: self._hang(self.servers[3:])) d.addCallback(lambda ign: self._unhang(self.servers[2:3])) d.addCallback(lambda ign: self._download_and_check()) return d tahoe-lafs-1.10.0/src/allmydata/test/test_immutable.py000066400000000000000000000326361221140116300227230ustar00rootroot00000000000000import random from twisted.trial import unittest from twisted.internet import defer import mock from foolscap.api import eventually from allmydata.test import common from allmydata.test.no_network import GridTestMixin from allmydata.test.common import TEST_DATA from allmydata import uri from allmydata.util import log from allmydata.util.consumer import download_to_data from allmydata.interfaces import NotEnoughSharesError from allmydata.immutable.upload import Data from allmydata.immutable.downloader import finder class MockNode(object): def __init__(self, check_reneging, check_fetch_failed): self.got = 0 self.finished_d = defer.Deferred() self.segment_size = 78 self.guessed_segment_size = 78 self._no_more_shares = False self.check_reneging = check_reneging self.check_fetch_failed = check_fetch_failed self._si_prefix='aa' self.have_UEB = True self.share_hash_tree = mock.Mock() self.share_hash_tree.needed_hashes.return_value = False self.on_want_more_shares = None def when_finished(self): return self.finished_d def get_num_segments(self): return (5, True) def _calculate_sizes(self, guessed_segment_size): return {'block_size': 4, 'num_segments': 5} def no_more_shares(self): self._no_more_shares = True def got_shares(self, shares): if self.check_reneging: if self._no_more_shares: self.finished_d.errback(unittest.FailTest("The node was told by the share finder that it is destined to remain hungry, then was given another share.")) return self.got += len(shares) log.msg("yyy 3 %s.got_shares(%s) got: %s" % (self, shares, self.got)) if self.got == 3: self.finished_d.callback(True) def get_desired_ciphertext_hashes(self, *args, **kwargs): return iter([]) def fetch_failed(self, *args, **kwargs): if self.check_fetch_failed: if self.finished_d: self.finished_d.errback(unittest.FailTest("The node was told by the segment fetcher that the download failed.")) self.finished_d = None def want_more_shares(self): if self.on_want_more_shares: self.on_want_more_shares() def 
process_blocks(self, *args, **kwargs): if self.finished_d: self.finished_d.callback(None) class TestShareFinder(unittest.TestCase): def test_no_reneging_on_no_more_shares_ever(self): # ticket #1191 # Suppose that K=3 and you send two DYHB requests, the first # response offers two shares, and then the last offers one # share. If you tell your share consumer "no more shares, # ever", and then immediately tell them "oh, and here's # another share", then you lose. rcap = uri.CHKFileURI('a'*32, 'a'*32, 3, 99, 100) vcap = rcap.get_verify_cap() class MockServer(object): def __init__(self, buckets): self.version = { 'http://allmydata.org/tahoe/protocols/storage/v1': { "tolerates-immutable-read-overrun": True } } self.buckets = buckets self.d = defer.Deferred() self.s = None def callRemote(self, methname, *args, **kwargs): d = defer.Deferred() # Even after the 3rd answer we're still hungry because # we're interested in finding a share on a 3rd server # so we don't have to download more than one share # from the first server. This is actually necessary to # trigger the bug. def _give_buckets_and_hunger_again(): d.callback(self.buckets) self.s.hungry() eventually(_give_buckets_and_hunger_again) return d class MockIServer(object): def __init__(self, serverid, rref): self.serverid = serverid self.rref = rref def get_serverid(self): return self.serverid def get_rref(self): return self.rref def get_name(self): return "name-%s" % self.serverid def get_version(self): return self.rref.version mockserver1 = MockServer({1: mock.Mock(), 2: mock.Mock()}) mockserver2 = MockServer({}) mockserver3 = MockServer({3: mock.Mock()}) mockstoragebroker = mock.Mock() servers = [ MockIServer("ms1", mockserver1), MockIServer("ms2", mockserver2), MockIServer("ms3", mockserver3), ] mockstoragebroker.get_servers_for_psi.return_value = servers mockdownloadstatus = mock.Mock() mocknode = MockNode(check_reneging=True, check_fetch_failed=True) s = finder.ShareFinder(mockstoragebroker, vcap, mocknode, mockdownloadstatus) mockserver1.s = s mockserver2.s = s mockserver3.s = s s.hungry() return mocknode.when_finished() class Test(GridTestMixin, unittest.TestCase, common.ShouldFailMixin): def startup(self, basedir): self.basedir = basedir self.set_up_grid(num_clients=2, num_servers=5) c1 = self.g.clients[1] # We need multiple segments to test crypttext hash trees that are # non-trivial (i.e. they have more than just one hash in them). c1.DEFAULT_ENCODING_PARAMETERS['max_segment_size'] = 12 # Tests that need to test servers of happiness using this should # set their own value for happy -- the default (7) breaks stuff. 
c1.DEFAULT_ENCODING_PARAMETERS['happy'] = 1 d = c1.upload(Data(TEST_DATA, convergence="")) def _after_upload(ur): self.uri = ur.get_uri() self.filenode = self.g.clients[0].create_node_from_uri(ur.get_uri()) return self.uri d.addCallback(_after_upload) return d def _stash_shares(self, shares): self.shares = shares def _download_and_check_plaintext(self, ign=None): num_reads = self._count_reads() d = download_to_data(self.filenode) def _after_download(result): self.failUnlessEqual(result, TEST_DATA) return self._count_reads() - num_reads d.addCallback(_after_download) return d def _shuffled(self, num_shnums): shnums = range(10) random.shuffle(shnums) return shnums[:num_shnums] def _count_reads(self): return sum([s.stats_provider.get_stats() ['counters'].get('storage_server.read', 0) for s in self.g.servers_by_number.values()]) def _count_allocates(self): return sum([s.stats_provider.get_stats() ['counters'].get('storage_server.allocate', 0) for s in self.g.servers_by_number.values()]) def _count_writes(self): return sum([s.stats_provider.get_stats() ['counters'].get('storage_server.write', 0) for s in self.g.servers_by_number.values()]) def test_test_code(self): # The following process of stashing the shares, running # replace_shares, and asserting that the new set of shares equals the # old is more to test this test code than to test the Tahoe code... d = self.startup("immutable/Test/code") d.addCallback(self.copy_shares) d.addCallback(self._stash_shares) d.addCallback(self._download_and_check_plaintext) # The following process of deleting 8 of the shares and asserting # that you can't download it is more to test this test code than to # test the Tahoe code... def _then_delete_8(ign): self.restore_all_shares(self.shares) self.delete_shares_numbered(self.uri, range(8)) d.addCallback(_then_delete_8) d.addCallback(lambda ign: self.shouldFail(NotEnoughSharesError, "download-2", "ran out of shares", download_to_data, self.filenode)) return d def test_download(self): """ Basic download. (This functionality is more or less already tested by test code in other modules, but this module is also going to test some more specific things about immutable download.) 
""" d = self.startup("immutable/Test/download") d.addCallback(self._download_and_check_plaintext) def _after_download(ign): num_reads = self._count_reads() #print num_reads self.failIf(num_reads > 41, num_reads) d.addCallback(_after_download) return d def test_download_from_only_3_remaining_shares(self): """ Test download after 7 random shares (of the 10) have been removed.""" d = self.startup("immutable/Test/download_from_only_3_remaining_shares") d.addCallback(lambda ign: self.delete_shares_numbered(self.uri, range(7))) d.addCallback(self._download_and_check_plaintext) def _after_download(num_reads): #print num_reads self.failIf(num_reads > 41, num_reads) d.addCallback(_after_download) return d def test_download_from_only_3_shares_with_good_crypttext_hash(self): """ Test download after 7 random shares (of the 10) have had their crypttext hash tree corrupted.""" d = self.startup("download_from_only_3_shares_with_good_crypttext_hash") def _corrupt_7(ign): c = common._corrupt_offset_of_block_hashes_to_truncate_crypttext_hashes self.corrupt_shares_numbered(self.uri, self._shuffled(7), c) d.addCallback(_corrupt_7) d.addCallback(self._download_and_check_plaintext) return d def test_download_abort_if_too_many_missing_shares(self): """ Test that download gives up quickly when it realizes there aren't enough shares out there.""" d = self.startup("download_abort_if_too_many_missing_shares") d.addCallback(lambda ign: self.delete_shares_numbered(self.uri, range(8))) d.addCallback(lambda ign: self.shouldFail(NotEnoughSharesError, "delete 8", "Last failure: None", download_to_data, self.filenode)) # the new downloader pipelines a bunch of read requests in parallel, # so don't bother asserting anything about the number of reads return d def test_download_abort_if_too_many_corrupted_shares(self): """Test that download gives up quickly when it realizes there aren't enough uncorrupted shares out there. It should be able to tell because the corruption occurs in the sharedata version number, which it checks first.""" d = self.startup("download_abort_if_too_many_corrupted_shares") def _corrupt_8(ign): c = common._corrupt_sharedata_version_number self.corrupt_shares_numbered(self.uri, self._shuffled(8), c) d.addCallback(_corrupt_8) def _try_download(ign): start_reads = self._count_reads() d2 = self.shouldFail(NotEnoughSharesError, "corrupt 8", "LayoutInvalid", download_to_data, self.filenode) def _check_numreads(ign): num_reads = self._count_reads() - start_reads #print num_reads # To pass this test, you are required to give up before # reading all of the share data. Actually, we could give up # sooner than 45 reads, but currently our download code does # 45 reads. This test then serves as a "performance # regression detector" -- if you change download code so that # it takes *more* reads, then this test will fail. 
self.failIf(num_reads > 45, num_reads) d2.addCallback(_check_numreads) return d2 d.addCallback(_try_download) return d def test_download_to_data(self): d = self.startup("download_to_data") d.addCallback(lambda ign: self.filenode.download_to_data()) d.addCallback(lambda data: self.failUnlessEqual(data, common.TEST_DATA)) return d def test_download_best_version(self): d = self.startup("download_best_version") d.addCallback(lambda ign: self.filenode.download_best_version()) d.addCallback(lambda data: self.failUnlessEqual(data, common.TEST_DATA)) return d def test_get_best_readable_version(self): d = self.startup("get_best_readable_version") d.addCallback(lambda ign: self.filenode.get_best_readable_version()) d.addCallback(lambda n2: self.failUnlessEqual(n2, self.filenode)) return d def test_get_size_of_best_version(self): d = self.startup("get_size_of_best_version") d.addCallback(lambda ign: self.filenode.get_size_of_best_version()) d.addCallback(lambda size: self.failUnlessEqual(size, len(common.TEST_DATA))) return d # XXX extend these tests to show bad behavior of various kinds from servers: # raising exception from each remove_foo() method, for example # XXX test disconnect DeadReferenceError from get_buckets and get_block_whatsit # TODO: delete this whole file tahoe-lafs-1.10.0/src/allmydata/test/test_import.py000066400000000000000000000015601221140116300222460ustar00rootroot00000000000000 from twisted.trial import unittest import allmydata import mock real_import_func = __import__ class T(unittest.TestCase): @mock.patch('__builtin__.__import__') def test_report_import_error(self, mockimport): def raiseIE_from_this_particular_func(name, *args): if name == "foolscap": marker = "wheeeyo" raise ImportError(marker + " foolscap cant be imported") else: return real_import_func(name, *args) mockimport.side_effect = raiseIE_from_this_particular_func vers_and_locs = allmydata.get_package_versions_and_locations() for (pkgname, stuff) in vers_and_locs: if pkgname == 'foolscap': self.failUnless('wheeeyo' in str(stuff[2]), stuff) self.failUnless('raiseIE_from_this_particular_func' in str(stuff[2]), stuff) tahoe-lafs-1.10.0/src/allmydata/test/test_introducer.py000066400000000000000000001462371221140116300231250ustar00rootroot00000000000000 import os, re, itertools from base64 import b32decode import simplejson from twisted.trial import unittest from twisted.internet import defer, address from twisted.python import log from foolscap.api import Tub, Referenceable, fireEventually, flushEventualQueue from twisted.application import service from allmydata.interfaces import InsufficientVersionError from allmydata.introducer.client import IntroducerClient, \ WrapV2ClientInV1Interface from allmydata.introducer.server import IntroducerService, FurlFileConflictError from allmydata.introducer.common import get_tubid_string_from_ann, \ get_tubid_string, sign_to_foolscap, unsign_from_foolscap, \ UnknownKeyError from allmydata.introducer import old # test compatibility with old introducer .tac files from allmydata.introducer import IntroducerNode from allmydata.web import introweb from allmydata.client import Client as TahoeClient from allmydata.util import pollmixin, keyutil, idlib, fileutil import allmydata.test.common_util as testutil class LoggingMultiService(service.MultiService): def log(self, msg, **kw): log.msg(msg, **kw) class Node(testutil.SignalMixin, unittest.TestCase): def test_furl(self): basedir = "introducer.IntroducerNode.test_furl" os.mkdir(basedir) public_fn = os.path.join(basedir, 
"introducer.furl") private_fn = os.path.join(basedir, "private", "introducer.furl") q1 = IntroducerNode(basedir) d = fireEventually(None) d.addCallback(lambda res: q1.startService()) d.addCallback(lambda res: q1.when_tub_ready()) d.addCallback(lambda res: q1.stopService()) d.addCallback(flushEventualQueue) def _check_furl(res): # new nodes create unguessable furls in private/introducer.furl ifurl = fileutil.read(private_fn) self.failUnless(ifurl) ifurl = ifurl.strip() self.failIf(ifurl.endswith("/introducer"), ifurl) # old nodes created guessable furls in BASEDIR/introducer.furl guessable = ifurl[:ifurl.rfind("/")] + "/introducer" fileutil.write(public_fn, guessable+"\n", mode="w") # text # if we see both files, throw an error self.failUnlessRaises(FurlFileConflictError, IntroducerNode, basedir) # when we see only the public one, move it to private/ and use # the existing furl instead of creating a new one os.unlink(private_fn) q2 = IntroducerNode(basedir) d2 = fireEventually(None) d2.addCallback(lambda res: q2.startService()) d2.addCallback(lambda res: q2.when_tub_ready()) d2.addCallback(lambda res: q2.stopService()) d2.addCallback(flushEventualQueue) def _check_furl2(res): self.failIf(os.path.exists(public_fn)) ifurl2 = fileutil.read(private_fn) self.failUnless(ifurl2) self.failUnlessEqual(ifurl2.strip(), guessable) d2.addCallback(_check_furl2) return d2 d.addCallback(_check_furl) return d class ServiceMixin: def setUp(self): self.parent = LoggingMultiService() self.parent.startService() def tearDown(self): log.msg("TestIntroducer.tearDown") d = defer.succeed(None) d.addCallback(lambda res: self.parent.stopService()) d.addCallback(flushEventualQueue) return d class Introducer(ServiceMixin, unittest.TestCase, pollmixin.PollMixin): def test_create(self): ic = IntroducerClient(None, "introducer.furl", u"my_nickname", "my_version", "oldest_version", {}, fakeseq) self.failUnless(isinstance(ic, IntroducerClient)) def test_listen(self): i = IntroducerService() i.setServiceParent(self.parent) def test_duplicate_publish(self): i = IntroducerService() self.failUnlessEqual(len(i.get_announcements()), 0) self.failUnlessEqual(len(i.get_subscribers()), 0) furl1 = "pb://62ubehyunnyhzs7r6vdonnm2hpi52w6y@192.168.69.247:36106,127.0.0.1:36106/gydnpigj2ja2qr2srq4ikjwnl7xfgbra" furl2 = "pb://ttwwooyunnyhzs7r6vdonnm2hpi52w6y@192.168.69.247:36111,127.0.0.1:36106/ttwwoogj2ja2qr2srq4ikjwnl7xfgbra" ann1 = (furl1, "storage", "RIStorage", "nick1", "ver23", "ver0") ann1b = (furl1, "storage", "RIStorage", "nick1", "ver24", "ver0") ann2 = (furl2, "storage", "RIStorage", "nick2", "ver30", "ver0") i.remote_publish(ann1) self.failUnlessEqual(len(i.get_announcements()), 1) self.failUnlessEqual(len(i.get_subscribers()), 0) i.remote_publish(ann2) self.failUnlessEqual(len(i.get_announcements()), 2) self.failUnlessEqual(len(i.get_subscribers()), 0) i.remote_publish(ann1b) self.failUnlessEqual(len(i.get_announcements()), 2) self.failUnlessEqual(len(i.get_subscribers()), 0) def test_id_collision(self): # test replacement case where tubid equals a keyid (one should # not replace the other) i = IntroducerService() ic = IntroducerClient(None, "introducer.furl", u"my_nickname", "my_version", "oldest_version", {}, fakeseq) sk_s, vk_s = keyutil.make_keypair() sk, _ignored = keyutil.parse_privkey(sk_s) keyid = keyutil.remove_prefix(vk_s, "pub-v0-") furl1 = "pb://onug64tu@127.0.0.1:123/short" # base32("short") ann_t = make_ann_t(ic, furl1, sk, 1) i.remote_publish_v2(ann_t, Referenceable()) announcements = i.get_announcements() 
self.failUnlessEqual(len(announcements), 1) key1 = ("storage", "v0-"+keyid, None) self.failUnlessEqual(announcements[0].index, key1) ann1_out = announcements[0].announcement self.failUnlessEqual(ann1_out["anonymous-storage-FURL"], furl1) furl2 = "pb://%s@127.0.0.1:36106/swissnum" % keyid ann2 = (furl2, "storage", "RIStorage", "nick1", "ver23", "ver0") i.remote_publish(ann2) announcements = i.get_announcements() self.failUnlessEqual(len(announcements), 2) key2 = ("storage", None, keyid) wanted = [ad for ad in announcements if ad.index == key2] self.failUnlessEqual(len(wanted), 1) ann2_out = wanted[0].announcement self.failUnlessEqual(ann2_out["anonymous-storage-FURL"], furl2) def fakeseq(): return 1, "nonce" seqnum_counter = itertools.count(1) def realseq(): return seqnum_counter.next(), str(os.randint(1,100000)) def make_ann(furl): ann = { "anonymous-storage-FURL": furl, "permutation-seed-base32": get_tubid_string(furl) } return ann def make_ann_t(ic, furl, privkey, seqnum): ann_d = ic.create_announcement_dict("storage", make_ann(furl)) ann_d["seqnum"] = seqnum ann_d["nonce"] = "nonce" ann_t = sign_to_foolscap(ann_d, privkey) return ann_t class Client(unittest.TestCase): def test_duplicate_receive_v1(self): ic = IntroducerClient(None, "introducer.furl", u"my_nickname", "my_version", "oldest_version", {}, fakeseq) announcements = [] ic.subscribe_to("storage", lambda key_s,ann: announcements.append(ann)) furl1 = "pb://62ubehyunnyhzs7r6vdonnm2hpi52w6y@127.0.0.1:36106/gydnpigj2ja2qr2srq4ikjwnl7xfgbra" ann1 = (furl1, "storage", "RIStorage", "nick1", "ver23", "ver0") ann1b = (furl1, "storage", "RIStorage", "nick1", "ver24", "ver0") ca = WrapV2ClientInV1Interface(ic) ca.remote_announce([ann1]) d = fireEventually() def _then(ign): self.failUnlessEqual(len(announcements), 1) self.failUnlessEqual(announcements[0]["nickname"], u"nick1") self.failUnlessEqual(announcements[0]["my-version"], "ver23") self.failUnlessEqual(ic._debug_counts["inbound_announcement"], 1) self.failUnlessEqual(ic._debug_counts["new_announcement"], 1) self.failUnlessEqual(ic._debug_counts["update"], 0) self.failUnlessEqual(ic._debug_counts["duplicate_announcement"], 0) # now send a duplicate announcement: this should not notify clients ca.remote_announce([ann1]) return fireEventually() d.addCallback(_then) def _then2(ign): self.failUnlessEqual(len(announcements), 1) self.failUnlessEqual(ic._debug_counts["inbound_announcement"], 2) self.failUnlessEqual(ic._debug_counts["new_announcement"], 1) self.failUnlessEqual(ic._debug_counts["update"], 0) self.failUnlessEqual(ic._debug_counts["duplicate_announcement"], 1) # and a replacement announcement: same FURL, new other stuff. # Clients should be notified. 
ca.remote_announce([ann1b]) return fireEventually() d.addCallback(_then2) def _then3(ign): self.failUnlessEqual(len(announcements), 2) self.failUnlessEqual(ic._debug_counts["inbound_announcement"], 3) self.failUnlessEqual(ic._debug_counts["new_announcement"], 1) self.failUnlessEqual(ic._debug_counts["update"], 1) self.failUnlessEqual(ic._debug_counts["duplicate_announcement"], 1) # test that the other stuff changed self.failUnlessEqual(announcements[-1]["nickname"], u"nick1") self.failUnlessEqual(announcements[-1]["my-version"], "ver24") d.addCallback(_then3) return d def test_duplicate_receive_v2(self): ic1 = IntroducerClient(None, "introducer.furl", u"my_nickname", "ver23", "oldest_version", {}, fakeseq) # we use a second client just to create a different-looking # announcement ic2 = IntroducerClient(None, "introducer.furl", u"my_nickname", "ver24","oldest_version",{}, fakeseq) announcements = [] def _received(key_s, ann): announcements.append( (key_s, ann) ) ic1.subscribe_to("storage", _received) furl1 = "pb://62ubehyunnyhzs7r6vdonnm2hpi52w6y@127.0.0.1:36106/gydnp" furl1a = "pb://62ubehyunnyhzs7r6vdonnm2hpi52w6y@127.0.0.1:7777/gydnp" furl2 = "pb://ttwwooyunnyhzs7r6vdonnm2hpi52w6y@127.0.0.1:36106/ttwwoo" privkey_s, pubkey_vs = keyutil.make_keypair() privkey, _ignored = keyutil.parse_privkey(privkey_s) pubkey_s = keyutil.remove_prefix(pubkey_vs, "pub-") # ann1: ic1, furl1 # ann1a: ic1, furl1a (same SturdyRef, different connection hints) # ann1b: ic2, furl1 # ann2: ic2, furl2 self.ann1 = make_ann_t(ic1, furl1, privkey, seqnum=10) self.ann1old = make_ann_t(ic1, furl1, privkey, seqnum=9) self.ann1noseqnum = make_ann_t(ic1, furl1, privkey, seqnum=None) self.ann1b = make_ann_t(ic2, furl1, privkey, seqnum=11) self.ann1a = make_ann_t(ic1, furl1a, privkey, seqnum=12) self.ann2 = make_ann_t(ic2, furl2, privkey, seqnum=13) ic1.remote_announce_v2([self.ann1]) # queues eventual-send d = fireEventually() def _then1(ign): self.failUnlessEqual(len(announcements), 1) key_s,ann = announcements[0] self.failUnlessEqual(key_s, pubkey_s) self.failUnlessEqual(ann["anonymous-storage-FURL"], furl1) self.failUnlessEqual(ann["my-version"], "ver23") d.addCallback(_then1) # now send a duplicate announcement. This should not fire the # subscriber d.addCallback(lambda ign: ic1.remote_announce_v2([self.ann1])) d.addCallback(fireEventually) def _then2(ign): self.failUnlessEqual(len(announcements), 1) d.addCallback(_then2) # an older announcement shouldn't fire the subscriber either d.addCallback(lambda ign: ic1.remote_announce_v2([self.ann1old])) d.addCallback(fireEventually) def _then2a(ign): self.failUnlessEqual(len(announcements), 1) d.addCallback(_then2a) # announcement with no seqnum cannot replace one with-seqnum d.addCallback(lambda ign: ic1.remote_announce_v2([self.ann1noseqnum])) d.addCallback(fireEventually) def _then2b(ign): self.failUnlessEqual(len(announcements), 1) d.addCallback(_then2b) # and a replacement announcement: same FURL, new other stuff. The # subscriber *should* be fired. 
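        # The replacement rule these callbacks check, sketched with an
        # illustrative helper (not the introducer client's actual code): an
        # announcement for an already-known index reaches subscribers only if
        # it carries a seqnum strictly newer than the one recorded.
        def _sketch_should_deliver(old_seqnum, new_seqnum):
            if new_seqnum is None:
                return False    # no-seqnum announcements never replace
            return new_seqnum > old_seqnum
        # e.g. _sketch_should_deliver(10, 10) and (10, 9) are False, while
        # _sketch_should_deliver(10, 11) is True, matching ann1b below.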
d.addCallback(lambda ign: ic1.remote_announce_v2([self.ann1b])) d.addCallback(fireEventually) def _then3(ign): self.failUnlessEqual(len(announcements), 2) key_s,ann = announcements[-1] self.failUnlessEqual(key_s, pubkey_s) self.failUnlessEqual(ann["anonymous-storage-FURL"], furl1) self.failUnlessEqual(ann["my-version"], "ver24") d.addCallback(_then3) # and a replacement announcement with a different FURL (it uses # different connection hints) d.addCallback(lambda ign: ic1.remote_announce_v2([self.ann1a])) d.addCallback(fireEventually) def _then4(ign): self.failUnlessEqual(len(announcements), 3) key_s,ann = announcements[-1] self.failUnlessEqual(key_s, pubkey_s) self.failUnlessEqual(ann["anonymous-storage-FURL"], furl1a) self.failUnlessEqual(ann["my-version"], "ver23") d.addCallback(_then4) # now add a new subscription, which should be called with the # backlog. The introducer only records one announcement per index, so # the backlog will only have the latest message. announcements2 = [] def _received2(key_s, ann): announcements2.append( (key_s, ann) ) d.addCallback(lambda ign: ic1.subscribe_to("storage", _received2)) d.addCallback(fireEventually) def _then5(ign): self.failUnlessEqual(len(announcements2), 1) key_s,ann = announcements2[-1] self.failUnlessEqual(key_s, pubkey_s) self.failUnlessEqual(ann["anonymous-storage-FURL"], furl1a) self.failUnlessEqual(ann["my-version"], "ver23") d.addCallback(_then5) return d def test_id_collision(self): # test replacement case where tubid equals a keyid (one should # not replace the other) ic = IntroducerClient(None, "introducer.furl", u"my_nickname", "my_version", "oldest_version", {}, fakeseq) announcements = [] ic.subscribe_to("storage", lambda key_s,ann: announcements.append(ann)) sk_s, vk_s = keyutil.make_keypair() sk, _ignored = keyutil.parse_privkey(sk_s) keyid = keyutil.remove_prefix(vk_s, "pub-v0-") furl1 = "pb://onug64tu@127.0.0.1:123/short" # base32("short") furl2 = "pb://%s@127.0.0.1:36106/swissnum" % keyid ann_t = make_ann_t(ic, furl1, sk, 1) ic.remote_announce_v2([ann_t]) d = fireEventually() def _then(ign): # first announcement has been processed self.failUnlessEqual(len(announcements), 1) self.failUnlessEqual(announcements[0]["anonymous-storage-FURL"], furl1) # now submit a second one, with a tubid that happens to look just # like the pubkey-based serverid we just processed. They should # not overlap. 
ann2 = (furl2, "storage", "RIStorage", "nick1", "ver23", "ver0") ca = WrapV2ClientInV1Interface(ic) ca.remote_announce([ann2]) return fireEventually() d.addCallback(_then) def _then2(ign): # if they overlapped, the second announcement would be ignored self.failUnlessEqual(len(announcements), 2) self.failUnlessEqual(announcements[1]["anonymous-storage-FURL"], furl2) d.addCallback(_then2) return d class Server(unittest.TestCase): def test_duplicate(self): i = IntroducerService() ic1 = IntroducerClient(None, "introducer.furl", u"my_nickname", "ver23", "oldest_version", {}, realseq) furl1 = "pb://62ubehyunnyhzs7r6vdonnm2hpi52w6y@127.0.0.1:36106/gydnp" privkey_s, _ = keyutil.make_keypair() privkey, _ = keyutil.parse_privkey(privkey_s) ann1 = make_ann_t(ic1, furl1, privkey, seqnum=10) ann1_old = make_ann_t(ic1, furl1, privkey, seqnum=9) ann1_new = make_ann_t(ic1, furl1, privkey, seqnum=11) ann1_noseqnum = make_ann_t(ic1, furl1, privkey, seqnum=None) ann1_badseqnum = make_ann_t(ic1, furl1, privkey, seqnum="not an int") i.remote_publish_v2(ann1, None) all = i.get_announcements() self.failUnlessEqual(len(all), 1) self.failUnlessEqual(all[0].announcement["seqnum"], 10) self.failUnlessEqual(i._debug_counts["inbound_message"], 1) self.failUnlessEqual(i._debug_counts["inbound_duplicate"], 0) self.failUnlessEqual(i._debug_counts["inbound_no_seqnum"], 0) self.failUnlessEqual(i._debug_counts["inbound_old_replay"], 0) self.failUnlessEqual(i._debug_counts["inbound_update"], 0) i.remote_publish_v2(ann1, None) all = i.get_announcements() self.failUnlessEqual(len(all), 1) self.failUnlessEqual(all[0].announcement["seqnum"], 10) self.failUnlessEqual(i._debug_counts["inbound_message"], 2) self.failUnlessEqual(i._debug_counts["inbound_duplicate"], 1) self.failUnlessEqual(i._debug_counts["inbound_no_seqnum"], 0) self.failUnlessEqual(i._debug_counts["inbound_old_replay"], 0) self.failUnlessEqual(i._debug_counts["inbound_update"], 0) i.remote_publish_v2(ann1_old, None) all = i.get_announcements() self.failUnlessEqual(len(all), 1) self.failUnlessEqual(all[0].announcement["seqnum"], 10) self.failUnlessEqual(i._debug_counts["inbound_message"], 3) self.failUnlessEqual(i._debug_counts["inbound_duplicate"], 1) self.failUnlessEqual(i._debug_counts["inbound_no_seqnum"], 0) self.failUnlessEqual(i._debug_counts["inbound_old_replay"], 1) self.failUnlessEqual(i._debug_counts["inbound_update"], 0) i.remote_publish_v2(ann1_new, None) all = i.get_announcements() self.failUnlessEqual(len(all), 1) self.failUnlessEqual(all[0].announcement["seqnum"], 11) self.failUnlessEqual(i._debug_counts["inbound_message"], 4) self.failUnlessEqual(i._debug_counts["inbound_duplicate"], 1) self.failUnlessEqual(i._debug_counts["inbound_no_seqnum"], 0) self.failUnlessEqual(i._debug_counts["inbound_old_replay"], 1) self.failUnlessEqual(i._debug_counts["inbound_update"], 1) i.remote_publish_v2(ann1_noseqnum, None) all = i.get_announcements() self.failUnlessEqual(len(all), 1) self.failUnlessEqual(all[0].announcement["seqnum"], 11) self.failUnlessEqual(i._debug_counts["inbound_message"], 5) self.failUnlessEqual(i._debug_counts["inbound_duplicate"], 1) self.failUnlessEqual(i._debug_counts["inbound_no_seqnum"], 1) self.failUnlessEqual(i._debug_counts["inbound_old_replay"], 1) self.failUnlessEqual(i._debug_counts["inbound_update"], 1) i.remote_publish_v2(ann1_badseqnum, None) all = i.get_announcements() self.failUnlessEqual(len(all), 1) self.failUnlessEqual(all[0].announcement["seqnum"], 11) self.failUnlessEqual(i._debug_counts["inbound_message"], 6) 
self.failUnlessEqual(i._debug_counts["inbound_duplicate"], 1) self.failUnlessEqual(i._debug_counts["inbound_no_seqnum"], 2) self.failUnlessEqual(i._debug_counts["inbound_old_replay"], 1) self.failUnlessEqual(i._debug_counts["inbound_update"], 1) NICKNAME = u"n\u00EDickname-%s" # LATIN SMALL LETTER I WITH ACUTE class SystemTestMixin(ServiceMixin, pollmixin.PollMixin): def create_tub(self, portnum=0): tubfile = os.path.join(self.basedir, "tub.pem") self.central_tub = tub = Tub(certFile=tubfile) #tub.setOption("logLocalFailures", True) #tub.setOption("logRemoteFailures", True) tub.setOption("expose-remote-exception-types", False) tub.setServiceParent(self.parent) l = tub.listenOn("tcp:%d" % portnum) self.central_portnum = l.getPortnum() if portnum != 0: assert self.central_portnum == portnum tub.setLocation("localhost:%d" % self.central_portnum) class Queue(SystemTestMixin, unittest.TestCase): def test_queue_until_connected(self): self.basedir = "introducer/QueueUntilConnected/queued" os.makedirs(self.basedir) self.create_tub() introducer = IntroducerService() introducer.setServiceParent(self.parent) iff = os.path.join(self.basedir, "introducer.furl") ifurl = self.central_tub.registerReference(introducer, furlFile=iff) tub2 = Tub() tub2.setServiceParent(self.parent) c = IntroducerClient(tub2, ifurl, u"nickname", "version", "oldest", {}, fakeseq) furl1 = "pb://onug64tu@127.0.0.1:123/short" # base32("short") sk_s, vk_s = keyutil.make_keypair() sk, _ignored = keyutil.parse_privkey(sk_s) d = introducer.disownServiceParent() def _offline(ign): # now that the introducer server is offline, create a client and # publish some messages c.setServiceParent(self.parent) # this starts the reconnector c.publish("storage", make_ann(furl1), sk) introducer.setServiceParent(self.parent) # restart the server # now wait for the messages to be delivered def _got_announcement(): return bool(introducer.get_announcements()) return self.poll(_got_announcement) d.addCallback(_offline) def _done(ign): v = introducer.get_announcements()[0] furl = v.announcement["anonymous-storage-FURL"] self.failUnlessEqual(furl, furl1) d.addCallback(_done) # now let the ack get back def _wait_until_idle(ign): def _idle(): if c._debug_outstanding: return False if introducer._debug_outstanding: return False return True return self.poll(_idle) d.addCallback(_wait_until_idle) return d V1 = "v1"; V2 = "v2" class SystemTest(SystemTestMixin, unittest.TestCase): def do_system_test(self, server_version): self.create_tub() if server_version == V1: introducer = old.IntroducerService_v1() else: introducer = IntroducerService() introducer.setServiceParent(self.parent) iff = os.path.join(self.basedir, "introducer.furl") tub = self.central_tub ifurl = self.central_tub.registerReference(introducer, furlFile=iff) self.introducer_furl = ifurl # we have 5 clients who publish themselves as storage servers, and a # sixth which does which not. All 6 clients subscriber to hear about # storage. When the connections are fully established, all six nodes # should have 5 connections each. 
NUM_STORAGE = 5 NUM_CLIENTS = 6 clients = [] tubs = {} received_announcements = {} subscribing_clients = [] publishing_clients = [] printable_serverids = {} self.the_introducer = introducer privkeys = {} expected_announcements = [0 for c in range(NUM_CLIENTS)] for i in range(NUM_CLIENTS): tub = Tub() #tub.setOption("logLocalFailures", True) #tub.setOption("logRemoteFailures", True) tub.setOption("expose-remote-exception-types", False) tub.setServiceParent(self.parent) l = tub.listenOn("tcp:0") portnum = l.getPortnum() tub.setLocation("localhost:%d" % portnum) log.msg("creating client %d: %s" % (i, tub.getShortTubID())) if i == 0: c = old.IntroducerClient_v1(tub, self.introducer_furl, NICKNAME % str(i), "version", "oldest") else: c = IntroducerClient(tub, self.introducer_furl, NICKNAME % str(i), "version", "oldest", {"component": "component-v1"}, fakeseq) received_announcements[c] = {} def got(key_s_or_tubid, ann, announcements, i): if i == 0: index = get_tubid_string_from_ann(ann) else: index = key_s_or_tubid or get_tubid_string_from_ann(ann) announcements[index] = ann c.subscribe_to("storage", got, received_announcements[c], i) subscribing_clients.append(c) expected_announcements[i] += 1 # all expect a 'storage' announcement node_furl = tub.registerReference(Referenceable()) if i < NUM_STORAGE: if i == 0: c.publish(node_furl, "storage", "ri_name") printable_serverids[i] = get_tubid_string(node_furl) elif i == 1: # sign the announcement privkey_s, pubkey_s = keyutil.make_keypair() privkey, _ignored = keyutil.parse_privkey(privkey_s) privkeys[c] = privkey c.publish("storage", make_ann(node_furl), privkey) if server_version == V1: printable_serverids[i] = get_tubid_string(node_furl) else: assert pubkey_s.startswith("pub-") printable_serverids[i] = pubkey_s[len("pub-"):] else: c.publish("storage", make_ann(node_furl)) printable_serverids[i] = get_tubid_string(node_furl) publishing_clients.append(c) else: # the last one does not publish anything pass if i == 0: # users of the V1 client were required to publish a # 'stub_client' record (somewhat after they published the # 'storage' record), so the introducer could see their # version. Match that behavior. c.publish(node_furl, "stub_client", "stub_ri_name") if i == 2: # also publish something that nobody cares about boring_furl = tub.registerReference(Referenceable()) c.publish("boring", make_ann(boring_furl)) c.setServiceParent(self.parent) clients.append(c) tubs[c] = tub def _wait_for_connected(ign): def _connected(): for c in clients: if not c.connected_to_introducer(): return False return True return self.poll(_connected) # we watch the clients to determine when the system has settled down. # Then we can look inside the server to assert things about its # state. 
def _wait_for_expected_announcements(ign): def _got_expected_announcements(): for i,c in enumerate(subscribing_clients): if len(received_announcements[c]) < expected_announcements[i]: return False return True return self.poll(_got_expected_announcements) # before shutting down any Tub, we'd like to know that there are no # messages outstanding def _wait_until_idle(ign): def _idle(): for c in subscribing_clients + publishing_clients: if c._debug_outstanding: return False if self.the_introducer._debug_outstanding: return False return True return self.poll(_idle) d = defer.succeed(None) d.addCallback(_wait_for_connected) d.addCallback(_wait_for_expected_announcements) d.addCallback(_wait_until_idle) def _check1(res): log.msg("doing _check1") dc = self.the_introducer._debug_counts if server_version == V1: # each storage server publishes a record, and (after its # 'subscribe' has been ACKed) also publishes a "stub_client". # The non-storage client (which subscribes) also publishes a # stub_client. There is also one "boring" service. The number # of messages is higher, because the stub_clients aren't # published until after we get the 'subscribe' ack (since we # don't realize that we're dealing with a v1 server [which # needs stub_clients] until then), and the act of publishing # the stub_client causes us to re-send all previous # announcements. self.failUnlessEqual(dc["inbound_message"] - dc["inbound_duplicate"], NUM_STORAGE + NUM_CLIENTS + 1) else: # each storage server publishes a record. There is also one # "stub_client" and one "boring" self.failUnlessEqual(dc["inbound_message"], NUM_STORAGE+2) self.failUnlessEqual(dc["inbound_duplicate"], 0) self.failUnlessEqual(dc["inbound_update"], 0) self.failUnlessEqual(dc["inbound_subscribe"], NUM_CLIENTS) # the number of outbound messages is tricky.. I think it depends # upon a race between the publish and the subscribe messages. self.failUnless(dc["outbound_message"] > 0) # each client subscribes to "storage", and each server publishes self.failUnlessEqual(dc["outbound_announcements"], NUM_STORAGE*NUM_CLIENTS) for c in subscribing_clients: cdc = c._debug_counts self.failUnless(cdc["inbound_message"]) self.failUnlessEqual(cdc["inbound_announcement"], NUM_STORAGE) self.failUnlessEqual(cdc["wrong_service"], 0) self.failUnlessEqual(cdc["duplicate_announcement"], 0) self.failUnlessEqual(cdc["update"], 0) self.failUnlessEqual(cdc["new_announcement"], NUM_STORAGE) anns = received_announcements[c] self.failUnlessEqual(len(anns), NUM_STORAGE) nodeid0 = tubs[clients[0]].tubID ann = anns[nodeid0] nick = ann["nickname"] self.failUnlessEqual(type(nick), unicode) self.failUnlessEqual(nick, NICKNAME % "0") if server_version == V1: for c in publishing_clients: cdc = c._debug_counts expected = 1 # storage if c is clients[2]: expected += 1 # boring if c is not clients[0]: # the v2 client tries to call publish_v2, which fails # because the server is v1. 
It then re-sends # everything it has so far, plus a stub_client record expected = 2*expected + 1 if c is clients[0]: # we always tell v1 client to send stub_client expected += 1 self.failUnlessEqual(cdc["outbound_message"], expected) else: for c in publishing_clients: cdc = c._debug_counts expected = 1 if c in [clients[0], # stub_client clients[2], # boring ]: expected = 2 self.failUnlessEqual(cdc["outbound_message"], expected) # now check the web status, make sure it renders without error ir = introweb.IntroducerRoot(self.parent) self.parent.nodeid = "NODEID" text = ir.renderSynchronously().decode("utf-8") self.failUnlessIn(NICKNAME % "0", text) # the v1 client self.failUnlessIn(NICKNAME % "1", text) # a v2 client for i in range(NUM_STORAGE): self.failUnlessIn(printable_serverids[i], text, (i,printable_serverids[i],text)) # make sure there isn't a double-base32ed string too self.failIfIn(idlib.nodeid_b2a(printable_serverids[i]), text, (i,printable_serverids[i],text)) log.msg("_check1 done") d.addCallback(_check1) # force an introducer reconnect, by shutting down the Tub it's using # and starting a new Tub (with the old introducer). Everybody should # reconnect and republish, but the introducer should ignore the # republishes as duplicates. However, because the server doesn't know # what each client does and does not know, it will send them a copy # of the current announcement table anyway. d.addCallback(lambda _ign: log.msg("shutting down introducer's Tub")) d.addCallback(lambda _ign: self.central_tub.disownServiceParent()) def _wait_for_introducer_loss(ign): def _introducer_lost(): for c in clients: if c.connected_to_introducer(): return False return True return self.poll(_introducer_lost) d.addCallback(_wait_for_introducer_loss) def _restart_introducer_tub(_ign): log.msg("restarting introducer's Tub") # reset counters for i in range(NUM_CLIENTS): c = subscribing_clients[i] for k in c._debug_counts: c._debug_counts[k] = 0 for k in self.the_introducer._debug_counts: self.the_introducer._debug_counts[k] = 0 expected_announcements[i] += 1 # new 'storage' for everyone self.create_tub(self.central_portnum) newfurl = self.central_tub.registerReference(self.the_introducer, furlFile=iff) assert newfurl == self.introducer_furl d.addCallback(_restart_introducer_tub) d.addCallback(_wait_for_connected) d.addCallback(_wait_for_expected_announcements) d.addCallback(_wait_until_idle) d.addCallback(lambda _ign: log.msg(" reconnected")) # TODO: publish something while the introducer is offline, then # confirm it gets delivered when the connection is reestablished def _check2(res): log.msg("doing _check2") # assert that the introducer sent out new messages, one per # subscriber dc = self.the_introducer._debug_counts self.failUnlessEqual(dc["outbound_announcements"], NUM_STORAGE*NUM_CLIENTS) self.failUnless(dc["outbound_message"] > 0) self.failUnlessEqual(dc["inbound_subscribe"], NUM_CLIENTS) for c in subscribing_clients: cdc = c._debug_counts self.failUnlessEqual(cdc["inbound_message"], 1) self.failUnlessEqual(cdc["inbound_announcement"], NUM_STORAGE) self.failUnlessEqual(cdc["new_announcement"], 0) self.failUnlessEqual(cdc["wrong_service"], 0) self.failUnlessEqual(cdc["duplicate_announcement"], NUM_STORAGE) d.addCallback(_check2) # Then force an introducer restart, by shutting down the Tub, # destroying the old introducer, and starting a new Tub+Introducer. 
# Everybody should reconnect and republish, and the (new) introducer # will distribute the new announcements, but the clients should # ignore the republishes as duplicates. d.addCallback(lambda _ign: log.msg("shutting down introducer")) d.addCallback(lambda _ign: self.central_tub.disownServiceParent()) d.addCallback(_wait_for_introducer_loss) d.addCallback(lambda _ign: log.msg("introducer lost")) def _restart_introducer(_ign): log.msg("restarting introducer") self.create_tub(self.central_portnum) # reset counters for i in range(NUM_CLIENTS): c = subscribing_clients[i] for k in c._debug_counts: c._debug_counts[k] = 0 expected_announcements[i] += 1 # new 'storage' for everyone if server_version == V1: introducer = old.IntroducerService_v1() else: introducer = IntroducerService() self.the_introducer = introducer newfurl = self.central_tub.registerReference(self.the_introducer, furlFile=iff) assert newfurl == self.introducer_furl d.addCallback(_restart_introducer) d.addCallback(_wait_for_connected) d.addCallback(_wait_for_expected_announcements) d.addCallback(_wait_until_idle) def _check3(res): log.msg("doing _check3") dc = self.the_introducer._debug_counts self.failUnlessEqual(dc["outbound_announcements"], NUM_STORAGE*NUM_CLIENTS) self.failUnless(dc["outbound_message"] > 0) self.failUnlessEqual(dc["inbound_subscribe"], NUM_CLIENTS) for c in subscribing_clients: cdc = c._debug_counts self.failUnless(cdc["inbound_message"] > 0) self.failUnlessEqual(cdc["inbound_announcement"], NUM_STORAGE) self.failUnlessEqual(cdc["new_announcement"], 0) self.failUnlessEqual(cdc["wrong_service"], 0) self.failUnlessEqual(cdc["duplicate_announcement"], NUM_STORAGE) d.addCallback(_check3) return d def test_system_v2_server(self): self.basedir = "introducer/SystemTest/system_v2_server" os.makedirs(self.basedir) return self.do_system_test(V2) test_system_v2_server.timeout = 480 # occasionally takes longer than 350s on "draco" def test_system_v1_server(self): self.basedir = "introducer/SystemTest/system_v1_server" os.makedirs(self.basedir) return self.do_system_test(V1) test_system_v1_server.timeout = 480 # occasionally takes longer than 350s on "draco" class FakeRemoteReference: def notifyOnDisconnect(self, *args, **kwargs): pass def getRemoteTubID(self): return "62ubehyunnyhzs7r6vdonnm2hpi52w6y" def getLocationHints(self): return [("ipv4", "here.example.com", "1234"), ("ipv4", "there.example.com", "2345")] def getPeer(self): return address.IPv4Address("TCP", "remote.example.com", 3456) class ClientInfo(unittest.TestCase): def test_client_v2(self): introducer = IntroducerService() tub = introducer_furl = None app_versions = {"whizzy": "fizzy"} client_v2 = IntroducerClient(tub, introducer_furl, NICKNAME % u"v2", "my_version", "oldest", app_versions, fakeseq) #furl1 = "pb://62ubehyunnyhzs7r6vdonnm2hpi52w6y@127.0.0.1:0/swissnum" #ann_s = make_ann_t(client_v2, furl1, None, 10) #introducer.remote_publish_v2(ann_s, Referenceable()) subscriber = FakeRemoteReference() introducer.remote_subscribe_v2(subscriber, "storage", client_v2._my_subscriber_info) subs = introducer.get_subscribers() self.failUnlessEqual(len(subs), 1) s0 = subs[0] self.failUnlessEqual(s0.service_name, "storage") self.failUnlessEqual(s0.app_versions, app_versions) self.failUnlessEqual(s0.nickname, NICKNAME % u"v2") self.failUnlessEqual(s0.version, "my_version") def test_client_v1(self): introducer = IntroducerService() subscriber = FakeRemoteReference() introducer.remote_subscribe(subscriber, "storage") # the v1 subscribe interface had no subscriber_info: 
that was usually # sent in a separate stub_client pseudo-announcement subs = introducer.get_subscribers() self.failUnlessEqual(len(subs), 1) s0 = subs[0] self.failUnlessEqual(s0.nickname, u"?") # not known yet self.failUnlessEqual(s0.service_name, "storage") # now submit the stub_client announcement furl1 = "pb://62ubehyunnyhzs7r6vdonnm2hpi52w6y@127.0.0.1:0/swissnum" ann = (furl1, "stub_client", "RIStubClient", (NICKNAME % u"v1").encode("utf-8"), "my_version", "oldest") introducer.remote_publish(ann) # the server should correlate the two subs = introducer.get_subscribers() self.failUnlessEqual(len(subs), 1) s0 = subs[0] self.failUnlessEqual(s0.service_name, "storage") # v1 announcements do not contain app-versions self.failUnlessEqual(s0.app_versions, {}) self.failUnlessEqual(s0.nickname, NICKNAME % u"v1") self.failUnlessEqual(s0.version, "my_version") # a subscription that arrives after the stub_client announcement # should be correlated too subscriber2 = FakeRemoteReference() introducer.remote_subscribe(subscriber2, "thing2") subs = introducer.get_subscribers() self.failUnlessEqual(len(subs), 2) s0 = [s for s in subs if s.service_name == "thing2"][0] # v1 announcements do not contain app-versions self.failUnlessEqual(s0.app_versions, {}) self.failUnlessEqual(s0.nickname, NICKNAME % u"v1") self.failUnlessEqual(s0.version, "my_version") class Announcements(unittest.TestCase): def test_client_v2_unsigned(self): introducer = IntroducerService() tub = introducer_furl = None app_versions = {"whizzy": "fizzy"} client_v2 = IntroducerClient(tub, introducer_furl, u"nick-v2", "my_version", "oldest", app_versions, fakeseq) furl1 = "pb://62ubehyunnyhzs7r6vdonnm2hpi52w6y@127.0.0.1:0/swissnum" tubid = "62ubehyunnyhzs7r6vdonnm2hpi52w6y" ann_s0 = make_ann_t(client_v2, furl1, None, 10) canary0 = Referenceable() introducer.remote_publish_v2(ann_s0, canary0) a = introducer.get_announcements() self.failUnlessEqual(len(a), 1) self.failUnlessIdentical(a[0].canary, canary0) self.failUnlessEqual(a[0].index, ("storage", None, tubid)) self.failUnlessEqual(a[0].announcement["app-versions"], app_versions) self.failUnlessEqual(a[0].nickname, u"nick-v2") self.failUnlessEqual(a[0].service_name, "storage") self.failUnlessEqual(a[0].version, "my_version") self.failUnlessEqual(a[0].announcement["anonymous-storage-FURL"], furl1) def test_client_v2_signed(self): introducer = IntroducerService() tub = introducer_furl = None app_versions = {"whizzy": "fizzy"} client_v2 = IntroducerClient(tub, introducer_furl, u"nick-v2", "my_version", "oldest", app_versions, fakeseq) furl1 = "pb://62ubehyunnyhzs7r6vdonnm2hpi52w6y@127.0.0.1:0/swissnum" sk_s, vk_s = keyutil.make_keypair() sk, _ignored = keyutil.parse_privkey(sk_s) pks = keyutil.remove_prefix(vk_s, "pub-") ann_t0 = make_ann_t(client_v2, furl1, sk, 10) canary0 = Referenceable() introducer.remote_publish_v2(ann_t0, canary0) a = introducer.get_announcements() self.failUnlessEqual(len(a), 1) self.failUnlessIdentical(a[0].canary, canary0) self.failUnlessEqual(a[0].index, ("storage", pks, None)) self.failUnlessEqual(a[0].announcement["app-versions"], app_versions) self.failUnlessEqual(a[0].nickname, u"nick-v2") self.failUnlessEqual(a[0].service_name, "storage") self.failUnlessEqual(a[0].version, "my_version") self.failUnlessEqual(a[0].announcement["anonymous-storage-FURL"], furl1) def test_client_v1(self): introducer = IntroducerService() furl1 = "pb://62ubehyunnyhzs7r6vdonnm2hpi52w6y@127.0.0.1:0/swissnum" tubid = "62ubehyunnyhzs7r6vdonnm2hpi52w6y" ann = (furl1, "storage", 
"RIStorage", u"nick-v1".encode("utf-8"), "my_version", "oldest") introducer.remote_publish(ann) a = introducer.get_announcements() self.failUnlessEqual(len(a), 1) self.failUnlessEqual(a[0].index, ("storage", None, tubid)) self.failUnlessEqual(a[0].canary, None) self.failUnlessEqual(a[0].announcement["app-versions"], {}) self.failUnlessEqual(a[0].nickname, u"nick-v1".encode("utf-8")) self.failUnlessEqual(a[0].service_name, "storage") self.failUnlessEqual(a[0].version, "my_version") self.failUnlessEqual(a[0].announcement["anonymous-storage-FURL"], furl1) class ClientSeqnums(unittest.TestCase): def test_client(self): basedir = "introducer/ClientSeqnums/test_client" fileutil.make_dirs(basedir) f = open(os.path.join(basedir, "tahoe.cfg"), "w") f.write("[client]\n") f.write("introducer.furl = nope\n") f.close() c = TahoeClient(basedir) ic = c.introducer_client outbound = ic._outbound_announcements published = ic._published_announcements def read_seqnum(): f = open(os.path.join(basedir, "announcement-seqnum")) seqnum = f.read().strip() f.close() return int(seqnum) ic.publish("sA", {"key": "value1"}, c._node_key) self.failUnlessEqual(read_seqnum(), 1) self.failUnless("sA" in outbound) self.failUnlessEqual(outbound["sA"]["seqnum"], 1) nonce1 = outbound["sA"]["nonce"] self.failUnless(isinstance(nonce1, str)) self.failUnlessEqual(simplejson.loads(published["sA"][0]), outbound["sA"]) # [1] is the signature, [2] is the pubkey # publishing a second service causes both services to be # re-published, with the next higher sequence number ic.publish("sB", {"key": "value2"}, c._node_key) self.failUnlessEqual(read_seqnum(), 2) self.failUnless("sB" in outbound) self.failUnlessEqual(outbound["sB"]["seqnum"], 2) self.failUnless("sA" in outbound) self.failUnlessEqual(outbound["sA"]["seqnum"], 2) nonce2 = outbound["sA"]["nonce"] self.failUnless(isinstance(nonce2, str)) self.failIfEqual(nonce1, nonce2) self.failUnlessEqual(simplejson.loads(published["sA"][0]), outbound["sA"]) self.failUnlessEqual(simplejson.loads(published["sB"][0]), outbound["sB"]) class TooNewServer(IntroducerService): VERSION = { "http://allmydata.org/tahoe/protocols/introducer/v999": { }, "application-version": "greetings from the crazy future", } class NonV1Server(SystemTestMixin, unittest.TestCase): # if the 1.3.0 client connects to a server that doesn't provide the 'v1' # protocol, it is supposed to provide a useful error instead of a weird # exception. def test_failure(self): self.basedir = "introducer/NonV1Server/failure" os.makedirs(self.basedir) self.create_tub() i = TooNewServer() i.setServiceParent(self.parent) self.introducer_furl = self.central_tub.registerReference(i) tub = Tub() tub.setOption("expose-remote-exception-types", False) tub.setServiceParent(self.parent) l = tub.listenOn("tcp:0") portnum = l.getPortnum() tub.setLocation("localhost:%d" % portnum) c = IntroducerClient(tub, self.introducer_furl, u"nickname-client", "version", "oldest", {}, fakeseq) announcements = {} def got(key_s, ann): announcements[key_s] = ann c.subscribe_to("storage", got) c.setServiceParent(self.parent) # now we wait for it to connect and notice the bad version def _got_bad(): return bool(c._introducer_error) or bool(c._publisher) d = self.poll(_got_bad) def _done(res): self.failUnless(c._introducer_error) self.failUnless(c._introducer_error.check(InsufficientVersionError), c._introducer_error) d.addCallback(_done) return d class DecodeFurl(unittest.TestCase): def test_decode(self): # make sure we have a working base64.b32decode. 
The one in # python2.4.[01] was broken. furl = 'pb://t5g7egomnnktbpydbuijt6zgtmw4oqi5@127.0.0.1:51857/hfzv36i' m = re.match(r'pb://(\w+)@', furl) assert m nodeid = b32decode(m.group(1).upper()) self.failUnlessEqual(nodeid, "\x9fM\xf2\x19\xcckU0\xbf\x03\r\x10\x99\xfb&\x9b-\xc7A\x1d") class Signatures(unittest.TestCase): def test_sign(self): ann = {"key1": "value1"} sk_s,vk_s = keyutil.make_keypair() sk,ignored = keyutil.parse_privkey(sk_s) ann_t = sign_to_foolscap(ann, sk) (msg, sig, key) = ann_t self.failUnlessEqual(type(msg), type("".encode("utf-8"))) # bytes self.failUnlessEqual(simplejson.loads(msg.decode("utf-8")), ann) self.failUnless(sig.startswith("v0-")) self.failUnless(key.startswith("v0-")) (ann2,key2) = unsign_from_foolscap(ann_t) self.failUnlessEqual(ann2, ann) self.failUnlessEqual("pub-"+key2, vk_s) # bad signature bad_ann = {"key1": "value2"} bad_msg = simplejson.dumps(bad_ann).encode("utf-8") self.failUnlessRaises(keyutil.BadSignatureError, unsign_from_foolscap, (bad_msg,sig,key)) # sneaky bad signature should be ignored (ann2,key2) = unsign_from_foolscap( (bad_msg,None,key) ) self.failUnlessEqual(key2, None) self.failUnlessEqual(ann2, bad_ann) # unrecognized signatures self.failUnlessRaises(UnknownKeyError, unsign_from_foolscap, (bad_msg,"v999-sig",key)) self.failUnlessRaises(UnknownKeyError, unsign_from_foolscap, (bad_msg,sig,"v999-key")) # add tests of StorageFarmBroker: if it receives duplicate announcements, it # should leave the Reconnector in place, also if it receives # same-FURL-different-misc, but if it receives same-nodeid-different-FURL, it # should tear down the Reconnector and make a new one. This behavior used to # live in the IntroducerClient, and thus used to be tested by test_introducer # copying more tests from old branch: # then also add Upgrade test tahoe-lafs-1.10.0/src/allmydata/test/test_iputil.py000066400000000000000000000015361221140116300222450ustar00rootroot00000000000000 import re from twisted.trial import unittest from allmydata.util import iputil import allmydata.test.common_util as testutil DOTTED_QUAD_RE=re.compile("^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$") class ListAddresses(testutil.SignalMixin, unittest.TestCase): def test_get_local_ip_for(self): addr = iputil.get_local_ip_for('127.0.0.1') self.failUnless(DOTTED_QUAD_RE.match(addr)) def test_list_async(self): d = iputil.get_local_addresses_async() def _check(addresses): self.failUnless(len(addresses) >= 1) # always have localhost self.failUnless("127.0.0.1" in addresses, addresses) self.failIf("0.0.0.0" in addresses, addresses) d.addCallbacks(_check) return d # David A.'s OpenSolaris box timed out on this test one time when it was at 2s. 
test_list_async.timeout=4 tahoe-lafs-1.10.0/src/allmydata/test/test_keygen.py000066400000000000000000000065041221140116300222210ustar00rootroot00000000000000 import os from twisted.trial import unittest from twisted.application import service from foolscap.api import Tub, fireEventually, flushEventualQueue from allmydata import key_generator from allmydata.util import pollmixin from allmydata.test.common import TEST_RSA_KEY_SIZE from pycryptopp.publickey import rsa def flush_but_dont_ignore(res): d = flushEventualQueue() def _done(ignored): return res d.addCallback(_done) return d class KeyGenService(unittest.TestCase, pollmixin.PollMixin): def setUp(self): self.parent = service.MultiService() self.parent.startService() self.tub = t = Tub() t.setOption("expose-remote-exception-types", False) t.setServiceParent(self.parent) t.listenOn("tcp:0") t.setLocationAutomatically() return fireEventually() def tearDown(self): d = self.parent.stopService() d.addCallback(fireEventually) d.addBoth(flush_but_dont_ignore) return d def test_key_gen_service(self): def p(junk, msg): #import time #print time.asctime(), msg return junk #print 'starting key generator service' keysize = TEST_RSA_KEY_SIZE kgs = key_generator.KeyGeneratorService(display_furl=False, default_key_size=keysize) kgs.key_generator.verbose = True kgs.setServiceParent(self.parent) kgs.key_generator.pool_size = 8 def keypool_full(): return len(kgs.key_generator.keypool) == kgs.key_generator.pool_size # first wait for key gen pool to fill up d = fireEventually() d.addCallback(p, 'waiting for pool to fill up') d.addCallback(lambda junk: self.poll(keypool_full)) d.addCallback(p, 'grabbing a few keys') # grab a few keys, check that pool size shrinks def get_key(junk=None): d = self.tub.getReference(kgs.keygen_furl) d.addCallback(lambda kg: kg.callRemote('get_rsa_key_pair', keysize)) return d def check_poolsize(junk, size): self.failUnlessEqual(len(kgs.key_generator.keypool), size) n_keys_to_waste = 4 for i in range(n_keys_to_waste): d.addCallback(get_key) d.addCallback(check_poolsize, kgs.key_generator.pool_size - n_keys_to_waste) d.addCallback(p, 'checking a key works') # check that a retrieved key is actually useful d.addCallback(get_key) def check_key_works(keys): verifying_key, signing_key = keys v = rsa.create_verifying_key_from_string(verifying_key) s = rsa.create_signing_key_from_string(signing_key) junk = os.urandom(42) sig = s.sign(junk) self.failUnless(v.verify(junk, sig)) d.addCallback(check_key_works) d.addCallback(p, 'checking pool exhaustion') # exhaust the pool for i in range(kgs.key_generator.pool_size): d.addCallback(get_key) d.addCallback(check_poolsize, 0) # and check it still works (will gen key synchronously on demand) d.addCallback(get_key) d.addCallback(check_key_works) d.addCallback(p, 'checking pool replenishment') # check that the pool will refill d.addCallback(lambda junk: self.poll(keypool_full)) return d tahoe-lafs-1.10.0/src/allmydata/test/test_mutable.py000066400000000000000000006037771221140116300224070ustar00rootroot00000000000000import os, re, base64 from cStringIO import StringIO from twisted.trial import unittest from twisted.internet import defer, reactor from allmydata import uri, client from allmydata.nodemaker import NodeMaker from allmydata.util import base32, consumer, fileutil, mathutil from allmydata.util.hashutil import tagged_hash, ssk_writekey_hash, \ ssk_pubkey_fingerprint_hash from allmydata.util.consumer import MemoryConsumer from allmydata.util.deferredutil import gatherResults from 
allmydata.interfaces import IRepairResults, ICheckAndRepairResults, \ NotEnoughSharesError, SDMF_VERSION, MDMF_VERSION, DownloadStopped from allmydata.monitor import Monitor from allmydata.test.common import ShouldFailMixin from allmydata.test.no_network import GridTestMixin from foolscap.api import eventually, fireEventually from foolscap.logging import log from allmydata.storage_client import StorageFarmBroker from allmydata.storage.common import storage_index_to_dir from allmydata.scripts import debug from allmydata.mutable.filenode import MutableFileNode, BackoffAgent from allmydata.mutable.common import \ MODE_CHECK, MODE_ANYTHING, MODE_WRITE, MODE_READ, \ NeedMoreDataError, UnrecoverableFileError, UncoordinatedWriteError, \ NotEnoughServersError, CorruptShareError from allmydata.mutable.retrieve import Retrieve from allmydata.mutable.publish import Publish, MutableFileHandle, \ MutableData, \ DEFAULT_MAX_SEGMENT_SIZE from allmydata.mutable.servermap import ServerMap, ServermapUpdater from allmydata.mutable.layout import unpack_header, MDMFSlotReadProxy from allmydata.mutable.repairer import MustForceRepairError import allmydata.test.common_util as testutil from allmydata.test.common import TEST_RSA_KEY_SIZE from allmydata.test.test_download import PausingConsumer, \ PausingAndStoppingConsumer, StoppingConsumer, \ ImmediatelyStoppingConsumer def eventuaaaaaly(res=None): d = fireEventually(res) d.addCallback(fireEventually) d.addCallback(fireEventually) return d # this "FakeStorage" exists to put the share data in RAM and avoid using real # network connections, both to speed up the tests and to reduce the amount of # non-mutable.py code being exercised. class FakeStorage: # this class replaces the collection of storage servers, allowing the # tests to examine and manipulate the published shares. It also lets us # control the order in which read queries are answered, to exercise more # of the error-handling code in Retrieve . # # Note that we ignore the storage index: this FakeStorage instance can # only be used for a single storage index. def __init__(self): self._peers = {} # _sequence is used to cause the responses to occur in a specific # order. If it is in use, then we will defer queries instead of # answering them right away, accumulating the Deferreds in a dict. We # don't know exactly how many queries we'll get, so exactly one # second after the first query arrives, we will release them all (in # order). 
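# A toy version of the trick described in the comment above: park each
# answer in a Deferred instead of returning it, then release the parked
# answers in a caller-chosen peer order, so a test can force a specific
# interleaving of responses.  OrderedResponder is hypothetical, not the
# FakeStorage class being defined here.
from twisted.internet import defer

class OrderedResponder:
    def __init__(self):
        self._pending = {}       # peerid -> list of (Deferred, answer)
    def query(self, peerid, answer):
        d = defer.Deferred()
        self._pending.setdefault(peerid, []).append((d, answer))
        return d
    def release(self, sequence):
        # fire parked answers peer-by-peer in 'sequence' order, then
        # everything that wasn't named, then forget them all
        for peerid in sequence:
            for (d, answer) in self._pending.pop(peerid, []):
                d.callback(answer)
        for waiters in self._pending.values():
            for (d, answer) in waiters:
                d.callback(answer)
        self._pending = {}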
self._sequence = None self._pending = {} self._pending_timer = None def read(self, peerid, storage_index): shares = self._peers.get(peerid, {}) if self._sequence is None: return eventuaaaaaly(shares) d = defer.Deferred() if not self._pending: self._pending_timer = reactor.callLater(1.0, self._fire_readers) if peerid not in self._pending: self._pending[peerid] = [] self._pending[peerid].append( (d, shares) ) return d def _fire_readers(self): self._pending_timer = None pending = self._pending self._pending = {} for peerid in self._sequence: if peerid in pending: for (d, shares) in pending.pop(peerid): eventually(d.callback, shares) for peerid in pending: for (d, shares) in pending[peerid]: eventually(d.callback, shares) def write(self, peerid, storage_index, shnum, offset, data): if peerid not in self._peers: self._peers[peerid] = {} shares = self._peers[peerid] f = StringIO() f.write(shares.get(shnum, "")) f.seek(offset) f.write(data) shares[shnum] = f.getvalue() class FakeStorageServer: def __init__(self, peerid, storage): self.peerid = peerid self.storage = storage self.queries = 0 def callRemote(self, methname, *args, **kwargs): self.queries += 1 def _call(): meth = getattr(self, methname) return meth(*args, **kwargs) d = fireEventually() d.addCallback(lambda res: _call()) return d def callRemoteOnly(self, methname, *args, **kwargs): self.queries += 1 d = self.callRemote(methname, *args, **kwargs) d.addBoth(lambda ignore: None) pass def advise_corrupt_share(self, share_type, storage_index, shnum, reason): pass def slot_readv(self, storage_index, shnums, readv): d = self.storage.read(self.peerid, storage_index) def _read(shares): response = {} for shnum in shares: if shnums and shnum not in shnums: continue vector = response[shnum] = [] for (offset, length) in readv: assert isinstance(offset, (int, long)), offset assert isinstance(length, (int, long)), length vector.append(shares[shnum][offset:offset+length]) return response d.addCallback(_read) return d def slot_testv_and_readv_and_writev(self, storage_index, secrets, tw_vectors, read_vector): # always-pass: parrot the test vectors back to them. readv = {} for shnum, (testv, writev, new_length) in tw_vectors.items(): for (offset, length, op, specimen) in testv: assert op in ("le", "eq", "ge") # TODO: this isn't right, the read is controlled by read_vector, # not by testv readv[shnum] = [ specimen for (offset, length, op, specimen) in testv ] for (offset, data) in writev: self.storage.write(self.peerid, storage_index, shnum, offset, data) answer = (True, readv) return fireEventually(answer) def flip_bit(original, byte_offset): return (original[:byte_offset] + chr(ord(original[byte_offset]) ^ 0x01) + original[byte_offset+1:]) def add_two(original, byte_offset): # It isn't enough to simply flip the bit for the version number, # because 1 is a valid version number. So we add two instead. return (original[:byte_offset] + chr(ord(original[byte_offset]) ^ 0x02) + original[byte_offset+1:]) def corrupt(res, s, offset, shnums_to_corrupt=None, offset_offset=0): # if shnums_to_corrupt is None, corrupt all shares. Otherwise it is a # list of shnums to corrupt. ds = [] for peerid in s._peers: shares = s._peers[peerid] for shnum in shares: if (shnums_to_corrupt is not None and shnum not in shnums_to_corrupt): continue data = shares[shnum] # We're feeding the reader all of the share data, so it # won't need to use the rref that we didn't provide, nor the # storage index that we didn't provide. 
We do this because # the reader will work for both MDMF and SDMF. reader = MDMFSlotReadProxy(None, None, shnum, data) # We need to get the offsets for the next part. d = reader.get_verinfo() def _do_corruption(verinfo, data, shnum, shares): (seqnum, root_hash, IV, segsize, datalen, k, n, prefix, o) = verinfo if isinstance(offset, tuple): offset1, offset2 = offset else: offset1 = offset offset2 = 0 if offset1 == "pubkey" and IV: real_offset = 107 elif offset1 in o: real_offset = o[offset1] else: real_offset = offset1 real_offset = int(real_offset) + offset2 + offset_offset assert isinstance(real_offset, int), offset if offset1 == 0: # verbyte f = add_two else: f = flip_bit shares[shnum] = f(data, real_offset) d.addCallback(_do_corruption, data, shnum, shares) ds.append(d) dl = defer.DeferredList(ds) dl.addCallback(lambda ignored: res) return dl def make_storagebroker(s=None, num_peers=10): if not s: s = FakeStorage() peerids = [tagged_hash("peerid", "%d" % i)[:20] for i in range(num_peers)] storage_broker = StorageFarmBroker(None, True) for peerid in peerids: fss = FakeStorageServer(peerid, s) ann = {"anonymous-storage-FURL": "pb://%s@nowhere/fake" % base32.b2a(peerid), "permutation-seed-base32": base32.b2a(peerid) } storage_broker.test_add_rref(peerid, fss, ann) return storage_broker def make_nodemaker(s=None, num_peers=10, keysize=TEST_RSA_KEY_SIZE): storage_broker = make_storagebroker(s, num_peers) sh = client.SecretHolder("lease secret", "convergence secret") keygen = client.KeyGenerator() if keysize: keygen.set_default_keysize(keysize) nodemaker = NodeMaker(storage_broker, sh, None, None, None, {"k": 3, "n": 10}, SDMF_VERSION, keygen) return nodemaker class Filenode(unittest.TestCase, testutil.ShouldFailMixin): # this used to be in Publish, but we removed the limit. Some of # these tests test whether the new code correctly allows files # larger than the limit. OLD_MAX_SEGMENT_SIZE = 3500000 def setUp(self): self._storage = s = FakeStorage() self.nodemaker = make_nodemaker(s) def test_create(self): d = self.nodemaker.create_mutable_file() def _created(n): self.failUnless(isinstance(n, MutableFileNode)) self.failUnlessEqual(n.get_storage_index(), n._storage_index) sb = self.nodemaker.storage_broker peer0 = sorted(sb.get_all_serverids())[0] shnums = self._storage._peers[peer0].keys() self.failUnlessEqual(len(shnums), 1) d.addCallback(_created) return d def test_create_mdmf(self): d = self.nodemaker.create_mutable_file(version=MDMF_VERSION) def _created(n): self.failUnless(isinstance(n, MutableFileNode)) self.failUnlessEqual(n.get_storage_index(), n._storage_index) sb = self.nodemaker.storage_broker peer0 = sorted(sb.get_all_serverids())[0] shnums = self._storage._peers[peer0].keys() self.failUnlessEqual(len(shnums), 1) d.addCallback(_created) return d def test_single_share(self): # Make sure that we tolerate publishing a single share. 
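# The encoding parameters used throughout these tests ({"k": 3, "n": 10}
# above, or 1-of-1 just below) mean the file is erasure-coded into n
# shares, any k of which are enough to rebuild it.  As a self-contained
# illustration of that property, here is a toy 2-of-3 parity scheme; it is
# an assumption for illustration only and is not Tahoe's zfec encoding.
def toy_encode_2_of_3(data):
    # split into two halves and add an XOR parity share
    half = (len(data) + 1) // 2
    a, b = data[:half], data[half:].ljust(half, "\x00")
    parity = "".join(chr(ord(x) ^ ord(y)) for (x, y) in zip(a, b))
    return [a, b, parity], len(data)

def toy_decode_2_of_3(shares, length):
    # any two of (first half, second half, parity) recover the original
    a, b, parity = shares
    if a is None:
        a = "".join(chr(ord(x) ^ ord(y)) for (x, y) in zip(b, parity))
    if b is None:
        b = "".join(chr(ord(x) ^ ord(y)) for (x, y) in zip(a, parity))
    return (a + b)[:length]

shares, length = toy_encode_2_of_3("Contents" * 4)
shares[1] = None                                  # lose one share
assert toy_decode_2_of_3(shares, length) == "Contents" * 4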
self.nodemaker.default_encoding_parameters['k'] = 1 self.nodemaker.default_encoding_parameters['happy'] = 1 self.nodemaker.default_encoding_parameters['n'] = 1 d = defer.succeed(None) for v in (SDMF_VERSION, MDMF_VERSION): d.addCallback(lambda ignored, v=v: self.nodemaker.create_mutable_file(version=v)) def _created(n): self.failUnless(isinstance(n, MutableFileNode)) self._node = n return n d.addCallback(_created) d.addCallback(lambda n: n.overwrite(MutableData("Contents" * 50000))) d.addCallback(lambda ignored: self._node.download_best_version()) d.addCallback(lambda contents: self.failUnlessEqual(contents, "Contents" * 50000)) return d def test_max_shares(self): self.nodemaker.default_encoding_parameters['n'] = 255 d = self.nodemaker.create_mutable_file(version=SDMF_VERSION) def _created(n): self.failUnless(isinstance(n, MutableFileNode)) self.failUnlessEqual(n.get_storage_index(), n._storage_index) sb = self.nodemaker.storage_broker num_shares = sum([len(self._storage._peers[x].keys()) for x \ in sb.get_all_serverids()]) self.failUnlessEqual(num_shares, 255) self._node = n return n d.addCallback(_created) # Now we upload some contents d.addCallback(lambda n: n.overwrite(MutableData("contents" * 50000))) # ...then download contents d.addCallback(lambda ignored: self._node.download_best_version()) # ...and check to make sure everything went okay. d.addCallback(lambda contents: self.failUnlessEqual("contents" * 50000, contents)) return d def test_max_shares_mdmf(self): # Test how files behave when there are 255 shares. self.nodemaker.default_encoding_parameters['n'] = 255 d = self.nodemaker.create_mutable_file(version=MDMF_VERSION) def _created(n): self.failUnless(isinstance(n, MutableFileNode)) self.failUnlessEqual(n.get_storage_index(), n._storage_index) sb = self.nodemaker.storage_broker num_shares = sum([len(self._storage._peers[x].keys()) for x \ in sb.get_all_serverids()]) self.failUnlessEqual(num_shares, 255) self._node = n return n d.addCallback(_created) d.addCallback(lambda n: n.overwrite(MutableData("contents" * 50000))) d.addCallback(lambda ignored: self._node.download_best_version()) d.addCallback(lambda contents: self.failUnlessEqual(contents, "contents" * 50000)) return d def test_mdmf_filenode_cap(self): # Test that an MDMF filenode, once created, returns an MDMF URI. d = self.nodemaker.create_mutable_file(version=MDMF_VERSION) def _created(n): self.failUnless(isinstance(n, MutableFileNode)) cap = n.get_cap() self.failUnless(isinstance(cap, uri.WriteableMDMFFileURI)) rcap = n.get_readcap() self.failUnless(isinstance(rcap, uri.ReadonlyMDMFFileURI)) vcap = n.get_verify_cap() self.failUnless(isinstance(vcap, uri.MDMFVerifierURI)) d.addCallback(_created) return d def test_create_from_mdmf_writecap(self): # Test that the nodemaker is capable of creating an MDMF # filenode given an MDMF cap. 
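# test_mdmf_filenode_cap above checks the usual cap-attenuation chain: a
# writecap yields a readcap (get_readcap), which yields a verify cap
# (get_verify_cap), never the other way around.  A toy model of that
# one-way derivation using a hash follows; the prefixes and the derivation
# are hypothetical, Tahoe's real MDMF caps are defined in allmydata.uri.
import hashlib

def toy_readcap_from_writecap(writecap):
    digest = hashlib.sha256(("read:" + writecap).encode("utf-8")).hexdigest()
    return "URI:TOY-RO:" + digest[:26]

def toy_verifycap_from_readcap(readcap):
    digest = hashlib.sha256(("verify:" + readcap).encode("utf-8")).hexdigest()
    return "URI:TOY-Verifier:" + digest[:26]

wcap = "URI:TOY:writekey123"
rcap = toy_readcap_from_writecap(wcap)
vcap = toy_verifycap_from_readcap(rcap)
# holders of rcap can derive vcap, but cannot recover wcap from it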
d = self.nodemaker.create_mutable_file(version=MDMF_VERSION) def _created(n): self.failUnless(isinstance(n, MutableFileNode)) s = n.get_uri() self.failUnless(s.startswith("URI:MDMF")) n2 = self.nodemaker.create_from_cap(s) self.failUnless(isinstance(n2, MutableFileNode)) self.failUnlessEqual(n.get_storage_index(), n2.get_storage_index()) self.failUnlessEqual(n.get_uri(), n2.get_uri()) d.addCallback(_created) return d def test_create_from_mdmf_readcap(self): d = self.nodemaker.create_mutable_file(version=MDMF_VERSION) def _created(n): self.failUnless(isinstance(n, MutableFileNode)) s = n.get_readonly_uri() n2 = self.nodemaker.create_from_cap(s) self.failUnless(isinstance(n2, MutableFileNode)) # Check that it's a readonly node self.failUnless(n2.is_readonly()) d.addCallback(_created) return d def test_internal_version_from_cap(self): # MutableFileNodes and MutableFileVersions have an internal # switch that tells them whether they're dealing with an SDMF or # MDMF mutable file when they start doing stuff. We want to make # sure that this is set appropriately given an MDMF cap. d = self.nodemaker.create_mutable_file(version=MDMF_VERSION) def _created(n): self.uri = n.get_uri() self.failUnlessEqual(n._protocol_version, MDMF_VERSION) n2 = self.nodemaker.create_from_cap(self.uri) self.failUnlessEqual(n2._protocol_version, MDMF_VERSION) d.addCallback(_created) return d def test_serialize(self): n = MutableFileNode(None, None, {"k": 3, "n": 10}, None) calls = [] def _callback(*args, **kwargs): self.failUnlessEqual(args, (4,) ) self.failUnlessEqual(kwargs, {"foo": 5}) calls.append(1) return 6 d = n._do_serialized(_callback, 4, foo=5) def _check_callback(res): self.failUnlessEqual(res, 6) self.failUnlessEqual(calls, [1]) d.addCallback(_check_callback) def _errback(): raise ValueError("heya") d.addCallback(lambda res: self.shouldFail(ValueError, "_check_errback", "heya", n._do_serialized, _errback)) return d def test_upload_and_download(self): d = self.nodemaker.create_mutable_file() def _created(n): d = defer.succeed(None) d.addCallback(lambda res: n.get_servermap(MODE_READ)) d.addCallback(lambda smap: smap.dump(StringIO())) d.addCallback(lambda sio: self.failUnless("3-of-10" in sio.getvalue())) d.addCallback(lambda res: n.overwrite(MutableData("contents 1"))) d.addCallback(lambda res: self.failUnlessIdentical(res, None)) d.addCallback(lambda res: n.download_best_version()) d.addCallback(lambda res: self.failUnlessEqual(res, "contents 1")) d.addCallback(lambda res: n.get_size_of_best_version()) d.addCallback(lambda size: self.failUnlessEqual(size, len("contents 1"))) d.addCallback(lambda res: n.overwrite(MutableData("contents 2"))) d.addCallback(lambda res: n.download_best_version()) d.addCallback(lambda res: self.failUnlessEqual(res, "contents 2")) d.addCallback(lambda res: n.get_servermap(MODE_WRITE)) d.addCallback(lambda smap: n.upload(MutableData("contents 3"), smap)) d.addCallback(lambda res: n.download_best_version()) d.addCallback(lambda res: self.failUnlessEqual(res, "contents 3")) d.addCallback(lambda res: n.get_servermap(MODE_ANYTHING)) d.addCallback(lambda smap: n.download_version(smap, smap.best_recoverable_version())) d.addCallback(lambda res: self.failUnlessEqual(res, "contents 3")) # test a file that is large enough to overcome the # mapupdate-to-retrieve data caching (i.e. make the shares larger # than the default readsize, which is 2000 bytes). A 15kB file # will have 5kB shares. 
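# The arithmetic behind the comment above: with k-of-n encoding each share
# carries roughly 1/k of the file (ignoring headers), so the 15kB file
# written below ("large size file" is 15 bytes, times 1000) yields ~5kB
# shares at k=3, which is larger than the 2000-byte default readsize and
# therefore defeats the mapupdate-to-retrieve cache.  approx_share_size is
# a hypothetical helper for illustration only.
def approx_share_size(filesize, k):
    return (filesize + k - 1) // k       # ceil(filesize / k)

assert approx_share_size(15 * 1000, 3) == 5000
assert approx_share_size(15 * 1000, 3) > 2000    # bigger than the readsize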
d.addCallback(lambda res: n.overwrite(MutableData("large size file" * 1000))) d.addCallback(lambda res: n.download_best_version()) d.addCallback(lambda res: self.failUnlessEqual(res, "large size file" * 1000)) return d d.addCallback(_created) return d def test_upload_and_download_mdmf(self): d = self.nodemaker.create_mutable_file(version=MDMF_VERSION) def _created(n): d = defer.succeed(None) d.addCallback(lambda ignored: n.get_servermap(MODE_READ)) def _then(servermap): dumped = servermap.dump(StringIO()) self.failUnlessIn("3-of-10", dumped.getvalue()) d.addCallback(_then) # Now overwrite the contents with some new contents. We want # to make them big enough to force the file to be uploaded # in more than one segment. big_contents = "contents1" * 100000 # about 900 KiB big_contents_uploadable = MutableData(big_contents) d.addCallback(lambda ignored: n.overwrite(big_contents_uploadable)) d.addCallback(lambda ignored: n.download_best_version()) d.addCallback(lambda data: self.failUnlessEqual(data, big_contents)) # Overwrite the contents again with some new contents. As # before, they need to be big enough to force multiple # segments, so that we make the downloader deal with # multiple segments. bigger_contents = "contents2" * 1000000 # about 9MiB bigger_contents_uploadable = MutableData(bigger_contents) d.addCallback(lambda ignored: n.overwrite(bigger_contents_uploadable)) d.addCallback(lambda ignored: n.download_best_version()) d.addCallback(lambda data: self.failUnlessEqual(data, bigger_contents)) return d d.addCallback(_created) return d def test_retrieve_producer_mdmf(self): # We should make sure that the retriever is able to pause and stop # correctly. data = "contents1" * 100000 d = self.nodemaker.create_mutable_file(MutableData(data), version=MDMF_VERSION) d.addCallback(lambda node: node.get_best_mutable_version()) d.addCallback(self._test_retrieve_producer, "MDMF", data) return d # note: SDMF has only one big segment, so we can't use the usual # after-the-first-write() trick to pause or stop the download. # Disabled until we find a better approach. def OFF_test_retrieve_producer_sdmf(self): data = "contents1" * 100000 d = self.nodemaker.create_mutable_file(MutableData(data), version=SDMF_VERSION) d.addCallback(lambda node: node.get_best_mutable_version()) d.addCallback(self._test_retrieve_producer, "SDMF", data) return d def _test_retrieve_producer(self, version, kind, data): # Now we'll retrieve it into a pausing consumer. 
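# The PausingConsumer/StoppingConsumer helpers used below come from
# test_download; they drive Twisted's producer/consumer protocol during
# retrieval.  A minimal consumer in that style is sketched here (a
# hypothetical stand-in, not the imported helpers): it pauses the
# registered producer on the first write and immediately resumes it, which
# is enough to exercise the pause/resume path.
class ToyPausingConsumer(object):
    def __init__(self):
        self.producer = None
        self.size = 0
        self._paused_once = False
    def registerProducer(self, producer, streaming):
        self.producer = producer
    def write(self, data):
        self.size += len(data)
        if not self._paused_once and self.producer is not None:
            self._paused_once = True
            self.producer.pauseProducing()
            self.producer.resumeProducing()
    def unregisterProducer(self):
        self.producer = None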
c = PausingConsumer() d = version.read(c) d.addCallback(lambda ign: self.failUnlessEqual(c.size, len(data))) c2 = PausingAndStoppingConsumer() d.addCallback(lambda ign: self.shouldFail(DownloadStopped, kind+"_pause_stop", "our Consumer called stopProducing()", version.read, c2)) c3 = StoppingConsumer() d.addCallback(lambda ign: self.shouldFail(DownloadStopped, kind+"_stop", "our Consumer called stopProducing()", version.read, c3)) c4 = ImmediatelyStoppingConsumer() d.addCallback(lambda ign: self.shouldFail(DownloadStopped, kind+"_stop_imm", "our Consumer called stopProducing()", version.read, c4)) def _then(ign): c5 = MemoryConsumer() d1 = version.read(c5) c5.producer.stopProducing() return self.shouldFail(DownloadStopped, kind+"_stop_imm2", "our Consumer called stopProducing()", lambda: d1) d.addCallback(_then) return d def test_download_from_mdmf_cap(self): # We should be able to download an MDMF file given its cap d = self.nodemaker.create_mutable_file(version=MDMF_VERSION) def _created(node): self.uri = node.get_uri() # also confirm that the cap has no extension fields pieces = self.uri.split(":") self.failUnlessEqual(len(pieces), 4) return node.overwrite(MutableData("contents1" * 100000)) def _then(ignored): node = self.nodemaker.create_from_cap(self.uri) return node.download_best_version() def _downloaded(data): self.failUnlessEqual(data, "contents1" * 100000) d.addCallback(_created) d.addCallback(_then) d.addCallback(_downloaded) return d def test_mdmf_write_count(self): # Publishing an MDMF file should only cause one write for each # share that is to be published. Otherwise, we introduce # undesirable semantics that are a regression from SDMF upload = MutableData("MDMF" * 100000) # about 400 KiB d = self.nodemaker.create_mutable_file(upload, version=MDMF_VERSION) def _check_server_write_counts(ignored): sb = self.nodemaker.storage_broker for server in sb.servers.itervalues(): self.failUnlessEqual(server.get_rref().queries, 1) d.addCallback(_check_server_write_counts) return d def test_create_with_initial_contents(self): upload1 = MutableData("contents 1") d = self.nodemaker.create_mutable_file(upload1) def _created(n): d = n.download_best_version() d.addCallback(lambda res: self.failUnlessEqual(res, "contents 1")) upload2 = MutableData("contents 2") d.addCallback(lambda res: n.overwrite(upload2)) d.addCallback(lambda res: n.download_best_version()) d.addCallback(lambda res: self.failUnlessEqual(res, "contents 2")) return d d.addCallback(_created) return d def test_create_mdmf_with_initial_contents(self): initial_contents = "foobarbaz" * 131072 # 900KiB initial_contents_uploadable = MutableData(initial_contents) d = self.nodemaker.create_mutable_file(initial_contents_uploadable, version=MDMF_VERSION) def _created(n): d = n.download_best_version() d.addCallback(lambda data: self.failUnlessEqual(data, initial_contents)) uploadable2 = MutableData(initial_contents + "foobarbaz") d.addCallback(lambda ignored: n.overwrite(uploadable2)) d.addCallback(lambda ignored: n.download_best_version()) d.addCallback(lambda data: self.failUnlessEqual(data, initial_contents + "foobarbaz")) return d d.addCallback(_created) return d def test_create_with_initial_contents_function(self): data = "initial contents" def _make_contents(n): self.failUnless(isinstance(n, MutableFileNode)) key = n.get_writekey() self.failUnless(isinstance(key, str), key) self.failUnlessEqual(len(key), 16) # AES key size return MutableData(data) d = self.nodemaker.create_mutable_file(_make_contents) def _created(n): return 
n.download_best_version() d.addCallback(_created) d.addCallback(lambda data2: self.failUnlessEqual(data2, data)) return d def test_create_mdmf_with_initial_contents_function(self): data = "initial contents" * 100000 def _make_contents(n): self.failUnless(isinstance(n, MutableFileNode)) key = n.get_writekey() self.failUnless(isinstance(key, str), key) self.failUnlessEqual(len(key), 16) return MutableData(data) d = self.nodemaker.create_mutable_file(_make_contents, version=MDMF_VERSION) d.addCallback(lambda n: n.download_best_version()) d.addCallback(lambda data2: self.failUnlessEqual(data2, data)) return d def test_create_with_too_large_contents(self): BIG = "a" * (self.OLD_MAX_SEGMENT_SIZE + 1) BIG_uploadable = MutableData(BIG) d = self.nodemaker.create_mutable_file(BIG_uploadable) def _created(n): other_BIG_uploadable = MutableData(BIG) d = n.overwrite(other_BIG_uploadable) return d d.addCallback(_created) return d def failUnlessCurrentSeqnumIs(self, n, expected_seqnum, which): d = n.get_servermap(MODE_READ) d.addCallback(lambda servermap: servermap.best_recoverable_version()) d.addCallback(lambda verinfo: self.failUnlessEqual(verinfo[0], expected_seqnum, which)) return d def test_modify(self): def _modifier(old_contents, servermap, first_time): new_contents = old_contents + "line2" return new_contents def _non_modifier(old_contents, servermap, first_time): return old_contents def _none_modifier(old_contents, servermap, first_time): return None def _error_modifier(old_contents, servermap, first_time): raise ValueError("oops") def _toobig_modifier(old_contents, servermap, first_time): new_content = "b" * (self.OLD_MAX_SEGMENT_SIZE + 1) return new_content calls = [] def _ucw_error_modifier(old_contents, servermap, first_time): # simulate an UncoordinatedWriteError once calls.append(1) if len(calls) <= 1: raise UncoordinatedWriteError("simulated") new_contents = old_contents + "line3" return new_contents def _ucw_error_non_modifier(old_contents, servermap, first_time): # simulate an UncoordinatedWriteError once, and don't actually # modify the contents on subsequent invocations calls.append(1) if len(calls) <= 1: raise UncoordinatedWriteError("simulated") return old_contents initial_contents = "line1" d = self.nodemaker.create_mutable_file(MutableData(initial_contents)) def _created(n): d = n.modify(_modifier) d.addCallback(lambda res: n.download_best_version()) d.addCallback(lambda res: self.failUnlessEqual(res, "line1line2")) d.addCallback(lambda res: self.failUnlessCurrentSeqnumIs(n, 2, "m")) d.addCallback(lambda res: n.modify(_non_modifier)) d.addCallback(lambda res: n.download_best_version()) d.addCallback(lambda res: self.failUnlessEqual(res, "line1line2")) d.addCallback(lambda res: self.failUnlessCurrentSeqnumIs(n, 2, "non")) d.addCallback(lambda res: n.modify(_none_modifier)) d.addCallback(lambda res: n.download_best_version()) d.addCallback(lambda res: self.failUnlessEqual(res, "line1line2")) d.addCallback(lambda res: self.failUnlessCurrentSeqnumIs(n, 2, "none")) d.addCallback(lambda res: self.shouldFail(ValueError, "error_modifier", None, n.modify, _error_modifier)) d.addCallback(lambda res: n.download_best_version()) d.addCallback(lambda res: self.failUnlessEqual(res, "line1line2")) d.addCallback(lambda res: self.failUnlessCurrentSeqnumIs(n, 2, "err")) d.addCallback(lambda res: n.download_best_version()) d.addCallback(lambda res: self.failUnlessEqual(res, "line1line2")) d.addCallback(lambda res: self.failUnlessCurrentSeqnumIs(n, 2, "big")) d.addCallback(lambda res: 
n.modify(_ucw_error_modifier)) d.addCallback(lambda res: self.failUnlessEqual(len(calls), 2)) d.addCallback(lambda res: n.download_best_version()) d.addCallback(lambda res: self.failUnlessEqual(res, "line1line2line3")) d.addCallback(lambda res: self.failUnlessCurrentSeqnumIs(n, 3, "ucw")) def _reset_ucw_error_modifier(res): calls[:] = [] return res d.addCallback(_reset_ucw_error_modifier) # in practice, this n.modify call should publish twice: the first # one gets a UCWE, the second does not. But our test jig (in # which the modifier raises the UCWE) skips over the first one, # so in this test there will be only one publish, and the seqnum # will only be one larger than the previous test, not two (i.e. 4 # instead of 5). d.addCallback(lambda res: n.modify(_ucw_error_non_modifier)) d.addCallback(lambda res: self.failUnlessEqual(len(calls), 2)) d.addCallback(lambda res: n.download_best_version()) d.addCallback(lambda res: self.failUnlessEqual(res, "line1line2line3")) d.addCallback(lambda res: self.failUnlessCurrentSeqnumIs(n, 4, "ucw")) d.addCallback(lambda res: n.modify(_toobig_modifier)) return d d.addCallback(_created) return d def test_modify_backoffer(self): def _modifier(old_contents, servermap, first_time): return old_contents + "line2" calls = [] def _ucw_error_modifier(old_contents, servermap, first_time): # simulate an UncoordinatedWriteError once calls.append(1) if len(calls) <= 1: raise UncoordinatedWriteError("simulated") return old_contents + "line3" def _always_ucw_error_modifier(old_contents, servermap, first_time): raise UncoordinatedWriteError("simulated") def _backoff_stopper(node, f): return f def _backoff_pauser(node, f): d = defer.Deferred() reactor.callLater(0.5, d.callback, None) return d # the give-up-er will hit its maximum retry count quickly giveuper = BackoffAgent() giveuper._delay = 0.1 giveuper.factor = 1 d = self.nodemaker.create_mutable_file(MutableData("line1")) def _created(n): d = n.modify(_modifier) d.addCallback(lambda res: n.download_best_version()) d.addCallback(lambda res: self.failUnlessEqual(res, "line1line2")) d.addCallback(lambda res: self.failUnlessCurrentSeqnumIs(n, 2, "m")) d.addCallback(lambda res: self.shouldFail(UncoordinatedWriteError, "_backoff_stopper", None, n.modify, _ucw_error_modifier, _backoff_stopper)) d.addCallback(lambda res: n.download_best_version()) d.addCallback(lambda res: self.failUnlessEqual(res, "line1line2")) d.addCallback(lambda res: self.failUnlessCurrentSeqnumIs(n, 2, "stop")) def _reset_ucw_error_modifier(res): calls[:] = [] return res d.addCallback(_reset_ucw_error_modifier) d.addCallback(lambda res: n.modify(_ucw_error_modifier, _backoff_pauser)) d.addCallback(lambda res: n.download_best_version()) d.addCallback(lambda res: self.failUnlessEqual(res, "line1line2line3")) d.addCallback(lambda res: self.failUnlessCurrentSeqnumIs(n, 3, "pause")) d.addCallback(lambda res: self.shouldFail(UncoordinatedWriteError, "giveuper", None, n.modify, _always_ucw_error_modifier, giveuper.delay)) d.addCallback(lambda res: n.download_best_version()) d.addCallback(lambda res: self.failUnlessEqual(res, "line1line2line3")) d.addCallback(lambda res: self.failUnlessCurrentSeqnumIs(n, 3, "giveup")) return d d.addCallback(_created) return d def test_upload_and_download_full_size_keys(self): self.nodemaker.key_generator = client.KeyGenerator() d = self.nodemaker.create_mutable_file() def _created(n): d = defer.succeed(None) d.addCallback(lambda res: n.get_servermap(MODE_READ)) d.addCallback(lambda smap: smap.dump(StringIO())) 
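# The _ucw_error_modifier / backoff tests above exercise the standard
# mutable read-modify-write loop: apply the modifier, and if the publish
# collides with another writer (UncoordinatedWriteError) re-read and retry
# a bounded number of times.  A synchronous toy of that control flow, with
# hypothetical names (the real n.modify() is Deferred-based and takes a
# backoff agent):
class ToyUCWError(Exception):
    pass

def toy_modify(read, write, modifier, max_retries=3):
    for attempt in range(max_retries + 1):
        old = read()
        new = modifier(old, attempt == 0)      # second arg ~ "first_time"
        if new is None or new == old:
            return old                         # nothing new to publish
        try:
            write(old, new)                    # may raise ToyUCWError
            return new
        except ToyUCWError:
            if attempt == max_retries:
                raise                          # give up, like the giveuper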
d.addCallback(lambda sio: self.failUnless("3-of-10" in sio.getvalue())) d.addCallback(lambda res: n.overwrite(MutableData("contents 1"))) d.addCallback(lambda res: self.failUnlessIdentical(res, None)) d.addCallback(lambda res: n.download_best_version()) d.addCallback(lambda res: self.failUnlessEqual(res, "contents 1")) d.addCallback(lambda res: n.overwrite(MutableData("contents 2"))) d.addCallback(lambda res: n.download_best_version()) d.addCallback(lambda res: self.failUnlessEqual(res, "contents 2")) d.addCallback(lambda res: n.get_servermap(MODE_WRITE)) d.addCallback(lambda smap: n.upload(MutableData("contents 3"), smap)) d.addCallback(lambda res: n.download_best_version()) d.addCallback(lambda res: self.failUnlessEqual(res, "contents 3")) d.addCallback(lambda res: n.get_servermap(MODE_ANYTHING)) d.addCallback(lambda smap: n.download_version(smap, smap.best_recoverable_version())) d.addCallback(lambda res: self.failUnlessEqual(res, "contents 3")) return d d.addCallback(_created) return d def test_size_after_servermap_update(self): # a mutable file node should have something to say about how big # it is after a servermap update is performed, since this tells # us how large the best version of that mutable file is. d = self.nodemaker.create_mutable_file() def _created(n): self.n = n return n.get_servermap(MODE_READ) d.addCallback(_created) d.addCallback(lambda ignored: self.failUnlessEqual(self.n.get_size(), 0)) d.addCallback(lambda ignored: self.n.overwrite(MutableData("foobarbaz"))) d.addCallback(lambda ignored: self.failUnlessEqual(self.n.get_size(), 9)) d.addCallback(lambda ignored: self.nodemaker.create_mutable_file(MutableData("foobarbaz"))) d.addCallback(_created) d.addCallback(lambda ignored: self.failUnlessEqual(self.n.get_size(), 9)) return d class PublishMixin: def publish_one(self): # publish a file and create shares, which can then be manipulated # later. self.CONTENTS = "New contents go here" * 1000 self.uploadable = MutableData(self.CONTENTS) self._storage = FakeStorage() self._nodemaker = make_nodemaker(self._storage) self._storage_broker = self._nodemaker.storage_broker d = self._nodemaker.create_mutable_file(self.uploadable) def _created(node): self._fn = node self._fn2 = self._nodemaker.create_from_cap(node.get_uri()) d.addCallback(_created) return d def publish_mdmf(self): # like publish_one, except that the result is guaranteed to be # an MDMF file. # self.CONTENTS should have more than one segment. 
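# "more than one segment" is just arithmetic: MDMF splits the plaintext
# into fixed-size segments, so the segment count is
# ceil(len(contents) / segment_size).  The 128 KiB figure below is an
# assumption for illustration; the value these tests actually use is
# DEFAULT_MAX_SEGMENT_SIZE from allmydata.mutable.publish (imported above).
def segment_count(content_length, segment_size=128 * 1024):
    return (content_length + segment_size - 1) // segment_size

CONTENTS_LEN = len("This is an MDMF file") * 100000     # 2,000,000 bytes
assert segment_count(CONTENTS_LEN) > 1                  # multi-segment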
self.CONTENTS = "This is an MDMF file" * 100000 self.uploadable = MutableData(self.CONTENTS) self._storage = FakeStorage() self._nodemaker = make_nodemaker(self._storage) self._storage_broker = self._nodemaker.storage_broker d = self._nodemaker.create_mutable_file(self.uploadable, version=MDMF_VERSION) def _created(node): self._fn = node self._fn2 = self._nodemaker.create_from_cap(node.get_uri()) d.addCallback(_created) return d def publish_sdmf(self): # like publish_one, except that the result is guaranteed to be # an SDMF file self.CONTENTS = "This is an SDMF file" * 1000 self.uploadable = MutableData(self.CONTENTS) self._storage = FakeStorage() self._nodemaker = make_nodemaker(self._storage) self._storage_broker = self._nodemaker.storage_broker d = self._nodemaker.create_mutable_file(self.uploadable, version=SDMF_VERSION) def _created(node): self._fn = node self._fn2 = self._nodemaker.create_from_cap(node.get_uri()) d.addCallback(_created) return d def publish_empty_sdmf(self): self.CONTENTS = "" self.uploadable = MutableData(self.CONTENTS) self._storage = FakeStorage() self._nodemaker = make_nodemaker(self._storage, keysize=None) self._storage_broker = self._nodemaker.storage_broker d = self._nodemaker.create_mutable_file(self.uploadable, version=SDMF_VERSION) def _created(node): self._fn = node self._fn2 = self._nodemaker.create_from_cap(node.get_uri()) d.addCallback(_created) return d def publish_multiple(self, version=0): self.CONTENTS = ["Contents 0", "Contents 1", "Contents 2", "Contents 3a", "Contents 3b"] self.uploadables = [MutableData(d) for d in self.CONTENTS] self._copied_shares = {} self._storage = FakeStorage() self._nodemaker = make_nodemaker(self._storage) d = self._nodemaker.create_mutable_file(self.uploadables[0], version=version) # seqnum=1 def _created(node): self._fn = node # now create multiple versions of the same file, and accumulate # their shares, so we can mix and match them later. d = defer.succeed(None) d.addCallback(self._copy_shares, 0) d.addCallback(lambda res: node.overwrite(self.uploadables[1])) #s2 d.addCallback(self._copy_shares, 1) d.addCallback(lambda res: node.overwrite(self.uploadables[2])) #s3 d.addCallback(self._copy_shares, 2) d.addCallback(lambda res: node.overwrite(self.uploadables[3])) #s4a d.addCallback(self._copy_shares, 3) # now we replace all the shares with version s3, and upload a new # version to get s4b. rollback = dict([(i,2) for i in range(10)]) d.addCallback(lambda res: self._set_versions(rollback)) d.addCallback(lambda res: node.overwrite(self.uploadables[4])) #s4b d.addCallback(self._copy_shares, 4) # we leave the storage in state 4 return d d.addCallback(_created) return d def _copy_shares(self, ignored, index): shares = self._storage._peers # we need a deep copy new_shares = {} for peerid in shares: new_shares[peerid] = {} for shnum in shares[peerid]: new_shares[peerid][shnum] = shares[peerid][shnum] self._copied_shares[index] = new_shares def _set_versions(self, versionmap): # versionmap maps shnums to which version (0,1,2,3,4) we want the # share to be at. Any shnum which is left out of the map will stay at # its current version. 
shares = self._storage._peers oldshares = self._copied_shares for peerid in shares: for shnum in shares[peerid]: if shnum in versionmap: index = versionmap[shnum] shares[peerid][shnum] = oldshares[index][peerid][shnum] class Servermap(unittest.TestCase, PublishMixin): def setUp(self): return self.publish_one() def make_servermap(self, mode=MODE_CHECK, fn=None, sb=None, update_range=None): if fn is None: fn = self._fn if sb is None: sb = self._storage_broker smu = ServermapUpdater(fn, sb, Monitor(), ServerMap(), mode, update_range=update_range) d = smu.update() return d def update_servermap(self, oldmap, mode=MODE_CHECK): smu = ServermapUpdater(self._fn, self._storage_broker, Monitor(), oldmap, mode) d = smu.update() return d def failUnlessOneRecoverable(self, sm, num_shares): self.failUnlessEqual(len(sm.recoverable_versions()), 1) self.failUnlessEqual(len(sm.unrecoverable_versions()), 0) best = sm.best_recoverable_version() self.failIfEqual(best, None) self.failUnlessEqual(sm.recoverable_versions(), set([best])) self.failUnlessEqual(len(sm.shares_available()), 1) self.failUnlessEqual(sm.shares_available()[best], (num_shares, 3, 10)) shnum, servers = sm.make_sharemap().items()[0] server = list(servers)[0] self.failUnlessEqual(sm.version_on_server(server, shnum), best) self.failUnlessEqual(sm.version_on_server(server, 666), None) return sm def test_basic(self): d = defer.succeed(None) ms = self.make_servermap us = self.update_servermap d.addCallback(lambda res: ms(mode=MODE_CHECK)) d.addCallback(lambda sm: self.failUnlessOneRecoverable(sm, 10)) d.addCallback(lambda res: ms(mode=MODE_WRITE)) d.addCallback(lambda sm: self.failUnlessOneRecoverable(sm, 10)) d.addCallback(lambda res: ms(mode=MODE_READ)) # this mode stops at k+epsilon, and epsilon=k, so 6 shares d.addCallback(lambda sm: self.failUnlessOneRecoverable(sm, 6)) d.addCallback(lambda res: ms(mode=MODE_ANYTHING)) # this mode stops at 'k' shares d.addCallback(lambda sm: self.failUnlessOneRecoverable(sm, 3)) # and can we re-use the same servermap? Note that these are sorted in # increasing order of number of servers queried, since once a server # gets into the servermap, we'll always ask it for an update. d.addCallback(lambda sm: self.failUnlessOneRecoverable(sm, 3)) d.addCallback(lambda sm: us(sm, mode=MODE_READ)) d.addCallback(lambda sm: self.failUnlessOneRecoverable(sm, 6)) d.addCallback(lambda sm: us(sm, mode=MODE_WRITE)) d.addCallback(lambda sm: self.failUnlessOneRecoverable(sm, 10)) d.addCallback(lambda sm: us(sm, mode=MODE_CHECK)) d.addCallback(lambda sm: self.failUnlessOneRecoverable(sm, 10)) d.addCallback(lambda sm: us(sm, mode=MODE_ANYTHING)) d.addCallback(lambda sm: self.failUnlessOneRecoverable(sm, 10)) return d def test_fetch_privkey(self): d = defer.succeed(None) # use the sibling filenode (which hasn't been used yet), and make # sure it can fetch the privkey. The file is small, so the privkey # will be fetched on the first (query) pass. 
d.addCallback(lambda res: self.make_servermap(MODE_WRITE, self._fn2)) d.addCallback(lambda sm: self.failUnlessOneRecoverable(sm, 10)) # create a new file, which is large enough to knock the privkey out # of the early part of the file LARGE = "These are Larger contents" * 200 # about 5KB LARGE_uploadable = MutableData(LARGE) d.addCallback(lambda res: self._nodemaker.create_mutable_file(LARGE_uploadable)) def _created(large_fn): large_fn2 = self._nodemaker.create_from_cap(large_fn.get_uri()) return self.make_servermap(MODE_WRITE, large_fn2) d.addCallback(_created) d.addCallback(lambda sm: self.failUnlessOneRecoverable(sm, 10)) return d def test_mark_bad(self): d = defer.succeed(None) ms = self.make_servermap d.addCallback(lambda res: ms(mode=MODE_READ)) d.addCallback(lambda sm: self.failUnlessOneRecoverable(sm, 6)) def _made_map(sm): v = sm.best_recoverable_version() vm = sm.make_versionmap() shares = list(vm[v]) self.failUnlessEqual(len(shares), 6) self._corrupted = set() # mark the first 5 shares as corrupt, then update the servermap. # The map should not have the marked shares it in any more, and # new shares should be found to replace the missing ones. for (shnum, server, timestamp) in shares: if shnum < 5: self._corrupted.add( (server, shnum) ) sm.mark_bad_share(server, shnum, "") return self.update_servermap(sm, MODE_WRITE) d.addCallback(_made_map) def _check_map(sm): # this should find all 5 shares that weren't marked bad v = sm.best_recoverable_version() vm = sm.make_versionmap() shares = list(vm[v]) for (server, shnum) in self._corrupted: server_shares = sm.debug_shares_on_server(server) self.failIf(shnum in server_shares, "%d was in %s" % (shnum, server_shares)) self.failUnlessEqual(len(shares), 5) d.addCallback(_check_map) return d def failUnlessNoneRecoverable(self, sm): self.failUnlessEqual(len(sm.recoverable_versions()), 0) self.failUnlessEqual(len(sm.unrecoverable_versions()), 0) best = sm.best_recoverable_version() self.failUnlessEqual(best, None) self.failUnlessEqual(len(sm.shares_available()), 0) def test_no_shares(self): self._storage._peers = {} # delete all shares ms = self.make_servermap d = defer.succeed(None) # d.addCallback(lambda res: ms(mode=MODE_CHECK)) d.addCallback(lambda sm: self.failUnlessNoneRecoverable(sm)) d.addCallback(lambda res: ms(mode=MODE_ANYTHING)) d.addCallback(lambda sm: self.failUnlessNoneRecoverable(sm)) d.addCallback(lambda res: ms(mode=MODE_WRITE)) d.addCallback(lambda sm: self.failUnlessNoneRecoverable(sm)) d.addCallback(lambda res: ms(mode=MODE_READ)) d.addCallback(lambda sm: self.failUnlessNoneRecoverable(sm)) return d def failUnlessNotQuiteEnough(self, sm): self.failUnlessEqual(len(sm.recoverable_versions()), 0) self.failUnlessEqual(len(sm.unrecoverable_versions()), 1) best = sm.best_recoverable_version() self.failUnlessEqual(best, None) self.failUnlessEqual(len(sm.shares_available()), 1) self.failUnlessEqual(sm.shares_available().values()[0], (2,3,10) ) return sm def test_not_quite_enough_shares(self): s = self._storage ms = self.make_servermap num_shares = len(s._peers) for peerid in s._peers: s._peers[peerid] = {} num_shares -= 1 if num_shares == 2: break # now there ought to be only two shares left assert len([peerid for peerid in s._peers if s._peers[peerid]]) == 2 d = defer.succeed(None) d.addCallback(lambda res: ms(mode=MODE_CHECK)) d.addCallback(lambda sm: self.failUnlessNotQuiteEnough(sm)) d.addCallback(lambda sm: self.failUnlessEqual(len(sm.make_sharemap()), 2)) d.addCallback(lambda res: ms(mode=MODE_ANYTHING)) 
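# The share counts asserted in Servermap.test_basic above fall out of how
# aggressively each mode searches: MODE_ANYTHING stops once k shares are
# found, MODE_READ stops at k+epsilon (and epsilon equals k here, hence
# 6), while MODE_WRITE and MODE_CHECK keep querying until every server has
# answered.  A toy summary of those thresholds (hypothetical helper, not
# the real ServermapUpdater logic):
def toy_shares_sought(mode, k, epsilon, total):
    return {"ANYTHING": k,
            "READ": k + epsilon,
            "WRITE": total,
            "CHECK": total}[mode]

assert toy_shares_sought("ANYTHING", 3, 3, 10) == 3
assert toy_shares_sought("READ", 3, 3, 10) == 6
assert toy_shares_sought("WRITE", 3, 3, 10) == 10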
d.addCallback(lambda sm: self.failUnlessNotQuiteEnough(sm)) d.addCallback(lambda res: ms(mode=MODE_WRITE)) d.addCallback(lambda sm: self.failUnlessNotQuiteEnough(sm)) d.addCallback(lambda res: ms(mode=MODE_READ)) d.addCallback(lambda sm: self.failUnlessNotQuiteEnough(sm)) return d def test_servermapupdater_finds_mdmf_files(self): # setUp already published an MDMF file for us. We just need to # make sure that when we run the ServermapUpdater, the file is # reported to have one recoverable version. d = defer.succeed(None) d.addCallback(lambda ignored: self.publish_mdmf()) d.addCallback(lambda ignored: self.make_servermap(mode=MODE_CHECK)) # Calling make_servermap also updates the servermap in the mode # that we specify, so we just need to see what it says. def _check_servermap(sm): self.failUnlessEqual(len(sm.recoverable_versions()), 1) d.addCallback(_check_servermap) return d def test_fetch_update(self): d = defer.succeed(None) d.addCallback(lambda ignored: self.publish_mdmf()) d.addCallback(lambda ignored: self.make_servermap(mode=MODE_WRITE, update_range=(1, 2))) def _check_servermap(sm): # 10 shares self.failUnlessEqual(len(sm.update_data), 10) # one version for data in sm.update_data.itervalues(): self.failUnlessEqual(len(data), 1) d.addCallback(_check_servermap) return d def test_servermapupdater_finds_sdmf_files(self): d = defer.succeed(None) d.addCallback(lambda ignored: self.publish_sdmf()) d.addCallback(lambda ignored: self.make_servermap(mode=MODE_CHECK)) d.addCallback(lambda servermap: self.failUnlessEqual(len(servermap.recoverable_versions()), 1)) return d class Roundtrip(unittest.TestCase, testutil.ShouldFailMixin, PublishMixin): def setUp(self): return self.publish_one() def make_servermap(self, mode=MODE_READ, oldmap=None, sb=None): if oldmap is None: oldmap = ServerMap() if sb is None: sb = self._storage_broker smu = ServermapUpdater(self._fn, sb, Monitor(), oldmap, mode) d = smu.update() return d def abbrev_verinfo(self, verinfo): if verinfo is None: return None (seqnum, root_hash, IV, segsize, datalength, k, N, prefix, offsets_tuple) = verinfo return "%d-%s" % (seqnum, base32.b2a(root_hash)[:4]) def abbrev_verinfo_dict(self, verinfo_d): output = {} for verinfo,value in verinfo_d.items(): (seqnum, root_hash, IV, segsize, datalength, k, N, prefix, offsets_tuple) = verinfo output["%d-%s" % (seqnum, base32.b2a(root_hash)[:4])] = value return output def dump_servermap(self, servermap): print "SERVERMAP", servermap print "RECOVERABLE", [self.abbrev_verinfo(v) for v in servermap.recoverable_versions()] print "BEST", self.abbrev_verinfo(servermap.best_recoverable_version()) print "available", self.abbrev_verinfo_dict(servermap.shares_available()) def do_download(self, servermap, version=None): if version is None: version = servermap.best_recoverable_version() r = Retrieve(self._fn, self._storage_broker, servermap, version) c = consumer.MemoryConsumer() d = r.download(consumer=c) d.addCallback(lambda mc: "".join(mc.chunks)) return d def test_basic(self): d = self.make_servermap() def _do_retrieve(servermap): self._smap = servermap #self.dump_servermap(servermap) self.failUnlessEqual(len(servermap.recoverable_versions()), 1) return self.do_download(servermap) d.addCallback(_do_retrieve) def _retrieved(new_contents): self.failUnlessEqual(new_contents, self.CONTENTS) d.addCallback(_retrieved) # we should be able to re-use the same servermap, both with and # without updating it. 
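        # (Rough sketch of the retrieval path exercised by do_download above:
        #   r = Retrieve(self._fn, self._storage_broker, servermap, version)
        #   d = r.download(consumer=consumer.MemoryConsumer())
        #   d.addCallback(lambda mc: "".join(mc.chunks))
        # i.e. the plaintext is reassembled from the consumer's chunks.)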
d.addCallback(lambda res: self.do_download(self._smap)) d.addCallback(_retrieved) d.addCallback(lambda res: self.make_servermap(oldmap=self._smap)) d.addCallback(lambda res: self.do_download(self._smap)) d.addCallback(_retrieved) # clobbering the pubkey should make the servermap updater re-fetch it def _clobber_pubkey(res): self._fn._pubkey = None d.addCallback(_clobber_pubkey) d.addCallback(lambda res: self.make_servermap(oldmap=self._smap)) d.addCallback(lambda res: self.do_download(self._smap)) d.addCallback(_retrieved) return d def test_all_shares_vanished(self): d = self.make_servermap() def _remove_shares(servermap): for shares in self._storage._peers.values(): shares.clear() d1 = self.shouldFail(NotEnoughSharesError, "test_all_shares_vanished", "ran out of servers", self.do_download, servermap) return d1 d.addCallback(_remove_shares) return d def test_no_servers(self): sb2 = make_storagebroker(num_peers=0) # if there are no servers, then a MODE_READ servermap should come # back empty d = self.make_servermap(sb=sb2) def _check_servermap(servermap): self.failUnlessEqual(servermap.best_recoverable_version(), None) self.failIf(servermap.recoverable_versions()) self.failIf(servermap.unrecoverable_versions()) self.failIf(servermap.all_servers()) d.addCallback(_check_servermap) return d def test_no_servers_download(self): sb2 = make_storagebroker(num_peers=0) self._fn._storage_broker = sb2 d = self.shouldFail(UnrecoverableFileError, "test_no_servers_download", "no recoverable versions", self._fn.download_best_version) def _restore(res): # a failed download that occurs while we aren't connected to # anybody should not prevent a subsequent download from working. # This isn't quite the webapi-driven test that #463 wants, but it # should be close enough. self._fn._storage_broker = self._storage_broker return self._fn.download_best_version() def _retrieved(new_contents): self.failUnlessEqual(new_contents, self.CONTENTS) d.addCallback(_restore) d.addCallback(_retrieved) return d def _test_corrupt_all(self, offset, substring, should_succeed=False, corrupt_early=True, failure_checker=None, fetch_privkey=False): d = defer.succeed(None) if corrupt_early: d.addCallback(corrupt, self._storage, offset) d.addCallback(lambda res: self.make_servermap()) if not corrupt_early: d.addCallback(corrupt, self._storage, offset) def _do_retrieve(servermap): ver = servermap.best_recoverable_version() if ver is None and not should_succeed: # no recoverable versions == not succeeding. The problem # should be noted in the servermap's list of problems. if substring: allproblems = [str(f) for f in servermap.get_problems()] self.failUnlessIn(substring, "".join(allproblems)) return servermap if should_succeed: d1 = self._fn.download_version(servermap, ver, fetch_privkey) d1.addCallback(lambda new_contents: self.failUnlessEqual(new_contents, self.CONTENTS)) else: d1 = self.shouldFail(NotEnoughSharesError, "_corrupt_all(offset=%s)" % (offset,), substring, self._fn.download_version, servermap, ver, fetch_privkey) if failure_checker: d1.addCallback(failure_checker) d1.addCallback(lambda res: servermap) return d1 d.addCallback(_do_retrieve) return d def test_corrupt_all_verbyte(self): # when the version byte is not 0 or 1, we hit an UnknownVersionError # error in unpack_share(). 
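        # (The 'offset' argument of _test_corrupt_all above may be an integer
        # byte offset into the share, a field name understood by the
        # corrupt() helper such as "pubkey" or "share_data", or a
        # (fieldname, offset) tuple; the tests below use all three forms.)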
d = self._test_corrupt_all(0, "UnknownVersionError") def _check_servermap(servermap): # and the dump should mention the problems s = StringIO() dump = servermap.dump(s).getvalue() self.failUnless("30 PROBLEMS" in dump, dump) d.addCallback(_check_servermap) return d def test_corrupt_all_seqnum(self): # a corrupt sequence number will trigger a bad signature return self._test_corrupt_all(1, "signature is invalid") def test_corrupt_all_R(self): # a corrupt root hash will trigger a bad signature return self._test_corrupt_all(9, "signature is invalid") def test_corrupt_all_IV(self): # a corrupt salt/IV will trigger a bad signature return self._test_corrupt_all(41, "signature is invalid") def test_corrupt_all_k(self): # a corrupt 'k' will trigger a bad signature return self._test_corrupt_all(57, "signature is invalid") def test_corrupt_all_N(self): # a corrupt 'N' will trigger a bad signature return self._test_corrupt_all(58, "signature is invalid") def test_corrupt_all_segsize(self): # a corrupt segsize will trigger a bad signature return self._test_corrupt_all(59, "signature is invalid") def test_corrupt_all_datalen(self): # a corrupt data length will trigger a bad signature return self._test_corrupt_all(67, "signature is invalid") def test_corrupt_all_pubkey(self): # a corrupt pubkey won't match the URI's fingerprint. We need to # remove the pubkey from the filenode, or else it won't bother trying # to update it. self._fn._pubkey = None return self._test_corrupt_all("pubkey", "pubkey doesn't match fingerprint") def test_corrupt_all_sig(self): # a corrupt signature is a bad one # the signature runs from about [543:799], depending upon the length # of the pubkey return self._test_corrupt_all("signature", "signature is invalid") def test_corrupt_all_share_hash_chain_number(self): # a corrupt share hash chain entry will show up as a bad hash. If we # mangle the first byte, that will look like a bad hash number, # causing an IndexError return self._test_corrupt_all("share_hash_chain", "corrupt hashes") def test_corrupt_all_share_hash_chain_hash(self): # a corrupt share hash chain entry will show up as a bad hash. If we # mangle a few bytes in, that will look like a bad hash. return self._test_corrupt_all(("share_hash_chain",4), "corrupt hashes") def test_corrupt_all_block_hash_tree(self): return self._test_corrupt_all("block_hash_tree", "block hash tree failure") def test_corrupt_all_block(self): return self._test_corrupt_all("share_data", "block hash tree failure") def test_corrupt_all_encprivkey(self): # a corrupted privkey won't even be noticed by the reader, only by a # writer. return self._test_corrupt_all("enc_privkey", None, should_succeed=True) def test_corrupt_all_encprivkey_late(self): # this should work for the same reason as above, but we corrupt # after the servermap update to exercise the error handling # code. # We need to remove the privkey from the node, or the retrieve # process won't know to update it. self._fn._privkey = None return self._test_corrupt_all("enc_privkey", None, # this shouldn't fail should_succeed=True, corrupt_early=False, fetch_privkey=True) # disabled until retrieve tests checkstring on each blockfetch. I didn't # just use a .todo because the failing-but-ignored test emits about 30kB # of noise. 
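    # (For quick reference, the small integer offsets used above land in the
    # SDMF header fields named by the tests: 0 = version byte, 1 = seqnum,
    # 9 = root hash, 41 = IV/salt, 57 = k, 58 = N, 59 = segsize,
    # 67 = data length. Corrupting any signed header field is reported as
    # "signature is invalid".)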
def OFF_test_corrupt_all_seqnum_late(self): # corrupting the seqnum between mapupdate and retrieve should result # in NotEnoughSharesError, since each share will look invalid def _check(res): f = res[0] self.failUnless(f.check(NotEnoughSharesError)) self.failUnless("uncoordinated write" in str(f)) return self._test_corrupt_all(1, "ran out of servers", corrupt_early=False, failure_checker=_check) def test_corrupt_all_block_late(self): def _check(res): f = res[0] self.failUnless(f.check(NotEnoughSharesError)) return self._test_corrupt_all("share_data", "block hash tree failure", corrupt_early=False, failure_checker=_check) def test_basic_pubkey_at_end(self): # we corrupt the pubkey in all but the last 'k' shares, allowing the # download to succeed but forcing a bunch of retries first. Note that # this is rather pessimistic: our Retrieve process will throw away # the whole share if the pubkey is bad, even though the rest of the # share might be good. self._fn._pubkey = None k = self._fn.get_required_shares() N = self._fn.get_total_shares() d = defer.succeed(None) d.addCallback(corrupt, self._storage, "pubkey", shnums_to_corrupt=range(0, N-k)) d.addCallback(lambda res: self.make_servermap()) def _do_retrieve(servermap): self.failUnless(servermap.get_problems()) self.failUnless("pubkey doesn't match fingerprint" in str(servermap.get_problems()[0])) ver = servermap.best_recoverable_version() r = Retrieve(self._fn, self._storage_broker, servermap, ver) c = consumer.MemoryConsumer() return r.download(c) d.addCallback(_do_retrieve) d.addCallback(lambda mc: "".join(mc.chunks)) d.addCallback(lambda new_contents: self.failUnlessEqual(new_contents, self.CONTENTS)) return d def _test_corrupt_some(self, offset, mdmf=False): if mdmf: d = self.publish_mdmf() else: d = defer.succeed(None) d.addCallback(lambda ignored: corrupt(None, self._storage, offset, range(5))) d.addCallback(lambda ignored: self.make_servermap()) def _do_retrieve(servermap): ver = servermap.best_recoverable_version() self.failUnless(ver) return self._fn.download_best_version() d.addCallback(_do_retrieve) d.addCallback(lambda new_contents: self.failUnlessEqual(new_contents, self.CONTENTS)) return d def test_corrupt_some(self): # corrupt the data of first five shares (so the servermap thinks # they're good but retrieve marks them as bad), so that the # MODE_READ set of 6 will be insufficient, forcing node.download to # retry with more servers. return self._test_corrupt_some("share_data") def test_download_fails(self): d = corrupt(None, self._storage, "signature") d.addCallback(lambda ignored: self.shouldFail(UnrecoverableFileError, "test_download_anyway", "no recoverable versions", self._fn.download_best_version)) return d def test_corrupt_mdmf_block_hash_tree(self): d = self.publish_mdmf() d.addCallback(lambda ignored: self._test_corrupt_all(("block_hash_tree", 12 * 32), "block hash tree failure", corrupt_early=True, should_succeed=False)) return d def test_corrupt_mdmf_block_hash_tree_late(self): # Note - there is no SDMF counterpart to this test, as the SDMF # files are guaranteed to have exactly one block, and therefore # the block hash tree fits within the initial read (#1240). 
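        # (Unlike SDMF, an MDMF file can span many blocks, so its block hash
        # tree need not fit in the initial read and the corruption may only
        # be noticed later, when the tree is actually fetched and checked.)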
d = self.publish_mdmf() d.addCallback(lambda ignored: self._test_corrupt_all(("block_hash_tree", 12 * 32), "block hash tree failure", corrupt_early=False, should_succeed=False)) return d def test_corrupt_mdmf_share_data(self): d = self.publish_mdmf() d.addCallback(lambda ignored: # TODO: Find out what the block size is and corrupt a # specific block, rather than just guessing. self._test_corrupt_all(("share_data", 12 * 40), "block hash tree failure", corrupt_early=True, should_succeed=False)) return d def test_corrupt_some_mdmf(self): return self._test_corrupt_some(("share_data", 12 * 40), mdmf=True) class CheckerMixin: def check_good(self, r, where): self.failUnless(r.is_healthy(), where) return r def check_bad(self, r, where): self.failIf(r.is_healthy(), where) return r def check_expected_failure(self, r, expected_exception, substring, where): for (peerid, storage_index, shnum, f) in r.get_share_problems(): if f.check(expected_exception): self.failUnless(substring in str(f), "%s: substring '%s' not in '%s'" % (where, substring, str(f))) return self.fail("%s: didn't see expected exception %s in problems %s" % (where, expected_exception, r.get_share_problems())) class Checker(unittest.TestCase, CheckerMixin, PublishMixin): def setUp(self): return self.publish_one() def test_check_good(self): d = self._fn.check(Monitor()) d.addCallback(self.check_good, "test_check_good") return d def test_check_mdmf_good(self): d = self.publish_mdmf() d.addCallback(lambda ignored: self._fn.check(Monitor())) d.addCallback(self.check_good, "test_check_mdmf_good") return d def test_check_no_shares(self): for shares in self._storage._peers.values(): shares.clear() d = self._fn.check(Monitor()) d.addCallback(self.check_bad, "test_check_no_shares") return d def test_check_mdmf_no_shares(self): d = self.publish_mdmf() def _then(ignored): for share in self._storage._peers.values(): share.clear() d.addCallback(_then) d.addCallback(lambda ignored: self._fn.check(Monitor())) d.addCallback(self.check_bad, "test_check_mdmf_no_shares") return d def test_check_not_enough_shares(self): for shares in self._storage._peers.values(): for shnum in shares.keys(): if shnum > 0: del shares[shnum] d = self._fn.check(Monitor()) d.addCallback(self.check_bad, "test_check_not_enough_shares") return d def test_check_mdmf_not_enough_shares(self): d = self.publish_mdmf() def _then(ignored): for shares in self._storage._peers.values(): for shnum in shares.keys(): if shnum > 0: del shares[shnum] d.addCallback(_then) d.addCallback(lambda ignored: self._fn.check(Monitor())) d.addCallback(self.check_bad, "test_check_mdmf_not_enougH_shares") return d def test_check_all_bad_sig(self): d = corrupt(None, self._storage, 1) # bad sig d.addCallback(lambda ignored: self._fn.check(Monitor())) d.addCallback(self.check_bad, "test_check_all_bad_sig") return d def test_check_mdmf_all_bad_sig(self): d = self.publish_mdmf() d.addCallback(lambda ignored: corrupt(None, self._storage, 1)) d.addCallback(lambda ignored: self._fn.check(Monitor())) d.addCallback(self.check_bad, "test_check_mdmf_all_bad_sig") return d def test_verify_mdmf_all_bad_sharedata(self): d = self.publish_mdmf() # On 8 of the shares, corrupt the beginning of the share data. # The signature check during the servermap update won't catch this. d.addCallback(lambda ignored: corrupt(None, self._storage, "share_data", range(8))) # On 2 of the shares, corrupt the end of the share data. 
# The signature check during the servermap update won't catch # this either, and the retrieval process will have to process # all of the segments before it notices. d.addCallback(lambda ignored: # the block hash tree comes right after the share data, so if we # corrupt a little before the block hash tree, we'll corrupt in the # last block of each share. corrupt(None, self._storage, "block_hash_tree", [8, 9], -5)) d.addCallback(lambda ignored: self._fn.check(Monitor(), verify=True)) # The verifier should flag the file as unhealthy, and should # list all 10 shares as bad. d.addCallback(self.check_bad, "test_verify_mdmf_all_bad_sharedata") def _check_num_bad(r): self.failIf(r.is_recoverable()) smap = r.get_servermap() self.failUnlessEqual(len(smap.get_bad_shares()), 10) d.addCallback(_check_num_bad) return d def test_check_all_bad_blocks(self): d = corrupt(None, self._storage, "share_data", [9]) # bad blocks # the Checker won't notice this.. it doesn't look at actual data d.addCallback(lambda ignored: self._fn.check(Monitor())) d.addCallback(self.check_good, "test_check_all_bad_blocks") return d def test_check_mdmf_all_bad_blocks(self): d = self.publish_mdmf() d.addCallback(lambda ignored: corrupt(None, self._storage, "share_data")) d.addCallback(lambda ignored: self._fn.check(Monitor())) d.addCallback(self.check_good, "test_check_mdmf_all_bad_blocks") return d def test_verify_good(self): d = self._fn.check(Monitor(), verify=True) d.addCallback(self.check_good, "test_verify_good") return d def test_verify_all_bad_sig(self): d = corrupt(None, self._storage, 1) # bad sig d.addCallback(lambda ignored: self._fn.check(Monitor(), verify=True)) d.addCallback(self.check_bad, "test_verify_all_bad_sig") return d def test_verify_one_bad_sig(self): d = corrupt(None, self._storage, 1, [9]) # bad sig d.addCallback(lambda ignored: self._fn.check(Monitor(), verify=True)) d.addCallback(self.check_bad, "test_verify_one_bad_sig") return d def test_verify_one_bad_block(self): d = corrupt(None, self._storage, "share_data", [9]) # bad blocks # the Verifier *will* notice this, since it examines every byte d.addCallback(lambda ignored: self._fn.check(Monitor(), verify=True)) d.addCallback(self.check_bad, "test_verify_one_bad_block") d.addCallback(self.check_expected_failure, CorruptShareError, "block hash tree failure", "test_verify_one_bad_block") return d def test_verify_one_bad_sharehash(self): d = corrupt(None, self._storage, "share_hash_chain", [9], 5) d.addCallback(lambda ignored: self._fn.check(Monitor(), verify=True)) d.addCallback(self.check_bad, "test_verify_one_bad_sharehash") d.addCallback(self.check_expected_failure, CorruptShareError, "corrupt hashes", "test_verify_one_bad_sharehash") return d def test_verify_one_bad_encprivkey(self): d = corrupt(None, self._storage, "enc_privkey", [9]) # bad privkey d.addCallback(lambda ignored: self._fn.check(Monitor(), verify=True)) d.addCallback(self.check_bad, "test_verify_one_bad_encprivkey") d.addCallback(self.check_expected_failure, CorruptShareError, "invalid privkey", "test_verify_one_bad_encprivkey") return d def test_verify_one_bad_encprivkey_uncheckable(self): d = corrupt(None, self._storage, "enc_privkey", [9]) # bad privkey readonly_fn = self._fn.get_readonly() # a read-only node has no way to validate the privkey d.addCallback(lambda ignored: readonly_fn.check(Monitor(), verify=True)) d.addCallback(self.check_good, "test_verify_one_bad_encprivkey_uncheckable") return d def test_verify_mdmf_good(self): d = self.publish_mdmf() d.addCallback(lambda 
ignored: self._fn.check(Monitor(), verify=True)) d.addCallback(self.check_good, "test_verify_mdmf_good") return d def test_verify_mdmf_one_bad_block(self): d = self.publish_mdmf() d.addCallback(lambda ignored: corrupt(None, self._storage, "share_data", [1])) d.addCallback(lambda ignored: self._fn.check(Monitor(), verify=True)) # We should find one bad block here d.addCallback(self.check_bad, "test_verify_mdmf_one_bad_block") d.addCallback(self.check_expected_failure, CorruptShareError, "block hash tree failure", "test_verify_mdmf_one_bad_block") return d def test_verify_mdmf_bad_encprivkey(self): d = self.publish_mdmf() d.addCallback(lambda ignored: corrupt(None, self._storage, "enc_privkey", [0])) d.addCallback(lambda ignored: self._fn.check(Monitor(), verify=True)) d.addCallback(self.check_bad, "test_verify_mdmf_bad_encprivkey") d.addCallback(self.check_expected_failure, CorruptShareError, "privkey", "test_verify_mdmf_bad_encprivkey") return d def test_verify_mdmf_bad_sig(self): d = self.publish_mdmf() d.addCallback(lambda ignored: corrupt(None, self._storage, 1, [1])) d.addCallback(lambda ignored: self._fn.check(Monitor(), verify=True)) d.addCallback(self.check_bad, "test_verify_mdmf_bad_sig") return d def test_verify_mdmf_bad_encprivkey_uncheckable(self): d = self.publish_mdmf() d.addCallback(lambda ignored: corrupt(None, self._storage, "enc_privkey", [1])) d.addCallback(lambda ignored: self._fn.get_readonly()) d.addCallback(lambda fn: fn.check(Monitor(), verify=True)) d.addCallback(self.check_good, "test_verify_mdmf_bad_encprivkey_uncheckable") return d class Repair(unittest.TestCase, PublishMixin, ShouldFailMixin): def get_shares(self, s): all_shares = {} # maps (peerid, shnum) to share data for peerid in s._peers: shares = s._peers[peerid] for shnum in shares: data = shares[shnum] all_shares[ (peerid, shnum) ] = data return all_shares def copy_shares(self, ignored=None): self.old_shares.append(self.get_shares(self._storage)) def test_repair_nop(self): self.old_shares = [] d = self.publish_one() d.addCallback(self.copy_shares) d.addCallback(lambda res: self._fn.check(Monitor())) d.addCallback(lambda check_results: self._fn.repair(check_results)) def _check_results(rres): self.failUnless(IRepairResults.providedBy(rres)) self.failUnless(rres.get_successful()) # TODO: examine results self.copy_shares() initial_shares = self.old_shares[0] new_shares = self.old_shares[1] # TODO: this really shouldn't change anything. When we implement # a "minimal-bandwidth" repairer", change this test to assert: #self.failUnlessEqual(new_shares, initial_shares) # all shares should be in the same place as before self.failUnlessEqual(set(initial_shares.keys()), set(new_shares.keys())) # but they should all be at a newer seqnum. The IV will be # different, so the roothash will be too. 
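            # (unpack_header returns the SDMF prefix fields in order:
            # version, seqnum, root_hash, IV, k, N, segsize, datalen, plus
            # the offsets tuple -- exactly the fields the loop below compares
            # between the old and new copies of each share.)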
            for key in initial_shares:
                (version0, seqnum0, root_hash0, IV0, k0, N0, segsize0,
                 datalen0, o0) = unpack_header(initial_shares[key])
                (version1, seqnum1, root_hash1, IV1, k1, N1, segsize1,
                 datalen1, o1) = unpack_header(new_shares[key])
                self.failUnlessEqual(version0, version1)
                self.failUnlessEqual(seqnum0+1, seqnum1)
                self.failUnlessEqual(k0, k1)
                self.failUnlessEqual(N0, N1)
                self.failUnlessEqual(segsize0, segsize1)
                self.failUnlessEqual(datalen0, datalen1)
        d.addCallback(_check_results)
        return d

    def failIfSharesChanged(self, ignored=None):
        old_shares = self.old_shares[-2]
        current_shares = self.old_shares[-1]
        self.failUnlessEqual(old_shares, current_shares)

    def _test_whether_repairable(self, publisher, nshares, expected_result):
        d = publisher()
        def _delete_some_shares(ign):
            shares = self._storage._peers
            for peerid in shares:
                for shnum in list(shares[peerid]):
                    if shnum >= nshares:
                        del shares[peerid][shnum]
        d.addCallback(_delete_some_shares)
        d.addCallback(lambda ign: self._fn.check(Monitor()))
        def _check(cr):
            self.failIf(cr.is_healthy())
            self.failUnlessEqual(cr.is_recoverable(), expected_result)
            return cr
        d.addCallback(_check)
        d.addCallback(lambda check_results: self._fn.repair(check_results))
        d.addCallback(lambda crr: self.failUnlessEqual(crr.get_successful(),
                                                       expected_result))
        return d

    def test_unrepairable_0shares(self):
        return self._test_whether_repairable(self.publish_one, 0, False)

    def test_mdmf_unrepairable_0shares(self):
        return self._test_whether_repairable(self.publish_mdmf, 0, False)

    def test_unrepairable_1share(self):
        return self._test_whether_repairable(self.publish_one, 1, False)

    def test_mdmf_unrepairable_1share(self):
        return self._test_whether_repairable(self.publish_mdmf, 1, False)

    def test_repairable_5shares(self):
        return self._test_whether_repairable(self.publish_one, 5, True)

    def test_mdmf_repairable_5shares(self):
        return self._test_whether_repairable(self.publish_mdmf, 5, True)

    def _test_whether_checkandrepairable(self, publisher, nshares,
                                         expected_result):
        """
        Like the _test_whether_repairable tests, but invoking
        check_and_repair instead of invoking check and then invoking
        repair.
""" d = publisher() def _delete_some_shares(ign): shares = self._storage._peers for peerid in shares: for shnum in list(shares[peerid]): if shnum >= nshares: del shares[peerid][shnum] d.addCallback(_delete_some_shares) d.addCallback(lambda ign: self._fn.check_and_repair(Monitor())) d.addCallback(lambda crr: self.failUnlessEqual(crr.get_repair_successful(), expected_result)) return d def test_unrepairable_0shares_checkandrepair(self): return self._test_whether_checkandrepairable(self.publish_one, 0, False) def test_mdmf_unrepairable_0shares_checkandrepair(self): return self._test_whether_checkandrepairable(self.publish_mdmf, 0, False) def test_unrepairable_1share_checkandrepair(self): return self._test_whether_checkandrepairable(self.publish_one, 1, False) def test_mdmf_unrepairable_1share_checkandrepair(self): return self._test_whether_checkandrepairable(self.publish_mdmf, 1, False) def test_repairable_5shares_checkandrepair(self): return self._test_whether_checkandrepairable(self.publish_one, 5, True) def test_mdmf_repairable_5shares_checkandrepair(self): return self._test_whether_checkandrepairable(self.publish_mdmf, 5, True) def test_merge(self): self.old_shares = [] d = self.publish_multiple() # repair will refuse to merge multiple highest seqnums unless you # pass force=True d.addCallback(lambda res: self._set_versions({0:3,2:3,4:3,6:3,8:3, 1:4,3:4,5:4,7:4,9:4})) d.addCallback(self.copy_shares) d.addCallback(lambda res: self._fn.check(Monitor())) def _try_repair(check_results): ex = "There were multiple recoverable versions with identical seqnums, so force=True must be passed to the repair() operation" d2 = self.shouldFail(MustForceRepairError, "test_merge", ex, self._fn.repair, check_results) d2.addCallback(self.copy_shares) d2.addCallback(self.failIfSharesChanged) d2.addCallback(lambda res: check_results) return d2 d.addCallback(_try_repair) d.addCallback(lambda check_results: self._fn.repair(check_results, force=True)) # this should give us 10 shares of the highest roothash def _check_repair_results(rres): self.failUnless(rres.get_successful()) pass # TODO d.addCallback(_check_repair_results) d.addCallback(lambda res: self._fn.get_servermap(MODE_CHECK)) def _check_smap(smap): self.failUnlessEqual(len(smap.recoverable_versions()), 1) self.failIf(smap.unrecoverable_versions()) # now, which should have won? roothash_s4a = self.get_roothash_for(3) roothash_s4b = self.get_roothash_for(4) if roothash_s4b > roothash_s4a: expected_contents = self.CONTENTS[4] else: expected_contents = self.CONTENTS[3] new_versionid = smap.best_recoverable_version() self.failUnlessEqual(new_versionid[0], 5) # seqnum 5 d2 = self._fn.download_version(smap, new_versionid) d2.addCallback(self.failUnlessEqual, expected_contents) return d2 d.addCallback(_check_smap) return d def test_non_merge(self): self.old_shares = [] d = self.publish_multiple() # repair should not refuse a repair that doesn't need to merge. In # this case, we combine v2 with v3. The repair should ignore v2 and # copy v3 into a new v5. 
d.addCallback(lambda res: self._set_versions({0:2,2:2,4:2,6:2,8:2, 1:3,3:3,5:3,7:3,9:3})) d.addCallback(lambda res: self._fn.check(Monitor())) d.addCallback(lambda check_results: self._fn.repair(check_results)) # this should give us 10 shares of v3 def _check_repair_results(rres): self.failUnless(rres.get_successful()) pass # TODO d.addCallback(_check_repair_results) d.addCallback(lambda res: self._fn.get_servermap(MODE_CHECK)) def _check_smap(smap): self.failUnlessEqual(len(smap.recoverable_versions()), 1) self.failIf(smap.unrecoverable_versions()) # now, which should have won? expected_contents = self.CONTENTS[3] new_versionid = smap.best_recoverable_version() self.failUnlessEqual(new_versionid[0], 5) # seqnum 5 d2 = self._fn.download_version(smap, new_versionid) d2.addCallback(self.failUnlessEqual, expected_contents) return d2 d.addCallback(_check_smap) return d def get_roothash_for(self, index): # return the roothash for the first share we see in the saved set shares = self._copied_shares[index] for peerid in shares: for shnum in shares[peerid]: share = shares[peerid][shnum] (version, seqnum, root_hash, IV, k, N, segsize, datalen, o) = \ unpack_header(share) return root_hash def test_check_and_repair_readcap(self): # we can't currently repair from a mutable readcap: #625 self.old_shares = [] d = self.publish_one() d.addCallback(self.copy_shares) def _get_readcap(res): self._fn3 = self._fn.get_readonly() # also delete some shares for peerid,shares in self._storage._peers.items(): shares.pop(0, None) d.addCallback(_get_readcap) d.addCallback(lambda res: self._fn3.check_and_repair(Monitor())) def _check_results(crr): self.failUnless(ICheckAndRepairResults.providedBy(crr)) # we should detect the unhealthy, but skip over mutable-readcap # repairs until #625 is fixed self.failIf(crr.get_pre_repair_results().is_healthy()) self.failIf(crr.get_repair_attempted()) self.failIf(crr.get_post_repair_results().is_healthy()) d.addCallback(_check_results) return d def test_repair_empty(self): # bug 1689: delete one share of an empty mutable file, then repair. # In the buggy version, the check that precedes the retrieve+publish # cycle uses MODE_READ, instead of MODE_REPAIR, and fails to get the # privkey that repair needs. d = self.publish_empty_sdmf() def _delete_one_share(ign): shares = self._storage._peers for peerid in shares: for shnum in list(shares[peerid]): if shnum == 0: del shares[peerid][shnum] d.addCallback(_delete_one_share) d.addCallback(lambda ign: self._fn2.check(Monitor())) d.addCallback(lambda check_results: self._fn2.repair(check_results)) def _check(crr): self.failUnlessEqual(crr.get_successful(), True) d.addCallback(_check) return d class DevNullDictionary(dict): def __setitem__(self, key, value): return class MultipleEncodings(unittest.TestCase): def setUp(self): self.CONTENTS = "New contents go here" self.uploadable = MutableData(self.CONTENTS) self._storage = FakeStorage() self._nodemaker = make_nodemaker(self._storage, num_peers=20) self._storage_broker = self._nodemaker.storage_broker d = self._nodemaker.create_mutable_file(self.uploadable) def _created(node): self._fn = node d.addCallback(_created) return d def _encode(self, k, n, data, version=SDMF_VERSION): # encode 'data' into a peerid->shares dict. 
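        # (Rough outline of what follows: clone the filenode from its cap
        # with the node cache disabled, copy over its keys, override k and N,
        # publish the new data into an emptied FakeStorage, and return the
        # captured peerid -> {shnum: share} dict, leaving the fake storage
        # empty again for the next encoding.)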
fn = self._fn # disable the nodecache, since for these tests we explicitly need # multiple nodes pointing at the same file self._nodemaker._node_cache = DevNullDictionary() fn2 = self._nodemaker.create_from_cap(fn.get_uri()) # then we copy over other fields that are normally fetched from the # existing shares fn2._pubkey = fn._pubkey fn2._privkey = fn._privkey fn2._encprivkey = fn._encprivkey # and set the encoding parameters to something completely different fn2._required_shares = k fn2._total_shares = n s = self._storage s._peers = {} # clear existing storage p2 = Publish(fn2, self._storage_broker, None) uploadable = MutableData(data) d = p2.publish(uploadable) def _published(res): shares = s._peers s._peers = {} return shares d.addCallback(_published) return d def make_servermap(self, mode=MODE_READ, oldmap=None): if oldmap is None: oldmap = ServerMap() smu = ServermapUpdater(self._fn, self._storage_broker, Monitor(), oldmap, mode) d = smu.update() return d def test_multiple_encodings(self): # we encode the same file in two different ways (3-of-10 and 4-of-9), # then mix up the shares, to make sure that download survives seeing # a variety of encodings. This is actually kind of tricky to set up. contents1 = "Contents for encoding 1 (3-of-10) go here"*1000 contents2 = "Contents for encoding 2 (4-of-9) go here"*1000 contents3 = "Contents for encoding 3 (4-of-7) go here"*1000 # we make a retrieval object that doesn't know what encoding # parameters to use fn3 = self._nodemaker.create_from_cap(self._fn.get_uri()) # now we upload a file through fn1, and grab its shares d = self._encode(3, 10, contents1) def _encoded_1(shares): self._shares1 = shares d.addCallback(_encoded_1) d.addCallback(lambda res: self._encode(4, 9, contents2)) def _encoded_2(shares): self._shares2 = shares d.addCallback(_encoded_2) d.addCallback(lambda res: self._encode(4, 7, contents3)) def _encoded_3(shares): self._shares3 = shares d.addCallback(_encoded_3) def _merge(res): log.msg("merging sharelists") # we merge the shares from the two sets, leaving each shnum in # its original location, but using a share from set1 or set2 # according to the following sequence: # # 4-of-9 a s2 # 4-of-9 b s2 # 4-of-7 c s3 # 4-of-9 d s2 # 3-of-9 e s1 # 3-of-9 f s1 # 3-of-9 g s1 # 4-of-9 h s2 # # so that neither form can be recovered until fetch [f], at which # point version-s1 (the 3-of-10 form) should be recoverable. If # the implementation latches on to the first version it sees, # then s2 will be recoverable at fetch [g]. 
# Later, when we implement code that handles multiple versions, # we can use this framework to assert that all recoverable # versions are retrieved, and test that 'epsilon' does its job places = [2, 2, 3, 2, 1, 1, 1, 2] sharemap = {} sb = self._storage_broker for peerid in sorted(sb.get_all_serverids()): for shnum in self._shares1.get(peerid, {}): if shnum < len(places): which = places[shnum] else: which = "x" self._storage._peers[peerid] = peers = {} in_1 = shnum in self._shares1[peerid] in_2 = shnum in self._shares2.get(peerid, {}) in_3 = shnum in self._shares3.get(peerid, {}) if which == 1: if in_1: peers[shnum] = self._shares1[peerid][shnum] sharemap[shnum] = peerid elif which == 2: if in_2: peers[shnum] = self._shares2[peerid][shnum] sharemap[shnum] = peerid elif which == 3: if in_3: peers[shnum] = self._shares3[peerid][shnum] sharemap[shnum] = peerid # we don't bother placing any other shares # now sort the sequence so that share 0 is returned first new_sequence = [sharemap[shnum] for shnum in sorted(sharemap.keys())] self._storage._sequence = new_sequence log.msg("merge done") d.addCallback(_merge) d.addCallback(lambda res: fn3.download_best_version()) def _retrieved(new_contents): # the current specified behavior is "first version recoverable" self.failUnlessEqual(new_contents, contents1) d.addCallback(_retrieved) return d class MultipleVersions(unittest.TestCase, PublishMixin, CheckerMixin): def setUp(self): return self.publish_multiple() def test_multiple_versions(self): # if we see a mix of versions in the grid, download_best_version # should get the latest one self._set_versions(dict([(i,2) for i in (0,2,4,6,8)])) d = self._fn.download_best_version() d.addCallback(lambda res: self.failUnlessEqual(res, self.CONTENTS[4])) # and the checker should report problems d.addCallback(lambda res: self._fn.check(Monitor())) d.addCallback(self.check_bad, "test_multiple_versions") # but if everything is at version 2, that's what we should download d.addCallback(lambda res: self._set_versions(dict([(i,2) for i in range(10)]))) d.addCallback(lambda res: self._fn.download_best_version()) d.addCallback(lambda res: self.failUnlessEqual(res, self.CONTENTS[2])) # if exactly one share is at version 3, we should still get v2 d.addCallback(lambda res: self._set_versions({0:3})) d.addCallback(lambda res: self._fn.download_best_version()) d.addCallback(lambda res: self.failUnlessEqual(res, self.CONTENTS[2])) # but the servermap should see the unrecoverable version. This # depends upon the single newer share being queried early. 
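        # (The check below relies on unrecoverable_newer_versions() mapping
        # the lone newer verinfo to a 'health' tuple, here (1, 3): one share
        # seen out of the k=3 needed to recover that version.)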
d.addCallback(lambda res: self._fn.get_servermap(MODE_READ)) def _check_smap(smap): self.failUnlessEqual(len(smap.unrecoverable_versions()), 1) newer = smap.unrecoverable_newer_versions() self.failUnlessEqual(len(newer), 1) verinfo, health = newer.items()[0] self.failUnlessEqual(verinfo[0], 4) self.failUnlessEqual(health, (1,3)) self.failIf(smap.needs_merge()) d.addCallback(_check_smap) # if we have a mix of two parallel versions (s4a and s4b), we could # recover either d.addCallback(lambda res: self._set_versions({0:3,2:3,4:3,6:3,8:3, 1:4,3:4,5:4,7:4,9:4})) d.addCallback(lambda res: self._fn.get_servermap(MODE_READ)) def _check_smap_mixed(smap): self.failUnlessEqual(len(smap.unrecoverable_versions()), 0) newer = smap.unrecoverable_newer_versions() self.failUnlessEqual(len(newer), 0) self.failUnless(smap.needs_merge()) d.addCallback(_check_smap_mixed) d.addCallback(lambda res: self._fn.download_best_version()) d.addCallback(lambda res: self.failUnless(res == self.CONTENTS[3] or res == self.CONTENTS[4])) return d def test_replace(self): # if we see a mix of versions in the grid, we should be able to # replace them all with a newer version # if exactly one share is at version 3, we should download (and # replace) v2, and the result should be v4. Note that the index we # give to _set_versions is different than the sequence number. target = dict([(i,2) for i in range(10)]) # seqnum3 target[0] = 3 # seqnum4 self._set_versions(target) def _modify(oldversion, servermap, first_time): return oldversion + " modified" d = self._fn.modify(_modify) d.addCallback(lambda res: self._fn.download_best_version()) expected = self.CONTENTS[2] + " modified" d.addCallback(lambda res: self.failUnlessEqual(res, expected)) # and the servermap should indicate that the outlier was replaced too d.addCallback(lambda res: self._fn.get_servermap(MODE_CHECK)) def _check_smap(smap): self.failUnlessEqual(smap.highest_seqnum(), 5) self.failUnlessEqual(len(smap.unrecoverable_versions()), 0) self.failUnlessEqual(len(smap.recoverable_versions()), 1) d.addCallback(_check_smap) return d class Exceptions(unittest.TestCase): def test_repr(self): nmde = NeedMoreDataError(100, 50, 100) self.failUnless("NeedMoreDataError" in repr(nmde), repr(nmde)) ucwe = UncoordinatedWriteError() self.failUnless("UncoordinatedWriteError" in repr(ucwe), repr(ucwe)) class SameKeyGenerator: def __init__(self, pubkey, privkey): self.pubkey = pubkey self.privkey = privkey def generate(self, keysize=None): return defer.succeed( (self.pubkey, self.privkey) ) class FirstServerGetsKilled: done = False def notify(self, retval, wrapper, methname): if not self.done: wrapper.broken = True self.done = True return retval class FirstServerGetsDeleted: def __init__(self): self.done = False self.silenced = None def notify(self, retval, wrapper, methname): if not self.done: # this query will work, but later queries should think the share # has been deleted self.done = True self.silenced = wrapper return retval if wrapper == self.silenced: assert methname == "slot_testv_and_readv_and_writev" return (True, {}) return retval class Problems(GridTestMixin, unittest.TestCase, testutil.ShouldFailMixin): def do_publish_surprise(self, version): self.basedir = "mutable/Problems/test_publish_surprise_%s" % version self.set_up_grid() nm = self.g.clients[0].nodemaker d = nm.create_mutable_file(MutableData("contents 1"), version=version) def _created(n): d = defer.succeed(None) d.addCallback(lambda res: n.get_servermap(MODE_WRITE)) def _got_smap1(smap): # stash the old state of the 
file self.old_map = smap d.addCallback(_got_smap1) # then modify the file, leaving the old map untouched d.addCallback(lambda res: log.msg("starting winning write")) d.addCallback(lambda res: n.overwrite(MutableData("contents 2"))) # now attempt to modify the file with the old servermap. This # will look just like an uncoordinated write, in which every # single share got updated between our mapupdate and our publish d.addCallback(lambda res: log.msg("starting doomed write")) d.addCallback(lambda res: self.shouldFail(UncoordinatedWriteError, "test_publish_surprise", None, n.upload, MutableData("contents 2a"), self.old_map)) return d d.addCallback(_created) return d def test_publish_surprise_sdmf(self): return self.do_publish_surprise(SDMF_VERSION) def test_publish_surprise_mdmf(self): return self.do_publish_surprise(MDMF_VERSION) def test_retrieve_surprise(self): self.basedir = "mutable/Problems/test_retrieve_surprise" self.set_up_grid() nm = self.g.clients[0].nodemaker d = nm.create_mutable_file(MutableData("contents 1"*4000)) def _created(n): d = defer.succeed(None) d.addCallback(lambda res: n.get_servermap(MODE_READ)) def _got_smap1(smap): # stash the old state of the file self.old_map = smap d.addCallback(_got_smap1) # then modify the file, leaving the old map untouched d.addCallback(lambda res: log.msg("starting winning write")) d.addCallback(lambda res: n.overwrite(MutableData("contents 2"))) # now attempt to retrieve the old version with the old servermap. # This will look like someone has changed the file since we # updated the servermap. d.addCallback(lambda res: log.msg("starting doomed read")) d.addCallback(lambda res: self.shouldFail(NotEnoughSharesError, "test_retrieve_surprise", "ran out of servers: have 0 of 1", n.download_version, self.old_map, self.old_map.best_recoverable_version(), )) return d d.addCallback(_created) return d def test_unexpected_shares(self): # upload the file, take a servermap, shut down one of the servers, # upload it again (causing shares to appear on a new server), then # upload using the old servermap. The last upload should fail with an # UncoordinatedWriteError, because of the shares that didn't appear # in the servermap. self.basedir = "mutable/Problems/test_unexpected_shares" self.set_up_grid() nm = self.g.clients[0].nodemaker d = nm.create_mutable_file(MutableData("contents 1")) def _created(n): d = defer.succeed(None) d.addCallback(lambda res: n.get_servermap(MODE_WRITE)) def _got_smap1(smap): # stash the old state of the file self.old_map = smap # now shut down one of the servers peer0 = list(smap.make_sharemap()[0])[0].get_serverid() self.g.remove_server(peer0) # then modify the file, leaving the old map untouched log.msg("starting winning write") return n.overwrite(MutableData("contents 2")) d.addCallback(_got_smap1) # now attempt to modify the file with the old servermap. This # will look just like an uncoordinated write, in which every # single share got updated between our mapupdate and our publish d.addCallback(lambda res: log.msg("starting doomed write")) d.addCallback(lambda res: self.shouldFail(UncoordinatedWriteError, "test_surprise", None, n.upload, MutableData("contents 2a"), self.old_map)) return d d.addCallback(_created) return d def test_multiply_placed_shares(self): self.basedir = "mutable/Problems/test_multiply_placed_shares" self.set_up_grid() nm = self.g.clients[0].nodemaker d = nm.create_mutable_file(MutableData("contents 1")) # remove one of the servers and reupload the file. 
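        # (Sequence sketched by the callbacks below: drop the last server and
        # add a fresh one in its place, download + overwrite so new shares
        # land on the replacement, then re-add the removed server and
        # overwrite once more; the final MODE_CHECK servermap must show
        # exactly one recoverable version, i.e. the stale shares on the
        # restored server were updated too.)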
def _created(n): self._node = n servers = self.g.get_all_serverids() self.ss = self.g.remove_server(servers[len(servers)-1]) new_server = self.g.make_server(len(servers)-1) self.g.add_server(len(servers)-1, new_server) return self._node.download_best_version() d.addCallback(_created) d.addCallback(lambda data: MutableData(data)) d.addCallback(lambda data: self._node.overwrite(data)) # restore the server we removed earlier, then download+upload # the file again def _overwritten(ign): self.g.add_server(len(self.g.servers_by_number), self.ss) return self._node.download_best_version() d.addCallback(_overwritten) d.addCallback(lambda data: MutableData(data)) d.addCallback(lambda data: self._node.overwrite(data)) d.addCallback(lambda ignored: self._node.get_servermap(MODE_CHECK)) def _overwritten_again(smap): # Make sure that all shares were updated by making sure that # there aren't any other versions in the sharemap. self.failUnlessEqual(len(smap.recoverable_versions()), 1) self.failUnlessEqual(len(smap.unrecoverable_versions()), 0) d.addCallback(_overwritten_again) return d def test_bad_server(self): # Break one server, then create the file: the initial publish should # complete with an alternate server. Breaking a second server should # not prevent an update from succeeding either. self.basedir = "mutable/Problems/test_bad_server" self.set_up_grid() nm = self.g.clients[0].nodemaker # to make sure that one of the initial peers is broken, we have to # get creative. We create an RSA key and compute its storage-index. # Then we make a KeyGenerator that always returns that one key, and # use it to create the mutable file. This will get easier when we can # use #467 static-server-selection to disable permutation and force # the choice of server for share[0]. d = nm.key_generator.generate(TEST_RSA_KEY_SIZE) def _got_key( (pubkey, privkey) ): nm.key_generator = SameKeyGenerator(pubkey, privkey) pubkey_s = pubkey.serialize() privkey_s = privkey.serialize() u = uri.WriteableSSKFileURI(ssk_writekey_hash(privkey_s), ssk_pubkey_fingerprint_hash(pubkey_s)) self._storage_index = u.get_storage_index() d.addCallback(_got_key) def _break_peer0(res): si = self._storage_index servers = nm.storage_broker.get_servers_for_psi(si) self.g.break_server(servers[0].get_serverid()) self.server1 = servers[1] d.addCallback(_break_peer0) # now "create" the file, using the pre-established key, and let the # initial publish finally happen d.addCallback(lambda res: nm.create_mutable_file(MutableData("contents 1"))) # that ought to work def _got_node(n): d = n.download_best_version() d.addCallback(lambda res: self.failUnlessEqual(res, "contents 1")) # now break the second peer def _break_peer1(res): self.g.break_server(self.server1.get_serverid()) d.addCallback(_break_peer1) d.addCallback(lambda res: n.overwrite(MutableData("contents 2"))) # that ought to work too d.addCallback(lambda res: n.download_best_version()) d.addCallback(lambda res: self.failUnlessEqual(res, "contents 2")) def _explain_error(f): print f if f.check(NotEnoughServersError): print "first_error:", f.value.first_error return f d.addErrback(_explain_error) return d d.addCallback(_got_node) return d def test_bad_server_overlap(self): # like test_bad_server, but with no extra unused servers to fall back # upon. This means that we must re-use a server which we've already # used. If we don't remember the fact that we sent them one share # already, we'll mistakenly think we're experiencing an # UncoordinatedWriteError. 
# Break one server, then create the file: the initial publish should # complete with an alternate server. Breaking a second server should # not prevent an update from succeeding either. self.basedir = "mutable/Problems/test_bad_server_overlap" self.set_up_grid() nm = self.g.clients[0].nodemaker sb = nm.storage_broker peerids = [s.get_serverid() for s in sb.get_connected_servers()] self.g.break_server(peerids[0]) d = nm.create_mutable_file(MutableData("contents 1")) def _created(n): d = n.download_best_version() d.addCallback(lambda res: self.failUnlessEqual(res, "contents 1")) # now break one of the remaining servers def _break_second_server(res): self.g.break_server(peerids[1]) d.addCallback(_break_second_server) d.addCallback(lambda res: n.overwrite(MutableData("contents 2"))) # that ought to work too d.addCallback(lambda res: n.download_best_version()) d.addCallback(lambda res: self.failUnlessEqual(res, "contents 2")) return d d.addCallback(_created) return d def test_publish_all_servers_bad(self): # Break all servers: the publish should fail self.basedir = "mutable/Problems/test_publish_all_servers_bad" self.set_up_grid() nm = self.g.clients[0].nodemaker for s in nm.storage_broker.get_connected_servers(): s.get_rref().broken = True d = self.shouldFail(NotEnoughServersError, "test_publish_all_servers_bad", "ran out of good servers", nm.create_mutable_file, MutableData("contents")) return d def test_publish_no_servers(self): # no servers at all: the publish should fail self.basedir = "mutable/Problems/test_publish_no_servers" self.set_up_grid(num_servers=0) nm = self.g.clients[0].nodemaker d = self.shouldFail(NotEnoughServersError, "test_publish_no_servers", "Ran out of non-bad servers", nm.create_mutable_file, MutableData("contents")) return d def test_privkey_query_error(self): # when a servermap is updated with MODE_WRITE, it tries to get the # privkey. Something might go wrong during this query attempt. # Exercise the code in _privkey_query_failed which tries to handle # such an error. self.basedir = "mutable/Problems/test_privkey_query_error" self.set_up_grid(num_servers=20) nm = self.g.clients[0].nodemaker nm._node_cache = DevNullDictionary() # disable the nodecache # we need some contents that are large enough to push the privkey out # of the early part of the file LARGE = "These are Larger contents" * 2000 # about 50KB LARGE_uploadable = MutableData(LARGE) d = nm.create_mutable_file(LARGE_uploadable) def _created(n): self.uri = n.get_uri() self.n2 = nm.create_from_cap(self.uri) # When a mapupdate is performed on a node that doesn't yet know # the privkey, a short read is sent to a batch of servers, to get # the verinfo and (hopefully, if the file is short enough) the # encprivkey. Our file is too large to let this first read # contain the encprivkey. Each non-encprivkey-bearing response # that arrives (until the node gets the encprivkey) will trigger # a second read to specifically read the encprivkey. # # So, to exercise this case: # 1. notice which server gets a read() call first # 2. tell that server to start throwing errors killer = FirstServerGetsKilled() for s in nm.storage_broker.get_connected_servers(): s.get_rref().post_call_notifier = killer.notify d.addCallback(_created) # now we update a servermap from a new node (which doesn't have the # privkey yet, forcing it to use a separate privkey query). Note that # the map-update will succeed, since we'll just get a copy from one # of the other shares. 
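        # (FirstServerGetsKilled, defined above, is installed as a
        # post_call_notifier on every server wrapper: after the first remote
        # call completes it sets that wrapper's 'broken' flag, so the
        # follow-up encprivkey query to that same server fails and
        # _privkey_query_failed gets exercised.)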
d.addCallback(lambda res: self.n2.get_servermap(MODE_WRITE)) return d def test_privkey_query_missing(self): # like test_privkey_query_error, but the shares are deleted by the # second query, instead of raising an exception. self.basedir = "mutable/Problems/test_privkey_query_missing" self.set_up_grid(num_servers=20) nm = self.g.clients[0].nodemaker LARGE = "These are Larger contents" * 2000 # about 50KiB LARGE_uploadable = MutableData(LARGE) nm._node_cache = DevNullDictionary() # disable the nodecache d = nm.create_mutable_file(LARGE_uploadable) def _created(n): self.uri = n.get_uri() self.n2 = nm.create_from_cap(self.uri) deleter = FirstServerGetsDeleted() for s in nm.storage_broker.get_connected_servers(): s.get_rref().post_call_notifier = deleter.notify d.addCallback(_created) d.addCallback(lambda res: self.n2.get_servermap(MODE_WRITE)) return d def test_block_and_hash_query_error(self): # This tests for what happens when a query to a remote server # fails in either the hash validation step or the block getting # step (because of batching, this is the same actual query). # We need to have the storage server persist up until the point # that its prefix is validated, then suddenly die. This # exercises some exception handling code in Retrieve. self.basedir = "mutable/Problems/test_block_and_hash_query_error" self.set_up_grid(num_servers=20) nm = self.g.clients[0].nodemaker CONTENTS = "contents" * 2000 CONTENTS_uploadable = MutableData(CONTENTS) d = nm.create_mutable_file(CONTENTS_uploadable) def _created(node): self._node = node d.addCallback(_created) d.addCallback(lambda ignored: self._node.get_servermap(MODE_READ)) def _then(servermap): # we have our servermap. Now we set up the servers like the # tests above -- the first one that gets a read call should # start throwing errors, but only after returning its prefix # for validation. Since we'll download without fetching the # private key, the next query to the remote server will be # for either a block and salt or for hashes, either of which # will exercise the error handling code. killer = FirstServerGetsKilled() for s in nm.storage_broker.get_connected_servers(): s.get_rref().post_call_notifier = killer.notify ver = servermap.best_recoverable_version() assert ver return self._node.download_version(servermap, ver) d.addCallback(_then) d.addCallback(lambda data: self.failUnlessEqual(data, CONTENTS)) return d def test_1654(self): # test that the Retrieve object unconditionally verifies the block # hash tree root for mutable shares. The failure mode is that # carefully crafted shares can cause undetected corruption (the # retrieve appears to finish successfully, but the result is # corrupted). When fixed, these shares always cause a # CorruptShareError, which results in NotEnoughSharesError in this # 2-of-2 file. self.basedir = "mutable/Problems/test_1654" self.set_up_grid(num_servers=2) cap = uri.from_string(TEST_1654_CAP) si = cap.get_storage_index() for share, shnum in [(TEST_1654_SH0, 0), (TEST_1654_SH1, 1)]: sharedata = base64.b64decode(share) storedir = self.get_serverdir(shnum) storage_path = os.path.join(storedir, "shares", storage_index_to_dir(si)) fileutil.make_dirs(storage_path) fileutil.write(os.path.join(storage_path, "%d" % shnum), sharedata) nm = self.g.clients[0].nodemaker n = nm.create_from_cap(TEST_1654_CAP) # to exercise the problem correctly, we must ensure that sh0 is # processed first, and sh1 second. 
NoNetworkGrid has facilities to # stall the first request from a single server, but it's not # currently easy to extend that to stall the second request (mutable # retrievals will see two: first the mapupdate, then the fetch). # However, repeated executions of this run without the #1654 fix # suggests that we're failing reliably even without explicit stalls, # probably because the servers are queried in a fixed order. So I'm # ok with relying upon that. d = self.shouldFail(NotEnoughSharesError, "test #1654 share corruption", "ran out of servers", n.download_best_version) return d TEST_1654_CAP = "URI:SSK:6jthysgozssjnagqlcxjq7recm:yxawei54fmf2ijkrvs2shs6iey4kpdp6joi7brj2vrva6sp5nf3a" TEST_1654_SH0 = """\ VGFob2UgbXV0YWJsZSBjb250YWluZXIgdjEKdQlEA46m9s5j6lnzsOHytBTs2JOo AkWe8058hyrDa8igfBSqZMKO3aDOrFuRVt0ySYZ6oihFqPJRAAAAAAAAB8YAAAAA AAAJmgAAAAFPNgDkK8brSCzKz6n8HFqzbnAlALvnaB0Qpa1Bjo9jiZdmeMyneHR+ UoJcDb1Ls+lVLeUqP2JitBEXdCzcF/X2YMDlmKb2zmPqWfOw4fK0FOzYk6gCRZ7z AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABCDwr uIlhFlv21pDqyMeA9X1wHp98a1CKY4qfC7gn5exyODAcnhZKHCV18XBerbZLAgIA AAAAAAAAJgAAAAAAAAAmAAABjwAAAo8AAALTAAAC8wAAAAAAAAMGAAAAAAAAB8Yw ggEgMA0GCSqGSIb3DQEBAQUAA4IBDQAwggEIAoIBAQCXKMor062nfxHVutMbqNcj vVC92wXTcQulenNWEX+0huK54igTAG60p0lZ6FpBJ9A+dlStT386bn5I6qe50ky5 CFodQSsQX+1yByMFlzqPDo4rclk/6oVySLypxnt/iBs3FPZ4zruhYXcITc6zaYYU Xqaw/C86g6M06MWQKsGev7PS3tH7q+dtovWzDgU13Q8PG2whGvGNfxPOmEX4j0wL FCBavpFnLpo3bJrj27V33HXxpPz3NP+fkaG0pKH03ANd/yYHfGf74dC+eD5dvWBM DU6fZQN4k/T+cth+qzjS52FPPTY9IHXIb4y+1HryVvxcx6JDifKoOzpFc3SDbBAP AgERKDjOFxVClH81DF/QkqpP0glOh6uTsFNx8Nes02q0d7iip2WqfG9m2+LmiWy8 Pg7RlQQy2M45gert1EDsH4OI69uxteviZP1Mo0wD6HjmWUbGIQRmsT3DmYEZCCMA /KjhNmlov2+OhVxIaHwE7aN840IfkGdJ/JssB6Z/Ym3+ou4+jAYKhifPQGrpBVjd 73oH6w9StnoGYIrEEQw8LFc4jnAFYciKlPuo6E6E3zDseE7gwkcOpCtVVksZu6Ii GQgIV8vjFbNz9M//RMXOBTwKFDiG08IAPh7fv2uKzFis0TFrR7sQcMQ/kZZCLPPi ECIX95NRoFRlxK/1kZ1+FuuDQgABz9+5yd/pjkVybmvc7Jr70bOVpxvRoI2ZEgh/ +QdxfcwAAm5iDnzPtsVdcbuNkKprfI8N4n+QmUOSMbAJ7M8r1cp4z9+5yd/pjkVy bmvc7Jr70bOVpxvRoI2ZEgh/+QdxfcxGzRV0shAW86irr5bDQOyyknYk0p2xw2Wn z6QccyXyobXPOFLO3ZBPnKaE58aaN7x3srQZYUKafet5ZMDX8fsQf2mbxnaeG5NF eO6wG++WBUo9leddnzKBnRcMGRAtJEjwfKMVPE8SmuTlL6kRc7n8wvY2ygClWlRm d7o95tZfoO+mexB/DLEpWLtlAiqh8yJ8cWaC5rYz4ZC2+z7QkeKXCHWAN3i4C++u dfZoD7qWnyAldYTydADwL885dVY7WN6NX9YtQrG3JGrp3wZvFrX5x9Jv7hls0A6l 2xI4NlcSSrgWIjzrGdwQEjIUDyfc7DWroEpJEfIaSnjkeTT0D8WV5NqzWH8UwWoF wjwDltaQ3Y8O/wJPGBqBAJEob+p6QxvP5T2W1jnOvbgsMZLNDuY6FF1XcuR7yvNF sXKP6aXMV8BKSlrehFlpBMTu4HvJ1rZlKuxgR1A9njiaKD2U0NitCKMIpIXQxT6L eZn9M8Ky68m0Zjdw/WCsKz22GTljSM5Nfme32BrW+4G+R55ECwZ1oh08nrnWjXmw PlSHj2lwpnsuOG2fwJkyMnIIoIUII31VLATeLERD9HfMK8/+uZqJ2PftT2fhHL/u CDCIdEWSUBBHpA7p8BbgiZKCpYzf+pbS2/EJGL8gQAvSH1atGv/o0BiAd10MzTXC Xn5xDB1Yh+FtYPYloBGAwmxKieDMnsjy6wp5ovdmOc2y6KBr27DzgEGchLyOxHV4 Q7u0Hkm7Om33ir1TUgK6bdPFL8rGNDOZq/SR4yn4qSsQTPD6Y/HQSK5GzkU4dGLw tU6GNpu142QE36NfWkoUWHKf1YgIYrlAGJWlj93et54ZGUZGVN7pAspZ+mvoMnDU Jh46nrQsEJiQz8AqgREck4Fi4S7Rmjh/AhXmzFWFca3YD0BmuYU6fxGTRPZ70eys LV5qPTmTGpX+bpvufAp0vznkiOdqTn1flnxdslM2AukiD6OwkX1dBH8AvzObhbz0 ABhx3c+cAhAnYhJmsYaAwbpWpp8CM5opmsRgwgaz8f8lxiRfXbrWD8vdd4dm2B9J jaiGCR8/UXHFBGZhCgLB2S+BNXKynIeP+POGQtMIIERUtwOIKt1KfZ9jZwf/ulJK 
fv/VmBPmGu+CHvFIlHAzlxwJeUz8wSltUeeHjADZ9Wag5ESN3R6hsmJL+KL4av5v DFobNPiNWbc+4H+3wg1R0oK/uTQb8u1S7uWIGVmi5fJ4rVVZ/VKKtHGVwm/8OGKF tcrJFJcJADFVkgpsqN8UINsMJLxfJRoBgABEWih5DTRwNXK76Ma2LjDBrEvxhw8M 7SLKhi5vH7/Cs7jfLZFgh2T6flDV4VM/EA7CYEHgEb8MFmioFGOmhUpqifkA3SdX jGi2KuZZ5+O+sHFWXsUjiFPEzUJF+syPEzH1aF5R+F8pkhifeYh0KP6OHd6Sgn8s TStXB+q0MndBXw5ADp/Jac1DVaSWruVAdjemQ+si1olk8xH+uTMXU7PgV9WkpIiy 4BhnFU9IbCr/m7806c13xfeelaffP2pr7EDdgwz5K89VWCa3k9OSDnMtj2CQXlC7 bQHi/oRGA1aHSn84SIt+HpAfRoVdr4N90bYWmYQNqfKoyWCbEr+dge/GSD1nddAJ 72mXGlqyLyWYuAAAAAA=""" TEST_1654_SH1 = """\ VGFob2UgbXV0YWJsZSBjb250YWluZXIgdjEKdQlEA45R4Y4kuV458rSTGDVTqdzz 9Fig3NQ3LermyD+0XLeqbC7KNgvv6cNzMZ9psQQ3FseYsIR1AAAAAAAAB8YAAAAA AAAJmgAAAAFPNgDkd/Y9Z+cuKctZk9gjwF8thT+fkmNCsulILsJw5StGHAA1f7uL MG73c5WBcesHB2epwazfbD3/0UZTlxXWXotywVHhjiS5XjnytJMYNVOp3PP0WKDc AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABCDwr uIlhFlv21pDqyMeA9X1wHp98a1CKY4qfC7gn5exyODAcnhZKHCV18XBerbZLAgIA AAAAAAAAJgAAAAAAAAAmAAABjwAAAo8AAALTAAAC8wAAAAAAAAMGAAAAAAAAB8Yw ggEgMA0GCSqGSIb3DQEBAQUAA4IBDQAwggEIAoIBAQCXKMor062nfxHVutMbqNcj vVC92wXTcQulenNWEX+0huK54igTAG60p0lZ6FpBJ9A+dlStT386bn5I6qe50ky5 CFodQSsQX+1yByMFlzqPDo4rclk/6oVySLypxnt/iBs3FPZ4zruhYXcITc6zaYYU Xqaw/C86g6M06MWQKsGev7PS3tH7q+dtovWzDgU13Q8PG2whGvGNfxPOmEX4j0wL FCBavpFnLpo3bJrj27V33HXxpPz3NP+fkaG0pKH03ANd/yYHfGf74dC+eD5dvWBM DU6fZQN4k/T+cth+qzjS52FPPTY9IHXIb4y+1HryVvxcx6JDifKoOzpFc3SDbBAP AgERKDjOFxVClH81DF/QkqpP0glOh6uTsFNx8Nes02q0d7iip2WqfG9m2+LmiWy8 Pg7RlQQy2M45gert1EDsH4OI69uxteviZP1Mo0wD6HjmWUbGIQRmsT3DmYEZCCMA /KjhNmlov2+OhVxIaHwE7aN840IfkGdJ/JssB6Z/Ym3+ou4+jAYKhifPQGrpBVjd 73oH6w9StnoGYIrEEQw8LFc4jnAFYciKlPuo6E6E3zDseE7gwkcOpCtVVksZu6Ii GQgIV8vjFbNz9M//RMXOBTwKFDiG08IAPh7fv2uKzFis0TFrR7sQcMQ/kZZCLPPi ECIX95NRoFRlxK/1kZ1+FuuDQgABz9+5yd/pjkVybmvc7Jr70bOVpxvRoI2ZEgh/ +QdxfcwAAm5iDnzPtsVdcbuNkKprfI8N4n+QmUOSMbAJ7M8r1cp40cTBnAw+rMKC 98P4pURrotx116Kd0i3XmMZu81ew57H3Zb73r+syQCXZNOP0xhMDclIt0p2xw2Wn z6QccyXyobXPOFLO3ZBPnKaE58aaN7x3srQZYUKafet5ZMDX8fsQf2mbxnaeG5NF eO6wG++WBUo9leddnzKBnRcMGRAtJEjwfKMVPE8SmuTlL6kRc7n8wvY2ygClWlRm d7o95tZfoO+mexB/DLEpWLtlAiqh8yJ8cWaC5rYz4ZC2+z7QkeKXCHWAN3i4C++u dfZoD7qWnyAldYTydADwL885dVY7WN6NX9YtQrG3JGrp3wZvFrX5x9Jv7hls0A6l 2xI4NlcSSrgWIjzrGdwQEjIUDyfc7DWroEpJEfIaSnjkeTT0D8WV5NqzWH8UwWoF wjwDltaQ3Y8O/wJPGBqBAJEob+p6QxvP5T2W1jnOvbgsMZLNDuY6FF1XcuR7yvNF sXKP6aXMV8BKSlrehFlpBMTu4HvJ1rZlKuxgR1A9njiaKD2U0NitCKMIpIXQxT6L eZn9M8Ky68m0Zjdw/WCsKz22GTljSM5Nfme32BrW+4G+R55ECwZ1oh08nrnWjXmw PlSHj2lwpnsuOG2fwJkyMnIIoIUII31VLATeLERD9HfMK8/+uZqJ2PftT2fhHL/u CDCIdEWSUBBHpA7p8BbgiZKCpYzf+pbS2/EJGL8gQAvSH1atGv/o0BiAd10MzTXC Xn5xDB1Yh+FtYPYloBGAwmxKieDMnsjy6wp5ovdmOc2y6KBr27DzgEGchLyOxHV4 Q7u0Hkm7Om33ir1TUgK6bdPFL8rGNDOZq/SR4yn4qSsQTPD6Y/HQSK5GzkU4dGLw tU6GNpu142QE36NfWkoUWHKf1YgIYrlAGJWlj93et54ZGUZGVN7pAspZ+mvoMnDU Jh46nrQsEJiQz8AqgREck4Fi4S7Rmjh/AhXmzFWFca3YD0BmuYU6fxGTRPZ70eys LV5qPTmTGpX+bpvufAp0vznkiOdqTn1flnxdslM2AukiD6OwkX1dBH8AvzObhbz0 ABhx3c+cAhAnYhJmsYaAwbpWpp8CM5opmsRgwgaz8f8lxiRfXbrWD8vdd4dm2B9J jaiGCR8/UXHFBGZhCgLB2S+BNXKynIeP+POGQtMIIERUtwOIKt1KfZ9jZwf/ulJK fv/VmBPmGu+CHvFIlHAzlxwJeUz8wSltUeeHjADZ9Wag5ESN3R6hsmJL+KL4av5v DFobNPiNWbc+4H+3wg1R0oK/uTQb8u1S7uWIGVmi5fJ4rVVZ/VKKtHGVwm/8OGKF tcrJFJcJADFVkgpsqN8UINsMJLxfJRoBgABEWih5DTRwNXK76Ma2LjDBrEvxhw8M 
7SLKhi5vH7/Cs7jfLZFgh2T6flDV4VM/EA7CYEHgEb8MFmioFGOmhUpqifkA3SdX jGi2KuZZ5+O+sHFWXsUjiFPEzUJF+syPEzH1aF5R+F8pkhifeYh0KP6OHd6Sgn8s TStXB+q0MndBXw5ADp/Jac1DVaSWruVAdjemQ+si1olk8xH+uTMXU7PgV9WkpIiy 4BhnFU9IbCr/m7806c13xfeelaffP2pr7EDdgwz5K89VWCa3k9OSDnMtj2CQXlC7 bQHi/oRGA1aHSn84SIt+HpAfRoVdr4N90bYWmYQNqfKoyWCbEr+dge/GSD1nddAJ 72mXGlqyLyWYuAAAAAA=""" class FileHandle(unittest.TestCase): def setUp(self): self.test_data = "Test Data" * 50000 self.sio = StringIO(self.test_data) self.uploadable = MutableFileHandle(self.sio) def test_filehandle_read(self): self.basedir = "mutable/FileHandle/test_filehandle_read" chunk_size = 10 for i in xrange(0, len(self.test_data), chunk_size): data = self.uploadable.read(chunk_size) data = "".join(data) start = i end = i + chunk_size self.failUnlessEqual(data, self.test_data[start:end]) def test_filehandle_get_size(self): self.basedir = "mutable/FileHandle/test_filehandle_get_size" actual_size = len(self.test_data) size = self.uploadable.get_size() self.failUnlessEqual(size, actual_size) def test_filehandle_get_size_out_of_order(self): # We should be able to call get_size whenever we want without # disturbing the location of the seek pointer. chunk_size = 100 data = self.uploadable.read(chunk_size) self.failUnlessEqual("".join(data), self.test_data[:chunk_size]) # Now get the size. size = self.uploadable.get_size() self.failUnlessEqual(size, len(self.test_data)) # Now get more data. We should be right where we left off. more_data = self.uploadable.read(chunk_size) start = chunk_size end = chunk_size * 2 self.failUnlessEqual("".join(more_data), self.test_data[start:end]) def test_filehandle_file(self): # Make sure that the MutableFileHandle works on a file as well # as a StringIO object, since in some cases it will be asked to # deal with files. self.basedir = self.mktemp() # necessary? What am I doing wrong here? os.mkdir(self.basedir) f_path = os.path.join(self.basedir, "test_file") f = open(f_path, "w") f.write(self.test_data) f.close() f = open(f_path, "r") uploadable = MutableFileHandle(f) data = uploadable.read(len(self.test_data)) self.failUnlessEqual("".join(data), self.test_data) size = uploadable.get_size() self.failUnlessEqual(size, len(self.test_data)) def test_close(self): # Make sure that the MutableFileHandle closes its handle when # told to do so. self.uploadable.close() self.failUnless(self.sio.closed) class DataHandle(unittest.TestCase): def setUp(self): self.test_data = "Test Data" * 50000 self.uploadable = MutableData(self.test_data) def test_datahandle_read(self): chunk_size = 10 for i in xrange(0, len(self.test_data), chunk_size): data = self.uploadable.read(chunk_size) data = "".join(data) start = i end = i + chunk_size self.failUnlessEqual(data, self.test_data[start:end]) def test_datahandle_get_size(self): actual_size = len(self.test_data) size = self.uploadable.get_size() self.failUnlessEqual(size, actual_size) def test_datahandle_get_size_out_of_order(self): # We should be able to call get_size whenever we want without # disturbing the location of the seek pointer. chunk_size = 100 data = self.uploadable.read(chunk_size) self.failUnlessEqual("".join(data), self.test_data[:chunk_size]) # Now get the size. size = self.uploadable.get_size() self.failUnlessEqual(size, len(self.test_data)) # Now get more data. We should be right where we left off. 
more_data = self.uploadable.read(chunk_size) start = chunk_size end = chunk_size * 2 self.failUnlessEqual("".join(more_data), self.test_data[start:end]) class Version(GridTestMixin, unittest.TestCase, testutil.ShouldFailMixin, \ PublishMixin): def setUp(self): GridTestMixin.setUp(self) self.basedir = self.mktemp() self.set_up_grid() self.c = self.g.clients[0] self.nm = self.c.nodemaker self.data = "test data" * 100000 # about 900 KiB; MDMF self.small_data = "test data" * 10 # about 90 B; SDMF def do_upload_mdmf(self): d = self.nm.create_mutable_file(MutableData(self.data), version=MDMF_VERSION) def _then(n): assert isinstance(n, MutableFileNode) assert n._protocol_version == MDMF_VERSION self.mdmf_node = n return n d.addCallback(_then) return d def do_upload_sdmf(self): d = self.nm.create_mutable_file(MutableData(self.small_data)) def _then(n): assert isinstance(n, MutableFileNode) assert n._protocol_version == SDMF_VERSION self.sdmf_node = n return n d.addCallback(_then) return d def do_upload_empty_sdmf(self): d = self.nm.create_mutable_file(MutableData("")) def _then(n): assert isinstance(n, MutableFileNode) self.sdmf_zero_length_node = n assert n._protocol_version == SDMF_VERSION return n d.addCallback(_then) return d def do_upload(self): d = self.do_upload_mdmf() d.addCallback(lambda ign: self.do_upload_sdmf()) return d def test_debug(self): d = self.do_upload_mdmf() def _debug(n): fso = debug.FindSharesOptions() storage_index = base32.b2a(n.get_storage_index()) fso.si_s = storage_index fso.nodedirs = [unicode(os.path.dirname(os.path.abspath(storedir))) for (i,ss,storedir) in self.iterate_servers()] fso.stdout = StringIO() fso.stderr = StringIO() debug.find_shares(fso) sharefiles = fso.stdout.getvalue().splitlines() expected = self.nm.default_encoding_parameters["n"] self.failUnlessEqual(len(sharefiles), expected) do = debug.DumpOptions() do["filename"] = sharefiles[0] do.stdout = StringIO() debug.dump_share(do) output = do.stdout.getvalue() lines = set(output.splitlines()) self.failUnless("Mutable slot found:" in lines, output) self.failUnless(" share_type: MDMF" in lines, output) self.failUnless(" num_extra_leases: 0" in lines, output) self.failUnless(" MDMF contents:" in lines, output) self.failUnless(" seqnum: 1" in lines, output) self.failUnless(" required_shares: 3" in lines, output) self.failUnless(" total_shares: 10" in lines, output) self.failUnless(" segsize: 131073" in lines, output) self.failUnless(" datalen: %d" % len(self.data) in lines, output) vcap = n.get_verify_cap().to_string() self.failUnless(" verify-cap: %s" % vcap in lines, output) cso = debug.CatalogSharesOptions() cso.nodedirs = fso.nodedirs cso.stdout = StringIO() cso.stderr = StringIO() debug.catalog_shares(cso) shares = cso.stdout.getvalue().splitlines() oneshare = shares[0] # all shares should be MDMF self.failIf(oneshare.startswith("UNKNOWN"), oneshare) self.failUnless(oneshare.startswith("MDMF"), oneshare) fields = oneshare.split() self.failUnlessEqual(fields[0], "MDMF") self.failUnlessEqual(fields[1], storage_index) self.failUnlessEqual(fields[2], "3/10") self.failUnlessEqual(fields[3], "%d" % len(self.data)) self.failUnless(fields[4].startswith("#1:"), fields[3]) # the rest of fields[4] is the roothash, which depends upon # encryption salts and is not constant. fields[5] is the # remaining time on the longest lease, which is timing dependent. # The rest of the line is the quoted pathname to the share. 
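            # (Illustration only, not one of the original assertions: the
            #  hard-coded "3/10" above is the default k-of-n encoding; a
            #  hedged, equivalent way to express that check would be
            #
            #      k = self.nm.default_encoding_parameters["k"]
            #      n = self.nm.default_encoding_parameters["n"]
            #      self.failUnlessEqual(fields[2], "%d/%d" % (k, n))
            #
            #  assuming the nodemaker defaults are what produced this file.)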
        d.addCallback(_debug)
        return d

    def test_get_sequence_number(self):
        d = self.do_upload()
        d.addCallback(lambda ign: self.mdmf_node.get_best_readable_version())
        d.addCallback(lambda bv:
                      self.failUnlessEqual(bv.get_sequence_number(), 1))
        d.addCallback(lambda ignored:
                      self.sdmf_node.get_best_readable_version())
        d.addCallback(lambda bv:
                      self.failUnlessEqual(bv.get_sequence_number(), 1))
        # Now update. After the update, the sequence number in both cases
        # should be 2.
        def _do_update(ignored):
            new_data = MutableData("foo bar baz" * 100000)
            new_small_data = MutableData("foo bar baz" * 10)
            d1 = self.mdmf_node.overwrite(new_data)
            d2 = self.sdmf_node.overwrite(new_small_data)
            dl = gatherResults([d1, d2])
            return dl
        d.addCallback(_do_update)
        d.addCallback(lambda ignored:
                      self.mdmf_node.get_best_readable_version())
        d.addCallback(lambda bv:
                      self.failUnlessEqual(bv.get_sequence_number(), 2))
        d.addCallback(lambda ignored:
                      self.sdmf_node.get_best_readable_version())
        d.addCallback(lambda bv:
                      self.failUnlessEqual(bv.get_sequence_number(), 2))
        return d

    def test_cap_after_upload(self):
        # If we create a new mutable file and upload things to it, and
        # it's an MDMF file, we should get an MDMF cap back from that
        # file and should be able to use that.
        # That's essentially what MDMF node is, so just check that.
        d = self.do_upload_mdmf()
        def _then(ign):
            mdmf_uri = self.mdmf_node.get_uri()
            cap = uri.from_string(mdmf_uri)
            self.failUnless(isinstance(cap, uri.WriteableMDMFFileURI))
            readonly_mdmf_uri = self.mdmf_node.get_readonly_uri()
            cap = uri.from_string(readonly_mdmf_uri)
            self.failUnless(isinstance(cap, uri.ReadonlyMDMFFileURI))
        d.addCallback(_then)
        return d

    def test_mutable_version(self):
        # assert that getting parameters from the IMutableVersion object
        # gives us the same data as getting them from the filenode itself
        d = self.do_upload()
        d.addCallback(lambda ign: self.mdmf_node.get_best_mutable_version())
        def _check_mdmf(bv):
            n = self.mdmf_node
            self.failUnlessEqual(bv.get_writekey(), n.get_writekey())
            self.failUnlessEqual(bv.get_storage_index(), n.get_storage_index())
            self.failIf(bv.is_readonly())
        d.addCallback(_check_mdmf)
        d.addCallback(lambda ign: self.sdmf_node.get_best_mutable_version())
        def _check_sdmf(bv):
            n = self.sdmf_node
            self.failUnlessEqual(bv.get_writekey(), n.get_writekey())
            self.failUnlessEqual(bv.get_storage_index(), n.get_storage_index())
            self.failIf(bv.is_readonly())
        d.addCallback(_check_sdmf)
        return d

    def test_get_readonly_version(self):
        d = self.do_upload()
        d.addCallback(lambda ign: self.mdmf_node.get_best_readable_version())
        d.addCallback(lambda bv: self.failUnless(bv.is_readonly()))

        # Attempting to get a mutable version of a mutable file from a
        # filenode initialized with a readcap should return a readonly
        # version of that same node.
d.addCallback(lambda ign: self.mdmf_node.get_readonly()) d.addCallback(lambda ro: ro.get_best_mutable_version()) d.addCallback(lambda v: self.failUnless(v.is_readonly())) d.addCallback(lambda ign: self.sdmf_node.get_best_readable_version()) d.addCallback(lambda bv: self.failUnless(bv.is_readonly())) d.addCallback(lambda ign: self.sdmf_node.get_readonly()) d.addCallback(lambda ro: ro.get_best_mutable_version()) d.addCallback(lambda v: self.failUnless(v.is_readonly())) return d def test_toplevel_overwrite(self): new_data = MutableData("foo bar baz" * 100000) new_small_data = MutableData("foo bar baz" * 10) d = self.do_upload() d.addCallback(lambda ign: self.mdmf_node.overwrite(new_data)) d.addCallback(lambda ignored: self.mdmf_node.download_best_version()) d.addCallback(lambda data: self.failUnlessEqual(data, "foo bar baz" * 100000)) d.addCallback(lambda ignored: self.sdmf_node.overwrite(new_small_data)) d.addCallback(lambda ignored: self.sdmf_node.download_best_version()) d.addCallback(lambda data: self.failUnlessEqual(data, "foo bar baz" * 10)) return d def test_toplevel_modify(self): d = self.do_upload() def modifier(old_contents, servermap, first_time): return old_contents + "modified" d.addCallback(lambda ign: self.mdmf_node.modify(modifier)) d.addCallback(lambda ignored: self.mdmf_node.download_best_version()) d.addCallback(lambda data: self.failUnlessIn("modified", data)) d.addCallback(lambda ignored: self.sdmf_node.modify(modifier)) d.addCallback(lambda ignored: self.sdmf_node.download_best_version()) d.addCallback(lambda data: self.failUnlessIn("modified", data)) return d def test_version_modify(self): # TODO: When we can publish multiple versions, alter this test # to modify a version other than the best usable version, then # test to see that the best recoverable version is that. d = self.do_upload() def modifier(old_contents, servermap, first_time): return old_contents + "modified" d.addCallback(lambda ign: self.mdmf_node.modify(modifier)) d.addCallback(lambda ignored: self.mdmf_node.download_best_version()) d.addCallback(lambda data: self.failUnlessIn("modified", data)) d.addCallback(lambda ignored: self.sdmf_node.modify(modifier)) d.addCallback(lambda ignored: self.sdmf_node.download_best_version()) d.addCallback(lambda data: self.failUnlessIn("modified", data)) return d def test_download_version(self): d = self.publish_multiple() # We want to have two recoverable versions on the grid. d.addCallback(lambda res: self._set_versions({0:0,2:0,4:0,6:0,8:0, 1:1,3:1,5:1,7:1,9:1})) # Now try to download each version. We should get the plaintext # associated with that version. 
d.addCallback(lambda ignored: self._fn.get_servermap(mode=MODE_READ)) def _got_servermap(smap): versions = smap.recoverable_versions() assert len(versions) == 2 self.servermap = smap self.version1, self.version2 = versions assert self.version1 != self.version2 self.version1_seqnum = self.version1[0] self.version2_seqnum = self.version2[0] self.version1_index = self.version1_seqnum - 1 self.version2_index = self.version2_seqnum - 1 d.addCallback(_got_servermap) d.addCallback(lambda ignored: self._fn.download_version(self.servermap, self.version1)) d.addCallback(lambda results: self.failUnlessEqual(self.CONTENTS[self.version1_index], results)) d.addCallback(lambda ignored: self._fn.download_version(self.servermap, self.version2)) d.addCallback(lambda results: self.failUnlessEqual(self.CONTENTS[self.version2_index], results)) return d def test_download_nonexistent_version(self): d = self.do_upload_mdmf() d.addCallback(lambda ign: self.mdmf_node.get_servermap(mode=MODE_WRITE)) def _set_servermap(servermap): self.servermap = servermap d.addCallback(_set_servermap) d.addCallback(lambda ignored: self.shouldFail(UnrecoverableFileError, "nonexistent version", None, self.mdmf_node.download_version, self.servermap, "not a version")) return d def test_partial_read(self): d = self.do_upload_mdmf() d.addCallback(lambda ign: self.mdmf_node.get_best_readable_version()) modes = [("start_on_segment_boundary", mathutil.next_multiple(128 * 1024, 3), 50), ("ending_one_byte_after_segment_boundary", mathutil.next_multiple(128 * 1024, 3)-50, 51), ("zero_length_at_start", 0, 0), ("zero_length_in_middle", 50, 0), ("zero_length_at_segment_boundary", mathutil.next_multiple(128 * 1024, 3), 0), ] for (name, offset, length) in modes: d.addCallback(self._do_partial_read, name, offset, length) # then read only a few bytes at a time, and see that the results are # what we expect. def _read_data(version): c = consumer.MemoryConsumer() d2 = defer.succeed(None) for i in xrange(0, len(self.data), 10000): d2.addCallback(lambda ignored, i=i: version.read(c, i, 10000)) d2.addCallback(lambda ignored: self.failUnlessEqual(self.data, "".join(c.chunks))) return d2 d.addCallback(_read_data) return d def _do_partial_read(self, version, name, offset, length): c = consumer.MemoryConsumer() d = version.read(c, offset, length) expected = self.data[offset:offset+length] d.addCallback(lambda ignored: "".join(c.chunks)) def _check(results): if results != expected: print print "got: %s ... %s" % (results[:20], results[-20:]) print "exp: %s ... 
%s" % (expected[:20], expected[-20:]) self.fail("results[%s] != expected" % name) return version # daisy-chained to next call d.addCallback(_check) return d def _test_read_and_download(self, node, expected): d = node.get_best_readable_version() def _read_data(version): c = consumer.MemoryConsumer() d2 = defer.succeed(None) d2.addCallback(lambda ignored: version.read(c)) d2.addCallback(lambda ignored: self.failUnlessEqual(expected, "".join(c.chunks))) return d2 d.addCallback(_read_data) d.addCallback(lambda ignored: node.download_best_version()) d.addCallback(lambda data: self.failUnlessEqual(expected, data)) return d def test_read_and_download_mdmf(self): d = self.do_upload_mdmf() d.addCallback(self._test_read_and_download, self.data) return d def test_read_and_download_sdmf(self): d = self.do_upload_sdmf() d.addCallback(self._test_read_and_download, self.small_data) return d def test_read_and_download_sdmf_zero_length(self): d = self.do_upload_empty_sdmf() d.addCallback(self._test_read_and_download, "") return d class Update(GridTestMixin, unittest.TestCase, testutil.ShouldFailMixin): timeout = 400 # these tests are too big, 120s is not enough on slow # platforms def setUp(self): GridTestMixin.setUp(self) self.basedir = self.mktemp() self.set_up_grid() self.c = self.g.clients[0] self.nm = self.c.nodemaker self.data = "testdata " * 100000 # about 900 KiB; MDMF self.small_data = "test data" * 10 # about 90 B; SDMF def do_upload_sdmf(self): d = self.nm.create_mutable_file(MutableData(self.small_data)) def _then(n): assert isinstance(n, MutableFileNode) self.sdmf_node = n # Make SDMF node that has 255 shares. self.nm.default_encoding_parameters['n'] = 255 self.nm.default_encoding_parameters['k'] = 127 return self.nm.create_mutable_file(MutableData(self.small_data)) d.addCallback(_then) def _then2(n): assert isinstance(n, MutableFileNode) self.sdmf_max_shares_node = n d.addCallback(_then2) return d def do_upload_mdmf(self): d = self.nm.create_mutable_file(MutableData(self.data), version=MDMF_VERSION) def _then(n): assert isinstance(n, MutableFileNode) self.mdmf_node = n # Make MDMF node that has 255 shares. self.nm.default_encoding_parameters['n'] = 255 self.nm.default_encoding_parameters['k'] = 127 return self.nm.create_mutable_file(MutableData(self.data), version=MDMF_VERSION) d.addCallback(_then) def _then2(n): assert isinstance(n, MutableFileNode) self.mdmf_max_shares_node = n d.addCallback(_then2) return d def _test_replace(self, offset, new_data): expected = self.data[:offset]+new_data+self.data[offset+len(new_data):] d0 = self.do_upload_mdmf() def _run(ign): d = defer.succeed(None) for node in (self.mdmf_node, self.mdmf_max_shares_node): # close over 'node'. d.addCallback(lambda ign, node=node: node.get_best_mutable_version()) d.addCallback(lambda mv: mv.update(MutableData(new_data), offset)) d.addCallback(lambda ign, node=node: node.download_best_version()) def _check(results): if results != expected: print print "got: %s ... %s" % (results[:20], results[-20:]) print "exp: %s ... %s" % (expected[:20], expected[-20:]) self.fail("results != expected") d.addCallback(_check) return d d0.addCallback(_run) return d0 def test_append(self): # We should be able to append data to a mutable file and get # what we expect. return self._test_replace(len(self.data), "appended") def test_replace_middle(self): # We should be able to replace data in the middle of a mutable # file and get what we expect back. 
        return self._test_replace(100, "replaced")

    def test_replace_beginning(self):
        # We should be able to replace data at the beginning of the file
        # without truncating the file
        return self._test_replace(0, "beginning")

    def test_replace_segstart1(self):
        return self._test_replace(128*1024+1, "NNNN")

    def test_replace_zero_length_beginning(self):
        return self._test_replace(0, "")

    def test_replace_zero_length_middle(self):
        return self._test_replace(50, "")

    def test_replace_zero_length_segstart1(self):
        return self._test_replace(128*1024+1, "")

    def test_replace_and_extend(self):
        # We should be able to replace data in the middle of a mutable
        # file and extend that mutable file and get what we expect.
        return self._test_replace(100, "modified " * 100000)

    def _check_differences(self, got, expected):
        # displaying arbitrary file corruption is tricky for a
        # 1MB file of repeating data, so look for likely places
        # with problems and display them separately
        gotmods = [mo.span() for mo in re.finditer('([A-Z]+)', got)]
        expmods = [mo.span() for mo in re.finditer('([A-Z]+)', expected)]
        gotspans = ["%d:%d=%s" % (start,end,got[start:end])
                    for (start,end) in gotmods]
        expspans = ["%d:%d=%s" % (start,end,expected[start:end])
                    for (start,end) in expmods]
        #print "expecting: %s" % expspans
        SEGSIZE = 128*1024
        if got != expected:
            print "differences:"
            for segnum in range(len(expected)//SEGSIZE):
                start = segnum * SEGSIZE
                end = (segnum+1) * SEGSIZE
                got_ends = "%s .. %s" % (got[start:start+20], got[end-20:end])
                exp_ends = "%s .. %s" % (expected[start:start+20],
                                         expected[end-20:end])
                if got_ends != exp_ends:
                    print "expected[%d]: %s" % (start, exp_ends)
                    print "got [%d]: %s" % (start, got_ends)
            if expspans != gotspans:
                print "expected: %s" % expspans
                print "got : %s" % gotspans
            open("EXPECTED","wb").write(expected)
            open("GOT","wb").write(got)
            print "wrote data to EXPECTED and GOT"
            self.fail("didn't get expected data")

    def test_replace_locations(self):
        # exercise fencepost conditions
        SEGSIZE = 128*1024
        suspects = range(SEGSIZE-3, SEGSIZE+1)+range(2*SEGSIZE-3, 2*SEGSIZE+1)
        letters = iter("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
        d0 = self.do_upload_mdmf()
        def _run(ign):
            expected = self.data
            d = defer.succeed(None)
            for offset in suspects:
                new_data = letters.next()*2 # "AA", then "BB", etc
                expected = expected[:offset]+new_data+expected[offset+2:]
                d.addCallback(lambda ign:
                              self.mdmf_node.get_best_mutable_version())
                def _modify(mv, offset=offset, new_data=new_data):
                    # close over 'offset','new_data'
                    md = MutableData(new_data)
                    return mv.update(md, offset)
                d.addCallback(_modify)
                d.addCallback(lambda ignored:
                              self.mdmf_node.download_best_version())
                d.addCallback(self._check_differences, expected)
            return d
        d0.addCallback(_run)
        return d0

    def test_replace_locations_max_shares(self):
        # exercise fencepost conditions
        SEGSIZE = 128*1024
        suspects = range(SEGSIZE-3, SEGSIZE+1)+range(2*SEGSIZE-3, 2*SEGSIZE+1)
        letters = iter("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
        d0 = self.do_upload_mdmf()
        def _run(ign):
            expected = self.data
            d = defer.succeed(None)
            for offset in suspects:
                new_data = letters.next()*2 # "AA", then "BB", etc
                expected = expected[:offset]+new_data+expected[offset+2:]
                d.addCallback(lambda ign:
                              self.mdmf_max_shares_node.get_best_mutable_version())
                def _modify(mv, offset=offset, new_data=new_data):
                    # close over 'offset','new_data'
                    md = MutableData(new_data)
                    return mv.update(md, offset)
                d.addCallback(_modify)
                d.addCallback(lambda ignored:
                              self.mdmf_max_shares_node.download_best_version())
                d.addCallback(self._check_differences, expected)
            return d
        d0.addCallback(_run)
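        # (For reference, illustration only: with SEGSIZE = 128*1024 the
        #  "suspects" list used above works out to the offsets at and just
        #  below the first two segment boundaries, so the two-byte writes
        #  exercise the bytes right around each boundary:
        #
        #      suspects = (range(SEGSIZE-3, SEGSIZE+1) +
        #                  range(2*SEGSIZE-3, 2*SEGSIZE+1))
        #      assert suspects == [131069, 131070, 131071, 131072,
        #                          262141, 262142, 262143, 262144]
        #  )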
return d0 def test_append_power_of_two(self): # If we attempt to extend a mutable file so that its segment # count crosses a power-of-two boundary, the update operation # should know how to reencode the file. # Note that the data populating self.mdmf_node is about 900 KiB # long -- this is 7 segments in the default segment size. So we # need to add 2 segments worth of data to push it over a # power-of-two boundary. segment = "a" * DEFAULT_MAX_SEGMENT_SIZE new_data = self.data + (segment * 2) d0 = self.do_upload_mdmf() def _run(ign): d = defer.succeed(None) for node in (self.mdmf_node, self.mdmf_max_shares_node): # close over 'node'. d.addCallback(lambda ign, node=node: node.get_best_mutable_version()) d.addCallback(lambda mv: mv.update(MutableData(segment * 2), len(self.data))) d.addCallback(lambda ign, node=node: node.download_best_version()) d.addCallback(lambda results: self.failUnlessEqual(results, new_data)) return d d0.addCallback(_run) return d0 def test_update_sdmf(self): # Running update on a single-segment file should still work. new_data = self.small_data + "appended" d0 = self.do_upload_sdmf() def _run(ign): d = defer.succeed(None) for node in (self.sdmf_node, self.sdmf_max_shares_node): # close over 'node'. d.addCallback(lambda ign, node=node: node.get_best_mutable_version()) d.addCallback(lambda mv: mv.update(MutableData("appended"), len(self.small_data))) d.addCallback(lambda ign, node=node: node.download_best_version()) d.addCallback(lambda results: self.failUnlessEqual(results, new_data)) return d d0.addCallback(_run) return d0 def test_replace_in_last_segment(self): # The wrapper should know how to handle the tail segment # appropriately. replace_offset = len(self.data) - 100 new_data = self.data[:replace_offset] + "replaced" rest_offset = replace_offset + len("replaced") new_data += self.data[rest_offset:] d0 = self.do_upload_mdmf() def _run(ign): d = defer.succeed(None) for node in (self.mdmf_node, self.mdmf_max_shares_node): # close over 'node'. d.addCallback(lambda ign, node=node: node.get_best_mutable_version()) d.addCallback(lambda mv: mv.update(MutableData("replaced"), replace_offset)) d.addCallback(lambda ign, node=node: node.download_best_version()) d.addCallback(lambda results: self.failUnlessEqual(results, new_data)) return d d0.addCallback(_run) return d0 def test_multiple_segment_replace(self): replace_offset = 2 * DEFAULT_MAX_SEGMENT_SIZE new_data = self.data[:replace_offset] new_segment = "a" * DEFAULT_MAX_SEGMENT_SIZE new_data += 2 * new_segment new_data += "replaced" rest_offset = len(new_data) new_data += self.data[rest_offset:] d0 = self.do_upload_mdmf() def _run(ign): d = defer.succeed(None) for node in (self.mdmf_node, self.mdmf_max_shares_node): # close over 'node'. 
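                # (For reference, illustration only: the second node in this
                #  loop comes from do_upload_mdmf(), which creates it after
                #  raising the encoding parameters to the maximum share count,
                #  roughly
                #
                #      self.nm.default_encoding_parameters['n'] = 255
                #      self.nm.default_encoding_parameters['k'] = 127
                #
                #  so each update is exercised at both the nodemaker's default
                #  encoding and the 127-of-255 extreme.)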
d.addCallback(lambda ign, node=node: node.get_best_mutable_version()) d.addCallback(lambda mv: mv.update(MutableData((2 * new_segment) + "replaced"), replace_offset)) d.addCallback(lambda ignored, node=node: node.download_best_version()) d.addCallback(lambda results: self.failUnlessEqual(results, new_data)) return d d0.addCallback(_run) return d0 class Interoperability(GridTestMixin, unittest.TestCase, testutil.ShouldFailMixin): sdmf_old_shares = {} sdmf_old_shares[0] = "VGFob2UgbXV0YWJsZSBjb250YWluZXIgdjEKdQlEA47ESLbTdKdpLJXCpBxd5OH239tl5hvAiz1dvGdE5rIOpf8cbfxbPcwNF+Y5dM92uBVbmV6KAAAAAAAAB/wAAAAAAAAJ0AAAAAFOWSw7jSx7WXzaMpdleJYXwYsRCV82jNA5oex9m2YhXSnb2POh+vvC1LE1NAfRc9GOb2zQG84Xdsx1Jub2brEeKkyt0sRIttN0p2kslcKkHF3k4fbf22XmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABamJprL6ecrsOoFKdrXUmWveLq8nzEGDOjFnyK9detI3noX3uyK2MwSnFdAfyN0tuAwoAAAAAAAAAFQAAAAAAAAAVAAABjwAAAo8AAAMXAAADNwAAAAAAAAM+AAAAAAAAB/wwggEgMA0GCSqGSIb3DQEBAQUAA4IBDQAwggEIAoIBAQC1IkainlJF12IBXBQdpRK1zXB7a26vuEYqRmQM09YjC6sQjCs0F2ICk8n9m/2Kw4l16eIEboB2Au9pODCE+u/dEAakEFh4qidTMn61rbGUbsLK8xzuWNW22ezzz9/nPia0HDrulXt51/FYtfnnAuD1RJGXJv/8tDllE9FL/18TzlH4WuB6Fp8FTgv7QdbZAfWJHDGFIpVCJr1XxOCsSZNFJIqGwZnD2lsChiWw5OJDbKd8otqN1hIbfHyMyfMOJ/BzRzvZXaUt4Dv5nf93EmQDWClxShRwpuX/NkZ5B2K9OFonFTbOCexm/MjMAdCBqebKKaiHFkiknUCn9eJQpZ5bAgERgV50VKj+AVTDfgTpqfO2vfo4wrufi6ZBb8QV7hllhUFBjYogQ9C96dnS7skv0s+cqFuUjwMILr5/rsbEmEMGvl0T0ytyAbtlXuowEFVj/YORNknM4yjY72YUtEPTlMpk0Cis7aIgTvu5qWMPER26PMApZuRqiwRsGIkaJIvOVOTHHjFYe3/YzdMkc7OZtqRMfQLtwVl2/zKQQV8b/a9vaT6q3mRLRd4P3esaAFe/+7sR/t+9tmB+a8kxtKM6kmaVQJMbXJZ4aoHGfeLX0m35Rcvu2Bmph7QfSDjk/eaE3q55zYSoGWShmlhlw4Kwg84sMuhmcVhLvo0LovR8bKmbdgACtTh7+7gs/l5w1lOkgbF6w7rkXLNslK7L2KYF4SPFLUcABOOLy8EETxh7h7/z9d62EiPu9CNpRrCOLxUhn+JUS+DuAAhgcAb/adrQFrhlrRNoRpvjDuxmFebA4F0qCyqWssm61AAQ/EX4eC/1+hGOQ/h4EiKUkqxdsfzdcPlDvd11SGWZ0VHsUclZChTzuBAU2zLTXm+cG8IFhO50ly6Ey/DB44NtMKVaVzO0nU8DE0Wua7Lx6Bnad5n91qmHAnwSEJE5YIhQM634omd6cq9Wk4seJCUIn+ucoknrpxp0IR9QMxpKSMRHRUg2K8ZegnY3YqFunRZKCfsq9ufQEKgjZN12AFqi551KPBdn4/3V5HK6xTv0P4robSsE/BvuIfByvRf/W7ZrDx+CFC4EEcsBOACOZCrkhhqd5TkYKbe9RA+vs56+9N5qZGurkxcoKviiyEncxvTuShD65DK/6x6kMDMgQv/EdZDI3x9GtHTnRBYXwDGnPJ19w+q2zC3e2XarbxTGYQIPEC5mYx0gAA0sbjf018NGfwBhl6SB54iGsa8uLvR3jHv6OSRJgwxL6j7P0Ts4Hv2EtO12P0Lv21pwi3JC1O/WviSrKCvrQD5lMHL9Uym3hwFi2zu0mqwZvxOAbGy7kfOPXkLYKOHTZLthzKj3PsdjeceWBfYIvPGKYcd6wDr36d1aXSYS4IWeApTS2AQ2lu0DUcgSefAvsA8NkgOklvJY1cjTMSg6j6cxQo48Bvl8RAWGLbr4h2S/8KwDGxwLsSv0Gop/gnFc3GzCsmL0EkEyHHWkCA8YRXCghfW80KLDV495ff7yF5oiwK56GniqowZ3RG9Jxp5MXoJQgsLV1VMQFMAmsY69yz8eoxRH3wl9L0dMyndLulhWWzNwPMQ2I0yAWdzA/pksVmwTJTFenB3MHCiWc5rEwJ3yofe6NZZnZQrYyL9r1TNnVwfTwRUiykPiLSk4x9Mi6DX7RamDAxc8u3gDVfjPsTOTagBOEGUWlGAL54KE/E6sgCQ5DEAt12chk8AxbjBFLPgV+/idrzS0lZHOL+IVBI9D0i3Bq1yZcSIqcjZB0M3IbxbPm4gLAYOWEiTUN2ecsEHHg9nt6rhgffVoqSbCCFPbpC0xf7WOC3+BQORIZECOCC7cUAciXq3xn+GuxpFE40RWRJeKAK7bBQ21X89ABIXlQFkFddZ9kRvlZ2Pnl0oeF+2pjnZu0Yc2czNfZEQF2P7BKIdLrgMgxG89snxAY8qAYTCKyQw6xTG87wkjDcpy1wzsZLP3WsOuO7cAm7b27xU0jRKq8Cw4d1hDoyRG+RdS53F8RFJzVMaNNYgxU2tfRwUvXpTRXiOheeRVvh25+YGVnjakUXjx/dSDnOw4ETHGHD+7styDkeSfc3BdSZxswzc6OehgMI+xsCxeeRym15QUm9hxvg8X7Bfz/0WulgFwgzrm11TVynZYOmvyHpiZKoqQyQyKahIrfhwuchCr7lMsZ4a+umIkNkKxCLZnI+T7jd+eGFMgKItjz3kTTxRl3IhaJG3LbPmwRUJynMxQKdMi4Uf0qy0U7+i8hIJ9m50QXc+
3tw2bwDSbx22XYJ9Wf14gxx5G5SPTb1JVCbhe4fxNt91xIxCow2zk62tzbYfRe6dfmDmgYHkv2PIEtMJZK8iKLDjFfu2ZUxsKT2A5g1q17og6o9MeXeuFS3mzJXJYFQZd+3UzlFR9qwkFkby9mg5y4XSeMvRLOHPt/H/r5SpEqBE6a9MadZYt61FBV152CUEzd43ihXtrAa0XH9HdsiySBcWI1SpM3mv9rRP0DiLjMUzHw/K1D8TE2f07zW4t/9kvE11tFj/NpICixQAAAAA=" sdmf_old_shares[1] = "VGFob2UgbXV0YWJsZSBjb250YWluZXIgdjEKdQlEA47ESLbTdKdpLJXCpBxd5OH239tl5hvAiz1dvGdE5rIOpf8cbfxbPcwNF+Y5dM92uBVbmV6KAAAAAAAAB/wAAAAAAAAJ0AAAAAFOWSw7jSx7WXzaMpdleJYXwYsRCV82jNA5oex9m2YhXSnb2POh+vvC1LE1NAfRc9GOb2zQG84Xdsx1Jub2brEeKkyt0sRIttN0p2kslcKkHF3k4fbf22XmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABamJprL6ecrsOoFKdrXUmWveLq8nzEGDOjFnyK9detI3noX3uyK2MwSnFdAfyN0tuAwoAAAAAAAAAFQAAAAAAAAAVAAABjwAAAo8AAAMXAAADNwAAAAAAAAM+AAAAAAAAB/wwggEgMA0GCSqGSIb3DQEBAQUAA4IBDQAwggEIAoIBAQC1IkainlJF12IBXBQdpRK1zXB7a26vuEYqRmQM09YjC6sQjCs0F2ICk8n9m/2Kw4l16eIEboB2Au9pODCE+u/dEAakEFh4qidTMn61rbGUbsLK8xzuWNW22ezzz9/nPia0HDrulXt51/FYtfnnAuD1RJGXJv/8tDllE9FL/18TzlH4WuB6Fp8FTgv7QdbZAfWJHDGFIpVCJr1XxOCsSZNFJIqGwZnD2lsChiWw5OJDbKd8otqN1hIbfHyMyfMOJ/BzRzvZXaUt4Dv5nf93EmQDWClxShRwpuX/NkZ5B2K9OFonFTbOCexm/MjMAdCBqebKKaiHFkiknUCn9eJQpZ5bAgERgV50VKj+AVTDfgTpqfO2vfo4wrufi6ZBb8QV7hllhUFBjYogQ9C96dnS7skv0s+cqFuUjwMILr5/rsbEmEMGvl0T0ytyAbtlXuowEFVj/YORNknM4yjY72YUtEPTlMpk0Cis7aIgTvu5qWMPER26PMApZuRqiwRsGIkaJIvOVOTHHjFYe3/YzdMkc7OZtqRMfQLtwVl2/zKQQV8b/a9vaT6q3mRLRd4P3esaAFe/+7sR/t+9tmB+a8kxtKM6kmaVQJMbXJZ4aoHGfeLX0m35Rcvu2Bmph7QfSDjk/eaE3q55zYSoGWShmlhlw4Kwg84sMuhmcVhLvo0LovR8bKmbdgACtTh7+7gs/l5w1lOkgbF6w7rkXLNslK7L2KYF4SPFLUcABOOLy8EETxh7h7/z9d62EiPu9CNpRrCOLxUhn+JUS+DuAAhgcAb/adrQFrhlrRNoRpvjDuxmFebA4F0qCyqWssm61AAP7FHJWQoU87gQFNsy015vnBvCBYTudJcuhMvwweODbTD8Rfh4L/X6EY5D+HgSIpSSrF2x/N1w+UO93XVIZZnRUeePDXEwhqYDE0Wua7Lx6Bnad5n91qmHAnwSEJE5YIhQM634omd6cq9Wk4seJCUIn+ucoknrpxp0IR9QMxpKSMRHRUg2K8ZegnY3YqFunRZKCfsq9ufQEKgjZN12AFqi551KPBdn4/3V5HK6xTv0P4robSsE/BvuIfByvRf/W7ZrDx+CFC4EEcsBOACOZCrkhhqd5TkYKbe9RA+vs56+9N5qZGurkxcoKviiyEncxvTuShD65DK/6x6kMDMgQv/EdZDI3x9GtHTnRBYXwDGnPJ19w+q2zC3e2XarbxTGYQIPEC5mYx0gAA0sbjf018NGfwBhl6SB54iGsa8uLvR3jHv6OSRJgwxL6j7P0Ts4Hv2EtO12P0Lv21pwi3JC1O/WviSrKCvrQD5lMHL9Uym3hwFi2zu0mqwZvxOAbGy7kfOPXkLYKOHTZLthzKj3PsdjeceWBfYIvPGKYcd6wDr36d1aXSYS4IWeApTS2AQ2lu0DUcgSefAvsA8NkgOklvJY1cjTMSg6j6cxQo48Bvl8RAWGLbr4h2S/8KwDGxwLsSv0Gop/gnFc3GzCsmL0EkEyHHWkCA8YRXCghfW80KLDV495ff7yF5oiwK56GniqowZ3RG9Jxp5MXoJQgsLV1VMQFMAmsY69yz8eoxRH3wl9L0dMyndLulhWWzNwPMQ2I0yAWdzA/pksVmwTJTFenB3MHCiWc5rEwJ3yofe6NZZnZQrYyL9r1TNnVwfTwRUiykPiLSk4x9Mi6DX7RamDAxc8u3gDVfjPsTOTagBOEGUWlGAL54KE/E6sgCQ5DEAt12chk8AxbjBFLPgV+/idrzS0lZHOL+IVBI9D0i3Bq1yZcSIqcjZB0M3IbxbPm4gLAYOWEiTUN2ecsEHHg9nt6rhgffVoqSbCCFPbpC0xf7WOC3+BQORIZECOCC7cUAciXq3xn+GuxpFE40RWRJeKAK7bBQ21X89ABIXlQFkFddZ9kRvlZ2Pnl0oeF+2pjnZu0Yc2czNfZEQF2P7BKIdLrgMgxG89snxAY8qAYTCKyQw6xTG87wkjDcpy1wzsZLP3WsOuO7cAm7b27xU0jRKq8Cw4d1hDoyRG+RdS53F8RFJzVMaNNYgxU2tfRwUvXpTRXiOheeRVvh25+YGVnjakUXjx/dSDnOw4ETHGHD+7styDkeSfc3BdSZxswzc6OehgMI+xsCxeeRym15QUm9hxvg8X7Bfz/0WulgFwgzrm11TVynZYOmvyHpiZKoqQyQyKahIrfhwuchCr7lMsZ4a+umIkNkKxCLZnI+T7jd+eGFMgKItjz3kTTxRl3IhaJG3LbPmwRUJynMxQKdMi4Uf0qy0U7+i8hIJ9m50QXc+3tw2bwDSbx22XYJ9Wf14gxx5G5SPTb1JVCbhe4fxNt91xIxCow2zk62tzbYfRe6dfmDmgYHkv2PIEtMJZK8iKLDjFfu2ZUxsKT2A5g1q17og6o9MeXeuFS3mzJXJYFQZd+3UzlFR9qwkFkby9mg5y4XSeMvRLOHPt/H/r5SpEqBE6a9
MadZYt61FBV152CUEzd43ihXtrAa0XH9HdsiySBcWI1SpM3mv9rRP0DiLjMUzHw/K1D8TE2f07zW4t/9kvE11tFj/NpICixQAAAAA=" sdmf_old_shares[2] = "VGFob2UgbXV0YWJsZSBjb250YWluZXIgdjEKdQlEA47ESLbTdKdpLJXCpBxd5OH239tl5hvAiz1dvGdE5rIOpf8cbfxbPcwNF+Y5dM92uBVbmV6KAAAAAAAAB/wAAAAAAAAJ0AAAAAFOWSw7jSx7WXzaMpdleJYXwYsRCV82jNA5oex9m2YhXSnb2POh+vvC1LE1NAfRc9GOb2zQG84Xdsx1Jub2brEeKkyt0sRIttN0p2kslcKkHF3k4fbf22XmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABamJprL6ecrsOoFKdrXUmWveLq8nzEGDOjFnyK9detI3noX3uyK2MwSnFdAfyN0tuAwoAAAAAAAAAFQAAAAAAAAAVAAABjwAAAo8AAAMXAAADNwAAAAAAAAM+AAAAAAAAB/wwggEgMA0GCSqGSIb3DQEBAQUAA4IBDQAwggEIAoIBAQC1IkainlJF12IBXBQdpRK1zXB7a26vuEYqRmQM09YjC6sQjCs0F2ICk8n9m/2Kw4l16eIEboB2Au9pODCE+u/dEAakEFh4qidTMn61rbGUbsLK8xzuWNW22ezzz9/nPia0HDrulXt51/FYtfnnAuD1RJGXJv/8tDllE9FL/18TzlH4WuB6Fp8FTgv7QdbZAfWJHDGFIpVCJr1XxOCsSZNFJIqGwZnD2lsChiWw5OJDbKd8otqN1hIbfHyMyfMOJ/BzRzvZXaUt4Dv5nf93EmQDWClxShRwpuX/NkZ5B2K9OFonFTbOCexm/MjMAdCBqebKKaiHFkiknUCn9eJQpZ5bAgERgV50VKj+AVTDfgTpqfO2vfo4wrufi6ZBb8QV7hllhUFBjYogQ9C96dnS7skv0s+cqFuUjwMILr5/rsbEmEMGvl0T0ytyAbtlXuowEFVj/YORNknM4yjY72YUtEPTlMpk0Cis7aIgTvu5qWMPER26PMApZuRqiwRsGIkaJIvOVOTHHjFYe3/YzdMkc7OZtqRMfQLtwVl2/zKQQV8b/a9vaT6q3mRLRd4P3esaAFe/+7sR/t+9tmB+a8kxtKM6kmaVQJMbXJZ4aoHGfeLX0m35Rcvu2Bmph7QfSDjk/eaE3q55zYSoGWShmlhlw4Kwg84sMuhmcVhLvo0LovR8bKmbdgACtTh7+7gs/l5w1lOkgbF6w7rkXLNslK7L2KYF4SPFLUcABOOLy8EETxh7h7/z9d62EiPu9CNpRrCOLxUhn+JUS+DuAAd8jdiCodW233N1acXhZGnulDKR3hiNsMdEIsijRPemewASoSCFpVj4utEE+eVFM146xfgC6DX39GaQ2zT3YKsWX3GiLwKtGffwqV7IlZIcBEVqMfTXSTZsY+dZm1MxxCZH0Zd33VY0yggDE0Wua7Lx6Bnad5n91qmHAnwSEJE5YIhQM634omd6cq9Wk4seJCUIn+ucoknrpxp0IR9QMxpKSMRHRUg2K8ZegnY3YqFunRZKCfsq9ufQEKgjZN12AFqi551KPBdn4/3V5HK6xTv0P4robSsE/BvuIfByvRf/W7ZrDx+CFC4EEcsBOACOZCrkhhqd5TkYKbe9RA+vs56+9N5qZGurkxcoKviiyEncxvTuShD65DK/6x6kMDMgQv/EdZDI3x9GtHTnRBYXwDGnPJ19w+q2zC3e2XarbxTGYQIPEC5mYx0gAA0sbjf018NGfwBhl6SB54iGsa8uLvR3jHv6OSRJgwxL6j7P0Ts4Hv2EtO12P0Lv21pwi3JC1O/WviSrKCvrQD5lMHL9Uym3hwFi2zu0mqwZvxOAbGy7kfOPXkLYKOHTZLthzKj3PsdjeceWBfYIvPGKYcd6wDr36d1aXSYS4IWeApTS2AQ2lu0DUcgSefAvsA8NkgOklvJY1cjTMSg6j6cxQo48Bvl8RAWGLbr4h2S/8KwDGxwLsSv0Gop/gnFc3GzCsmL0EkEyHHWkCA8YRXCghfW80KLDV495ff7yF5oiwK56GniqowZ3RG9Jxp5MXoJQgsLV1VMQFMAmsY69yz8eoxRH3wl9L0dMyndLulhWWzNwPMQ2I0yAWdzA/pksVmwTJTFenB3MHCiWc5rEwJ3yofe6NZZnZQrYyL9r1TNnVwfTwRUiykPiLSk4x9Mi6DX7RamDAxc8u3gDVfjPsTOTagBOEGUWlGAL54KE/E6sgCQ5DEAt12chk8AxbjBFLPgV+/idrzS0lZHOL+IVBI9D0i3Bq1yZcSIqcjZB0M3IbxbPm4gLAYOWEiTUN2ecsEHHg9nt6rhgffVoqSbCCFPbpC0xf7WOC3+BQORIZECOCC7cUAciXq3xn+GuxpFE40RWRJeKAK7bBQ21X89ABIXlQFkFddZ9kRvlZ2Pnl0oeF+2pjnZu0Yc2czNfZEQF2P7BKIdLrgMgxG89snxAY8qAYTCKyQw6xTG87wkjDcpy1wzsZLP3WsOuO7cAm7b27xU0jRKq8Cw4d1hDoyRG+RdS53F8RFJzVMaNNYgxU2tfRwUvXpTRXiOheeRVvh25+YGVnjakUXjx/dSDnOw4ETHGHD+7styDkeSfc3BdSZxswzc6OehgMI+xsCxeeRym15QUm9hxvg8X7Bfz/0WulgFwgzrm11TVynZYOmvyHpiZKoqQyQyKahIrfhwuchCr7lMsZ4a+umIkNkKxCLZnI+T7jd+eGFMgKItjz3kTTxRl3IhaJG3LbPmwRUJynMxQKdMi4Uf0qy0U7+i8hIJ9m50QXc+3tw2bwDSbx22XYJ9Wf14gxx5G5SPTb1JVCbhe4fxNt91xIxCow2zk62tzbYfRe6dfmDmgYHkv2PIEtMJZK8iKLDjFfu2ZUxsKT2A5g1q17og6o9MeXeuFS3mzJXJYFQZd+3UzlFR9qwkFkby9mg5y4XSeMvRLOHPt/H/r5SpEqBE6a9MadZYt61FBV152CUEzd43ihXtrAa0XH9HdsiySBcWI1SpM3mv9rRP0DiLjMUzHw/K1D8TE2f07zW4t/9kvE11tFj/NpICixQAAAAA=" sdmf_old_shares[3] = 
"VGFob2UgbXV0YWJsZSBjb250YWluZXIgdjEKdQlEA47ESLbTdKdpLJXCpBxd5OH239tl5hvAiz1dvGdE5rIOpf8cbfxbPcwNF+Y5dM92uBVbmV6KAAAAAAAAB/wAAAAAAAAJ0AAAAAFOWSw7jSx7WXzaMpdleJYXwYsRCV82jNA5oex9m2YhXSnb2POh+vvC1LE1NAfRc9GOb2zQG84Xdsx1Jub2brEeKkyt0sRIttN0p2kslcKkHF3k4fbf22XmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABamJprL6ecrsOoFKdrXUmWveLq8nzEGDOjFnyK9detI3noX3uyK2MwSnFdAfyN0tuAwoAAAAAAAAAFQAAAAAAAAAVAAABjwAAAo8AAAMXAAADNwAAAAAAAAM+AAAAAAAAB/wwggEgMA0GCSqGSIb3DQEBAQUAA4IBDQAwggEIAoIBAQC1IkainlJF12IBXBQdpRK1zXB7a26vuEYqRmQM09YjC6sQjCs0F2ICk8n9m/2Kw4l16eIEboB2Au9pODCE+u/dEAakEFh4qidTMn61rbGUbsLK8xzuWNW22ezzz9/nPia0HDrulXt51/FYtfnnAuD1RJGXJv/8tDllE9FL/18TzlH4WuB6Fp8FTgv7QdbZAfWJHDGFIpVCJr1XxOCsSZNFJIqGwZnD2lsChiWw5OJDbKd8otqN1hIbfHyMyfMOJ/BzRzvZXaUt4Dv5nf93EmQDWClxShRwpuX/NkZ5B2K9OFonFTbOCexm/MjMAdCBqebKKaiHFkiknUCn9eJQpZ5bAgERgV50VKj+AVTDfgTpqfO2vfo4wrufi6ZBb8QV7hllhUFBjYogQ9C96dnS7skv0s+cqFuUjwMILr5/rsbEmEMGvl0T0ytyAbtlXuowEFVj/YORNknM4yjY72YUtEPTlMpk0Cis7aIgTvu5qWMPER26PMApZuRqiwRsGIkaJIvOVOTHHjFYe3/YzdMkc7OZtqRMfQLtwVl2/zKQQV8b/a9vaT6q3mRLRd4P3esaAFe/+7sR/t+9tmB+a8kxtKM6kmaVQJMbXJZ4aoHGfeLX0m35Rcvu2Bmph7QfSDjk/eaE3q55zYSoGWShmlhlw4Kwg84sMuhmcVhLvo0LovR8bKmbdgACtTh7+7gs/l5w1lOkgbF6w7rkXLNslK7L2KYF4SPFLUcABOOLy8EETxh7h7/z9d62EiPu9CNpRrCOLxUhn+JUS+DuAAd8jdiCodW233N1acXhZGnulDKR3hiNsMdEIsijRPemewARoi8CrRn38KleyJWSHARFajH010k2bGPnWZtTMcQmR9GhIIWlWPi60QT55UUzXjrF+ALoNff0ZpDbNPdgqxZfcSNSplrHqtsDE0Wua7Lx6Bnad5n91qmHAnwSEJE5YIhQM634omd6cq9Wk4seJCUIn+ucoknrpxp0IR9QMxpKSMRHRUg2K8ZegnY3YqFunRZKCfsq9ufQEKgjZN12AFqi551KPBdn4/3V5HK6xTv0P4robSsE/BvuIfByvRf/W7ZrDx+CFC4EEcsBOACOZCrkhhqd5TkYKbe9RA+vs56+9N5qZGurkxcoKviiyEncxvTuShD65DK/6x6kMDMgQv/EdZDI3x9GtHTnRBYXwDGnPJ19w+q2zC3e2XarbxTGYQIPEC5mYx0gAA0sbjf018NGfwBhl6SB54iGsa8uLvR3jHv6OSRJgwxL6j7P0Ts4Hv2EtO12P0Lv21pwi3JC1O/WviSrKCvrQD5lMHL9Uym3hwFi2zu0mqwZvxOAbGy7kfOPXkLYKOHTZLthzKj3PsdjeceWBfYIvPGKYcd6wDr36d1aXSYS4IWeApTS2AQ2lu0DUcgSefAvsA8NkgOklvJY1cjTMSg6j6cxQo48Bvl8RAWGLbr4h2S/8KwDGxwLsSv0Gop/gnFc3GzCsmL0EkEyHHWkCA8YRXCghfW80KLDV495ff7yF5oiwK56GniqowZ3RG9Jxp5MXoJQgsLV1VMQFMAmsY69yz8eoxRH3wl9L0dMyndLulhWWzNwPMQ2I0yAWdzA/pksVmwTJTFenB3MHCiWc5rEwJ3yofe6NZZnZQrYyL9r1TNnVwfTwRUiykPiLSk4x9Mi6DX7RamDAxc8u3gDVfjPsTOTagBOEGUWlGAL54KE/E6sgCQ5DEAt12chk8AxbjBFLPgV+/idrzS0lZHOL+IVBI9D0i3Bq1yZcSIqcjZB0M3IbxbPm4gLAYOWEiTUN2ecsEHHg9nt6rhgffVoqSbCCFPbpC0xf7WOC3+BQORIZECOCC7cUAciXq3xn+GuxpFE40RWRJeKAK7bBQ21X89ABIXlQFkFddZ9kRvlZ2Pnl0oeF+2pjnZu0Yc2czNfZEQF2P7BKIdLrgMgxG89snxAY8qAYTCKyQw6xTG87wkjDcpy1wzsZLP3WsOuO7cAm7b27xU0jRKq8Cw4d1hDoyRG+RdS53F8RFJzVMaNNYgxU2tfRwUvXpTRXiOheeRVvh25+YGVnjakUXjx/dSDnOw4ETHGHD+7styDkeSfc3BdSZxswzc6OehgMI+xsCxeeRym15QUm9hxvg8X7Bfz/0WulgFwgzrm11TVynZYOmvyHpiZKoqQyQyKahIrfhwuchCr7lMsZ4a+umIkNkKxCLZnI+T7jd+eGFMgKItjz3kTTxRl3IhaJG3LbPmwRUJynMxQKdMi4Uf0qy0U7+i8hIJ9m50QXc+3tw2bwDSbx22XYJ9Wf14gxx5G5SPTb1JVCbhe4fxNt91xIxCow2zk62tzbYfRe6dfmDmgYHkv2PIEtMJZK8iKLDjFfu2ZUxsKT2A5g1q17og6o9MeXeuFS3mzJXJYFQZd+3UzlFR9qwkFkby9mg5y4XSeMvRLOHPt/H/r5SpEqBE6a9MadZYt61FBV152CUEzd43ihXtrAa0XH9HdsiySBcWI1SpM3mv9rRP0DiLjMUzHw/K1D8TE2f07zW4t/9kvE11tFj/NpICixQAAAAA=" sdmf_old_shares[4] = 
"VGFob2UgbXV0YWJsZSBjb250YWluZXIgdjEKdQlEA47ESLbTdKdpLJXCpBxd5OH239tl5hvAiz1dvGdE5rIOpf8cbfxbPcwNF+Y5dM92uBVbmV6KAAAAAAAAB/wAAAAAAAAJ0AAAAAFOWSw7jSx7WXzaMpdleJYXwYsRCV82jNA5oex9m2YhXSnb2POh+vvC1LE1NAfRc9GOb2zQG84Xdsx1Jub2brEeKkyt0sRIttN0p2kslcKkHF3k4fbf22XmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABamJprL6ecrsOoFKdrXUmWveLq8nzEGDOjFnyK9detI3noX3uyK2MwSnFdAfyN0tuAwoAAAAAAAAAFQAAAAAAAAAVAAABjwAAAo8AAAMXAAADNwAAAAAAAAM+AAAAAAAAB/wwggEgMA0GCSqGSIb3DQEBAQUAA4IBDQAwggEIAoIBAQC1IkainlJF12IBXBQdpRK1zXB7a26vuEYqRmQM09YjC6sQjCs0F2ICk8n9m/2Kw4l16eIEboB2Au9pODCE+u/dEAakEFh4qidTMn61rbGUbsLK8xzuWNW22ezzz9/nPia0HDrulXt51/FYtfnnAuD1RJGXJv/8tDllE9FL/18TzlH4WuB6Fp8FTgv7QdbZAfWJHDGFIpVCJr1XxOCsSZNFJIqGwZnD2lsChiWw5OJDbKd8otqN1hIbfHyMyfMOJ/BzRzvZXaUt4Dv5nf93EmQDWClxShRwpuX/NkZ5B2K9OFonFTbOCexm/MjMAdCBqebKKaiHFkiknUCn9eJQpZ5bAgERgV50VKj+AVTDfgTpqfO2vfo4wrufi6ZBb8QV7hllhUFBjYogQ9C96dnS7skv0s+cqFuUjwMILr5/rsbEmEMGvl0T0ytyAbtlXuowEFVj/YORNknM4yjY72YUtEPTlMpk0Cis7aIgTvu5qWMPER26PMApZuRqiwRsGIkaJIvOVOTHHjFYe3/YzdMkc7OZtqRMfQLtwVl2/zKQQV8b/a9vaT6q3mRLRd4P3esaAFe/+7sR/t+9tmB+a8kxtKM6kmaVQJMbXJZ4aoHGfeLX0m35Rcvu2Bmph7QfSDjk/eaE3q55zYSoGWShmlhlw4Kwg84sMuhmcVhLvo0LovR8bKmbdgACtTh7+7gs/l5w1lOkgbF6w7rkXLNslK7L2KYF4SPFLUcAA6dlE140Fc7FgB77PeM5Phv+bypQEYtyfLQHxd+OxlG3AAoIM8M4XulprmLd4gGMobS2Bv9CmwB5LpK/ySHE1QWjdwAUMA7/aVz7Mb1em0eks+biC8ZuVUhuAEkTVOAF4YulIjE8JlfW0dS1XKk62u0586QxiN38NTsluUDx8EAPTL66yRsfb1f3rRIDE0Wua7Lx6Bnad5n91qmHAnwSEJE5YIhQM634omd6cq9Wk4seJCUIn+ucoknrpxp0IR9QMxpKSMRHRUg2K8ZegnY3YqFunRZKCfsq9ufQEKgjZN12AFqi551KPBdn4/3V5HK6xTv0P4robSsE/BvuIfByvRf/W7ZrDx+CFC4EEcsBOACOZCrkhhqd5TkYKbe9RA+vs56+9N5qZGurkxcoKviiyEncxvTuShD65DK/6x6kMDMgQv/EdZDI3x9GtHTnRBYXwDGnPJ19w+q2zC3e2XarbxTGYQIPEC5mYx0gAA0sbjf018NGfwBhl6SB54iGsa8uLvR3jHv6OSRJgwxL6j7P0Ts4Hv2EtO12P0Lv21pwi3JC1O/WviSrKCvrQD5lMHL9Uym3hwFi2zu0mqwZvxOAbGy7kfOPXkLYKOHTZLthzKj3PsdjeceWBfYIvPGKYcd6wDr36d1aXSYS4IWeApTS2AQ2lu0DUcgSefAvsA8NkgOklvJY1cjTMSg6j6cxQo48Bvl8RAWGLbr4h2S/8KwDGxwLsSv0Gop/gnFc3GzCsmL0EkEyHHWkCA8YRXCghfW80KLDV495ff7yF5oiwK56GniqowZ3RG9Jxp5MXoJQgsLV1VMQFMAmsY69yz8eoxRH3wl9L0dMyndLulhWWzNwPMQ2I0yAWdzA/pksVmwTJTFenB3MHCiWc5rEwJ3yofe6NZZnZQrYyL9r1TNnVwfTwRUiykPiLSk4x9Mi6DX7RamDAxc8u3gDVfjPsTOTagBOEGUWlGAL54KE/E6sgCQ5DEAt12chk8AxbjBFLPgV+/idrzS0lZHOL+IVBI9D0i3Bq1yZcSIqcjZB0M3IbxbPm4gLAYOWEiTUN2ecsEHHg9nt6rhgffVoqSbCCFPbpC0xf7WOC3+BQORIZECOCC7cUAciXq3xn+GuxpFE40RWRJeKAK7bBQ21X89ABIXlQFkFddZ9kRvlZ2Pnl0oeF+2pjnZu0Yc2czNfZEQF2P7BKIdLrgMgxG89snxAY8qAYTCKyQw6xTG87wkjDcpy1wzsZLP3WsOuO7cAm7b27xU0jRKq8Cw4d1hDoyRG+RdS53F8RFJzVMaNNYgxU2tfRwUvXpTRXiOheeRVvh25+YGVnjakUXjx/dSDnOw4ETHGHD+7styDkeSfc3BdSZxswzc6OehgMI+xsCxeeRym15QUm9hxvg8X7Bfz/0WulgFwgzrm11TVynZYOmvyHpiZKoqQyQyKahIrfhwuchCr7lMsZ4a+umIkNkKxCLZnI+T7jd+eGFMgKItjz3kTTxRl3IhaJG3LbPmwRUJynMxQKdMi4Uf0qy0U7+i8hIJ9m50QXc+3tw2bwDSbx22XYJ9Wf14gxx5G5SPTb1JVCbhe4fxNt91xIxCow2zk62tzbYfRe6dfmDmgYHkv2PIEtMJZK8iKLDjFfu2ZUxsKT2A5g1q17og6o9MeXeuFS3mzJXJYFQZd+3UzlFR9qwkFkby9mg5y4XSeMvRLOHPt/H/r5SpEqBE6a9MadZYt61FBV152CUEzd43ihXtrAa0XH9HdsiySBcWI1SpM3mv9rRP0DiLjMUzHw/K1D8TE2f07zW4t/9kvE11tFj/NpICixQAAAAA=" sdmf_old_shares[5] = 
"VGFob2UgbXV0YWJsZSBjb250YWluZXIgdjEKdQlEA47ESLbTdKdpLJXCpBxd5OH239tl5hvAiz1dvGdE5rIOpf8cbfxbPcwNF+Y5dM92uBVbmV6KAAAAAAAAB/wAAAAAAAAJ0AAAAAFOWSw7jSx7WXzaMpdleJYXwYsRCV82jNA5oex9m2YhXSnb2POh+vvC1LE1NAfRc9GOb2zQG84Xdsx1Jub2brEeKkyt0sRIttN0p2kslcKkHF3k4fbf22XmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABamJprL6ecrsOoFKdrXUmWveLq8nzEGDOjFnyK9detI3noX3uyK2MwSnFdAfyN0tuAwoAAAAAAAAAFQAAAAAAAAAVAAABjwAAAo8AAAMXAAADNwAAAAAAAAM+AAAAAAAAB/wwggEgMA0GCSqGSIb3DQEBAQUAA4IBDQAwggEIAoIBAQC1IkainlJF12IBXBQdpRK1zXB7a26vuEYqRmQM09YjC6sQjCs0F2ICk8n9m/2Kw4l16eIEboB2Au9pODCE+u/dEAakEFh4qidTMn61rbGUbsLK8xzuWNW22ezzz9/nPia0HDrulXt51/FYtfnnAuD1RJGXJv/8tDllE9FL/18TzlH4WuB6Fp8FTgv7QdbZAfWJHDGFIpVCJr1XxOCsSZNFJIqGwZnD2lsChiWw5OJDbKd8otqN1hIbfHyMyfMOJ/BzRzvZXaUt4Dv5nf93EmQDWClxShRwpuX/NkZ5B2K9OFonFTbOCexm/MjMAdCBqebKKaiHFkiknUCn9eJQpZ5bAgERgV50VKj+AVTDfgTpqfO2vfo4wrufi6ZBb8QV7hllhUFBjYogQ9C96dnS7skv0s+cqFuUjwMILr5/rsbEmEMGvl0T0ytyAbtlXuowEFVj/YORNknM4yjY72YUtEPTlMpk0Cis7aIgTvu5qWMPER26PMApZuRqiwRsGIkaJIvOVOTHHjFYe3/YzdMkc7OZtqRMfQLtwVl2/zKQQV8b/a9vaT6q3mRLRd4P3esaAFe/+7sR/t+9tmB+a8kxtKM6kmaVQJMbXJZ4aoHGfeLX0m35Rcvu2Bmph7QfSDjk/eaE3q55zYSoGWShmlhlw4Kwg84sMuhmcVhLvo0LovR8bKmbdgACtTh7+7gs/l5w1lOkgbF6w7rkXLNslK7L2KYF4SPFLUcAA6dlE140Fc7FgB77PeM5Phv+bypQEYtyfLQHxd+OxlG3AAoIM8M4XulprmLd4gGMobS2Bv9CmwB5LpK/ySHE1QWjdwATPCZX1tHUtVypOtrtOfOkMYjd/DU7JblA8fBAD0y+uskwDv9pXPsxvV6bR6Sz5uILxm5VSG4ASRNU4AXhi6UiMUKZHBmcmEgDE0Wua7Lx6Bnad5n91qmHAnwSEJE5YIhQM634omd6cq9Wk4seJCUIn+ucoknrpxp0IR9QMxpKSMRHRUg2K8ZegnY3YqFunRZKCfsq9ufQEKgjZN12AFqi551KPBdn4/3V5HK6xTv0P4robSsE/BvuIfByvRf/W7ZrDx+CFC4EEcsBOACOZCrkhhqd5TkYKbe9RA+vs56+9N5qZGurkxcoKviiyEncxvTuShD65DK/6x6kMDMgQv/EdZDI3x9GtHTnRBYXwDGnPJ19w+q2zC3e2XarbxTGYQIPEC5mYx0gAA0sbjf018NGfwBhl6SB54iGsa8uLvR3jHv6OSRJgwxL6j7P0Ts4Hv2EtO12P0Lv21pwi3JC1O/WviSrKCvrQD5lMHL9Uym3hwFi2zu0mqwZvxOAbGy7kfOPXkLYKOHTZLthzKj3PsdjeceWBfYIvPGKYcd6wDr36d1aXSYS4IWeApTS2AQ2lu0DUcgSefAvsA8NkgOklvJY1cjTMSg6j6cxQo48Bvl8RAWGLbr4h2S/8KwDGxwLsSv0Gop/gnFc3GzCsmL0EkEyHHWkCA8YRXCghfW80KLDV495ff7yF5oiwK56GniqowZ3RG9Jxp5MXoJQgsLV1VMQFMAmsY69yz8eoxRH3wl9L0dMyndLulhWWzNwPMQ2I0yAWdzA/pksVmwTJTFenB3MHCiWc5rEwJ3yofe6NZZnZQrYyL9r1TNnVwfTwRUiykPiLSk4x9Mi6DX7RamDAxc8u3gDVfjPsTOTagBOEGUWlGAL54KE/E6sgCQ5DEAt12chk8AxbjBFLPgV+/idrzS0lZHOL+IVBI9D0i3Bq1yZcSIqcjZB0M3IbxbPm4gLAYOWEiTUN2ecsEHHg9nt6rhgffVoqSbCCFPbpC0xf7WOC3+BQORIZECOCC7cUAciXq3xn+GuxpFE40RWRJeKAK7bBQ21X89ABIXlQFkFddZ9kRvlZ2Pnl0oeF+2pjnZu0Yc2czNfZEQF2P7BKIdLrgMgxG89snxAY8qAYTCKyQw6xTG87wkjDcpy1wzsZLP3WsOuO7cAm7b27xU0jRKq8Cw4d1hDoyRG+RdS53F8RFJzVMaNNYgxU2tfRwUvXpTRXiOheeRVvh25+YGVnjakUXjx/dSDnOw4ETHGHD+7styDkeSfc3BdSZxswzc6OehgMI+xsCxeeRym15QUm9hxvg8X7Bfz/0WulgFwgzrm11TVynZYOmvyHpiZKoqQyQyKahIrfhwuchCr7lMsZ4a+umIkNkKxCLZnI+T7jd+eGFMgKItjz3kTTxRl3IhaJG3LbPmwRUJynMxQKdMi4Uf0qy0U7+i8hIJ9m50QXc+3tw2bwDSbx22XYJ9Wf14gxx5G5SPTb1JVCbhe4fxNt91xIxCow2zk62tzbYfRe6dfmDmgYHkv2PIEtMJZK8iKLDjFfu2ZUxsKT2A5g1q17og6o9MeXeuFS3mzJXJYFQZd+3UzlFR9qwkFkby9mg5y4XSeMvRLOHPt/H/r5SpEqBE6a9MadZYt61FBV152CUEzd43ihXtrAa0XH9HdsiySBcWI1SpM3mv9rRP0DiLjMUzHw/K1D8TE2f07zW4t/9kvE11tFj/NpICixQAAAAA=" sdmf_old_shares[6] = 
"VGFob2UgbXV0YWJsZSBjb250YWluZXIgdjEKdQlEA47ESLbTdKdpLJXCpBxd5OH239tl5hvAiz1dvGdE5rIOpf8cbfxbPcwNF+Y5dM92uBVbmV6KAAAAAAAAB/wAAAAAAAAJ0AAAAAFOWSw7jSx7WXzaMpdleJYXwYsRCV82jNA5oex9m2YhXSnb2POh+vvC1LE1NAfRc9GOb2zQG84Xdsx1Jub2brEeKkyt0sRIttN0p2kslcKkHF3k4fbf22XmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABamJprL6ecrsOoFKdrXUmWveLq8nzEGDOjFnyK9detI3noX3uyK2MwSnFdAfyN0tuAwoAAAAAAAAAFQAAAAAAAAAVAAABjwAAAo8AAAMXAAADNwAAAAAAAAM+AAAAAAAAB/wwggEgMA0GCSqGSIb3DQEBAQUAA4IBDQAwggEIAoIBAQC1IkainlJF12IBXBQdpRK1zXB7a26vuEYqRmQM09YjC6sQjCs0F2ICk8n9m/2Kw4l16eIEboB2Au9pODCE+u/dEAakEFh4qidTMn61rbGUbsLK8xzuWNW22ezzz9/nPia0HDrulXt51/FYtfnnAuD1RJGXJv/8tDllE9FL/18TzlH4WuB6Fp8FTgv7QdbZAfWJHDGFIpVCJr1XxOCsSZNFJIqGwZnD2lsChiWw5OJDbKd8otqN1hIbfHyMyfMOJ/BzRzvZXaUt4Dv5nf93EmQDWClxShRwpuX/NkZ5B2K9OFonFTbOCexm/MjMAdCBqebKKaiHFkiknUCn9eJQpZ5bAgERgV50VKj+AVTDfgTpqfO2vfo4wrufi6ZBb8QV7hllhUFBjYogQ9C96dnS7skv0s+cqFuUjwMILr5/rsbEmEMGvl0T0ytyAbtlXuowEFVj/YORNknM4yjY72YUtEPTlMpk0Cis7aIgTvu5qWMPER26PMApZuRqiwRsGIkaJIvOVOTHHjFYe3/YzdMkc7OZtqRMfQLtwVl2/zKQQV8b/a9vaT6q3mRLRd4P3esaAFe/+7sR/t+9tmB+a8kxtKM6kmaVQJMbXJZ4aoHGfeLX0m35Rcvu2Bmph7QfSDjk/eaE3q55zYSoGWShmlhlw4Kwg84sMuhmcVhLvo0LovR8bKmbdgACtTh7+7gs/l5w1lOkgbF6w7rkXLNslK7L2KYF4SPFLUcAA6dlE140Fc7FgB77PeM5Phv+bypQEYtyfLQHxd+OxlG3AAlyHZU7RfTJjbHu1gjabWZsTu+7nAeRVG6/ZSd4iMQ1ZgAWDSFSPvKzcFzRcuRlVgKUf0HBce1MCF8SwpUbPPEyfVJty4xLZ7DvNU/Eh/R6BarsVAagVXdp+GtEu0+fok7nilT4LchmHo8DE0Wua7Lx6Bnad5n91qmHAnwSEJE5YIhQM634omd6cq9Wk4seJCUIn+ucoknrpxp0IR9QMxpKSMRHRUg2K8ZegnY3YqFunRZKCfsq9ufQEKgjZN12AFqi551KPBdn4/3V5HK6xTv0P4robSsE/BvuIfByvRf/W7ZrDx+CFC4EEcsBOACOZCrkhhqd5TkYKbe9RA+vs56+9N5qZGurkxcoKviiyEncxvTuShD65DK/6x6kMDMgQv/EdZDI3x9GtHTnRBYXwDGnPJ19w+q2zC3e2XarbxTGYQIPEC5mYx0gAA0sbjf018NGfwBhl6SB54iGsa8uLvR3jHv6OSRJgwxL6j7P0Ts4Hv2EtO12P0Lv21pwi3JC1O/WviSrKCvrQD5lMHL9Uym3hwFi2zu0mqwZvxOAbGy7kfOPXkLYKOHTZLthzKj3PsdjeceWBfYIvPGKYcd6wDr36d1aXSYS4IWeApTS2AQ2lu0DUcgSefAvsA8NkgOklvJY1cjTMSg6j6cxQo48Bvl8RAWGLbr4h2S/8KwDGxwLsSv0Gop/gnFc3GzCsmL0EkEyHHWkCA8YRXCghfW80KLDV495ff7yF5oiwK56GniqowZ3RG9Jxp5MXoJQgsLV1VMQFMAmsY69yz8eoxRH3wl9L0dMyndLulhWWzNwPMQ2I0yAWdzA/pksVmwTJTFenB3MHCiWc5rEwJ3yofe6NZZnZQrYyL9r1TNnVwfTwRUiykPiLSk4x9Mi6DX7RamDAxc8u3gDVfjPsTOTagBOEGUWlGAL54KE/E6sgCQ5DEAt12chk8AxbjBFLPgV+/idrzS0lZHOL+IVBI9D0i3Bq1yZcSIqcjZB0M3IbxbPm4gLAYOWEiTUN2ecsEHHg9nt6rhgffVoqSbCCFPbpC0xf7WOC3+BQORIZECOCC7cUAciXq3xn+GuxpFE40RWRJeKAK7bBQ21X89ABIXlQFkFddZ9kRvlZ2Pnl0oeF+2pjnZu0Yc2czNfZEQF2P7BKIdLrgMgxG89snxAY8qAYTCKyQw6xTG87wkjDcpy1wzsZLP3WsOuO7cAm7b27xU0jRKq8Cw4d1hDoyRG+RdS53F8RFJzVMaNNYgxU2tfRwUvXpTRXiOheeRVvh25+YGVnjakUXjx/dSDnOw4ETHGHD+7styDkeSfc3BdSZxswzc6OehgMI+xsCxeeRym15QUm9hxvg8X7Bfz/0WulgFwgzrm11TVynZYOmvyHpiZKoqQyQyKahIrfhwuchCr7lMsZ4a+umIkNkKxCLZnI+T7jd+eGFMgKItjz3kTTxRl3IhaJG3LbPmwRUJynMxQKdMi4Uf0qy0U7+i8hIJ9m50QXc+3tw2bwDSbx22XYJ9Wf14gxx5G5SPTb1JVCbhe4fxNt91xIxCow2zk62tzbYfRe6dfmDmgYHkv2PIEtMJZK8iKLDjFfu2ZUxsKT2A5g1q17og6o9MeXeuFS3mzJXJYFQZd+3UzlFR9qwkFkby9mg5y4XSeMvRLOHPt/H/r5SpEqBE6a9MadZYt61FBV152CUEzd43ihXtrAa0XH9HdsiySBcWI1SpM3mv9rRP0DiLjMUzHw/K1D8TE2f07zW4t/9kvE11tFj/NpICixQAAAAA=" sdmf_old_shares[7] = 
"VGFob2UgbXV0YWJsZSBjb250YWluZXIgdjEKdQlEA47ESLbTdKdpLJXCpBxd5OH239tl5hvAiz1dvGdE5rIOpf8cbfxbPcwNF+Y5dM92uBVbmV6KAAAAAAAAB/wAAAAAAAAJ0AAAAAFOWSw7jSx7WXzaMpdleJYXwYsRCV82jNA5oex9m2YhXSnb2POh+vvC1LE1NAfRc9GOb2zQG84Xdsx1Jub2brEeKkyt0sRIttN0p2kslcKkHF3k4fbf22XmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABamJprL6ecrsOoFKdrXUmWveLq8nzEGDOjFnyK9detI3noX3uyK2MwSnFdAfyN0tuAwoAAAAAAAAAFQAAAAAAAAAVAAABjwAAAo8AAAMXAAADNwAAAAAAAAM+AAAAAAAAB/wwggEgMA0GCSqGSIb3DQEBAQUAA4IBDQAwggEIAoIBAQC1IkainlJF12IBXBQdpRK1zXB7a26vuEYqRmQM09YjC6sQjCs0F2ICk8n9m/2Kw4l16eIEboB2Au9pODCE+u/dEAakEFh4qidTMn61rbGUbsLK8xzuWNW22ezzz9/nPia0HDrulXt51/FYtfnnAuD1RJGXJv/8tDllE9FL/18TzlH4WuB6Fp8FTgv7QdbZAfWJHDGFIpVCJr1XxOCsSZNFJIqGwZnD2lsChiWw5OJDbKd8otqN1hIbfHyMyfMOJ/BzRzvZXaUt4Dv5nf93EmQDWClxShRwpuX/NkZ5B2K9OFonFTbOCexm/MjMAdCBqebKKaiHFkiknUCn9eJQpZ5bAgERgV50VKj+AVTDfgTpqfO2vfo4wrufi6ZBb8QV7hllhUFBjYogQ9C96dnS7skv0s+cqFuUjwMILr5/rsbEmEMGvl0T0ytyAbtlXuowEFVj/YORNknM4yjY72YUtEPTlMpk0Cis7aIgTvu5qWMPER26PMApZuRqiwRsGIkaJIvOVOTHHjFYe3/YzdMkc7OZtqRMfQLtwVl2/zKQQV8b/a9vaT6q3mRLRd4P3esaAFe/+7sR/t+9tmB+a8kxtKM6kmaVQJMbXJZ4aoHGfeLX0m35Rcvu2Bmph7QfSDjk/eaE3q55zYSoGWShmlhlw4Kwg84sMuhmcVhLvo0LovR8bKmbdgACtTh7+7gs/l5w1lOkgbF6w7rkXLNslK7L2KYF4SPFLUcAA6dlE140Fc7FgB77PeM5Phv+bypQEYtyfLQHxd+OxlG3AAlyHZU7RfTJjbHu1gjabWZsTu+7nAeRVG6/ZSd4iMQ1ZgAVbcuMS2ew7zVPxIf0egWq7FQGoFV3afhrRLtPn6JO54oNIVI+8rNwXNFy5GVWApR/QcFx7UwIXxLClRs88TJ9UtLnNF4/mM0DE0Wua7Lx6Bnad5n91qmHAnwSEJE5YIhQM634omd6cq9Wk4seJCUIn+ucoknrpxp0IR9QMxpKSMRHRUg2K8ZegnY3YqFunRZKCfsq9ufQEKgjZN12AFqi551KPBdn4/3V5HK6xTv0P4robSsE/BvuIfByvRf/W7ZrDx+CFC4EEcsBOACOZCrkhhqd5TkYKbe9RA+vs56+9N5qZGurkxcoKviiyEncxvTuShD65DK/6x6kMDMgQv/EdZDI3x9GtHTnRBYXwDGnPJ19w+q2zC3e2XarbxTGYQIPEC5mYx0gAA0sbjf018NGfwBhl6SB54iGsa8uLvR3jHv6OSRJgwxL6j7P0Ts4Hv2EtO12P0Lv21pwi3JC1O/WviSrKCvrQD5lMHL9Uym3hwFi2zu0mqwZvxOAbGy7kfOPXkLYKOHTZLthzKj3PsdjeceWBfYIvPGKYcd6wDr36d1aXSYS4IWeApTS2AQ2lu0DUcgSefAvsA8NkgOklvJY1cjTMSg6j6cxQo48Bvl8RAWGLbr4h2S/8KwDGxwLsSv0Gop/gnFc3GzCsmL0EkEyHHWkCA8YRXCghfW80KLDV495ff7yF5oiwK56GniqowZ3RG9Jxp5MXoJQgsLV1VMQFMAmsY69yz8eoxRH3wl9L0dMyndLulhWWzNwPMQ2I0yAWdzA/pksVmwTJTFenB3MHCiWc5rEwJ3yofe6NZZnZQrYyL9r1TNnVwfTwRUiykPiLSk4x9Mi6DX7RamDAxc8u3gDVfjPsTOTagBOEGUWlGAL54KE/E6sgCQ5DEAt12chk8AxbjBFLPgV+/idrzS0lZHOL+IVBI9D0i3Bq1yZcSIqcjZB0M3IbxbPm4gLAYOWEiTUN2ecsEHHg9nt6rhgffVoqSbCCFPbpC0xf7WOC3+BQORIZECOCC7cUAciXq3xn+GuxpFE40RWRJeKAK7bBQ21X89ABIXlQFkFddZ9kRvlZ2Pnl0oeF+2pjnZu0Yc2czNfZEQF2P7BKIdLrgMgxG89snxAY8qAYTCKyQw6xTG87wkjDcpy1wzsZLP3WsOuO7cAm7b27xU0jRKq8Cw4d1hDoyRG+RdS53F8RFJzVMaNNYgxU2tfRwUvXpTRXiOheeRVvh25+YGVnjakUXjx/dSDnOw4ETHGHD+7styDkeSfc3BdSZxswzc6OehgMI+xsCxeeRym15QUm9hxvg8X7Bfz/0WulgFwgzrm11TVynZYOmvyHpiZKoqQyQyKahIrfhwuchCr7lMsZ4a+umIkNkKxCLZnI+T7jd+eGFMgKItjz3kTTxRl3IhaJG3LbPmwRUJynMxQKdMi4Uf0qy0U7+i8hIJ9m50QXc+3tw2bwDSbx22XYJ9Wf14gxx5G5SPTb1JVCbhe4fxNt91xIxCow2zk62tzbYfRe6dfmDmgYHkv2PIEtMJZK8iKLDjFfu2ZUxsKT2A5g1q17og6o9MeXeuFS3mzJXJYFQZd+3UzlFR9qwkFkby9mg5y4XSeMvRLOHPt/H/r5SpEqBE6a9MadZYt61FBV152CUEzd43ihXtrAa0XH9HdsiySBcWI1SpM3mv9rRP0DiLjMUzHw/K1D8TE2f07zW4t/9kvE11tFj/NpICixQAAAAA=" sdmf_old_shares[8] = 
"VGFob2UgbXV0YWJsZSBjb250YWluZXIgdjEKdQlEA47ESLbTdKdpLJXCpBxd5OH239tl5hvAiz1dvGdE5rIOpf8cbfxbPcwNF+Y5dM92uBVbmV6KAAAAAAAAB/wAAAAAAAAJ0AAAAAFOWSw7jSx7WXzaMpdleJYXwYsRCV82jNA5oex9m2YhXSnb2POh+vvC1LE1NAfRc9GOb2zQG84Xdsx1Jub2brEeKkyt0sRIttN0p2kslcKkHF3k4fbf22XmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABamJprL6ecrsOoFKdrXUmWveLq8nzEGDOjFnyK9detI3noX3uyK2MwSnFdAfyN0tuAwoAAAAAAAAAFQAAAAAAAAAVAAABjwAAAo8AAAMXAAADNwAAAAAAAAM+AAAAAAAAB/wwggEgMA0GCSqGSIb3DQEBAQUAA4IBDQAwggEIAoIBAQC1IkainlJF12IBXBQdpRK1zXB7a26vuEYqRmQM09YjC6sQjCs0F2ICk8n9m/2Kw4l16eIEboB2Au9pODCE+u/dEAakEFh4qidTMn61rbGUbsLK8xzuWNW22ezzz9/nPia0HDrulXt51/FYtfnnAuD1RJGXJv/8tDllE9FL/18TzlH4WuB6Fp8FTgv7QdbZAfWJHDGFIpVCJr1XxOCsSZNFJIqGwZnD2lsChiWw5OJDbKd8otqN1hIbfHyMyfMOJ/BzRzvZXaUt4Dv5nf93EmQDWClxShRwpuX/NkZ5B2K9OFonFTbOCexm/MjMAdCBqebKKaiHFkiknUCn9eJQpZ5bAgERgV50VKj+AVTDfgTpqfO2vfo4wrufi6ZBb8QV7hllhUFBjYogQ9C96dnS7skv0s+cqFuUjwMILr5/rsbEmEMGvl0T0ytyAbtlXuowEFVj/YORNknM4yjY72YUtEPTlMpk0Cis7aIgTvu5qWMPER26PMApZuRqiwRsGIkaJIvOVOTHHjFYe3/YzdMkc7OZtqRMfQLtwVl2/zKQQV8b/a9vaT6q3mRLRd4P3esaAFe/+7sR/t+9tmB+a8kxtKM6kmaVQJMbXJZ4aoHGfeLX0m35Rcvu2Bmph7QfSDjk/eaE3q55zYSoGWShmlhlw4Kwg84sMuhmcVhLvo0LovR8bKmbdgABUSzNKiMx0E91q51/WH6ASL0fDEOLef9oxuyBX5F5cpoABojmWkDX3k3FKfgNHIeptE3lxB8HHzxDfSD250psyfNCAAwGsKbMxbmI2NpdTozZ3SICrySwgGkatA1gsDOJmOnTzgAYmqKY7A9vQChuYa17fYSyKerIb3682jxiIneQvCMWCK5WcuI4PMeIsUAj8yxdxHvV+a9vtSCEsDVvymrrooDKX1GK98t37yoDE0Wua7Lx6Bnad5n91qmHAnwSEJE5YIhQM634omd6cq9Wk4seJCUIn+ucoknrpxp0IR9QMxpKSMRHRUg2K8ZegnY3YqFunRZKCfsq9ufQEKgjZN12AFqi551KPBdn4/3V5HK6xTv0P4robSsE/BvuIfByvRf/W7ZrDx+CFC4EEcsBOACOZCrkhhqd5TkYKbe9RA+vs56+9N5qZGurkxcoKviiyEncxvTuShD65DK/6x6kMDMgQv/EdZDI3x9GtHTnRBYXwDGnPJ19w+q2zC3e2XarbxTGYQIPEC5mYx0gAA0sbjf018NGfwBhl6SB54iGsa8uLvR3jHv6OSRJgwxL6j7P0Ts4Hv2EtO12P0Lv21pwi3JC1O/WviSrKCvrQD5lMHL9Uym3hwFi2zu0mqwZvxOAbGy7kfOPXkLYKOHTZLthzKj3PsdjeceWBfYIvPGKYcd6wDr36d1aXSYS4IWeApTS2AQ2lu0DUcgSefAvsA8NkgOklvJY1cjTMSg6j6cxQo48Bvl8RAWGLbr4h2S/8KwDGxwLsSv0Gop/gnFc3GzCsmL0EkEyHHWkCA8YRXCghfW80KLDV495ff7yF5oiwK56GniqowZ3RG9Jxp5MXoJQgsLV1VMQFMAmsY69yz8eoxRH3wl9L0dMyndLulhWWzNwPMQ2I0yAWdzA/pksVmwTJTFenB3MHCiWc5rEwJ3yofe6NZZnZQrYyL9r1TNnVwfTwRUiykPiLSk4x9Mi6DX7RamDAxc8u3gDVfjPsTOTagBOEGUWlGAL54KE/E6sgCQ5DEAt12chk8AxbjBFLPgV+/idrzS0lZHOL+IVBI9D0i3Bq1yZcSIqcjZB0M3IbxbPm4gLAYOWEiTUN2ecsEHHg9nt6rhgffVoqSbCCFPbpC0xf7WOC3+BQORIZECOCC7cUAciXq3xn+GuxpFE40RWRJeKAK7bBQ21X89ABIXlQFkFddZ9kRvlZ2Pnl0oeF+2pjnZu0Yc2czNfZEQF2P7BKIdLrgMgxG89snxAY8qAYTCKyQw6xTG87wkjDcpy1wzsZLP3WsOuO7cAm7b27xU0jRKq8Cw4d1hDoyRG+RdS53F8RFJzVMaNNYgxU2tfRwUvXpTRXiOheeRVvh25+YGVnjakUXjx/dSDnOw4ETHGHD+7styDkeSfc3BdSZxswzc6OehgMI+xsCxeeRym15QUm9hxvg8X7Bfz/0WulgFwgzrm11TVynZYOmvyHpiZKoqQyQyKahIrfhwuchCr7lMsZ4a+umIkNkKxCLZnI+T7jd+eGFMgKItjz3kTTxRl3IhaJG3LbPmwRUJynMxQKdMi4Uf0qy0U7+i8hIJ9m50QXc+3tw2bwDSbx22XYJ9Wf14gxx5G5SPTb1JVCbhe4fxNt91xIxCow2zk62tzbYfRe6dfmDmgYHkv2PIEtMJZK8iKLDjFfu2ZUxsKT2A5g1q17og6o9MeXeuFS3mzJXJYFQZd+3UzlFR9qwkFkby9mg5y4XSeMvRLOHPt/H/r5SpEqBE6a9MadZYt61FBV152CUEzd43ihXtrAa0XH9HdsiySBcWI1SpM3mv9rRP0DiLjMUzHw/K1D8TE2f07zW4t/9kvE11tFj/NpICixQAAAAA=" sdmf_old_shares[9] = 
"VGFob2UgbXV0YWJsZSBjb250YWluZXIgdjEKdQlEA47ESLbTdKdpLJXCpBxd5OH239tl5hvAiz1dvGdE5rIOpf8cbfxbPcwNF+Y5dM92uBVbmV6KAAAAAAAAB/wAAAAAAAAJ0AAAAAFOWSw7jSx7WXzaMpdleJYXwYsRCV82jNA5oex9m2YhXSnb2POh+vvC1LE1NAfRc9GOb2zQG84Xdsx1Jub2brEeKkyt0sRIttN0p2kslcKkHF3k4fbf22XmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABamJprL6ecrsOoFKdrXUmWveLq8nzEGDOjFnyK9detI3noX3uyK2MwSnFdAfyN0tuAwoAAAAAAAAAFQAAAAAAAAAVAAABjwAAAo8AAAMXAAADNwAAAAAAAAM+AAAAAAAAB/wwggEgMA0GCSqGSIb3DQEBAQUAA4IBDQAwggEIAoIBAQC1IkainlJF12IBXBQdpRK1zXB7a26vuEYqRmQM09YjC6sQjCs0F2ICk8n9m/2Kw4l16eIEboB2Au9pODCE+u/dEAakEFh4qidTMn61rbGUbsLK8xzuWNW22ezzz9/nPia0HDrulXt51/FYtfnnAuD1RJGXJv/8tDllE9FL/18TzlH4WuB6Fp8FTgv7QdbZAfWJHDGFIpVCJr1XxOCsSZNFJIqGwZnD2lsChiWw5OJDbKd8otqN1hIbfHyMyfMOJ/BzRzvZXaUt4Dv5nf93EmQDWClxShRwpuX/NkZ5B2K9OFonFTbOCexm/MjMAdCBqebKKaiHFkiknUCn9eJQpZ5bAgERgV50VKj+AVTDfgTpqfO2vfo4wrufi6ZBb8QV7hllhUFBjYogQ9C96dnS7skv0s+cqFuUjwMILr5/rsbEmEMGvl0T0ytyAbtlXuowEFVj/YORNknM4yjY72YUtEPTlMpk0Cis7aIgTvu5qWMPER26PMApZuRqiwRsGIkaJIvOVOTHHjFYe3/YzdMkc7OZtqRMfQLtwVl2/zKQQV8b/a9vaT6q3mRLRd4P3esaAFe/+7sR/t+9tmB+a8kxtKM6kmaVQJMbXJZ4aoHGfeLX0m35Rcvu2Bmph7QfSDjk/eaE3q55zYSoGWShmlhlw4Kwg84sMuhmcVhLvo0LovR8bKmbdgABUSzNKiMx0E91q51/WH6ASL0fDEOLef9oxuyBX5F5cpoABojmWkDX3k3FKfgNHIeptE3lxB8HHzxDfSD250psyfNCAAwGsKbMxbmI2NpdTozZ3SICrySwgGkatA1gsDOJmOnTzgAXVnLiODzHiLFAI/MsXcR71fmvb7UghLA1b8pq66KAyl+aopjsD29AKG5hrXt9hLIp6shvfrzaPGIid5C8IxYIrjgBj1YohGgDE0Wua7Lx6Bnad5n91qmHAnwSEJE5YIhQM634omd6cq9Wk4seJCUIn+ucoknrpxp0IR9QMxpKSMRHRUg2K8ZegnY3YqFunRZKCfsq9ufQEKgjZN12AFqi551KPBdn4/3V5HK6xTv0P4robSsE/BvuIfByvRf/W7ZrDx+CFC4EEcsBOACOZCrkhhqd5TkYKbe9RA+vs56+9N5qZGurkxcoKviiyEncxvTuShD65DK/6x6kMDMgQv/EdZDI3x9GtHTnRBYXwDGnPJ19w+q2zC3e2XarbxTGYQIPEC5mYx0gAA0sbjf018NGfwBhl6SB54iGsa8uLvR3jHv6OSRJgwxL6j7P0Ts4Hv2EtO12P0Lv21pwi3JC1O/WviSrKCvrQD5lMHL9Uym3hwFi2zu0mqwZvxOAbGy7kfOPXkLYKOHTZLthzKj3PsdjeceWBfYIvPGKYcd6wDr36d1aXSYS4IWeApTS2AQ2lu0DUcgSefAvsA8NkgOklvJY1cjTMSg6j6cxQo48Bvl8RAWGLbr4h2S/8KwDGxwLsSv0Gop/gnFc3GzCsmL0EkEyHHWkCA8YRXCghfW80KLDV495ff7yF5oiwK56GniqowZ3RG9Jxp5MXoJQgsLV1VMQFMAmsY69yz8eoxRH3wl9L0dMyndLulhWWzNwPMQ2I0yAWdzA/pksVmwTJTFenB3MHCiWc5rEwJ3yofe6NZZnZQrYyL9r1TNnVwfTwRUiykPiLSk4x9Mi6DX7RamDAxc8u3gDVfjPsTOTagBOEGUWlGAL54KE/E6sgCQ5DEAt12chk8AxbjBFLPgV+/idrzS0lZHOL+IVBI9D0i3Bq1yZcSIqcjZB0M3IbxbPm4gLAYOWEiTUN2ecsEHHg9nt6rhgffVoqSbCCFPbpC0xf7WOC3+BQORIZECOCC7cUAciXq3xn+GuxpFE40RWRJeKAK7bBQ21X89ABIXlQFkFddZ9kRvlZ2Pnl0oeF+2pjnZu0Yc2czNfZEQF2P7BKIdLrgMgxG89snxAY8qAYTCKyQw6xTG87wkjDcpy1wzsZLP3WsOuO7cAm7b27xU0jRKq8Cw4d1hDoyRG+RdS53F8RFJzVMaNNYgxU2tfRwUvXpTRXiOheeRVvh25+YGVnjakUXjx/dSDnOw4ETHGHD+7styDkeSfc3BdSZxswzc6OehgMI+xsCxeeRym15QUm9hxvg8X7Bfz/0WulgFwgzrm11TVynZYOmvyHpiZKoqQyQyKahIrfhwuchCr7lMsZ4a+umIkNkKxCLZnI+T7jd+eGFMgKItjz3kTTxRl3IhaJG3LbPmwRUJynMxQKdMi4Uf0qy0U7+i8hIJ9m50QXc+3tw2bwDSbx22XYJ9Wf14gxx5G5SPTb1JVCbhe4fxNt91xIxCow2zk62tzbYfRe6dfmDmgYHkv2PIEtMJZK8iKLDjFfu2ZUxsKT2A5g1q17og6o9MeXeuFS3mzJXJYFQZd+3UzlFR9qwkFkby9mg5y4XSeMvRLOHPt/H/r5SpEqBE6a9MadZYt61FBV152CUEzd43ihXtrAa0XH9HdsiySBcWI1SpM3mv9rRP0DiLjMUzHw/K1D8TE2f07zW4t/9kvE11tFj/NpICixQAAAAA=" sdmf_old_cap = "URI:SSK:gmjgofw6gan57gwpsow6gtrz3e:5adm6fayxmu3e4lkmfvt6lkkfix34ai2wop2ioqr4bgvvhiol3kq" sdmf_old_contents = "This is a test file.\n" def copy_sdmf_shares(self): # We'll basically 
# be short-circuiting the upload process.
        servernums = self.g.servers_by_number.keys()
        assert len(servernums) == 10
        assignments = zip(self.sdmf_old_shares.keys(), servernums)
        # Get the storage index.
        cap = uri.from_string(self.sdmf_old_cap)
        si = cap.get_storage_index()
        # Now execute each assignment by writing the storage.
        for (share, servernum) in assignments:
            sharedata = base64.b64decode(self.sdmf_old_shares[share])
            storedir = self.get_serverdir(servernum)
            storage_path = os.path.join(storedir, "shares",
                                        storage_index_to_dir(si))
            fileutil.make_dirs(storage_path)
            fileutil.write(os.path.join(storage_path, "%d" % share),
                           sharedata)
        # ...and verify that the shares are there.
        shares = self.find_uri_shares(self.sdmf_old_cap)
        assert len(shares) == 10

    def test_new_downloader_can_read_old_shares(self):
        self.basedir = "mutable/Interoperability/new_downloader_can_read_old_shares"
        self.set_up_grid()
        self.copy_sdmf_shares()
        nm = self.g.clients[0].nodemaker
        n = nm.create_from_cap(self.sdmf_old_cap)
        d = n.download_best_version()
        d.addCallback(self.failUnlessEqual, self.sdmf_old_contents)
        return d

class DifferentEncoding(unittest.TestCase):
    def setUp(self):
        self._storage = s = FakeStorage()
        self.nodemaker = make_nodemaker(s)

    def test_filenode(self):
        # create a file with 3-of-20, then modify it with a client configured
        # to do 3-of-10. #1510 tracks a failure here
        self.nodemaker.default_encoding_parameters["n"] = 20
        d = self.nodemaker.create_mutable_file("old contents")
        def _created(n):
            filecap = n.get_cap().to_string()
            del n # we want a new object, not the cached one
            self.nodemaker.default_encoding_parameters["n"] = 10
            n2 = self.nodemaker.create_from_cap(filecap)
            return n2
        d.addCallback(_created)
        def modifier(old_contents, servermap, first_time):
            return "new contents"
        d.addCallback(lambda n: n.modify(modifier))
        return d
tahoe-lafs-1.10.0/src/allmydata/test/test_netstring.py000066400000000000000000000034701221140116300227530ustar00rootroot00000000000000
from twisted.trial import unittest
from allmydata.util.netstring import netstring, split_netstring

class Netstring(unittest.TestCase):
    def test_split(self):
        a = netstring("hello") + netstring("world")
        self.failUnlessEqual(split_netstring(a, 2), (["hello", "world"], len(a)))
        self.failUnlessEqual(split_netstring(a, 2, required_trailer=""),
                             (["hello", "world"], len(a)))
        self.failUnlessRaises(ValueError, split_netstring, a, 3)
        self.failUnlessRaises(ValueError,
                              split_netstring, a+" extra", 2, required_trailer="")
        self.failUnlessEqual(split_netstring(a+" extra", 2),
                             (["hello", "world"], len(a)))
        self.failUnlessEqual(split_netstring(a+"++", 2, required_trailer="++"),
                             (["hello", "world"], len(a)+2))
        self.failUnlessRaises(ValueError,
                              split_netstring, a+"+", 2, required_trailer="not")

    def test_extra(self):
        a = netstring("hello")
        self.failUnlessEqual(split_netstring(a, 1), (["hello"], len(a)))
        b = netstring("hello") + "extra stuff"
        self.failUnlessEqual(split_netstring(b, 1), (["hello"], len(a)))

    def test_nested(self):
        a = netstring("hello") + netstring("world") + "extra stuff"
        b = netstring("a") + netstring("is") + netstring(a) + netstring(".")
        (top, pos) = split_netstring(b, 4)
        self.failUnlessEqual(len(top), 4)
        self.failUnlessEqual(top[0], "a")
        self.failUnlessEqual(top[1], "is")
        self.failUnlessEqual(top[2], a)
        self.failUnlessEqual(top[3], ".")
        self.failUnlessRaises(ValueError,
                              split_netstring, a, 2, required_trailer="")
        bottom = split_netstring(a, 2)
        self.failUnlessEqual(bottom, (["hello", "world"],
                                      len(netstring("hello")+netstring("world"))))
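# A minimal standalone sketch of the netstring framing that the tests above
# rely on: a netstring is "<decimal length>:<payload>,", so netstring("hello")
# is "5:hello,". The real implementation is allmydata.util.netstring (with
# split_netstring() handling counts and required trailers); the helpers below,
# with invented _sketch_* names, only make the wire format concrete and may
# differ from the library in detail.

def _sketch_netstring(s):
    # frame a byte string as "<decimal length>:<payload>,"
    return "%d:%s," % (len(s), s)

def _sketch_split_one(data, position):
    # parse a single netstring starting at `position`; return (payload, next position)
    colon = data.index(":", position)
    length = int(data[position:colon])
    payload = data[colon+1:colon+1+length]
    assert data[colon+1+length] == ",", "netstring must end with a comma"
    return (payload, colon+1+length+1)

# Example: two concatenated netstrings parse back into their payloads.
_wire = _sketch_netstring("hello") + _sketch_netstring("world")   # "5:hello,5:world,"
_p1, _pos = _sketch_split_one(_wire, 0)
_p2, _end = _sketch_split_one(_wire, _pos)
assert (_p1, _p2, _end) == ("hello", "world", len(_wire))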
tahoe-lafs-1.10.0/src/allmydata/test/test_no_network.py000066400000000000000000000022041221140116300231150ustar00rootroot00000000000000
# Test the NoNetworkGrid test harness

from twisted.trial import unittest
from twisted.application import service

from allmydata.test.no_network import NoNetworkGrid
from allmydata.immutable.upload import Data
from allmydata.util.consumer import download_to_data

class Harness(unittest.TestCase):
    def setUp(self):
        self.s = service.MultiService()
        self.s.startService()

    def tearDown(self):
        return self.s.stopService()

    def test_create(self):
        basedir = "no_network/Harness/create"
        g = NoNetworkGrid(basedir)
        g.startService()
        return g.stopService()

    def test_upload(self):
        basedir = "no_network/Harness/upload"
        g = NoNetworkGrid(basedir)
        g.setServiceParent(self.s)

        c0 = g.clients[0]
        DATA = "Data to upload" * 100
        data = Data(DATA, "")
        d = c0.upload(data)
        def _uploaded(res):
            n = c0.create_node_from_uri(res.get_uri())
            return download_to_data(n)
        d.addCallback(_uploaded)
        def _check(res):
            self.failUnlessEqual(res, DATA)
        d.addCallback(_check)
        return d
tahoe-lafs-1.10.0/src/allmydata/test/test_node.py000066400000000000000000000136361221140116300216700ustar00rootroot00000000000000
import os, stat, sys, time
from twisted.trial import unittest
from twisted.internet import defer
from twisted.python import log
from mock import patch
from foolscap.api import flushEventualQueue
from twisted.application import service
from allmydata.node import Node, formatTimeTahoeStyle, MissingConfigEntry
from allmydata.util import fileutil
import allmydata.test.common_util as testutil

class LoggingMultiService(service.MultiService):
    def log(self, msg, **kw):
        pass

class TestNode(Node):
    CERTFILE='DEFAULT_CERTFILE_BLANK'
    PORTNUMFILE='DEFAULT_PORTNUMFILE_BLANK'

class TestCase(testutil.SignalMixin, unittest.TestCase):
    def setUp(self):
        testutil.SignalMixin.setUp(self)
        self.parent = LoggingMultiService()
        self.parent.startService()

    def tearDown(self):
        log.msg("%s.tearDown" % self.__class__.__name__)
        testutil.SignalMixin.tearDown(self)
        d = defer.succeed(None)
        d.addCallback(lambda res: self.parent.stopService())
        d.addCallback(flushEventualQueue)
        return d

    def test_location(self):
        basedir = "test_node/test_location"
        fileutil.make_dirs(basedir)
        f = open(os.path.join(basedir, 'tahoe.cfg'), 'wt')
        f.write("[node]\n")
        f.write("tub.location = 1.2.3.4:5\n")
        f.close()

        n = TestNode(basedir)
        n.setServiceParent(self.parent)
        d = n.when_tub_ready()
        def _check_addresses(ignored_result):
            furl = n.tub.registerReference(n)
            self.failUnless("1.2.3.4:5" in furl, furl)
        d.addCallback(_check_addresses)
        return d

    def test_location2(self):
        basedir = "test_node/test_location2"
        fileutil.make_dirs(basedir)
        f = open(os.path.join(basedir, 'tahoe.cfg'), 'wt')
        f.write("[node]\n")
        f.write("tub.location = 1.2.3.4:5,example.org:8091\n")
        f.close()

        n = TestNode(basedir)
        n.setServiceParent(self.parent)
        d = n.when_tub_ready()
        def _check_addresses(ignored_result):
            furl = n.tub.registerReference(n)
            self.failUnless("1.2.3.4:5" in furl, furl)
            self.failUnless("example.org:8091" in furl, furl)
        d.addCallback(_check_addresses)
        return d

    def test_tahoe_cfg_utf8(self):
        basedir = "test_node/test_tahoe_cfg_utf8"
        fileutil.make_dirs(basedir)
        f = open(os.path.join(basedir, 'tahoe.cfg'), 'wt')
        f.write(u"\uFEFF[node]\n".encode('utf-8'))
        f.write(u"nickname = \u2621\n".encode('utf-8'))
        f.close()

        n = TestNode(basedir)
        n.setServiceParent(self.parent)
        d = n.when_tub_ready()
        d.addCallback(lambda ign: self.failUnlessEqual(n.get_config("node", "nickname").decode('utf-8'),
u"\u2621")) return d def test_private_config(self): basedir = "test_node/test_private_config" privdir = os.path.join(basedir, "private") fileutil.make_dirs(privdir) f = open(os.path.join(privdir, 'already'), 'wt') f.write("secret") f.close() n = TestNode(basedir) self.failUnlessEqual(n.get_private_config("already"), "secret") self.failUnlessEqual(n.get_private_config("not", "default"), "default") self.failUnlessRaises(MissingConfigEntry, n.get_private_config, "not") value = n.get_or_create_private_config("new", "start") self.failUnlessEqual(value, "start") self.failUnlessEqual(n.get_private_config("new"), "start") counter = [] def make_newer(): counter.append("called") return "newer" value = n.get_or_create_private_config("newer", make_newer) self.failUnlessEqual(len(counter), 1) self.failUnlessEqual(value, "newer") self.failUnlessEqual(n.get_private_config("newer"), "newer") value = n.get_or_create_private_config("newer", make_newer) self.failUnlessEqual(len(counter), 1) # don't call unless necessary self.failUnlessEqual(value, "newer") def test_timestamp(self): # this modified logger doesn't seem to get used during the tests, # probably because we don't modify the LogObserver that trial # installs (only the one that twistd installs). So manually exercise # it a little bit. t = formatTimeTahoeStyle("ignored", time.time()) self.failUnless("Z" in t) t2 = formatTimeTahoeStyle("ignored", int(time.time())) self.failUnless("Z" in t2) def test_secrets_dir(self): basedir = "test_node/test_secrets_dir" fileutil.make_dirs(basedir) n = TestNode(basedir) self.failUnless(isinstance(n, TestNode)) self.failUnless(os.path.exists(os.path.join(basedir, "private"))) def test_secrets_dir_protected(self): if "win32" in sys.platform.lower() or "cygwin" in sys.platform.lower(): # We don't know how to test that unprivileged users can't read this # thing. (Also we don't know exactly how to set the permissions so # that unprivileged users can't read this thing.) 
raise unittest.SkipTest("We don't know how to set permissions on Windows.") basedir = "test_node/test_secrets_dir_protected" fileutil.make_dirs(basedir) n = TestNode(basedir) self.failUnless(isinstance(n, TestNode)) privdir = os.path.join(basedir, "private") st = os.stat(privdir) bits = stat.S_IMODE(st[stat.ST_MODE]) self.failUnless(bits & 0001 == 0, bits) @patch("foolscap.logging.log.setLogDir") def test_logdir_is_str(self, mock_setLogDir): basedir = "test_node/test_logdir_is_str" fileutil.make_dirs(basedir) def call_setLogDir(logdir): self.failUnless(isinstance(logdir, str), logdir) mock_setLogDir.side_effect = call_setLogDir TestNode(basedir) self.failUnless(mock_setLogDir.called) tahoe-lafs-1.10.0/src/allmydata/test/test_observer.py000066400000000000000000000053101221140116300225600ustar00rootroot00000000000000 from twisted.trial import unittest from twisted.internet import defer, reactor from allmydata.util import observer def nextTurn(res=None): d = defer.Deferred() reactor.callLater(1, d.callback, res) return d class Observer(unittest.TestCase): def test_oneshot(self): ol = observer.OneShotObserverList() rep = repr(ol) self.failUnlessEqual(rep, "") d1 = ol.when_fired() d2 = ol.when_fired() def _addmore(res): self.failUnlessEqual(res, "result") d3 = ol.when_fired() d3.addCallback(self.failUnlessEqual, "result") return d3 d1.addCallback(_addmore) ol.fire("result") rep = repr(ol) self.failUnlessEqual(rep, " result>") d4 = ol.when_fired() dl = defer.DeferredList([d1,d2,d4]) return dl def test_oneshot_fireagain(self): ol = observer.OneShotObserverList() d = ol.when_fired() def _addmore(res): self.failUnlessEqual(res, "result") ol.fire_if_not_fired("result3") # should be ignored d2 = ol.when_fired() d2.addCallback(self.failUnlessEqual, "result") return d2 d.addCallback(_addmore) ol.fire_if_not_fired("result") ol.fire_if_not_fired("result2") return d def test_lazy_oneshot(self): ol = observer.LazyOneShotObserverList() d1 = ol.when_fired() d2 = ol.when_fired() def _addmore(res): self.failUnlessEqual(res, "result") d3 = ol.when_fired() d3.addCallback(self.failUnlessEqual, "result") return d3 d1.addCallback(_addmore) def _get_result(): return "result" ol.fire(_get_result) d4 = ol.when_fired() dl = defer.DeferredList([d1,d2,d4]) return dl def test_observerlist(self): ol = observer.ObserverList() l1 = [] l2 = [] l3 = [] ol.subscribe(l1.append) ol.notify(1) ol.subscribe(l2.append) ol.notify(2) ol.unsubscribe(l1.append) ol.notify(3) def _check(res): self.failUnlessEqual(l1, [1,2]) self.failUnlessEqual(l2, [2,3]) d = nextTurn() d.addCallback(_check) def _step2(res): def _add(a, b, c=None): l3.append((a,b,c)) ol.unsubscribe(l2.append) ol.subscribe(_add) ol.notify(4, 5, c=6) return nextTurn() def _check2(res): self.failUnlessEqual(l3, [(4,5,6)]) d.addCallback(_step2) d.addCallback(_check2) return d tahoe-lafs-1.10.0/src/allmydata/test/test_repairer.py000066400000000000000000001061501221140116300225460ustar00rootroot00000000000000# -*- coding: utf-8 -*- from allmydata.test import common from allmydata.monitor import Monitor from allmydata import check_results from allmydata.interfaces import NotEnoughSharesError from allmydata.immutable import upload from allmydata.util.consumer import download_to_data from twisted.internet import defer from twisted.trial import unittest import random from allmydata.test.no_network import GridTestMixin # We'll allow you to pass this test even if you trigger eighteen times as # many disk reads and block fetches as would be optimal. 
READ_LEEWAY = 18 MAX_DELTA_READS = 10 * READ_LEEWAY # N = 10 timeout=240 # François's ARM box timed out after 120 seconds of Verifier.test_corrupt_crypttext_hashtree class RepairTestMixin: def failUnlessIsInstance(self, x, xtype): self.failUnless(isinstance(x, xtype), x) def _count_reads(self): sum_of_read_counts = 0 for (i, ss, storedir) in self.iterate_servers(): counters = ss.stats_provider.get_stats()['counters'] sum_of_read_counts += counters.get('storage_server.read', 0) return sum_of_read_counts def _count_allocates(self): sum_of_allocate_counts = 0 for (i, ss, storedir) in self.iterate_servers(): counters = ss.stats_provider.get_stats()['counters'] sum_of_allocate_counts += counters.get('storage_server.allocate', 0) return sum_of_allocate_counts def _count_writes(self): sum_of_write_counts = 0 for (i, ss, storedir) in self.iterate_servers(): counters = ss.stats_provider.get_stats()['counters'] sum_of_write_counts += counters.get('storage_server.write', 0) return sum_of_write_counts def _stash_counts(self): self.before_repair_reads = self._count_reads() self.before_repair_allocates = self._count_allocates() self.before_repair_writes = self._count_writes() def _get_delta_counts(self): delta_reads = self._count_reads() - self.before_repair_reads delta_allocates = self._count_allocates() - self.before_repair_allocates delta_writes = self._count_writes() - self.before_repair_writes return (delta_reads, delta_allocates, delta_writes) def failIfBigger(self, x, y): self.failIf(x > y, "%s > %s" % (x, y)) def upload_and_stash(self): c0 = self.g.clients[0] c1 = self.g.clients[1] c0.DEFAULT_ENCODING_PARAMETERS['max_segment_size'] = 12 d = c0.upload(upload.Data(common.TEST_DATA, convergence="")) def _stash_uri(ur): self.uri = ur.get_uri() self.c0_filenode = c0.create_node_from_uri(ur.get_uri()) self.c1_filenode = c1.create_node_from_uri(ur.get_uri()) d.addCallback(_stash_uri) return d class Verifier(GridTestMixin, unittest.TestCase, RepairTestMixin): def test_check_without_verify(self): """Check says the file is healthy when none of the shares have been touched. It says that the file is unhealthy when all of them have been removed. It doesn't use any reads. 
""" self.basedir = "repairer/Verifier/check_without_verify" self.set_up_grid(num_clients=2) d = self.upload_and_stash() d.addCallback(lambda ignored: self._stash_counts()) d.addCallback(lambda ignored: self.c0_filenode.check(Monitor(), verify=False)) def _check(cr): self.failUnless(cr.is_healthy()) delta_reads, delta_allocates, delta_writes = self._get_delta_counts() self.failIfBigger(delta_reads, 0) d.addCallback(_check) def _remove_all(ignored): for sh in self.find_uri_shares(self.uri): self.delete_share(sh) d.addCallback(_remove_all) d.addCallback(lambda ignored: self._stash_counts()) d.addCallback(lambda ignored: self.c0_filenode.check(Monitor(), verify=False)) def _check2(cr): self.failIf(cr.is_healthy()) delta_reads, delta_allocates, delta_writes = self._get_delta_counts() self.failIfBigger(delta_reads, 0) d.addCallback(_check2) return d def _help_test_verify(self, corruptor, judgement, shnum=0, debug=False): self.set_up_grid(num_clients=2) d = self.upload_and_stash() d.addCallback(lambda ignored: self._stash_counts()) d.addCallback(lambda ignored: self.corrupt_shares_numbered(self.uri, [shnum],corruptor,debug=debug)) d.addCallback(lambda ignored: self.c1_filenode.check(Monitor(), verify=True)) def _check(vr): delta_reads, delta_allocates, delta_writes = self._get_delta_counts() self.failIfBigger(delta_reads, MAX_DELTA_READS) try: judgement(vr) except unittest.FailTest, e: # FailTest just uses e.args[0] == str new_arg = str(e.args[0]) + "\nvr.data is: " + str(vr.as_dict()) e.args = (new_arg,) raise d.addCallback(_check) return d def judge_no_problem(self, vr): """ Verify says the file is healthy when none of the shares have been touched in a way that matters. It doesn't use more than seven times as many reads as it needs.""" self.failUnless(vr.is_healthy(), (vr, vr.is_healthy(), vr.as_dict())) self.failUnlessEqual(vr.get_share_counter_good(), 10) self.failUnlessEqual(len(vr.get_sharemap()), 10) self.failUnlessEqual(vr.get_encoding_needed(), 3) self.failUnlessEqual(vr.get_encoding_expected(), 10) self.failUnlessEqual(vr.get_host_counter_good_shares(), 10) self.failUnlessEqual(len(vr.get_servers_responding()), 10) self.failUnlessEqual(len(vr.get_corrupt_shares()), 0) def test_ok_no_corruption(self): self.basedir = "repairer/Verifier/ok_no_corruption" return self._help_test_verify(common._corrupt_nothing, self.judge_no_problem) def test_ok_filedata_size(self): self.basedir = "repairer/Verifier/ok_filedatasize" return self._help_test_verify(common._corrupt_size_of_file_data, self.judge_no_problem) def test_ok_sharedata_size(self): self.basedir = "repairer/Verifier/ok_sharedata_size" return self._help_test_verify(common._corrupt_size_of_sharedata, self.judge_no_problem) def test_ok_segment_size(self): self.basedir = "repairer/Verifier/test_ok_segment_size" return self._help_test_verify(common._corrupt_segment_size, self.judge_no_problem) def judge_visible_corruption(self, vr): """Corruption which is detected by the server means that the server will send you back a Failure in response to get_bucket instead of giving you the share data. Test that verifier handles these answers correctly. 
It doesn't use more than seven times as many reads as it needs.""" self.failIf(vr.is_healthy(), (vr, vr.is_healthy(), vr.as_dict())) self.failUnlessEqual(vr.get_share_counter_good(), 9) self.failUnlessEqual(len(vr.get_sharemap()), 9) self.failUnlessEqual(vr.get_encoding_needed(), 3) self.failUnlessEqual(vr.get_encoding_expected(), 10) self.failUnlessEqual(vr.get_host_counter_good_shares(), 9) self.failUnlessEqual(len(vr.get_servers_responding()), 9) self.failUnlessEqual(len(vr.get_corrupt_shares()), 0) def test_corrupt_file_verno(self): self.basedir = "repairer/Verifier/corrupt_file_verno" return self._help_test_verify(common._corrupt_file_version_number, self.judge_visible_corruption) def judge_share_version_incompatibility(self, vr): # corruption of the share version (inside the container, the 1/2 # value that determines whether we've got 4-byte offsets or 8-byte # offsets) to something larger than 2 will trigger a # ShareVersionIncompatible exception, which should be counted in # list-incompatible-shares, rather than list-corrupt-shares. self.failIf(vr.is_healthy(), (vr, vr.is_healthy(), vr.as_dict())) self.failUnlessEqual(vr.get_share_counter_good(), 9) self.failUnlessEqual(len(vr.get_sharemap()), 9) self.failUnlessEqual(vr.get_encoding_needed(), 3) self.failUnlessEqual(vr.get_encoding_expected(), 10) self.failUnlessEqual(vr.get_host_counter_good_shares(), 9) self.failUnlessEqual(len(vr.get_servers_responding()), 10) self.failUnlessEqual(len(vr.get_corrupt_shares()), 0) self.failUnlessEqual(len(vr.get_incompatible_shares()), 1) def test_corrupt_share_verno(self): self.basedir = "repairer/Verifier/corrupt_share_verno" return self._help_test_verify(common._corrupt_sharedata_version_number, self.judge_share_version_incompatibility) def judge_invisible_corruption(self, vr): # corruption of fields that the server does not check (which is most # of them), which will be detected by the client as it downloads # those shares. 
self.failIf(vr.is_healthy(), (vr, vr.is_healthy(), vr.as_dict())) self.failUnlessEqual(vr.get_share_counter_good(), 9) self.failUnlessEqual(vr.get_encoding_needed(), 3) self.failUnlessEqual(vr.get_encoding_expected(), 10) self.failUnlessEqual(vr.get_host_counter_good_shares(), 9) self.failUnlessEqual(len(vr.get_corrupt_shares()), 1) self.failUnlessEqual(len(vr.get_incompatible_shares()), 0) self.failUnlessEqual(len(vr.get_servers_responding()), 10) self.failUnlessEqual(len(vr.get_sharemap()), 9) def test_corrupt_sharedata_offset(self): self.basedir = "repairer/Verifier/corrupt_sharedata_offset" return self._help_test_verify(common._corrupt_offset_of_sharedata, self.judge_invisible_corruption) def test_corrupt_ueb_offset(self): self.basedir = "repairer/Verifier/corrupt_ueb_offset" return self._help_test_verify(common._corrupt_offset_of_uri_extension, self.judge_invisible_corruption) def test_corrupt_ueb_offset_shortread(self): self.basedir = "repairer/Verifier/corrupt_ueb_offset_shortread" return self._help_test_verify(common._corrupt_offset_of_uri_extension_to_force_short_read, self.judge_invisible_corruption) def test_corrupt_sharedata(self): self.basedir = "repairer/Verifier/corrupt_sharedata" return self._help_test_verify(common._corrupt_share_data, self.judge_invisible_corruption) def test_corrupt_sharedata_last_byte(self): self.basedir = "repairer/Verifier/corrupt_sharedata_last_byte" return self._help_test_verify(common._corrupt_share_data_last_byte, self.judge_invisible_corruption) def test_corrupt_ueb_length(self): self.basedir = "repairer/Verifier/corrupt_ueb_length" return self._help_test_verify(common._corrupt_length_of_uri_extension, self.judge_invisible_corruption) def test_corrupt_ueb(self): self.basedir = "repairer/Verifier/corrupt_ueb" return self._help_test_verify(common._corrupt_uri_extension, self.judge_invisible_corruption) def test_truncate_crypttext_hashtree(self): # change the start of the block hashtree, to truncate the preceding # crypttext hashtree self.basedir = "repairer/Verifier/truncate_crypttext_hashtree" return self._help_test_verify(common._corrupt_offset_of_block_hashes_to_truncate_crypttext_hashes, self.judge_invisible_corruption) def test_corrupt_block_hashtree_offset(self): self.basedir = "repairer/Verifier/corrupt_block_hashtree_offset" return self._help_test_verify(common._corrupt_offset_of_block_hashes, self.judge_invisible_corruption) def test_wrong_share_verno(self): self.basedir = "repairer/Verifier/wrong_share_verno" return self._help_test_verify(common._corrupt_sharedata_version_number_to_plausible_version, self.judge_invisible_corruption) def test_corrupt_share_hashtree_offset(self): self.basedir = "repairer/Verifier/corrupt_share_hashtree_offset" return self._help_test_verify(common._corrupt_offset_of_share_hashes, self.judge_invisible_corruption) def test_corrupt_crypttext_hashtree_offset(self): self.basedir = "repairer/Verifier/corrupt_crypttext_hashtree_offset" return self._help_test_verify(common._corrupt_offset_of_ciphertext_hash_tree, self.judge_invisible_corruption) def test_corrupt_crypttext_hashtree(self): self.basedir = "repairer/Verifier/corrupt_crypttext_hashtree" return self._help_test_verify(common._corrupt_crypttext_hash_tree, self.judge_invisible_corruption) def test_corrupt_crypttext_hashtree_byte_x221(self): self.basedir = "repairer/Verifier/corrupt_crypttext_hashtree_byte_9_bit_7" return self._help_test_verify(common._corrupt_crypttext_hash_tree_byte_x221, self.judge_invisible_corruption, debug=True) def 
test_corrupt_block_hashtree(self): self.basedir = "repairer/Verifier/corrupt_block_hashtree" return self._help_test_verify(common._corrupt_block_hashes, self.judge_invisible_corruption) def test_corrupt_share_hashtree(self): self.basedir = "repairer/Verifier/corrupt_share_hashtree" return self._help_test_verify(common._corrupt_share_hashes, self.judge_invisible_corruption) # TODO: the Verifier should decode to ciphertext and check it against the # crypttext-hash-tree. Check this by constructing a bogus file, in which # the crypttext-hash-tree is modified after encoding is done, but before # the UEB is finalized. The Verifier should see a valid # crypttext-hash-tree but then the ciphertext should show up as invalid. # Normally this could only be triggered by a bug in FEC decode. def OFF_test_each_byte(self): # this test takes 140s to run on my laptop, and doesn't have any # actual asserts, so it's commented out. It corrupts each byte of the # share in sequence, and checks to see which ones the Verifier # catches and which it misses. Ticket #819 contains details: there # are several portions of the share that are unused, for which # corruption is not supposed to be caught. # # If the test ran quickly, we could use the share size to compute the # offsets of these unused portions and assert that everything outside # of them was detected. We could then replace the rest of # Verifier.test_* (which takes 16s to run on my laptop) with this # one. self.basedir = "repairer/Verifier/each_byte" self.set_up_grid(num_clients=2) d = self.upload_and_stash() def _grab_sh0(res): self.sh0_file = [sharefile for (shnum, serverid, sharefile) in self.find_uri_shares(self.uri) if shnum == 0][0] self.sh0_orig = open(self.sh0_file, "rb").read() d.addCallback(_grab_sh0) def _fix_sh0(res): f = open(self.sh0_file, "wb") f.write(self.sh0_orig) f.close() def _corrupt(ign, which): def _corruptor(s, debug=False): return s[:which] + chr(ord(s[which])^0x01) + s[which+1:] self.corrupt_shares_numbered(self.uri, [0], _corruptor) results = {} def _did_check(vr, i): #print "corrupt %d: healthy=%s" % (i, vr.is_healthy()) results[i] = vr.is_healthy() def _start(ign): d = defer.succeed(None) for i in range(len(self.sh0_orig)): d.addCallback(_corrupt, i) d.addCallback(lambda ign: self.c1_filenode.check(Monitor(), verify=True)) d.addCallback(_did_check, i) d.addCallback(_fix_sh0) return d d.addCallback(_start) def _show_results(ign): f = open("test_each_byte_output", "w") for i in sorted(results.keys()): print >>f, "%d: %s" % (i, results[i]) f.close() print "Please look in _trial_temp/test_each_byte_output for results" d.addCallback(_show_results) return d # We'll allow you to pass this test even if you trigger thirty-five times as # many block sends and disk writes as would be optimal. WRITE_LEEWAY = 35 # Optimally, you could repair one of these (small) files in a single write. DELTA_WRITES_PER_SHARE = 1 * WRITE_LEEWAY class Repairer(GridTestMixin, unittest.TestCase, RepairTestMixin, common.ShouldFailMixin): def test_harness(self): # This test is actually to make sure our test harness works, rather # than testing anything about Tahoe code itself. 
self.basedir = "repairer/Repairer/test_code" self.set_up_grid(num_clients=2) d = self.upload_and_stash() d.addCallback(lambda ignored: self.find_uri_shares(self.uri)) def _stash_shares(oldshares): self.oldshares = oldshares d.addCallback(_stash_shares) d.addCallback(lambda ignored: self.find_uri_shares(self.uri)) def _compare(newshares): self.failUnlessEqual(newshares, self.oldshares) d.addCallback(_compare) def _delete_8(ignored): shnum = self.oldshares[0][0] self.delete_shares_numbered(self.uri, [shnum]) for sh in self.oldshares[1:8]: self.delete_share(sh) d.addCallback(_delete_8) d.addCallback(lambda ignored: self.find_uri_shares(self.uri)) d.addCallback(lambda shares: self.failUnlessEqual(len(shares), 2)) d.addCallback(lambda ignored: self.shouldFail(NotEnoughSharesError, "then_download", None, download_to_data, self.c1_filenode)) d.addCallback(lambda ignored: self.shouldFail(NotEnoughSharesError, "then_repair", None, self.c1_filenode.check_and_repair, Monitor(), verify=False)) # test share corruption def _test_corrupt(ignored): olddata = {} shares = self.find_uri_shares(self.uri) for (shnum, serverid, sharefile) in shares: olddata[ (shnum, serverid) ] = open(sharefile, "rb").read() for sh in shares: self.corrupt_share(sh, common._corrupt_uri_extension) for (shnum, serverid, sharefile) in shares: newdata = open(sharefile, "rb").read() self.failIfEqual(olddata[ (shnum, serverid) ], newdata) d.addCallback(_test_corrupt) def _remove_all(ignored): for sh in self.find_uri_shares(self.uri): self.delete_share(sh) d.addCallback(_remove_all) d.addCallback(lambda ignored: self.find_uri_shares(self.uri)) d.addCallback(lambda shares: self.failUnlessEqual(shares, [])) return d def test_repair_from_deletion_of_1(self): """ Repair replaces a share that got deleted. """ self.basedir = "repairer/Repairer/repair_from_deletion_of_1" self.set_up_grid(num_clients=2) d = self.upload_and_stash() d.addCallback(lambda ignored: self.delete_shares_numbered(self.uri, [2])) d.addCallback(lambda ignored: self._stash_counts()) d.addCallback(lambda ignored: self.c0_filenode.check_and_repair(Monitor(), verify=False)) def _check_results(crr): self.failUnlessIsInstance(crr, check_results.CheckAndRepairResults) pre = crr.get_pre_repair_results() self.failUnlessIsInstance(pre, check_results.CheckResults) post = crr.get_post_repair_results() self.failUnlessIsInstance(post, check_results.CheckResults) delta_reads, delta_allocates, delta_writes = self._get_delta_counts() self.failIfBigger(delta_reads, MAX_DELTA_READS) self.failIfBigger(delta_allocates, DELTA_WRITES_PER_SHARE) self.failIf(pre.is_healthy()) self.failUnless(post.is_healthy()) # Now we inspect the filesystem to make sure that it has 10 # shares. shares = self.find_uri_shares(self.uri) self.failIf(len(shares) < 10) d.addCallback(_check_results) d.addCallback(lambda ignored: self.c0_filenode.check(Monitor(), verify=True)) d.addCallback(lambda vr: self.failUnless(vr.is_healthy())) # Now we delete seven of the other shares, then try to download the # file and assert that it succeeds at downloading and has the right # contents. This can't work unless it has already repaired the # previously-deleted share #2. d.addCallback(lambda ignored: self.delete_shares_numbered(self.uri, range(3, 10+1))) d.addCallback(lambda ignored: download_to_data(self.c1_filenode)) d.addCallback(lambda newdata: self.failUnlessEqual(newdata, common.TEST_DATA)) return d def test_repair_from_deletion_of_7(self): """ Repair replaces seven shares that got deleted. 
""" self.basedir = "repairer/Repairer/repair_from_deletion_of_7" self.set_up_grid(num_clients=2) d = self.upload_and_stash() d.addCallback(lambda ignored: self.delete_shares_numbered(self.uri, range(7))) d.addCallback(lambda ignored: self._stash_counts()) d.addCallback(lambda ignored: self.c0_filenode.check_and_repair(Monitor(), verify=False)) def _check_results(crr): self.failUnlessIsInstance(crr, check_results.CheckAndRepairResults) pre = crr.get_pre_repair_results() self.failUnlessIsInstance(pre, check_results.CheckResults) post = crr.get_post_repair_results() self.failUnlessIsInstance(post, check_results.CheckResults) delta_reads, delta_allocates, delta_writes = self._get_delta_counts() self.failIfBigger(delta_reads, MAX_DELTA_READS) self.failIfBigger(delta_allocates, (DELTA_WRITES_PER_SHARE * 7)) self.failIf(pre.is_healthy()) self.failUnless(post.is_healthy(), post.as_dict()) # Make sure we really have 10 shares. shares = self.find_uri_shares(self.uri) self.failIf(len(shares) < 10) d.addCallback(_check_results) d.addCallback(lambda ignored: self.c0_filenode.check(Monitor(), verify=True)) d.addCallback(lambda vr: self.failUnless(vr.is_healthy())) # Now we delete seven of the other shares, then try to download the # file and assert that it succeeds at downloading and has the right # contents. This can't work unless it has already repaired the # previously-deleted share #2. d.addCallback(lambda ignored: self.delete_shares_numbered(self.uri, range(3, 10+1))) d.addCallback(lambda ignored: download_to_data(self.c1_filenode)) d.addCallback(lambda newdata: self.failUnlessEqual(newdata, common.TEST_DATA)) return d def test_repairer_servers_of_happiness(self): # The repairer is supposed to generate and place as many of the # missing shares as possible without caring about how they are # distributed. self.basedir = "repairer/Repairer/repairer_servers_of_happiness" self.set_up_grid(num_clients=2, num_servers=10) d = self.upload_and_stash() # Now delete some servers. We want to leave 3 servers, which # will allow us to restore the file to a healthy state without # distributing the shares widely enough to satisfy the default # happiness setting. def _delete_some_servers(ignored): for i in xrange(7): self.g.remove_server(self.g.servers_by_number[i].my_nodeid) assert len(self.g.servers_by_number) == 3 d.addCallback(_delete_some_servers) # Now try to repair the file. d.addCallback(lambda ignored: self.c0_filenode.check_and_repair(Monitor(), verify=False)) def _check_results(crr): self.failUnlessIsInstance(crr, check_results.CheckAndRepairResults) pre = crr.get_pre_repair_results() post = crr.get_post_repair_results() for p in (pre, post): self.failUnlessIsInstance(p, check_results.CheckResults) self.failIf(pre.is_healthy()) self.failUnless(post.is_healthy()) d.addCallback(_check_results) return d # why is test_repair_from_corruption_of_1 disabled? 
Read on: # # As recently documented in NEWS.rst for the 1.3.0 release, the current # immutable repairer suffers from several limitations: # # * minimalistic verifier: it's just download without decryption, so we # don't look for corruption in N-k shares, and for many fields (those # which are the same in all shares) we only look for corruption in a # single share # # * some kinds of corruption cause download to fail (when it ought to # just switch to a different share), so repair will fail on these too # # * RIStorageServer doesn't offer a way to delete old corrupt immutable # shares (the authority model is not at all clear), so the best the # repairer can do is to put replacement shares on new servers, # unfortunately leaving the corrupt shares in place # # This test is pretty strenuous: it asserts that the repairer does the # ideal thing in 8 distinct situations, with randomized corruption in # each. Because of the aforementioned limitations, it is highly unlikely # to pass any of these. We're also concerned that the download-fails case # can provoke a lost-progress bug (one was fixed, but there might be more # lurking), which will cause the test to fail despite a ".todo" marker, # and will probably cause subsequent unrelated tests to fail too (due to # "unclean reactor" problems). # # In addition, I (warner) have recently refactored the rest of this class # to use the much-faster no_network.GridTestMixin, so this tests needs to # be updated before it will be able to run again. # # So we're turning this test off until we've done one or more of the # following: # * remove some of these limitations # * break the test up into smaller, more functionally-oriented pieces # * simplify the repairer enough to let us be confident that it is free # of lost-progress bugs def OFF_test_repair_from_corruption_of_1(self): d = defer.succeed(None) d.addCallback(self.find_all_shares) stash = [None] def _stash_it(res): stash[0] = res return res d.addCallback(_stash_it) def _put_it_all_back(ignored): self.replace_shares(stash[0], storage_index=self.uri.get_storage_index()) return ignored def _repair_from_corruption(shnum, corruptor_func): before_repair_reads = self._count_reads() before_repair_allocates = self._count_writes() d2 = self.filenode.check_and_repair(Monitor(), verify=True) def _after_repair(checkandrepairresults): prerepairres = checkandrepairresults.get_pre_repair_results() postrepairres = checkandrepairresults.get_post_repair_results() after_repair_reads = self._count_reads() after_repair_allocates = self._count_writes() # The "* 2" in reads is because you might read a whole share # before figuring out that it is corrupted. It might be # possible to make this delta reads number a little tighter. self.failIf(after_repair_reads - before_repair_reads > (MAX_DELTA_READS * 2), (after_repair_reads, before_repair_reads)) # The "* 2" in writes is because each server has two shares, # and it is reasonable for repairer to conclude that there # are two shares that it should upload, if the server fails # to serve the first share. self.failIf(after_repair_allocates - before_repair_allocates > (DELTA_WRITES_PER_SHARE * 2), (after_repair_allocates, before_repair_allocates)) self.failIf(prerepairres.is_healthy(), (prerepairres.data, corruptor_func)) self.failUnless(postrepairres.is_healthy(), (postrepairres.data, corruptor_func)) # Now we inspect the filesystem to make sure that it has 10 # shares. 
shares = self.find_all_shares() self.failIf(len(shares) < 10) # Now we assert that the verifier reports the file as healthy. d3 = self.filenode.check(Monitor(), verify=True) def _after_verify(verifyresults): self.failUnless(verifyresults.is_healthy()) d3.addCallback(_after_verify) # Now we delete seven of the other shares, then try to # download the file and assert that it succeeds at # downloading and has the right contents. This can't work # unless it has already repaired the previously-corrupted share. def _then_delete_7_and_try_a_download(unused=None): shnums = range(10) shnums.remove(shnum) random.shuffle(shnums) for sharenum in shnums[:7]: self._delete_a_share(sharenum=sharenum) return self._download_and_check_plaintext() d3.addCallback(_then_delete_7_and_try_a_download) return d3 d2.addCallback(_after_repair) return d2 for corruptor_func in ( common._corrupt_file_version_number, common._corrupt_sharedata_version_number, common._corrupt_offset_of_sharedata, common._corrupt_offset_of_uri_extension, common._corrupt_offset_of_uri_extension_to_force_short_read, common._corrupt_share_data, common._corrupt_length_of_uri_extension, common._corrupt_uri_extension, ): # Now we corrupt a share... d.addCallback(self._corrupt_a_random_share, corruptor_func) # And repair... d.addCallback(_repair_from_corruption, corruptor_func) return d #test_repair_from_corruption_of_1.todo = "Repairer doesn't properly replace corrupted shares yet." def test_tiny_reads(self): # ticket #1223 points out three problems: # repairer reads beyond end of input file # new-downloader does not tolerate overreads # uploader does lots of tiny reads, inefficient self.basedir = "repairer/Repairer/test_tiny_reads" self.set_up_grid() c0 = self.g.clients[0] DATA = "a"*135 c0.DEFAULT_ENCODING_PARAMETERS['k'] = 22 c0.DEFAULT_ENCODING_PARAMETERS['n'] = 66 d = c0.upload(upload.Data(DATA, convergence="")) def _then(ur): self.uri = ur.get_uri() self.delete_shares_numbered(self.uri, [0]) self.c0_filenode = c0.create_node_from_uri(ur.get_uri()) self._stash_counts() return self.c0_filenode.check_and_repair(Monitor()) d.addCallback(_then) def _check(ign): (r,a,w) = self._get_delta_counts() # when the uploader (driven by the repairer) does full-segment # reads, this makes 44 server read calls (2*k). Before, when it # was doing input_chunk_size reads (7 bytes), it was doing over # 400. 
self.failIf(r > 100, "too many reads: %d>100" % r) d.addCallback(_check) return d def test_servers_responding(self): self.basedir = "repairer/Repairer/servers_responding" self.set_up_grid(num_clients=2) d = self.upload_and_stash() # now cause one of the servers to not respond during the pre-repair # filecheck, but then *do* respond to the post-repair filecheck def _then(ign): ss = self.g.servers_by_number[0] self.g.break_server(ss.my_nodeid, count=1) self.delete_shares_numbered(self.uri, [9]) return self.c0_filenode.check_and_repair(Monitor()) d.addCallback(_then) def _check(rr): # this exercises a bug in which the servers-responding list did # not include servers that responded to the Repair, but which did # not respond to the pre-repair filecheck prr = rr.get_post_repair_results() expected = set(self.g.get_all_serverids()) responding_set = frozenset([s.get_serverid() for s in prr.get_servers_responding()]) self.failIf(expected - responding_set, expected - responding_set) self.failIf(responding_set - expected, responding_set - expected) self.failUnlessEqual(expected, set([s.get_serverid() for s in prr.get_servers_responding()])) d.addCallback(_check) return d # XXX extend these tests to show that the checker detects which specific # share on which specific server is broken -- this is necessary so that the # checker results can be passed to the repairer and the repairer can go ahead # and upload fixes without first doing what is effectively a check (/verify) # run # XXX extend these tests to show bad behavior of various kinds from servers: # raising exception from each remove_foo() method, for example # XXX test disconnect DeadReferenceError from get_buckets and get_block_whatsit # XXX test corruption that truncates other hash trees than just the crypttext # hash tree # XXX test the notify-someone-about-corruption feature (also implement that # feature) # XXX test whether repairer (downloader) correctly downloads a file even if # to do so it has to acquire shares from a server that has already tried to # serve it a corrupted share. (I don't think the current downloader would # pass this test, depending on the kind of corruption.) 
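# A minimal sketch of what a "corruptor" looks like in the Verifier/Repairer
# tests above. _help_test_verify() hands a corruptor to
# corrupt_shares_numbered(), which rewrites the selected share files with
# whatever bytes the corruptor returns; the real corruptors are the
# _corrupt_* functions in allmydata.test.common. The helper below, a
# single-bit flip in the style of the inline _corruptor used by
# OFF_test_each_byte, uses an invented name and a hypothetical offset and is
# only meant to show the expected signature and behavior.

def _sketch_flip_bit(share_bytes, offset, debug=False):
    # return a copy of the share with the low bit of the byte at `offset` inverted
    flipped = chr(ord(share_bytes[offset]) ^ 0x01)
    return share_bytes[:offset] + flipped + share_bytes[offset+1:]

# Typical use inside one of these tests (offset 0x24 is hypothetical):
#   corruptor = lambda s, debug=False: _sketch_flip_bit(s, 0x24)
#   self.corrupt_shares_numbered(self.uri, [0], corruptor)
#   d = self.c1_filenode.check(Monitor(), verify=True)
#   d.addCallback(lambda vr: self.failIf(vr.is_healthy()))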
tahoe-lafs-1.10.0/src/allmydata/test/test_runner.py000066400000000000000000001010001221140116300222330ustar00rootroot00000000000000from twisted.trial import unittest from twisted.python import usage, runtime from twisted.internet import threads import os.path, re, sys, subprocess from cStringIO import StringIO from allmydata.util import fileutil, pollmixin from allmydata.util.encodingutil import unicode_to_argv, unicode_to_output, get_filesystem_encoding from allmydata.scripts import runner from allmydata.test import common_util import allmydata timeout = 240 def get_root_from_file(src): srcdir = os.path.dirname(os.path.dirname(os.path.normcase(os.path.realpath(src)))) root = os.path.dirname(srcdir) if os.path.basename(srcdir) == 'site-packages': if re.search(r'python.+\..+', os.path.basename(root)): root = os.path.dirname(root) root = os.path.dirname(root) elif os.path.basename(root) == 'src': root = os.path.dirname(root) return root srcfile = allmydata.__file__ rootdir = get_root_from_file(srcfile) if hasattr(sys, 'frozen'): bintahoe = os.path.join(rootdir, 'tahoe') if sys.platform == "win32" and os.path.exists(bintahoe + '.exe'): bintahoe += '.exe' else: bintahoe = os.path.join(rootdir, 'bin', 'tahoe') if sys.platform == "win32": bintahoe += '.pyscript' if not os.path.exists(bintahoe): alt_bintahoe = os.path.join(rootdir, 'Scripts', 'tahoe.pyscript') if os.path.exists(alt_bintahoe): bintahoe = alt_bintahoe class RunBinTahoeMixin: def skip_if_cannot_run_bintahoe(self): if not os.path.exists(bintahoe): raise unittest.SkipTest("The bin/tahoe script isn't to be found in the expected location (%s), and I don't want to test a 'tahoe' executable that I find somewhere else, in case it isn't the right executable for this version of Tahoe. Perhaps running 'setup.py build' again will help." % (bintahoe,)) def skip_if_cannot_daemonize(self): self.skip_if_cannot_run_bintahoe() if runtime.platformType == "win32": # twistd on windows doesn't daemonize. cygwin should work normally. raise unittest.SkipTest("twistd does not fork under windows") def run_bintahoe(self, args, stdin=None, python_options=[], env=None): self.skip_if_cannot_run_bintahoe() if hasattr(sys, 'frozen'): if python_options: raise unittest.SkipTest("This test doesn't apply to frozen builds.") command = [bintahoe] + args else: command = [sys.executable] + python_options + [bintahoe] + args if stdin is None: stdin_stream = None else: stdin_stream = subprocess.PIPE def _run(): p = subprocess.Popen(command, stdin=stdin_stream, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env) (out, err) = p.communicate(stdin) return (out, err, p.returncode) return threads.deferToThread(_run) class BinTahoe(common_util.SignalMixin, unittest.TestCase, RunBinTahoeMixin): def _check_right_code(self, file_to_check): root_to_check = get_root_from_file(file_to_check) if os.path.basename(root_to_check) == 'dist': root_to_check = os.path.dirname(root_to_check) cwd = os.path.normcase(os.path.realpath(".")) root_from_cwd = os.path.dirname(cwd) if os.path.basename(root_from_cwd) == 'src': root_from_cwd = os.path.dirname(root_from_cwd) # This is needed if we are running in a temporary directory created by 'make tmpfstest'. 
if os.path.basename(root_from_cwd).startswith('tmp'): root_from_cwd = os.path.dirname(root_from_cwd) same = (root_from_cwd == root_to_check) if not same: try: same = os.path.samefile(root_from_cwd, root_to_check) except AttributeError, e: e # hush pyflakes if not same: msg = ("We seem to be testing the code at %r,\n" "(according to the source filename %r),\n" "but expected to be testing the code at %r.\n" % (root_to_check, file_to_check, root_from_cwd)) root_from_cwdu = os.path.dirname(os.path.normcase(os.path.normpath(os.getcwdu()))) if os.path.basename(root_from_cwdu) == u'src': root_from_cwdu = os.path.dirname(root_from_cwdu) # This is needed if we are running in a temporary directory created by 'make tmpfstest'. if os.path.basename(root_from_cwdu).startswith(u'tmp'): root_from_cwdu = os.path.dirname(root_from_cwdu) if not isinstance(root_from_cwd, unicode) and root_from_cwd.decode(get_filesystem_encoding(), 'replace') != root_from_cwdu: msg += ("However, this may be a false alarm because the current directory path\n" "is not representable in the filesystem encoding. Please run the tests\n" "from the root of the Tahoe-LAFS distribution at a non-Unicode path.") raise unittest.SkipTest(msg) else: msg += "Please run the tests from the root of the Tahoe-LAFS distribution." self.fail(msg) def test_the_right_code(self): self._check_right_code(srcfile) def test_import_in_repl(self): d = self.run_bintahoe(["debug", "repl"], stdin="import allmydata; print; print allmydata.__file__") def _cb(res): out, err, rc_or_sig = res self.failUnlessEqual(rc_or_sig, 0, str(res)) lines = out.splitlines() self.failUnlessIn('>>>', lines[0], str(res)) self._check_right_code(lines[1]) d.addCallback(_cb) return d # The timeout was exceeded on FreeStorm's CentOS5-i386. test_import_in_repl.timeout = 480 def test_path(self): d = self.run_bintahoe(["--version-and-path"]) def _cb(res): from allmydata import normalized_version out, err, rc_or_sig = res self.failUnlessEqual(rc_or_sig, 0, str(res)) # Fail unless the allmydata-tahoe package is *this* version *and* # was loaded from *this* source directory. required_verstr = str(allmydata.__version__) self.failIfEqual(required_verstr, "unknown", "We don't know our version, because this distribution didn't come " "with a _version.py and 'setup.py update_version' hasn't been run.") srcdir = os.path.dirname(os.path.dirname(os.path.normcase(os.path.realpath(srcfile)))) info = repr((res, allmydata.__appname__, required_verstr, srcdir)) appverpath = out.split(')')[0] (appver, path) = appverpath.split(' (') (app, ver) = appver.split(': ') self.failUnlessEqual(app, allmydata.__appname__, info) norm_ver = normalized_version(ver) norm_required = normalized_version(required_verstr) self.failUnlessEqual(norm_ver, norm_required, info) self.failUnlessEqual(path, srcdir, info) d.addCallback(_cb) return d def test_unicode_arguments_and_output(self): self.skip_if_cannot_run_bintahoe() tricky = u"\u2621" try: tricky_arg = unicode_to_argv(tricky, mangle=True) tricky_out = unicode_to_output(tricky) except UnicodeEncodeError: raise unittest.SkipTest("A non-ASCII argument/output could not be encoded on this platform.") d = self.run_bintahoe([tricky_arg]) def _cb(res): out, err, rc_or_sig = res self.failUnlessEqual(rc_or_sig, 1, str(res)) self.failUnlessIn("Unknown command: "+tricky_out, out) d.addCallback(_cb) return d def test_run_with_python_options(self): # -t is a harmless option that warns about tabs. 
d = self.run_bintahoe(["--version"], python_options=["-t"]) def _cb(res): out, err, rc_or_sig = res self.failUnlessEqual(rc_or_sig, 0, str(res)) self.failUnless(out.startswith(allmydata.__appname__+':'), str(res)) d.addCallback(_cb) return d def test_version_no_noise(self): self.skip_if_cannot_run_bintahoe() d = self.run_bintahoe(["--version"]) def _cb(res): out, err, rc_or_sig = res self.failUnlessEqual(rc_or_sig, 0, str(res)) self.failUnless(out.startswith(allmydata.__appname__+':'), str(res)) self.failIfIn("DeprecationWarning", out, str(res)) errlines = err.split("\n") self.failIf([True for line in errlines if (line != "" and "UserWarning: Unbuilt egg for setuptools" not in line and "from pkg_resources import load_entry_point" not in line)], str(res)) if err != "": raise unittest.SkipTest("This test is known not to pass on Ubuntu Lucid; see #1235.") d.addCallback(_cb) return d class CreateNode(unittest.TestCase): # exercise "tahoe create-node", create-introducer, # create-key-generator, and create-stats-gatherer, by calling the # corresponding code as a subroutine. def workdir(self, name): basedir = os.path.join("test_runner", "CreateNode", name) fileutil.make_dirs(basedir) return basedir def run_tahoe(self, argv): out,err = StringIO(), StringIO() rc = runner.runner(argv, stdout=out, stderr=err) return rc, out.getvalue(), err.getvalue() def do_create(self, kind): basedir = self.workdir("test_" + kind) command = "create-" + kind is_client = kind in ("node", "client") tac = is_client and "tahoe-client.tac" or ("tahoe-" + kind + ".tac") n1 = os.path.join(basedir, command + "-n1") argv = ["--quiet", command, "--basedir", n1] rc, out, err = self.run_tahoe(argv) self.failUnlessEqual(err, "") self.failUnlessEqual(out, "") self.failUnlessEqual(rc, 0) self.failUnless(os.path.exists(n1)) self.failUnless(os.path.exists(os.path.join(n1, tac))) if is_client: # tahoe.cfg should exist, and should have storage enabled for # 'create-node', and disabled for 'create-client'. tahoe_cfg = os.path.join(n1, "tahoe.cfg") self.failUnless(os.path.exists(tahoe_cfg)) content = fileutil.read(tahoe_cfg).replace('\r\n', '\n') if kind == "client": self.failUnless(re.search(r"\n\[storage\]\n#.*\nenabled = false\n", content), content) else: self.failUnless(re.search(r"\n\[storage\]\n#.*\nenabled = true\n", content), content) self.failUnless("\nreserved_space = 1G\n" in content) self.failUnless(re.search(r"\n\[drop_upload\]\n#.*\nenabled = false\n", content), content) # creating the node a second time should be rejected rc, out, err = self.run_tahoe(argv) self.failIfEqual(rc, 0, str((out, err, rc))) self.failUnlessEqual(out, "") self.failUnless("is not empty." in err) # Fail if there is a non-empty line that doesn't end with a # punctuation mark. 
for line in err.splitlines(): self.failIf(re.search("[\S][^\.!?]$", line), (line,)) # test that the non --basedir form works too n2 = os.path.join(basedir, command + "-n2") argv = ["--quiet", command, n2] rc, out, err = self.run_tahoe(argv) self.failUnlessEqual(err, "") self.failUnlessEqual(out, "") self.failUnlessEqual(rc, 0) self.failUnless(os.path.exists(n2)) self.failUnless(os.path.exists(os.path.join(n2, tac))) # test the --node-directory form n3 = os.path.join(basedir, command + "-n3") argv = ["--quiet", "--node-directory", n3, command] rc, out, err = self.run_tahoe(argv) self.failUnlessEqual(err, "") self.failUnlessEqual(out, "") self.failUnlessEqual(rc, 0) self.failUnless(os.path.exists(n3)) self.failUnless(os.path.exists(os.path.join(n3, tac))) # make sure it rejects too many arguments argv = [command, "basedir", "extraarg"] self.failUnlessRaises(usage.UsageError, runner.runner, argv, run_by_human=False) # when creating a non-client, there is no default for the basedir if not is_client: argv = [command] self.failUnlessRaises(usage.UsageError, runner.runner, argv, run_by_human=False) def test_node(self): self.do_create("node") def test_client(self): # create-client should behave like create-node --no-storage. self.do_create("client") def test_introducer(self): self.do_create("introducer") def test_key_generator(self): self.do_create("key-generator") def test_stats_gatherer(self): self.do_create("stats-gatherer") def test_subcommands(self): # no arguments should trigger a command listing, via UsageError self.failUnlessRaises(usage.UsageError, runner.runner, [], run_by_human=False) class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin, RunBinTahoeMixin): # exercise "tahoe start", for both introducer, client node, and # key-generator, by spawning "tahoe start" as a subprocess. This doesn't # get us figleaf-based line-level coverage, but it does a better job of # confirming that the user can actually run "./bin/tahoe start" and # expect it to work. This verifies that bin/tahoe sets up PYTHONPATH and # the like correctly. # This doesn't work on cygwin (it hangs forever), so we skip this test # when we're on cygwin. It is likely that "tahoe start" itself doesn't # work on cygwin: twisted seems unable to provide a version of # spawnProcess which really works there. def workdir(self, name): basedir = os.path.join("test_runner", "RunNode", name) fileutil.make_dirs(basedir) return basedir def test_introducer(self): self.skip_if_cannot_daemonize() basedir = self.workdir("test_introducer") c1 = os.path.join(basedir, "c1") HOTLINE_FILE = os.path.join(c1, "suicide_prevention_hotline") TWISTD_PID_FILE = os.path.join(c1, "twistd.pid") INTRODUCER_FURL_FILE = os.path.join(c1, "private", "introducer.furl") PORTNUM_FILE = os.path.join(c1, "introducer.port") NODE_URL_FILE = os.path.join(c1, "node.url") CONFIG_FILE = os.path.join(c1, "tahoe.cfg") d = self.run_bintahoe(["--quiet", "create-introducer", "--basedir", c1]) def _cb(res): out, err, rc_or_sig = res self.failUnlessEqual(rc_or_sig, 0) # This makes sure that node.url is written, which allows us to # detect when the introducer restarts in _node_has_restarted below. config = fileutil.read(CONFIG_FILE) self.failUnlessIn('\nweb.port = \n', config) fileutil.write(CONFIG_FILE, config.replace('\nweb.port = \n', '\nweb.port = 0\n')) # by writing this file, we get ten seconds before the node will # exit. This insures that even if the test fails (and the 'stop' # command doesn't work), the client should still terminate. 
fileutil.write(HOTLINE_FILE, "") # now it's safe to start the node d.addCallback(_cb) def _then_start_the_node(res): return self.run_bintahoe(["--quiet", "start", c1]) d.addCallback(_then_start_the_node) def _cb2(res): out, err, rc_or_sig = res fileutil.write(HOTLINE_FILE, "") errstr = "rc=%d, OUT: '%s', ERR: '%s'" % (rc_or_sig, out, err) self.failUnlessEqual(rc_or_sig, 0, errstr) self.failUnlessEqual(out, "", errstr) # self.failUnlessEqual(err, "", errstr) # See test_client_no_noise -- for now we ignore noise. # the parent (twistd) has exited. However, twistd writes the pid # from the child, not the parent, so we can't expect twistd.pid # to exist quite yet. # the node is running, but it might not have made it past the # first reactor turn yet, and if we kill it too early, it won't # remove the twistd.pid file. So wait until it does something # that we know it won't do until after the first turn. d.addCallback(_cb2) def _node_has_started(): return os.path.exists(INTRODUCER_FURL_FILE) d.addCallback(lambda res: self.poll(_node_has_started)) def _started(res): # read the introducer.furl and introducer.port files so we can # check that their contents don't change on restart self.furl = fileutil.read(INTRODUCER_FURL_FILE) self.failUnless(os.path.exists(PORTNUM_FILE)) self.portnum = fileutil.read(PORTNUM_FILE) fileutil.write(HOTLINE_FILE, "") self.failUnless(os.path.exists(TWISTD_PID_FILE)) self.failUnless(os.path.exists(NODE_URL_FILE)) # rm this so we can detect when the second incarnation is ready os.unlink(NODE_URL_FILE) return self.run_bintahoe(["--quiet", "restart", c1]) d.addCallback(_started) def _then(res): out, err, rc_or_sig = res fileutil.write(HOTLINE_FILE, "") errstr = "rc=%d, OUT: '%s', ERR: '%s'" % (rc_or_sig, out, err) self.failUnlessEqual(rc_or_sig, 0, errstr) self.failUnlessEqual(out, "", errstr) # self.failUnlessEqual(err, "", errstr) # See test_client_no_noise -- for now we ignore noise. d.addCallback(_then) # again, the second incarnation of the node might not be ready yet, # so poll until it is. This time INTRODUCER_FURL_FILE already # exists, so we check for the existence of NODE_URL_FILE instead. def _node_has_restarted(): return os.path.exists(NODE_URL_FILE) and os.path.exists(PORTNUM_FILE) d.addCallback(lambda res: self.poll(_node_has_restarted)) def _check_same_furl_and_port(res): self.failUnless(os.path.exists(INTRODUCER_FURL_FILE)) self.failUnlessEqual(self.furl, fileutil.read(INTRODUCER_FURL_FILE)) self.failUnlessEqual(self.portnum, fileutil.read(PORTNUM_FILE)) d.addCallback(_check_same_furl_and_port) # now we can kill it. TODO: On a slow machine, the node might kill # itself before we get a chance to, especially if spawning the # 'tahoe stop' command takes a while. def _stop(res): fileutil.write(HOTLINE_FILE, "") self.failUnless(os.path.exists(TWISTD_PID_FILE)) return self.run_bintahoe(["--quiet", "stop", c1]) d.addCallback(_stop) def _after_stopping(res): out, err, rc_or_sig = res fileutil.write(HOTLINE_FILE, "") # the parent has exited by now errstr = "rc=%d, OUT: '%s', ERR: '%s'" % (rc_or_sig, out, err) self.failUnlessEqual(rc_or_sig, 0, errstr) self.failUnlessEqual(out, "", errstr) # self.failUnlessEqual(err, "", errstr) # See test_client_no_noise -- for now we ignore noise. # the parent was supposed to poll and wait until it sees # twistd.pid go away before it exits, so twistd.pid should be # gone by now. 
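            # (Lifecycle markers used by this test: readiness after "start" is
            # detected by the appearance of private/introducer.furl, readiness
            # after "restart" by the reappearance of node.url (deleted just
            # before restarting), and a completed "stop" by the disappearance
            # of twistd.pid.)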
self.failIf(os.path.exists(TWISTD_PID_FILE)) d.addCallback(_after_stopping) d.addBoth(self._remove, HOTLINE_FILE) return d # This test has hit a 240-second timeout on our feisty2.5 buildslave, and a 480-second timeout # on Francois's Lenny-armv5tel buildslave. test_introducer.timeout = 960 def test_client_no_noise(self): self.skip_if_cannot_daemonize() basedir = self.workdir("test_client_no_noise") c1 = os.path.join(basedir, "c1") HOTLINE_FILE = os.path.join(c1, "suicide_prevention_hotline") TWISTD_PID_FILE = os.path.join(c1, "twistd.pid") PORTNUM_FILE = os.path.join(c1, "client.port") d = self.run_bintahoe(["--quiet", "create-client", "--basedir", c1, "--webport", "0"]) def _cb(res): out, err, rc_or_sig = res errstr = "cc=%d, OUT: '%s', ERR: '%s'" % (rc_or_sig, out, err) assert rc_or_sig == 0, errstr self.failUnlessEqual(rc_or_sig, 0) # By writing this file, we get two minutes before the client will exit. This ensures # that even if the 'stop' command doesn't work (and the test fails), the client should # still terminate. fileutil.write(HOTLINE_FILE, "") # now it's safe to start the node d.addCallback(_cb) def _start(res): return self.run_bintahoe(["--quiet", "start", c1]) d.addCallback(_start) def _cb2(res): out, err, rc_or_sig = res errstr = "cc=%d, OUT: '%s', ERR: '%s'" % (rc_or_sig, out, err) fileutil.write(HOTLINE_FILE, "") self.failUnlessEqual(rc_or_sig, 0, errstr) self.failUnlessEqual(out, "", errstr) # If you emit noise, you fail this test. errlines = err.split("\n") self.failIf([True for line in errlines if (line != "" and "UserWarning: Unbuilt egg for setuptools" not in line and "from pkg_resources import load_entry_point" not in line)], errstr) if err != "": raise unittest.SkipTest("This test is known not to pass on Ubuntu Lucid; see #1235.") # the parent (twistd) has exited. However, twistd writes the pid # from the child, not the parent, so we can't expect twistd.pid # to exist quite yet. # the node is running, but it might not have made it past the # first reactor turn yet, and if we kill it too early, it won't # remove the twistd.pid file. So wait until it does something # that we know it won't do until after the first turn. d.addCallback(_cb2) def _node_has_started(): return os.path.exists(PORTNUM_FILE) d.addCallback(lambda res: self.poll(_node_has_started)) # now we can kill it. TODO: On a slow machine, the node might kill # itself before we get a chance to, especially if spawning the # 'tahoe stop' command takes a while. def _stop(res): self.failUnless(os.path.exists(TWISTD_PID_FILE), (TWISTD_PID_FILE, os.listdir(os.path.dirname(TWISTD_PID_FILE)))) return self.run_bintahoe(["--quiet", "stop", c1]) d.addCallback(_stop) d.addBoth(self._remove, HOTLINE_FILE) return d def test_client(self): self.skip_if_cannot_daemonize() basedir = self.workdir("test_client") c1 = os.path.join(basedir, "c1") HOTLINE_FILE = os.path.join(c1, "suicide_prevention_hotline") TWISTD_PID_FILE = os.path.join(c1, "twistd.pid") PORTNUM_FILE = os.path.join(c1, "client.port") NODE_URL_FILE = os.path.join(c1, "node.url") CONFIG_FILE = os.path.join(c1, "tahoe.cfg") d = self.run_bintahoe(["--quiet", "create-node", "--basedir", c1, "--webport", "0"]) def _cb(res): out, err, rc_or_sig = res self.failUnlessEqual(rc_or_sig, 0) # Check that the --webport option worked. config = fileutil.read(CONFIG_FILE) self.failUnlessIn('\nweb.port = 0\n', config) # By writing this file, we get two minutes before the client will exit. 
This ensures # that even if the 'stop' command doesn't work (and the test fails), the client should # still terminate. fileutil.write(HOTLINE_FILE, "") # now it's safe to start the node d.addCallback(_cb) def _start(res): return self.run_bintahoe(["--quiet", "start", c1]) d.addCallback(_start) def _cb2(res): out, err, rc_or_sig = res fileutil.write(HOTLINE_FILE, "") errstr = "rc=%d, OUT: '%s', ERR: '%s'" % (rc_or_sig, out, err) self.failUnlessEqual(rc_or_sig, 0, errstr) self.failUnlessEqual(out, "", errstr) # self.failUnlessEqual(err, "", errstr) # See test_client_no_noise -- for now we ignore noise. # the parent (twistd) has exited. However, twistd writes the pid # from the child, not the parent, so we can't expect twistd.pid # to exist quite yet. # the node is running, but it might not have made it past the # first reactor turn yet, and if we kill it too early, it won't # remove the twistd.pid file. So wait until it does something # that we know it won't do until after the first turn. d.addCallback(_cb2) def _node_has_started(): # this depends upon both files being created atomically return os.path.exists(NODE_URL_FILE) and os.path.exists(PORTNUM_FILE) d.addCallback(lambda res: self.poll(_node_has_started)) def _started(res): # read the client.port file so we can check that its contents # don't change on restart self.portnum = fileutil.read(PORTNUM_FILE) fileutil.write(HOTLINE_FILE, "") self.failUnless(os.path.exists(TWISTD_PID_FILE)) # rm this so we can detect when the second incarnation is ready os.unlink(NODE_URL_FILE) return self.run_bintahoe(["--quiet", "restart", c1]) d.addCallback(_started) def _cb3(res): out, err, rc_or_sig = res fileutil.write(HOTLINE_FILE, "") errstr = "rc=%d, OUT: '%s', ERR: '%s'" % (rc_or_sig, out, err) self.failUnlessEqual(rc_or_sig, 0, errstr) self.failUnlessEqual(out, "", errstr) # self.failUnlessEqual(err, "", errstr) # See test_client_no_noise -- for now we ignore noise. d.addCallback(_cb3) # again, the second incarnation of the node might not be ready yet, # so poll until it is d.addCallback(lambda res: self.poll(_node_has_started)) def _check_same_port(res): self.failUnlessEqual(self.portnum, fileutil.read(PORTNUM_FILE)) d.addCallback(_check_same_port) # now we can kill it. TODO: On a slow machine, the node might kill # itself before we get a chance to, especially if spawning the # 'tahoe stop' command takes a while. def _stop(res): fileutil.write(HOTLINE_FILE, "") self.failUnless(os.path.exists(TWISTD_PID_FILE), (TWISTD_PID_FILE, os.listdir(os.path.dirname(TWISTD_PID_FILE)))) return self.run_bintahoe(["--quiet", "stop", c1]) d.addCallback(_stop) def _cb4(res): out, err, rc_or_sig = res fileutil.write(HOTLINE_FILE, "") # the parent has exited by now errstr = "rc=%d, OUT: '%s', ERR: '%s'" % (rc_or_sig, out, err) self.failUnlessEqual(rc_or_sig, 0, errstr) self.failUnlessEqual(out, "", errstr) # self.failUnlessEqual(err, "", errstr) # See test_client_no_noise -- for now we ignore noise. # the parent was supposed to poll and wait until it sees # twistd.pid go away before it exits, so twistd.pid should be # gone by now. 
self.failIf(os.path.exists(TWISTD_PID_FILE)) d.addCallback(_cb4) d.addBoth(self._remove, HOTLINE_FILE) return d def _remove(self, res, file): fileutil.remove(file) return res def test_baddir(self): self.skip_if_cannot_daemonize() basedir = self.workdir("test_baddir") fileutil.make_dirs(basedir) d = self.run_bintahoe(["--quiet", "start", "--basedir", basedir]) def _cb(res): out, err, rc_or_sig = res self.failUnlessEqual(rc_or_sig, 1) self.failUnless("does not look like a node directory" in err, err) d.addCallback(_cb) def _then_stop_it(res): return self.run_bintahoe(["--quiet", "stop", "--basedir", basedir]) d.addCallback(_then_stop_it) def _cb2(res): out, err, rc_or_sig = res self.failUnlessEqual(rc_or_sig, 2) self.failUnless("does not look like a running node directory" in err) d.addCallback(_cb2) def _then_start_in_bogus_basedir(res): not_a_dir = os.path.join(basedir, "bogus") return self.run_bintahoe(["--quiet", "start", "--basedir", not_a_dir]) d.addCallback(_then_start_in_bogus_basedir) def _cb3(res): out, err, rc_or_sig = res self.failUnlessEqual(rc_or_sig, 1) self.failUnless("does not look like a directory at all" in err, err) d.addCallback(_cb3) return d def test_keygen(self): self.skip_if_cannot_daemonize() basedir = self.workdir("test_keygen") c1 = os.path.join(basedir, "c1") TWISTD_PID_FILE = os.path.join(c1, "twistd.pid") KEYGEN_FURL_FILE = os.path.join(c1, "key_generator.furl") d = self.run_bintahoe(["--quiet", "create-key-generator", "--basedir", c1]) def _cb(res): out, err, rc_or_sig = res self.failUnlessEqual(rc_or_sig, 0) d.addCallback(_cb) def _start(res): return self.run_bintahoe(["--quiet", "start", c1]) d.addCallback(_start) def _cb2(res): out, err, rc_or_sig = res errstr = "rc=%d, OUT: '%s', ERR: '%s'" % (rc_or_sig, out, err) self.failUnlessEqual(rc_or_sig, 0, errstr) self.failUnlessEqual(out, "", errstr) # self.failUnlessEqual(err, "", errstr) # See test_client_no_noise -- for now we ignore noise. # the parent (twistd) has exited. However, twistd writes the pid # from the child, not the parent, so we can't expect twistd.pid # to exist quite yet. # the node is running, but it might not have made it past the # first reactor turn yet, and if we kill it too early, it won't # remove the twistd.pid file. So wait until it does something # that we know it won't do until after the first turn. d.addCallback(_cb2) def _node_has_started(): return os.path.exists(KEYGEN_FURL_FILE) d.addCallback(lambda res: self.poll(_node_has_started)) def _started(res): self.failUnless(os.path.exists(TWISTD_PID_FILE)) # rm this so we can detect when the second incarnation is ready os.unlink(KEYGEN_FURL_FILE) return self.run_bintahoe(["--quiet", "restart", c1]) d.addCallback(_started) def _cb3(res): out, err, rc_or_sig = res errstr = "rc=%d, OUT: '%s', ERR: '%s'" % (rc_or_sig, out, err) self.failUnlessEqual(rc_or_sig, 0, errstr) self.failUnlessEqual(out, "", errstr) # self.failUnlessEqual(err, "", errstr) # See test_client_no_noise -- for now we ignore noise. d.addCallback(_cb3) # again, the second incarnation of the node might not be ready yet, # so poll until it is d.addCallback(lambda res: self.poll(_node_has_started)) # now we can kill it. TODO: On a slow machine, the node might kill # itself before we get a chance too, especially if spawning the # 'tahoe stop' command takes a while. 
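        # (As in the introducer test, key_generator.furl doubles as the
        # readiness marker: it is removed just before "restart" so that its
        # reappearance signals that the second incarnation is up.)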
        def _stop(res):
            self.failUnless(os.path.exists(TWISTD_PID_FILE))
            return self.run_bintahoe(["--quiet", "stop", c1])
        d.addCallback(_stop)

        def _cb4(res):
            out, err, rc_or_sig = res
            # the parent has exited by now
            errstr = "rc=%d, OUT: '%s', ERR: '%s'" % (rc_or_sig, out, err)
            self.failUnlessEqual(rc_or_sig, 0, errstr)
            self.failUnlessEqual(out, "", errstr)
            # self.failUnlessEqual(err, "", errstr) # See test_client_no_noise -- for now we ignore noise.
            # the parent was supposed to poll and wait until it sees
            # twistd.pid go away before it exits, so twistd.pid should be
            # gone by now.
            self.failIf(os.path.exists(TWISTD_PID_FILE))
        d.addCallback(_cb4)
        return d

tahoe-lafs-1.10.0/src/allmydata/test/test_sftp.py

import re, struct, traceback, time, calendar
from stat import S_IFREG, S_IFDIR

from twisted.trial import unittest
from twisted.internet import defer, reactor
from twisted.python.failure import Failure
from twisted.internet.error import ProcessDone, ProcessTerminated

from allmydata.util import deferredutil

conch_interfaces = None
sftp = None
sftpd = None
have_pycrypto = False
try:
    from Crypto import Util
    Util # hush pyflakes
    have_pycrypto = True
except ImportError:
    pass

if have_pycrypto:
    from twisted.conch import interfaces as conch_interfaces
    from twisted.conch.ssh import filetransfer as sftp
    from allmydata.frontends import sftpd

from allmydata.interfaces import IDirectoryNode, ExistingChildError, NoSuchChildError
from allmydata.mutable.common import NotWriteableError
from allmydata.util.consumer import download_to_data
from allmydata.immutable import upload
from allmydata.mutable import publish
from allmydata.test.no_network import GridTestMixin
from allmydata.test.common import ShouldFailMixin
from allmydata.test.common_util import ReallyEqualMixin

timeout = 240

class Handler(GridTestMixin, ShouldFailMixin, ReallyEqualMixin, unittest.TestCase):
    """This is a no-network unit test of the SFTPUserHandler and the abstractions it uses."""

    if not have_pycrypto:
        skip = "SFTP support requires pycrypto, which is not installed"

    def shouldFailWithSFTPError(self, expected_code, which, callable, *args, **kwargs):
        assert isinstance(expected_code, int), repr(expected_code)
        assert isinstance(which, str), repr(which)
        s = traceback.format_stack()
        d = defer.maybeDeferred(callable, *args, **kwargs)
        def _done(res):
            if isinstance(res, Failure):
                res.trap(sftp.SFTPError)
                self.failUnlessReallyEqual(res.value.code, expected_code,
                                           "%s was supposed to raise SFTPError(%r), not SFTPError(%r): %s" %
                                           (which, expected_code, res.value.code, res))
            else:
                print '@' + '@'.join(s)
                self.fail("%s was supposed to raise SFTPError(%r), not get %r" % (which, expected_code, res))
        d.addBoth(_done)
        return d

    def _set_up(self, basedir, num_clients=1, num_servers=10):
        self.basedir = "sftp/" + basedir
        self.set_up_grid(num_clients=num_clients, num_servers=num_servers)

        self.client = self.g.clients[0]
        self.username = "alice"

        d = self.client.create_dirnode()
        def _created_root(node):
            self.root = node
            self.root_uri = node.get_uri()
            sftpd._reload()
            self.handler = sftpd.SFTPUserHandler(self.client, self.root, self.username)
        d.addCallback(_created_root)
        return d

    def _set_up_tree(self):
        u = publish.MutableData("mutable file contents")
        d = self.client.create_mutable_file(u)
        d.addCallback(lambda node: self.root.set_node(u"mutable", node))
        def _created_mutable(n):
            self.mutable = n
            self.mutable_uri = n.get_uri()
        d.addCallback(_created_mutable)

        d.addCallback(lambda ign:
self.root._create_and_validate_node(None, self.mutable.get_readonly_uri(), name=u"readonly")) d.addCallback(lambda node: self.root.set_node(u"readonly", node)) def _created_readonly(n): self.readonly = n self.readonly_uri = n.get_uri() d.addCallback(_created_readonly) gross = upload.Data("0123456789" * 101, None) d.addCallback(lambda ign: self.root.add_file(u"gro\u00DF", gross)) def _created_gross(n): self.gross = n self.gross_uri = n.get_uri() d.addCallback(_created_gross) small = upload.Data("0123456789", None) d.addCallback(lambda ign: self.root.add_file(u"small", small)) def _created_small(n): self.small = n self.small_uri = n.get_uri() d.addCallback(_created_small) small2 = upload.Data("Small enough for a LIT too", None) d.addCallback(lambda ign: self.root.add_file(u"small2", small2)) def _created_small2(n): self.small2 = n self.small2_uri = n.get_uri() d.addCallback(_created_small2) empty_litdir_uri = "URI:DIR2-LIT:" # contains one child which is itself also LIT: tiny_litdir_uri = "URI:DIR2-LIT:gqytunj2onug64tufqzdcosvkjetutcjkq5gw4tvm5vwszdgnz5hgyzufqydulbshj5x2lbm" unknown_uri = "x-tahoe-crazy://I_am_from_the_future." d.addCallback(lambda ign: self.root._create_and_validate_node(None, empty_litdir_uri, name=u"empty_lit_dir")) def _created_empty_lit_dir(n): self.empty_lit_dir = n self.empty_lit_dir_uri = n.get_uri() self.root.set_node(u"empty_lit_dir", n) d.addCallback(_created_empty_lit_dir) d.addCallback(lambda ign: self.root._create_and_validate_node(None, tiny_litdir_uri, name=u"tiny_lit_dir")) def _created_tiny_lit_dir(n): self.tiny_lit_dir = n self.tiny_lit_dir_uri = n.get_uri() self.root.set_node(u"tiny_lit_dir", n) d.addCallback(_created_tiny_lit_dir) d.addCallback(lambda ign: self.root._create_and_validate_node(None, unknown_uri, name=u"unknown")) def _created_unknown(n): self.unknown = n self.unknown_uri = n.get_uri() self.root.set_node(u"unknown", n) d.addCallback(_created_unknown) fall_of_the_Berlin_wall = calendar.timegm(time.strptime("1989-11-09 20:00:00 UTC", "%Y-%m-%d %H:%M:%S %Z")) md = {'mtime': fall_of_the_Berlin_wall, 'tahoe': {'linkmotime': fall_of_the_Berlin_wall}} d.addCallback(lambda ign: self.root.set_node(u"loop", self.root, metadata=md)) return d def test_basic(self): d = self._set_up("basic") def _check(ign): # Test operations that have no side-effects, and don't need the tree. 
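            # (The checks below pin down the handler's POSIX-style path
            # normalization: empty and "." segments are dropped, ".." pops one
            # level, and a path can never climb above the root -- "/../bar"
            # still resolves to "/bar".)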
version = self.handler.gotVersion(3, {}) self.failUnless(isinstance(version, dict)) self.failUnlessReallyEqual(self.handler._path_from_string(""), []) self.failUnlessReallyEqual(self.handler._path_from_string("/"), []) self.failUnlessReallyEqual(self.handler._path_from_string("."), []) self.failUnlessReallyEqual(self.handler._path_from_string("//"), []) self.failUnlessReallyEqual(self.handler._path_from_string("/."), []) self.failUnlessReallyEqual(self.handler._path_from_string("/./"), []) self.failUnlessReallyEqual(self.handler._path_from_string("foo"), [u"foo"]) self.failUnlessReallyEqual(self.handler._path_from_string("/foo"), [u"foo"]) self.failUnlessReallyEqual(self.handler._path_from_string("foo/"), [u"foo"]) self.failUnlessReallyEqual(self.handler._path_from_string("/foo/"), [u"foo"]) self.failUnlessReallyEqual(self.handler._path_from_string("foo/bar"), [u"foo", u"bar"]) self.failUnlessReallyEqual(self.handler._path_from_string("/foo/bar"), [u"foo", u"bar"]) self.failUnlessReallyEqual(self.handler._path_from_string("foo/bar//"), [u"foo", u"bar"]) self.failUnlessReallyEqual(self.handler._path_from_string("/foo/bar//"), [u"foo", u"bar"]) self.failUnlessReallyEqual(self.handler._path_from_string("foo/./bar"), [u"foo", u"bar"]) self.failUnlessReallyEqual(self.handler._path_from_string("./foo/./bar"), [u"foo", u"bar"]) self.failUnlessReallyEqual(self.handler._path_from_string("foo/../bar"), [u"bar"]) self.failUnlessReallyEqual(self.handler._path_from_string("/foo/../bar"), [u"bar"]) self.failUnlessReallyEqual(self.handler._path_from_string("../bar"), [u"bar"]) self.failUnlessReallyEqual(self.handler._path_from_string("/../bar"), [u"bar"]) self.failUnlessReallyEqual(self.handler.realPath(""), "/") self.failUnlessReallyEqual(self.handler.realPath("/"), "/") self.failUnlessReallyEqual(self.handler.realPath("."), "/") self.failUnlessReallyEqual(self.handler.realPath("//"), "/") self.failUnlessReallyEqual(self.handler.realPath("/."), "/") self.failUnlessReallyEqual(self.handler.realPath("/./"), "/") self.failUnlessReallyEqual(self.handler.realPath("foo"), "/foo") self.failUnlessReallyEqual(self.handler.realPath("/foo"), "/foo") self.failUnlessReallyEqual(self.handler.realPath("foo/"), "/foo") self.failUnlessReallyEqual(self.handler.realPath("/foo/"), "/foo") self.failUnlessReallyEqual(self.handler.realPath("foo/bar"), "/foo/bar") self.failUnlessReallyEqual(self.handler.realPath("/foo/bar"), "/foo/bar") self.failUnlessReallyEqual(self.handler.realPath("foo/bar//"), "/foo/bar") self.failUnlessReallyEqual(self.handler.realPath("/foo/bar//"), "/foo/bar") self.failUnlessReallyEqual(self.handler.realPath("foo/./bar"), "/foo/bar") self.failUnlessReallyEqual(self.handler.realPath("./foo/./bar"), "/foo/bar") self.failUnlessReallyEqual(self.handler.realPath("foo/../bar"), "/bar") self.failUnlessReallyEqual(self.handler.realPath("/foo/../bar"), "/bar") self.failUnlessReallyEqual(self.handler.realPath("../bar"), "/bar") self.failUnlessReallyEqual(self.handler.realPath("/../bar"), "/bar") d.addCallback(_check) d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_NO_SUCH_FILE, "_path_from_string invalid UTF-8", self.handler._path_from_string, "\xFF")) d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_NO_SUCH_FILE, "realPath invalid UTF-8", self.handler.realPath, "\xFF")) return d def test_convert_error(self): self.failUnlessReallyEqual(sftpd._convert_error(None, "request"), None) d = defer.succeed(None) d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_FAILURE, 
"_convert_error SFTPError", sftpd._convert_error, Failure(sftp.SFTPError(sftp.FX_FAILURE, "foo")), "request")) d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_NO_SUCH_FILE, "_convert_error NoSuchChildError", sftpd._convert_error, Failure(NoSuchChildError("foo")), "request")) d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_FAILURE, "_convert_error ExistingChildError", sftpd._convert_error, Failure(ExistingChildError("foo")), "request")) d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_PERMISSION_DENIED, "_convert_error NotWriteableError", sftpd._convert_error, Failure(NotWriteableError("foo")), "request")) d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_OP_UNSUPPORTED, "_convert_error NotImplementedError", sftpd._convert_error, Failure(NotImplementedError("foo")), "request")) d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_EOF, "_convert_error EOFError", sftpd._convert_error, Failure(EOFError("foo")), "request")) d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_EOF, "_convert_error defer.FirstError", sftpd._convert_error, Failure(defer.FirstError( Failure(sftp.SFTPError(sftp.FX_EOF, "foo")), 0)), "request")) d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_FAILURE, "_convert_error AssertionError", sftpd._convert_error, Failure(AssertionError("foo")), "request")) return d def test_not_implemented(self): d = self._set_up("not_implemented") d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_OP_UNSUPPORTED, "readLink link", self.handler.readLink, "link")) d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_OP_UNSUPPORTED, "makeLink link file", self.handler.makeLink, "link", "file")) return d def _compareDirLists(self, actual, expected): actual_list = sorted(actual) expected_list = sorted(expected) self.failUnlessReallyEqual(len(actual_list), len(expected_list), "%r is wrong length, expecting %r" % (actual_list, expected_list)) for (a, b) in zip(actual_list, expected_list): (name, text, attrs) = a (expected_name, expected_text_re, expected_attrs) = b self.failUnlessReallyEqual(name, expected_name) self.failUnless(re.match(expected_text_re, text), "%r does not match %r in\n%r" % (text, expected_text_re, actual_list)) self._compareAttributes(attrs, expected_attrs) def _compareAttributes(self, attrs, expected_attrs): # It is ok for there to be extra actual attributes. 
# TODO: check times for e in expected_attrs: self.failUnless(e in attrs, "%r is not in\n%r" % (e, attrs)) self.failUnlessReallyEqual(attrs[e], expected_attrs[e], "%r:%r is not %r in\n%r" % (e, attrs[e], expected_attrs[e], attrs)) def test_openDirectory_and_attrs(self): d = self._set_up("openDirectory_and_attrs") d.addCallback(lambda ign: self._set_up_tree()) d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_PERMISSION_DENIED, "openDirectory small", self.handler.openDirectory, "small")) d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_PERMISSION_DENIED, "openDirectory unknown", self.handler.openDirectory, "unknown")) d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_NO_SUCH_FILE, "openDirectory nodir", self.handler.openDirectory, "nodir")) d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_NO_SUCH_FILE, "openDirectory nodir/nodir", self.handler.openDirectory, "nodir/nodir")) gross = u"gro\u00DF".encode("utf-8") expected_root = [ ('empty_lit_dir', r'dr-xr-xr-x .* 0 .* empty_lit_dir$', {'permissions': S_IFDIR | 0555}), (gross, r'-rw-rw-rw- .* 1010 .* '+gross+'$', {'permissions': S_IFREG | 0666, 'size': 1010}), # The fall of the Berlin wall may have been on 9th or 10th November 1989 depending on the gateway's timezone. #('loop', r'drwxrwxrwx .* 0 Nov (09|10) 1989 loop$', {'permissions': S_IFDIR | 0777}), ('loop', r'drwxrwxrwx .* 0 .* loop$', {'permissions': S_IFDIR | 0777}), ('mutable', r'-rw-rw-rw- .* 0 .* mutable$', {'permissions': S_IFREG | 0666}), ('readonly', r'-r--r--r-- .* 0 .* readonly$', {'permissions': S_IFREG | 0444}), ('small', r'-rw-rw-rw- .* 10 .* small$', {'permissions': S_IFREG | 0666, 'size': 10}), ('small2', r'-rw-rw-rw- .* 26 .* small2$', {'permissions': S_IFREG | 0666, 'size': 26}), ('tiny_lit_dir', r'dr-xr-xr-x .* 0 .* tiny_lit_dir$', {'permissions': S_IFDIR | 0555}), ('unknown', r'\?--------- .* 0 .* unknown$', {'permissions': 0}), ] d.addCallback(lambda ign: self.handler.openDirectory("")) d.addCallback(lambda res: self._compareDirLists(res, expected_root)) d.addCallback(lambda ign: self.handler.openDirectory("loop")) d.addCallback(lambda res: self._compareDirLists(res, expected_root)) d.addCallback(lambda ign: self.handler.openDirectory("loop/loop")) d.addCallback(lambda res: self._compareDirLists(res, expected_root)) d.addCallback(lambda ign: self.handler.openDirectory("empty_lit_dir")) d.addCallback(lambda res: self._compareDirLists(res, [])) # The UTC epoch may either be in Jan 1 1970 or Dec 31 1969 depending on the gateway's timezone. 
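        # (tiny_lit_dir is a LIT directory whose single child "short" -- the
        # 8-byte file "The end." -- is itself a LIT file; LIT caps embed their
        # contents in the URI, so this listing needs no storage servers.)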
expected_tiny_lit = [ ('short', r'-r--r--r-- .* 8 (Jan 01 1970|Dec 31 1969) short$', {'permissions': S_IFREG | 0444, 'size': 8}), ] d.addCallback(lambda ign: self.handler.openDirectory("tiny_lit_dir")) d.addCallback(lambda res: self._compareDirLists(res, expected_tiny_lit)) d.addCallback(lambda ign: self.handler.getAttrs("small", True)) d.addCallback(lambda attrs: self._compareAttributes(attrs, {'permissions': S_IFREG | 0666, 'size': 10})) d.addCallback(lambda ign: self.handler.setAttrs("small", {})) d.addCallback(lambda res: self.failUnlessReallyEqual(res, None)) d.addCallback(lambda ign: self.handler.getAttrs("small", True)) d.addCallback(lambda attrs: self._compareAttributes(attrs, {'permissions': S_IFREG | 0666, 'size': 10})) d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_OP_UNSUPPORTED, "setAttrs size", self.handler.setAttrs, "small", {'size': 0})) d.addCallback(lambda ign: self.failUnlessEqual(sftpd.all_heisenfiles, {})) d.addCallback(lambda ign: self.failUnlessEqual(self.handler._heisenfiles, {})) return d def test_openFile_read(self): d = self._set_up("openFile_read") d.addCallback(lambda ign: self._set_up_tree()) d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_BAD_MESSAGE, "openFile small 0 bad", self.handler.openFile, "small", 0, {})) # attempting to open a non-existent file should fail d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_NO_SUCH_FILE, "openFile nofile READ nosuch", self.handler.openFile, "nofile", sftp.FXF_READ, {})) d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_NO_SUCH_FILE, "openFile nodir/file READ nosuch", self.handler.openFile, "nodir/file", sftp.FXF_READ, {})) d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_PERMISSION_DENIED, "openFile unknown READ denied", self.handler.openFile, "unknown", sftp.FXF_READ, {})) d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_PERMISSION_DENIED, "openFile unknown/file READ denied", self.handler.openFile, "unknown/file", sftp.FXF_READ, {})) d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_PERMISSION_DENIED, "openFile tiny_lit_dir READ denied", self.handler.openFile, "tiny_lit_dir", sftp.FXF_READ, {})) d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_PERMISSION_DENIED, "openFile unknown uri READ denied", self.handler.openFile, "uri/"+self.unknown_uri, sftp.FXF_READ, {})) d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_PERMISSION_DENIED, "openFile tiny_lit_dir uri READ denied", self.handler.openFile, "uri/"+self.tiny_lit_dir_uri, sftp.FXF_READ, {})) # FIXME: should be FX_NO_SUCH_FILE? 
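        # (That is, opening "uri/URI:noexist" below currently reports
        # FX_PERMISSION_DENIED rather than FX_NO_SUCH_FILE; the FIXME above
        # notes that the latter would arguably be the more accurate code.)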
d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_PERMISSION_DENIED, "openFile noexist uri READ denied", self.handler.openFile, "uri/URI:noexist", sftp.FXF_READ, {})) d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_NO_SUCH_FILE, "openFile invalid UTF-8 uri READ denied", self.handler.openFile, "uri/URI:\xFF", sftp.FXF_READ, {})) # reading an existing file should succeed d.addCallback(lambda ign: self.handler.openFile("small", sftp.FXF_READ, {})) def _read_small(rf): d2 = rf.readChunk(0, 10) d2.addCallback(lambda data: self.failUnlessReallyEqual(data, "0123456789")) d2.addCallback(lambda ign: rf.readChunk(2, 6)) d2.addCallback(lambda data: self.failUnlessReallyEqual(data, "234567")) d2.addCallback(lambda ign: rf.readChunk(1, 0)) d2.addCallback(lambda data: self.failUnlessReallyEqual(data, "")) d2.addCallback(lambda ign: rf.readChunk(8, 4)) # read that starts before EOF is OK d2.addCallback(lambda data: self.failUnlessReallyEqual(data, "89")) d2.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_EOF, "readChunk starting at EOF (0-byte)", rf.readChunk, 10, 0)) d2.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_EOF, "readChunk starting at EOF", rf.readChunk, 10, 1)) d2.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_EOF, "readChunk starting after EOF", rf.readChunk, 11, 1)) d2.addCallback(lambda ign: rf.getAttrs()) d2.addCallback(lambda attrs: self._compareAttributes(attrs, {'permissions': S_IFREG | 0666, 'size': 10})) d2.addCallback(lambda ign: self.handler.getAttrs("small", followLinks=0)) d2.addCallback(lambda attrs: self._compareAttributes(attrs, {'permissions': S_IFREG | 0666, 'size': 10})) d2.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_PERMISSION_DENIED, "writeChunk on read-only handle denied", rf.writeChunk, 0, "a")) d2.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_PERMISSION_DENIED, "setAttrs on read-only handle denied", rf.setAttrs, {})) d2.addCallback(lambda ign: rf.close()) d2.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_BAD_MESSAGE, "readChunk on closed file bad", rf.readChunk, 0, 1)) d2.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_BAD_MESSAGE, "getAttrs on closed file bad", rf.getAttrs)) d2.addCallback(lambda ign: rf.close()) # should be no-op return d2 d.addCallback(_read_small) # repeat for a large file gross = u"gro\u00DF".encode("utf-8") d.addCallback(lambda ign: self.handler.openFile(gross, sftp.FXF_READ, {})) def _read_gross(rf): d2 = rf.readChunk(0, 10) d2.addCallback(lambda data: self.failUnlessReallyEqual(data, "0123456789")) d2.addCallback(lambda ign: rf.readChunk(2, 6)) d2.addCallback(lambda data: self.failUnlessReallyEqual(data, "234567")) d2.addCallback(lambda ign: rf.readChunk(1, 0)) d2.addCallback(lambda data: self.failUnlessReallyEqual(data, "")) d2.addCallback(lambda ign: rf.readChunk(1008, 4)) # read that starts before EOF is OK d2.addCallback(lambda data: self.failUnlessReallyEqual(data, "89")) d2.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_EOF, "readChunk starting at EOF (0-byte)", rf.readChunk, 1010, 0)) d2.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_EOF, "readChunk starting at EOF", rf.readChunk, 1010, 1)) d2.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_EOF, "readChunk starting after EOF", rf.readChunk, 1011, 1)) d2.addCallback(lambda ign: rf.getAttrs()) d2.addCallback(lambda attrs: self._compareAttributes(attrs, {'permissions': S_IFREG | 0666, 'size': 1010})) d2.addCallback(lambda 
ign: self.handler.getAttrs(gross, followLinks=0)) d2.addCallback(lambda attrs: self._compareAttributes(attrs, {'permissions': S_IFREG | 0666, 'size': 1010})) d2.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_PERMISSION_DENIED, "writeChunk on read-only handle denied", rf.writeChunk, 0, "a")) d2.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_PERMISSION_DENIED, "setAttrs on read-only handle denied", rf.setAttrs, {})) d2.addCallback(lambda ign: rf.close()) d2.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_BAD_MESSAGE, "readChunk on closed file", rf.readChunk, 0, 1)) d2.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_BAD_MESSAGE, "getAttrs on closed file", rf.getAttrs)) d2.addCallback(lambda ign: rf.close()) # should be no-op return d2 d.addCallback(_read_gross) # reading an existing small file via uri/ should succeed d.addCallback(lambda ign: self.handler.openFile("uri/"+self.small_uri, sftp.FXF_READ, {})) def _read_small_uri(rf): d2 = rf.readChunk(0, 10) d2.addCallback(lambda data: self.failUnlessReallyEqual(data, "0123456789")) d2.addCallback(lambda ign: rf.close()) return d2 d.addCallback(_read_small_uri) # repeat for a large file d.addCallback(lambda ign: self.handler.openFile("uri/"+self.gross_uri, sftp.FXF_READ, {})) def _read_gross_uri(rf): d2 = rf.readChunk(0, 10) d2.addCallback(lambda data: self.failUnlessReallyEqual(data, "0123456789")) d2.addCallback(lambda ign: rf.close()) return d2 d.addCallback(_read_gross_uri) # repeat for a mutable file d.addCallback(lambda ign: self.handler.openFile("uri/"+self.mutable_uri, sftp.FXF_READ, {})) def _read_mutable_uri(rf): d2 = rf.readChunk(0, 100) d2.addCallback(lambda data: self.failUnlessReallyEqual(data, "mutable file contents")) d2.addCallback(lambda ign: rf.close()) return d2 d.addCallback(_read_mutable_uri) # repeat for a file within a directory referenced by URI d.addCallback(lambda ign: self.handler.openFile("uri/"+self.tiny_lit_dir_uri+"/short", sftp.FXF_READ, {})) def _read_short(rf): d2 = rf.readChunk(0, 100) d2.addCallback(lambda data: self.failUnlessReallyEqual(data, "The end.")) d2.addCallback(lambda ign: rf.close()) return d2 d.addCallback(_read_short) # check that failed downloads cause failed reads. Note that this # trashes the grid (by deleting all shares), so this must be at the # end of the test function. d.addCallback(lambda ign: self.handler.openFile("uri/"+self.gross_uri, sftp.FXF_READ, {})) def _read_broken(rf): d2 = defer.succeed(None) d2.addCallback(lambda ign: self.g.nuke_from_orbit()) d2.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_FAILURE, "read broken", rf.readChunk, 0, 100)) # close shouldn't fail d2.addCallback(lambda ign: rf.close()) d2.addCallback(lambda res: self.failUnlessReallyEqual(res, None)) return d2 d.addCallback(_read_broken) d.addCallback(lambda ign: self.failUnlessEqual(sftpd.all_heisenfiles, {})) d.addCallback(lambda ign: self.failUnlessEqual(self.handler._heisenfiles, {})) return d def test_openFile_read_error(self): # The check at the end of openFile_read tested this for large files, # but it trashed the grid in the process, so this needs to be a # separate test. 
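        # ("Trashing the grid" here means calling self.g.nuke_from_orbit(),
        # which deletes every share; after that, readChunk is expected to fail
        # with FX_FAILURE while close() on a read-only handle still succeeds
        # and returns None.)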
small = upload.Data("0123456789"*10, None) d = self._set_up("openFile_read_error") d.addCallback(lambda ign: self.root.add_file(u"small", small)) d.addCallback(lambda n: self.handler.openFile("/uri/"+n.get_uri(), sftp.FXF_READ, {})) def _read_broken(rf): d2 = defer.succeed(None) d2.addCallback(lambda ign: self.g.nuke_from_orbit()) d2.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_FAILURE, "read broken", rf.readChunk, 0, 100)) # close shouldn't fail d2.addCallback(lambda ign: rf.close()) d2.addCallback(lambda res: self.failUnlessReallyEqual(res, None)) return d2 d.addCallback(_read_broken) d.addCallback(lambda ign: self.failUnlessEqual(sftpd.all_heisenfiles, {})) d.addCallback(lambda ign: self.failUnlessEqual(self.handler._heisenfiles, {})) return d def test_openFile_write(self): d = self._set_up("openFile_write") d.addCallback(lambda ign: self._set_up_tree()) # '' is an invalid filename d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_NO_SUCH_FILE, "openFile '' WRITE|CREAT|TRUNC nosuch", self.handler.openFile, "", sftp.FXF_WRITE | sftp.FXF_CREAT | sftp.FXF_TRUNC, {})) # TRUNC is not valid without CREAT if the file does not already exist d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_NO_SUCH_FILE, "openFile newfile WRITE|TRUNC nosuch", self.handler.openFile, "newfile", sftp.FXF_WRITE | sftp.FXF_TRUNC, {})) # EXCL is not valid without CREAT d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_BAD_MESSAGE, "openFile small WRITE|EXCL bad", self.handler.openFile, "small", sftp.FXF_WRITE | sftp.FXF_EXCL, {})) # cannot write to an existing directory d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_PERMISSION_DENIED, "openFile tiny_lit_dir WRITE denied", self.handler.openFile, "tiny_lit_dir", sftp.FXF_WRITE, {})) # cannot write to an existing unknown d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_PERMISSION_DENIED, "openFile unknown WRITE denied", self.handler.openFile, "unknown", sftp.FXF_WRITE, {})) # cannot create a child of an unknown d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_PERMISSION_DENIED, "openFile unknown/newfile WRITE|CREAT denied", self.handler.openFile, "unknown/newfile", sftp.FXF_WRITE | sftp.FXF_CREAT, {})) # cannot write to a new file in an immutable directory d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_PERMISSION_DENIED, "openFile tiny_lit_dir/newfile WRITE|CREAT|TRUNC denied", self.handler.openFile, "tiny_lit_dir/newfile", sftp.FXF_WRITE | sftp.FXF_CREAT | sftp.FXF_TRUNC, {})) # cannot write to an existing immutable file in an immutable directory (with or without CREAT and EXCL) d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_PERMISSION_DENIED, "openFile tiny_lit_dir/short WRITE denied", self.handler.openFile, "tiny_lit_dir/short", sftp.FXF_WRITE, {})) d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_PERMISSION_DENIED, "openFile tiny_lit_dir/short WRITE|CREAT denied", self.handler.openFile, "tiny_lit_dir/short", sftp.FXF_WRITE | sftp.FXF_CREAT, {})) # cannot write to a mutable file via a readonly cap (by path or uri) d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_PERMISSION_DENIED, "openFile readonly WRITE denied", self.handler.openFile, "readonly", sftp.FXF_WRITE, {})) d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_PERMISSION_DENIED, "openFile readonly uri WRITE denied", self.handler.openFile, "uri/"+self.readonly_uri, sftp.FXF_WRITE, {})) # cannot create a file with the EXCL flag if it already 
exists d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_FAILURE, "openFile small WRITE|CREAT|EXCL failure", self.handler.openFile, "small", sftp.FXF_WRITE | sftp.FXF_CREAT | sftp.FXF_EXCL, {})) d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_FAILURE, "openFile mutable WRITE|CREAT|EXCL failure", self.handler.openFile, "mutable", sftp.FXF_WRITE | sftp.FXF_CREAT | sftp.FXF_EXCL, {})) d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_FAILURE, "openFile mutable uri WRITE|CREAT|EXCL failure", self.handler.openFile, "uri/"+self.mutable_uri, sftp.FXF_WRITE | sftp.FXF_CREAT | sftp.FXF_EXCL, {})) d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_FAILURE, "openFile tiny_lit_dir/short WRITE|CREAT|EXCL failure", self.handler.openFile, "tiny_lit_dir/short", sftp.FXF_WRITE | sftp.FXF_CREAT | sftp.FXF_EXCL, {})) # cannot write to an immutable file if we don't have its parent (with or without CREAT, TRUNC, or EXCL) d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_PERMISSION_DENIED, "openFile small uri WRITE denied", self.handler.openFile, "uri/"+self.small_uri, sftp.FXF_WRITE, {})) d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_PERMISSION_DENIED, "openFile small uri WRITE|CREAT denied", self.handler.openFile, "uri/"+self.small_uri, sftp.FXF_WRITE | sftp.FXF_CREAT, {})) d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_PERMISSION_DENIED, "openFile small uri WRITE|CREAT|TRUNC denied", self.handler.openFile, "uri/"+self.small_uri, sftp.FXF_WRITE | sftp.FXF_CREAT | sftp.FXF_TRUNC, {})) d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_PERMISSION_DENIED, "openFile small uri WRITE|CREAT|EXCL denied", self.handler.openFile, "uri/"+self.small_uri, sftp.FXF_WRITE | sftp.FXF_CREAT | sftp.FXF_EXCL, {})) # test creating a new file with truncation and extension d.addCallback(lambda ign: self.handler.openFile("newfile", sftp.FXF_WRITE | sftp.FXF_CREAT | sftp.FXF_TRUNC, {})) def _write(wf): d2 = wf.writeChunk(0, "0123456789") d2.addCallback(lambda res: self.failUnlessReallyEqual(res, None)) d2.addCallback(lambda ign: wf.writeChunk(8, "0123")) d2.addCallback(lambda ign: wf.writeChunk(13, "abc")) d2.addCallback(lambda ign: wf.getAttrs()) d2.addCallback(lambda attrs: self._compareAttributes(attrs, {'permissions': S_IFREG | 0666, 'size': 16})) d2.addCallback(lambda ign: self.handler.getAttrs("newfile", followLinks=0)) d2.addCallback(lambda attrs: self._compareAttributes(attrs, {'permissions': S_IFREG | 0666, 'size': 16})) d2.addCallback(lambda ign: wf.setAttrs({})) d2.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_BAD_MESSAGE, "setAttrs with negative size bad", wf.setAttrs, {'size': -1})) d2.addCallback(lambda ign: wf.setAttrs({'size': 14})) d2.addCallback(lambda ign: wf.getAttrs()) d2.addCallback(lambda attrs: self.failUnlessReallyEqual(attrs['size'], 14)) d2.addCallback(lambda ign: wf.setAttrs({'size': 14})) d2.addCallback(lambda ign: wf.getAttrs()) d2.addCallback(lambda attrs: self.failUnlessReallyEqual(attrs['size'], 14)) d2.addCallback(lambda ign: wf.setAttrs({'size': 17})) d2.addCallback(lambda ign: wf.getAttrs()) d2.addCallback(lambda attrs: self.failUnlessReallyEqual(attrs['size'], 17)) d2.addCallback(lambda ign: self.handler.getAttrs("newfile", followLinks=0)) d2.addCallback(lambda attrs: self.failUnlessReallyEqual(attrs['size'], 17)) d2.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_PERMISSION_DENIED, "readChunk on write-only handle denied", wf.readChunk, 0, 1)) 
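            # (The writes above also exercise sparse-file semantics: writing
            # past the current end of file zero-fills the gap, and
            # setAttrs({'size': N}) both truncates and extends, which is why
            # the downloaded contents below are expected to be
            # "012345670123\x00a\x00\x00\x00".)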
d2.addCallback(lambda ign: wf.close()) d2.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_BAD_MESSAGE, "writeChunk on closed file bad", wf.writeChunk, 0, "a")) d2.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_BAD_MESSAGE, "setAttrs on closed file bad", wf.setAttrs, {'size': 0})) d2.addCallback(lambda ign: wf.close()) # should be no-op return d2 d.addCallback(_write) d.addCallback(lambda ign: self.root.get(u"newfile")) d.addCallback(lambda node: download_to_data(node)) d.addCallback(lambda data: self.failUnlessReallyEqual(data, "012345670123\x00a\x00\x00\x00")) # test APPEND flag, and also replacing an existing file ("newfile" created by the previous test) d.addCallback(lambda ign: self.handler.openFile("newfile", sftp.FXF_WRITE | sftp.FXF_CREAT | sftp.FXF_TRUNC | sftp.FXF_APPEND, {})) def _write_append(wf): d2 = wf.writeChunk(0, "0123456789") d2.addCallback(lambda ign: wf.writeChunk(8, "0123")) d2.addCallback(lambda ign: wf.setAttrs({'size': 17})) d2.addCallback(lambda ign: wf.getAttrs()) d2.addCallback(lambda attrs: self.failUnlessReallyEqual(attrs['size'], 17)) d2.addCallback(lambda ign: wf.writeChunk(0, "z")) d2.addCallback(lambda ign: wf.close()) return d2 d.addCallback(_write_append) d.addCallback(lambda ign: self.root.get(u"newfile")) d.addCallback(lambda node: download_to_data(node)) d.addCallback(lambda data: self.failUnlessReallyEqual(data, "01234567890123\x00\x00\x00z")) # test WRITE | TRUNC without CREAT, when the file already exists # This is invalid according to section 6.3 of the SFTP spec, but required for interoperability, # since POSIX does allow O_WRONLY | O_TRUNC. d.addCallback(lambda ign: self.handler.openFile("newfile", sftp.FXF_WRITE | sftp.FXF_TRUNC, {})) def _write_trunc(wf): d2 = wf.writeChunk(0, "01234") d2.addCallback(lambda ign: wf.close()) return d2 d.addCallback(_write_trunc) d.addCallback(lambda ign: self.root.get(u"newfile")) d.addCallback(lambda node: download_to_data(node)) d.addCallback(lambda data: self.failUnlessReallyEqual(data, "01234")) # test WRITE | TRUNC with permissions: 0 d.addCallback(lambda ign: self.handler.openFile("newfile", sftp.FXF_WRITE | sftp.FXF_TRUNC, {'permissions': 0})) d.addCallback(_write_trunc) d.addCallback(lambda ign: self.root.get(u"newfile")) d.addCallback(lambda node: download_to_data(node)) d.addCallback(lambda data: self.failUnlessReallyEqual(data, "01234")) d.addCallback(lambda ign: self.root.get_metadata_for(u"newfile")) d.addCallback(lambda metadata: self.failIf(metadata.get('no-write', False), metadata)) # test EXCL flag d.addCallback(lambda ign: self.handler.openFile("excl", sftp.FXF_WRITE | sftp.FXF_CREAT | sftp.FXF_TRUNC | sftp.FXF_EXCL, {})) def _write_excl(wf): d2 = self.root.get(u"excl") d2.addCallback(lambda node: download_to_data(node)) d2.addCallback(lambda data: self.failUnlessReallyEqual(data, "")) d2.addCallback(lambda ign: wf.writeChunk(0, "0123456789")) d2.addCallback(lambda ign: wf.close()) return d2 d.addCallback(_write_excl) d.addCallback(lambda ign: self.root.get(u"excl")) d.addCallback(lambda node: download_to_data(node)) d.addCallback(lambda data: self.failUnlessReallyEqual(data, "0123456789")) # test that writing a zero-length file with EXCL only updates the directory once d.addCallback(lambda ign: self.handler.openFile("zerolength", sftp.FXF_WRITE | sftp.FXF_CREAT | sftp.FXF_EXCL, {})) def _write_excl_zerolength(wf): d2 = self.root.get(u"zerolength") d2.addCallback(lambda node: download_to_data(node)) d2.addCallback(lambda data: self.failUnlessReallyEqual(data, "")) # 
FIXME: no API to get the best version number exists (fix as part of #993) """ d2.addCallback(lambda ign: self.root.get_best_version_number()) def _check_version(version): d3 = wf.close() d3.addCallback(lambda ign: self.root.get_best_version_number()) d3.addCallback(lambda new_version: self.failUnlessReallyEqual(new_version, version)) return d3 d2.addCallback(_check_version) """ d2.addCallback(lambda ign: wf.close()) return d2 d.addCallback(_write_excl_zerolength) d.addCallback(lambda ign: self.root.get(u"zerolength")) d.addCallback(lambda node: download_to_data(node)) d.addCallback(lambda data: self.failUnlessReallyEqual(data, "")) # test WRITE | CREAT | EXCL | APPEND d.addCallback(lambda ign: self.handler.openFile("exclappend", sftp.FXF_WRITE | sftp.FXF_CREAT | sftp.FXF_EXCL | sftp.FXF_APPEND, {})) def _write_excl_append(wf): d2 = self.root.get(u"exclappend") d2.addCallback(lambda node: download_to_data(node)) d2.addCallback(lambda data: self.failUnlessReallyEqual(data, "")) d2.addCallback(lambda ign: wf.writeChunk(10, "0123456789")) d2.addCallback(lambda ign: wf.writeChunk(5, "01234")) d2.addCallback(lambda ign: wf.close()) return d2 d.addCallback(_write_excl_append) d.addCallback(lambda ign: self.root.get(u"exclappend")) d.addCallback(lambda node: download_to_data(node)) d.addCallback(lambda data: self.failUnlessReallyEqual(data, "012345678901234")) # test WRITE | CREAT | APPEND when the file does not already exist d.addCallback(lambda ign: self.handler.openFile("creatappend", sftp.FXF_WRITE | sftp.FXF_CREAT | sftp.FXF_APPEND, {})) def _write_creat_append_new(wf): d2 = wf.writeChunk(10, "0123456789") d2.addCallback(lambda ign: wf.writeChunk(5, "01234")) d2.addCallback(lambda ign: wf.close()) return d2 d.addCallback(_write_creat_append_new) d.addCallback(lambda ign: self.root.get(u"creatappend")) d.addCallback(lambda node: download_to_data(node)) d.addCallback(lambda data: self.failUnlessReallyEqual(data, "012345678901234")) # ... and when it does exist d.addCallback(lambda ign: self.handler.openFile("creatappend", sftp.FXF_WRITE | sftp.FXF_CREAT | sftp.FXF_APPEND, {})) def _write_creat_append_existing(wf): d2 = wf.writeChunk(5, "01234") d2.addCallback(lambda ign: wf.close()) return d2 d.addCallback(_write_creat_append_existing) d.addCallback(lambda ign: self.root.get(u"creatappend")) d.addCallback(lambda node: download_to_data(node)) d.addCallback(lambda data: self.failUnlessReallyEqual(data, "01234567890123401234")) # test WRITE | CREAT without TRUNC, when the file does not already exist d.addCallback(lambda ign: self.handler.openFile("newfile2", sftp.FXF_WRITE | sftp.FXF_CREAT, {})) def _write_creat_new(wf): d2 = wf.writeChunk(0, "0123456789") d2.addCallback(lambda ign: wf.close()) return d2 d.addCallback(_write_creat_new) d.addCallback(lambda ign: self.root.get(u"newfile2")) d.addCallback(lambda node: download_to_data(node)) d.addCallback(lambda data: self.failUnlessReallyEqual(data, "0123456789")) # ... 
and when it does exist d.addCallback(lambda ign: self.handler.openFile("newfile2", sftp.FXF_WRITE | sftp.FXF_CREAT, {})) def _write_creat_existing(wf): d2 = wf.writeChunk(0, "abcde") d2.addCallback(lambda ign: wf.close()) return d2 d.addCallback(_write_creat_existing) d.addCallback(lambda ign: self.root.get(u"newfile2")) d.addCallback(lambda node: download_to_data(node)) d.addCallback(lambda data: self.failUnlessReallyEqual(data, "abcde56789")) d.addCallback(lambda ign: self.root.set_node(u"mutable2", self.mutable)) # test writing to a mutable file d.addCallback(lambda ign: self.handler.openFile("mutable", sftp.FXF_WRITE, {})) def _write_mutable(wf): d2 = wf.writeChunk(8, "new!") d2.addCallback(lambda ign: wf.close()) return d2 d.addCallback(_write_mutable) d.addCallback(lambda ign: self.root.get(u"mutable")) def _check_same_file(node): self.failUnless(node.is_mutable()) self.failIf(node.is_readonly()) self.failUnlessReallyEqual(node.get_uri(), self.mutable_uri) return node.download_best_version() d.addCallback(_check_same_file) d.addCallback(lambda data: self.failUnlessReallyEqual(data, "mutable new! contents")) # ... and with permissions, which should be ignored d.addCallback(lambda ign: self.handler.openFile("mutable", sftp.FXF_WRITE, {'permissions': 0})) d.addCallback(_write_mutable) d.addCallback(lambda ign: self.root.get(u"mutable")) d.addCallback(_check_same_file) d.addCallback(lambda data: self.failUnlessReallyEqual(data, "mutable new! contents")) # ... and with a setAttrs call that diminishes the parent link to read-only, first by path d.addCallback(lambda ign: self.handler.openFile("mutable", sftp.FXF_WRITE, {})) def _write_mutable_setattr(wf): d2 = wf.writeChunk(8, "read-only link from parent") d2.addCallback(lambda ign: self.handler.setAttrs("mutable", {'permissions': 0444})) d2.addCallback(lambda ign: self.root.get(u"mutable")) d2.addCallback(lambda node: self.failUnless(node.is_readonly())) d2.addCallback(lambda ign: wf.getAttrs()) d2.addCallback(lambda attrs: self.failUnlessReallyEqual(attrs['permissions'], S_IFREG | 0666)) d2.addCallback(lambda ign: self.handler.getAttrs("mutable", followLinks=0)) d2.addCallback(lambda attrs: self.failUnlessReallyEqual(attrs['permissions'], S_IFREG | 0444)) d2.addCallback(lambda ign: wf.close()) return d2 d.addCallback(_write_mutable_setattr) d.addCallback(lambda ign: self.root.get(u"mutable")) def _check_readonly_file(node): self.failUnless(node.is_mutable()) self.failUnless(node.is_readonly()) self.failUnlessReallyEqual(node.get_write_uri(), None) self.failUnlessReallyEqual(node.get_storage_index(), self.mutable.get_storage_index()) return node.download_best_version() d.addCallback(_check_readonly_file) d.addCallback(lambda data: self.failUnlessReallyEqual(data, "mutable read-only link from parent")) # ... and then by handle d.addCallback(lambda ign: self.handler.openFile("mutable2", sftp.FXF_WRITE, {})) def _write_mutable2_setattr(wf): d2 = wf.writeChunk(7, "2") d2.addCallback(lambda ign: wf.setAttrs({'permissions': 0444, 'size': 8})) # The link isn't made read-only until the file is closed. 
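            # (Contrast with _write_mutable_setattr above: a setAttrs on the
            # *path* diminishes the parent link to read-only immediately,
            # whereas a setAttrs({'permissions': 0444}) on the open *handle* is
            # only applied when the handle is closed -- hence the check below
            # that "mutable2" is still writeable at this point.)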
d2.addCallback(lambda ign: self.root.get(u"mutable2")) d2.addCallback(lambda node: self.failIf(node.is_readonly())) d2.addCallback(lambda ign: wf.getAttrs()) d2.addCallback(lambda attrs: self.failUnlessReallyEqual(attrs['permissions'], S_IFREG | 0444)) d2.addCallback(lambda ign: self.handler.getAttrs("mutable2", followLinks=0)) d2.addCallback(lambda attrs: self.failUnlessReallyEqual(attrs['permissions'], S_IFREG | 0666)) d2.addCallback(lambda ign: wf.close()) return d2 d.addCallback(_write_mutable2_setattr) d.addCallback(lambda ign: self.root.get(u"mutable2")) d.addCallback(_check_readonly_file) # from above d.addCallback(lambda data: self.failUnlessReallyEqual(data, "mutable2")) # test READ | WRITE without CREAT or TRUNC d.addCallback(lambda ign: self.handler.openFile("small", sftp.FXF_READ | sftp.FXF_WRITE, {})) def _read_write(rwf): d2 = rwf.writeChunk(8, "0123") # test immediate read starting after the old end-of-file d2.addCallback(lambda ign: rwf.readChunk(11, 1)) d2.addCallback(lambda data: self.failUnlessReallyEqual(data, "3")) d2.addCallback(lambda ign: rwf.readChunk(0, 100)) d2.addCallback(lambda data: self.failUnlessReallyEqual(data, "012345670123")) d2.addCallback(lambda ign: rwf.close()) return d2 d.addCallback(_read_write) d.addCallback(lambda ign: self.root.get(u"small")) d.addCallback(lambda node: download_to_data(node)) d.addCallback(lambda data: self.failUnlessReallyEqual(data, "012345670123")) # test WRITE and rename while still open d.addCallback(lambda ign: self.handler.openFile("small", sftp.FXF_WRITE, {})) def _write_rename(wf): d2 = wf.writeChunk(0, "abcd") d2.addCallback(lambda ign: self.handler.renameFile("small", "renamed")) d2.addCallback(lambda ign: wf.writeChunk(4, "efgh")) d2.addCallback(lambda ign: wf.close()) return d2 d.addCallback(_write_rename) d.addCallback(lambda ign: self.root.get(u"renamed")) d.addCallback(lambda node: download_to_data(node)) d.addCallback(lambda data: self.failUnlessReallyEqual(data, "abcdefgh0123")) d.addCallback(lambda ign: self.shouldFail(NoSuchChildError, "rename small while open", "small", self.root.get, u"small")) # test WRITE | CREAT | EXCL and rename while still open d.addCallback(lambda ign: self.handler.openFile("newexcl", sftp.FXF_WRITE | sftp.FXF_CREAT | sftp.FXF_EXCL, {})) def _write_creat_excl_rename(wf): d2 = wf.writeChunk(0, "abcd") d2.addCallback(lambda ign: self.handler.renameFile("newexcl", "renamedexcl")) d2.addCallback(lambda ign: wf.writeChunk(4, "efgh")) d2.addCallback(lambda ign: wf.close()) return d2 d.addCallback(_write_creat_excl_rename) d.addCallback(lambda ign: self.root.get(u"renamedexcl")) d.addCallback(lambda node: download_to_data(node)) d.addCallback(lambda data: self.failUnlessReallyEqual(data, "abcdefgh")) d.addCallback(lambda ign: self.shouldFail(NoSuchChildError, "rename newexcl while open", "newexcl", self.root.get, u"newexcl")) # it should be possible to rename even before the open has completed def _open_and_rename_race(ign): slow_open = defer.Deferred() reactor.callLater(1, slow_open.callback, None) d2 = self.handler.openFile("new", sftp.FXF_WRITE | sftp.FXF_CREAT, {}, delay=slow_open) # deliberate race between openFile and renameFile d3 = self.handler.renameFile("new", "new2") d3.addErrback(lambda err: self.fail("renameFile failed: %r" % (err,))) return d2 d.addCallback(_open_and_rename_race) def _write_rename_race(wf): d2 = wf.writeChunk(0, "abcd") d2.addCallback(lambda ign: wf.close()) return d2 d.addCallback(_write_rename_race) d.addCallback(lambda ign: self.root.get(u"new2")) 
d.addCallback(lambda node: download_to_data(node)) d.addCallback(lambda data: self.failUnlessReallyEqual(data, "abcd")) d.addCallback(lambda ign: self.shouldFail(NoSuchChildError, "rename new while open", "new", self.root.get, u"new")) # check that failed downloads cause failed reads and failed close, # when open for writing. Note that this trashes the grid (by deleting # all shares), so this must be at the end of the test function. gross = u"gro\u00DF".encode("utf-8") d.addCallback(lambda ign: self.handler.openFile(gross, sftp.FXF_READ | sftp.FXF_WRITE, {})) def _read_write_broken(rwf): d2 = rwf.writeChunk(0, "abcdefghij") d2.addCallback(lambda ign: self.g.nuke_from_orbit()) # reading should fail (reliably if we read past the written chunk) d2.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_FAILURE, "read/write broken", rwf.readChunk, 0, 100)) # close should fail in this case d2.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_FAILURE, "read/write broken close", rwf.close)) return d2 d.addCallback(_read_write_broken) d.addCallback(lambda ign: self.failUnlessEqual(sftpd.all_heisenfiles, {})) d.addCallback(lambda ign: self.failUnlessEqual(self.handler._heisenfiles, {})) return d def test_removeFile(self): d = self._set_up("removeFile") d.addCallback(lambda ign: self._set_up_tree()) d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_NO_SUCH_FILE, "removeFile nofile", self.handler.removeFile, "nofile")) d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_NO_SUCH_FILE, "removeFile nofile", self.handler.removeFile, "nofile")) d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_NO_SUCH_FILE, "removeFile nodir/file", self.handler.removeFile, "nodir/file")) d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_NO_SUCH_FILE, "removefile ''", self.handler.removeFile, "")) # removing a directory should fail d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_PERMISSION_DENIED, "removeFile tiny_lit_dir", self.handler.removeFile, "tiny_lit_dir")) # removing a file should succeed d.addCallback(lambda ign: self.root.get(u"gro\u00DF")) d.addCallback(lambda ign: self.handler.removeFile(u"gro\u00DF".encode('utf-8'))) d.addCallback(lambda ign: self.shouldFail(NoSuchChildError, "removeFile gross", "gro\\xdf", self.root.get, u"gro\u00DF")) # removing an unknown should succeed d.addCallback(lambda ign: self.root.get(u"unknown")) d.addCallback(lambda ign: self.handler.removeFile("unknown")) d.addCallback(lambda ign: self.shouldFail(NoSuchChildError, "removeFile unknown", "unknown", self.root.get, u"unknown")) # removing a link to an open file should not prevent it from being read d.addCallback(lambda ign: self.handler.openFile("small", sftp.FXF_READ, {})) def _remove_and_read_small(rf): d2 = self.handler.removeFile("small") d2.addCallback(lambda ign: self.shouldFail(NoSuchChildError, "removeFile small", "small", self.root.get, u"small")) d2.addCallback(lambda ign: rf.readChunk(0, 10)) d2.addCallback(lambda data: self.failUnlessReallyEqual(data, "0123456789")) d2.addCallback(lambda ign: rf.close()) return d2 d.addCallback(_remove_and_read_small) # removing a link to a created file should prevent it from being created d.addCallback(lambda ign: self.handler.openFile("tempfile", sftp.FXF_READ | sftp.FXF_WRITE | sftp.FXF_CREAT, {})) def _write_remove(rwf): d2 = rwf.writeChunk(0, "0123456789") d2.addCallback(lambda ign: self.handler.removeFile("tempfile")) d2.addCallback(lambda ign: rwf.readChunk(0, 10)) d2.addCallback(lambda data: 
self.failUnlessReallyEqual(data, "0123456789")) d2.addCallback(lambda ign: rwf.close()) return d2 d.addCallback(_write_remove) d.addCallback(lambda ign: self.shouldFail(NoSuchChildError, "removeFile tempfile", "tempfile", self.root.get, u"tempfile")) # ... even if the link is renamed while open d.addCallback(lambda ign: self.handler.openFile("tempfile2", sftp.FXF_READ | sftp.FXF_WRITE | sftp.FXF_CREAT, {})) def _write_rename_remove(rwf): d2 = rwf.writeChunk(0, "0123456789") d2.addCallback(lambda ign: self.handler.renameFile("tempfile2", "tempfile3")) d2.addCallback(lambda ign: self.handler.removeFile("tempfile3")) d2.addCallback(lambda ign: rwf.readChunk(0, 10)) d2.addCallback(lambda data: self.failUnlessReallyEqual(data, "0123456789")) d2.addCallback(lambda ign: rwf.close()) return d2 d.addCallback(_write_rename_remove) d.addCallback(lambda ign: self.shouldFail(NoSuchChildError, "removeFile tempfile2", "tempfile2", self.root.get, u"tempfile2")) d.addCallback(lambda ign: self.shouldFail(NoSuchChildError, "removeFile tempfile3", "tempfile3", self.root.get, u"tempfile3")) d.addCallback(lambda ign: self.failUnlessEqual(sftpd.all_heisenfiles, {})) d.addCallback(lambda ign: self.failUnlessEqual(self.handler._heisenfiles, {})) return d def test_removeDirectory(self): d = self._set_up("removeDirectory") d.addCallback(lambda ign: self._set_up_tree()) d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_NO_SUCH_FILE, "removeDirectory nodir", self.handler.removeDirectory, "nodir")) d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_NO_SUCH_FILE, "removeDirectory nodir/nodir", self.handler.removeDirectory, "nodir/nodir")) d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_NO_SUCH_FILE, "removeDirectory ''", self.handler.removeDirectory, "")) # removing a file should fail d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_PERMISSION_DENIED, "removeDirectory gross", self.handler.removeDirectory, u"gro\u00DF".encode('utf-8'))) # removing a directory should succeed d.addCallback(lambda ign: self.root.get(u"tiny_lit_dir")) d.addCallback(lambda ign: self.handler.removeDirectory("tiny_lit_dir")) d.addCallback(lambda ign: self.shouldFail(NoSuchChildError, "removeDirectory tiny_lit_dir", "tiny_lit_dir", self.root.get, u"tiny_lit_dir")) # removing an unknown should succeed d.addCallback(lambda ign: self.root.get(u"unknown")) d.addCallback(lambda ign: self.handler.removeDirectory("unknown")) d.addCallback(lambda err: self.shouldFail(NoSuchChildError, "removeDirectory unknown", "unknown", self.root.get, u"unknown")) d.addCallback(lambda ign: self.failUnlessEqual(sftpd.all_heisenfiles, {})) d.addCallback(lambda ign: self.failUnlessEqual(self.handler._heisenfiles, {})) return d def test_renameFile(self): d = self._set_up("renameFile") d.addCallback(lambda ign: self._set_up_tree()) # renaming a non-existent file should fail d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_NO_SUCH_FILE, "renameFile nofile newfile", self.handler.renameFile, "nofile", "newfile")) d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_NO_SUCH_FILE, "renameFile '' newfile", self.handler.renameFile, "", "newfile")) # renaming a file to a non-existent path should fail d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_NO_SUCH_FILE, "renameFile small nodir/small", self.handler.renameFile, "small", "nodir/small")) # renaming a file to an invalid UTF-8 name should fail d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_NO_SUCH_FILE, "renameFile small 
invalid", self.handler.renameFile, "small", "\xFF")) # renaming a file to or from an URI should fail d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_NO_SUCH_FILE, "renameFile small from uri", self.handler.renameFile, "uri/"+self.small_uri, "new")) d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_NO_SUCH_FILE, "renameFile small to uri", self.handler.renameFile, "small", "uri/fake_uri")) # renaming a file onto an existing file, directory or unknown should fail # The SFTP spec isn't clear about what error should be returned, but sshfs depends on # it being FX_PERMISSION_DENIED. d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_PERMISSION_DENIED, "renameFile small small2", self.handler.renameFile, "small", "small2")) d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_PERMISSION_DENIED, "renameFile small tiny_lit_dir", self.handler.renameFile, "small", "tiny_lit_dir")) d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_PERMISSION_DENIED, "renameFile small unknown", self.handler.renameFile, "small", "unknown")) # renaming a file onto a heisenfile should fail, even if the open hasn't completed def _rename_onto_heisenfile_race(wf): slow_open = defer.Deferred() reactor.callLater(1, slow_open.callback, None) d2 = self.handler.openFile("heisenfile", sftp.FXF_WRITE | sftp.FXF_CREAT, {}, delay=slow_open) # deliberate race between openFile and renameFile d3 = self.shouldFailWithSFTPError(sftp.FX_PERMISSION_DENIED, "renameFile small heisenfile", self.handler.renameFile, "small", "heisenfile") d2.addCallback(lambda wf: wf.close()) return deferredutil.gatherResults([d2, d3]) d.addCallback(_rename_onto_heisenfile_race) # renaming a file to a correct path should succeed d.addCallback(lambda ign: self.handler.renameFile("small", "new_small")) d.addCallback(lambda ign: self.root.get(u"new_small")) d.addCallback(lambda node: self.failUnlessReallyEqual(node.get_uri(), self.small_uri)) # renaming a file into a subdirectory should succeed (also tests Unicode names) d.addCallback(lambda ign: self.handler.renameFile(u"gro\u00DF".encode('utf-8'), u"loop/neue_gro\u00DF".encode('utf-8'))) d.addCallback(lambda ign: self.root.get(u"neue_gro\u00DF")) d.addCallback(lambda node: self.failUnlessReallyEqual(node.get_uri(), self.gross_uri)) # renaming a directory to a correct path should succeed d.addCallback(lambda ign: self.handler.renameFile("tiny_lit_dir", "new_tiny_lit_dir")) d.addCallback(lambda ign: self.root.get(u"new_tiny_lit_dir")) d.addCallback(lambda node: self.failUnlessReallyEqual(node.get_uri(), self.tiny_lit_dir_uri)) # renaming an unknown to a correct path should succeed d.addCallback(lambda ign: self.handler.renameFile("unknown", "new_unknown")) d.addCallback(lambda ign: self.root.get(u"new_unknown")) d.addCallback(lambda node: self.failUnlessReallyEqual(node.get_uri(), self.unknown_uri)) d.addCallback(lambda ign: self.failUnlessEqual(sftpd.all_heisenfiles, {})) d.addCallback(lambda ign: self.failUnlessEqual(self.handler._heisenfiles, {})) return d def test_renameFile_posix(self): def _renameFile(fromPathstring, toPathstring): extData = (struct.pack('>L', len(fromPathstring)) + fromPathstring + struct.pack('>L', len(toPathstring)) + toPathstring) d2 = self.handler.extendedRequest('posix-rename@openssh.com', extData) def _check(res): res.trap(sftp.SFTPError) if res.value.code == sftp.FX_OK: return None return res d2.addCallbacks(lambda res: self.fail("posix-rename request was supposed to " "raise an SFTPError, not get '%r'" % (res,)), _check) 
return d2 d = self._set_up("renameFile_posix") d.addCallback(lambda ign: self._set_up_tree()) d.addCallback(lambda ign: self.root.set_node(u"loop2", self.root)) d.addCallback(lambda ign: self.root.set_node(u"unknown2", self.unknown)) # POSIX-renaming a non-existent file should fail d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_NO_SUCH_FILE, "renameFile_posix nofile newfile", _renameFile, "nofile", "newfile")) d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_NO_SUCH_FILE, "renameFile_posix '' newfile", _renameFile, "", "newfile")) # POSIX-renaming a file to a non-existent path should fail d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_NO_SUCH_FILE, "renameFile_posix small nodir/small", _renameFile, "small", "nodir/small")) # POSIX-renaming a file to an invalid UTF-8 name should fail d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_NO_SUCH_FILE, "renameFile_posix small invalid", _renameFile, "small", "\xFF")) # POSIX-renaming a file to or from an URI should fail d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_NO_SUCH_FILE, "renameFile_posix small from uri", _renameFile, "uri/"+self.small_uri, "new")) d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_NO_SUCH_FILE, "renameFile_posix small to uri", _renameFile, "small", "uri/fake_uri")) # POSIX-renaming a file onto an existing file, directory or unknown should succeed d.addCallback(lambda ign: _renameFile("small", "small2")) d.addCallback(lambda ign: self.root.get(u"small2")) d.addCallback(lambda node: self.failUnlessReallyEqual(node.get_uri(), self.small_uri)) d.addCallback(lambda ign: _renameFile("small2", "loop2")) d.addCallback(lambda ign: self.root.get(u"loop2")) d.addCallback(lambda node: self.failUnlessReallyEqual(node.get_uri(), self.small_uri)) d.addCallback(lambda ign: _renameFile("loop2", "unknown2")) d.addCallback(lambda ign: self.root.get(u"unknown2")) d.addCallback(lambda node: self.failUnlessReallyEqual(node.get_uri(), self.small_uri)) # POSIX-renaming a file to a correct new path should succeed d.addCallback(lambda ign: _renameFile("unknown2", "new_small")) d.addCallback(lambda ign: self.root.get(u"new_small")) d.addCallback(lambda node: self.failUnlessReallyEqual(node.get_uri(), self.small_uri)) # POSIX-renaming a file into a subdirectory should succeed (also tests Unicode names) d.addCallback(lambda ign: _renameFile(u"gro\u00DF".encode('utf-8'), u"loop/neue_gro\u00DF".encode('utf-8'))) d.addCallback(lambda ign: self.root.get(u"neue_gro\u00DF")) d.addCallback(lambda node: self.failUnlessReallyEqual(node.get_uri(), self.gross_uri)) # POSIX-renaming a directory to a correct path should succeed d.addCallback(lambda ign: _renameFile("tiny_lit_dir", "new_tiny_lit_dir")) d.addCallback(lambda ign: self.root.get(u"new_tiny_lit_dir")) d.addCallback(lambda node: self.failUnlessReallyEqual(node.get_uri(), self.tiny_lit_dir_uri)) # POSIX-renaming an unknown to a correct path should succeed d.addCallback(lambda ign: _renameFile("unknown", "new_unknown")) d.addCallback(lambda ign: self.root.get(u"new_unknown")) d.addCallback(lambda node: self.failUnlessReallyEqual(node.get_uri(), self.unknown_uri)) d.addCallback(lambda ign: self.failUnlessEqual(sftpd.all_heisenfiles, {})) d.addCallback(lambda ign: self.failUnlessEqual(self.handler._heisenfiles, {})) return d def test_makeDirectory(self): d = self._set_up("makeDirectory") d.addCallback(lambda ign: self._set_up_tree()) # making a directory at a correct path should succeed d.addCallback(lambda ign: 
self.handler.makeDirectory("newdir", {'ext_foo': 'bar', 'ctime': 42})) d.addCallback(lambda ign: self.root.get_child_and_metadata(u"newdir")) def _got( (child, metadata) ): self.failUnless(IDirectoryNode.providedBy(child)) self.failUnless(child.is_mutable()) # FIXME #self.failUnless('ctime' in metadata, metadata) #self.failUnlessReallyEqual(metadata['ctime'], 42) #self.failUnless('ext_foo' in metadata, metadata) #self.failUnlessReallyEqual(metadata['ext_foo'], 'bar') # TODO: child should be empty d.addCallback(_got) # making intermediate directories should also succeed d.addCallback(lambda ign: self.handler.makeDirectory("newparent/newchild", {})) d.addCallback(lambda ign: self.root.get(u"newparent")) def _got_newparent(newparent): self.failUnless(IDirectoryNode.providedBy(newparent)) self.failUnless(newparent.is_mutable()) return newparent.get(u"newchild") d.addCallback(_got_newparent) def _got_newchild(newchild): self.failUnless(IDirectoryNode.providedBy(newchild)) self.failUnless(newchild.is_mutable()) d.addCallback(_got_newchild) d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_NO_SUCH_FILE, "makeDirectory invalid UTF-8", self.handler.makeDirectory, "\xFF", {})) # should fail because there is an existing file "small" d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_FAILURE, "makeDirectory small", self.handler.makeDirectory, "small", {})) # directories cannot be created read-only via SFTP d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_PERMISSION_DENIED, "makeDirectory newdir2 permissions:0444 denied", self.handler.makeDirectory, "newdir2", {'permissions': 0444})) d.addCallback(lambda ign: self.failUnlessEqual(sftpd.all_heisenfiles, {})) d.addCallback(lambda ign: self.failUnlessEqual(self.handler._heisenfiles, {})) return d def test_execCommand_and_openShell(self): class MockProtocol: def __init__(self): self.output = "" self.error = "" self.reason = None def write(self, data): return self.outReceived(data) def outReceived(self, data): self.output += data return defer.succeed(None) def errReceived(self, data): self.error += data return defer.succeed(None) def processEnded(self, reason): self.reason = reason return defer.succeed(None) def _lines_end_in_crlf(s): return s.replace('\r\n', '').find('\n') == -1 and s.endswith('\r\n') d = self._set_up("execCommand_and_openShell") d.addCallback(lambda ign: conch_interfaces.ISession(self.handler)) def _exec_df(session): protocol = MockProtocol() d2 = session.execCommand(protocol, "df -P -k /") d2.addCallback(lambda ign: self.failUnlessIn("1024-blocks", protocol.output)) d2.addCallback(lambda ign: self.failUnless(_lines_end_in_crlf(protocol.output), protocol.output)) d2.addCallback(lambda ign: self.failUnlessEqual(protocol.error, "")) d2.addCallback(lambda ign: self.failUnless(isinstance(protocol.reason.value, ProcessDone))) d2.addCallback(lambda ign: session.eofReceived()) d2.addCallback(lambda ign: session.closed()) return d2 d.addCallback(_exec_df) def _check_unsupported(protocol): d2 = defer.succeed(None) d2.addCallback(lambda ign: self.failUnlessEqual(protocol.output, "")) d2.addCallback(lambda ign: self.failUnlessIn("only the SFTP protocol", protocol.error)) d2.addCallback(lambda ign: self.failUnless(_lines_end_in_crlf(protocol.error), protocol.error)) d2.addCallback(lambda ign: self.failUnless(isinstance(protocol.reason.value, ProcessTerminated))) d2.addCallback(lambda ign: self.failUnlessEqual(protocol.reason.value.exitCode, 1)) return d2 d.addCallback(lambda ign: 
conch_interfaces.ISession(self.handler)) def _exec_error(session): protocol = MockProtocol() d2 = session.execCommand(protocol, "error") d2.addCallback(lambda ign: session.windowChanged(None)) d2.addCallback(lambda ign: _check_unsupported(protocol)) d2.addCallback(lambda ign: session.closed()) return d2 d.addCallback(_exec_error) d.addCallback(lambda ign: conch_interfaces.ISession(self.handler)) def _openShell(session): protocol = MockProtocol() d2 = session.openShell(protocol) d2.addCallback(lambda ign: _check_unsupported(protocol)) d2.addCallback(lambda ign: session.closed()) return d2 d.addCallback(_openShell) return d def test_extendedRequest(self): d = self._set_up("extendedRequest") d.addCallback(lambda ign: self.handler.extendedRequest("statvfs@openssh.com", "/")) def _check(res): self.failUnless(isinstance(res, str)) self.failUnlessEqual(len(res), 8*11) d.addCallback(_check) d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_OP_UNSUPPORTED, "extendedRequest foo bar", self.handler.extendedRequest, "foo", "bar")) d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_BAD_MESSAGE, "extendedRequest posix-rename@openssh.com invalid 1", self.handler.extendedRequest, 'posix-rename@openssh.com', '')) d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_BAD_MESSAGE, "extendedRequest posix-rename@openssh.com invalid 2", self.handler.extendedRequest, 'posix-rename@openssh.com', '\x00\x00\x00\x01')) d.addCallback(lambda ign: self.shouldFailWithSFTPError(sftp.FX_BAD_MESSAGE, "extendedRequest posix-rename@openssh.com invalid 3", self.handler.extendedRequest, 'posix-rename@openssh.com', '\x00\x00\x00\x01_\x00\x00\x00\x01')) return d tahoe-lafs-1.10.0/src/allmydata/test/test_stats.py000066400000000000000000000023751221140116300220770ustar00rootroot00000000000000 from twisted.trial import unittest from twisted.application import service from allmydata.stats import CPUUsageMonitor from allmydata.util import pollmixin import allmydata.test.common_util as testutil class FasterMonitor(CPUUsageMonitor): POLL_INTERVAL = 0.1 class CPUUsage(unittest.TestCase, pollmixin.PollMixin, testutil.StallMixin): def setUp(self): self.s = service.MultiService() self.s.startService() def tearDown(self): return self.s.stopService() def test_monitor(self): m = FasterMonitor() s = m.get_stats() # before it has been started self.failIf("cpu_monitor.1min_avg" in s) m.setServiceParent(self.s) def _poller(): return bool(len(m.samples) == m.HISTORY_LENGTH+1) d = self.poll(_poller) # pause one more second, to make sure that the history-trimming code # is exercised d.addCallback(self.stall, 1.0) def _check(res): s = m.get_stats() self.failUnless("cpu_monitor.1min_avg" in s) self.failUnless("cpu_monitor.5min_avg" in s) self.failUnless("cpu_monitor.15min_avg" in s) self.failUnless("cpu_monitor.total" in s) d.addCallback(_check) return d tahoe-lafs-1.10.0/src/allmydata/test/test_storage.py000066400000000000000000005316001221140116300224030ustar00rootroot00000000000000import time, os.path, platform, stat, re, simplejson, struct, shutil import mock from twisted.trial import unittest from twisted.internet import defer from twisted.application import service from foolscap.api import fireEventually import itertools from allmydata import interfaces from allmydata.util import fileutil, hashutil, base32, pollmixin, time_format from allmydata.storage.server import StorageServer from allmydata.storage.mutable import MutableShareFile from allmydata.storage.immutable import BucketWriter, BucketReader from 
allmydata.storage.common import DataTooLargeError, storage_index_to_dir, \ UnknownMutableContainerVersionError, UnknownImmutableContainerVersionError from allmydata.storage.lease import LeaseInfo from allmydata.storage.crawler import BucketCountingCrawler from allmydata.storage.expirer import LeaseCheckingCrawler from allmydata.immutable.layout import WriteBucketProxy, WriteBucketProxy_v2, \ ReadBucketProxy from allmydata.mutable.layout import MDMFSlotWriteProxy, MDMFSlotReadProxy, \ LayoutInvalid, MDMFSIGNABLEHEADER, \ SIGNED_PREFIX, MDMFHEADER, \ MDMFOFFSETS, SDMFSlotWriteProxy, \ PRIVATE_KEY_SIZE, \ SIGNATURE_SIZE, \ VERIFICATION_KEY_SIZE, \ SHARE_HASH_CHAIN_SIZE from allmydata.interfaces import BadWriteEnablerError from allmydata.test.common import LoggingServiceParent, ShouldFailMixin from allmydata.test.common_web import WebRenderingMixin from allmydata.test.no_network import NoNetworkServer from allmydata.web.storage import StorageStatus, remove_prefix class Marker: pass class FakeCanary: def __init__(self, ignore_disconnectors=False): self.ignore = ignore_disconnectors self.disconnectors = {} def notifyOnDisconnect(self, f, *args, **kwargs): if self.ignore: return m = Marker() self.disconnectors[m] = (f, args, kwargs) return m def dontNotifyOnDisconnect(self, marker): if self.ignore: return del self.disconnectors[marker] class FakeStatsProvider: def count(self, name, delta=1): pass def register_producer(self, producer): pass class Bucket(unittest.TestCase): def make_workdir(self, name): basedir = os.path.join("storage", "Bucket", name) incoming = os.path.join(basedir, "tmp", "bucket") final = os.path.join(basedir, "bucket") fileutil.make_dirs(basedir) fileutil.make_dirs(os.path.join(basedir, "tmp")) return incoming, final def bucket_writer_closed(self, bw, consumed): pass def add_latency(self, category, latency): pass def count(self, name, delta=1): pass def make_lease(self): owner_num = 0 renew_secret = os.urandom(32) cancel_secret = os.urandom(32) expiration_time = time.time() + 5000 return LeaseInfo(owner_num, renew_secret, cancel_secret, expiration_time, "\x00" * 20) def test_create(self): incoming, final = self.make_workdir("test_create") bw = BucketWriter(self, incoming, final, 200, self.make_lease(), FakeCanary()) bw.remote_write(0, "a"*25) bw.remote_write(25, "b"*25) bw.remote_write(50, "c"*25) bw.remote_write(75, "d"*7) bw.remote_close() def test_readwrite(self): incoming, final = self.make_workdir("test_readwrite") bw = BucketWriter(self, incoming, final, 200, self.make_lease(), FakeCanary()) bw.remote_write(0, "a"*25) bw.remote_write(25, "b"*25) bw.remote_write(50, "c"*7) # last block may be short bw.remote_close() # now read from it br = BucketReader(self, bw.finalhome) self.failUnlessEqual(br.remote_read(0, 25), "a"*25) self.failUnlessEqual(br.remote_read(25, 25), "b"*25) self.failUnlessEqual(br.remote_read(50, 7), "c"*7) def test_read_past_end_of_share_data(self): # test vector for immutable files (hard-coded contents of an immutable share # file): # The following immutable share file content is identical to that # generated with storage.immutable.ShareFile from Tahoe-LAFS v1.8.2 # with share data == 'a'. The total size of this content is 85 # bytes. containerdata = struct.pack('>LLL', 1, 1, 1) # A Tahoe-LAFS storage client would send as the share_data a # complicated string involving hash trees and a URI Extension Block # -- see allmydata/immutable/layout.py . This test, which is # simulating a client, just sends 'a'. 
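# Illustrative size accounting (added) for the hard-coded share file built just
# below: a 12-byte container header, one byte of "share data", and a single
# 72-byte lease record (owner number + renew secret + cancel secret +
# expiration time) add up to the 85 bytes mentioned in the comment above.
# This is a sketch of the arithmetic only, not a definition of the on-disk format.
import struct
_header = struct.pack('>LLL', 1, 1, 1)                         # 12 bytes
_lease = struct.pack('>L', 0) + 'R'*32 + 'C'*32 + struct.pack('>L', 31*24*60*60)
assert len(_header) + len('a') + len(_lease) == 85             # 12 + 1 + 72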
share_data = 'a' ownernumber = struct.pack('>L', 0) renewsecret = 'THIS LETS ME RENEW YOUR FILE....' assert len(renewsecret) == 32 cancelsecret = 'THIS LETS ME KILL YOUR FILE HAHA' assert len(cancelsecret) == 32 expirationtime = struct.pack('>L', 60*60*24*31) # 31 days in seconds lease_data = ownernumber + renewsecret + cancelsecret + expirationtime share_file_data = containerdata + share_data + lease_data incoming, final = self.make_workdir("test_read_past_end_of_share_data") fileutil.write(final, share_file_data) mockstorageserver = mock.Mock() # Now read from it. br = BucketReader(mockstorageserver, final) self.failUnlessEqual(br.remote_read(0, len(share_data)), share_data) # Read past the end of share data to get the cancel secret. read_length = len(share_data) + len(ownernumber) + len(renewsecret) + len(cancelsecret) result_of_read = br.remote_read(0, read_length) self.failUnlessEqual(result_of_read, share_data) result_of_read = br.remote_read(0, len(share_data)+1) self.failUnlessEqual(result_of_read, share_data) class RemoteBucket: def __init__(self): self.read_count = 0 self.write_count = 0 def callRemote(self, methname, *args, **kwargs): def _call(): meth = getattr(self.target, "remote_" + methname) return meth(*args, **kwargs) if methname == "slot_readv": self.read_count += 1 if "writev" in methname: self.write_count += 1 return defer.maybeDeferred(_call) class BucketProxy(unittest.TestCase): def make_bucket(self, name, size): basedir = os.path.join("storage", "BucketProxy", name) incoming = os.path.join(basedir, "tmp", "bucket") final = os.path.join(basedir, "bucket") fileutil.make_dirs(basedir) fileutil.make_dirs(os.path.join(basedir, "tmp")) bw = BucketWriter(self, incoming, final, size, self.make_lease(), FakeCanary()) rb = RemoteBucket() rb.target = bw return bw, rb, final def make_lease(self): owner_num = 0 renew_secret = os.urandom(32) cancel_secret = os.urandom(32) expiration_time = time.time() + 5000 return LeaseInfo(owner_num, renew_secret, cancel_secret, expiration_time, "\x00" * 20) def bucket_writer_closed(self, bw, consumed): pass def add_latency(self, category, latency): pass def count(self, name, delta=1): pass def test_create(self): bw, rb, sharefname = self.make_bucket("test_create", 500) bp = WriteBucketProxy(rb, None, data_size=300, block_size=10, num_segments=5, num_share_hashes=3, uri_extension_size_max=500) self.failUnless(interfaces.IStorageBucketWriter.providedBy(bp), bp) def _do_test_readwrite(self, name, header_size, wbp_class, rbp_class): # Let's pretend each share has 100 bytes of data, and that there are # 4 segments (25 bytes each), and 8 shares total. So the two # per-segment merkle trees (crypttext_hash_tree, # block_hashes) will have 4 leaves and 7 nodes each. The per-share # merkle tree (share_hashes) has 8 leaves and 15 nodes, and we need 3 # nodes. Furthermore, let's assume the uri_extension is 500 bytes # long. 
That should make the whole share: # # 0x24 + 100 + 7*32 + 7*32 + 7*32 + 3*(2+32) + 4+500 = 1414 bytes long # 0x44 + 100 + 7*32 + 7*32 + 7*32 + 3*(2+32) + 4+500 = 1446 bytes long sharesize = header_size + 100 + 7*32 + 7*32 + 7*32 + 3*(2+32) + 4+500 crypttext_hashes = [hashutil.tagged_hash("crypt", "bar%d" % i) for i in range(7)] block_hashes = [hashutil.tagged_hash("block", "bar%d" % i) for i in range(7)] share_hashes = [(i, hashutil.tagged_hash("share", "bar%d" % i)) for i in (1,9,13)] uri_extension = "s" + "E"*498 + "e" bw, rb, sharefname = self.make_bucket(name, sharesize) bp = wbp_class(rb, None, data_size=95, block_size=25, num_segments=4, num_share_hashes=3, uri_extension_size_max=len(uri_extension)) d = bp.put_header() d.addCallback(lambda res: bp.put_block(0, "a"*25)) d.addCallback(lambda res: bp.put_block(1, "b"*25)) d.addCallback(lambda res: bp.put_block(2, "c"*25)) d.addCallback(lambda res: bp.put_block(3, "d"*20)) d.addCallback(lambda res: bp.put_crypttext_hashes(crypttext_hashes)) d.addCallback(lambda res: bp.put_block_hashes(block_hashes)) d.addCallback(lambda res: bp.put_share_hashes(share_hashes)) d.addCallback(lambda res: bp.put_uri_extension(uri_extension)) d.addCallback(lambda res: bp.close()) # now read everything back def _start_reading(res): br = BucketReader(self, sharefname) rb = RemoteBucket() rb.target = br server = NoNetworkServer("abc", None) rbp = rbp_class(rb, server, storage_index="") self.failUnlessIn("to peer", repr(rbp)) self.failUnless(interfaces.IStorageBucketReader.providedBy(rbp), rbp) d1 = rbp.get_block_data(0, 25, 25) d1.addCallback(lambda res: self.failUnlessEqual(res, "a"*25)) d1.addCallback(lambda res: rbp.get_block_data(1, 25, 25)) d1.addCallback(lambda res: self.failUnlessEqual(res, "b"*25)) d1.addCallback(lambda res: rbp.get_block_data(2, 25, 25)) d1.addCallback(lambda res: self.failUnlessEqual(res, "c"*25)) d1.addCallback(lambda res: rbp.get_block_data(3, 25, 20)) d1.addCallback(lambda res: self.failUnlessEqual(res, "d"*20)) d1.addCallback(lambda res: rbp.get_crypttext_hashes()) d1.addCallback(lambda res: self.failUnlessEqual(res, crypttext_hashes)) d1.addCallback(lambda res: rbp.get_block_hashes(set(range(4)))) d1.addCallback(lambda res: self.failUnlessEqual(res, block_hashes)) d1.addCallback(lambda res: rbp.get_share_hashes()) d1.addCallback(lambda res: self.failUnlessEqual(res, share_hashes)) d1.addCallback(lambda res: rbp.get_uri_extension()) d1.addCallback(lambda res: self.failUnlessEqual(res, uri_extension)) return d1 d.addCallback(_start_reading) return d def test_readwrite_v1(self): return self._do_test_readwrite("test_readwrite_v1", 0x24, WriteBucketProxy, ReadBucketProxy) def test_readwrite_v2(self): return self._do_test_readwrite("test_readwrite_v2", 0x44, WriteBucketProxy_v2, ReadBucketProxy) class Server(unittest.TestCase): def setUp(self): self.sparent = LoggingServiceParent() self.sparent.startService() self._lease_secret = itertools.count() def tearDown(self): return self.sparent.stopService() def workdir(self, name): basedir = os.path.join("storage", "Server", name) return basedir def create(self, name, reserved_space=0, klass=StorageServer): workdir = self.workdir(name) ss = klass(workdir, "\x00" * 20, reserved_space=reserved_space, stats_provider=FakeStatsProvider()) ss.setServiceParent(self.sparent) return ss def test_create(self): self.create("test_create") def test_declares_fixed_1528(self): ss = self.create("test_declares_fixed_1528") ver = ss.remote_get_version() sv1 = 
ver['http://allmydata.org/tahoe/protocols/storage/v1'] self.failUnless(sv1.get('prevents-read-past-end-of-share-data'), sv1) def test_declares_maximum_share_sizes(self): ss = self.create("test_declares_maximum_share_sizes") ver = ss.remote_get_version() sv1 = ver['http://allmydata.org/tahoe/protocols/storage/v1'] self.failUnlessIn('maximum-immutable-share-size', sv1) self.failUnlessIn('maximum-mutable-share-size', sv1) def allocate(self, ss, storage_index, sharenums, size, canary=None): renew_secret = hashutil.tagged_hash("blah", "%d" % self._lease_secret.next()) cancel_secret = hashutil.tagged_hash("blah", "%d" % self._lease_secret.next()) if not canary: canary = FakeCanary() return ss.remote_allocate_buckets(storage_index, renew_secret, cancel_secret, sharenums, size, canary) def test_large_share(self): syslow = platform.system().lower() if 'cygwin' in syslow or 'windows' in syslow or 'darwin' in syslow: raise unittest.SkipTest("If your filesystem doesn't support efficient sparse files then it is very expensive (Mac OS X and Windows don't support efficient sparse files).") avail = fileutil.get_available_space('.', 512*2**20) if avail <= 4*2**30: raise unittest.SkipTest("This test will spuriously fail if you have less than 4 GiB free on your filesystem.") ss = self.create("test_large_share") already,writers = self.allocate(ss, "allocate", [0], 2**32+2) self.failUnlessEqual(already, set()) self.failUnlessEqual(set(writers.keys()), set([0])) shnum, bucket = writers.items()[0] # This test is going to hammer your filesystem if it doesn't make a sparse file for this. :-( bucket.remote_write(2**32, "ab") bucket.remote_close() readers = ss.remote_get_buckets("allocate") reader = readers[shnum] self.failUnlessEqual(reader.remote_read(2**32, 2), "ab") def test_dont_overfill_dirs(self): """ This test asserts that if you add a second share whose storage index share lots of leading bits with an extant share (but isn't the exact same storage index), this won't add an entry to the share directory. """ ss = self.create("test_dont_overfill_dirs") already, writers = self.allocate(ss, "storageindex", [0], 10) for i, wb in writers.items(): wb.remote_write(0, "%10d" % i) wb.remote_close() storedir = os.path.join(self.workdir("test_dont_overfill_dirs"), "shares") children_of_storedir = set(os.listdir(storedir)) # Now store another one under another storageindex that has leading # chars the same as the first storageindex. 
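# Illustrative sketch (added): why "storageindex" and "storageindey" land in
# the same prefix directory. The share directory prefix is the first two
# characters of the base32-encoded storage index (see storage_index_to_dir and
# the prefix handling in test_remove below), and those two characters depend
# only on the leading bits, which the two indexes share.
from allmydata.util import base32
assert base32.b2a("storageindex")[:2] == base32.b2a("storageindey")[:2]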
already, writers = self.allocate(ss, "storageindey", [0], 10) for i, wb in writers.items(): wb.remote_write(0, "%10d" % i) wb.remote_close() storedir = os.path.join(self.workdir("test_dont_overfill_dirs"), "shares") new_children_of_storedir = set(os.listdir(storedir)) self.failUnlessEqual(children_of_storedir, new_children_of_storedir) def test_remove_incoming(self): ss = self.create("test_remove_incoming") already, writers = self.allocate(ss, "vid", range(3), 10) for i,wb in writers.items(): wb.remote_write(0, "%10d" % i) wb.remote_close() incoming_share_dir = wb.incominghome incoming_bucket_dir = os.path.dirname(incoming_share_dir) incoming_prefix_dir = os.path.dirname(incoming_bucket_dir) incoming_dir = os.path.dirname(incoming_prefix_dir) self.failIf(os.path.exists(incoming_bucket_dir), incoming_bucket_dir) self.failIf(os.path.exists(incoming_prefix_dir), incoming_prefix_dir) self.failUnless(os.path.exists(incoming_dir), incoming_dir) def test_abort(self): # remote_abort, when called on a writer, should make sure that # the allocated size of the bucket is not counted by the storage # server when accounting for space. ss = self.create("test_abort") already, writers = self.allocate(ss, "allocate", [0, 1, 2], 150) self.failIfEqual(ss.allocated_size(), 0) # Now abort the writers. for writer in writers.itervalues(): writer.remote_abort() self.failUnlessEqual(ss.allocated_size(), 0) def test_allocate(self): ss = self.create("test_allocate") self.failUnlessEqual(ss.remote_get_buckets("allocate"), {}) already,writers = self.allocate(ss, "allocate", [0,1,2], 75) self.failUnlessEqual(already, set()) self.failUnlessEqual(set(writers.keys()), set([0,1,2])) # while the buckets are open, they should not count as readable self.failUnlessEqual(ss.remote_get_buckets("allocate"), {}) # close the buckets for i,wb in writers.items(): wb.remote_write(0, "%25d" % i) wb.remote_close() # aborting a bucket that was already closed is a no-op wb.remote_abort() # now they should be readable b = ss.remote_get_buckets("allocate") self.failUnlessEqual(set(b.keys()), set([0,1,2])) self.failUnlessEqual(b[0].remote_read(0, 25), "%25d" % 0) b_str = str(b[0]) self.failUnlessIn("BucketReader", b_str) self.failUnlessIn("mfwgy33dmf2g 0", b_str) # now if we ask about writing again, the server should offer those # three buckets as already present. It should offer them even if we # don't ask about those specific ones. 
already,writers = self.allocate(ss, "allocate", [2,3,4], 75) self.failUnlessEqual(already, set([0,1,2])) self.failUnlessEqual(set(writers.keys()), set([3,4])) # while those two buckets are open for writing, the server should # refuse to offer them to uploaders already2,writers2 = self.allocate(ss, "allocate", [2,3,4,5], 75) self.failUnlessEqual(already2, set([0,1,2])) self.failUnlessEqual(set(writers2.keys()), set([5])) # aborting the writes should remove the tempfiles for i,wb in writers2.items(): wb.remote_abort() already2,writers2 = self.allocate(ss, "allocate", [2,3,4,5], 75) self.failUnlessEqual(already2, set([0,1,2])) self.failUnlessEqual(set(writers2.keys()), set([5])) for i,wb in writers2.items(): wb.remote_abort() for i,wb in writers.items(): wb.remote_abort() def test_bad_container_version(self): ss = self.create("test_bad_container_version") a,w = self.allocate(ss, "si1", [0], 10) w[0].remote_write(0, "\xff"*10) w[0].remote_close() fn = os.path.join(ss.sharedir, storage_index_to_dir("si1"), "0") f = open(fn, "rb+") f.seek(0) f.write(struct.pack(">L", 0)) # this is invalid: minimum used is v1 f.close() ss.remote_get_buckets("allocate") e = self.failUnlessRaises(UnknownImmutableContainerVersionError, ss.remote_get_buckets, "si1") self.failUnlessIn(" had version 0 but we wanted 1", str(e)) def test_disconnect(self): # simulate a disconnection ss = self.create("test_disconnect") canary = FakeCanary() already,writers = self.allocate(ss, "disconnect", [0,1,2], 75, canary) self.failUnlessEqual(already, set()) self.failUnlessEqual(set(writers.keys()), set([0,1,2])) for (f,args,kwargs) in canary.disconnectors.values(): f(*args, **kwargs) del already del writers # that ought to delete the incoming shares already,writers = self.allocate(ss, "disconnect", [0,1,2], 75) self.failUnlessEqual(already, set()) self.failUnlessEqual(set(writers.keys()), set([0,1,2])) @mock.patch('allmydata.util.fileutil.get_disk_stats') def test_reserved_space(self, mock_get_disk_stats): reserved_space=10000 mock_get_disk_stats.return_value = { 'free_for_nonroot': 15000, 'avail': max(15000 - reserved_space, 0), } ss = self.create("test_reserved_space", reserved_space=reserved_space) # 15k available, 10k reserved, leaves 5k for shares # a newly created and filled share incurs this much overhead, beyond # the size we request. OVERHEAD = 3*4 LEASE_SIZE = 4+32+32+4 canary = FakeCanary(True) already,writers = self.allocate(ss, "vid1", [0,1,2], 1000, canary) self.failUnlessEqual(len(writers), 3) # now the StorageServer should have 3000 bytes provisionally # allocated, allowing only 2000 more to be claimed self.failUnlessEqual(len(ss._active_writers), 3) # allocating 1001-byte shares only leaves room for one already2,writers2 = self.allocate(ss, "vid2", [0,1,2], 1001, canary) self.failUnlessEqual(len(writers2), 1) self.failUnlessEqual(len(ss._active_writers), 4) # we abandon the first set, so their provisional allocation should be # returned del already del writers self.failUnlessEqual(len(ss._active_writers), 1) # now we have a provisional allocation of 1001 bytes # and we close the second set, so their provisional allocation should # become real, long-term allocation, and grows to include the # overhead. 
for bw in writers2.values(): bw.remote_write(0, "a"*25) bw.remote_close() del already2 del writers2 del bw self.failUnlessEqual(len(ss._active_writers), 0) allocated = 1001 + OVERHEAD + LEASE_SIZE # we have to manually increase available, since we're not doing real # disk measurements mock_get_disk_stats.return_value = { 'free_for_nonroot': 15000 - allocated, 'avail': max(15000 - allocated - reserved_space, 0), } # now there should be ALLOCATED=1001+12+72=1085 bytes allocated, and # 5000-1085=3915 free, therefore we can fit 39 100byte shares already3,writers3 = self.allocate(ss,"vid3", range(100), 100, canary) self.failUnlessEqual(len(writers3), 39) self.failUnlessEqual(len(ss._active_writers), 39) del already3 del writers3 self.failUnlessEqual(len(ss._active_writers), 0) ss.disownServiceParent() del ss def test_seek(self): basedir = self.workdir("test_seek_behavior") fileutil.make_dirs(basedir) filename = os.path.join(basedir, "testfile") f = open(filename, "wb") f.write("start") f.close() # mode="w" allows seeking-to-create-holes, but truncates pre-existing # files. mode="a" preserves previous contents but does not allow # seeking-to-create-holes. mode="r+" allows both. f = open(filename, "rb+") f.seek(100) f.write("100") f.close() filelen = os.stat(filename)[stat.ST_SIZE] self.failUnlessEqual(filelen, 100+3) f2 = open(filename, "rb") self.failUnlessEqual(f2.read(5), "start") def test_leases(self): ss = self.create("test_leases") canary = FakeCanary() sharenums = range(5) size = 100 rs0,cs0 = (hashutil.tagged_hash("blah", "%d" % self._lease_secret.next()), hashutil.tagged_hash("blah", "%d" % self._lease_secret.next())) already,writers = ss.remote_allocate_buckets("si0", rs0, cs0, sharenums, size, canary) self.failUnlessEqual(len(already), 0) self.failUnlessEqual(len(writers), 5) for wb in writers.values(): wb.remote_close() leases = list(ss.get_leases("si0")) self.failUnlessEqual(len(leases), 1) self.failUnlessEqual(set([l.renew_secret for l in leases]), set([rs0])) rs1,cs1 = (hashutil.tagged_hash("blah", "%d" % self._lease_secret.next()), hashutil.tagged_hash("blah", "%d" % self._lease_secret.next())) already,writers = ss.remote_allocate_buckets("si1", rs1, cs1, sharenums, size, canary) for wb in writers.values(): wb.remote_close() # take out a second lease on si1 rs2,cs2 = (hashutil.tagged_hash("blah", "%d" % self._lease_secret.next()), hashutil.tagged_hash("blah", "%d" % self._lease_secret.next())) already,writers = ss.remote_allocate_buckets("si1", rs2, cs2, sharenums, size, canary) self.failUnlessEqual(len(already), 5) self.failUnlessEqual(len(writers), 0) leases = list(ss.get_leases("si1")) self.failUnlessEqual(len(leases), 2) self.failUnlessEqual(set([l.renew_secret for l in leases]), set([rs1, rs2])) # and a third lease, using add-lease rs2a,cs2a = (hashutil.tagged_hash("blah", "%d" % self._lease_secret.next()), hashutil.tagged_hash("blah", "%d" % self._lease_secret.next())) ss.remote_add_lease("si1", rs2a, cs2a) leases = list(ss.get_leases("si1")) self.failUnlessEqual(len(leases), 3) self.failUnlessEqual(set([l.renew_secret for l in leases]), set([rs1, rs2, rs2a])) # add-lease on a missing storage index is silently ignored self.failUnlessEqual(ss.remote_add_lease("si18", "", ""), None) # check that si0 is readable readers = ss.remote_get_buckets("si0") self.failUnlessEqual(len(readers), 5) # renew the first lease. 
Only the proper renew_secret should work ss.remote_renew_lease("si0", rs0) self.failUnlessRaises(IndexError, ss.remote_renew_lease, "si0", cs0) self.failUnlessRaises(IndexError, ss.remote_renew_lease, "si0", rs1) # check that si0 is still readable readers = ss.remote_get_buckets("si0") self.failUnlessEqual(len(readers), 5) # There is no such method as remote_cancel_lease for now -- see # ticket #1528. self.failIf(hasattr(ss, 'remote_cancel_lease'), \ "ss should not have a 'remote_cancel_lease' method/attribute") # test overlapping uploads rs3,cs3 = (hashutil.tagged_hash("blah", "%d" % self._lease_secret.next()), hashutil.tagged_hash("blah", "%d" % self._lease_secret.next())) rs4,cs4 = (hashutil.tagged_hash("blah", "%d" % self._lease_secret.next()), hashutil.tagged_hash("blah", "%d" % self._lease_secret.next())) already,writers = ss.remote_allocate_buckets("si3", rs3, cs3, sharenums, size, canary) self.failUnlessEqual(len(already), 0) self.failUnlessEqual(len(writers), 5) already2,writers2 = ss.remote_allocate_buckets("si3", rs4, cs4, sharenums, size, canary) self.failUnlessEqual(len(already2), 0) self.failUnlessEqual(len(writers2), 0) for wb in writers.values(): wb.remote_close() leases = list(ss.get_leases("si3")) self.failUnlessEqual(len(leases), 1) already3,writers3 = ss.remote_allocate_buckets("si3", rs4, cs4, sharenums, size, canary) self.failUnlessEqual(len(already3), 5) self.failUnlessEqual(len(writers3), 0) leases = list(ss.get_leases("si3")) self.failUnlessEqual(len(leases), 2) def test_readonly(self): workdir = self.workdir("test_readonly") ss = StorageServer(workdir, "\x00" * 20, readonly_storage=True) ss.setServiceParent(self.sparent) already,writers = self.allocate(ss, "vid", [0,1,2], 75) self.failUnlessEqual(already, set()) self.failUnlessEqual(writers, {}) stats = ss.get_stats() self.failUnlessEqual(stats["storage_server.accepting_immutable_shares"], 0) if "storage_server.disk_avail" in stats: # Some platforms may not have an API to get disk stats. # But if there are stats, readonly_storage means disk_avail=0 self.failUnlessEqual(stats["storage_server.disk_avail"], 0) def test_discard(self): # discard is really only used for other tests, but we test it anyways workdir = self.workdir("test_discard") ss = StorageServer(workdir, "\x00" * 20, discard_storage=True) ss.setServiceParent(self.sparent) already,writers = self.allocate(ss, "vid", [0,1,2], 75) self.failUnlessEqual(already, set()) self.failUnlessEqual(set(writers.keys()), set([0,1,2])) for i,wb in writers.items(): wb.remote_write(0, "%25d" % i) wb.remote_close() # since we discard the data, the shares should be present but sparse. # Since we write with some seeks, the data we read back will be all # zeros. 
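# Sketch (added) of the filesystem behaviour the comment above relies on:
# seeking past the end of a file and writing there leaves a gap that reads back
# as NUL bytes, which is also why the discarded, sparse shares read as all
# zeros. Standalone illustration using a temporary file:
import os, tempfile
_fd, _path = tempfile.mkstemp()
os.close(_fd)
_f = open(_path, "rb+")
_f.seek(10)
_f.write("end")           # bytes 0..9 were never written
_f.close()
_f = open(_path, "rb")
assert _f.read() == "\x00"*10 + "end"
_f.close()
os.remove(_path)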
b = ss.remote_get_buckets("vid") self.failUnlessEqual(set(b.keys()), set([0,1,2])) self.failUnlessEqual(b[0].remote_read(0, 25), "\x00" * 25) def test_advise_corruption(self): workdir = self.workdir("test_advise_corruption") ss = StorageServer(workdir, "\x00" * 20, discard_storage=True) ss.setServiceParent(self.sparent) si0_s = base32.b2a("si0") ss.remote_advise_corrupt_share("immutable", "si0", 0, "This share smells funny.\n") reportdir = os.path.join(workdir, "corruption-advisories") reports = os.listdir(reportdir) self.failUnlessEqual(len(reports), 1) report_si0 = reports[0] self.failUnlessIn(si0_s, report_si0) f = open(os.path.join(reportdir, report_si0), "r") report = f.read() f.close() self.failUnlessIn("type: immutable", report) self.failUnlessIn("storage_index: %s" % si0_s, report) self.failUnlessIn("share_number: 0", report) self.failUnlessIn("This share smells funny.", report) # test the RIBucketWriter version too si1_s = base32.b2a("si1") already,writers = self.allocate(ss, "si1", [1], 75) self.failUnlessEqual(already, set()) self.failUnlessEqual(set(writers.keys()), set([1])) writers[1].remote_write(0, "data") writers[1].remote_close() b = ss.remote_get_buckets("si1") self.failUnlessEqual(set(b.keys()), set([1])) b[1].remote_advise_corrupt_share("This share tastes like dust.\n") reports = os.listdir(reportdir) self.failUnlessEqual(len(reports), 2) report_si1 = [r for r in reports if si1_s in r][0] f = open(os.path.join(reportdir, report_si1), "r") report = f.read() f.close() self.failUnlessIn("type: immutable", report) self.failUnlessIn("storage_index: %s" % si1_s, report) self.failUnlessIn("share_number: 1", report) self.failUnlessIn("This share tastes like dust.", report) class MutableServer(unittest.TestCase): def setUp(self): self.sparent = LoggingServiceParent() self._lease_secret = itertools.count() def tearDown(self): return self.sparent.stopService() def workdir(self, name): basedir = os.path.join("storage", "MutableServer", name) return basedir def create(self, name): workdir = self.workdir(name) ss = StorageServer(workdir, "\x00" * 20) ss.setServiceParent(self.sparent) return ss def test_create(self): self.create("test_create") def write_enabler(self, we_tag): return hashutil.tagged_hash("we_blah", we_tag) def renew_secret(self, tag): return hashutil.tagged_hash("renew_blah", str(tag)) def cancel_secret(self, tag): return hashutil.tagged_hash("cancel_blah", str(tag)) def allocate(self, ss, storage_index, we_tag, lease_tag, sharenums, size): write_enabler = self.write_enabler(we_tag) renew_secret = self.renew_secret(lease_tag) cancel_secret = self.cancel_secret(lease_tag) rstaraw = ss.remote_slot_testv_and_readv_and_writev testandwritev = dict( [ (shnum, ([], [], None) ) for shnum in sharenums ] ) readv = [] rc = rstaraw(storage_index, (write_enabler, renew_secret, cancel_secret), testandwritev, readv) (did_write, readv_data) = rc self.failUnless(did_write) self.failUnless(isinstance(readv_data, dict)) self.failUnlessEqual(len(readv_data), 0) def test_bad_magic(self): ss = self.create("test_bad_magic") self.allocate(ss, "si1", "we1", self._lease_secret.next(), set([0]), 10) fn = os.path.join(ss.sharedir, storage_index_to_dir("si1"), "0") f = open(fn, "rb+") f.seek(0) f.write("BAD MAGIC") f.close() read = ss.remote_slot_readv e = self.failUnlessRaises(UnknownMutableContainerVersionError, read, "si1", [0], [(0,10)]) self.failUnlessIn(" had magic ", str(e)) self.failUnlessIn(" but we wanted ", str(e)) def test_container_size(self): ss = self.create("test_container_size") 
self.allocate(ss, "si1", "we1", self._lease_secret.next(), set([0,1,2]), 100) read = ss.remote_slot_readv rstaraw = ss.remote_slot_testv_and_readv_and_writev secrets = ( self.write_enabler("we1"), self.renew_secret("we1"), self.cancel_secret("we1") ) data = "".join([ ("%d" % i) * 10 for i in range(10) ]) answer = rstaraw("si1", secrets, {0: ([], [(0,data)], len(data)+12)}, []) self.failUnlessEqual(answer, (True, {0:[],1:[],2:[]}) ) # Trying to make the container too large (by sending a write vector # whose offset is too high) will raise an exception. TOOBIG = MutableShareFile.MAX_SIZE + 10 self.failUnlessRaises(DataTooLargeError, rstaraw, "si1", secrets, {0: ([], [(TOOBIG,data)], None)}, []) answer = rstaraw("si1", secrets, {0: ([], [(0,data)], None)}, []) self.failUnlessEqual(answer, (True, {0:[],1:[],2:[]}) ) read_answer = read("si1", [0], [(0,10)]) self.failUnlessEqual(read_answer, {0: [data[:10]]}) # Sending a new_length shorter than the current length truncates the # data. answer = rstaraw("si1", secrets, {0: ([], [], 9)}, []) read_answer = read("si1", [0], [(0,10)]) self.failUnlessEqual(read_answer, {0: [data[:9]]}) # Sending a new_length longer than the current length doesn't change # the data. answer = rstaraw("si1", secrets, {0: ([], [], 20)}, []) assert answer == (True, {0:[],1:[],2:[]}) read_answer = read("si1", [0], [(0, 20)]) self.failUnlessEqual(read_answer, {0: [data[:9]]}) # Sending a write vector whose start is after the end of the current # data doesn't reveal "whatever was there last time" (palimpsest), # but instead fills with zeroes. # To test this, we fill the data area with a recognizable pattern. pattern = ''.join([chr(i) for i in range(100)]) answer = rstaraw("si1", secrets, {0: ([], [(0, pattern)], None)}, []) assert answer == (True, {0:[],1:[],2:[]}) # Then truncate the data... answer = rstaraw("si1", secrets, {0: ([], [], 20)}, []) assert answer == (True, {0:[],1:[],2:[]}) # Just confirm that you get an empty string if you try to read from # past the (new) endpoint now. answer = rstaraw("si1", secrets, {0: ([], [], None)}, [(20, 1980)]) self.failUnlessEqual(answer, (True, {0:[''],1:[''],2:['']})) # Then the extend the file by writing a vector which starts out past # the end... answer = rstaraw("si1", secrets, {0: ([], [(50, 'hellothere')], None)}, []) assert answer == (True, {0:[],1:[],2:[]}) # Now if you read the stuff between 20 (where we earlier truncated) # and 50, it had better be all zeroes. answer = rstaraw("si1", secrets, {0: ([], [], None)}, [(20, 30)]) self.failUnlessEqual(answer, (True, {0:['\x00'*30],1:[''],2:['']})) # Also see if the server explicitly declares that it supports this # feature. ver = ss.remote_get_version() storage_v1_ver = ver["http://allmydata.org/tahoe/protocols/storage/v1"] self.failUnless(storage_v1_ver.get("fills-holes-with-zero-bytes")) # If the size is dropped to zero the share is deleted. 
answer = rstaraw("si1", secrets, {0: ([], [(0,data)], 0)}, []) self.failUnlessEqual(answer, (True, {0:[],1:[],2:[]}) ) read_answer = read("si1", [0], [(0,10)]) self.failUnlessEqual(read_answer, {}) def test_allocate(self): ss = self.create("test_allocate") self.allocate(ss, "si1", "we1", self._lease_secret.next(), set([0,1,2]), 100) read = ss.remote_slot_readv self.failUnlessEqual(read("si1", [0], [(0, 10)]), {0: [""]}) self.failUnlessEqual(read("si1", [], [(0, 10)]), {0: [""], 1: [""], 2: [""]}) self.failUnlessEqual(read("si1", [0], [(100, 10)]), {0: [""]}) # try writing to one secrets = ( self.write_enabler("we1"), self.renew_secret("we1"), self.cancel_secret("we1") ) data = "".join([ ("%d" % i) * 10 for i in range(10) ]) write = ss.remote_slot_testv_and_readv_and_writev answer = write("si1", secrets, {0: ([], [(0,data)], None)}, []) self.failUnlessEqual(answer, (True, {0:[],1:[],2:[]}) ) self.failUnlessEqual(read("si1", [0], [(0,20)]), {0: ["00000000001111111111"]}) self.failUnlessEqual(read("si1", [0], [(95,10)]), {0: ["99999"]}) #self.failUnlessEqual(s0.remote_get_length(), 100) bad_secrets = ("bad write enabler", secrets[1], secrets[2]) f = self.failUnlessRaises(BadWriteEnablerError, write, "si1", bad_secrets, {}, []) self.failUnlessIn("The write enabler was recorded by nodeid 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'.", f) # this testv should fail answer = write("si1", secrets, {0: ([(0, 12, "eq", "444444444444"), (20, 5, "eq", "22222"), ], [(0, "x"*100)], None), }, [(0,12), (20,5)], ) self.failUnlessEqual(answer, (False, {0: ["000000000011", "22222"], 1: ["", ""], 2: ["", ""], })) self.failUnlessEqual(read("si1", [0], [(0,100)]), {0: [data]}) # as should this one answer = write("si1", secrets, {0: ([(10, 5, "lt", "11111"), ], [(0, "x"*100)], None), }, [(10,5)], ) self.failUnlessEqual(answer, (False, {0: ["11111"], 1: [""], 2: [""]}, )) self.failUnlessEqual(read("si1", [0], [(0,100)]), {0: [data]}) def test_operators(self): # test operators, the data we're comparing is '11111' in all cases. # test both fail+pass, reset data after each one. 
ss = self.create("test_operators") secrets = ( self.write_enabler("we1"), self.renew_secret("we1"), self.cancel_secret("we1") ) data = "".join([ ("%d" % i) * 10 for i in range(10) ]) write = ss.remote_slot_testv_and_readv_and_writev read = ss.remote_slot_readv def reset(): write("si1", secrets, {0: ([], [(0,data)], None)}, []) reset() # lt answer = write("si1", secrets, {0: ([(10, 5, "lt", "11110"), ], [(0, "x"*100)], None, )}, [(10,5)]) self.failUnlessEqual(answer, (False, {0: ["11111"]})) self.failUnlessEqual(read("si1", [0], [(0,100)]), {0: [data]}) self.failUnlessEqual(read("si1", [], [(0,100)]), {0: [data]}) reset() answer = write("si1", secrets, {0: ([(10, 5, "lt", "11111"), ], [(0, "x"*100)], None, )}, [(10,5)]) self.failUnlessEqual(answer, (False, {0: ["11111"]})) self.failUnlessEqual(read("si1", [0], [(0,100)]), {0: [data]}) reset() answer = write("si1", secrets, {0: ([(10, 5, "lt", "11112"), ], [(0, "y"*100)], None, )}, [(10,5)]) self.failUnlessEqual(answer, (True, {0: ["11111"]})) self.failUnlessEqual(read("si1", [0], [(0,100)]), {0: ["y"*100]}) reset() # le answer = write("si1", secrets, {0: ([(10, 5, "le", "11110"), ], [(0, "x"*100)], None, )}, [(10,5)]) self.failUnlessEqual(answer, (False, {0: ["11111"]})) self.failUnlessEqual(read("si1", [0], [(0,100)]), {0: [data]}) reset() answer = write("si1", secrets, {0: ([(10, 5, "le", "11111"), ], [(0, "y"*100)], None, )}, [(10,5)]) self.failUnlessEqual(answer, (True, {0: ["11111"]})) self.failUnlessEqual(read("si1", [0], [(0,100)]), {0: ["y"*100]}) reset() answer = write("si1", secrets, {0: ([(10, 5, "le", "11112"), ], [(0, "y"*100)], None, )}, [(10,5)]) self.failUnlessEqual(answer, (True, {0: ["11111"]})) self.failUnlessEqual(read("si1", [0], [(0,100)]), {0: ["y"*100]}) reset() # eq answer = write("si1", secrets, {0: ([(10, 5, "eq", "11112"), ], [(0, "x"*100)], None, )}, [(10,5)]) self.failUnlessEqual(answer, (False, {0: ["11111"]})) self.failUnlessEqual(read("si1", [0], [(0,100)]), {0: [data]}) reset() answer = write("si1", secrets, {0: ([(10, 5, "eq", "11111"), ], [(0, "y"*100)], None, )}, [(10,5)]) self.failUnlessEqual(answer, (True, {0: ["11111"]})) self.failUnlessEqual(read("si1", [0], [(0,100)]), {0: ["y"*100]}) reset() # ne answer = write("si1", secrets, {0: ([(10, 5, "ne", "11111"), ], [(0, "x"*100)], None, )}, [(10,5)]) self.failUnlessEqual(answer, (False, {0: ["11111"]})) self.failUnlessEqual(read("si1", [0], [(0,100)]), {0: [data]}) reset() answer = write("si1", secrets, {0: ([(10, 5, "ne", "11112"), ], [(0, "y"*100)], None, )}, [(10,5)]) self.failUnlessEqual(answer, (True, {0: ["11111"]})) self.failUnlessEqual(read("si1", [0], [(0,100)]), {0: ["y"*100]}) reset() # ge answer = write("si1", secrets, {0: ([(10, 5, "ge", "11110"), ], [(0, "y"*100)], None, )}, [(10,5)]) self.failUnlessEqual(answer, (True, {0: ["11111"]})) self.failUnlessEqual(read("si1", [0], [(0,100)]), {0: ["y"*100]}) reset() answer = write("si1", secrets, {0: ([(10, 5, "ge", "11111"), ], [(0, "y"*100)], None, )}, [(10,5)]) self.failUnlessEqual(answer, (True, {0: ["11111"]})) self.failUnlessEqual(read("si1", [0], [(0,100)]), {0: ["y"*100]}) reset() answer = write("si1", secrets, {0: ([(10, 5, "ge", "11112"), ], [(0, "y"*100)], None, )}, [(10,5)]) self.failUnlessEqual(answer, (False, {0: ["11111"]})) self.failUnlessEqual(read("si1", [0], [(0,100)]), {0: [data]}) reset() # gt answer = write("si1", secrets, {0: ([(10, 5, "gt", "11110"), ], [(0, "y"*100)], None, )}, [(10,5)]) self.failUnlessEqual(answer, (True, {0: ["11111"]})) self.failUnlessEqual(read("si1", 
[0], [(0,100)]), {0: ["y"*100]}) reset() answer = write("si1", secrets, {0: ([(10, 5, "gt", "11111"), ], [(0, "x"*100)], None, )}, [(10,5)]) self.failUnlessEqual(answer, (False, {0: ["11111"]})) self.failUnlessEqual(read("si1", [0], [(0,100)]), {0: [data]}) reset() answer = write("si1", secrets, {0: ([(10, 5, "gt", "11112"), ], [(0, "x"*100)], None, )}, [(10,5)]) self.failUnlessEqual(answer, (False, {0: ["11111"]})) self.failUnlessEqual(read("si1", [0], [(0,100)]), {0: [data]}) reset() # finally, test some operators against empty shares answer = write("si1", secrets, {1: ([(10, 5, "eq", "11112"), ], [(0, "x"*100)], None, )}, [(10,5)]) self.failUnlessEqual(answer, (False, {0: ["11111"]})) self.failUnlessEqual(read("si1", [0], [(0,100)]), {0: [data]}) reset() def test_readv(self): ss = self.create("test_readv") secrets = ( self.write_enabler("we1"), self.renew_secret("we1"), self.cancel_secret("we1") ) data = "".join([ ("%d" % i) * 10 for i in range(10) ]) write = ss.remote_slot_testv_and_readv_and_writev read = ss.remote_slot_readv data = [("%d" % i) * 100 for i in range(3)] rc = write("si1", secrets, {0: ([], [(0,data[0])], None), 1: ([], [(0,data[1])], None), 2: ([], [(0,data[2])], None), }, []) self.failUnlessEqual(rc, (True, {})) answer = read("si1", [], [(0, 10)]) self.failUnlessEqual(answer, {0: ["0"*10], 1: ["1"*10], 2: ["2"*10]}) def compare_leases_without_timestamps(self, leases_a, leases_b): self.failUnlessEqual(len(leases_a), len(leases_b)) for i in range(len(leases_a)): a = leases_a[i] b = leases_b[i] self.failUnlessEqual(a.owner_num, b.owner_num) self.failUnlessEqual(a.renew_secret, b.renew_secret) self.failUnlessEqual(a.cancel_secret, b.cancel_secret) self.failUnlessEqual(a.nodeid, b.nodeid) def compare_leases(self, leases_a, leases_b): self.failUnlessEqual(len(leases_a), len(leases_b)) for i in range(len(leases_a)): a = leases_a[i] b = leases_b[i] self.failUnlessEqual(a.owner_num, b.owner_num) self.failUnlessEqual(a.renew_secret, b.renew_secret) self.failUnlessEqual(a.cancel_secret, b.cancel_secret) self.failUnlessEqual(a.nodeid, b.nodeid) self.failUnlessEqual(a.expiration_time, b.expiration_time) def test_leases(self): ss = self.create("test_leases") def secrets(n): return ( self.write_enabler("we1"), self.renew_secret("we1-%d" % n), self.cancel_secret("we1-%d" % n) ) data = "".join([ ("%d" % i) * 10 for i in range(10) ]) write = ss.remote_slot_testv_and_readv_and_writev read = ss.remote_slot_readv rc = write("si1", secrets(0), {0: ([], [(0,data)], None)}, []) self.failUnlessEqual(rc, (True, {})) # create a random non-numeric file in the bucket directory, to # exercise the code that's supposed to ignore those. bucket_dir = os.path.join(self.workdir("test_leases"), "shares", storage_index_to_dir("si1")) f = open(os.path.join(bucket_dir, "ignore_me.txt"), "w") f.write("you ought to be ignoring me\n") f.close() s0 = MutableShareFile(os.path.join(bucket_dir, "0")) self.failUnlessEqual(len(list(s0.get_leases())), 1) # add-lease on a missing storage index is silently ignored self.failUnlessEqual(ss.remote_add_lease("si18", "", ""), None) # re-allocate the slots and use the same secrets, that should update # the lease write("si1", secrets(0), {0: ([], [(0,data)], None)}, []) self.failUnlessEqual(len(list(s0.get_leases())), 1) # renew it directly ss.remote_renew_lease("si1", secrets(0)[1]) self.failUnlessEqual(len(list(s0.get_leases())), 1) # now allocate them with a bunch of different secrets, to trigger the # extended lease code. Use add_lease for one of them. 
write("si1", secrets(1), {0: ([], [(0,data)], None)}, []) self.failUnlessEqual(len(list(s0.get_leases())), 2) secrets2 = secrets(2) ss.remote_add_lease("si1", secrets2[1], secrets2[2]) self.failUnlessEqual(len(list(s0.get_leases())), 3) write("si1", secrets(3), {0: ([], [(0,data)], None)}, []) write("si1", secrets(4), {0: ([], [(0,data)], None)}, []) write("si1", secrets(5), {0: ([], [(0,data)], None)}, []) self.failUnlessEqual(len(list(s0.get_leases())), 6) all_leases = list(s0.get_leases()) # and write enough data to expand the container, forcing the server # to move the leases write("si1", secrets(0), {0: ([], [(0,data)], 200), }, []) # read back the leases, make sure they're still intact. self.compare_leases_without_timestamps(all_leases, list(s0.get_leases())) ss.remote_renew_lease("si1", secrets(0)[1]) ss.remote_renew_lease("si1", secrets(1)[1]) ss.remote_renew_lease("si1", secrets(2)[1]) ss.remote_renew_lease("si1", secrets(3)[1]) ss.remote_renew_lease("si1", secrets(4)[1]) self.compare_leases_without_timestamps(all_leases, list(s0.get_leases())) # get a new copy of the leases, with the current timestamps. Reading # data and failing to renew/cancel leases should leave the timestamps # alone. all_leases = list(s0.get_leases()) # renewing with a bogus token should prompt an error message # examine the exception thus raised, make sure the old nodeid is # present, to provide for share migration e = self.failUnlessRaises(IndexError, ss.remote_renew_lease, "si1", secrets(20)[1]) e_s = str(e) self.failUnlessIn("Unable to renew non-existent lease", e_s) self.failUnlessIn("I have leases accepted by nodeids:", e_s) self.failUnlessIn("nodeids: 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa' .", e_s) self.compare_leases(all_leases, list(s0.get_leases())) # reading shares should not modify the timestamp read("si1", [], [(0,200)]) self.compare_leases(all_leases, list(s0.get_leases())) write("si1", secrets(0), {0: ([], [(200, "make me bigger")], None)}, []) self.compare_leases_without_timestamps(all_leases, list(s0.get_leases())) write("si1", secrets(0), {0: ([], [(500, "make me really bigger")], None)}, []) self.compare_leases_without_timestamps(all_leases, list(s0.get_leases())) def test_remove(self): ss = self.create("test_remove") self.allocate(ss, "si1", "we1", self._lease_secret.next(), set([0,1,2]), 100) readv = ss.remote_slot_readv writev = ss.remote_slot_testv_and_readv_and_writev secrets = ( self.write_enabler("we1"), self.renew_secret("we1"), self.cancel_secret("we1") ) # delete sh0 by setting its size to zero answer = writev("si1", secrets, {0: ([], [], 0)}, []) # the answer should mention all the shares that existed before the # write self.failUnlessEqual(answer, (True, {0:[],1:[],2:[]}) ) # but a new read should show only sh1 and sh2 self.failUnlessEqual(readv("si1", [], [(0,10)]), {1: [""], 2: [""]}) # delete sh1 by setting its size to zero answer = writev("si1", secrets, {1: ([], [], 0)}, []) self.failUnlessEqual(answer, (True, {1:[],2:[]}) ) self.failUnlessEqual(readv("si1", [], [(0,10)]), {2: [""]}) # delete sh2 by setting its size to zero answer = writev("si1", secrets, {2: ([], [], 0)}, []) self.failUnlessEqual(answer, (True, {2:[]}) ) self.failUnlessEqual(readv("si1", [], [(0,10)]), {}) # and the bucket directory should now be gone si = base32.b2a("si1") # note: this is a detail of the storage server implementation, and # may change in the future prefix = si[:2] prefixdir = os.path.join(self.workdir("test_remove"), "shares", prefix) bucketdir = os.path.join(prefixdir, si) 
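# Illustrative helper (an editorial sketch, not part of the original tests): it
# restates the path computation done just above in test_remove, and mirrors the
# os.path.join(workdir, "shares", storage_index_to_dir(si)) pattern used in
# test_leases. The two-character base32 prefix layer is an implementation
# detail of the storage server and may change. Assumes the module-level
# imports of `os` and `allmydata.util.base32`.
def _bucket_dir_for(workdir, storage_index):
    si = base32.b2a(storage_index)
    # shares live under <workdir>/shares/<first two base32 chars>/<full si>
    return os.path.join(workdir, "shares", si[:2], si)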
self.failUnless(os.path.exists(prefixdir), prefixdir) self.failIf(os.path.exists(bucketdir), bucketdir) class MDMFProxies(unittest.TestCase, ShouldFailMixin): def setUp(self): self.sparent = LoggingServiceParent() self._lease_secret = itertools.count() self.ss = self.create("MDMFProxies storage test server") self.rref = RemoteBucket() self.rref.target = self.ss self.secrets = (self.write_enabler("we_secret"), self.renew_secret("renew_secret"), self.cancel_secret("cancel_secret")) self.segment = "aaaaaa" self.block = "aa" self.salt = "a" * 16 self.block_hash = "a" * 32 self.block_hash_tree = [self.block_hash for i in xrange(6)] self.share_hash = self.block_hash self.share_hash_chain = dict([(i, self.share_hash) for i in xrange(6)]) self.signature = "foobarbaz" self.verification_key = "vvvvvv" self.encprivkey = "private" self.root_hash = self.block_hash self.salt_hash = self.root_hash self.salt_hash_tree = [self.salt_hash for i in xrange(6)] self.block_hash_tree_s = self.serialize_blockhashes(self.block_hash_tree) self.share_hash_chain_s = self.serialize_sharehashes(self.share_hash_chain) # blockhashes and salt hashes are serialized in the same way, # only we lop off the first element and store that in the # header. self.salt_hash_tree_s = self.serialize_blockhashes(self.salt_hash_tree[1:]) def tearDown(self): self.sparent.stopService() shutil.rmtree(self.workdir("MDMFProxies storage test server")) def write_enabler(self, we_tag): return hashutil.tagged_hash("we_blah", we_tag) def renew_secret(self, tag): return hashutil.tagged_hash("renew_blah", str(tag)) def cancel_secret(self, tag): return hashutil.tagged_hash("cancel_blah", str(tag)) def workdir(self, name): basedir = os.path.join("storage", "MutableServer", name) return basedir def create(self, name): workdir = self.workdir(name) ss = StorageServer(workdir, "\x00" * 20) ss.setServiceParent(self.sparent) return ss def build_test_mdmf_share(self, tail_segment=False, empty=False): # Start with the checkstring data = struct.pack(">BQ32s", 1, 0, self.root_hash) self.checkstring = data # Next, the encoding parameters if tail_segment: data += struct.pack(">BBQQ", 3, 10, 6, 33) elif empty: data += struct.pack(">BBQQ", 3, 10, 0, 0) else: data += struct.pack(">BBQQ", 3, 10, 6, 36) # Now we'll build the offsets. sharedata = "" if not tail_segment and not empty: for i in xrange(6): sharedata += self.salt + self.block elif tail_segment: for i in xrange(5): sharedata += self.salt + self.block sharedata += self.salt + "a" # The encrypted private key comes after the shares + salts offset_size = struct.calcsize(MDMFOFFSETS) encrypted_private_key_offset = len(data) + offset_size # The share has chain comes after the private key sharehashes_offset = encrypted_private_key_offset + \ len(self.encprivkey) # The signature comes after the share hash chain. 
signature_offset = sharehashes_offset + len(self.share_hash_chain_s) verification_key_offset = signature_offset + len(self.signature) verification_key_end = verification_key_offset + \ len(self.verification_key) share_data_offset = offset_size share_data_offset += PRIVATE_KEY_SIZE share_data_offset += SIGNATURE_SIZE share_data_offset += VERIFICATION_KEY_SIZE share_data_offset += SHARE_HASH_CHAIN_SIZE blockhashes_offset = share_data_offset + len(sharedata) eof_offset = blockhashes_offset + len(self.block_hash_tree_s) data += struct.pack(MDMFOFFSETS, encrypted_private_key_offset, sharehashes_offset, signature_offset, verification_key_offset, verification_key_end, share_data_offset, blockhashes_offset, eof_offset) self.offsets = {} self.offsets['enc_privkey'] = encrypted_private_key_offset self.offsets['block_hash_tree'] = blockhashes_offset self.offsets['share_hash_chain'] = sharehashes_offset self.offsets['signature'] = signature_offset self.offsets['verification_key'] = verification_key_offset self.offsets['share_data'] = share_data_offset self.offsets['verification_key_end'] = verification_key_end self.offsets['EOF'] = eof_offset # the private key, data += self.encprivkey # the sharehashes data += self.share_hash_chain_s # the signature, data += self.signature # and the verification key data += self.verification_key # Then we'll add in gibberish until we get to the right point. nulls = "".join([" " for i in xrange(len(data), share_data_offset)]) data += nulls # Then the share data data += sharedata # the blockhashes data += self.block_hash_tree_s return data def write_test_share_to_server(self, storage_index, tail_segment=False, empty=False): """ I write some data for the read tests to read to self.ss If tail_segment=True, then I will write a share that has a smaller tail segment than other segments. """ write = self.ss.remote_slot_testv_and_readv_and_writev data = self.build_test_mdmf_share(tail_segment, empty) # Finally, we write the whole thing to the storage server in one # pass. 
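# Shape of the request assembled below (inferred from how these tests call
# remote_slot_testv_and_readv_and_writev; the literal values here are only
# placeholders):
#   test vector entry : (offset, length, operator, specimen), e.g. (0, 1, "eq", "")
#   write vector entry: (offset, data_to_write)
#   per-share value   : (test_vector, write_vector, new_length_or_None)
#   read vector entry : (offset, length); the call returns
#                       (success_bool, {sharenum: [data read at each entry]})
example_tw_vectors = {0: ([(0, 1, "eq", "")],      # write only if share 0 starts empty
                          [(0, "example data")],   # then write this at offset 0
                          None)}                   # and leave the share length alone
example_read_vector = [(0, 1)]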
testvs = [(0, 1, "eq", "")] tws = {} tws[0] = (testvs, [(0, data)], None) readv = [(0, 1)] results = write(storage_index, self.secrets, tws, readv) self.failUnless(results[0]) def build_test_sdmf_share(self, empty=False): if empty: sharedata = "" else: sharedata = self.segment * 6 self.sharedata = sharedata blocksize = len(sharedata) / 3 block = sharedata[:blocksize] self.blockdata = block prefix = struct.pack(">BQ32s16s BBQQ", 0, # version, 0, self.root_hash, self.salt, 3, 10, len(sharedata), len(sharedata), ) post_offset = struct.calcsize(">BQ32s16sBBQQLLLLQQ") signature_offset = post_offset + len(self.verification_key) sharehashes_offset = signature_offset + len(self.signature) blockhashes_offset = sharehashes_offset + len(self.share_hash_chain_s) sharedata_offset = blockhashes_offset + len(self.block_hash_tree_s) encprivkey_offset = sharedata_offset + len(block) eof_offset = encprivkey_offset + len(self.encprivkey) offsets = struct.pack(">LLLLQQ", signature_offset, sharehashes_offset, blockhashes_offset, sharedata_offset, encprivkey_offset, eof_offset) final_share = "".join([prefix, offsets, self.verification_key, self.signature, self.share_hash_chain_s, self.block_hash_tree_s, block, self.encprivkey]) self.offsets = {} self.offsets['signature'] = signature_offset self.offsets['share_hash_chain'] = sharehashes_offset self.offsets['block_hash_tree'] = blockhashes_offset self.offsets['share_data'] = sharedata_offset self.offsets['enc_privkey'] = encprivkey_offset self.offsets['EOF'] = eof_offset return final_share def write_sdmf_share_to_server(self, storage_index, empty=False): # Some tests need SDMF shares to verify that we can still # read them. This method writes one, which resembles but is not assert self.rref write = self.ss.remote_slot_testv_and_readv_and_writev share = self.build_test_sdmf_share(empty) testvs = [(0, 1, "eq", "")] tws = {} tws[0] = (testvs, [(0, share)], None) readv = [] results = write(storage_index, self.secrets, tws, readv) self.failUnless(results[0]) def test_read(self): self.write_test_share_to_server("si1") mr = MDMFSlotReadProxy(self.rref, "si1", 0) # Check that every method equals what we expect it to. 
d = defer.succeed(None) def _check_block_and_salt((block, salt)): self.failUnlessEqual(block, self.block) self.failUnlessEqual(salt, self.salt) for i in xrange(6): d.addCallback(lambda ignored, i=i: mr.get_block_and_salt(i)) d.addCallback(_check_block_and_salt) d.addCallback(lambda ignored: mr.get_encprivkey()) d.addCallback(lambda encprivkey: self.failUnlessEqual(self.encprivkey, encprivkey)) d.addCallback(lambda ignored: mr.get_blockhashes()) d.addCallback(lambda blockhashes: self.failUnlessEqual(self.block_hash_tree, blockhashes)) d.addCallback(lambda ignored: mr.get_sharehashes()) d.addCallback(lambda sharehashes: self.failUnlessEqual(self.share_hash_chain, sharehashes)) d.addCallback(lambda ignored: mr.get_signature()) d.addCallback(lambda signature: self.failUnlessEqual(signature, self.signature)) d.addCallback(lambda ignored: mr.get_verification_key()) d.addCallback(lambda verification_key: self.failUnlessEqual(verification_key, self.verification_key)) d.addCallback(lambda ignored: mr.get_seqnum()) d.addCallback(lambda seqnum: self.failUnlessEqual(seqnum, 0)) d.addCallback(lambda ignored: mr.get_root_hash()) d.addCallback(lambda root_hash: self.failUnlessEqual(self.root_hash, root_hash)) d.addCallback(lambda ignored: mr.get_seqnum()) d.addCallback(lambda seqnum: self.failUnlessEqual(0, seqnum)) d.addCallback(lambda ignored: mr.get_encoding_parameters()) def _check_encoding_parameters((k, n, segsize, datalen)): self.failUnlessEqual(k, 3) self.failUnlessEqual(n, 10) self.failUnlessEqual(segsize, 6) self.failUnlessEqual(datalen, 36) d.addCallback(_check_encoding_parameters) d.addCallback(lambda ignored: mr.get_checkstring()) d.addCallback(lambda checkstring: self.failUnlessEqual(checkstring, checkstring)) return d def test_read_with_different_tail_segment_size(self): self.write_test_share_to_server("si1", tail_segment=True) mr = MDMFSlotReadProxy(self.rref, "si1", 0) d = mr.get_block_and_salt(5) def _check_tail_segment(results): block, salt = results self.failUnlessEqual(len(block), 1) self.failUnlessEqual(block, "a") d.addCallback(_check_tail_segment) return d def test_get_block_with_invalid_segnum(self): self.write_test_share_to_server("si1") mr = MDMFSlotReadProxy(self.rref, "si1", 0) d = defer.succeed(None) d.addCallback(lambda ignored: self.shouldFail(LayoutInvalid, "test invalid segnum", None, mr.get_block_and_salt, 7)) return d def test_get_encoding_parameters_first(self): self.write_test_share_to_server("si1") mr = MDMFSlotReadProxy(self.rref, "si1", 0) d = mr.get_encoding_parameters() def _check_encoding_parameters((k, n, segment_size, datalen)): self.failUnlessEqual(k, 3) self.failUnlessEqual(n, 10) self.failUnlessEqual(segment_size, 6) self.failUnlessEqual(datalen, 36) d.addCallback(_check_encoding_parameters) return d def test_get_seqnum_first(self): self.write_test_share_to_server("si1") mr = MDMFSlotReadProxy(self.rref, "si1", 0) d = mr.get_seqnum() d.addCallback(lambda seqnum: self.failUnlessEqual(seqnum, 0)) return d def test_get_root_hash_first(self): self.write_test_share_to_server("si1") mr = MDMFSlotReadProxy(self.rref, "si1", 0) d = mr.get_root_hash() d.addCallback(lambda root_hash: self.failUnlessEqual(root_hash, self.root_hash)) return d def test_get_checkstring_first(self): self.write_test_share_to_server("si1") mr = MDMFSlotReadProxy(self.rref, "si1", 0) d = mr.get_checkstring() d.addCallback(lambda checkstring: self.failUnlessEqual(checkstring, self.checkstring)) return d def test_write_read_vectors(self): # When writing for us, the storage server will return 
to us a # read vector, along with its result. If a write fails because # the test vectors failed, this read vector can help us to # diagnose the problem. This test ensures that the read vector # is working appropriately. mw = self._make_new_mw("si1", 0) for i in xrange(6): mw.put_block(self.block, i, self.salt) mw.put_encprivkey(self.encprivkey) mw.put_blockhashes(self.block_hash_tree) mw.put_sharehashes(self.share_hash_chain) mw.put_root_hash(self.root_hash) mw.put_signature(self.signature) mw.put_verification_key(self.verification_key) d = mw.finish_publishing() def _then(results): self.failUnless(len(results), 2) result, readv = results self.failUnless(result) self.failIf(readv) self.old_checkstring = mw.get_checkstring() mw.set_checkstring("") d.addCallback(_then) d.addCallback(lambda ignored: mw.finish_publishing()) def _then_again(results): self.failUnlessEqual(len(results), 2) result, readvs = results self.failIf(result) self.failUnlessIn(0, readvs) readv = readvs[0][0] self.failUnlessEqual(readv, self.old_checkstring) d.addCallback(_then_again) # The checkstring remains the same for the rest of the process. return d def test_private_key_after_share_hash_chain(self): mw = self._make_new_mw("si1", 0) d = defer.succeed(None) for i in xrange(6): d.addCallback(lambda ignored, i=i: mw.put_block(self.block, i, self.salt)) d.addCallback(lambda ignored: mw.put_encprivkey(self.encprivkey)) d.addCallback(lambda ignored: mw.put_sharehashes(self.share_hash_chain)) # Now try to put the private key again. d.addCallback(lambda ignored: self.shouldFail(LayoutInvalid, "test repeat private key", None, mw.put_encprivkey, self.encprivkey)) return d def test_signature_after_verification_key(self): mw = self._make_new_mw("si1", 0) d = defer.succeed(None) # Put everything up to and including the verification key. for i in xrange(6): d.addCallback(lambda ignored, i=i: mw.put_block(self.block, i, self.salt)) d.addCallback(lambda ignored: mw.put_encprivkey(self.encprivkey)) d.addCallback(lambda ignored: mw.put_blockhashes(self.block_hash_tree)) d.addCallback(lambda ignored: mw.put_sharehashes(self.share_hash_chain)) d.addCallback(lambda ignored: mw.put_root_hash(self.root_hash)) d.addCallback(lambda ignored: mw.put_signature(self.signature)) d.addCallback(lambda ignored: mw.put_verification_key(self.verification_key)) # Now try to put the signature again. This should fail d.addCallback(lambda ignored: self.shouldFail(LayoutInvalid, "signature after verification", None, mw.put_signature, self.signature)) return d def test_uncoordinated_write(self): # Make two mutable writers, both pointing to the same storage # server, both at the same storage index, and try writing to the # same share. mw1 = self._make_new_mw("si1", 0) mw2 = self._make_new_mw("si1", 0) def _check_success(results): result, readvs = results self.failUnless(result) def _check_failure(results): result, readvs = results self.failIf(result) def _write_share(mw): for i in xrange(6): mw.put_block(self.block, i, self.salt) mw.put_encprivkey(self.encprivkey) mw.put_blockhashes(self.block_hash_tree) mw.put_sharehashes(self.share_hash_chain) mw.put_root_hash(self.root_hash) mw.put_signature(self.signature) mw.put_verification_key(self.verification_key) return mw.finish_publishing() d = _write_share(mw1) d.addCallback(_check_success) d.addCallback(lambda ignored: _write_share(mw2)) d.addCallback(_check_failure) return d def test_invalid_salt_size(self): # Salts need to be 16 bytes in size. 
Writes that attempt to # write more or less than this should be rejected. mw = self._make_new_mw("si1", 0) invalid_salt = "a" * 17 # 17 bytes another_invalid_salt = "b" * 15 # 15 bytes d = defer.succeed(None) d.addCallback(lambda ignored: self.shouldFail(LayoutInvalid, "salt too big", None, mw.put_block, self.block, 0, invalid_salt)) d.addCallback(lambda ignored: self.shouldFail(LayoutInvalid, "salt too small", None, mw.put_block, self.block, 0, another_invalid_salt)) return d def test_write_test_vectors(self): # If we give the write proxy a bogus test vector at # any point during the process, it should fail to write when we # tell it to write. def _check_failure(results): self.failUnlessEqual(len(results), 2) res, d = results self.failIf(res) def _check_success(results): self.failUnlessEqual(len(results), 2) res, d = results self.failUnless(results) mw = self._make_new_mw("si1", 0) mw.set_checkstring("this is a lie") for i in xrange(6): mw.put_block(self.block, i, self.salt) mw.put_encprivkey(self.encprivkey) mw.put_blockhashes(self.block_hash_tree) mw.put_sharehashes(self.share_hash_chain) mw.put_root_hash(self.root_hash) mw.put_signature(self.signature) mw.put_verification_key(self.verification_key) d = mw.finish_publishing() d.addCallback(_check_failure) d.addCallback(lambda ignored: mw.set_checkstring("")) d.addCallback(lambda ignored: mw.finish_publishing()) d.addCallback(_check_success) return d def serialize_blockhashes(self, blockhashes): return "".join(blockhashes) def serialize_sharehashes(self, sharehashes): ret = "".join([struct.pack(">H32s", i, sharehashes[i]) for i in sorted(sharehashes.keys())]) return ret def test_write(self): # This translates to a file with 6 6-byte segments, and with 2-byte # blocks. mw = self._make_new_mw("si1", 0) # Test writing some blocks. 
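# Worked numbers behind the comment above (parameters that _make_new_mw
# passes: k = 3, segment size = 6, datalength = 36, salt = 16 bytes):
#   number of segments           = 36 / 6 = 6
#   block size                   = 6 / 3  = 2 bytes per block
#   bytes written per block+salt = 2 + 16 = 18
assert 36 // 6 == 6
assert 6 // 3 == 2
assert 2 + 16 == 18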
read = self.ss.remote_slot_readv expected_private_key_offset = struct.calcsize(MDMFHEADER) expected_sharedata_offset = struct.calcsize(MDMFHEADER) + \ PRIVATE_KEY_SIZE + \ SIGNATURE_SIZE + \ VERIFICATION_KEY_SIZE + \ SHARE_HASH_CHAIN_SIZE written_block_size = 2 + len(self.salt) written_block = self.block + self.salt for i in xrange(6): mw.put_block(self.block, i, self.salt) mw.put_encprivkey(self.encprivkey) mw.put_blockhashes(self.block_hash_tree) mw.put_sharehashes(self.share_hash_chain) mw.put_root_hash(self.root_hash) mw.put_signature(self.signature) mw.put_verification_key(self.verification_key) d = mw.finish_publishing() def _check_publish(results): self.failUnlessEqual(len(results), 2) result, ign = results self.failUnless(result, "publish failed") for i in xrange(6): self.failUnlessEqual(read("si1", [0], [(expected_sharedata_offset + (i * written_block_size), written_block_size)]), {0: [written_block]}) self.failUnlessEqual(len(self.encprivkey), 7) self.failUnlessEqual(read("si1", [0], [(expected_private_key_offset, 7)]), {0: [self.encprivkey]}) expected_block_hash_offset = expected_sharedata_offset + \ (6 * written_block_size) self.failUnlessEqual(len(self.block_hash_tree_s), 32 * 6) self.failUnlessEqual(read("si1", [0], [(expected_block_hash_offset, 32 * 6)]), {0: [self.block_hash_tree_s]}) expected_share_hash_offset = expected_private_key_offset + len(self.encprivkey) self.failUnlessEqual(read("si1", [0],[(expected_share_hash_offset, (32 + 2) * 6)]), {0: [self.share_hash_chain_s]}) self.failUnlessEqual(read("si1", [0], [(9, 32)]), {0: [self.root_hash]}) expected_signature_offset = expected_share_hash_offset + \ len(self.share_hash_chain_s) self.failUnlessEqual(len(self.signature), 9) self.failUnlessEqual(read("si1", [0], [(expected_signature_offset, 9)]), {0: [self.signature]}) expected_verification_key_offset = expected_signature_offset + len(self.signature) self.failUnlessEqual(len(self.verification_key), 6) self.failUnlessEqual(read("si1", [0], [(expected_verification_key_offset, 6)]), {0: [self.verification_key]}) signable = mw.get_signable() verno, seq, roothash, k, n, segsize, datalen = \ struct.unpack(">BQ32sBBQQ", signable) self.failUnlessEqual(verno, 1) self.failUnlessEqual(seq, 0) self.failUnlessEqual(roothash, self.root_hash) self.failUnlessEqual(k, 3) self.failUnlessEqual(n, 10) self.failUnlessEqual(segsize, 6) self.failUnlessEqual(datalen, 36) expected_eof_offset = expected_block_hash_offset + \ len(self.block_hash_tree_s) # Check the version number to make sure that it is correct. expected_version_number = struct.pack(">B", 1) self.failUnlessEqual(read("si1", [0], [(0, 1)]), {0: [expected_version_number]}) # Check the sequence number to make sure that it is correct expected_sequence_number = struct.pack(">Q", 0) self.failUnlessEqual(read("si1", [0], [(1, 8)]), {0: [expected_sequence_number]}) # Check that the encoding parameters (k, N, segement size, data # length) are what they should be. 
These are 3, 10, 6, 36 expected_k = struct.pack(">B", 3) self.failUnlessEqual(read("si1", [0], [(41, 1)]), {0: [expected_k]}) expected_n = struct.pack(">B", 10) self.failUnlessEqual(read("si1", [0], [(42, 1)]), {0: [expected_n]}) expected_segment_size = struct.pack(">Q", 6) self.failUnlessEqual(read("si1", [0], [(43, 8)]), {0: [expected_segment_size]}) expected_data_length = struct.pack(">Q", 36) self.failUnlessEqual(read("si1", [0], [(51, 8)]), {0: [expected_data_length]}) expected_offset = struct.pack(">Q", expected_private_key_offset) self.failUnlessEqual(read("si1", [0], [(59, 8)]), {0: [expected_offset]}) expected_offset = struct.pack(">Q", expected_share_hash_offset) self.failUnlessEqual(read("si1", [0], [(67, 8)]), {0: [expected_offset]}) expected_offset = struct.pack(">Q", expected_signature_offset) self.failUnlessEqual(read("si1", [0], [(75, 8)]), {0: [expected_offset]}) expected_offset = struct.pack(">Q", expected_verification_key_offset) self.failUnlessEqual(read("si1", [0], [(83, 8)]), {0: [expected_offset]}) expected_offset = struct.pack(">Q", expected_verification_key_offset + len(self.verification_key)) self.failUnlessEqual(read("si1", [0], [(91, 8)]), {0: [expected_offset]}) expected_offset = struct.pack(">Q", expected_sharedata_offset) self.failUnlessEqual(read("si1", [0], [(99, 8)]), {0: [expected_offset]}) expected_offset = struct.pack(">Q", expected_block_hash_offset) self.failUnlessEqual(read("si1", [0], [(107, 8)]), {0: [expected_offset]}) expected_offset = struct.pack(">Q", expected_eof_offset) self.failUnlessEqual(read("si1", [0], [(115, 8)]), {0: [expected_offset]}) d.addCallback(_check_publish) return d def _make_new_mw(self, si, share, datalength=36): # This is a file of size 36 bytes. Since it has a segment # size of 6, we know that it has 6 byte segments, which will # be split into blocks of 2 bytes because our FEC k # parameter is 3. mw = MDMFSlotWriteProxy(share, self.rref, si, self.secrets, 0, 3, 10, 6, datalength) return mw def test_write_rejected_with_too_many_blocks(self): mw = self._make_new_mw("si0", 0) # Try writing too many blocks. We should not be able to write # more than 6 # blocks into each share. d = defer.succeed(None) for i in xrange(6): d.addCallback(lambda ignored, i=i: mw.put_block(self.block, i, self.salt)) d.addCallback(lambda ignored: self.shouldFail(LayoutInvalid, "too many blocks", None, mw.put_block, self.block, 7, self.salt)) return d def test_write_rejected_with_invalid_salt(self): # Try writing an invalid salt. Salts are 16 bytes -- any more or # less should cause an error. mw = self._make_new_mw("si1", 0) bad_salt = "a" * 17 # 17 bytes d = defer.succeed(None) d.addCallback(lambda ignored: self.shouldFail(LayoutInvalid, "test_invalid_salt", None, mw.put_block, self.block, 7, bad_salt)) return d def test_write_rejected_with_invalid_root_hash(self): # Try writing an invalid root hash. This should be SHA256d, and # 32 bytes long as a result. mw = self._make_new_mw("si2", 0) # 17 bytes != 32 bytes invalid_root_hash = "a" * 17 d = defer.succeed(None) # Before this test can work, we need to put some blocks + salts, # a block hash tree, and a share hash tree. Otherwise, we'll see # failures that match what we are looking for, but are caused by # the constraints imposed on operation ordering. 
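# For reference, the ordering these MDMFSlotWriteProxy tests use when a write
# is meant to succeed (condensed from the put_* sequences elsewhere in this
# class, e.g. _write_share in test_uncoordinated_write); purely illustrative
# and not called by any test:
def _publish_in_expected_order(mw, blocks, salt, encprivkey, blockhashes,
                               sharehashes, root_hash, signature,
                               verification_key):
    for i, block in enumerate(blocks):
        mw.put_block(block, i, salt)           # blocks and their salts
    mw.put_encprivkey(encprivkey)              # encrypted private key
    mw.put_blockhashes(blockhashes)            # block hash tree
    mw.put_sharehashes(sharehashes)            # share hash chain
    mw.put_root_hash(root_hash)                # root hash
    mw.put_signature(signature)                # signature
    mw.put_verification_key(verification_key)  # verification key
    return mw.finish_publishing()              # finally, send it to the server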
for i in xrange(6): d.addCallback(lambda ignored, i=i: mw.put_block(self.block, i, self.salt)) d.addCallback(lambda ignored: mw.put_encprivkey(self.encprivkey)) d.addCallback(lambda ignored: mw.put_blockhashes(self.block_hash_tree)) d.addCallback(lambda ignored: mw.put_sharehashes(self.share_hash_chain)) d.addCallback(lambda ignored: self.shouldFail(LayoutInvalid, "invalid root hash", None, mw.put_root_hash, invalid_root_hash)) return d def test_write_rejected_with_invalid_blocksize(self): # The blocksize implied by the writer that we get from # _make_new_mw is 2bytes -- any more or any less than this # should be cause for failure, unless it is the tail segment, in # which case it may not be failure. invalid_block = "a" mw = self._make_new_mw("si3", 0, 33) # implies a tail segment with # one byte blocks # 1 bytes != 2 bytes d = defer.succeed(None) d.addCallback(lambda ignored, invalid_block=invalid_block: self.shouldFail(LayoutInvalid, "test blocksize too small", None, mw.put_block, invalid_block, 0, self.salt)) invalid_block = invalid_block * 3 # 3 bytes != 2 bytes d.addCallback(lambda ignored: self.shouldFail(LayoutInvalid, "test blocksize too large", None, mw.put_block, invalid_block, 0, self.salt)) for i in xrange(5): d.addCallback(lambda ignored, i=i: mw.put_block(self.block, i, self.salt)) # Try to put an invalid tail segment d.addCallback(lambda ignored: self.shouldFail(LayoutInvalid, "test invalid tail segment", None, mw.put_block, self.block, 5, self.salt)) valid_block = "a" d.addCallback(lambda ignored: mw.put_block(valid_block, 5, self.salt)) return d def test_write_enforces_order_constraints(self): # We require that the MDMFSlotWriteProxy be interacted with in a # specific way. # That way is: # 0: __init__ # 1: write blocks and salts # 2: Write the encrypted private key # 3: Write the block hashes # 4: Write the share hashes # 5: Write the root hash and salt hash # 6: Write the signature and verification key # 7: Write the file. # # Some of these can be performed out-of-order, and some can't. # The dependencies that I want to test here are: # - Private key before block hashes # - share hashes and block hashes before root hash # - root hash before signature # - signature before verification key mw0 = self._make_new_mw("si0", 0) # Write some shares d = defer.succeed(None) for i in xrange(6): d.addCallback(lambda ignored, i=i: mw0.put_block(self.block, i, self.salt)) # Try to write the share hash chain without writing the # encrypted private key d.addCallback(lambda ignored: self.shouldFail(LayoutInvalid, "share hash chain before " "private key", None, mw0.put_sharehashes, self.share_hash_chain)) # Write the private key. d.addCallback(lambda ignored: mw0.put_encprivkey(self.encprivkey)) # Now write the block hashes and try again d.addCallback(lambda ignored: mw0.put_blockhashes(self.block_hash_tree)) # We haven't yet put the root hash on the share, so we shouldn't # be able to sign it. d.addCallback(lambda ignored: self.shouldFail(LayoutInvalid, "signature before root hash", None, mw0.put_signature, self.signature)) d.addCallback(lambda ignored: self.failUnlessRaises(LayoutInvalid, mw0.get_signable)) # ..and, since that fails, we also shouldn't be able to put the # verification key. d.addCallback(lambda ignored: self.shouldFail(LayoutInvalid, "key before signature", None, mw0.put_verification_key, self.verification_key)) # Now write the share hashes. 
d.addCallback(lambda ignored: mw0.put_sharehashes(self.share_hash_chain)) # We should be able to write the root hash now too d.addCallback(lambda ignored: mw0.put_root_hash(self.root_hash)) # We should still be unable to put the verification key d.addCallback(lambda ignored: self.shouldFail(LayoutInvalid, "key before signature", None, mw0.put_verification_key, self.verification_key)) d.addCallback(lambda ignored: mw0.put_signature(self.signature)) # We shouldn't be able to write the offsets to the remote server # until the offset table is finished; IOW, until we have written # the verification key. d.addCallback(lambda ignored: self.shouldFail(LayoutInvalid, "offsets before verification key", None, mw0.finish_publishing)) d.addCallback(lambda ignored: mw0.put_verification_key(self.verification_key)) return d def test_end_to_end(self): mw = self._make_new_mw("si1", 0) # Write a share using the mutable writer, and make sure that the # reader knows how to read everything back to us. d = defer.succeed(None) for i in xrange(6): d.addCallback(lambda ignored, i=i: mw.put_block(self.block, i, self.salt)) d.addCallback(lambda ignored: mw.put_encprivkey(self.encprivkey)) d.addCallback(lambda ignored: mw.put_blockhashes(self.block_hash_tree)) d.addCallback(lambda ignored: mw.put_sharehashes(self.share_hash_chain)) d.addCallback(lambda ignored: mw.put_root_hash(self.root_hash)) d.addCallback(lambda ignored: mw.put_signature(self.signature)) d.addCallback(lambda ignored: mw.put_verification_key(self.verification_key)) d.addCallback(lambda ignored: mw.finish_publishing()) mr = MDMFSlotReadProxy(self.rref, "si1", 0) def _check_block_and_salt((block, salt)): self.failUnlessEqual(block, self.block) self.failUnlessEqual(salt, self.salt) for i in xrange(6): d.addCallback(lambda ignored, i=i: mr.get_block_and_salt(i)) d.addCallback(_check_block_and_salt) d.addCallback(lambda ignored: mr.get_encprivkey()) d.addCallback(lambda encprivkey: self.failUnlessEqual(self.encprivkey, encprivkey)) d.addCallback(lambda ignored: mr.get_blockhashes()) d.addCallback(lambda blockhashes: self.failUnlessEqual(self.block_hash_tree, blockhashes)) d.addCallback(lambda ignored: mr.get_sharehashes()) d.addCallback(lambda sharehashes: self.failUnlessEqual(self.share_hash_chain, sharehashes)) d.addCallback(lambda ignored: mr.get_signature()) d.addCallback(lambda signature: self.failUnlessEqual(signature, self.signature)) d.addCallback(lambda ignored: mr.get_verification_key()) d.addCallback(lambda verification_key: self.failUnlessEqual(verification_key, self.verification_key)) d.addCallback(lambda ignored: mr.get_seqnum()) d.addCallback(lambda seqnum: self.failUnlessEqual(seqnum, 0)) d.addCallback(lambda ignored: mr.get_root_hash()) d.addCallback(lambda root_hash: self.failUnlessEqual(self.root_hash, root_hash)) d.addCallback(lambda ignored: mr.get_encoding_parameters()) def _check_encoding_parameters((k, n, segsize, datalen)): self.failUnlessEqual(k, 3) self.failUnlessEqual(n, 10) self.failUnlessEqual(segsize, 6) self.failUnlessEqual(datalen, 36) d.addCallback(_check_encoding_parameters) d.addCallback(lambda ignored: mr.get_checkstring()) d.addCallback(lambda checkstring: self.failUnlessEqual(checkstring, mw.get_checkstring())) return d def test_is_sdmf(self): # The MDMFSlotReadProxy should also know how to read SDMF files, # since it will encounter them on the grid. Callers use the # is_sdmf method to test this. 
self.write_sdmf_share_to_server("si1") mr = MDMFSlotReadProxy(self.rref, "si1", 0) d = mr.is_sdmf() d.addCallback(lambda issdmf: self.failUnless(issdmf)) return d def test_reads_sdmf(self): # The slot read proxy should, naturally, know how to tell us # about data in the SDMF format self.write_sdmf_share_to_server("si1") mr = MDMFSlotReadProxy(self.rref, "si1", 0) d = defer.succeed(None) d.addCallback(lambda ignored: mr.is_sdmf()) d.addCallback(lambda issdmf: self.failUnless(issdmf)) # What do we need to read? # - The sharedata # - The salt d.addCallback(lambda ignored: mr.get_block_and_salt(0)) def _check_block_and_salt(results): block, salt = results # Our original file is 36 bytes long. Then each share is 12 # bytes in size. The share is composed entirely of the # letter a. self.block contains 2 as, so 6 * self.block is # what we are looking for. self.failUnlessEqual(block, self.block * 6) self.failUnlessEqual(salt, self.salt) d.addCallback(_check_block_and_salt) # - The blockhashes d.addCallback(lambda ignored: mr.get_blockhashes()) d.addCallback(lambda blockhashes: self.failUnlessEqual(self.block_hash_tree, blockhashes, blockhashes)) # - The sharehashes d.addCallback(lambda ignored: mr.get_sharehashes()) d.addCallback(lambda sharehashes: self.failUnlessEqual(self.share_hash_chain, sharehashes)) # - The keys d.addCallback(lambda ignored: mr.get_encprivkey()) d.addCallback(lambda encprivkey: self.failUnlessEqual(encprivkey, self.encprivkey, encprivkey)) d.addCallback(lambda ignored: mr.get_verification_key()) d.addCallback(lambda verification_key: self.failUnlessEqual(verification_key, self.verification_key, verification_key)) # - The signature d.addCallback(lambda ignored: mr.get_signature()) d.addCallback(lambda signature: self.failUnlessEqual(signature, self.signature, signature)) # - The sequence number d.addCallback(lambda ignored: mr.get_seqnum()) d.addCallback(lambda seqnum: self.failUnlessEqual(seqnum, 0, seqnum)) # - The root hash d.addCallback(lambda ignored: mr.get_root_hash()) d.addCallback(lambda root_hash: self.failUnlessEqual(root_hash, self.root_hash, root_hash)) return d def test_only_reads_one_segment_sdmf(self): # SDMF shares have only one segment, so it doesn't make sense to # read more segments than that. The reader should know this and # complain if we try to do that. self.write_sdmf_share_to_server("si1") mr = MDMFSlotReadProxy(self.rref, "si1", 0) d = defer.succeed(None) d.addCallback(lambda ignored: mr.is_sdmf()) d.addCallback(lambda issdmf: self.failUnless(issdmf)) d.addCallback(lambda ignored: self.shouldFail(LayoutInvalid, "test bad segment", None, mr.get_block_and_salt, 1)) return d def test_read_with_prefetched_mdmf_data(self): # The MDMFSlotReadProxy will prefill certain fields if you pass # it data that you have already fetched. This is useful for # cases like the Servermap, which prefetches ~2kb of data while # finding out which shares are on the remote peer so that it # doesn't waste round trips. mdmf_data = self.build_test_mdmf_share() self.write_test_share_to_server("si1") def _make_mr(ignored, length): mr = MDMFSlotReadProxy(self.rref, "si1", 0, mdmf_data[:length]) return mr d = defer.succeed(None) # This should be enough to fill in both the encoding parameters # and the table of offsets, which will complete the version # information tuple. 
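# Why 123 bytes suffices here (derived from the fixed offsets asserted in
# test_write above): the signable header is
#   version(1) + seqnum(8) + root_hash(32) + k(1) + n(1) + segsize(8) + datalen(8) = 59 bytes,
# followed by an offset table of 8 eight-byte entries = 64 bytes,
# so the header plus offset table ends at byte 59 + 64 = 123.
assert 1 + 8 + 32 + 1 + 1 + 8 + 8 == 59
assert 59 + 8 * 8 == 123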
d.addCallback(_make_mr, 123) d.addCallback(lambda mr: mr.get_verinfo()) def _check_verinfo(verinfo): self.failUnless(verinfo) self.failUnlessEqual(len(verinfo), 9) (seqnum, root_hash, salt_hash, segsize, datalen, k, n, prefix, offsets) = verinfo self.failUnlessEqual(seqnum, 0) self.failUnlessEqual(root_hash, self.root_hash) self.failUnlessEqual(segsize, 6) self.failUnlessEqual(datalen, 36) self.failUnlessEqual(k, 3) self.failUnlessEqual(n, 10) expected_prefix = struct.pack(MDMFSIGNABLEHEADER, 1, seqnum, root_hash, k, n, segsize, datalen) self.failUnlessEqual(expected_prefix, prefix) self.failUnlessEqual(self.rref.read_count, 0) d.addCallback(_check_verinfo) # This is not enough data to read a block and a share, so the # wrapper should attempt to read this from the remote server. d.addCallback(_make_mr, 123) d.addCallback(lambda mr: mr.get_block_and_salt(0)) def _check_block_and_salt((block, salt)): self.failUnlessEqual(block, self.block) self.failUnlessEqual(salt, self.salt) self.failUnlessEqual(self.rref.read_count, 1) # This should be enough data to read one block. d.addCallback(_make_mr, 123 + PRIVATE_KEY_SIZE + SIGNATURE_SIZE + VERIFICATION_KEY_SIZE + SHARE_HASH_CHAIN_SIZE + 140) d.addCallback(lambda mr: mr.get_block_and_salt(0)) d.addCallback(_check_block_and_salt) return d def test_read_with_prefetched_sdmf_data(self): sdmf_data = self.build_test_sdmf_share() self.write_sdmf_share_to_server("si1") def _make_mr(ignored, length): mr = MDMFSlotReadProxy(self.rref, "si1", 0, sdmf_data[:length]) return mr d = defer.succeed(None) # This should be enough to get us the encoding parameters, # offset table, and everything else we need to build a verinfo # string. d.addCallback(_make_mr, 123) d.addCallback(lambda mr: mr.get_verinfo()) def _check_verinfo(verinfo): self.failUnless(verinfo) self.failUnlessEqual(len(verinfo), 9) (seqnum, root_hash, salt, segsize, datalen, k, n, prefix, offsets) = verinfo self.failUnlessEqual(seqnum, 0) self.failUnlessEqual(root_hash, self.root_hash) self.failUnlessEqual(salt, self.salt) self.failUnlessEqual(segsize, 36) self.failUnlessEqual(datalen, 36) self.failUnlessEqual(k, 3) self.failUnlessEqual(n, 10) expected_prefix = struct.pack(SIGNED_PREFIX, 0, seqnum, root_hash, salt, k, n, segsize, datalen) self.failUnlessEqual(expected_prefix, prefix) self.failUnlessEqual(self.rref.read_count, 0) d.addCallback(_check_verinfo) # This shouldn't be enough to read any share data. d.addCallback(_make_mr, 123) d.addCallback(lambda mr: mr.get_block_and_salt(0)) def _check_block_and_salt((block, salt)): self.failUnlessEqual(block, self.block * 6) self.failUnlessEqual(salt, self.salt) # TODO: Fix the read routine so that it reads only the data # that it has cached if it can't read all of it. self.failUnlessEqual(self.rref.read_count, 2) # This should be enough to read share data. d.addCallback(_make_mr, self.offsets['share_data']) d.addCallback(lambda mr: mr.get_block_and_salt(0)) d.addCallback(_check_block_and_salt) return d def test_read_with_empty_mdmf_file(self): # Some tests upload a file with no contents to test things # unrelated to the actual handling of the content of the file. # The reader should behave intelligently in these cases. self.write_test_share_to_server("si1", empty=True) mr = MDMFSlotReadProxy(self.rref, "si1", 0) # We should be able to get the encoding parameters, and they # should be correct. 
d = defer.succeed(None) d.addCallback(lambda ignored: mr.get_encoding_parameters()) def _check_encoding_parameters(params): self.failUnlessEqual(len(params), 4) k, n, segsize, datalen = params self.failUnlessEqual(k, 3) self.failUnlessEqual(n, 10) self.failUnlessEqual(segsize, 0) self.failUnlessEqual(datalen, 0) d.addCallback(_check_encoding_parameters) # We should not be able to fetch a block, since there are no # blocks to fetch d.addCallback(lambda ignored: self.shouldFail(LayoutInvalid, "get block on empty file", None, mr.get_block_and_salt, 0)) return d def test_read_with_empty_sdmf_file(self): self.write_sdmf_share_to_server("si1", empty=True) mr = MDMFSlotReadProxy(self.rref, "si1", 0) # We should be able to get the encoding parameters, and they # should be correct d = defer.succeed(None) d.addCallback(lambda ignored: mr.get_encoding_parameters()) def _check_encoding_parameters(params): self.failUnlessEqual(len(params), 4) k, n, segsize, datalen = params self.failUnlessEqual(k, 3) self.failUnlessEqual(n, 10) self.failUnlessEqual(segsize, 0) self.failUnlessEqual(datalen, 0) d.addCallback(_check_encoding_parameters) # It does not make sense to get a block in this format, so we # should not be able to. d.addCallback(lambda ignored: self.shouldFail(LayoutInvalid, "get block on an empty file", None, mr.get_block_and_salt, 0)) return d def test_verinfo_with_sdmf_file(self): self.write_sdmf_share_to_server("si1") mr = MDMFSlotReadProxy(self.rref, "si1", 0) # We should be able to get the version information. d = defer.succeed(None) d.addCallback(lambda ignored: mr.get_verinfo()) def _check_verinfo(verinfo): self.failUnless(verinfo) self.failUnlessEqual(len(verinfo), 9) (seqnum, root_hash, salt, segsize, datalen, k, n, prefix, offsets) = verinfo self.failUnlessEqual(seqnum, 0) self.failUnlessEqual(root_hash, self.root_hash) self.failUnlessEqual(salt, self.salt) self.failUnlessEqual(segsize, 36) self.failUnlessEqual(datalen, 36) self.failUnlessEqual(k, 3) self.failUnlessEqual(n, 10) expected_prefix = struct.pack(">BQ32s16s BBQQ", 0, seqnum, root_hash, salt, k, n, segsize, datalen) self.failUnlessEqual(prefix, expected_prefix) self.failUnlessEqual(offsets, self.offsets) d.addCallback(_check_verinfo) return d def test_verinfo_with_mdmf_file(self): self.write_test_share_to_server("si1") mr = MDMFSlotReadProxy(self.rref, "si1", 0) d = defer.succeed(None) d.addCallback(lambda ignored: mr.get_verinfo()) def _check_verinfo(verinfo): self.failUnless(verinfo) self.failUnlessEqual(len(verinfo), 9) (seqnum, root_hash, IV, segsize, datalen, k, n, prefix, offsets) = verinfo self.failUnlessEqual(seqnum, 0) self.failUnlessEqual(root_hash, self.root_hash) self.failIf(IV) self.failUnlessEqual(segsize, 6) self.failUnlessEqual(datalen, 36) self.failUnlessEqual(k, 3) self.failUnlessEqual(n, 10) expected_prefix = struct.pack(">BQ32s BBQQ", 1, seqnum, root_hash, k, n, segsize, datalen) self.failUnlessEqual(prefix, expected_prefix) self.failUnlessEqual(offsets, self.offsets) d.addCallback(_check_verinfo) return d def test_sdmf_writer(self): # Go through the motions of writing an SDMF share to the storage # server. Then read the storage server to see that the share got # written in the way that we think it should have. # We do this first so that the necessary instance variables get # set the way we want them for the tests below. data = self.build_test_sdmf_share() sdmfr = SDMFSlotWriteProxy(0, self.rref, "si1", self.secrets, 0, 3, 10, 36, 36) # Put the block and salt. 
sdmfr.put_block(self.blockdata, 0, self.salt) # Put the encprivkey sdmfr.put_encprivkey(self.encprivkey) # Put the block and share hash chains sdmfr.put_blockhashes(self.block_hash_tree) sdmfr.put_sharehashes(self.share_hash_chain) sdmfr.put_root_hash(self.root_hash) # Put the signature sdmfr.put_signature(self.signature) # Put the verification key sdmfr.put_verification_key(self.verification_key) # Now check to make sure that nothing has been written yet. self.failUnlessEqual(self.rref.write_count, 0) # Now finish publishing d = sdmfr.finish_publishing() def _then(ignored): self.failUnlessEqual(self.rref.write_count, 1) read = self.ss.remote_slot_readv self.failUnlessEqual(read("si1", [0], [(0, len(data))]), {0: [data]}) d.addCallback(_then) return d def test_sdmf_writer_preexisting_share(self): data = self.build_test_sdmf_share() self.write_sdmf_share_to_server("si1") # Now there is a share on the storage server. To successfully # write, we need to set the checkstring correctly. When we # don't, no write should occur. sdmfw = SDMFSlotWriteProxy(0, self.rref, "si1", self.secrets, 1, 3, 10, 36, 36) sdmfw.put_block(self.blockdata, 0, self.salt) # Put the encprivkey sdmfw.put_encprivkey(self.encprivkey) # Put the block and share hash chains sdmfw.put_blockhashes(self.block_hash_tree) sdmfw.put_sharehashes(self.share_hash_chain) # Put the root hash sdmfw.put_root_hash(self.root_hash) # Put the signature sdmfw.put_signature(self.signature) # Put the verification key sdmfw.put_verification_key(self.verification_key) # We shouldn't have a checkstring yet self.failUnlessEqual(sdmfw.get_checkstring(), "") d = sdmfw.finish_publishing() def _then(results): self.failIf(results[0]) # this is the correct checkstring self._expected_checkstring = results[1][0][0] return self._expected_checkstring d.addCallback(_then) d.addCallback(sdmfw.set_checkstring) d.addCallback(lambda ignored: sdmfw.get_checkstring()) d.addCallback(lambda checkstring: self.failUnlessEqual(checkstring, self._expected_checkstring)) d.addCallback(lambda ignored: sdmfw.finish_publishing()) def _then_again(results): self.failUnless(results[0]) read = self.ss.remote_slot_readv self.failUnlessEqual(read("si1", [0], [(1, 8)]), {0: [struct.pack(">Q", 1)]}) self.failUnlessEqual(read("si1", [0], [(9, len(data) - 9)]), {0: [data[9:]]}) d.addCallback(_then_again) return d class Stats(unittest.TestCase): def setUp(self): self.sparent = LoggingServiceParent() self._lease_secret = itertools.count() def tearDown(self): return self.sparent.stopService() def workdir(self, name): basedir = os.path.join("storage", "Server", name) return basedir def create(self, name): workdir = self.workdir(name) ss = StorageServer(workdir, "\x00" * 20) ss.setServiceParent(self.sparent) return ss def test_latencies(self): ss = self.create("test_latencies") for i in range(10000): ss.add_latency("allocate", 1.0 * i) for i in range(1000): ss.add_latency("renew", 1.0 * i) for i in range(20): ss.add_latency("write", 1.0 * i) for i in range(10): ss.add_latency("cancel", 2.0 * i) ss.add_latency("get", 5.0) output = ss.get_latencies() self.failUnlessEqual(sorted(output.keys()), sorted(["allocate", "renew", "cancel", "write", "get"])) self.failUnlessEqual(len(ss.latencies["allocate"]), 1000) self.failUnless(abs(output["allocate"]["mean"] - 9500) < 1, output) self.failUnless(abs(output["allocate"]["01_0_percentile"] - 9010) < 1, output) self.failUnless(abs(output["allocate"]["10_0_percentile"] - 9100) < 1, output) self.failUnless(abs(output["allocate"]["50_0_percentile"] - 9500) 
< 1, output) self.failUnless(abs(output["allocate"]["90_0_percentile"] - 9900) < 1, output) self.failUnless(abs(output["allocate"]["95_0_percentile"] - 9950) < 1, output) self.failUnless(abs(output["allocate"]["99_0_percentile"] - 9990) < 1, output) self.failUnless(abs(output["allocate"]["99_9_percentile"] - 9999) < 1, output) self.failUnlessEqual(len(ss.latencies["renew"]), 1000) self.failUnless(abs(output["renew"]["mean"] - 500) < 1, output) self.failUnless(abs(output["renew"]["01_0_percentile"] - 10) < 1, output) self.failUnless(abs(output["renew"]["10_0_percentile"] - 100) < 1, output) self.failUnless(abs(output["renew"]["50_0_percentile"] - 500) < 1, output) self.failUnless(abs(output["renew"]["90_0_percentile"] - 900) < 1, output) self.failUnless(abs(output["renew"]["95_0_percentile"] - 950) < 1, output) self.failUnless(abs(output["renew"]["99_0_percentile"] - 990) < 1, output) self.failUnless(abs(output["renew"]["99_9_percentile"] - 999) < 1, output) self.failUnlessEqual(len(ss.latencies["write"]), 20) self.failUnless(abs(output["write"]["mean"] - 9) < 1, output) self.failUnless(output["write"]["01_0_percentile"] is None, output) self.failUnless(abs(output["write"]["10_0_percentile"] - 2) < 1, output) self.failUnless(abs(output["write"]["50_0_percentile"] - 10) < 1, output) self.failUnless(abs(output["write"]["90_0_percentile"] - 18) < 1, output) self.failUnless(abs(output["write"]["95_0_percentile"] - 19) < 1, output) self.failUnless(output["write"]["99_0_percentile"] is None, output) self.failUnless(output["write"]["99_9_percentile"] is None, output) self.failUnlessEqual(len(ss.latencies["cancel"]), 10) self.failUnless(abs(output["cancel"]["mean"] - 9) < 1, output) self.failUnless(output["cancel"]["01_0_percentile"] is None, output) self.failUnless(abs(output["cancel"]["10_0_percentile"] - 2) < 1, output) self.failUnless(abs(output["cancel"]["50_0_percentile"] - 10) < 1, output) self.failUnless(abs(output["cancel"]["90_0_percentile"] - 18) < 1, output) self.failUnless(output["cancel"]["95_0_percentile"] is None, output) self.failUnless(output["cancel"]["99_0_percentile"] is None, output) self.failUnless(output["cancel"]["99_9_percentile"] is None, output) self.failUnlessEqual(len(ss.latencies["get"]), 1) self.failUnless(output["get"]["mean"] is None, output) self.failUnless(output["get"]["01_0_percentile"] is None, output) self.failUnless(output["get"]["10_0_percentile"] is None, output) self.failUnless(output["get"]["50_0_percentile"] is None, output) self.failUnless(output["get"]["90_0_percentile"] is None, output) self.failUnless(output["get"]["95_0_percentile"] is None, output) self.failUnless(output["get"]["99_0_percentile"] is None, output) self.failUnless(output["get"]["99_9_percentile"] is None, output) def remove_tags(s): s = re.sub(r'<[^>]*>', ' ', s) s = re.sub(r'\s+', ' ', s) return s class MyBucketCountingCrawler(BucketCountingCrawler): def finished_prefix(self, cycle, prefix): BucketCountingCrawler.finished_prefix(self, cycle, prefix) if self.hook_ds: d = self.hook_ds.pop(0) d.callback(None) class MyStorageServer(StorageServer): def add_bucket_counter(self): statefile = os.path.join(self.storedir, "bucket_counter.state") self.bucket_counter = MyBucketCountingCrawler(self, statefile) self.bucket_counter.setServiceParent(self) class BucketCounter(unittest.TestCase, pollmixin.PollMixin): def setUp(self): self.s = service.MultiService() self.s.startService() def tearDown(self): return self.s.stopService() def test_bucket_counter(self): basedir = 
"storage/BucketCounter/bucket_counter" fileutil.make_dirs(basedir) ss = StorageServer(basedir, "\x00" * 20) # to make sure we capture the bucket-counting-crawler in the middle # of a cycle, we reach in and reduce its maximum slice time to 0. We # also make it start sooner than usual. ss.bucket_counter.slow_start = 0 orig_cpu_slice = ss.bucket_counter.cpu_slice ss.bucket_counter.cpu_slice = 0 ss.setServiceParent(self.s) w = StorageStatus(ss) # this sample is before the crawler has started doing anything html = w.renderSynchronously() self.failUnlessIn("
<h1>Storage Server Status</h1>
", html) s = remove_tags(html) self.failUnlessIn("Accepting new shares: Yes", s) self.failUnlessIn("Reserved space: - 0 B (0)", s) self.failUnlessIn("Total buckets: Not computed yet", s) self.failUnlessIn("Next crawl in", s) # give the bucket-counting-crawler one tick to get started. The # cpu_slice=0 will force it to yield right after it processes the # first prefix d = fireEventually() def _check(ignored): # are we really right after the first prefix? state = ss.bucket_counter.get_state() if state["last-complete-prefix"] is None: d2 = fireEventually() d2.addCallback(_check) return d2 self.failUnlessEqual(state["last-complete-prefix"], ss.bucket_counter.prefixes[0]) ss.bucket_counter.cpu_slice = 100.0 # finish as fast as possible html = w.renderSynchronously() s = remove_tags(html) self.failUnlessIn(" Current crawl ", s) self.failUnlessIn(" (next work in ", s) d.addCallback(_check) # now give it enough time to complete a full cycle def _watch(): return not ss.bucket_counter.get_progress()["cycle-in-progress"] d.addCallback(lambda ignored: self.poll(_watch)) def _check2(ignored): ss.bucket_counter.cpu_slice = orig_cpu_slice html = w.renderSynchronously() s = remove_tags(html) self.failUnlessIn("Total buckets: 0 (the number of", s) self.failUnless("Next crawl in 59 minutes" in s or "Next crawl in 60 minutes" in s, s) d.addCallback(_check2) return d def test_bucket_counter_cleanup(self): basedir = "storage/BucketCounter/bucket_counter_cleanup" fileutil.make_dirs(basedir) ss = StorageServer(basedir, "\x00" * 20) # to make sure we capture the bucket-counting-crawler in the middle # of a cycle, we reach in and reduce its maximum slice time to 0. ss.bucket_counter.slow_start = 0 orig_cpu_slice = ss.bucket_counter.cpu_slice ss.bucket_counter.cpu_slice = 0 ss.setServiceParent(self.s) d = fireEventually() def _after_first_prefix(ignored): state = ss.bucket_counter.state if state["last-complete-prefix"] is None: d2 = fireEventually() d2.addCallback(_after_first_prefix) return d2 ss.bucket_counter.cpu_slice = 100.0 # finish as fast as possible # now sneak in and mess with its state, to make sure it cleans up # properly at the end of the cycle self.failUnlessEqual(state["last-complete-prefix"], ss.bucket_counter.prefixes[0]) state["bucket-counts"][-12] = {} state["storage-index-samples"]["bogusprefix!"] = (-12, []) ss.bucket_counter.save_state() d.addCallback(_after_first_prefix) # now give it enough time to complete a cycle def _watch(): return not ss.bucket_counter.get_progress()["cycle-in-progress"] d.addCallback(lambda ignored: self.poll(_watch)) def _check2(ignored): ss.bucket_counter.cpu_slice = orig_cpu_slice s = ss.bucket_counter.get_state() self.failIf(-12 in s["bucket-counts"], s["bucket-counts"].keys()) self.failIf("bogusprefix!" in s["storage-index-samples"], s["storage-index-samples"].keys()) d.addCallback(_check2) return d def test_bucket_counter_eta(self): basedir = "storage/BucketCounter/bucket_counter_eta" fileutil.make_dirs(basedir) ss = MyStorageServer(basedir, "\x00" * 20) ss.bucket_counter.slow_start = 0 # these will be fired inside finished_prefix() hooks = ss.bucket_counter.hook_ds = [defer.Deferred() for i in range(3)] w = StorageStatus(ss) d = defer.Deferred() def _check_1(ignored): # no ETA is available yet html = w.renderSynchronously() s = remove_tags(html) self.failUnlessIn("complete (next work", s) def _check_2(ignored): # one prefix has finished, so an ETA based upon that elapsed time # should be available. 
html = w.renderSynchronously() s = remove_tags(html) self.failUnlessIn("complete (ETA ", s) def _check_3(ignored): # two prefixes have finished html = w.renderSynchronously() s = remove_tags(html) self.failUnlessIn("complete (ETA ", s) d.callback("done") hooks[0].addCallback(_check_1).addErrback(d.errback) hooks[1].addCallback(_check_2).addErrback(d.errback) hooks[2].addCallback(_check_3).addErrback(d.errback) ss.setServiceParent(self.s) return d class InstrumentedLeaseCheckingCrawler(LeaseCheckingCrawler): stop_after_first_bucket = False def process_bucket(self, *args, **kwargs): LeaseCheckingCrawler.process_bucket(self, *args, **kwargs) if self.stop_after_first_bucket: self.stop_after_first_bucket = False self.cpu_slice = -1.0 def yielding(self, sleep_time): if not self.stop_after_first_bucket: self.cpu_slice = 500 class BrokenStatResults: pass class No_ST_BLOCKS_LeaseCheckingCrawler(LeaseCheckingCrawler): def stat(self, fn): s = os.stat(fn) bsr = BrokenStatResults() for attrname in dir(s): if attrname.startswith("_"): continue if attrname == "st_blocks": continue setattr(bsr, attrname, getattr(s, attrname)) return bsr class InstrumentedStorageServer(StorageServer): LeaseCheckerClass = InstrumentedLeaseCheckingCrawler class No_ST_BLOCKS_StorageServer(StorageServer): LeaseCheckerClass = No_ST_BLOCKS_LeaseCheckingCrawler class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin, WebRenderingMixin): def setUp(self): self.s = service.MultiService() self.s.startService() def tearDown(self): return self.s.stopService() def make_shares(self, ss): def make(si): return (si, hashutil.tagged_hash("renew", si), hashutil.tagged_hash("cancel", si)) def make_mutable(si): return (si, hashutil.tagged_hash("renew", si), hashutil.tagged_hash("cancel", si), hashutil.tagged_hash("write-enabler", si)) def make_extra_lease(si, num): return (hashutil.tagged_hash("renew-%d" % num, si), hashutil.tagged_hash("cancel-%d" % num, si)) immutable_si_0, rs0, cs0 = make("\x00" * 16) immutable_si_1, rs1, cs1 = make("\x01" * 16) rs1a, cs1a = make_extra_lease(immutable_si_1, 1) mutable_si_2, rs2, cs2, we2 = make_mutable("\x02" * 16) mutable_si_3, rs3, cs3, we3 = make_mutable("\x03" * 16) rs3a, cs3a = make_extra_lease(mutable_si_3, 1) sharenums = [0] canary = FakeCanary() # note: 'tahoe debug dump-share' will not handle this file, since the # inner contents are not a valid CHK share data = "\xff" * 1000 a,w = ss.remote_allocate_buckets(immutable_si_0, rs0, cs0, sharenums, 1000, canary) w[0].remote_write(0, data) w[0].remote_close() a,w = ss.remote_allocate_buckets(immutable_si_1, rs1, cs1, sharenums, 1000, canary) w[0].remote_write(0, data) w[0].remote_close() ss.remote_add_lease(immutable_si_1, rs1a, cs1a) writev = ss.remote_slot_testv_and_readv_and_writev writev(mutable_si_2, (we2, rs2, cs2), {0: ([], [(0,data)], len(data))}, []) writev(mutable_si_3, (we3, rs3, cs3), {0: ([], [(0,data)], len(data))}, []) ss.remote_add_lease(mutable_si_3, rs3a, cs3a) self.sis = [immutable_si_0, immutable_si_1, mutable_si_2, mutable_si_3] self.renew_secrets = [rs0, rs1, rs1a, rs2, rs3, rs3a] self.cancel_secrets = [cs0, cs1, cs1a, cs2, cs3, cs3a] def test_basic(self): basedir = "storage/LeaseCrawler/basic" fileutil.make_dirs(basedir) ss = InstrumentedStorageServer(basedir, "\x00" * 20) # make it start sooner than usual. 
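# [Editor's sketch -- not part of the original test module.] The crawler tests
# above and below repeatedly wait for "the crawler has passed its first
# prefix/bucket" by re-scheduling a Deferred through the reactor (the
# fireEventually() / d2.addCallback(_check) idiom). A minimal standalone
# version of that polling pattern, using only Twisted; poll_until() and the
# condition callable are hypothetical names, not Tahoe APIs:
from twisted.internet import defer, reactor

def poll_until(condition, interval=0.01):
    """Return a Deferred that fires with None once condition() is true."""
    d = defer.Deferred()
    def _check():
        if condition():
            d.callback(None)
        else:
            # give the reactor a chance to run other work, then look again
            reactor.callLater(interval, _check)
    _check()
    return d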
lc = ss.lease_checker lc.slow_start = 0 lc.cpu_slice = 500 lc.stop_after_first_bucket = True webstatus = StorageStatus(ss) # create a few shares, with some leases on them self.make_shares(ss) [immutable_si_0, immutable_si_1, mutable_si_2, mutable_si_3] = self.sis # add a non-sharefile to exercise another code path fn = os.path.join(ss.sharedir, storage_index_to_dir(immutable_si_0), "not-a-share") f = open(fn, "wb") f.write("I am not a share.\n") f.close() # this is before the crawl has started, so we're not in a cycle yet initial_state = lc.get_state() self.failIf(lc.get_progress()["cycle-in-progress"]) self.failIfIn("cycle-to-date", initial_state) self.failIfIn("estimated-remaining-cycle", initial_state) self.failIfIn("estimated-current-cycle", initial_state) self.failUnlessIn("history", initial_state) self.failUnlessEqual(initial_state["history"], {}) ss.setServiceParent(self.s) DAY = 24*60*60 d = fireEventually() # now examine the state right after the first bucket has been # processed. def _after_first_bucket(ignored): initial_state = lc.get_state() if "cycle-to-date" not in initial_state: d2 = fireEventually() d2.addCallback(_after_first_bucket) return d2 self.failUnlessIn("cycle-to-date", initial_state) self.failUnlessIn("estimated-remaining-cycle", initial_state) self.failUnlessIn("estimated-current-cycle", initial_state) self.failUnlessIn("history", initial_state) self.failUnlessEqual(initial_state["history"], {}) so_far = initial_state["cycle-to-date"] self.failUnlessEqual(so_far["expiration-enabled"], False) self.failUnlessIn("configured-expiration-mode", so_far) self.failUnlessIn("lease-age-histogram", so_far) lah = so_far["lease-age-histogram"] self.failUnlessEqual(type(lah), list) self.failUnlessEqual(len(lah), 1) self.failUnlessEqual(lah, [ (0.0, DAY, 1) ] ) self.failUnlessEqual(so_far["leases-per-share-histogram"], {1: 1}) self.failUnlessEqual(so_far["corrupt-shares"], []) sr1 = so_far["space-recovered"] self.failUnlessEqual(sr1["examined-buckets"], 1) self.failUnlessEqual(sr1["examined-shares"], 1) self.failUnlessEqual(sr1["actual-shares"], 0) self.failUnlessEqual(sr1["configured-diskbytes"], 0) self.failUnlessEqual(sr1["original-sharebytes"], 0) left = initial_state["estimated-remaining-cycle"] sr2 = left["space-recovered"] self.failUnless(sr2["examined-buckets"] > 0, sr2["examined-buckets"]) self.failUnless(sr2["examined-shares"] > 0, sr2["examined-shares"]) self.failIfEqual(sr2["actual-shares"], None) self.failIfEqual(sr2["configured-diskbytes"], None) self.failIfEqual(sr2["original-sharebytes"], None) d.addCallback(_after_first_bucket) d.addCallback(lambda ign: self.render1(webstatus)) def _check_html_in_cycle(html): s = remove_tags(html) self.failUnlessIn("So far, this cycle has examined " "1 shares in 1 buckets (0 mutable / 1 immutable) ", s) self.failUnlessIn("and has recovered: " "0 shares, 0 buckets (0 mutable / 0 immutable), " "0 B (0 B / 0 B)", s) self.failUnlessIn("If expiration were enabled, " "we would have recovered: " "0 shares, 0 buckets (0 mutable / 0 immutable)," " 0 B (0 B / 0 B) by now", s) self.failUnlessIn("and the remainder of this cycle " "would probably recover: " "0 shares, 0 buckets (0 mutable / 0 immutable)," " 0 B (0 B / 0 B)", s) self.failUnlessIn("and the whole cycle would probably recover: " "0 shares, 0 buckets (0 mutable / 0 immutable)," " 0 B (0 B / 0 B)", s) self.failUnlessIn("if we were strictly using each lease's default " "31-day lease lifetime", s) self.failUnlessIn("this cycle would be expected to recover: ", s) 
d.addCallback(_check_html_in_cycle) # wait for the crawler to finish the first cycle. Nothing should have # been removed. def _wait(): return bool(lc.get_state()["last-cycle-finished"] is not None) d.addCallback(lambda ign: self.poll(_wait)) def _after_first_cycle(ignored): s = lc.get_state() self.failIf("cycle-to-date" in s) self.failIf("estimated-remaining-cycle" in s) self.failIf("estimated-current-cycle" in s) last = s["history"][0] self.failUnlessIn("cycle-start-finish-times", last) self.failUnlessEqual(type(last["cycle-start-finish-times"]), tuple) self.failUnlessEqual(last["expiration-enabled"], False) self.failUnlessIn("configured-expiration-mode", last) self.failUnlessIn("lease-age-histogram", last) lah = last["lease-age-histogram"] self.failUnlessEqual(type(lah), list) self.failUnlessEqual(len(lah), 1) self.failUnlessEqual(lah, [ (0.0, DAY, 6) ] ) self.failUnlessEqual(last["leases-per-share-histogram"], {1: 2, 2: 2}) self.failUnlessEqual(last["corrupt-shares"], []) rec = last["space-recovered"] self.failUnlessEqual(rec["examined-buckets"], 4) self.failUnlessEqual(rec["examined-shares"], 4) self.failUnlessEqual(rec["actual-buckets"], 0) self.failUnlessEqual(rec["original-buckets"], 0) self.failUnlessEqual(rec["configured-buckets"], 0) self.failUnlessEqual(rec["actual-shares"], 0) self.failUnlessEqual(rec["original-shares"], 0) self.failUnlessEqual(rec["configured-shares"], 0) self.failUnlessEqual(rec["actual-diskbytes"], 0) self.failUnlessEqual(rec["original-diskbytes"], 0) self.failUnlessEqual(rec["configured-diskbytes"], 0) self.failUnlessEqual(rec["actual-sharebytes"], 0) self.failUnlessEqual(rec["original-sharebytes"], 0) self.failUnlessEqual(rec["configured-sharebytes"], 0) def _get_sharefile(si): return list(ss._iter_share_files(si))[0] def count_leases(si): return len(list(_get_sharefile(si).get_leases())) self.failUnlessEqual(count_leases(immutable_si_0), 1) self.failUnlessEqual(count_leases(immutable_si_1), 2) self.failUnlessEqual(count_leases(mutable_si_2), 1) self.failUnlessEqual(count_leases(mutable_si_3), 2) d.addCallback(_after_first_cycle) d.addCallback(lambda ign: self.render1(webstatus)) def _check_html(html): s = remove_tags(html) self.failUnlessIn("recovered: 0 shares, 0 buckets " "(0 mutable / 0 immutable), 0 B (0 B / 0 B) ", s) self.failUnlessIn("and saw a total of 4 shares, 4 buckets " "(2 mutable / 2 immutable),", s) self.failUnlessIn("but expiration was not enabled", s) d.addCallback(_check_html) d.addCallback(lambda ign: self.render_json(webstatus)) def _check_json(json): data = simplejson.loads(json) self.failUnlessIn("lease-checker", data) self.failUnlessIn("lease-checker-progress", data) d.addCallback(_check_json) return d def backdate_lease(self, sf, renew_secret, new_expire_time): # ShareFile.renew_lease ignores attempts to back-date a lease (i.e. # "renew" a lease with a new_expire_time that is older than what the # current lease has), so we have to reach inside it. for i,lease in enumerate(sf.get_leases()): if lease.renew_secret == renew_secret: lease.expiration_time = new_expire_time f = open(sf.home, 'rb+') sf._write_lease_record(f, i, lease) f.close() return raise IndexError("unable to renew non-existent lease") def test_expire_age(self): basedir = "storage/LeaseCrawler/expire_age" fileutil.make_dirs(basedir) # setting expiration_time to 2000 means that any lease which is more # than 2000s old will be expired. 
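# [Editor's sketch -- not part of the original test module.] The expire_age and
# expire_cutoff_date tests below backdate a lease by rewriting its stored
# expiration time, relying on the fact that the stored value is always
# "grant/renew time + 31 days". A minimal model of the two GC decisions being
# exercised (the function and argument names here are hypothetical, not the
# expirer's real API):
DAY = 24*60*60
DEFAULT_LEASE_DURATION = 31*DAY

def lease_is_expired(lease_expiration_time, now, mode,
                     override_lease_duration=None, cutoff_date=None):
    # recover the implied grant/renew time from the stored expiration time
    renew_time = lease_expiration_time - DEFAULT_LEASE_DURATION
    if mode == "age":
        # e.g. override_lease_duration=2000 expires leases renewed >2000s ago
        return (now - renew_time) > override_lease_duration
    if mode == "cutoff-date":
        # expire leases granted or last renewed before the cutoff timestamp
        return renew_time < cutoff_date
    raise ValueError("GC mode %r must be 'age' or 'cutoff-date'" % (mode,))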
ss = InstrumentedStorageServer(basedir, "\x00" * 20, expiration_enabled=True, expiration_mode="age", expiration_override_lease_duration=2000) # make it start sooner than usual. lc = ss.lease_checker lc.slow_start = 0 lc.stop_after_first_bucket = True webstatus = StorageStatus(ss) # create a few shares, with some leases on them self.make_shares(ss) [immutable_si_0, immutable_si_1, mutable_si_2, mutable_si_3] = self.sis def count_shares(si): return len(list(ss._iter_share_files(si))) def _get_sharefile(si): return list(ss._iter_share_files(si))[0] def count_leases(si): return len(list(_get_sharefile(si).get_leases())) self.failUnlessEqual(count_shares(immutable_si_0), 1) self.failUnlessEqual(count_leases(immutable_si_0), 1) self.failUnlessEqual(count_shares(immutable_si_1), 1) self.failUnlessEqual(count_leases(immutable_si_1), 2) self.failUnlessEqual(count_shares(mutable_si_2), 1) self.failUnlessEqual(count_leases(mutable_si_2), 1) self.failUnlessEqual(count_shares(mutable_si_3), 1) self.failUnlessEqual(count_leases(mutable_si_3), 2) # artificially crank back the expiration time on the first lease of # each share, to make it look like it expired already (age=1000s). # Some shares have an extra lease which is set to expire at the # default time in 31 days from now (age=31days). We then run the # crawler, which will expire the first lease, making some shares get # deleted and others stay alive (with one remaining lease) now = time.time() sf0 = _get_sharefile(immutable_si_0) self.backdate_lease(sf0, self.renew_secrets[0], now - 1000) sf0_size = os.stat(sf0.home).st_size # immutable_si_1 gets an extra lease sf1 = _get_sharefile(immutable_si_1) self.backdate_lease(sf1, self.renew_secrets[1], now - 1000) sf2 = _get_sharefile(mutable_si_2) self.backdate_lease(sf2, self.renew_secrets[3], now - 1000) sf2_size = os.stat(sf2.home).st_size # mutable_si_3 gets an extra lease sf3 = _get_sharefile(mutable_si_3) self.backdate_lease(sf3, self.renew_secrets[4], now - 1000) ss.setServiceParent(self.s) d = fireEventually() # examine the state right after the first bucket has been processed def _after_first_bucket(ignored): p = lc.get_progress() if not p["cycle-in-progress"]: d2 = fireEventually() d2.addCallback(_after_first_bucket) return d2 d.addCallback(_after_first_bucket) d.addCallback(lambda ign: self.render1(webstatus)) def _check_html_in_cycle(html): s = remove_tags(html) # the first bucket encountered gets deleted, and its prefix # happens to be about 1/5th of the way through the ring, so the # predictor thinks we'll have 5 shares and that we'll delete them # all. This part of the test depends upon the SIs landing right # where they do now. self.failUnlessIn("The remainder of this cycle is expected to " "recover: 4 shares, 4 buckets", s) self.failUnlessIn("The whole cycle is expected to examine " "5 shares in 5 buckets and to recover: " "5 shares, 5 buckets", s) d.addCallback(_check_html_in_cycle) # wait for the crawler to finish the first cycle. 
Two shares should # have been removed def _wait(): return bool(lc.get_state()["last-cycle-finished"] is not None) d.addCallback(lambda ign: self.poll(_wait)) def _after_first_cycle(ignored): self.failUnlessEqual(count_shares(immutable_si_0), 0) self.failUnlessEqual(count_shares(immutable_si_1), 1) self.failUnlessEqual(count_leases(immutable_si_1), 1) self.failUnlessEqual(count_shares(mutable_si_2), 0) self.failUnlessEqual(count_shares(mutable_si_3), 1) self.failUnlessEqual(count_leases(mutable_si_3), 1) s = lc.get_state() last = s["history"][0] self.failUnlessEqual(last["expiration-enabled"], True) self.failUnlessEqual(last["configured-expiration-mode"], ("age", 2000, None, ("mutable", "immutable"))) self.failUnlessEqual(last["leases-per-share-histogram"], {1: 2, 2: 2}) rec = last["space-recovered"] self.failUnlessEqual(rec["examined-buckets"], 4) self.failUnlessEqual(rec["examined-shares"], 4) self.failUnlessEqual(rec["actual-buckets"], 2) self.failUnlessEqual(rec["original-buckets"], 2) self.failUnlessEqual(rec["configured-buckets"], 2) self.failUnlessEqual(rec["actual-shares"], 2) self.failUnlessEqual(rec["original-shares"], 2) self.failUnlessEqual(rec["configured-shares"], 2) size = sf0_size + sf2_size self.failUnlessEqual(rec["actual-sharebytes"], size) self.failUnlessEqual(rec["original-sharebytes"], size) self.failUnlessEqual(rec["configured-sharebytes"], size) # different platforms have different notions of "blocks used by # this file", so merely assert that it's a number self.failUnless(rec["actual-diskbytes"] >= 0, rec["actual-diskbytes"]) self.failUnless(rec["original-diskbytes"] >= 0, rec["original-diskbytes"]) self.failUnless(rec["configured-diskbytes"] >= 0, rec["configured-diskbytes"]) d.addCallback(_after_first_cycle) d.addCallback(lambda ign: self.render1(webstatus)) def _check_html(html): s = remove_tags(html) self.failUnlessIn("Expiration Enabled: expired leases will be removed", s) self.failUnlessIn("Leases created or last renewed more than 33 minutes ago will be considered expired.", s) self.failUnlessIn(" recovered: 2 shares, 2 buckets (1 mutable / 1 immutable), ", s) d.addCallback(_check_html) return d def test_expire_cutoff_date(self): basedir = "storage/LeaseCrawler/expire_cutoff_date" fileutil.make_dirs(basedir) # setting cutoff-date to 2000 seconds ago means that any lease which # is more than 2000s old will be expired. now = time.time() then = int(now - 2000) ss = InstrumentedStorageServer(basedir, "\x00" * 20, expiration_enabled=True, expiration_mode="cutoff-date", expiration_cutoff_date=then) # make it start sooner than usual. 
lc = ss.lease_checker lc.slow_start = 0 lc.stop_after_first_bucket = True webstatus = StorageStatus(ss) # create a few shares, with some leases on them self.make_shares(ss) [immutable_si_0, immutable_si_1, mutable_si_2, mutable_si_3] = self.sis def count_shares(si): return len(list(ss._iter_share_files(si))) def _get_sharefile(si): return list(ss._iter_share_files(si))[0] def count_leases(si): return len(list(_get_sharefile(si).get_leases())) self.failUnlessEqual(count_shares(immutable_si_0), 1) self.failUnlessEqual(count_leases(immutable_si_0), 1) self.failUnlessEqual(count_shares(immutable_si_1), 1) self.failUnlessEqual(count_leases(immutable_si_1), 2) self.failUnlessEqual(count_shares(mutable_si_2), 1) self.failUnlessEqual(count_leases(mutable_si_2), 1) self.failUnlessEqual(count_shares(mutable_si_3), 1) self.failUnlessEqual(count_leases(mutable_si_3), 2) # artificially crank back the expiration time on the first lease of # each share, to make it look like was renewed 3000s ago. To achieve # this, we need to set the expiration time to now-3000+31days. This # will change when the lease format is improved to contain both # create/renew time and duration. new_expiration_time = now - 3000 + 31*24*60*60 # Some shares have an extra lease which is set to expire at the # default time in 31 days from now (age=31days). We then run the # crawler, which will expire the first lease, making some shares get # deleted and others stay alive (with one remaining lease) sf0 = _get_sharefile(immutable_si_0) self.backdate_lease(sf0, self.renew_secrets[0], new_expiration_time) sf0_size = os.stat(sf0.home).st_size # immutable_si_1 gets an extra lease sf1 = _get_sharefile(immutable_si_1) self.backdate_lease(sf1, self.renew_secrets[1], new_expiration_time) sf2 = _get_sharefile(mutable_si_2) self.backdate_lease(sf2, self.renew_secrets[3], new_expiration_time) sf2_size = os.stat(sf2.home).st_size # mutable_si_3 gets an extra lease sf3 = _get_sharefile(mutable_si_3) self.backdate_lease(sf3, self.renew_secrets[4], new_expiration_time) ss.setServiceParent(self.s) d = fireEventually() # examine the state right after the first bucket has been processed def _after_first_bucket(ignored): p = lc.get_progress() if not p["cycle-in-progress"]: d2 = fireEventually() d2.addCallback(_after_first_bucket) return d2 d.addCallback(_after_first_bucket) d.addCallback(lambda ign: self.render1(webstatus)) def _check_html_in_cycle(html): s = remove_tags(html) # the first bucket encountered gets deleted, and its prefix # happens to be about 1/5th of the way through the ring, so the # predictor thinks we'll have 5 shares and that we'll delete them # all. This part of the test depends upon the SIs landing right # where they do now. self.failUnlessIn("The remainder of this cycle is expected to " "recover: 4 shares, 4 buckets", s) self.failUnlessIn("The whole cycle is expected to examine " "5 shares in 5 buckets and to recover: " "5 shares, 5 buckets", s) d.addCallback(_check_html_in_cycle) # wait for the crawler to finish the first cycle. 
Two shares should # have been removed def _wait(): return bool(lc.get_state()["last-cycle-finished"] is not None) d.addCallback(lambda ign: self.poll(_wait)) def _after_first_cycle(ignored): self.failUnlessEqual(count_shares(immutable_si_0), 0) self.failUnlessEqual(count_shares(immutable_si_1), 1) self.failUnlessEqual(count_leases(immutable_si_1), 1) self.failUnlessEqual(count_shares(mutable_si_2), 0) self.failUnlessEqual(count_shares(mutable_si_3), 1) self.failUnlessEqual(count_leases(mutable_si_3), 1) s = lc.get_state() last = s["history"][0] self.failUnlessEqual(last["expiration-enabled"], True) self.failUnlessEqual(last["configured-expiration-mode"], ("cutoff-date", None, then, ("mutable", "immutable"))) self.failUnlessEqual(last["leases-per-share-histogram"], {1: 2, 2: 2}) rec = last["space-recovered"] self.failUnlessEqual(rec["examined-buckets"], 4) self.failUnlessEqual(rec["examined-shares"], 4) self.failUnlessEqual(rec["actual-buckets"], 2) self.failUnlessEqual(rec["original-buckets"], 0) self.failUnlessEqual(rec["configured-buckets"], 2) self.failUnlessEqual(rec["actual-shares"], 2) self.failUnlessEqual(rec["original-shares"], 0) self.failUnlessEqual(rec["configured-shares"], 2) size = sf0_size + sf2_size self.failUnlessEqual(rec["actual-sharebytes"], size) self.failUnlessEqual(rec["original-sharebytes"], 0) self.failUnlessEqual(rec["configured-sharebytes"], size) # different platforms have different notions of "blocks used by # this file", so merely assert that it's a number self.failUnless(rec["actual-diskbytes"] >= 0, rec["actual-diskbytes"]) self.failUnless(rec["original-diskbytes"] >= 0, rec["original-diskbytes"]) self.failUnless(rec["configured-diskbytes"] >= 0, rec["configured-diskbytes"]) d.addCallback(_after_first_cycle) d.addCallback(lambda ign: self.render1(webstatus)) def _check_html(html): s = remove_tags(html) self.failUnlessIn("Expiration Enabled:" " expired leases will be removed", s) date = time.strftime("%Y-%m-%d (%d-%b-%Y) UTC", time.gmtime(then)) substr = "Leases created or last renewed before %s will be considered expired." 
% date self.failUnlessIn(substr, s) self.failUnlessIn(" recovered: 2 shares, 2 buckets (1 mutable / 1 immutable), ", s) d.addCallback(_check_html) return d def test_only_immutable(self): basedir = "storage/LeaseCrawler/only_immutable" fileutil.make_dirs(basedir) now = time.time() then = int(now - 2000) ss = StorageServer(basedir, "\x00" * 20, expiration_enabled=True, expiration_mode="cutoff-date", expiration_cutoff_date=then, expiration_sharetypes=("immutable",)) lc = ss.lease_checker lc.slow_start = 0 webstatus = StorageStatus(ss) self.make_shares(ss) [immutable_si_0, immutable_si_1, mutable_si_2, mutable_si_3] = self.sis # set all leases to be expirable new_expiration_time = now - 3000 + 31*24*60*60 def count_shares(si): return len(list(ss._iter_share_files(si))) def _get_sharefile(si): return list(ss._iter_share_files(si))[0] def count_leases(si): return len(list(_get_sharefile(si).get_leases())) sf0 = _get_sharefile(immutable_si_0) self.backdate_lease(sf0, self.renew_secrets[0], new_expiration_time) sf1 = _get_sharefile(immutable_si_1) self.backdate_lease(sf1, self.renew_secrets[1], new_expiration_time) self.backdate_lease(sf1, self.renew_secrets[2], new_expiration_time) sf2 = _get_sharefile(mutable_si_2) self.backdate_lease(sf2, self.renew_secrets[3], new_expiration_time) sf3 = _get_sharefile(mutable_si_3) self.backdate_lease(sf3, self.renew_secrets[4], new_expiration_time) self.backdate_lease(sf3, self.renew_secrets[5], new_expiration_time) ss.setServiceParent(self.s) def _wait(): return bool(lc.get_state()["last-cycle-finished"] is not None) d = self.poll(_wait) def _after_first_cycle(ignored): self.failUnlessEqual(count_shares(immutable_si_0), 0) self.failUnlessEqual(count_shares(immutable_si_1), 0) self.failUnlessEqual(count_shares(mutable_si_2), 1) self.failUnlessEqual(count_leases(mutable_si_2), 1) self.failUnlessEqual(count_shares(mutable_si_3), 1) self.failUnlessEqual(count_leases(mutable_si_3), 2) d.addCallback(_after_first_cycle) d.addCallback(lambda ign: self.render1(webstatus)) def _check_html(html): s = remove_tags(html) self.failUnlessIn("The following sharetypes will be expired: immutable.", s) d.addCallback(_check_html) return d def test_only_mutable(self): basedir = "storage/LeaseCrawler/only_mutable" fileutil.make_dirs(basedir) now = time.time() then = int(now - 2000) ss = StorageServer(basedir, "\x00" * 20, expiration_enabled=True, expiration_mode="cutoff-date", expiration_cutoff_date=then, expiration_sharetypes=("mutable",)) lc = ss.lease_checker lc.slow_start = 0 webstatus = StorageStatus(ss) self.make_shares(ss) [immutable_si_0, immutable_si_1, mutable_si_2, mutable_si_3] = self.sis # set all leases to be expirable new_expiration_time = now - 3000 + 31*24*60*60 def count_shares(si): return len(list(ss._iter_share_files(si))) def _get_sharefile(si): return list(ss._iter_share_files(si))[0] def count_leases(si): return len(list(_get_sharefile(si).get_leases())) sf0 = _get_sharefile(immutable_si_0) self.backdate_lease(sf0, self.renew_secrets[0], new_expiration_time) sf1 = _get_sharefile(immutable_si_1) self.backdate_lease(sf1, self.renew_secrets[1], new_expiration_time) self.backdate_lease(sf1, self.renew_secrets[2], new_expiration_time) sf2 = _get_sharefile(mutable_si_2) self.backdate_lease(sf2, self.renew_secrets[3], new_expiration_time) sf3 = _get_sharefile(mutable_si_3) self.backdate_lease(sf3, self.renew_secrets[4], new_expiration_time) self.backdate_lease(sf3, self.renew_secrets[5], new_expiration_time) ss.setServiceParent(self.s) def _wait(): return 
bool(lc.get_state()["last-cycle-finished"] is not None) d = self.poll(_wait) def _after_first_cycle(ignored): self.failUnlessEqual(count_shares(immutable_si_0), 1) self.failUnlessEqual(count_leases(immutable_si_0), 1) self.failUnlessEqual(count_shares(immutable_si_1), 1) self.failUnlessEqual(count_leases(immutable_si_1), 2) self.failUnlessEqual(count_shares(mutable_si_2), 0) self.failUnlessEqual(count_shares(mutable_si_3), 0) d.addCallback(_after_first_cycle) d.addCallback(lambda ign: self.render1(webstatus)) def _check_html(html): s = remove_tags(html) self.failUnlessIn("The following sharetypes will be expired: mutable.", s) d.addCallback(_check_html) return d def test_bad_mode(self): basedir = "storage/LeaseCrawler/bad_mode" fileutil.make_dirs(basedir) e = self.failUnlessRaises(ValueError, StorageServer, basedir, "\x00" * 20, expiration_mode="bogus") self.failUnlessIn("GC mode 'bogus' must be 'age' or 'cutoff-date'", str(e)) def test_parse_duration(self): DAY = 24*60*60 MONTH = 31*DAY YEAR = 365*DAY p = time_format.parse_duration self.failUnlessEqual(p("7days"), 7*DAY) self.failUnlessEqual(p("31day"), 31*DAY) self.failUnlessEqual(p("60 days"), 60*DAY) self.failUnlessEqual(p("2mo"), 2*MONTH) self.failUnlessEqual(p("3 month"), 3*MONTH) self.failUnlessEqual(p("2years"), 2*YEAR) e = self.failUnlessRaises(ValueError, p, "2kumquats") self.failUnlessIn("no unit (like day, month, or year) in '2kumquats'", str(e)) def test_parse_date(self): p = time_format.parse_date self.failUnless(isinstance(p("2009-03-18"), int), p("2009-03-18")) self.failUnlessEqual(p("2009-03-18"), 1237334400) def test_limited_history(self): basedir = "storage/LeaseCrawler/limited_history" fileutil.make_dirs(basedir) ss = StorageServer(basedir, "\x00" * 20) # make it start sooner than usual. lc = ss.lease_checker lc.slow_start = 0 lc.cpu_slice = 500 # create a few shares, with some leases on them self.make_shares(ss) ss.setServiceParent(self.s) def _wait_until_15_cycles_done(): last = lc.state["last-cycle-finished"] if last is not None and last >= 15: return True if lc.timer: lc.timer.reset(0) return False d = self.poll(_wait_until_15_cycles_done) def _check(ignored): s = lc.get_state() h = s["history"] self.failUnlessEqual(len(h), 10) self.failUnlessEqual(max(h.keys()), 15) self.failUnlessEqual(min(h.keys()), 6) d.addCallback(_check) return d def test_unpredictable_future(self): basedir = "storage/LeaseCrawler/unpredictable_future" fileutil.make_dirs(basedir) ss = StorageServer(basedir, "\x00" * 20) # make it start sooner than usual. lc = ss.lease_checker lc.slow_start = 0 lc.cpu_slice = -1.0 # stop quickly self.make_shares(ss) ss.setServiceParent(self.s) d = fireEventually() def _check(ignored): # this should fire after the first bucket is complete, but before # the first prefix is complete, so the progress-measurer won't # think we've gotten far enough to raise our percent-complete # above 0%, triggering the cannot-predict-the-future code in # expirer.py . This will have to change if/when the # progress-measurer gets smart enough to count buckets (we'll # have to interrupt it even earlier, before it's finished the # first bucket). 
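# [Editor's sketch -- not part of the original test module.] test_parse_duration
# above pins down the unit arithmetic expected from
# allmydata.util.time_format.parse_duration: a day is 86400s, a month is 31
# days, a year is 365 days. A simplified illustration of that behaviour (this
# is not the real implementation):
import re as _re

_DAY = 24*60*60

def parse_duration_sketch(s):
    m = _re.match(r"^\s*(\d+)\s*([a-z]+)\s*$", s.lower())
    if m:
        count, unit = int(m.group(1)), m.group(2)
        if unit.startswith("day"):
            return count * _DAY
        if unit.startswith("mo"):
            return count * 31 * _DAY
        if unit.startswith("year"):
            return count * 365 * _DAY
    raise ValueError("no unit (like day, month, or year) in '%s'" % s)

assert parse_duration_sketch("7days") == 7*_DAY
assert parse_duration_sketch("2mo") == 2*31*_DAY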
s = lc.get_state() if "cycle-to-date" not in s: d2 = fireEventually() d2.addCallback(_check) return d2 self.failUnlessIn("cycle-to-date", s) self.failUnlessIn("estimated-remaining-cycle", s) self.failUnlessIn("estimated-current-cycle", s) left = s["estimated-remaining-cycle"]["space-recovered"] self.failUnlessEqual(left["actual-buckets"], None) self.failUnlessEqual(left["original-buckets"], None) self.failUnlessEqual(left["configured-buckets"], None) self.failUnlessEqual(left["actual-shares"], None) self.failUnlessEqual(left["original-shares"], None) self.failUnlessEqual(left["configured-shares"], None) self.failUnlessEqual(left["actual-diskbytes"], None) self.failUnlessEqual(left["original-diskbytes"], None) self.failUnlessEqual(left["configured-diskbytes"], None) self.failUnlessEqual(left["actual-sharebytes"], None) self.failUnlessEqual(left["original-sharebytes"], None) self.failUnlessEqual(left["configured-sharebytes"], None) full = s["estimated-remaining-cycle"]["space-recovered"] self.failUnlessEqual(full["actual-buckets"], None) self.failUnlessEqual(full["original-buckets"], None) self.failUnlessEqual(full["configured-buckets"], None) self.failUnlessEqual(full["actual-shares"], None) self.failUnlessEqual(full["original-shares"], None) self.failUnlessEqual(full["configured-shares"], None) self.failUnlessEqual(full["actual-diskbytes"], None) self.failUnlessEqual(full["original-diskbytes"], None) self.failUnlessEqual(full["configured-diskbytes"], None) self.failUnlessEqual(full["actual-sharebytes"], None) self.failUnlessEqual(full["original-sharebytes"], None) self.failUnlessEqual(full["configured-sharebytes"], None) d.addCallback(_check) return d def test_no_st_blocks(self): basedir = "storage/LeaseCrawler/no_st_blocks" fileutil.make_dirs(basedir) ss = No_ST_BLOCKS_StorageServer(basedir, "\x00" * 20, expiration_mode="age", expiration_override_lease_duration=-1000) # a negative expiration_time= means the "configured-" # space-recovered counts will be non-zero, since all shares will have # expired by then # make it start sooner than usual. lc = ss.lease_checker lc.slow_start = 0 self.make_shares(ss) ss.setServiceParent(self.s) def _wait(): return bool(lc.get_state()["last-cycle-finished"] is not None) d = self.poll(_wait) def _check(ignored): s = lc.get_state() last = s["history"][0] rec = last["space-recovered"] self.failUnlessEqual(rec["configured-buckets"], 4) self.failUnlessEqual(rec["configured-shares"], 4) self.failUnless(rec["configured-sharebytes"] > 0, rec["configured-sharebytes"]) # without the .st_blocks field in os.stat() results, we should be # reporting diskbytes==sharebytes self.failUnlessEqual(rec["configured-sharebytes"], rec["configured-diskbytes"]) d.addCallback(_check) return d def test_share_corruption(self): self._poll_should_ignore_these_errors = [ UnknownMutableContainerVersionError, UnknownImmutableContainerVersionError, ] basedir = "storage/LeaseCrawler/share_corruption" fileutil.make_dirs(basedir) ss = InstrumentedStorageServer(basedir, "\x00" * 20) w = StorageStatus(ss) # make it start sooner than usual. 
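# [Editor's sketch -- not part of the original test module.] test_no_st_blocks
# above exercises the fallback used when a platform's os.stat() results carry
# no st_blocks field: disk usage is then reported as the share's apparent size
# (diskbytes == sharebytes). A minimal version of that calculation; the helper
# name is hypothetical. On POSIX, st_blocks counts 512-byte units.
import os as _os

def estimated_disk_usage(path):
    s = _os.stat(path)
    if hasattr(s, "st_blocks"):
        return s.st_blocks * 512
    return s.st_size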
lc = ss.lease_checker lc.stop_after_first_bucket = True lc.slow_start = 0 lc.cpu_slice = 500 # create a few shares, with some leases on them self.make_shares(ss) # now corrupt one, and make sure the lease-checker keeps going [immutable_si_0, immutable_si_1, mutable_si_2, mutable_si_3] = self.sis first = min(self.sis) first_b32 = base32.b2a(first) fn = os.path.join(ss.sharedir, storage_index_to_dir(first), "0") f = open(fn, "rb+") f.seek(0) f.write("BAD MAGIC") f.close() # if get_share_file() doesn't see the correct mutable magic, it # assumes the file is an immutable share, and then # immutable.ShareFile sees a bad version. So regardless of which kind # of share we corrupted, this will trigger an # UnknownImmutableContainerVersionError. # also create an empty bucket empty_si = base32.b2a("\x04"*16) empty_bucket_dir = os.path.join(ss.sharedir, storage_index_to_dir(empty_si)) fileutil.make_dirs(empty_bucket_dir) ss.setServiceParent(self.s) d = fireEventually() # now examine the state right after the first bucket has been # processed. def _after_first_bucket(ignored): s = lc.get_state() if "cycle-to-date" not in s: d2 = fireEventually() d2.addCallback(_after_first_bucket) return d2 so_far = s["cycle-to-date"] rec = so_far["space-recovered"] self.failUnlessEqual(rec["examined-buckets"], 1) self.failUnlessEqual(rec["examined-shares"], 0) self.failUnlessEqual(so_far["corrupt-shares"], [(first_b32, 0)]) d.addCallback(_after_first_bucket) d.addCallback(lambda ign: self.render_json(w)) def _check_json(json): data = simplejson.loads(json) # grr. json turns all dict keys into strings. so_far = data["lease-checker"]["cycle-to-date"] corrupt_shares = so_far["corrupt-shares"] # it also turns all tuples into lists self.failUnlessEqual(corrupt_shares, [[first_b32, 0]]) d.addCallback(_check_json) d.addCallback(lambda ign: self.render1(w)) def _check_html(html): s = remove_tags(html) self.failUnlessIn("Corrupt shares: SI %s shnum 0" % first_b32, s) d.addCallback(_check_html) def _wait(): return bool(lc.get_state()["last-cycle-finished"] is not None) d.addCallback(lambda ign: self.poll(_wait)) def _after_first_cycle(ignored): s = lc.get_state() last = s["history"][0] rec = last["space-recovered"] self.failUnlessEqual(rec["examined-buckets"], 5) self.failUnlessEqual(rec["examined-shares"], 3) self.failUnlessEqual(last["corrupt-shares"], [(first_b32, 0)]) d.addCallback(_after_first_cycle) d.addCallback(lambda ign: self.render_json(w)) def _check_json_history(json): data = simplejson.loads(json) last = data["lease-checker"]["history"]["0"] corrupt_shares = last["corrupt-shares"] self.failUnlessEqual(corrupt_shares, [[first_b32, 0]]) d.addCallback(_check_json_history) d.addCallback(lambda ign: self.render1(w)) def _check_html_history(html): s = remove_tags(html) self.failUnlessIn("Corrupt shares: SI %s shnum 0" % first_b32, s) d.addCallback(_check_html_history) def _cleanup(res): self.flushLoggedErrors(UnknownMutableContainerVersionError, UnknownImmutableContainerVersionError) return res d.addBoth(_cleanup) return d def render_json(self, page): d = self.render1(page, args={"t": ["json"]}) return d class WebStatus(unittest.TestCase, pollmixin.PollMixin, WebRenderingMixin): def setUp(self): self.s = service.MultiService() self.s.startService() def tearDown(self): return self.s.stopService() def test_no_server(self): w = StorageStatus(None) html = w.renderSynchronously() self.failUnlessIn("

<h1>No Storage Server Running</h1>
", html) def test_status(self): basedir = "storage/WebStatus/status" fileutil.make_dirs(basedir) nodeid = "\x00" * 20 ss = StorageServer(basedir, nodeid) ss.setServiceParent(self.s) w = StorageStatus(ss, "nickname") d = self.render1(w) def _check_html(html): self.failUnlessIn("

<h1>Storage Server Status</h1>
", html) s = remove_tags(html) self.failUnlessIn("Server Nickname: nickname", s) self.failUnlessIn("Server Nodeid: %s" % base32.b2a(nodeid), s) self.failUnlessIn("Accepting new shares: Yes", s) self.failUnlessIn("Reserved space: - 0 B (0)", s) d.addCallback(_check_html) d.addCallback(lambda ign: self.render_json(w)) def _check_json(json): data = simplejson.loads(json) s = data["stats"] self.failUnlessEqual(s["storage_server.accepting_immutable_shares"], 1) self.failUnlessEqual(s["storage_server.reserved_space"], 0) self.failUnlessIn("bucket-counter", data) self.failUnlessIn("lease-checker", data) d.addCallback(_check_json) return d def render_json(self, page): d = self.render1(page, args={"t": ["json"]}) return d @mock.patch('allmydata.util.fileutil.get_disk_stats') def test_status_no_disk_stats(self, mock_get_disk_stats): mock_get_disk_stats.side_effect = AttributeError() # Some platforms may have no disk stats API. Make sure the code can handle that # (test runs on all platforms). basedir = "storage/WebStatus/status_no_disk_stats" fileutil.make_dirs(basedir) ss = StorageServer(basedir, "\x00" * 20) ss.setServiceParent(self.s) w = StorageStatus(ss) html = w.renderSynchronously() self.failUnlessIn("

<h1>Storage Server Status</h1>
", html) s = remove_tags(html) self.failUnlessIn("Accepting new shares: Yes", s) self.failUnlessIn("Total disk space: ?", s) self.failUnlessIn("Space Available to Tahoe: ?", s) self.failUnless(ss.get_available_space() is None) @mock.patch('allmydata.util.fileutil.get_disk_stats') def test_status_bad_disk_stats(self, mock_get_disk_stats): mock_get_disk_stats.side_effect = OSError() # If the API to get disk stats exists but a call to it fails, then the status should # show that no shares will be accepted, and get_available_space() should be 0. basedir = "storage/WebStatus/status_bad_disk_stats" fileutil.make_dirs(basedir) ss = StorageServer(basedir, "\x00" * 20) ss.setServiceParent(self.s) w = StorageStatus(ss) html = w.renderSynchronously() self.failUnlessIn("

<h1>Storage Server Status</h1>
", html) s = remove_tags(html) self.failUnlessIn("Accepting new shares: No", s) self.failUnlessIn("Total disk space: ?", s) self.failUnlessIn("Space Available to Tahoe: ?", s) self.failUnlessEqual(ss.get_available_space(), 0) @mock.patch('allmydata.util.fileutil.get_disk_stats') def test_status_right_disk_stats(self, mock_get_disk_stats): GB = 1000000000 total = 5*GB free_for_root = 4*GB free_for_nonroot = 3*GB reserved_space = 1*GB used = total - free_for_root avail = max(free_for_nonroot - reserved_space, 0) mock_get_disk_stats.return_value = { 'total': total, 'free_for_root': free_for_root, 'free_for_nonroot': free_for_nonroot, 'used': used, 'avail': avail, } basedir = "storage/WebStatus/status_right_disk_stats" fileutil.make_dirs(basedir) ss = StorageServer(basedir, "\x00" * 20, reserved_space=reserved_space) expecteddir = ss.sharedir ss.setServiceParent(self.s) w = StorageStatus(ss) html = w.renderSynchronously() self.failIf([True for args in mock_get_disk_stats.call_args_list if args != ((expecteddir, reserved_space), {})], mock_get_disk_stats.call_args_list) self.failUnlessIn("

<h1>Storage Server Status</h1>
", html) s = remove_tags(html) self.failUnlessIn("Total disk space: 5.00 GB", s) self.failUnlessIn("Disk space used: - 1.00 GB", s) self.failUnlessIn("Disk space free (root): 4.00 GB", s) self.failUnlessIn("Disk space free (non-root): 3.00 GB", s) self.failUnlessIn("Reserved space: - 1.00 GB", s) self.failUnlessIn("Space Available to Tahoe: 2.00 GB", s) self.failUnlessEqual(ss.get_available_space(), 2*GB) def test_readonly(self): basedir = "storage/WebStatus/readonly" fileutil.make_dirs(basedir) ss = StorageServer(basedir, "\x00" * 20, readonly_storage=True) ss.setServiceParent(self.s) w = StorageStatus(ss) html = w.renderSynchronously() self.failUnlessIn("

<h1>Storage Server Status</h1>
", html) s = remove_tags(html) self.failUnlessIn("Accepting new shares: No", s) def test_reserved(self): basedir = "storage/WebStatus/reserved" fileutil.make_dirs(basedir) ss = StorageServer(basedir, "\x00" * 20, reserved_space=10e6) ss.setServiceParent(self.s) w = StorageStatus(ss) html = w.renderSynchronously() self.failUnlessIn("

<h1>Storage Server Status</h1>
", html) s = remove_tags(html) self.failUnlessIn("Reserved space: - 10.00 MB (10000000)", s) def test_huge_reserved(self): basedir = "storage/WebStatus/reserved" fileutil.make_dirs(basedir) ss = StorageServer(basedir, "\x00" * 20, reserved_space=10e6) ss.setServiceParent(self.s) w = StorageStatus(ss) html = w.renderSynchronously() self.failUnlessIn("

<h1>Storage Server Status</h1>", html)
        s = remove_tags(html)
        self.failUnlessIn("Reserved space: - 10.00 MB (10000000)", s)

    def test_util(self):
        w = StorageStatus(None)
        self.failUnlessEqual(w.render_space(None, None), "?")
        self.failUnlessEqual(w.render_space(None, 10e6), "10000000")
        self.failUnlessEqual(w.render_abbrev_space(None, None), "?")
        self.failUnlessEqual(w.render_abbrev_space(None, 10e6), "10.00 MB")
        self.failUnlessEqual(remove_prefix("foo.bar", "foo."), "bar")
        self.failUnlessEqual(remove_prefix("foo.bar", "baz."), None)

tahoe-lafs-1.10.0/src/allmydata/test/test_system.py

import os, re, sys, time, simplejson
from cStringIO import StringIO
from twisted.trial import unittest
from twisted.internet import defer
from twisted.internet import threads # CLI tests use deferToThread
import allmydata
from allmydata import uri
from allmydata.storage.mutable import MutableShareFile
from allmydata.storage.server import si_a2b
from allmydata.immutable import offloaded, upload
from allmydata.immutable.literal import LiteralFileNode
from allmydata.immutable.filenode import ImmutableFileNode
from allmydata.util import idlib, mathutil
from allmydata.util import log, base32
from allmydata.util.verlib import NormalizedVersion
from allmydata.util.encodingutil import quote_output, unicode_to_argv, get_filesystem_encoding
from allmydata.util.fileutil import abspath_expanduser_unicode
from allmydata.util.consumer import MemoryConsumer, download_to_data
from allmydata.scripts import runner
from allmydata.interfaces import IDirectoryNode, IFileNode, \
     NoSuchChildError, NoSharesError
from allmydata.monitor import Monitor
from allmydata.mutable.common import NotWriteableError
from allmydata.mutable import layout as mutable_layout
from allmydata.mutable.publish import MutableData

import foolscap
from foolscap.api import DeadReferenceError, fireEventually
from twisted.python.failure import Failure
from twisted.web.client import getPage
from twisted.web.error import Error

from allmydata.test.common import SystemTestMixin

# TODO: move this to common or common_util
from allmydata.test.test_runner import RunBinTahoeMixin

LARGE_DATA = """
This is some data to publish to the remote grid.., which needs to be large
enough to not fit inside a LIT uri.
"""

class CountingDataUploadable(upload.Data):
    bytes_read = 0
    interrupt_after = None
    interrupt_after_d = None

    def read(self, length):
        self.bytes_read += length
        if self.interrupt_after is not None:
            if self.bytes_read > self.interrupt_after:
                self.interrupt_after = None
                self.interrupt_after_d.callback(self)
        return upload.Data.read(self, length)

class SystemTest(SystemTestMixin, RunBinTahoeMixin, unittest.TestCase):
    timeout = 3600 # It takes longer than 960 seconds on Zandr's ARM box.
def test_connections(self): self.basedir = "system/SystemTest/test_connections" d = self.set_up_nodes() self.extra_node = None d.addCallback(lambda res: self.add_extra_node(self.numclients)) def _check(extra_node): self.extra_node = extra_node for c in self.clients: all_peerids = c.get_storage_broker().get_all_serverids() self.failUnlessEqual(len(all_peerids), self.numclients+1) sb = c.storage_broker permuted_peers = sb.get_servers_for_psi("a") self.failUnlessEqual(len(permuted_peers), self.numclients+1) d.addCallback(_check) def _shutdown_extra_node(res): if self.extra_node: return self.extra_node.stopService() return res d.addBoth(_shutdown_extra_node) return d # test_connections is subsumed by test_upload_and_download, and takes # quite a while to run on a slow machine (because of all the TLS # connections that must be established). If we ever rework the introducer # code to such an extent that we're not sure if it works anymore, we can # reinstate this test until it does. del test_connections def test_upload_and_download_random_key(self): self.basedir = "system/SystemTest/test_upload_and_download_random_key" return self._test_upload_and_download(convergence=None) def test_upload_and_download_convergent(self): self.basedir = "system/SystemTest/test_upload_and_download_convergent" return self._test_upload_and_download(convergence="some convergence string") def _test_upload_and_download(self, convergence): # we use 4000 bytes of data, which will result in about 400k written # to disk among all our simulated nodes DATA = "Some data to upload\n" * 200 d = self.set_up_nodes() def _check_connections(res): for c in self.clients: c.DEFAULT_ENCODING_PARAMETERS['happy'] = 5 all_peerids = c.get_storage_broker().get_all_serverids() self.failUnlessEqual(len(all_peerids), self.numclients) sb = c.storage_broker permuted_peers = sb.get_servers_for_psi("a") self.failUnlessEqual(len(permuted_peers), self.numclients) d.addCallback(_check_connections) def _do_upload(res): log.msg("UPLOADING") u = self.clients[0].getServiceNamed("uploader") self.uploader = u # we crank the max segsize down to 1024b for the duration of this # test, so we can exercise multiple segments. It is important # that this is not a multiple of the segment size, so that the # tail segment is not the same length as the others. This actualy # gets rounded up to 1025 to be a multiple of the number of # required shares (since we use 25 out of 100 FEC). up = upload.Data(DATA, convergence=convergence) up.max_segment_size = 1024 d1 = u.upload(up) return d1 d.addCallback(_do_upload) def _upload_done(results): theuri = results.get_uri() log.msg("upload finished: uri is %s" % (theuri,)) self.uri = theuri assert isinstance(self.uri, str), self.uri self.cap = uri.from_string(self.uri) self.n = self.clients[1].create_node_from_uri(self.uri) d.addCallback(_upload_done) def _upload_again(res): # Upload again. If using convergent encryption then this ought to be # short-circuited, however with the way we currently generate URIs # (i.e. because they include the roothash), we have to do all of the # encoding work, and only get to save on the upload part. 
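# [Editor's sketch -- not part of the original test module.] The comment above
# notes that max_segment_size=1024 is effectively rounded up to 1025 so that
# every segment divides evenly among the k=25 required shares (25-of-100 FEC).
# That rounding is simply "next multiple of k"; a worked illustration (the
# helper name is hypothetical, not the uploader's own):
def _next_multiple(n, k):
    """Smallest multiple of k that is >= n."""
    return ((n + k - 1) // k) * k

assert _next_multiple(1024, 25) == 1025   # the case described in the comment
assert _next_multiple(1025, 25) == 1025   # already a multiple: unchanged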
log.msg("UPLOADING AGAIN") up = upload.Data(DATA, convergence=convergence) up.max_segment_size = 1024 return self.uploader.upload(up) d.addCallback(_upload_again) def _download_to_data(res): log.msg("DOWNLOADING") return download_to_data(self.n) d.addCallback(_download_to_data) def _download_to_data_done(data): log.msg("download finished") self.failUnlessEqual(data, DATA) d.addCallback(_download_to_data_done) def _test_read(res): n = self.clients[1].create_node_from_uri(self.uri) d = download_to_data(n) def _read_done(data): self.failUnlessEqual(data, DATA) d.addCallback(_read_done) d.addCallback(lambda ign: n.read(MemoryConsumer(), offset=1, size=4)) def _read_portion_done(mc): self.failUnlessEqual("".join(mc.chunks), DATA[1:1+4]) d.addCallback(_read_portion_done) d.addCallback(lambda ign: n.read(MemoryConsumer(), offset=2, size=None)) def _read_tail_done(mc): self.failUnlessEqual("".join(mc.chunks), DATA[2:]) d.addCallback(_read_tail_done) d.addCallback(lambda ign: n.read(MemoryConsumer(), size=len(DATA)+1000)) def _read_too_much(mc): self.failUnlessEqual("".join(mc.chunks), DATA) d.addCallback(_read_too_much) return d d.addCallback(_test_read) def _test_bad_read(res): bad_u = uri.from_string_filenode(self.uri) bad_u.key = self.flip_bit(bad_u.key) bad_n = self.clients[1].create_node_from_uri(bad_u.to_string()) # this should cause an error during download d = self.shouldFail2(NoSharesError, "'download bad node'", None, bad_n.read, MemoryConsumer(), offset=2) return d d.addCallback(_test_bad_read) def _download_nonexistent_uri(res): baduri = self.mangle_uri(self.uri) badnode = self.clients[1].create_node_from_uri(baduri) log.msg("about to download non-existent URI", level=log.UNUSUAL, facility="tahoe.tests") d1 = download_to_data(badnode) def _baduri_should_fail(res): log.msg("finished downloading non-existent URI", level=log.UNUSUAL, facility="tahoe.tests") self.failUnless(isinstance(res, Failure)) self.failUnless(res.check(NoSharesError), "expected NoSharesError, got %s" % res) d1.addBoth(_baduri_should_fail) return d1 d.addCallback(_download_nonexistent_uri) # add a new node, which doesn't accept shares, and only uses the # helper for upload. 
d.addCallback(lambda res: self.add_extra_node(self.numclients, self.helper_furl, add_to_sparent=True)) def _added(extra_node): self.extra_node = extra_node self.extra_node.DEFAULT_ENCODING_PARAMETERS['happy'] = 5 d.addCallback(_added) def _has_helper(): uploader = self.extra_node.getServiceNamed("uploader") furl, connected = uploader.get_helper_info() return connected d.addCallback(lambda ign: self.poll(_has_helper)) HELPER_DATA = "Data that needs help to upload" * 1000 def _upload_with_helper(res): u = upload.Data(HELPER_DATA, convergence=convergence) d = self.extra_node.upload(u) def _uploaded(results): n = self.clients[1].create_node_from_uri(results.get_uri()) return download_to_data(n) d.addCallback(_uploaded) def _check(newdata): self.failUnlessEqual(newdata, HELPER_DATA) d.addCallback(_check) return d d.addCallback(_upload_with_helper) def _upload_duplicate_with_helper(res): u = upload.Data(HELPER_DATA, convergence=convergence) u.debug_stash_RemoteEncryptedUploadable = True d = self.extra_node.upload(u) def _uploaded(results): n = self.clients[1].create_node_from_uri(results.get_uri()) return download_to_data(n) d.addCallback(_uploaded) def _check(newdata): self.failUnlessEqual(newdata, HELPER_DATA) self.failIf(hasattr(u, "debug_RemoteEncryptedUploadable"), "uploadable started uploading, should have been avoided") d.addCallback(_check) return d if convergence is not None: d.addCallback(_upload_duplicate_with_helper) d.addCallback(fireEventually) def _upload_resumable(res): DATA = "Data that needs help to upload and gets interrupted" * 1000 u1 = CountingDataUploadable(DATA, convergence=convergence) u2 = CountingDataUploadable(DATA, convergence=convergence) # we interrupt the connection after about 5kB by shutting down # the helper, then restarting it. u1.interrupt_after = 5000 u1.interrupt_after_d = defer.Deferred() bounced_d = defer.Deferred() def _do_bounce(res): d = self.bounce_client(0) d.addBoth(bounced_d.callback) u1.interrupt_after_d.addCallback(_do_bounce) # sneak into the helper and reduce its chunk size, so that our # debug_interrupt will sever the connection on about the fifth # chunk fetched. This makes sure that we've started to write the # new shares before we abandon them, which exercises the # abort/delete-partial-share code. TODO: find a cleaner way to do # this. I know that this will affect later uses of the helper in # this same test run, but I'm not currently worried about it. offloaded.CHKCiphertextFetcher.CHUNK_SIZE = 1000 upload_d = self.extra_node.upload(u1) # The upload will start, and bounce_client() will be called after # about 5kB. bounced_d will fire after bounce_client() finishes # shutting down and restarting the node. d = bounced_d def _bounced(ign): # By this point, the upload should have failed because of the # interruption. upload_d will fire in a moment def _should_not_finish(res): self.fail("interrupted upload should have failed, not" " finished with result %s" % (res,)) def _interrupted(f): f.trap(DeadReferenceError) # make sure we actually interrupted it before finishing # the file self.failUnless(u1.bytes_read < len(DATA), "read %d out of %d total" % (u1.bytes_read, len(DATA))) upload_d.addCallbacks(_should_not_finish, _interrupted) return upload_d d.addCallback(_bounced) def _disconnected(res): # check to make sure the storage servers aren't still hanging # on to the partial share: their incoming/ directories should # now be empty. 
log.msg("disconnected", level=log.NOISY, facility="tahoe.test.test_system") for i in range(self.numclients): incdir = os.path.join(self.getdir("client%d" % i), "storage", "shares", "incoming") self.failIf(os.path.exists(incdir) and os.listdir(incdir)) d.addCallback(_disconnected) d.addCallback(lambda res: log.msg("wait_for_helper", level=log.NOISY, facility="tahoe.test.test_system")) # then we need to wait for the extra node to reestablish its # connection to the helper. d.addCallback(lambda ign: self.poll(_has_helper)) d.addCallback(lambda res: log.msg("uploading again", level=log.NOISY, facility="tahoe.test.test_system")) d.addCallback(lambda res: self.extra_node.upload(u2)) def _uploaded(results): cap = results.get_uri() log.msg("Second upload complete", level=log.NOISY, facility="tahoe.test.test_system") # this is really bytes received rather than sent, but it's # convenient and basically measures the same thing bytes_sent = results.get_ciphertext_fetched() self.failUnless(isinstance(bytes_sent, (int, long)), bytes_sent) # We currently don't support resumption of upload if the data is # encrypted with a random key. (Because that would require us # to store the key locally and re-use it on the next upload of # this file, which isn't a bad thing to do, but we currently # don't do it.) if convergence is not None: # Make sure we did not have to read the whole file the # second time around . self.failUnless(bytes_sent < len(DATA), "resumption didn't save us any work:" " read %r bytes out of %r total" % (bytes_sent, len(DATA))) else: # Make sure we did have to read the whole file the second # time around -- because the one that we partially uploaded # earlier was encrypted with a different random key. self.failIf(bytes_sent < len(DATA), "resumption saved us some work even though we were using random keys:" " read %r bytes out of %r total" % (bytes_sent, len(DATA))) n = self.clients[1].create_node_from_uri(cap) return download_to_data(n) d.addCallback(_uploaded) def _check(newdata): self.failUnlessEqual(newdata, DATA) # If using convergent encryption, then also check that the # helper has removed the temp file from its directories. if convergence is not None: basedir = os.path.join(self.getdir("client0"), "helper") files = os.listdir(os.path.join(basedir, "CHK_encoding")) self.failUnlessEqual(files, []) files = os.listdir(os.path.join(basedir, "CHK_incoming")) self.failUnlessEqual(files, []) d.addCallback(_check) return d d.addCallback(_upload_resumable) def _grab_stats(ignored): # the StatsProvider doesn't normally publish a FURL: # instead it passes a live reference to the StatsGatherer # (if and when it connects). To exercise the remote stats # interface, we manually publish client0's StatsProvider # and use client1 to query it. 
sp = self.clients[0].stats_provider sp_furl = self.clients[0].tub.registerReference(sp) d = self.clients[1].tub.getReference(sp_furl) d.addCallback(lambda sp_rref: sp_rref.callRemote("get_stats")) def _got_stats(stats): #print "STATS" #from pprint import pprint #pprint(stats) s = stats["stats"] self.failUnlessEqual(s["storage_server.accepting_immutable_shares"], 1) c = stats["counters"] self.failUnless("storage_server.allocate" in c) d.addCallback(_got_stats) return d d.addCallback(_grab_stats) return d def _find_all_shares(self, basedir): shares = [] for (dirpath, dirnames, filenames) in os.walk(basedir): if "storage" not in dirpath: continue if not filenames: continue pieces = dirpath.split(os.sep) if (len(pieces) >= 5 and pieces[-4] == "storage" and pieces[-3] == "shares"): # we're sitting in .../storage/shares/$START/$SINDEX , and there # are sharefiles here assert pieces[-5].startswith("client") client_num = int(pieces[-5][-1]) storage_index_s = pieces[-1] storage_index = si_a2b(storage_index_s) for sharename in filenames: shnum = int(sharename) filename = os.path.join(dirpath, sharename) data = (client_num, storage_index, filename, shnum) shares.append(data) if not shares: self.fail("unable to find any share files in %s" % basedir) return shares def _corrupt_mutable_share(self, filename, which): msf = MutableShareFile(filename) datav = msf.readv([ (0, 1000000) ]) final_share = datav[0] assert len(final_share) < 1000000 # ought to be truncated pieces = mutable_layout.unpack_share(final_share) (seqnum, root_hash, IV, k, N, segsize, datalen, verification_key, signature, share_hash_chain, block_hash_tree, share_data, enc_privkey) = pieces if which == "seqnum": seqnum = seqnum + 15 elif which == "R": root_hash = self.flip_bit(root_hash) elif which == "IV": IV = self.flip_bit(IV) elif which == "segsize": segsize = segsize + 15 elif which == "pubkey": verification_key = self.flip_bit(verification_key) elif which == "signature": signature = self.flip_bit(signature) elif which == "share_hash_chain": nodenum = share_hash_chain.keys()[0] share_hash_chain[nodenum] = self.flip_bit(share_hash_chain[nodenum]) elif which == "block_hash_tree": block_hash_tree[-1] = self.flip_bit(block_hash_tree[-1]) elif which == "share_data": share_data = self.flip_bit(share_data) elif which == "encprivkey": enc_privkey = self.flip_bit(enc_privkey) prefix = mutable_layout.pack_prefix(seqnum, root_hash, IV, k, N, segsize, datalen) final_share = mutable_layout.pack_share(prefix, verification_key, signature, share_hash_chain, block_hash_tree, share_data, enc_privkey) msf.writev( [(0, final_share)], None) def test_mutable(self): self.basedir = "system/SystemTest/test_mutable" DATA = "initial contents go here." # 25 bytes % 3 != 0 DATA_uploadable = MutableData(DATA) NEWDATA = "new contents yay" NEWDATA_uploadable = MutableData(NEWDATA) NEWERDATA = "this is getting old" NEWERDATA_uploadable = MutableData(NEWERDATA) d = self.set_up_nodes(use_key_generator=True) def _create_mutable(res): c = self.clients[0] log.msg("starting create_mutable_file") d1 = c.create_mutable_file(DATA_uploadable) def _done(res): log.msg("DONE: %s" % (res,)) self._mutable_node_1 = res d1.addCallback(_done) return d1 d.addCallback(_create_mutable) def _test_debug(res): # find a share. It is important to run this while there is only # one slot in the grid. 
shares = self._find_all_shares(self.basedir) (client_num, storage_index, filename, shnum) = shares[0] log.msg("test_system.SystemTest.test_mutable._test_debug using %s" % filename) log.msg(" for clients[%d]" % client_num) out,err = StringIO(), StringIO() rc = runner.runner(["debug", "dump-share", "--offsets", filename], stdout=out, stderr=err) output = out.getvalue() self.failUnlessEqual(rc, 0) try: self.failUnless("Mutable slot found:\n" in output) self.failUnless("share_type: SDMF\n" in output) peerid = idlib.nodeid_b2a(self.clients[client_num].nodeid) self.failUnless(" WE for nodeid: %s\n" % peerid in output) self.failUnless(" num_extra_leases: 0\n" in output) self.failUnless(" secrets are for nodeid: %s\n" % peerid in output) self.failUnless(" SDMF contents:\n" in output) self.failUnless(" seqnum: 1\n" in output) self.failUnless(" required_shares: 3\n" in output) self.failUnless(" total_shares: 10\n" in output) self.failUnless(" segsize: 27\n" in output, (output, filename)) self.failUnless(" datalen: 25\n" in output) # the exact share_hash_chain nodes depends upon the sharenum, # and is more of a hassle to compute than I want to deal with # now self.failUnless(" share_hash_chain: " in output) self.failUnless(" block_hash_tree: 1 nodes\n" in output) expected = (" verify-cap: URI:SSK-Verifier:%s:" % base32.b2a(storage_index)) self.failUnless(expected in output) except unittest.FailTest: print print "dump-share output was:" print output raise d.addCallback(_test_debug) # test retrieval # first, let's see if we can use the existing node to retrieve the # contents. This allows it to use the cached pubkey and maybe the # latest-known sharemap. d.addCallback(lambda res: self._mutable_node_1.download_best_version()) def _check_download_1(res): self.failUnlessEqual(res, DATA) # now we see if we can retrieve the data from a new node, # constructed using the URI of the original one. We do this test # on the same client that uploaded the data. uri = self._mutable_node_1.get_uri() log.msg("starting retrieve1") newnode = self.clients[0].create_node_from_uri(uri) newnode_2 = self.clients[0].create_node_from_uri(uri) self.failUnlessIdentical(newnode, newnode_2) return newnode.download_best_version() d.addCallback(_check_download_1) def _check_download_2(res): self.failUnlessEqual(res, DATA) # same thing, but with a different client uri = self._mutable_node_1.get_uri() newnode = self.clients[1].create_node_from_uri(uri) log.msg("starting retrieve2") d1 = newnode.download_best_version() d1.addCallback(lambda res: (res, newnode)) return d1 d.addCallback(_check_download_2) def _check_download_3((res, newnode)): self.failUnlessEqual(res, DATA) # replace the data log.msg("starting replace1") d1 = newnode.overwrite(NEWDATA_uploadable) d1.addCallback(lambda res: newnode.download_best_version()) return d1 d.addCallback(_check_download_3) def _check_download_4(res): self.failUnlessEqual(res, NEWDATA) # now create an even newer node and replace the data on it. This # new node has never been used for download before. 
            uri = self._mutable_node_1.get_uri()
            newnode1 = self.clients[2].create_node_from_uri(uri)
            newnode2 = self.clients[3].create_node_from_uri(uri)
            self._newnode3 = self.clients[3].create_node_from_uri(uri)
            log.msg("starting replace2")
            d1 = newnode1.overwrite(NEWERDATA_uploadable)
            d1.addCallback(lambda res: newnode2.download_best_version())
            return d1
        d.addCallback(_check_download_4)

        def _check_download_5(res):
            log.msg("finished replace2")
            self.failUnlessEqual(res, NEWERDATA)
        d.addCallback(_check_download_5)

        def _corrupt_shares(res):
            # run around and flip bits in all but k of the shares, to test
            # the hash checks
            shares = self._find_all_shares(self.basedir)
            ## sort by share number
            #shares.sort( lambda a,b: cmp(a[3], b[3]) )
            where = dict([ (shnum, filename)
                           for (client_num, storage_index, filename, shnum)
                           in shares ])
            assert len(where) == 10 # this test is designed for 3-of-10
            for shnum, filename in where.items():
                # shares 7,8,9 are left alone. read will check
                # (share_hash_chain, block_hash_tree, share_data). New
                # seqnum+R pairs will trigger a check of (seqnum, R, IV,
                # segsize, signature).
                if shnum == 0:
                    # read: this will trigger "pubkey doesn't match
                    # fingerprint".
                    self._corrupt_mutable_share(filename, "pubkey")
                    self._corrupt_mutable_share(filename, "encprivkey")
                elif shnum == 1:
                    # triggers "signature is invalid"
                    self._corrupt_mutable_share(filename, "seqnum")
                elif shnum == 2:
                    # triggers "signature is invalid"
                    self._corrupt_mutable_share(filename, "R")
                elif shnum == 3:
                    # triggers "signature is invalid"
                    self._corrupt_mutable_share(filename, "segsize")
                elif shnum == 4:
                    self._corrupt_mutable_share(filename, "share_hash_chain")
                elif shnum == 5:
                    self._corrupt_mutable_share(filename, "block_hash_tree")
                elif shnum == 6:
                    self._corrupt_mutable_share(filename, "share_data")
                # other things to correct: IV, signature
                # 7,8,9 are left alone

            # note that initial_query_count=5 means that we'll hit the
            # first 5 servers in effectively random order (based upon
            # response time), so we won't necessarily ever get a "pubkey
            # doesn't match fingerprint" error (if we hit shnum>=1 before
            # shnum=0, we pull the pubkey from there). To get repeatable
            # specific failures, we need to set initial_query_count=1,
            # but of course that will change the sequencing behavior of
            # the retrieval process. TODO: find a reasonable way to make
            # this a parameter, probably when we expand this test to test
            # for one failure mode at a time.

            # when we retrieve this, we should get three signature
            # failures (where we've mangled seqnum, R, and segsize). The
            # pubkey mangling
        d.addCallback(_corrupt_shares)

        d.addCallback(lambda res: self._newnode3.download_best_version())
        d.addCallback(_check_download_5)

        def _check_empty_file(res):
            # make sure we can create empty files, this usually screws up the
            # segsize math
            d1 = self.clients[2].create_mutable_file(MutableData(""))
            d1.addCallback(lambda newnode: newnode.download_best_version())
            d1.addCallback(lambda res: self.failUnlessEqual("", res))
            return d1
        d.addCallback(_check_empty_file)

        d.addCallback(lambda res: self.clients[0].create_dirnode())
        def _created_dirnode(dnode):
            log.msg("_created_dirnode(%s)" % (dnode,))
            d1 = dnode.list()
            d1.addCallback(lambda children: self.failUnlessEqual(children, {}))
            d1.addCallback(lambda res: dnode.has_child(u"edgar"))
            d1.addCallback(lambda answer: self.failUnlessEqual(answer, False))
            d1.addCallback(lambda res: dnode.set_node(u"see recursive", dnode))
            d1.addCallback(lambda res: dnode.has_child(u"see recursive"))
            d1.addCallback(lambda answer: self.failUnlessEqual(answer, True))
            d1.addCallback(lambda res: dnode.build_manifest().when_done())
            d1.addCallback(lambda res:
                           self.failUnlessEqual(len(res["manifest"]), 1))
            return d1
        d.addCallback(_created_dirnode)

        def wait_for_c3_kg_conn():
            return self.clients[3]._key_generator is not None
        d.addCallback(lambda junk: self.poll(wait_for_c3_kg_conn))

        def check_kg_poolsize(junk, size_delta):
            self.failUnlessEqual(len(self.key_generator_svc.key_generator.keypool),
                                 self.key_generator_svc.key_generator.pool_size + size_delta)

        d.addCallback(check_kg_poolsize, 0)
        d.addCallback(lambda junk:
                      self.clients[3].create_mutable_file(MutableData('hello, world')))
        d.addCallback(check_kg_poolsize, -1)
        d.addCallback(lambda junk: self.clients[3].create_dirnode())
        d.addCallback(check_kg_poolsize, -2)
        # use_helper induces use of clients[3], which is the using-key_gen client
        d.addCallback(lambda junk:
                      self.POST("uri?t=mkdir&name=george", use_helper=True))
        d.addCallback(check_kg_poolsize, -3)
        return d

    def flip_bit(self, good):
        return good[:-1] + chr(ord(good[-1]) ^ 0x01)

    def mangle_uri(self, gooduri):
        # change the key, which changes the storage index, which means we'll
        # be asking about the wrong file, so nobody will have any shares
        u = uri.from_string(gooduri)
        u2 = uri.CHKFileURI(key=self.flip_bit(u.key),
                            uri_extension_hash=u.uri_extension_hash,
                            needed_shares=u.needed_shares,
                            total_shares=u.total_shares,
                            size=u.size)
        return u2.to_string()

    # TODO: add a test which mangles the uri_extension_hash instead, and
    # should fail due to not being able to get a valid uri_extension block.
    # Also a test which sneakily mangles the uri_extension block to change
    # some of the validation data, so it will fail in the post-download phase
    # when the file's crypttext integrity check fails. Do the same thing for
    # the key, which should cause the download to fail the post-download
    # plaintext_hash check.
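    # For reference (illustrative comment only, not a new test): flip_bit()
    # XORs the low bit of the *last* byte of a string, which is enough to
    # invalidate a key or hash while preserving its length. Assuming the
    # Python 2 byte-strings used throughout this file:
    #
    #     flip_bit("abc") == "ab" + chr(ord("c") ^ 0x01) == "abb"
    #
    # mangle_uri() relies on this to produce a syntactically valid CHK cap
    # whose key, and therefore whose storage index, matches no uploaded
    # shares, so downloads of the mangled cap find nothing.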
def test_filesystem(self): self.basedir = "system/SystemTest/test_filesystem" self.data = LARGE_DATA d = self.set_up_nodes(use_stats_gatherer=True) def _new_happy_semantics(ign): for c in self.clients: c.DEFAULT_ENCODING_PARAMETERS['happy'] = 1 d.addCallback(_new_happy_semantics) d.addCallback(self._test_introweb) d.addCallback(self.log, "starting publish") d.addCallback(self._do_publish1) d.addCallback(self._test_runner) d.addCallback(self._do_publish2) # at this point, we have the following filesystem (where "R" denotes # self._root_directory_uri): # R # R/subdir1 # R/subdir1/mydata567 # R/subdir1/subdir2/ # R/subdir1/subdir2/mydata992 d.addCallback(lambda res: self.bounce_client(0)) d.addCallback(self.log, "bounced client0") d.addCallback(self._check_publish1) d.addCallback(self.log, "did _check_publish1") d.addCallback(self._check_publish2) d.addCallback(self.log, "did _check_publish2") d.addCallback(self._do_publish_private) d.addCallback(self.log, "did _do_publish_private") # now we also have (where "P" denotes a new dir): # P/personal/sekrit data # P/s2-rw -> /subdir1/subdir2/ # P/s2-ro -> /subdir1/subdir2/ (read-only) d.addCallback(self._check_publish_private) d.addCallback(self.log, "did _check_publish_private") d.addCallback(self._test_web) d.addCallback(self._test_control) d.addCallback(self._test_cli) # P now has four top-level children: # P/personal/sekrit data # P/s2-ro/ # P/s2-rw/ # P/test_put/ (empty) d.addCallback(self._test_checker) return d def _test_introweb(self, res): d = getPage(self.introweb_url, method="GET", followRedirect=True) def _check(res): try: self.failUnless("%s: %s" % (allmydata.__appname__, allmydata.__version__) in res) verstr = str(allmydata.__version__) # The Python "rational version numbering" convention # disallows "-r$REV" but allows ".post$REV" # instead. Eventually we'll probably move to # that. 
When we do, this test won't go red: ix = verstr.rfind('-r') if ix != -1: altverstr = verstr[:ix] + '.post' + verstr[ix+2:] else: ix = verstr.rfind('.post') if ix != -1: altverstr = verstr[:ix] + '-r' + verstr[ix+5:] else: altverstr = verstr appverstr = "%s: %s" % (allmydata.__appname__, verstr) newappverstr = "%s: %s" % (allmydata.__appname__, altverstr) self.failUnless((appverstr in res) or (newappverstr in res), (appverstr, newappverstr, res)) self.failUnless("Announcement Summary: storage: 5" in res) self.failUnless("Subscription Summary: storage: 5" in res) self.failUnless("tahoe.css" in res) except unittest.FailTest: print print "GET %s output was:" % self.introweb_url print res raise d.addCallback(_check) # make sure it serves the CSS too d.addCallback(lambda res: getPage(self.introweb_url+"tahoe.css", method="GET")) d.addCallback(lambda res: getPage(self.introweb_url + "?t=json", method="GET", followRedirect=True)) def _check_json(res): data = simplejson.loads(res) try: self.failUnlessEqual(data["subscription_summary"], {"storage": 5}) self.failUnlessEqual(data["announcement_summary"], {"storage": 5}) self.failUnlessEqual(data["announcement_distinct_hosts"], {"storage": 1}) except unittest.FailTest: print print "GET %s?t=json output was:" % self.introweb_url print res raise d.addCallback(_check_json) return d def _do_publish1(self, res): ut = upload.Data(self.data, convergence=None) c0 = self.clients[0] d = c0.create_dirnode() def _made_root(new_dirnode): self._root_directory_uri = new_dirnode.get_uri() return c0.create_node_from_uri(self._root_directory_uri) d.addCallback(_made_root) d.addCallback(lambda root: root.create_subdirectory(u"subdir1")) def _made_subdir1(subdir1_node): self._subdir1_node = subdir1_node d1 = subdir1_node.add_file(u"mydata567", ut) d1.addCallback(self.log, "publish finished") def _stash_uri(filenode): self.uri = filenode.get_uri() assert isinstance(self.uri, str), (self.uri, filenode) d1.addCallback(_stash_uri) return d1 d.addCallback(_made_subdir1) return d def _do_publish2(self, res): ut = upload.Data(self.data, convergence=None) d = self._subdir1_node.create_subdirectory(u"subdir2") d.addCallback(lambda subdir2: subdir2.add_file(u"mydata992", ut)) return d def log(self, res, *args, **kwargs): # print "MSG: %s RES: %s" % (msg, args) log.msg(*args, **kwargs) return res def _do_publish_private(self, res): self.smalldata = "sssh, very secret stuff" ut = upload.Data(self.smalldata, convergence=None) d = self.clients[0].create_dirnode() d.addCallback(self.log, "GOT private directory") def _got_new_dir(privnode): rootnode = self.clients[0].create_node_from_uri(self._root_directory_uri) d1 = privnode.create_subdirectory(u"personal") d1.addCallback(self.log, "made P/personal") d1.addCallback(lambda node: node.add_file(u"sekrit data", ut)) d1.addCallback(self.log, "made P/personal/sekrit data") d1.addCallback(lambda res: rootnode.get_child_at_path([u"subdir1", u"subdir2"])) def _got_s2(s2node): d2 = privnode.set_uri(u"s2-rw", s2node.get_uri(), s2node.get_readonly_uri()) d2.addCallback(lambda node: privnode.set_uri(u"s2-ro", s2node.get_readonly_uri(), s2node.get_readonly_uri())) return d2 d1.addCallback(_got_s2) d1.addCallback(lambda res: privnode) return d1 d.addCallback(_got_new_dir) return d def _check_publish1(self, res): # this one uses the iterative API c1 = self.clients[1] d = defer.succeed(c1.create_node_from_uri(self._root_directory_uri)) d.addCallback(self.log, "check_publish1 got /") d.addCallback(lambda root: root.get(u"subdir1")) d.addCallback(lambda 
subdir1: subdir1.get(u"mydata567")) d.addCallback(lambda filenode: download_to_data(filenode)) d.addCallback(self.log, "get finished") def _get_done(data): self.failUnlessEqual(data, self.data) d.addCallback(_get_done) return d def _check_publish2(self, res): # this one uses the path-based API rootnode = self.clients[1].create_node_from_uri(self._root_directory_uri) d = rootnode.get_child_at_path(u"subdir1") d.addCallback(lambda dirnode: self.failUnless(IDirectoryNode.providedBy(dirnode))) d.addCallback(lambda res: rootnode.get_child_at_path(u"subdir1/mydata567")) d.addCallback(lambda filenode: download_to_data(filenode)) d.addCallback(lambda data: self.failUnlessEqual(data, self.data)) d.addCallback(lambda res: rootnode.get_child_at_path(u"subdir1/mydata567")) def _got_filenode(filenode): fnode = self.clients[1].create_node_from_uri(filenode.get_uri()) assert fnode == filenode d.addCallback(_got_filenode) return d def _check_publish_private(self, resnode): # this one uses the path-based API self._private_node = resnode d = self._private_node.get_child_at_path(u"personal") def _got_personal(personal): self._personal_node = personal return personal d.addCallback(_got_personal) d.addCallback(lambda dirnode: self.failUnless(IDirectoryNode.providedBy(dirnode), dirnode)) def get_path(path): return self._private_node.get_child_at_path(path) d.addCallback(lambda res: get_path(u"personal/sekrit data")) d.addCallback(lambda filenode: download_to_data(filenode)) d.addCallback(lambda data: self.failUnlessEqual(data, self.smalldata)) d.addCallback(lambda res: get_path(u"s2-rw")) d.addCallback(lambda dirnode: self.failUnless(dirnode.is_mutable())) d.addCallback(lambda res: get_path(u"s2-ro")) def _got_s2ro(dirnode): self.failUnless(dirnode.is_mutable(), dirnode) self.failUnless(dirnode.is_readonly(), dirnode) d1 = defer.succeed(None) d1.addCallback(lambda res: dirnode.list()) d1.addCallback(self.log, "dirnode.list") d1.addCallback(lambda res: self.shouldFail2(NotWriteableError, "mkdir(nope)", None, dirnode.create_subdirectory, u"nope")) d1.addCallback(self.log, "doing add_file(ro)") ut = upload.Data("I will disappear, unrecorded and unobserved. 
The tragedy of my demise is made more poignant by its silence, but this beauty is not for you to ever know.", convergence="99i-p1x4-xd4-18yc-ywt-87uu-msu-zo -- completely and totally unguessable string (unless you read this)") d1.addCallback(lambda res: self.shouldFail2(NotWriteableError, "add_file(nope)", None, dirnode.add_file, u"hope", ut)) d1.addCallback(self.log, "doing get(ro)") d1.addCallback(lambda res: dirnode.get(u"mydata992")) d1.addCallback(lambda filenode: self.failUnless(IFileNode.providedBy(filenode))) d1.addCallback(self.log, "doing delete(ro)") d1.addCallback(lambda res: self.shouldFail2(NotWriteableError, "delete(nope)", None, dirnode.delete, u"mydata992")) d1.addCallback(lambda res: self.shouldFail2(NotWriteableError, "set_uri(nope)", None, dirnode.set_uri, u"hopeless", self.uri, self.uri)) d1.addCallback(lambda res: self.shouldFail2(NoSuchChildError, "get(missing)", "missing", dirnode.get, u"missing")) personal = self._personal_node d1.addCallback(lambda res: self.shouldFail2(NotWriteableError, "mv from readonly", None, dirnode.move_child_to, u"mydata992", personal, u"nope")) d1.addCallback(self.log, "doing move_child_to(ro)2") d1.addCallback(lambda res: self.shouldFail2(NotWriteableError, "mv to readonly", None, personal.move_child_to, u"sekrit data", dirnode, u"nope")) d1.addCallback(self.log, "finished with _got_s2ro") return d1 d.addCallback(_got_s2ro) def _got_home(dummy): home = self._private_node personal = self._personal_node d1 = defer.succeed(None) d1.addCallback(self.log, "mv 'P/personal/sekrit data' to P/sekrit") d1.addCallback(lambda res: personal.move_child_to(u"sekrit data",home,u"sekrit")) d1.addCallback(self.log, "mv P/sekrit 'P/sekrit data'") d1.addCallback(lambda res: home.move_child_to(u"sekrit", home, u"sekrit data")) d1.addCallback(self.log, "mv 'P/sekret data' P/personal/") d1.addCallback(lambda res: home.move_child_to(u"sekrit data", personal)) d1.addCallback(lambda res: home.build_manifest().when_done()) d1.addCallback(self.log, "manifest") # five items: # P/ # P/personal/ # P/personal/sekrit data # P/s2-rw (same as P/s2-ro) # P/s2-rw/mydata992 (same as P/s2-rw/mydata992) d1.addCallback(lambda res: self.failUnlessEqual(len(res["manifest"]), 5)) d1.addCallback(lambda res: home.start_deep_stats().when_done()) def _check_stats(stats): expected = {"count-immutable-files": 1, "count-mutable-files": 0, "count-literal-files": 1, "count-files": 2, "count-directories": 3, "size-immutable-files": 112, "size-literal-files": 23, #"size-directories": 616, # varies #"largest-directory": 616, "largest-directory-children": 3, "largest-immutable-file": 112, } for k,v in expected.iteritems(): self.failUnlessEqual(stats[k], v, "stats[%s] was %s, not %s" % (k, stats[k], v)) self.failUnless(stats["size-directories"] > 1300, stats["size-directories"]) self.failUnless(stats["largest-directory"] > 800, stats["largest-directory"]) self.failUnlessEqual(stats["size-files-histogram"], [ (11, 31, 1), (101, 316, 1) ]) d1.addCallback(_check_stats) return d1 d.addCallback(_got_home) return d def shouldFail(self, res, expected_failure, which, substring=None): if isinstance(res, Failure): res.trap(expected_failure) if substring: self.failUnless(substring in str(res), "substring '%s' not in '%s'" % (substring, str(res))) else: self.fail("%s was supposed to raise %s, not get '%s'" % (which, expected_failure, res)) def shouldFail2(self, expected_failure, which, substring, callable, *args, **kwargs): assert substring is None or isinstance(substring, str) d = 
defer.maybeDeferred(callable, *args, **kwargs) def done(res): if isinstance(res, Failure): res.trap(expected_failure) if substring: self.failUnless(substring in str(res), "substring '%s' not in '%s'" % (substring, str(res))) else: self.fail("%s was supposed to raise %s, not get '%s'" % (which, expected_failure, res)) d.addBoth(done) return d def PUT(self, urlpath, data): url = self.webish_url + urlpath return getPage(url, method="PUT", postdata=data) def GET(self, urlpath, followRedirect=False): url = self.webish_url + urlpath return getPage(url, method="GET", followRedirect=followRedirect) def POST(self, urlpath, followRedirect=False, use_helper=False, **fields): sepbase = "boogabooga" sep = "--" + sepbase form = [] form.append(sep) form.append('Content-Disposition: form-data; name="_charset"') form.append('') form.append('UTF-8') form.append(sep) for name, value in fields.iteritems(): if isinstance(value, tuple): filename, value = value form.append('Content-Disposition: form-data; name="%s"; ' 'filename="%s"' % (name, filename.encode("utf-8"))) else: form.append('Content-Disposition: form-data; name="%s"' % name) form.append('') form.append(str(value)) form.append(sep) form[-1] += "--" body = "" headers = {} if fields: body = "\r\n".join(form) + "\r\n" headers["content-type"] = "multipart/form-data; boundary=%s" % sepbase return self.POST2(urlpath, body, headers, followRedirect, use_helper) def POST2(self, urlpath, body="", headers={}, followRedirect=False, use_helper=False): if use_helper: url = self.helper_webish_url + urlpath else: url = self.webish_url + urlpath return getPage(url, method="POST", postdata=body, headers=headers, followRedirect=followRedirect) def _test_web(self, res): base = self.webish_url public = "uri/" + self._root_directory_uri d = getPage(base) def _got_welcome(page): html = page.replace('\n', ' ') connected_re = r'Connected to %d\s*of %d known storage servers' % (self.numclients, self.numclients) self.failUnless(re.search(connected_re, html), "I didn't see the right '%s' message in:\n%s" % (connected_re, page)) # nodeids/tubids don't have any regexp-special characters nodeid_re = r'Node ID:\s*%s' % ( self.clients[0].get_long_tubid(), self.clients[0].get_long_nodeid()) self.failUnless(re.search(nodeid_re, html), "I didn't see the right '%s' message in:\n%s" % (nodeid_re, page)) self.failUnless("Helper: 0 active uploads" in page) d.addCallback(_got_welcome) d.addCallback(self.log, "done with _got_welcome") # get the welcome page from the node that uses the helper too d.addCallback(lambda res: getPage(self.helper_webish_url)) def _got_welcome_helper(page): html = page.replace('\n', ' ') self.failUnless(re.search(r'
\s*Helper', html), page)
            self.failUnlessIn("Not running helper", page)
        d.addCallback(_got_welcome_helper)

        d.addCallback(lambda res: getPage(base + public))
        d.addCallback(lambda res: getPage(base + public + "/subdir1"))
        def _got_subdir1(page):
            # there ought to be an href for our file
            self.failUnlessIn('%d' % len(self.data), page)
            self.failUnless(">mydata567" in page)
        d.addCallback(_got_subdir1)
        d.addCallback(self.log, "done with _got_subdir1")
        d.addCallback(lambda res: getPage(base + public + "/subdir1/mydata567"))
        def _got_data(page):
            self.failUnlessEqual(page, self.data)
        d.addCallback(_got_data)

        # download from a URI embedded in a URL
        d.addCallback(self.log, "_get_from_uri")
        def _get_from_uri(res):
            return getPage(base + "uri/%s?filename=%s" % (self.uri, "mydata567"))
        d.addCallback(_get_from_uri)
        def _got_from_uri(page):
            self.failUnlessEqual(page, self.data)
        d.addCallback(_got_from_uri)

        # download from a URI embedded in a URL, second form
        d.addCallback(self.log, "_get_from_uri2")
        def _get_from_uri2(res):
            return getPage(base + "uri?uri=%s" % (self.uri,))
        d.addCallback(_get_from_uri2)
        d.addCallback(_got_from_uri)

        # download from a bogus URI, make sure we get a reasonable error
        d.addCallback(self.log, "_get_from_bogus_uri", level=log.UNUSUAL)
        def _get_from_bogus_uri(res):
            d1 = getPage(base + "uri/%s?filename=%s"
                         % (self.mangle_uri(self.uri), "mydata567"))
            d1.addBoth(self.shouldFail, Error, "downloading bogus URI", "410")
            return d1
        d.addCallback(_get_from_bogus_uri)
        d.addCallback(self.log, "_got_from_bogus_uri", level=log.UNUSUAL)

        # upload a file with PUT
        d.addCallback(self.log, "about to try PUT")
        d.addCallback(lambda res: self.PUT(public + "/subdir3/new.txt",
                                           "new.txt contents"))
        d.addCallback(lambda res: self.GET(public + "/subdir3/new.txt"))
        d.addCallback(self.failUnlessEqual, "new.txt contents")
        # and again with something large enough to use multiple segments,
        # and hopefully trigger pauseProducing too
        def _new_happy_semantics(ign):
            for c in self.clients:
                # these get reset somewhere? Whatever.
                c.DEFAULT_ENCODING_PARAMETERS['happy'] = 1
        d.addCallback(_new_happy_semantics)
        d.addCallback(lambda res: self.PUT(public + "/subdir3/big.txt",
                                           "big" * 500000)) # 1.5MB
        d.addCallback(lambda res: self.GET(public + "/subdir3/big.txt"))
        d.addCallback(lambda res: self.failUnlessEqual(len(res), 1500000))

        # can we replace files in place?
        d.addCallback(lambda res: self.PUT(public + "/subdir3/new.txt",
                                           "NEWER contents"))
        d.addCallback(lambda res: self.GET(public + "/subdir3/new.txt"))
        d.addCallback(self.failUnlessEqual, "NEWER contents")

        # test unlinked POST
        d.addCallback(lambda res: self.POST("uri", t="upload",
                                            file=("new.txt", "data" * 10000)))
        # and again using the helper, which exercises different upload-status
        # display code
        d.addCallback(lambda res: self.POST("uri", use_helper=True, t="upload",
                                            file=("foo.txt", "data2" * 10000)))

        # check that the status page exists
        d.addCallback(lambda res: self.GET("status", followRedirect=True))
        def _got_status(res):
            # find an interesting upload and download to look at. LIT files
            # are not interesting.
h = self.clients[0].get_history() for ds in h.list_all_download_statuses(): if ds.get_size() > 200: self._down_status = ds.get_counter() for us in h.list_all_upload_statuses(): if us.get_size() > 200: self._up_status = us.get_counter() rs = list(h.list_all_retrieve_statuses())[0] self._retrieve_status = rs.get_counter() ps = list(h.list_all_publish_statuses())[0] self._publish_status = ps.get_counter() us = list(h.list_all_mapupdate_statuses())[0] self._update_status = us.get_counter() # and that there are some upload- and download- status pages return self.GET("status/up-%d" % self._up_status) d.addCallback(_got_status) def _got_up(res): return self.GET("status/down-%d" % self._down_status) d.addCallback(_got_up) def _got_down(res): return self.GET("status/mapupdate-%d" % self._update_status) d.addCallback(_got_down) def _got_update(res): return self.GET("status/publish-%d" % self._publish_status) d.addCallback(_got_update) def _got_publish(res): self.failUnlessIn("Publish Results", res) return self.GET("status/retrieve-%d" % self._retrieve_status) d.addCallback(_got_publish) def _got_retrieve(res): self.failUnlessIn("Retrieve Results", res) d.addCallback(_got_retrieve) # check that the helper status page exists d.addCallback(lambda res: self.GET("helper_status", followRedirect=True)) def _got_helper_status(res): self.failUnless("Bytes Fetched:" in res) # touch a couple of files in the helper's working directory to # exercise more code paths workdir = os.path.join(self.getdir("client0"), "helper") incfile = os.path.join(workdir, "CHK_incoming", "spurious") f = open(incfile, "wb") f.write("small file") f.close() then = time.time() - 86400*3 now = time.time() os.utime(incfile, (now, then)) encfile = os.path.join(workdir, "CHK_encoding", "spurious") f = open(encfile, "wb") f.write("less small file") f.close() os.utime(encfile, (now, then)) d.addCallback(_got_helper_status) # and that the json form exists d.addCallback(lambda res: self.GET("helper_status?t=json", followRedirect=True)) def _got_helper_status_json(res): data = simplejson.loads(res) self.failUnlessEqual(data["chk_upload_helper.upload_need_upload"], 1) self.failUnlessEqual(data["chk_upload_helper.incoming_count"], 1) self.failUnlessEqual(data["chk_upload_helper.incoming_size"], 10) self.failUnlessEqual(data["chk_upload_helper.incoming_size_old"], 10) self.failUnlessEqual(data["chk_upload_helper.encoding_count"], 1) self.failUnlessEqual(data["chk_upload_helper.encoding_size"], 15) self.failUnlessEqual(data["chk_upload_helper.encoding_size_old"], 15) d.addCallback(_got_helper_status_json) # and check that client[3] (which uses a helper but does not run one # itself) doesn't explode when you ask for its status d.addCallback(lambda res: getPage(self.helper_webish_url + "status/")) def _got_non_helper_status(res): self.failUnlessIn("Recent and Active Operations", res) d.addCallback(_got_non_helper_status) # or for helper status with t=json d.addCallback(lambda res: getPage(self.helper_webish_url + "helper_status?t=json")) def _got_non_helper_status_json(res): data = simplejson.loads(res) self.failUnlessEqual(data, {}) d.addCallback(_got_non_helper_status_json) # see if the statistics page exists d.addCallback(lambda res: self.GET("statistics")) def _got_stats(res): self.failUnlessIn("Operational Statistics", res) self.failUnlessIn(" 'downloader.files_downloaded': 5,", res) d.addCallback(_got_stats) d.addCallback(lambda res: self.GET("statistics?t=json")) def _got_stats_json(res): data = simplejson.loads(res) 
self.failUnlessEqual(data["counters"]["uploader.files_uploaded"], 5) self.failUnlessEqual(data["stats"]["chk_upload_helper.upload_need_upload"], 1) d.addCallback(_got_stats_json) # TODO: mangle the second segment of a file, to test errors that # occur after we've already sent some good data, which uses a # different error path. # TODO: download a URI with a form # TODO: create a directory by using a form # TODO: upload by using a form on the directory page # url = base + "somedir/subdir1/freeform_post!!upload" # TODO: delete a file by using a button on the directory page return d def _test_runner(self, res): # exercise some of the diagnostic tools in runner.py # find a share for (dirpath, dirnames, filenames) in os.walk(unicode(self.basedir)): if "storage" not in dirpath: continue if not filenames: continue pieces = dirpath.split(os.sep) if (len(pieces) >= 4 and pieces[-4] == "storage" and pieces[-3] == "shares"): # we're sitting in .../storage/shares/$START/$SINDEX , and there # are sharefiles here filename = os.path.join(dirpath, filenames[0]) # peek at the magic to see if it is a chk share magic = open(filename, "rb").read(4) if magic == '\x00\x00\x00\x01': break else: self.fail("unable to find any uri_extension files in %r" % self.basedir) log.msg("test_system.SystemTest._test_runner using %r" % filename) out,err = StringIO(), StringIO() rc = runner.runner(["debug", "dump-share", "--offsets", unicode_to_argv(filename)], stdout=out, stderr=err) output = out.getvalue() self.failUnlessEqual(rc, 0) # we only upload a single file, so we can assert some things about # its size and shares. self.failUnlessIn("share filename: %s" % quote_output(abspath_expanduser_unicode(filename)), output) self.failUnlessIn("size: %d\n" % len(self.data), output) self.failUnlessIn("num_segments: 1\n", output) # segment_size is always a multiple of needed_shares self.failUnlessIn("segment_size: %d\n" % mathutil.next_multiple(len(self.data), 3), output) self.failUnlessIn("total_shares: 10\n", output) # keys which are supposed to be present for key in ("size", "num_segments", "segment_size", "needed_shares", "total_shares", "codec_name", "codec_params", "tail_codec_params", #"plaintext_hash", "plaintext_root_hash", "crypttext_hash", "crypttext_root_hash", "share_root_hash", "UEB_hash"): self.failUnlessIn("%s: " % key, output) self.failUnlessIn(" verify-cap: URI:CHK-Verifier:", output) # now use its storage index to find the other shares using the # 'find-shares' tool sharedir, shnum = os.path.split(filename) storagedir, storage_index_s = os.path.split(sharedir) storage_index_s = str(storage_index_s) out,err = StringIO(), StringIO() nodedirs = [self.getdir("client%d" % i) for i in range(self.numclients)] cmd = ["debug", "find-shares", storage_index_s] + nodedirs rc = runner.runner(cmd, stdout=out, stderr=err) self.failUnlessEqual(rc, 0) out.seek(0) sharefiles = [sfn.strip() for sfn in out.readlines()] self.failUnlessEqual(len(sharefiles), 10) # also exercise the 'catalog-shares' tool out,err = StringIO(), StringIO() nodedirs = [self.getdir("client%d" % i) for i in range(self.numclients)] cmd = ["debug", "catalog-shares"] + nodedirs rc = runner.runner(cmd, stdout=out, stderr=err) self.failUnlessEqual(rc, 0) out.seek(0) descriptions = [sfn.strip() for sfn in out.readlines()] self.failUnlessEqual(len(descriptions), 30) matching = [line for line in descriptions if line.startswith("CHK %s " % storage_index_s)] self.failUnlessEqual(len(matching), 10) def _test_control(self, res): # exercise the remote-control-the-client 
foolscap interfaces in # allmydata.control (mostly used for performance tests) c0 = self.clients[0] control_furl_file = os.path.join(c0.basedir, "private", "control.furl") control_furl = open(control_furl_file, "r").read().strip() # it doesn't really matter which Tub we use to connect to the client, # so let's just use our IntroducerNode's d = self.introducer.tub.getReference(control_furl) d.addCallback(self._test_control2, control_furl_file) return d def _test_control2(self, rref, filename): d = rref.callRemote("upload_from_file_to_uri", filename.encode(get_filesystem_encoding()), convergence=None) downfile = os.path.join(self.basedir, "control.downfile").encode(get_filesystem_encoding()) d.addCallback(lambda uri: rref.callRemote("download_from_uri_to_file", uri, downfile)) def _check(res): self.failUnlessEqual(res, downfile) data = open(downfile, "r").read() expected_data = open(filename, "r").read() self.failUnlessEqual(data, expected_data) d.addCallback(_check) d.addCallback(lambda res: rref.callRemote("speed_test", 1, 200, False)) if sys.platform in ("linux2", "linux3"): d.addCallback(lambda res: rref.callRemote("get_memory_usage")) d.addCallback(lambda res: rref.callRemote("measure_peer_response_time")) return d def _test_cli(self, res): # run various CLI commands (in a thread, since they use blocking # network calls) private_uri = self._private_node.get_uri() client0_basedir = self.getdir("client0") nodeargs = [ "--node-directory", client0_basedir, ] d = defer.succeed(None) # for compatibility with earlier versions, private/root_dir.cap is # supposed to be treated as an alias named "tahoe:". Start by making # sure that works, before we add other aliases. root_file = os.path.join(client0_basedir, "private", "root_dir.cap") f = open(root_file, "w") f.write(private_uri) f.close() def run(ignored, verb, *args, **kwargs): stdin = kwargs.get("stdin", "") newargs = nodeargs + [verb] + list(args) return self._run_cli(newargs, stdin=stdin) def _check_ls((out,err), expected_children, unexpected_children=[]): self.failUnlessEqual(err, "") for s in expected_children: self.failUnless(s in out, (s,out)) for s in unexpected_children: self.failIf(s in out, (s,out)) def _check_ls_root((out,err)): self.failUnless("personal" in out) self.failUnless("s2-ro" in out) self.failUnless("s2-rw" in out) self.failUnlessEqual(err, "") # this should reference private_uri d.addCallback(run, "ls") d.addCallback(_check_ls, ["personal", "s2-ro", "s2-rw"]) d.addCallback(run, "list-aliases") def _check_aliases_1((out,err)): self.failUnlessEqual(err, "") self.failUnlessEqual(out.strip(" \n"), "tahoe: %s" % private_uri) d.addCallback(_check_aliases_1) # now that that's out of the way, remove root_dir.cap and work with # new files d.addCallback(lambda res: os.unlink(root_file)) d.addCallback(run, "list-aliases") def _check_aliases_2((out,err)): self.failUnlessEqual(err, "") self.failUnlessEqual(out, "") d.addCallback(_check_aliases_2) d.addCallback(run, "mkdir") def _got_dir( (out,err) ): self.failUnless(uri.from_string_dirnode(out.strip())) return out.strip() d.addCallback(_got_dir) d.addCallback(lambda newcap: run(None, "add-alias", "tahoe", newcap)) d.addCallback(run, "list-aliases") def _check_aliases_3((out,err)): self.failUnlessEqual(err, "") self.failUnless("tahoe: " in out) d.addCallback(_check_aliases_3) def _check_empty_dir((out,err)): self.failUnlessEqual(out, "") self.failUnlessEqual(err, "") d.addCallback(run, "ls") d.addCallback(_check_empty_dir) def _check_missing_dir((out,err)): # TODO: check that rc==2 
self.failUnlessEqual(out, "") self.failUnlessEqual(err, "No such file or directory\n") d.addCallback(run, "ls", "bogus") d.addCallback(_check_missing_dir) files = [] datas = [] for i in range(10): fn = os.path.join(self.basedir, "file%d" % i) files.append(fn) data = "data to be uploaded: file%d\n" % i datas.append(data) open(fn,"wb").write(data) def _check_stdout_against((out,err), filenum=None, data=None): self.failUnlessEqual(err, "") if filenum is not None: self.failUnlessEqual(out, datas[filenum]) if data is not None: self.failUnlessEqual(out, data) # test all both forms of put: from a file, and from stdin # tahoe put bar FOO d.addCallback(run, "put", files[0], "tahoe-file0") def _put_out((out,err)): self.failUnless("URI:LIT:" in out, out) self.failUnless("201 Created" in err, err) uri0 = out.strip() return run(None, "get", uri0) d.addCallback(_put_out) d.addCallback(lambda (out,err): self.failUnlessEqual(out, datas[0])) d.addCallback(run, "put", files[1], "subdir/tahoe-file1") # tahoe put bar tahoe:FOO d.addCallback(run, "put", files[2], "tahoe:file2") d.addCallback(run, "put", "--format=SDMF", files[3], "tahoe:file3") def _check_put_mutable((out,err)): self._mutable_file3_uri = out.strip() d.addCallback(_check_put_mutable) d.addCallback(run, "get", "tahoe:file3") d.addCallback(_check_stdout_against, 3) # tahoe put FOO STDIN_DATA = "This is the file to upload from stdin." d.addCallback(run, "put", "-", "tahoe-file-stdin", stdin=STDIN_DATA) # tahoe put tahoe:FOO d.addCallback(run, "put", "-", "tahoe:from-stdin", stdin="Other file from stdin.") d.addCallback(run, "ls") d.addCallback(_check_ls, ["tahoe-file0", "file2", "file3", "subdir", "tahoe-file-stdin", "from-stdin"]) d.addCallback(run, "ls", "subdir") d.addCallback(_check_ls, ["tahoe-file1"]) # tahoe mkdir FOO d.addCallback(run, "mkdir", "subdir2") d.addCallback(run, "ls") # TODO: extract the URI, set an alias with it d.addCallback(_check_ls, ["subdir2"]) # tahoe get: (to stdin and to a file) d.addCallback(run, "get", "tahoe-file0") d.addCallback(_check_stdout_against, 0) d.addCallback(run, "get", "tahoe:subdir/tahoe-file1") d.addCallback(_check_stdout_against, 1) outfile0 = os.path.join(self.basedir, "outfile0") d.addCallback(run, "get", "file2", outfile0) def _check_outfile0((out,err)): data = open(outfile0,"rb").read() self.failUnlessEqual(data, "data to be uploaded: file2\n") d.addCallback(_check_outfile0) outfile1 = os.path.join(self.basedir, "outfile0") d.addCallback(run, "get", "tahoe:subdir/tahoe-file1", outfile1) def _check_outfile1((out,err)): data = open(outfile1,"rb").read() self.failUnlessEqual(data, "data to be uploaded: file1\n") d.addCallback(_check_outfile1) d.addCallback(run, "rm", "tahoe-file0") d.addCallback(run, "rm", "tahoe:file2") d.addCallback(run, "ls") d.addCallback(_check_ls, [], ["tahoe-file0", "file2"]) d.addCallback(run, "ls", "-l") def _check_ls_l((out,err)): lines = out.split("\n") for l in lines: if "tahoe-file-stdin" in l: self.failUnless(l.startswith("-r-- "), l) self.failUnless(" %d " % len(STDIN_DATA) in l) if "file3" in l: self.failUnless(l.startswith("-rw- "), l) # mutable d.addCallback(_check_ls_l) d.addCallback(run, "ls", "--uri") def _check_ls_uri((out,err)): lines = out.split("\n") for l in lines: if "file3" in l: self.failUnless(self._mutable_file3_uri in l) d.addCallback(_check_ls_uri) d.addCallback(run, "ls", "--readonly-uri") def _check_ls_rouri((out,err)): lines = out.split("\n") for l in lines: if "file3" in l: rw_uri = self._mutable_file3_uri u = 
uri.from_string_mutable_filenode(rw_uri) ro_uri = u.get_readonly().to_string() self.failUnless(ro_uri in l) d.addCallback(_check_ls_rouri) d.addCallback(run, "mv", "tahoe-file-stdin", "tahoe-moved") d.addCallback(run, "ls") d.addCallback(_check_ls, ["tahoe-moved"], ["tahoe-file-stdin"]) d.addCallback(run, "ln", "tahoe-moved", "newlink") d.addCallback(run, "ls") d.addCallback(_check_ls, ["tahoe-moved", "newlink"]) d.addCallback(run, "cp", "tahoe:file3", "tahoe:file3-copy") d.addCallback(run, "ls") d.addCallback(_check_ls, ["file3", "file3-copy"]) d.addCallback(run, "get", "tahoe:file3-copy") d.addCallback(_check_stdout_against, 3) # copy from disk into tahoe d.addCallback(run, "cp", files[4], "tahoe:file4") d.addCallback(run, "ls") d.addCallback(_check_ls, ["file3", "file3-copy", "file4"]) d.addCallback(run, "get", "tahoe:file4") d.addCallback(_check_stdout_against, 4) # copy from tahoe into disk target_filename = os.path.join(self.basedir, "file-out") d.addCallback(run, "cp", "tahoe:file4", target_filename) def _check_cp_out((out,err)): self.failUnless(os.path.exists(target_filename)) got = open(target_filename,"rb").read() self.failUnlessEqual(got, datas[4]) d.addCallback(_check_cp_out) # copy from disk to disk (silly case) target2_filename = os.path.join(self.basedir, "file-out-copy") d.addCallback(run, "cp", target_filename, target2_filename) def _check_cp_out2((out,err)): self.failUnless(os.path.exists(target2_filename)) got = open(target2_filename,"rb").read() self.failUnlessEqual(got, datas[4]) d.addCallback(_check_cp_out2) # copy from tahoe into disk, overwriting an existing file d.addCallback(run, "cp", "tahoe:file3", target_filename) def _check_cp_out3((out,err)): self.failUnless(os.path.exists(target_filename)) got = open(target_filename,"rb").read() self.failUnlessEqual(got, datas[3]) d.addCallback(_check_cp_out3) # copy from disk into tahoe, overwriting an existing immutable file d.addCallback(run, "cp", files[5], "tahoe:file4") d.addCallback(run, "ls") d.addCallback(_check_ls, ["file3", "file3-copy", "file4"]) d.addCallback(run, "get", "tahoe:file4") d.addCallback(_check_stdout_against, 5) # copy from disk into tahoe, overwriting an existing mutable file d.addCallback(run, "cp", files[5], "tahoe:file3") d.addCallback(run, "ls") d.addCallback(_check_ls, ["file3", "file3-copy", "file4"]) d.addCallback(run, "get", "tahoe:file3") d.addCallback(_check_stdout_against, 5) # recursive copy: setup dn = os.path.join(self.basedir, "dir1") os.makedirs(dn) open(os.path.join(dn, "rfile1"), "wb").write("rfile1") open(os.path.join(dn, "rfile2"), "wb").write("rfile2") open(os.path.join(dn, "rfile3"), "wb").write("rfile3") sdn2 = os.path.join(dn, "subdir2") os.makedirs(sdn2) open(os.path.join(sdn2, "rfile4"), "wb").write("rfile4") open(os.path.join(sdn2, "rfile5"), "wb").write("rfile5") # from disk into tahoe d.addCallback(run, "cp", "-r", dn, "tahoe:dir1") d.addCallback(run, "ls") d.addCallback(_check_ls, ["dir1"]) d.addCallback(run, "ls", "dir1") d.addCallback(_check_ls, ["rfile1", "rfile2", "rfile3", "subdir2"], ["rfile4", "rfile5"]) d.addCallback(run, "ls", "tahoe:dir1/subdir2") d.addCallback(_check_ls, ["rfile4", "rfile5"], ["rfile1", "rfile2", "rfile3"]) d.addCallback(run, "get", "dir1/subdir2/rfile4") d.addCallback(_check_stdout_against, data="rfile4") # and back out again dn_copy = os.path.join(self.basedir, "dir1-copy") d.addCallback(run, "cp", "--verbose", "-r", "tahoe:dir1", dn_copy) def _check_cp_r_out((out,err)): def _cmp(name): old = open(os.path.join(dn, name), "rb").read() newfn 
= os.path.join(dn_copy, name) self.failUnless(os.path.exists(newfn)) new = open(newfn, "rb").read() self.failUnlessEqual(old, new) _cmp("rfile1") _cmp("rfile2") _cmp("rfile3") _cmp(os.path.join("subdir2", "rfile4")) _cmp(os.path.join("subdir2", "rfile5")) d.addCallback(_check_cp_r_out) # and copy it a second time, which ought to overwrite the same files d.addCallback(run, "cp", "-r", "tahoe:dir1", dn_copy) # and again, only writing filecaps dn_copy2 = os.path.join(self.basedir, "dir1-copy-capsonly") d.addCallback(run, "cp", "-r", "--caps-only", "tahoe:dir1", dn_copy2) def _check_capsonly((out,err)): # these should all be LITs x = open(os.path.join(dn_copy2, "subdir2", "rfile4")).read() y = uri.from_string_filenode(x) self.failUnlessEqual(y.data, "rfile4") d.addCallback(_check_capsonly) # and tahoe-to-tahoe d.addCallback(run, "cp", "-r", "tahoe:dir1", "tahoe:dir1-copy") d.addCallback(run, "ls") d.addCallback(_check_ls, ["dir1", "dir1-copy"]) d.addCallback(run, "ls", "dir1-copy") d.addCallback(_check_ls, ["rfile1", "rfile2", "rfile3", "subdir2"], ["rfile4", "rfile5"]) d.addCallback(run, "ls", "tahoe:dir1-copy/subdir2") d.addCallback(_check_ls, ["rfile4", "rfile5"], ["rfile1", "rfile2", "rfile3"]) d.addCallback(run, "get", "dir1-copy/subdir2/rfile4") d.addCallback(_check_stdout_against, data="rfile4") # and copy it a second time, which ought to overwrite the same files d.addCallback(run, "cp", "-r", "tahoe:dir1", "tahoe:dir1-copy") # tahoe_ls doesn't currently handle the error correctly: it tries to # JSON-parse a traceback. ## def _ls_missing(res): ## argv = nodeargs + ["ls", "bogus"] ## return self._run_cli(argv) ## d.addCallback(_ls_missing) ## def _check_ls_missing((out,err)): ## print "OUT", out ## print "ERR", err ## self.failUnlessEqual(err, "") ## d.addCallback(_check_ls_missing) return d def test_filesystem_with_cli_in_subprocess(self): # We do this in a separate test so that test_filesystem doesn't skip if we can't run bin/tahoe. self.basedir = "system/SystemTest/test_filesystem_with_cli_in_subprocess" d = self.set_up_nodes() def _new_happy_semantics(ign): for c in self.clients: c.DEFAULT_ENCODING_PARAMETERS['happy'] = 1 d.addCallback(_new_happy_semantics) def _run_in_subprocess(ignored, verb, *args, **kwargs): stdin = kwargs.get("stdin") env = kwargs.get("env") newargs = ["--node-directory", self.getdir("client0"), verb] + list(args) return self.run_bintahoe(newargs, stdin=stdin, env=env) def _check_succeeded(res, check_stderr=True): out, err, rc_or_sig = res self.failUnlessEqual(rc_or_sig, 0, str(res)) if check_stderr: self.failUnlessEqual(err, "") d.addCallback(_run_in_subprocess, "create-alias", "newalias") d.addCallback(_check_succeeded) STDIN_DATA = "This is the file to upload from stdin." 
d.addCallback(_run_in_subprocess, "put", "-", "newalias:tahoe-file", stdin=STDIN_DATA) d.addCallback(_check_succeeded, check_stderr=False) def _mv_with_http_proxy(ign): env = os.environ env['http_proxy'] = env['HTTP_PROXY'] = "http://127.0.0.0:12345" # invalid address return _run_in_subprocess(None, "mv", "newalias:tahoe-file", "newalias:tahoe-moved", env=env) d.addCallback(_mv_with_http_proxy) d.addCallback(_check_succeeded) d.addCallback(_run_in_subprocess, "ls", "newalias:") def _check_ls(res): out, err, rc_or_sig = res self.failUnlessEqual(rc_or_sig, 0, str(res)) self.failUnlessEqual(err, "", str(res)) self.failUnlessIn("tahoe-moved", out) self.failIfIn("tahoe-file", out) d.addCallback(_check_ls) return d def test_debug_trial(self): def _check_for_line(lines, result, test): for l in lines: if result in l and test in l: return self.fail("output (prefixed with '##') does not have a line containing both %r and %r:\n## %s" % (result, test, "\n## ".join(lines))) def _check_for_outcome(lines, out, outcome): self.failUnlessIn(outcome, out, "output (prefixed with '##') does not contain %r:\n## %s" % (outcome, "\n## ".join(lines))) d = self.run_bintahoe(['debug', 'trial', '--reporter=verbose', 'allmydata.test.trialtest']) def _check_failure( (out, err, rc) ): self.failUnlessEqual(rc, 1) lines = out.split('\n') _check_for_line(lines, "[SKIPPED]", "test_skip") _check_for_line(lines, "[TODO]", "test_todo") _check_for_line(lines, "[FAIL]", "test_fail") _check_for_line(lines, "[ERROR]", "test_deferred_error") _check_for_line(lines, "[ERROR]", "test_error") _check_for_outcome(lines, out, "FAILED") d.addCallback(_check_failure) # the --quiet argument regression-tests a problem in finding which arguments to pass to trial d.addCallback(lambda ign: self.run_bintahoe(['--quiet', 'debug', 'trial', '--reporter=verbose', 'allmydata.test.trialtest.Success'])) def _check_success( (out, err, rc) ): self.failUnlessEqual(rc, 0) lines = out.split('\n') _check_for_line(lines, "[SKIPPED]", "test_skip") _check_for_line(lines, "[TODO]", "test_todo") _check_for_outcome(lines, out, "PASSED") d.addCallback(_check_success) return d def _run_cli(self, argv, stdin=""): #print "CLI:", argv stdout, stderr = StringIO(), StringIO() d = threads.deferToThread(runner.runner, argv, run_by_human=False, stdin=StringIO(stdin), stdout=stdout, stderr=stderr) def _done(res): return stdout.getvalue(), stderr.getvalue() d.addCallback(_done) return d def _test_checker(self, res): ut = upload.Data("too big to be literal" * 200, convergence=None) d = self._personal_node.add_file(u"big file", ut) d.addCallback(lambda res: self._personal_node.check(Monitor())) def _check_dirnode_results(r): self.failUnless(r.is_healthy()) d.addCallback(_check_dirnode_results) d.addCallback(lambda res: self._personal_node.check(Monitor(), verify=True)) d.addCallback(_check_dirnode_results) d.addCallback(lambda res: self._personal_node.get(u"big file")) def _got_chk_filenode(n): self.failUnless(isinstance(n, ImmutableFileNode)) d = n.check(Monitor()) def _check_filenode_results(r): self.failUnless(r.is_healthy()) d.addCallback(_check_filenode_results) d.addCallback(lambda res: n.check(Monitor(), verify=True)) d.addCallback(_check_filenode_results) return d d.addCallback(_got_chk_filenode) d.addCallback(lambda res: self._personal_node.get(u"sekrit data")) def _got_lit_filenode(n): self.failUnless(isinstance(n, LiteralFileNode)) d = n.check(Monitor()) def _check_lit_filenode_results(r): self.failUnlessEqual(r, None) d.addCallback(_check_lit_filenode_results) 
d.addCallback(lambda res: n.check(Monitor(), verify=True)) d.addCallback(_check_lit_filenode_results) return d d.addCallback(_got_lit_filenode) return d class Connections(SystemTestMixin, unittest.TestCase): def test_rref(self): if NormalizedVersion(foolscap.__version__) < NormalizedVersion('0.6.4'): raise unittest.SkipTest("skipped due to http://foolscap.lothar.com/trac/ticket/196 " "(which does not affect normal usage of Tahoe-LAFS)") self.basedir = "system/Connections/rref" d = self.set_up_nodes(2) def _start(ign): self.c0 = self.clients[0] nonclients = [s for s in self.c0.storage_broker.get_connected_servers() if s.get_serverid() != self.c0.nodeid] self.failUnlessEqual(len(nonclients), 1) self.s1 = nonclients[0] # s1 is the server, not c0 self.s1_rref = self.s1.get_rref() self.failIfEqual(self.s1_rref, None) self.failUnless(self.s1.is_connected()) d.addCallback(_start) # now shut down the server d.addCallback(lambda ign: self.clients[1].disownServiceParent()) # and wait for the client to notice def _poll(): return len(self.c0.storage_broker.get_connected_servers()) < 2 d.addCallback(lambda ign: self.poll(_poll)) def _down(ign): self.failIf(self.s1.is_connected()) rref = self.s1.get_rref() self.failUnless(rref) self.failUnlessIdentical(rref, self.s1_rref) d.addCallback(_down) return d tahoe-lafs-1.10.0/src/allmydata/test/test_upload.py000066400000000000000000002571201221140116300222250ustar00rootroot00000000000000# -*- coding: utf-8 -*- import os, shutil from cStringIO import StringIO from twisted.trial import unittest from twisted.python.failure import Failure from twisted.internet import defer from foolscap.api import fireEventually import allmydata # for __full_version__ from allmydata import uri, monitor, client from allmydata.immutable import upload, encode from allmydata.interfaces import FileTooLargeError, UploadUnhappinessError from allmydata.util import log, base32 from allmydata.util.assertutil import precondition from allmydata.util.deferredutil import DeferredListShouldSucceed from allmydata.test.no_network import GridTestMixin from allmydata.test.common_util import ShouldFailMixin from allmydata.util.happinessutil import servers_of_happiness, \ shares_by_server, merge_servers from allmydata.storage_client import StorageFarmBroker from allmydata.storage.server import storage_index_to_dir from allmydata.client import Client MiB = 1024*1024 def extract_uri(results): return results.get_uri() # Some of these took longer than 480 seconds on Zandr's arm box, but this may # have been due to an earlier test ERROR'ing out due to timeout, which seems # to screw up subsequent tests. 
timeout = 960 class Uploadable(unittest.TestCase): def shouldEqual(self, data, expected): self.failUnless(isinstance(data, list)) for e in data: self.failUnless(isinstance(e, str)) s = "".join(data) self.failUnlessEqual(s, expected) def test_filehandle_random_key(self): return self._test_filehandle(convergence=None) def test_filehandle_convergent_encryption(self): return self._test_filehandle(convergence="some convergence string") def _test_filehandle(self, convergence): s = StringIO("a"*41) u = upload.FileHandle(s, convergence=convergence) d = u.get_size() d.addCallback(self.failUnlessEqual, 41) d.addCallback(lambda res: u.read(1)) d.addCallback(self.shouldEqual, "a") d.addCallback(lambda res: u.read(80)) d.addCallback(self.shouldEqual, "a"*40) d.addCallback(lambda res: u.close()) # this doesn't close the filehandle d.addCallback(lambda res: s.close()) # that privilege is reserved for us return d def test_filename(self): basedir = "upload/Uploadable/test_filename" os.makedirs(basedir) fn = os.path.join(basedir, "file") f = open(fn, "w") f.write("a"*41) f.close() u = upload.FileName(fn, convergence=None) d = u.get_size() d.addCallback(self.failUnlessEqual, 41) d.addCallback(lambda res: u.read(1)) d.addCallback(self.shouldEqual, "a") d.addCallback(lambda res: u.read(80)) d.addCallback(self.shouldEqual, "a"*40) d.addCallback(lambda res: u.close()) return d def test_data(self): s = "a"*41 u = upload.Data(s, convergence=None) d = u.get_size() d.addCallback(self.failUnlessEqual, 41) d.addCallback(lambda res: u.read(1)) d.addCallback(self.shouldEqual, "a") d.addCallback(lambda res: u.read(80)) d.addCallback(self.shouldEqual, "a"*40) d.addCallback(lambda res: u.close()) return d class ServerError(Exception): pass class SetDEPMixin: def set_encoding_parameters(self, k, happy, n, max_segsize=1*MiB): p = {"k": k, "happy": happy, "n": n, "max_segment_size": max_segsize, } self.node.DEFAULT_ENCODING_PARAMETERS = p class FakeStorageServer: def __init__(self, mode): self.mode = mode self.allocated = [] self.queries = 0 self.version = { "http://allmydata.org/tahoe/protocols/storage/v1" : { "maximum-immutable-share-size": 2**32 - 1 }, "application-version": str(allmydata.__full_version__), } if mode == "small": self.version = { "http://allmydata.org/tahoe/protocols/storage/v1" : { "maximum-immutable-share-size": 10 }, "application-version": str(allmydata.__full_version__), } def callRemote(self, methname, *args, **kwargs): def _call(): meth = getattr(self, methname) return meth(*args, **kwargs) d = fireEventually() d.addCallback(lambda res: _call()) return d def allocate_buckets(self, storage_index, renew_secret, cancel_secret, sharenums, share_size, canary): #print "FakeStorageServer.allocate_buckets(num=%d, size=%d)" % (len(sharenums), share_size) if self.mode == "first-fail": if self.queries == 0: raise ServerError if self.mode == "second-fail": if self.queries == 1: raise ServerError self.queries += 1 if self.mode == "full": return (set(), {},) elif self.mode == "already got them": return (set(sharenums), {},) else: for shnum in sharenums: self.allocated.append( (storage_index, shnum) ) return (set(), dict([( shnum, FakeBucketWriter(share_size) ) for shnum in sharenums]), ) class FakeBucketWriter: # a diagnostic version of storageserver.BucketWriter def __init__(self, size): self.data = StringIO() self.closed = False self._size = size def callRemote(self, methname, *args, **kwargs): def _call(): meth = getattr(self, "remote_" + methname) return meth(*args, **kwargs) d = fireEventually() 
d.addCallback(lambda res: _call()) return d def callRemoteOnly(self, methname, *args, **kwargs): d = self.callRemote(methname, *args, **kwargs) del d # callRemoteOnly ignores this return None def remote_write(self, offset, data): precondition(not self.closed) precondition(offset >= 0) precondition(offset+len(data) <= self._size, "offset=%d + data=%d > size=%d" % (offset, len(data), self._size)) self.data.seek(offset) self.data.write(data) def remote_close(self): precondition(not self.closed) self.closed = True def remote_abort(self): pass class FakeClient: DEFAULT_ENCODING_PARAMETERS = {"k":25, "happy": 25, "n": 100, "max_segment_size": 1*MiB, } def __init__(self, mode="good", num_servers=50): self.num_servers = num_servers if type(mode) is str: mode = dict([i,mode] for i in range(num_servers)) servers = [ ("%20d"%fakeid, FakeStorageServer(mode[fakeid])) for fakeid in range(self.num_servers) ] self.storage_broker = StorageFarmBroker(None, permute_peers=True) for (serverid, rref) in servers: ann = {"anonymous-storage-FURL": "pb://%s@nowhere/fake" % base32.b2a(serverid), "permutation-seed-base32": base32.b2a(serverid) } self.storage_broker.test_add_rref(serverid, rref, ann) self.last_servers = [s[1] for s in servers] def log(self, *args, **kwargs): pass def get_encoding_parameters(self): return self.DEFAULT_ENCODING_PARAMETERS def get_storage_broker(self): return self.storage_broker _secret_holder = client.SecretHolder("lease secret", "convergence secret") class GotTooFarError(Exception): pass class GiganticUploadable(upload.FileHandle): def __init__(self, size): self._size = size self._fp = 0 def get_encryption_key(self): return defer.succeed("\x00" * 16) def get_size(self): return defer.succeed(self._size) def read(self, length): left = self._size - self._fp length = min(left, length) self._fp += length if self._fp > 1000000: # terminate the test early. raise GotTooFarError("we shouldn't be allowed to get this far") return defer.succeed(["\x00" * length]) def close(self): pass DATA = """ Once upon a time, there was a beautiful princess named Buttercup. She lived in a magical land where every file was stored securely among millions of machines, and nobody ever worried about their data being lost ever again. The End. 
""" assert len(DATA) > upload.Uploader.URI_LIT_SIZE_THRESHOLD SIZE_ZERO = 0 SIZE_SMALL = 16 SIZE_LARGE = len(DATA) def upload_data(uploader, data): u = upload.Data(data, convergence=None) return uploader.upload(u) def upload_filename(uploader, filename): u = upload.FileName(filename, convergence=None) return uploader.upload(u) def upload_filehandle(uploader, fh): u = upload.FileHandle(fh, convergence=None) return uploader.upload(u) class GoodServer(unittest.TestCase, ShouldFailMixin, SetDEPMixin): def setUp(self): self.node = FakeClient(mode="good") self.u = upload.Uploader() self.u.running = True self.u.parent = self.node def _check_small(self, newuri, size): u = uri.from_string(newuri) self.failUnless(isinstance(u, uri.LiteralFileURI)) self.failUnlessEqual(len(u.data), size) def _check_large(self, newuri, size): u = uri.from_string(newuri) self.failUnless(isinstance(u, uri.CHKFileURI)) self.failUnless(isinstance(u.get_storage_index(), str)) self.failUnlessEqual(len(u.get_storage_index()), 16) self.failUnless(isinstance(u.key, str)) self.failUnlessEqual(len(u.key), 16) self.failUnlessEqual(u.size, size) def get_data(self, size): return DATA[:size] def test_too_large(self): # we've removed the 4GiB share size limit (see ticket #346 for # details), but still have an 8-byte field, so the limit is now # 2**64, so make sure we reject files larger than that. k = 3; happy = 7; n = 10 self.set_encoding_parameters(k, happy, n) big = k*(2**64) data1 = GiganticUploadable(big) d = self.shouldFail(FileTooLargeError, "test_too_large-data1", "This file is too large to be uploaded (data_size)", self.u.upload, data1) data2 = GiganticUploadable(big-3) d.addCallback(lambda res: self.shouldFail(FileTooLargeError, "test_too_large-data2", "This file is too large to be uploaded (offsets)", self.u.upload, data2)) # I don't know where the actual limit is.. it depends upon how large # the hash trees wind up. It's somewhere close to k*4GiB-ln2(size). 
return d def test_data_zero(self): data = self.get_data(SIZE_ZERO) d = upload_data(self.u, data) d.addCallback(extract_uri) d.addCallback(self._check_small, SIZE_ZERO) return d def test_data_small(self): data = self.get_data(SIZE_SMALL) d = upload_data(self.u, data) d.addCallback(extract_uri) d.addCallback(self._check_small, SIZE_SMALL) return d def test_data_large(self): data = self.get_data(SIZE_LARGE) d = upload_data(self.u, data) d.addCallback(extract_uri) d.addCallback(self._check_large, SIZE_LARGE) return d def test_data_large_odd_segments(self): data = self.get_data(SIZE_LARGE) segsize = int(SIZE_LARGE / 2.5) # we want 3 segments, since that's not a power of two self.set_encoding_parameters(25, 25, 100, segsize) d = upload_data(self.u, data) d.addCallback(extract_uri) d.addCallback(self._check_large, SIZE_LARGE) return d def test_filehandle_zero(self): data = self.get_data(SIZE_ZERO) d = upload_filehandle(self.u, StringIO(data)) d.addCallback(extract_uri) d.addCallback(self._check_small, SIZE_ZERO) return d def test_filehandle_small(self): data = self.get_data(SIZE_SMALL) d = upload_filehandle(self.u, StringIO(data)) d.addCallback(extract_uri) d.addCallback(self._check_small, SIZE_SMALL) return d def test_filehandle_large(self): data = self.get_data(SIZE_LARGE) d = upload_filehandle(self.u, StringIO(data)) d.addCallback(extract_uri) d.addCallback(self._check_large, SIZE_LARGE) return d def test_filename_zero(self): fn = "Uploader-test_filename_zero.data" f = open(fn, "wb") data = self.get_data(SIZE_ZERO) f.write(data) f.close() d = upload_filename(self.u, fn) d.addCallback(extract_uri) d.addCallback(self._check_small, SIZE_ZERO) return d def test_filename_small(self): fn = "Uploader-test_filename_small.data" f = open(fn, "wb") data = self.get_data(SIZE_SMALL) f.write(data) f.close() d = upload_filename(self.u, fn) d.addCallback(extract_uri) d.addCallback(self._check_small, SIZE_SMALL) return d def test_filename_large(self): fn = "Uploader-test_filename_large.data" f = open(fn, "wb") data = self.get_data(SIZE_LARGE) f.write(data) f.close() d = upload_filename(self.u, fn) d.addCallback(extract_uri) d.addCallback(self._check_large, SIZE_LARGE) return d class ServerErrors(unittest.TestCase, ShouldFailMixin, SetDEPMixin): def make_node(self, mode, num_servers=10): self.node = FakeClient(mode, num_servers) self.u = upload.Uploader() self.u.running = True self.u.parent = self.node def _check_large(self, newuri, size): u = uri.from_string(newuri) self.failUnless(isinstance(u, uri.CHKFileURI)) self.failUnless(isinstance(u.get_storage_index(), str)) self.failUnlessEqual(len(u.get_storage_index()), 16) self.failUnless(isinstance(u.key, str)) self.failUnlessEqual(len(u.key), 16) self.failUnlessEqual(u.size, size) def test_first_error(self): mode = dict([(0,"good")] + [(i,"first-fail") for i in range(1,10)]) self.make_node(mode) self.set_encoding_parameters(k=25, happy=1, n=50) d = upload_data(self.u, DATA) d.addCallback(extract_uri) d.addCallback(self._check_large, SIZE_LARGE) return d def test_first_error_all(self): self.make_node("first-fail") d = self.shouldFail(UploadUnhappinessError, "first_error_all", "server selection failed", upload_data, self.u, DATA) def _check((f,)): self.failUnlessIn("placed 0 shares out of 100 total", str(f.value)) # there should also be a 'last failure was' message self.failUnlessIn("ServerError", str(f.value)) d.addCallback(_check) return d def test_second_error(self): # we want to make sure we make it to a third pass. 
This means that # the first pass was insufficient to place all shares, and at least # one of second pass servers (other than the last one) accepted a # share (so we'll believe that a third pass will be useful). (if # everyone but the last server throws an error, then we'll send all # the remaining shares to the last server at the end of the second # pass, and if that succeeds, we won't make it to a third pass). # # we can achieve this 97.5% of the time by using 40 servers, having # 39 of them fail on the second request, leaving only one to succeed # on the second request. (we need to keep the number of servers low # enough to ensure a second pass with 100 shares). mode = dict([(0,"good")] + [(i,"second-fail") for i in range(1,40)]) self.make_node(mode, 40) d = upload_data(self.u, DATA) d.addCallback(extract_uri) d.addCallback(self._check_large, SIZE_LARGE) return d def test_second_error_all(self): self.make_node("second-fail") d = self.shouldFail(UploadUnhappinessError, "second_error_all", "server selection failed", upload_data, self.u, DATA) def _check((f,)): self.failUnlessIn("placed 10 shares out of 100 total", str(f.value)) # there should also be a 'last failure was' message self.failUnlessIn("ServerError", str(f.value)) d.addCallback(_check) return d class FullServer(unittest.TestCase): def setUp(self): self.node = FakeClient(mode="full") self.u = upload.Uploader() self.u.running = True self.u.parent = self.node def _should_fail(self, f): self.failUnless(isinstance(f, Failure) and f.check(UploadUnhappinessError), f) def test_data_large(self): data = DATA d = upload_data(self.u, data) d.addBoth(self._should_fail) return d class ServerSelection(unittest.TestCase): def make_client(self, num_servers=50): self.node = FakeClient(mode="good", num_servers=num_servers) self.u = upload.Uploader() self.u.running = True self.u.parent = self.node def get_data(self, size): return DATA[:size] def _check_large(self, newuri, size): u = uri.from_string(newuri) self.failUnless(isinstance(u, uri.CHKFileURI)) self.failUnless(isinstance(u.get_storage_index(), str)) self.failUnlessEqual(len(u.get_storage_index()), 16) self.failUnless(isinstance(u.key, str)) self.failUnlessEqual(len(u.key), 16) self.failUnlessEqual(u.size, size) def set_encoding_parameters(self, k, happy, n, max_segsize=1*MiB): p = {"k": k, "happy": happy, "n": n, "max_segment_size": max_segsize, } self.node.DEFAULT_ENCODING_PARAMETERS = p def test_one_each(self): # if we have 50 shares, and there are 50 servers, and they all accept # a share, we should get exactly one share per server self.make_client() data = self.get_data(SIZE_LARGE) self.set_encoding_parameters(25, 30, 50) d = upload_data(self.u, data) d.addCallback(extract_uri) d.addCallback(self._check_large, SIZE_LARGE) def _check(res): for s in self.node.last_servers: allocated = s.allocated self.failUnlessEqual(len(allocated), 1) self.failUnlessEqual(s.queries, 1) d.addCallback(_check) return d def test_two_each(self): # if we have 100 shares, and there are 50 servers, and they all # accept all shares, we should get exactly two shares per server self.make_client() data = self.get_data(SIZE_LARGE) # if there are 50 servers, then happy needs to be <= 50 self.set_encoding_parameters(50, 50, 100) d = upload_data(self.u, data) d.addCallback(extract_uri) d.addCallback(self._check_large, SIZE_LARGE) def _check(res): for s in self.node.last_servers: allocated = s.allocated self.failUnlessEqual(len(allocated), 2) self.failUnlessEqual(s.queries, 2) d.addCallback(_check) return d def 
test_one_each_plus_one_extra(self): # if we have 51 shares, and there are 50 servers, then one server # gets two shares and the rest get just one self.make_client() data = self.get_data(SIZE_LARGE) self.set_encoding_parameters(24, 41, 51) d = upload_data(self.u, data) d.addCallback(extract_uri) d.addCallback(self._check_large, SIZE_LARGE) def _check(res): got_one = [] got_two = [] for s in self.node.last_servers: allocated = s.allocated self.failUnless(len(allocated) in (1,2), len(allocated)) if len(allocated) == 1: self.failUnlessEqual(s.queries, 1) got_one.append(s) else: self.failUnlessEqual(s.queries, 2) got_two.append(s) self.failUnlessEqual(len(got_one), 49) self.failUnlessEqual(len(got_two), 1) d.addCallback(_check) return d def test_four_each(self): # if we have 200 shares, and there are 50 servers, then each server # gets 4 shares. The design goal is to accomplish this with only two # queries per server. self.make_client() data = self.get_data(SIZE_LARGE) # if there are 50 servers, then happy should be no more than 50 if we # want this to work. self.set_encoding_parameters(100, 50, 200) d = upload_data(self.u, data) d.addCallback(extract_uri) d.addCallback(self._check_large, SIZE_LARGE) def _check(res): for s in self.node.last_servers: allocated = s.allocated self.failUnlessEqual(len(allocated), 4) self.failUnlessEqual(s.queries, 2) d.addCallback(_check) return d def test_three_of_ten(self): # if we have 10 shares and 3 servers, I want to see 3+3+4 rather than # 4+4+2 self.make_client(3) data = self.get_data(SIZE_LARGE) self.set_encoding_parameters(3, 3, 10) d = upload_data(self.u, data) d.addCallback(extract_uri) d.addCallback(self._check_large, SIZE_LARGE) def _check(res): counts = {} for s in self.node.last_servers: allocated = s.allocated counts[len(allocated)] = counts.get(len(allocated), 0) + 1 histogram = [counts.get(i, 0) for i in range(5)] self.failUnlessEqual(histogram, [0,0,0,2,1]) d.addCallback(_check) return d def test_some_big_some_small(self): # 10 shares, 20 servers, but half the servers don't support a # share-size large enough for our file mode = dict([(i,{0:"good",1:"small"}[i%2]) for i in range(20)]) self.node = FakeClient(mode, num_servers=20) self.u = upload.Uploader() self.u.running = True self.u.parent = self.node data = self.get_data(SIZE_LARGE) self.set_encoding_parameters(3, 5, 10) d = upload_data(self.u, data) d.addCallback(extract_uri) d.addCallback(self._check_large, SIZE_LARGE) def _check(res): # we should have put one share each on the big servers, and zero # shares on the small servers total_allocated = 0 for p in self.node.last_servers: if p.mode == "good": self.failUnlessEqual(len(p.allocated), 1) elif p.mode == "small": self.failUnlessEqual(len(p.allocated), 0) total_allocated += len(p.allocated) self.failUnlessEqual(total_allocated, 10) d.addCallback(_check) return d class StorageIndex(unittest.TestCase): def test_params_must_matter(self): DATA = "I am some data" PARAMS = Client.DEFAULT_ENCODING_PARAMETERS u = upload.Data(DATA, convergence="") u.set_default_encoding_parameters(PARAMS) eu = upload.EncryptAnUploadable(u) d1 = eu.get_storage_index() # CHK means the same data should encrypt the same way u = upload.Data(DATA, convergence="") u.set_default_encoding_parameters(PARAMS) eu = upload.EncryptAnUploadable(u) d1a = eu.get_storage_index() # but if we use a different convergence string it should be different u = upload.Data(DATA, convergence="wheee!") u.set_default_encoding_parameters(PARAMS) eu = upload.EncryptAnUploadable(u) d1salt1 = 
eu.get_storage_index() # and if we add yet a different convergence it should be different again u = upload.Data(DATA, convergence="NOT wheee!") u.set_default_encoding_parameters(PARAMS) eu = upload.EncryptAnUploadable(u) d1salt2 = eu.get_storage_index() # and if we use the first string again it should be the same as last time u = upload.Data(DATA, convergence="wheee!") u.set_default_encoding_parameters(PARAMS) eu = upload.EncryptAnUploadable(u) d1salt1a = eu.get_storage_index() # and if we change the encoding parameters, it should be different (from the same convergence string with different encoding parameters) u = upload.Data(DATA, convergence="") u.set_default_encoding_parameters(PARAMS) u.encoding_param_k = u.default_encoding_param_k + 1 eu = upload.EncryptAnUploadable(u) d2 = eu.get_storage_index() # and if we use a random key, it should be different than the CHK u = upload.Data(DATA, convergence=None) u.set_default_encoding_parameters(PARAMS) eu = upload.EncryptAnUploadable(u) d3 = eu.get_storage_index() # and different from another instance u = upload.Data(DATA, convergence=None) u.set_default_encoding_parameters(PARAMS) eu = upload.EncryptAnUploadable(u) d4 = eu.get_storage_index() d = DeferredListShouldSucceed([d1,d1a,d1salt1,d1salt2,d1salt1a,d2,d3,d4]) def _done(res): si1, si1a, si1salt1, si1salt2, si1salt1a, si2, si3, si4 = res self.failUnlessEqual(si1, si1a) self.failIfEqual(si1, si2) self.failIfEqual(si1, si3) self.failIfEqual(si1, si4) self.failIfEqual(si3, si4) self.failIfEqual(si1salt1, si1) self.failIfEqual(si1salt1, si1salt2) self.failIfEqual(si1salt2, si1) self.failUnlessEqual(si1salt1, si1salt1a) d.addCallback(_done) return d # copied from python docs because itertools.combinations was added in # python 2.6 and we support >= 2.4. def combinations(iterable, r): # combinations('ABCD', 2) --> AB AC AD BC BD CD # combinations(range(4), 3) --> 012 013 023 123 pool = tuple(iterable) n = len(pool) if r > n: return indices = range(r) yield tuple(pool[i] for i in indices) while True: for i in reversed(range(r)): if indices[i] != i + n - r: break else: return indices[i] += 1 for j in range(i+1, r): indices[j] = indices[j-1] + 1 yield tuple(pool[i] for i in indices) def is_happy_enough(servertoshnums, h, k): """ I calculate whether servertoshnums achieves happiness level h. I do this with a naïve "brute force search" approach. (See src/allmydata/util/happinessutil.py for a better algorithm.) """ if len(servertoshnums) < h: return False # print "servertoshnums: ", servertoshnums, h, k for happysetcombo in combinations(servertoshnums.iterkeys(), h): # print "happysetcombo: ", happysetcombo for subsetcombo in combinations(happysetcombo, k): shnums = reduce(set.union, [ servertoshnums[s] for s in subsetcombo ]) # print "subsetcombo: ", subsetcombo, ", shnums: ", shnums if len(shnums) < k: # print "NOT HAAPP{Y", shnums, k return False # print "HAAPP{Y" return True class FakeServerTracker: def __init__(self, serverid, buckets): self._serverid = serverid self.buckets = buckets def get_serverid(self): return self._serverid class EncodingParameters(GridTestMixin, unittest.TestCase, SetDEPMixin, ShouldFailMixin): def find_all_shares(self, unused=None): """Locate shares on disk. Returns a dict that maps server to set of sharenums. 
""" assert self.g, "I tried to find a grid at self.g, but failed" servertoshnums = {} # k: server, v: set(shnum) for i, c in self.g.servers_by_number.iteritems(): for (dirp, dirns, fns) in os.walk(c.sharedir): for fn in fns: try: sharenum = int(fn) except TypeError: # Whoops, I guess that's not a share file then. pass else: servertoshnums.setdefault(i, set()).add(sharenum) return servertoshnums def _do_upload_with_broken_servers(self, servers_to_break): """ I act like a normal upload, but before I send the results of Tahoe2ServerSelector to the Encoder, I break the first servers_to_break ServerTrackers in the upload_servers part of the return result. """ assert self.g, "I tried to find a grid at self.g, but failed" broker = self.g.clients[0].storage_broker sh = self.g.clients[0]._secret_holder data = upload.Data("data" * 10000, convergence="") data.set_default_encoding_parameters({'k': 3, 'happy': 4, 'n': 10}) uploadable = upload.EncryptAnUploadable(data) encoder = encode.Encoder() encoder.set_encrypted_uploadable(uploadable) status = upload.UploadStatus() selector = upload.Tahoe2ServerSelector("dglev", "test", status) storage_index = encoder.get_param("storage_index") share_size = encoder.get_param("share_size") block_size = encoder.get_param("block_size") num_segments = encoder.get_param("num_segments") d = selector.get_shareholders(broker, sh, storage_index, share_size, block_size, num_segments, 10, 3, 4) def _have_shareholders((upload_trackers, already_servers)): assert servers_to_break <= len(upload_trackers) for index in xrange(servers_to_break): tracker = list(upload_trackers)[index] for share in tracker.buckets.keys(): tracker.buckets[share].abort() buckets = {} servermap = already_servers.copy() for tracker in upload_trackers: buckets.update(tracker.buckets) for bucket in tracker.buckets: servermap.setdefault(bucket, set()).add(tracker.get_serverid()) encoder.set_shareholders(buckets, servermap) d = encoder.start() return d d.addCallback(_have_shareholders) return d def _has_happy_share_distribution(self): servertoshnums = self.find_all_shares() k = self.g.clients[0].DEFAULT_ENCODING_PARAMETERS['k'] h = self.g.clients[0].DEFAULT_ENCODING_PARAMETERS['happy'] return is_happy_enough(servertoshnums, h, k) def _add_server(self, server_number, readonly=False): assert self.g, "I tried to find a grid at self.g, but failed" ss = self.g.make_server(server_number, readonly) log.msg("just created a server, number: %s => %s" % (server_number, ss,)) self.g.add_server(server_number, ss) def _add_server_with_share(self, server_number, share_number=None, readonly=False): self._add_server(server_number, readonly) if share_number is not None: self._copy_share_to_server(share_number, server_number) def _copy_share_to_server(self, share_number, server_number): ss = self.g.servers_by_number[server_number] # Copy share i from the directory associated with the first # storage server to the directory associated with this one. 
assert self.g, "I tried to find a grid at self.g, but failed" assert self.shares, "I tried to find shares at self.shares, but failed" old_share_location = self.shares[share_number][2] new_share_location = os.path.join(ss.storedir, "shares") si = uri.from_string(self.uri).get_storage_index() new_share_location = os.path.join(new_share_location, storage_index_to_dir(si)) if not os.path.exists(new_share_location): os.makedirs(new_share_location) new_share_location = os.path.join(new_share_location, str(share_number)) if old_share_location != new_share_location: shutil.copy(old_share_location, new_share_location) shares = self.find_uri_shares(self.uri) # Make sure that the storage server has the share. self.failUnless((share_number, ss.my_nodeid, new_share_location) in shares) def _setup_grid(self): """ I set up a NoNetworkGrid with a single server and client. """ self.set_up_grid(num_clients=1, num_servers=1) def _setup_and_upload(self, **kwargs): """ I set up a NoNetworkGrid with a single server and client, upload a file to it, store its uri in self.uri, and store its sharedata in self.shares. """ self._setup_grid() client = self.g.clients[0] client.DEFAULT_ENCODING_PARAMETERS['happy'] = 1 if "n" in kwargs and "k" in kwargs: client.DEFAULT_ENCODING_PARAMETERS['k'] = kwargs['k'] client.DEFAULT_ENCODING_PARAMETERS['n'] = kwargs['n'] data = upload.Data("data" * 10000, convergence="") self.data = data d = client.upload(data) def _store_uri(ur): self.uri = ur.get_uri() d.addCallback(_store_uri) d.addCallback(lambda ign: self.find_uri_shares(self.uri)) def _store_shares(shares): self.shares = shares d.addCallback(_store_shares) return d def test_configure_parameters(self): self.basedir = self.mktemp() hooks = {0: self._set_up_nodes_extra_config} self.set_up_grid(client_config_hooks=hooks) c0 = self.g.clients[0] DATA = "data" * 100 u = upload.Data(DATA, convergence="") d = c0.upload(u) d.addCallback(lambda ur: c0.create_node_from_uri(ur.get_uri())) m = monitor.Monitor() d.addCallback(lambda fn: fn.check(m)) def _check(cr): self.failUnlessEqual(cr.get_encoding_needed(), 7) self.failUnlessEqual(cr.get_encoding_expected(), 12) d.addCallback(_check) return d def _setUp(self, ns): # Used by test_happy_semantics and test_preexisting_share_behavior # to set up the grid. self.node = FakeClient(mode="good", num_servers=ns) self.u = upload.Uploader() self.u.running = True self.u.parent = self.node def test_happy_semantics(self): self._setUp(2) DATA = upload.Data("kittens" * 10000, convergence="") # These parameters are unsatisfiable with only 2 servers. self.set_encoding_parameters(k=3, happy=5, n=10) d = self.shouldFail(UploadUnhappinessError, "test_happy_semantics", "shares could be placed or found on only 2 " "server(s). We were asked to place shares on " "at least 5 server(s) such that any 3 of them " "have enough shares to recover the file", self.u.upload, DATA) # Let's reset the client to have 10 servers d.addCallback(lambda ign: self._setUp(10)) # These parameters are satisfiable with 10 servers. d.addCallback(lambda ign: self.set_encoding_parameters(k=3, happy=5, n=10)) d.addCallback(lambda ign: self.u.upload(DATA)) # Let's reset the client to have 7 servers # (this is less than n, but more than h) d.addCallback(lambda ign: self._setUp(7)) # These parameters are satisfiable with 7 servers. 
d.addCallback(lambda ign: self.set_encoding_parameters(k=3, happy=5, n=10)) d.addCallback(lambda ign: self.u.upload(DATA)) return d def test_aborted_shares(self): self.basedir = "upload/EncodingParameters/aborted_shares" self.set_up_grid(num_servers=4) c = self.g.clients[0] DATA = upload.Data(100* "kittens", convergence="") # These parameters are unsatisfiable with only 4 servers, but should # work with 5, as long as the original 4 are not stuck in the open # BucketWriter state (open() but not parms = {"k":2, "happy":5, "n":5, "max_segment_size": 1*MiB} c.DEFAULT_ENCODING_PARAMETERS = parms d = self.shouldFail(UploadUnhappinessError, "test_aborted_shares", "shares could be placed on only 4 " "server(s) such that any 2 of them have enough " "shares to recover the file, but we were asked " "to place shares on at least 5 such servers", c.upload, DATA) # now add the 5th server d.addCallback(lambda ign: self._add_server(4, False)) # and this time the upload ought to succeed d.addCallback(lambda ign: c.upload(DATA)) d.addCallback(lambda ign: self.failUnless(self._has_happy_share_distribution())) return d def test_problem_layout_comment_52(self): def _basedir(): self.basedir = self.mktemp() _basedir() # This scenario is at # http://allmydata.org/trac/tahoe-lafs/ticket/778#comment:52 # # The scenario in comment:52 proposes that we have a layout # like: # server 0: shares 1 - 9 # server 1: share 0, read-only # server 2: share 0, read-only # server 3: share 0, read-only # To get access to the shares, we will first upload to one # server, which will then have shares 0 - 9. We'll then # add three new servers, configure them to not accept any new # shares, then write share 0 directly into the serverdir of each, # and then remove share 0 from server 0 in the same way. # Then each of servers 1 - 3 will report that they have share 0, # and will not accept any new share, while server 0 will report that # it has shares 1 - 9 and will accept new shares. # We'll then set 'happy' = 4, and see that an upload fails # (as it should) d = self._setup_and_upload() d.addCallback(lambda ign: self._add_server_with_share(server_number=1, share_number=0, readonly=True)) d.addCallback(lambda ign: self._add_server_with_share(server_number=2, share_number=0, readonly=True)) d.addCallback(lambda ign: self._add_server_with_share(server_number=3, share_number=0, readonly=True)) # Remove the first share from server 0. def _remove_share_0_from_server_0(): share_location = self.shares[0][2] os.remove(share_location) d.addCallback(lambda ign: _remove_share_0_from_server_0()) # Set happy = 4 in the client. def _prepare(): client = self.g.clients[0] client.DEFAULT_ENCODING_PARAMETERS['happy'] = 4 return client d.addCallback(lambda ign: _prepare()) # Uploading data should fail d.addCallback(lambda client: self.shouldFail(UploadUnhappinessError, "test_problem_layout_comment_52_test_1", "shares could be placed or found on 4 server(s), " "but they are not spread out evenly enough to " "ensure that any 3 of these servers would have " "enough shares to recover the file. 
" "We were asked to place shares on at " "least 4 servers such that any 3 of them have " "enough shares to recover the file", client.upload, upload.Data("data" * 10000, convergence=""))) # Do comment:52, but like this: # server 2: empty # server 3: share 0, read-only # server 1: share 0, read-only # server 0: shares 0-9 d.addCallback(lambda ign: _basedir()) d.addCallback(lambda ign: self._setup_and_upload()) d.addCallback(lambda ign: self._add_server(server_number=2)) d.addCallback(lambda ign: self._add_server_with_share(server_number=3, share_number=0, readonly=True)) d.addCallback(lambda ign: self._add_server_with_share(server_number=1, share_number=0, readonly=True)) def _prepare2(): client = self.g.clients[0] client.DEFAULT_ENCODING_PARAMETERS['happy'] = 4 return client d.addCallback(lambda ign: _prepare2()) d.addCallback(lambda client: self.shouldFail(UploadUnhappinessError, "test_problem_layout_comment_52_test_2", "shares could be placed on only 3 server(s) such " "that any 3 of them have enough shares to recover " "the file, but we were asked to place shares on " "at least 4 such servers.", client.upload, upload.Data("data" * 10000, convergence=""))) return d def test_problem_layout_comment_53(self): # This scenario is at # http://allmydata.org/trac/tahoe-lafs/ticket/778#comment:53 # # Set up the grid to have one server def _change_basedir(ign): self.basedir = self.mktemp() _change_basedir(None) # We start by uploading all of the shares to one server. # Next, we'll add three new servers to our NoNetworkGrid. We'll add # one share from our initial upload to each of these. # The counterintuitive ordering of the share numbers is to deal with # the permuting of these servers -- distributing the shares this # way ensures that the Tahoe2ServerSelector sees them in the order # described below. d = self._setup_and_upload() d.addCallback(lambda ign: self._add_server_with_share(server_number=1, share_number=2)) d.addCallback(lambda ign: self._add_server_with_share(server_number=2, share_number=0)) d.addCallback(lambda ign: self._add_server_with_share(server_number=3, share_number=1)) # So, we now have the following layout: # server 0: shares 0 - 9 # server 1: share 2 # server 2: share 0 # server 3: share 1 # We change the 'happy' parameter in the client to 4. # The Tahoe2ServerSelector will see the servers permuted as: # 2, 3, 1, 0 # Ideally, a reupload of our original data should work. def _reset_encoding_parameters(ign, happy=4): client = self.g.clients[0] client.DEFAULT_ENCODING_PARAMETERS['happy'] = happy return client d.addCallback(_reset_encoding_parameters) d.addCallback(lambda client: client.upload(upload.Data("data" * 10000, convergence=""))) d.addCallback(lambda ign: self.failUnless(self._has_happy_share_distribution())) # This scenario is basically comment:53, but changed so that the # Tahoe2ServerSelector sees the server with all of the shares before # any of the other servers. # The layout is: # server 2: shares 0 - 9 # server 3: share 0 # server 1: share 1 # server 4: share 2 # The Tahoe2ServerSelector sees the servers permuted as: # 2, 3, 1, 4 # Note that server 0 has been replaced by server 4; this makes it # easier to ensure that the last server seen by Tahoe2ServerSelector # has only one share. 
d.addCallback(_change_basedir) d.addCallback(lambda ign: self._setup_and_upload()) d.addCallback(lambda ign: self._add_server_with_share(server_number=2, share_number=0)) d.addCallback(lambda ign: self._add_server_with_share(server_number=3, share_number=1)) d.addCallback(lambda ign: self._add_server_with_share(server_number=1, share_number=2)) # Copy all of the other shares to server number 2 def _copy_shares(ign): for i in xrange(0, 10): self._copy_share_to_server(i, 2) d.addCallback(_copy_shares) # Remove the first server, and add a placeholder with share 0 d.addCallback(lambda ign: self.g.remove_server(self.g.servers_by_number[0].my_nodeid)) d.addCallback(lambda ign: self._add_server_with_share(server_number=4, share_number=0)) # Now try uploading. d.addCallback(_reset_encoding_parameters) d.addCallback(lambda client: client.upload(upload.Data("data" * 10000, convergence=""))) d.addCallback(lambda ign: self.failUnless(self._has_happy_share_distribution())) # Try the same thing, but with empty servers after the first one # We want to make sure that Tahoe2ServerSelector will redistribute # shares as necessary, not simply discover an existing layout. # The layout is: # server 2: shares 0 - 9 # server 3: empty # server 1: empty # server 4: empty d.addCallback(_change_basedir) d.addCallback(lambda ign: self._setup_and_upload()) d.addCallback(lambda ign: self._add_server(server_number=2)) d.addCallback(lambda ign: self._add_server(server_number=3)) d.addCallback(lambda ign: self._add_server(server_number=1)) d.addCallback(lambda ign: self._add_server(server_number=4)) d.addCallback(_copy_shares) d.addCallback(lambda ign: self.g.remove_server(self.g.servers_by_number[0].my_nodeid)) d.addCallback(_reset_encoding_parameters) d.addCallback(lambda client: client.upload(upload.Data("data" * 10000, convergence=""))) # Make sure that only as many shares as necessary to satisfy # servers of happiness were pushed. d.addCallback(lambda results: self.failUnlessEqual(results.get_pushed_shares(), 3)) d.addCallback(lambda ign: self.failUnless(self._has_happy_share_distribution())) return d def test_problem_layout_ticket_1124(self): self.basedir = self.mktemp() d = self._setup_and_upload(k=2, n=4) # server 0: shares 0, 1, 2, 3 # server 1: shares 0, 3 # server 2: share 1 # server 3: share 2 # With this layout, an upload should just be satisfied that the current distribution is good enough, right? def _setup(ign): self._add_server_with_share(server_number=0, share_number=None) self._add_server_with_share(server_number=1, share_number=0) self._add_server_with_share(server_number=2, share_number=1) self._add_server_with_share(server_number=3, share_number=2) # Copy shares self._copy_share_to_server(3, 1) client = self.g.clients[0] client.DEFAULT_ENCODING_PARAMETERS['happy'] = 4 return client d.addCallback(_setup) d.addCallback(lambda client: client.upload(upload.Data("data" * 10000, convergence=""))) d.addCallback(lambda ign: self.failUnless(self._has_happy_share_distribution())) return d test_problem_layout_ticket_1124.todo = "Fix this after 1.7.1 release." 
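
    # Illustrative sketch (not part of the original test suite): the happiness
    # claim in the ticket-1124 comments above can be checked directly with the
    # brute-force is_happy_enough() helper defined earlier in this module. The
    # method name and the literal layout below are ours, transcribed from the
    # comments in test_problem_layout_ticket_1124, and are illustration only.
    def _sketch_ticket_1124_layout_happiness(self):
        # server 0: shares 0, 1, 2, 3
        # server 1: shares 0, 3
        # server 2: share 1
        # server 3: share 2
        layout = {0: set([0, 1, 2, 3]),
                  1: set([0, 3]),
                  2: set([1]),
                  3: set([2])}
        # With k=2 and happy=4, every 2-server subset of the 4 servers above
        # already holds at least 2 distinct shares (the smallest union is
        # {1, 2}, from servers 2 and 3), so this layout satisfies happiness=4
        # before the upload places anything new -- which is why the test above
        # expects the upload to be satisfied with the existing distribution.
        self.failUnless(is_happy_enough(layout, h=4, k=2))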
def test_happiness_with_some_readonly_servers(self): # Try the following layout # server 2: shares 0-9 # server 4: share 0, read-only # server 3: share 1, read-only # server 1: share 2, read-only self.basedir = self.mktemp() d = self._setup_and_upload() d.addCallback(lambda ign: self._add_server_with_share(server_number=2, share_number=0)) d.addCallback(lambda ign: self._add_server_with_share(server_number=3, share_number=1, readonly=True)) d.addCallback(lambda ign: self._add_server_with_share(server_number=1, share_number=2, readonly=True)) # Copy all of the other shares to server number 2 def _copy_shares(ign): for i in xrange(1, 10): self._copy_share_to_server(i, 2) d.addCallback(_copy_shares) # Remove server 0, and add another in its place d.addCallback(lambda ign: self.g.remove_server(self.g.servers_by_number[0].my_nodeid)) d.addCallback(lambda ign: self._add_server_with_share(server_number=4, share_number=0, readonly=True)) def _reset_encoding_parameters(ign, happy=4): client = self.g.clients[0] client.DEFAULT_ENCODING_PARAMETERS['happy'] = happy return client d.addCallback(_reset_encoding_parameters) d.addCallback(lambda client: client.upload(upload.Data("data" * 10000, convergence=""))) d.addCallback(lambda ign: self.failUnless(self._has_happy_share_distribution())) return d def test_happiness_with_all_readonly_servers(self): # server 3: share 1, read-only # server 1: share 2, read-only # server 2: shares 0-9, read-only # server 4: share 0, read-only # The idea with this test is to make sure that the survey of # read-only servers doesn't undercount servers of happiness self.basedir = self.mktemp() d = self._setup_and_upload() d.addCallback(lambda ign: self._add_server_with_share(server_number=4, share_number=0, readonly=True)) d.addCallback(lambda ign: self._add_server_with_share(server_number=3, share_number=1, readonly=True)) d.addCallback(lambda ign: self._add_server_with_share(server_number=1, share_number=2, readonly=True)) d.addCallback(lambda ign: self._add_server_with_share(server_number=2, share_number=0, readonly=True)) def _copy_shares(ign): for i in xrange(1, 10): self._copy_share_to_server(i, 2) d.addCallback(_copy_shares) d.addCallback(lambda ign: self.g.remove_server(self.g.servers_by_number[0].my_nodeid)) def _reset_encoding_parameters(ign, happy=4): client = self.g.clients[0] client.DEFAULT_ENCODING_PARAMETERS['happy'] = happy return client d.addCallback(_reset_encoding_parameters) d.addCallback(lambda client: client.upload(upload.Data("data" * 10000, convergence=""))) d.addCallback(lambda ign: self.failUnless(self._has_happy_share_distribution())) return d def test_dropped_servers_in_encoder(self): # The Encoder does its own "servers_of_happiness" check if it # happens to lose a bucket during an upload (it assumes that # the layout presented to it satisfies "servers_of_happiness" # until a failure occurs) # # This test simulates an upload where servers break after server # selection, but before they are written to. 
def _set_basedir(ign=None): self.basedir = self.mktemp() _set_basedir() d = self._setup_and_upload(); # Add 5 servers def _do_server_setup(ign): self._add_server(server_number=1) self._add_server(server_number=2) self._add_server(server_number=3) self._add_server(server_number=4) self._add_server(server_number=5) d.addCallback(_do_server_setup) # remove the original server # (necessary to ensure that the Tahoe2ServerSelector will distribute # all the shares) def _remove_server(ign): server = self.g.servers_by_number[0] self.g.remove_server(server.my_nodeid) d.addCallback(_remove_server) # This should succeed; we still have 4 servers, and the # happiness of the upload is 4. d.addCallback(lambda ign: self._do_upload_with_broken_servers(1)) # Now, do the same thing over again, but drop 2 servers instead # of 1. This should fail, because servers_of_happiness is 4 and # we can't satisfy that. d.addCallback(_set_basedir) d.addCallback(lambda ign: self._setup_and_upload()) d.addCallback(_do_server_setup) d.addCallback(_remove_server) d.addCallback(lambda ign: self.shouldFail(UploadUnhappinessError, "test_dropped_servers_in_encoder", "shares could be placed on only 3 server(s) " "such that any 3 of them have enough shares to " "recover the file, but we were asked to place " "shares on at least 4", self._do_upload_with_broken_servers, 2)) # Now do the same thing over again, but make some of the servers # readonly, break some of the ones that aren't, and make sure that # happiness accounting is preserved. d.addCallback(_set_basedir) d.addCallback(lambda ign: self._setup_and_upload()) def _do_server_setup_2(ign): self._add_server(1) self._add_server(2) self._add_server(3) self._add_server_with_share(4, 7, readonly=True) self._add_server_with_share(5, 8, readonly=True) d.addCallback(_do_server_setup_2) d.addCallback(_remove_server) d.addCallback(lambda ign: self._do_upload_with_broken_servers(1)) d.addCallback(_set_basedir) d.addCallback(lambda ign: self._setup_and_upload()) d.addCallback(_do_server_setup_2) d.addCallback(_remove_server) d.addCallback(lambda ign: self.shouldFail(UploadUnhappinessError, "test_dropped_servers_in_encoder", "shares could be placed on only 3 server(s) " "such that any 3 of them have enough shares to " "recover the file, but we were asked to place " "shares on at least 4", self._do_upload_with_broken_servers, 2)) return d def test_merge_servers(self): # merge_servers merges a list of upload_servers and a dict of # shareid -> serverid mappings. shares = { 1 : set(["server1"]), 2 : set(["server2"]), 3 : set(["server3"]), 4 : set(["server4", "server5"]), 5 : set(["server1", "server2"]), } # if not provided with a upload_servers argument, it should just # return the first argument unchanged. 
self.failUnlessEqual(shares, merge_servers(shares, set([]))) trackers = [] for (i, server) in [(i, "server%d" % i) for i in xrange(5, 9)]: t = FakeServerTracker(server, [i]) trackers.append(t) expected = { 1 : set(["server1"]), 2 : set(["server2"]), 3 : set(["server3"]), 4 : set(["server4", "server5"]), 5 : set(["server1", "server2", "server5"]), 6 : set(["server6"]), 7 : set(["server7"]), 8 : set(["server8"]), } self.failUnlessEqual(expected, merge_servers(shares, set(trackers))) shares2 = {} expected = { 5 : set(["server5"]), 6 : set(["server6"]), 7 : set(["server7"]), 8 : set(["server8"]), } self.failUnlessEqual(expected, merge_servers(shares2, set(trackers))) shares3 = {} trackers = [] expected = {} for (i, server) in [(i, "server%d" % i) for i in xrange(10)]: shares3[i] = set([server]) t = FakeServerTracker(server, [i]) trackers.append(t) expected[i] = set([server]) self.failUnlessEqual(expected, merge_servers(shares3, set(trackers))) def test_servers_of_happiness_utility_function(self): # These tests are concerned with the servers_of_happiness() # utility function, and its underlying matching algorithm. Other # aspects of the servers_of_happiness behavior are tested # elsehwere These tests exist to ensure that # servers_of_happiness doesn't under or overcount the happiness # value for given inputs. # servers_of_happiness expects a dict of # shnum => set(serverids) as a preexisting shares argument. test1 = { 1 : set(["server1"]), 2 : set(["server2"]), 3 : set(["server3"]), 4 : set(["server4"]) } happy = servers_of_happiness(test1) self.failUnlessEqual(4, happy) test1[4] = set(["server1"]) # We've added a duplicate server, so now servers_of_happiness # should be 3 instead of 4. happy = servers_of_happiness(test1) self.failUnlessEqual(3, happy) # The second argument of merge_servers should be a set of objects with # serverid and buckets as attributes. In actual use, these will be # ServerTracker instances, but for testing it is fine to make a # FakeServerTracker whose job is to hold those instance variables to # test that part. trackers = [] for (i, server) in [(i, "server%d" % i) for i in xrange(5, 9)]: t = FakeServerTracker(server, [i]) trackers.append(t) # Recall that test1 is a server layout with servers_of_happiness # = 3. Since there isn't any overlap between the shnum -> # set([serverid]) correspondences in test1 and those in trackers, # the result here should be 7. test2 = merge_servers(test1, set(trackers)) happy = servers_of_happiness(test2) self.failUnlessEqual(7, happy) # Now add an overlapping server to trackers. This is redundant, # so it should not cause the previously reported happiness value # to change. t = FakeServerTracker("server1", [1]) trackers.append(t) test2 = merge_servers(test1, set(trackers)) happy = servers_of_happiness(test2) self.failUnlessEqual(7, happy) test = {} happy = servers_of_happiness(test) self.failUnlessEqual(0, happy) # Test a more substantial overlap between the trackers and the # existing assignments. test = { 1 : set(['server1']), 2 : set(['server2']), 3 : set(['server3']), 4 : set(['server4']), } trackers = [] t = FakeServerTracker('server5', [4]) trackers.append(t) t = FakeServerTracker('server6', [3, 5]) trackers.append(t) # The value returned by servers_of_happiness is the size # of a maximum matching in the bipartite graph that # servers_of_happiness() makes between serverids and share # numbers. 
It should find something like this: # (server 1, share 1) # (server 2, share 2) # (server 3, share 3) # (server 5, share 4) # (server 6, share 5) # # and, since there are 5 edges in this matching, it should # return 5. test2 = merge_servers(test, set(trackers)) happy = servers_of_happiness(test2) self.failUnlessEqual(5, happy) # Zooko's first puzzle: # (from http://allmydata.org/trac/tahoe-lafs/ticket/778#comment:156) # # server 1: shares 0, 1 # server 2: shares 1, 2 # server 3: share 2 # # This should yield happiness of 3. test = { 0 : set(['server1']), 1 : set(['server1', 'server2']), 2 : set(['server2', 'server3']), } self.failUnlessEqual(3, servers_of_happiness(test)) # Zooko's second puzzle: # (from http://allmydata.org/trac/tahoe-lafs/ticket/778#comment:158) # # server 1: shares 0, 1 # server 2: share 1 # # This should yield happiness of 2. test = { 0 : set(['server1']), 1 : set(['server1', 'server2']), } self.failUnlessEqual(2, servers_of_happiness(test)) def test_shares_by_server(self): test = dict([(i, set(["server%d" % i])) for i in xrange(1, 5)]) sbs = shares_by_server(test) self.failUnlessEqual(set([1]), sbs["server1"]) self.failUnlessEqual(set([2]), sbs["server2"]) self.failUnlessEqual(set([3]), sbs["server3"]) self.failUnlessEqual(set([4]), sbs["server4"]) test1 = { 1 : set(["server1"]), 2 : set(["server1"]), 3 : set(["server1"]), 4 : set(["server2"]), 5 : set(["server2"]) } sbs = shares_by_server(test1) self.failUnlessEqual(set([1, 2, 3]), sbs["server1"]) self.failUnlessEqual(set([4, 5]), sbs["server2"]) # This should fail unless the serverid part of the mapping is a set test2 = {1: "server1"} self.shouldFail(AssertionError, "test_shares_by_server", "", shares_by_server, test2) def test_existing_share_detection(self): self.basedir = self.mktemp() d = self._setup_and_upload() # Our final setup should look like this: # server 1: shares 0 - 9, read-only # server 2: empty # server 3: empty # server 4: empty # The purpose of this test is to make sure that the server selector # knows about the shares on server 1, even though it is read-only. # It used to simply filter these out, which would cause the test # to fail when servers_of_happiness = 4. d.addCallback(lambda ign: self._add_server_with_share(1, 0, True)) d.addCallback(lambda ign: self._add_server(2)) d.addCallback(lambda ign: self._add_server(3)) d.addCallback(lambda ign: self._add_server(4)) def _copy_shares(ign): for i in xrange(1, 10): self._copy_share_to_server(i, 1) d.addCallback(_copy_shares) d.addCallback(lambda ign: self.g.remove_server(self.g.servers_by_number[0].my_nodeid)) def _prepare_client(ign): client = self.g.clients[0] client.DEFAULT_ENCODING_PARAMETERS['happy'] = 4 return client d.addCallback(_prepare_client) d.addCallback(lambda client: client.upload(upload.Data("data" * 10000, convergence=""))) d.addCallback(lambda ign: self.failUnless(self._has_happy_share_distribution())) return d def test_query_counting(self): # If server selection fails, Tahoe2ServerSelector prints out a lot # of helpful diagnostic information, including query stats. # This test helps make sure that that information is accurate. self.basedir = self.mktemp() d = self._setup_and_upload() def _setup(ign): for i in xrange(1, 11): self._add_server(server_number=i) self.g.remove_server(self.g.servers_by_number[0].my_nodeid) c = self.g.clients[0] # We set happy to an unsatisfiable value so that we can check the # counting in the exception message. 
The same progress message # is also used when the upload is successful, but in that case it # only gets written to a log, so we can't see what it says. c.DEFAULT_ENCODING_PARAMETERS['happy'] = 45 return c d.addCallback(_setup) d.addCallback(lambda c: self.shouldFail(UploadUnhappinessError, "test_query_counting", "10 queries placed some shares", c.upload, upload.Data("data" * 10000, convergence=""))) # Now try with some readonly servers. We want to make sure that # the readonly server share discovery phase is counted correctly. def _reset(ign): self.basedir = self.mktemp() self.g = None d.addCallback(_reset) d.addCallback(lambda ign: self._setup_and_upload()) def _then(ign): for i in xrange(1, 11): self._add_server(server_number=i) self._add_server(server_number=11, readonly=True) self._add_server(server_number=12, readonly=True) self.g.remove_server(self.g.servers_by_number[0].my_nodeid) c = self.g.clients[0] c.DEFAULT_ENCODING_PARAMETERS['happy'] = 45 return c d.addCallback(_then) d.addCallback(lambda c: self.shouldFail(UploadUnhappinessError, "test_query_counting", "2 placed none (of which 2 placed none due to " "the server being full", c.upload, upload.Data("data" * 10000, convergence=""))) # Now try the case where the upload process finds a bunch of the # shares that it wants to place on the first server, including # the one that it wanted to allocate there. Though no shares will # be allocated in this request, it should still be called # productive, since it caused some homeless shares to be # removed. d.addCallback(_reset) d.addCallback(lambda ign: self._setup_and_upload()) def _next(ign): for i in xrange(1, 11): self._add_server(server_number=i) # Copy all of the shares to server 9, since that will be # the first one that the selector sees. for i in xrange(10): self._copy_share_to_server(i, 9) # Remove server 0, and its contents self.g.remove_server(self.g.servers_by_number[0].my_nodeid) # Make happiness unsatisfiable c = self.g.clients[0] c.DEFAULT_ENCODING_PARAMETERS['happy'] = 45 return c d.addCallback(_next) d.addCallback(lambda c: self.shouldFail(UploadUnhappinessError, "test_query_counting", "1 queries placed some shares", c.upload, upload.Data("data" * 10000, convergence=""))) return d def test_upper_limit_on_readonly_queries(self): self.basedir = self.mktemp() d = self._setup_and_upload() def _then(ign): for i in xrange(1, 11): self._add_server(server_number=i, readonly=True) self.g.remove_server(self.g.servers_by_number[0].my_nodeid) c = self.g.clients[0] c.DEFAULT_ENCODING_PARAMETERS['k'] = 2 c.DEFAULT_ENCODING_PARAMETERS['happy'] = 4 c.DEFAULT_ENCODING_PARAMETERS['n'] = 4 return c d.addCallback(_then) d.addCallback(lambda client: self.shouldFail(UploadUnhappinessError, "test_upper_limit_on_readonly_queries", "sent 8 queries to 8 servers", client.upload, upload.Data('data' * 10000, convergence=""))) return d def test_exception_messages_during_server_selection(self): # server 1: read-only, no shares # server 2: read-only, no shares # server 3: read-only, no shares # server 4: read-only, no shares # server 5: read-only, no shares # This will fail, but we want to make sure that the log messages # are informative about why it has failed. 
self.basedir = self.mktemp() d = self._setup_and_upload() d.addCallback(lambda ign: self._add_server(server_number=1, readonly=True)) d.addCallback(lambda ign: self._add_server(server_number=2, readonly=True)) d.addCallback(lambda ign: self._add_server(server_number=3, readonly=True)) d.addCallback(lambda ign: self._add_server(server_number=4, readonly=True)) d.addCallback(lambda ign: self._add_server(server_number=5, readonly=True)) d.addCallback(lambda ign: self.g.remove_server(self.g.servers_by_number[0].my_nodeid)) def _reset_encoding_parameters(ign, happy=4): client = self.g.clients[0] client.DEFAULT_ENCODING_PARAMETERS['happy'] = happy return client d.addCallback(_reset_encoding_parameters) d.addCallback(lambda client: self.shouldFail(UploadUnhappinessError, "test_selection_exceptions", "placed 0 shares out of 10 " "total (10 homeless), want to place shares on at " "least 4 servers such that any 3 of them have " "enough shares to recover the file, " "sent 5 queries to 5 servers, 0 queries placed " "some shares, 5 placed none " "(of which 5 placed none due to the server being " "full and 0 placed none due to an error)", client.upload, upload.Data("data" * 10000, convergence=""))) # server 1: read-only, no shares # server 2: broken, no shares # server 3: read-only, no shares # server 4: read-only, no shares # server 5: read-only, no shares def _reset(ign): self.basedir = self.mktemp() d.addCallback(_reset) d.addCallback(lambda ign: self._setup_and_upload()) d.addCallback(lambda ign: self._add_server(server_number=1, readonly=True)) d.addCallback(lambda ign: self._add_server(server_number=2)) def _break_server_2(ign): serverid = self.g.servers_by_number[2].my_nodeid self.g.break_server(serverid) d.addCallback(_break_server_2) d.addCallback(lambda ign: self._add_server(server_number=3, readonly=True)) d.addCallback(lambda ign: self._add_server(server_number=4, readonly=True)) d.addCallback(lambda ign: self._add_server(server_number=5, readonly=True)) d.addCallback(lambda ign: self.g.remove_server(self.g.servers_by_number[0].my_nodeid)) d.addCallback(_reset_encoding_parameters) d.addCallback(lambda client: self.shouldFail(UploadUnhappinessError, "test_selection_exceptions", "placed 0 shares out of 10 " "total (10 homeless), want to place shares on at " "least 4 servers such that any 3 of them have " "enough shares to recover the file, " "sent 5 queries to 5 servers, 0 queries placed " "some shares, 5 placed none " "(of which 4 placed none due to the server being " "full and 1 placed none due to an error)", client.upload, upload.Data("data" * 10000, convergence=""))) # server 0, server 1 = empty, accepting shares # This should place all of the shares, but still fail with happy=4. # We want to make sure that the exception message is worded correctly. d.addCallback(_reset) d.addCallback(lambda ign: self._setup_grid()) d.addCallback(lambda ign: self._add_server(server_number=1)) d.addCallback(_reset_encoding_parameters) d.addCallback(lambda client: self.shouldFail(UploadUnhappinessError, "test_selection_exceptions", "shares could be placed or found on only 2 " "server(s). 
We were asked to place shares on at " "least 4 server(s) such that any 3 of them have " "enough shares to recover the file.", client.upload, upload.Data("data" * 10000, convergence=""))) # servers 0 - 4 = empty, accepting shares # This too should place all the shares, and this too should fail, # but since the effective happiness is more than the k encoding # parameter, it should trigger a different error message than the one # above. d.addCallback(_reset) d.addCallback(lambda ign: self._setup_grid()) d.addCallback(lambda ign: self._add_server(server_number=1)) d.addCallback(lambda ign: self._add_server(server_number=2)) d.addCallback(lambda ign: self._add_server(server_number=3)) d.addCallback(lambda ign: self._add_server(server_number=4)) d.addCallback(_reset_encoding_parameters, happy=7) d.addCallback(lambda client: self.shouldFail(UploadUnhappinessError, "test_selection_exceptions", "shares could be placed on only 5 server(s) such " "that any 3 of them have enough shares to recover " "the file, but we were asked to place shares on " "at least 7 such servers.", client.upload, upload.Data("data" * 10000, convergence=""))) # server 0: shares 0 - 9 # server 1: share 0, read-only # server 2: share 0, read-only # server 3: share 0, read-only # This should place all of the shares, but fail with happy=4. # Since the number of servers with shares is more than the number # necessary to reconstitute the file, this will trigger a different # error message than either of those above. d.addCallback(_reset) d.addCallback(lambda ign: self._setup_and_upload()) d.addCallback(lambda ign: self._add_server_with_share(server_number=1, share_number=0, readonly=True)) d.addCallback(lambda ign: self._add_server_with_share(server_number=2, share_number=0, readonly=True)) d.addCallback(lambda ign: self._add_server_with_share(server_number=3, share_number=0, readonly=True)) d.addCallback(_reset_encoding_parameters, happy=7) d.addCallback(lambda client: self.shouldFail(UploadUnhappinessError, "test_selection_exceptions", "shares could be placed or found on 4 server(s), " "but they are not spread out evenly enough to " "ensure that any 3 of these servers would have " "enough shares to recover the file. We were asked " "to place shares on at least 7 servers such that " "any 3 of them have enough shares to recover the " "file", client.upload, upload.Data("data" * 10000, convergence=""))) return d def test_problem_layout_comment_187(self): # #778 comment 187 broke an initial attempt at a share # redistribution algorithm. This test is here to demonstrate the # breakage, and to test that subsequent algorithms don't also # break in the same way. 
self.basedir = self.mktemp() d = self._setup_and_upload(k=2, n=3) # server 1: shares 0, 1, 2, readonly # server 2: share 0, readonly # server 3: share 0 def _setup(ign): self._add_server_with_share(server_number=1, share_number=0, readonly=True) self._add_server_with_share(server_number=2, share_number=0, readonly=True) self._add_server_with_share(server_number=3, share_number=0) # Copy shares self._copy_share_to_server(1, 1) self._copy_share_to_server(2, 1) # Remove server 0 self.g.remove_server(self.g.servers_by_number[0].my_nodeid) client = self.g.clients[0] client.DEFAULT_ENCODING_PARAMETERS['happy'] = 3 return client d.addCallback(_setup) d.addCallback(lambda client: client.upload(upload.Data("data" * 10000, convergence=""))) d.addCallback(lambda ign: self.failUnless(self._has_happy_share_distribution())) return d test_problem_layout_comment_187.todo = "this isn't fixed yet" def test_problem_layout_ticket_1118(self): # #1118 includes a report from a user who hit an assertion in # the upload code with this layout. self.basedir = self.mktemp() d = self._setup_and_upload(k=2, n=4) # server 0: no shares # server 1: shares 0, 3 # server 3: share 1 # server 2: share 2 # The order that they get queries is 0, 1, 3, 2 def _setup(ign): self._add_server(server_number=0) self._add_server_with_share(server_number=1, share_number=0) self._add_server_with_share(server_number=2, share_number=2) self._add_server_with_share(server_number=3, share_number=1) # Copy shares self._copy_share_to_server(3, 1) storedir = self.get_serverdir(0) # remove the storedir, wiping out any existing shares shutil.rmtree(storedir) # create an empty storedir to replace the one we just removed os.mkdir(storedir) client = self.g.clients[0] client.DEFAULT_ENCODING_PARAMETERS['happy'] = 4 return client d.addCallback(_setup) # Note: actually it should succeed! See # test_problem_layout_ticket_1128. But ticket 1118 is just to # make it realize that it has failed, so if it raises # UploadUnhappinessError then we'll give it the green light # for now. d.addCallback(lambda ignored: self.shouldFail(UploadUnhappinessError, "test_problem_layout_ticket_1118", "", self.g.clients[0].upload, upload.Data("data" * 10000, convergence=""))) return d def test_problem_layout_ticket_1128(self): # #1118 includes a report from a user who hit an assertion in # the upload code with this layout. self.basedir = self.mktemp() d = self._setup_and_upload(k=2, n=4) # server 0: no shares # server 1: shares 0, 3 # server 3: share 1 # server 2: share 2 # The order that they get queries is 0, 1, 3, 2 def _setup(ign): self._add_server(server_number=0) self._add_server_with_share(server_number=1, share_number=0) self._add_server_with_share(server_number=2, share_number=2) self._add_server_with_share(server_number=3, share_number=1) # Copy shares self._copy_share_to_server(3, 1) storedir = self.get_serverdir(0) # remove the storedir, wiping out any existing shares shutil.rmtree(storedir) # create an empty storedir to replace the one we just removed os.mkdir(storedir) client = self.g.clients[0] client.DEFAULT_ENCODING_PARAMETERS['happy'] = 4 return client d.addCallback(_setup) d.addCallback(lambda client: client.upload(upload.Data("data" * 10000, convergence=""))) d.addCallback(lambda ign: self.failUnless(self._has_happy_share_distribution())) return d test_problem_layout_ticket_1128.todo = "Invent a smarter uploader that uploads successfully in this case." 
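
    # Illustrative sketch (not part of the original test suite): the reason a
    # smarter uploader could succeed in the ticket-1118/1128 layout above can
    # be seen by running servers_of_happiness() (the same utility exercised in
    # test_servers_of_happiness_utility_function) on the pre-existing shares.
    # The method name and the literal mapping below are ours, transcribed from
    # the comments in test_problem_layout_ticket_1128; illustration only.
    def _sketch_ticket_1128_existing_happiness(self):
        # shnum -> set(serverids), as servers_of_happiness() expects:
        # server 1 holds shares 0 and 3, server 3 holds share 1, server 2
        # holds share 2, and server 0 holds nothing (so it does not appear).
        existing = {0: set(["server1"]),
                    1: set(["server3"]),
                    2: set(["server2"]),
                    3: set(["server1"])}
        # The maximum matching pairs three distinct servers with three distinct
        # shares (server 1 can only be matched once), so the pre-existing
        # happiness is 3, one short of happy=4. Placing any single share on the
        # empty server 0 would raise it to 4, which is what ticket #1128 asks
        # the uploader to figure out on its own.
        self.failUnlessEqual(servers_of_happiness(existing), 3)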
def test_upload_succeeds_with_some_homeless_shares(self): # If the upload is forced to stop trying to place shares before # it has placed (or otherwise accounted) for all of them, but it # has placed enough to satisfy the upload health criteria that # we're using, it should still succeed. self.basedir = self.mktemp() d = self._setup_and_upload() def _server_setup(ign): # Add four servers so that we have a layout like this: # server 1: share 0, read-only # server 2: share 1, read-only # server 3: share 2, read-only # server 4: share 3, read-only # If we set happy = 4, the upload will manage to satisfy # servers of happiness, but not place all of the shares; we # want to test that the upload is declared successful in # this case. self._add_server_with_share(server_number=1, share_number=0, readonly=True) self._add_server_with_share(server_number=2, share_number=1, readonly=True) self._add_server_with_share(server_number=3, share_number=2, readonly=True) self._add_server_with_share(server_number=4, share_number=3, readonly=True) # Remove server 0. self.g.remove_server(self.g.servers_by_number[0].my_nodeid) # Set the client appropriately c = self.g.clients[0] c.DEFAULT_ENCODING_PARAMETERS['happy'] = 4 return c d.addCallback(_server_setup) d.addCallback(lambda client: client.upload(upload.Data("data" * 10000, convergence=""))) d.addCallback(lambda ign: self.failUnless(self._has_happy_share_distribution())) return d def test_uploader_skips_over_servers_with_only_one_share(self): # We want to make sure that the redistribution logic ignores # servers with only one share, since placing these shares # elsewhere will at best keep happiness the same as it was, and # at worst hurt it. self.basedir = self.mktemp() d = self._setup_and_upload() def _server_setup(ign): # Add some servers so that the upload will need to # redistribute, but will first pass over a couple of servers # that don't have enough shares to redistribute before # finding one that does have shares to redistribute. self._add_server_with_share(server_number=1, share_number=0) self._add_server_with_share(server_number=2, share_number=2) self._add_server_with_share(server_number=3, share_number=1) self._add_server_with_share(server_number=8, share_number=4) self._add_server_with_share(server_number=5, share_number=5) self._add_server_with_share(server_number=10, share_number=7) for i in xrange(4): self._copy_share_to_server(i, 2) return self.g.clients[0] d.addCallback(_server_setup) d.addCallback(lambda client: client.upload(upload.Data("data" * 10000, convergence=""))) d.addCallback(lambda ign: self.failUnless(self._has_happy_share_distribution())) return d def test_server_selector_bucket_abort(self): # If server selection for an upload fails due to an unhappy # layout, the server selection process should abort the buckets it # allocates before failing, so that the space can be re-used. self.basedir = self.mktemp() self.set_up_grid(num_servers=5) # Try to upload a file with happy=7, which is unsatisfiable with # the current grid. This will fail, but should not take up any # space on the storage servers after it fails. client = self.g.clients[0] client.DEFAULT_ENCODING_PARAMETERS['happy'] = 7 d = defer.succeed(None) d.addCallback(lambda ignored: self.shouldFail(UploadUnhappinessError, "test_server_selection_bucket_abort", "", client.upload, upload.Data("data" * 10000, convergence=""))) # wait for the abort messages to get there. 
        def _turn_barrier(res):
            return fireEventually(res)
        d.addCallback(_turn_barrier)
        def _then(ignored):
            for server in self.g.servers_by_number.values():
                self.failUnlessEqual(server.allocated_size(), 0)
        d.addCallback(_then)
        return d

    def test_encoder_bucket_abort(self):
        # If enough servers die in the process of encoding and uploading
        # a file to make the layout unhappy, we should cancel the
        # newly-allocated buckets before dying.
        self.basedir = self.mktemp()
        self.set_up_grid(num_servers=4)
        client = self.g.clients[0]
        client.DEFAULT_ENCODING_PARAMETERS['happy'] = 7

        d = defer.succeed(None)
        d.addCallback(lambda ignored:
            self.shouldFail(UploadUnhappinessError,
                            "test_encoder_bucket_abort",
                            "",
                            self._do_upload_with_broken_servers, 1))
        def _turn_barrier(res):
            return fireEventually(res)
        d.addCallback(_turn_barrier)
        def _then(ignored):
            for server in self.g.servers_by_number.values():
                self.failUnlessEqual(server.allocated_size(), 0)
        d.addCallback(_then)
        return d

    def _set_up_nodes_extra_config(self, clientdir):
        cfgfn = os.path.join(clientdir, "tahoe.cfg")
        oldcfg = open(cfgfn, "r").read()
        f = open(cfgfn, "wt")
        f.write(oldcfg)
        f.write("\n")
        f.write("[client]\n")
        f.write("shares.needed = 7\n")
        f.write("shares.total = 12\n")
        f.write("\n")
        f.close()
        return None

# TODO:
#  upload with exactly 75 servers (shares_of_happiness)
#  have a download fail
#  cancel a download (need to implement more cancel stuff)

# from test_encode:
#  NoNetworkGrid, upload part of ciphertext, kill server, continue upload
#  check with Kevan, they want to live in test_upload, existing tests might cover
#     def test_lost_one_shareholder(self): # these are upload-side tests
#     def test_lost_one_shareholder_early(self):
#     def test_lost_many_shareholders(self):
#     def test_lost_all_shareholders(self):

tahoe-lafs-1.10.0/src/allmydata/test/test_uri.py

import os, re

from twisted.trial import unittest

from allmydata import uri
from allmydata.util import hashutil, base32
from allmydata.interfaces import IURI, IFileURI, IDirnodeURI, IMutableFileURI, \
     IVerifierURI, CapConstraintError
import allmydata.test.common_util as testutil

class Literal(testutil.ReallyEqualMixin, unittest.TestCase):
    def _help_test(self, data):
        u = uri.LiteralFileURI(data)
        self.failUnless(IURI.providedBy(u))
        self.failUnless(IFileURI.providedBy(u))
        self.failIf(IDirnodeURI.providedBy(u))
        self.failUnlessReallyEqual(u.data, data)
        self.failUnlessReallyEqual(u.get_size(), len(data))
        self.failUnless(u.is_readonly())
        self.failIf(u.is_mutable())

        u2 = uri.from_string(u.to_string())
        self.failUnless(IURI.providedBy(u2))
        self.failUnless(IFileURI.providedBy(u2))
        self.failIf(IDirnodeURI.providedBy(u2))
        self.failUnlessReallyEqual(u2.data, data)
        self.failUnlessReallyEqual(u2.get_size(), len(data))
        self.failUnless(u2.is_readonly())
        self.failIf(u2.is_mutable())

        u2i = uri.from_string(u.to_string(), deep_immutable=True)
        self.failUnless(IFileURI.providedBy(u2i))
        self.failIf(IDirnodeURI.providedBy(u2i))
        self.failUnlessReallyEqual(u2i.data, data)
        self.failUnlessReallyEqual(u2i.get_size(), len(data))
        self.failUnless(u2i.is_readonly())
        self.failIf(u2i.is_mutable())

        u3 = u.get_readonly()
        self.failUnlessIdentical(u, u3)
        self.failUnlessReallyEqual(u.get_verify_cap(), None)

        he = u.to_human_encoding()
        u_h = uri.LiteralFileURI.init_from_human_encoding(he)
        self.failUnlessReallyEqual(u, u_h)

    def test_empty(self):
        data = "" # This data is some *very* small data!
return self._help_test(data) def test_pack(self): data = "This is some small data" return self._help_test(data) def test_nonascii(self): data = "This contains \x00 and URI:LIT: and \n, oh my." return self._help_test(data) class Compare(testutil.ReallyEqualMixin, unittest.TestCase): def test_compare(self): lit1 = uri.LiteralFileURI("some data") fileURI = 'URI:CHK:f5ahxa25t4qkktywz6teyfvcx4:opuioq7tj2y6idzfp6cazehtmgs5fdcebcz3cygrxyydvcozrmeq:3:10:345834' chk1 = uri.CHKFileURI.init_from_string(fileURI) chk2 = uri.CHKFileURI.init_from_string(fileURI) unk = uri.UnknownURI("lafs://from_the_future") self.failIfEqual(lit1, chk1) self.failUnlessReallyEqual(chk1, chk2) self.failIfEqual(chk1, "not actually a URI") # these should be hashable too s = set([lit1, chk1, chk2, unk]) self.failUnlessReallyEqual(len(s), 3) # since chk1==chk2 def test_is_uri(self): lit1 = uri.LiteralFileURI("some data").to_string() self.failUnless(uri.is_uri(lit1)) self.failIf(uri.is_uri(None)) def test_is_literal_file_uri(self): lit1 = uri.LiteralFileURI("some data").to_string() self.failUnless(uri.is_literal_file_uri(lit1)) self.failIf(uri.is_literal_file_uri(None)) self.failIf(uri.is_literal_file_uri("foo")) self.failIf(uri.is_literal_file_uri("ro.foo")) self.failIf(uri.is_literal_file_uri("URI:LITfoo")) self.failUnless(uri.is_literal_file_uri("ro.URI:LIT:foo")) self.failUnless(uri.is_literal_file_uri("imm.URI:LIT:foo")) def test_has_uri_prefix(self): self.failUnless(uri.has_uri_prefix("URI:foo")) self.failUnless(uri.has_uri_prefix("ro.URI:foo")) self.failUnless(uri.has_uri_prefix("imm.URI:foo")) self.failIf(uri.has_uri_prefix(None)) self.failIf(uri.has_uri_prefix("foo")) class CHKFile(testutil.ReallyEqualMixin, unittest.TestCase): def test_pack(self): key = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" storage_index = hashutil.storage_index_hash(key) uri_extension_hash = hashutil.uri_extension_hash("stuff") needed_shares = 25 total_shares = 100 size = 1234 u = uri.CHKFileURI(key=key, uri_extension_hash=uri_extension_hash, needed_shares=needed_shares, total_shares=total_shares, size=size) self.failUnlessReallyEqual(u.get_storage_index(), storage_index) self.failUnlessReallyEqual(u.key, key) self.failUnlessReallyEqual(u.uri_extension_hash, uri_extension_hash) self.failUnlessReallyEqual(u.needed_shares, needed_shares) self.failUnlessReallyEqual(u.total_shares, total_shares) self.failUnlessReallyEqual(u.size, size) self.failUnless(u.is_readonly()) self.failIf(u.is_mutable()) self.failUnless(IURI.providedBy(u)) self.failUnless(IFileURI.providedBy(u)) self.failIf(IDirnodeURI.providedBy(u)) self.failUnlessReallyEqual(u.get_size(), 1234) u_ro = u.get_readonly() self.failUnlessIdentical(u, u_ro) he = u.to_human_encoding() self.failUnlessReallyEqual(he, "http://127.0.0.1:3456/uri/" + u.to_string()) self.failUnlessReallyEqual(uri.CHKFileURI.init_from_human_encoding(he), u) u2 = uri.from_string(u.to_string()) self.failUnlessReallyEqual(u2.get_storage_index(), storage_index) self.failUnlessReallyEqual(u2.key, key) self.failUnlessReallyEqual(u2.uri_extension_hash, uri_extension_hash) self.failUnlessReallyEqual(u2.needed_shares, needed_shares) self.failUnlessReallyEqual(u2.total_shares, total_shares) self.failUnlessReallyEqual(u2.size, size) self.failUnless(u2.is_readonly()) self.failIf(u2.is_mutable()) self.failUnless(IURI.providedBy(u2)) self.failUnless(IFileURI.providedBy(u2)) self.failIf(IDirnodeURI.providedBy(u2)) self.failUnlessReallyEqual(u2.get_size(), 1234) u2i = uri.from_string(u.to_string(), 
deep_immutable=True) self.failUnlessReallyEqual(u.to_string(), u2i.to_string()) u2ro = uri.from_string(uri.ALLEGED_READONLY_PREFIX + u.to_string()) self.failUnlessReallyEqual(u.to_string(), u2ro.to_string()) u2imm = uri.from_string(uri.ALLEGED_IMMUTABLE_PREFIX + u.to_string()) self.failUnlessReallyEqual(u.to_string(), u2imm.to_string()) v = u.get_verify_cap() self.failUnless(isinstance(v.to_string(), str)) self.failUnless(v.is_readonly()) self.failIf(v.is_mutable()) v2 = uri.from_string(v.to_string()) self.failUnlessReallyEqual(v, v2) he = v.to_human_encoding() v2_h = uri.CHKFileVerifierURI.init_from_human_encoding(he) self.failUnlessReallyEqual(v2, v2_h) v3 = uri.CHKFileVerifierURI(storage_index="\x00"*16, uri_extension_hash="\x00"*32, needed_shares=3, total_shares=10, size=1234) self.failUnless(isinstance(v3.to_string(), str)) self.failUnless(v3.is_readonly()) self.failIf(v3.is_mutable()) def test_pack_badly(self): key = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" storage_index = hashutil.storage_index_hash(key) uri_extension_hash = hashutil.uri_extension_hash("stuff") needed_shares = 25 total_shares = 100 size = 1234 self.failUnlessRaises(TypeError, uri.CHKFileURI, key=key, uri_extension_hash=uri_extension_hash, needed_shares=needed_shares, total_shares=total_shares, size=size, bogus_extra_argument="reject me", ) self.failUnlessRaises(TypeError, uri.CHKFileVerifierURI, bogus="bogus") self.failUnlessRaises(TypeError, uri.CHKFileVerifierURI, storage_index=storage_index, uri_extension_hash=uri_extension_hash, needed_shares=3, total_shares=10, # leave size= missing ) class Extension(testutil.ReallyEqualMixin, unittest.TestCase): def test_pack(self): data = {"stuff": "value", "size": 12, "needed_shares": 3, "big_hash": hashutil.tagged_hash("foo", "bar"), } ext = uri.pack_extension(data) d = uri.unpack_extension(ext) self.failUnlessReallyEqual(d["stuff"], "value") self.failUnlessReallyEqual(d["size"], 12) self.failUnlessReallyEqual(d["big_hash"], hashutil.tagged_hash("foo", "bar")) readable = uri.unpack_extension_readable(ext) self.failUnlessReallyEqual(readable["needed_shares"], 3) self.failUnlessReallyEqual(readable["stuff"], "value") self.failUnlessReallyEqual(readable["size"], 12) self.failUnlessReallyEqual(readable["big_hash"], base32.b2a(hashutil.tagged_hash("foo", "bar"))) self.failUnlessReallyEqual(readable["UEB_hash"], base32.b2a(hashutil.uri_extension_hash(ext))) class Unknown(testutil.ReallyEqualMixin, unittest.TestCase): def test_from_future(self): # any URI type that we don't recognize should be treated as unknown future_uri = "I am a URI from the future. Whatever you do, don't " u = uri.from_string(future_uri) self.failUnless(isinstance(u, uri.UnknownURI)) self.failUnlessReallyEqual(u.to_string(), future_uri) self.failUnless(u.get_readonly() is None) self.failUnless(u.get_error() is None) u2 = uri.UnknownURI(future_uri, error=CapConstraintError("...")) self.failUnlessReallyEqual(u.to_string(), future_uri) self.failUnless(u2.get_readonly() is None) self.failUnless(isinstance(u2.get_error(), CapConstraintError)) # Future caps might have non-ASCII chars in them. (Or maybe not, who can tell about the future?) future_uri = u"I am a cap from the \u263A future. 
Whatever you ".encode('utf-8') u = uri.from_string(future_uri) self.failUnless(isinstance(u, uri.UnknownURI)) self.failUnlessReallyEqual(u.to_string(), future_uri) self.failUnless(u.get_readonly() is None) self.failUnless(u.get_error() is None) u2 = uri.UnknownURI(future_uri, error=CapConstraintError("...")) self.failUnlessReallyEqual(u.to_string(), future_uri) self.failUnless(u2.get_readonly() is None) self.failUnless(isinstance(u2.get_error(), CapConstraintError)) class Constraint(testutil.ReallyEqualMixin, unittest.TestCase): def test_constraint(self): good="http://127.0.0.1:3456/uri/URI%3ADIR2%3Agh3l5rbvnv2333mrfvalmjfr4i%3Alz6l7u3z3b7g37s4zkdmfpx5ly4ib4m6thrpbusi6ys62qtc6mma/" uri.DirectoryURI.init_from_human_encoding(good) self.failUnlessRaises(uri.BadURIError, uri.DirectoryURI.init_from_string, good) bad = good + '===' self.failUnlessRaises(uri.BadURIError, uri.DirectoryURI.init_from_human_encoding, bad) self.failUnlessRaises(uri.BadURIError, uri.DirectoryURI.init_from_string, bad) fileURI = 'URI:CHK:gh3l5rbvnv2333mrfvalmjfr4i:lz6l7u3z3b7g37s4zkdmfpx5ly4ib4m6thrpbusi6ys62qtc6mma:3:10:345834' uri.CHKFileURI.init_from_string(fileURI) class Mutable(testutil.ReallyEqualMixin, unittest.TestCase): def setUp(self): self.writekey = "\x01" * 16 self.fingerprint = "\x02" * 32 self.readkey = hashutil.ssk_readkey_hash(self.writekey) self.storage_index = hashutil.ssk_storage_index_hash(self.readkey) def test_pack(self): u = uri.WriteableSSKFileURI(self.writekey, self.fingerprint) self.failUnlessReallyEqual(u.writekey, self.writekey) self.failUnlessReallyEqual(u.fingerprint, self.fingerprint) self.failIf(u.is_readonly()) self.failUnless(u.is_mutable()) self.failUnless(IURI.providedBy(u)) self.failUnless(IMutableFileURI.providedBy(u)) self.failIf(IDirnodeURI.providedBy(u)) self.failUnless("WriteableSSKFileURI" in str(u)) he = u.to_human_encoding() u_h = uri.WriteableSSKFileURI.init_from_human_encoding(he) self.failUnlessReallyEqual(u, u_h) u2 = uri.from_string(u.to_string()) self.failUnlessReallyEqual(u2.writekey, self.writekey) self.failUnlessReallyEqual(u2.fingerprint, self.fingerprint) self.failIf(u2.is_readonly()) self.failUnless(u2.is_mutable()) self.failUnless(IURI.providedBy(u2)) self.failUnless(IMutableFileURI.providedBy(u2)) self.failIf(IDirnodeURI.providedBy(u2)) u2i = uri.from_string(u.to_string(), deep_immutable=True) self.failUnless(isinstance(u2i, uri.UnknownURI), u2i) u2ro = uri.from_string(uri.ALLEGED_READONLY_PREFIX + u.to_string()) self.failUnless(isinstance(u2ro, uri.UnknownURI), u2ro) u2imm = uri.from_string(uri.ALLEGED_IMMUTABLE_PREFIX + u.to_string()) self.failUnless(isinstance(u2imm, uri.UnknownURI), u2imm) u3 = u2.get_readonly() readkey = hashutil.ssk_readkey_hash(self.writekey) self.failUnlessReallyEqual(u3.fingerprint, self.fingerprint) self.failUnlessReallyEqual(u3.readkey, readkey) self.failUnless(u3.is_readonly()) self.failUnless(u3.is_mutable()) self.failUnless(IURI.providedBy(u3)) self.failUnless(IMutableFileURI.providedBy(u3)) self.failIf(IDirnodeURI.providedBy(u3)) u3i = uri.from_string(u3.to_string(), deep_immutable=True) self.failUnless(isinstance(u3i, uri.UnknownURI), u3i) u3ro = uri.from_string(uri.ALLEGED_READONLY_PREFIX + u3.to_string()) self.failUnlessReallyEqual(u3.to_string(), u3ro.to_string()) u3imm = uri.from_string(uri.ALLEGED_IMMUTABLE_PREFIX + u3.to_string()) self.failUnless(isinstance(u3imm, uri.UnknownURI), u3imm) he = u3.to_human_encoding() u3_h = uri.ReadonlySSKFileURI.init_from_human_encoding(he) self.failUnlessReallyEqual(u3, u3_h) u4 = 
uri.ReadonlySSKFileURI(readkey, self.fingerprint) self.failUnlessReallyEqual(u4.fingerprint, self.fingerprint) self.failUnlessReallyEqual(u4.readkey, readkey) self.failUnless(u4.is_readonly()) self.failUnless(u4.is_mutable()) self.failUnless(IURI.providedBy(u4)) self.failUnless(IMutableFileURI.providedBy(u4)) self.failIf(IDirnodeURI.providedBy(u4)) u4i = uri.from_string(u4.to_string(), deep_immutable=True) self.failUnless(isinstance(u4i, uri.UnknownURI), u4i) u4ro = uri.from_string(uri.ALLEGED_READONLY_PREFIX + u4.to_string()) self.failUnlessReallyEqual(u4.to_string(), u4ro.to_string()) u4imm = uri.from_string(uri.ALLEGED_IMMUTABLE_PREFIX + u4.to_string()) self.failUnless(isinstance(u4imm, uri.UnknownURI), u4imm) u4a = uri.from_string(u4.to_string()) self.failUnlessReallyEqual(u4a, u4) self.failUnless("ReadonlySSKFileURI" in str(u4a)) self.failUnlessIdentical(u4a.get_readonly(), u4a) u5 = u4.get_verify_cap() self.failUnless(IVerifierURI.providedBy(u5)) self.failUnlessReallyEqual(u5.get_storage_index(), u.get_storage_index()) u7 = u.get_verify_cap() self.failUnless(IVerifierURI.providedBy(u7)) self.failUnlessReallyEqual(u7.get_storage_index(), u.get_storage_index()) he = u5.to_human_encoding() u5_h = uri.SSKVerifierURI.init_from_human_encoding(he) self.failUnlessReallyEqual(u5, u5_h) def test_writeable_mdmf_cap(self): u1 = uri.WriteableMDMFFileURI(self.writekey, self.fingerprint) cap = u1.to_string() u = uri.WriteableMDMFFileURI.init_from_string(cap) self.failUnless(IMutableFileURI.providedBy(u)) self.failUnlessReallyEqual(u.fingerprint, self.fingerprint) self.failUnlessReallyEqual(u.writekey, self.writekey) self.failUnless(u.is_mutable()) self.failIf(u.is_readonly()) self.failUnlessEqual(cap, u.to_string()) # Now get a readonly cap from the writeable cap, and test that it # degrades gracefully. ru = u.get_readonly() self.failUnlessReallyEqual(self.readkey, ru.readkey) self.failUnlessReallyEqual(self.fingerprint, ru.fingerprint) self.failUnless(ru.is_mutable()) self.failUnless(ru.is_readonly()) # Now get a verifier cap. vu = ru.get_verify_cap() self.failUnlessReallyEqual(self.storage_index, vu.storage_index) self.failUnlessReallyEqual(self.fingerprint, vu.fingerprint) self.failUnless(IVerifierURI.providedBy(vu)) def test_readonly_mdmf_cap(self): u1 = uri.ReadonlyMDMFFileURI(self.readkey, self.fingerprint) cap = u1.to_string() u2 = uri.ReadonlyMDMFFileURI.init_from_string(cap) self.failUnlessReallyEqual(u2.fingerprint, self.fingerprint) self.failUnlessReallyEqual(u2.readkey, self.readkey) self.failUnless(u2.is_readonly()) self.failUnless(u2.is_mutable()) vu = u2.get_verify_cap() self.failUnlessEqual(vu.storage_index, self.storage_index) self.failUnlessEqual(vu.fingerprint, self.fingerprint) def test_create_writeable_mdmf_cap_from_readcap(self): # we shouldn't be able to create a writeable MDMF cap given only a # readcap. 
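# The attenuation exercised above is one-way by construction: setUp derives
# the read key by hashing the write key (hashutil.ssk_readkey_hash) and the
# storage index by hashing the read key (hashutil.ssk_storage_index_hash).
# Since hashes cannot be inverted, a readcap or verify cap can never be turned
# back into a writecap -- which is what the init_from_string checks below
# assert.  Illustrative stdlib-only sketch; the real tagged hashes and
# truncation in allmydata.util.hashutil differ in detail.

import hashlib

def _tagged_hash(tag, data):
    return hashlib.sha256(tag + ":" + data).digest()

writekey = "\x01" * 16
readkey = _tagged_hash("readkey", writekey)              # write -> read: cheap
storage_index = _tagged_hash("storage-index", readkey)   # read -> verify: cheap
# Going the other way would require inverting SHA-256, so readcaps stay read-only.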
u1 = uri.ReadonlyMDMFFileURI(self.readkey, self.fingerprint) cap = u1.to_string() self.failUnlessRaises(uri.BadURIError, uri.WriteableMDMFFileURI.init_from_string, cap) def test_create_writeable_mdmf_cap_from_verifycap(self): u1 = uri.MDMFVerifierURI(self.storage_index, self.fingerprint) cap = u1.to_string() self.failUnlessRaises(uri.BadURIError, uri.WriteableMDMFFileURI.init_from_string, cap) def test_create_readonly_mdmf_cap_from_verifycap(self): u1 = uri.MDMFVerifierURI(self.storage_index, self.fingerprint) cap = u1.to_string() self.failUnlessRaises(uri.BadURIError, uri.ReadonlyMDMFFileURI.init_from_string, cap) def test_mdmf_verifier_cap(self): u1 = uri.MDMFVerifierURI(self.storage_index, self.fingerprint) self.failUnless(u1.is_readonly()) self.failIf(u1.is_mutable()) self.failUnlessReallyEqual(self.storage_index, u1.storage_index) self.failUnlessReallyEqual(self.fingerprint, u1.fingerprint) cap = u1.to_string() u2 = uri.MDMFVerifierURI.init_from_string(cap) self.failUnless(u2.is_readonly()) self.failIf(u2.is_mutable()) self.failUnlessReallyEqual(self.storage_index, u2.storage_index) self.failUnlessReallyEqual(self.fingerprint, u2.fingerprint) u3 = u2.get_readonly() self.failUnlessReallyEqual(u3, u2) u4 = u2.get_verify_cap() self.failUnlessReallyEqual(u4, u2) def test_mdmf_cap_ignore_extensions(self): # MDMF caps can be arbitrarily extended after the fingerprint and # key/storage index fields. tahoe-1.9 is supposed to ignore any # extensions, and not add any itself. u1 = uri.WriteableMDMFFileURI(self.writekey, self.fingerprint) cap = u1.to_string() cap2 = cap+":I COME FROM THE FUTURE" u2 = uri.WriteableMDMFFileURI.init_from_string(cap2) self.failUnlessReallyEqual(self.writekey, u2.writekey) self.failUnlessReallyEqual(self.fingerprint, u2.fingerprint) self.failIf(u2.is_readonly()) self.failUnless(u2.is_mutable()) cap3 = cap+":"+os.urandom(40) # parse *that*! u3 = uri.WriteableMDMFFileURI.init_from_string(cap3) self.failUnlessReallyEqual(self.writekey, u3.writekey) self.failUnlessReallyEqual(self.fingerprint, u3.fingerprint) self.failIf(u3.is_readonly()) self.failUnless(u3.is_mutable()) cap4 = u1.get_readonly().to_string()+":ooh scary future stuff" u4 = uri.from_string_mutable_filenode(cap4) self.failUnlessReallyEqual(self.readkey, u4.readkey) self.failUnlessReallyEqual(self.fingerprint, u4.fingerprint) self.failUnless(u4.is_readonly()) self.failUnless(u4.is_mutable()) cap5 = u1.get_verify_cap().to_string()+":spoilers!" u5 = uri.from_string(cap5) self.failUnlessReallyEqual(self.storage_index, u5.storage_index) self.failUnlessReallyEqual(self.fingerprint, u5.fingerprint) self.failUnless(u5.is_readonly()) self.failIf(u5.is_mutable()) def test_mdmf_valid_human_encoding(self): # What's a human encoding? Well, it's of the form: base = "https://127.0.0.1:3456/uri/" # With a cap on the end. For each of the cap types, we need to # test that a valid cap (with and without the traditional # separators) is recognized and accepted by the classes. w1 = uri.WriteableMDMFFileURI(self.writekey, self.fingerprint) r1 = uri.ReadonlyMDMFFileURI(self.readkey, self.fingerprint) v1 = uri.MDMFVerifierURI(self.storage_index, self.fingerprint) # These will yield three different caps. for o in (w1, r1, v1): url = base + o.to_string() o1 = o.__class__.init_from_human_encoding(url) self.failUnlessReallyEqual(o1, o) # Note that our cap will, by default, have : as separators. # But it's expected that users from, e.g., the WUI, will # have %3A as a separator. 
We need to make sure that the # initialization routine handles that, too. cap = o.to_string() cap = re.sub(":", "%3A", cap) url = base + cap o2 = o.__class__.init_from_human_encoding(url) self.failUnlessReallyEqual(o2, o) def test_mdmf_human_encoding_invalid_base(self): # What's a human encoding? Well, it's of the form: base = "https://127.0.0.1:3456/foo/bar/bazuri/" # With a cap on the end. For each of the cap types, we need to # test that a valid cap (with and without the traditional # separators) is recognized and accepted by the classes. w1 = uri.WriteableMDMFFileURI(self.writekey, self.fingerprint) r1 = uri.ReadonlyMDMFFileURI(self.readkey, self.fingerprint) v1 = uri.MDMFVerifierURI(self.storage_index, self.fingerprint) # These will yield three different caps. for o in (w1, r1, v1): url = base + o.to_string() self.failUnlessRaises(uri.BadURIError, o.__class__.init_from_human_encoding, url) def test_mdmf_human_encoding_invalid_cap(self): base = "https://127.0.0.1:3456/uri/" # With a cap on the end. For each of the cap types, we need to # test that a valid cap (with and without the traditional # separators) is recognized and accepted by the classes. w1 = uri.WriteableMDMFFileURI(self.writekey, self.fingerprint) r1 = uri.ReadonlyMDMFFileURI(self.readkey, self.fingerprint) v1 = uri.MDMFVerifierURI(self.storage_index, self.fingerprint) # These will yield three different caps. for o in (w1, r1, v1): # not exhaustive, obviously... url = base + o.to_string() + "foobarbaz" url2 = base + "foobarbaz" + o.to_string() url3 = base + o.to_string()[:25] + "foo" + o.to_string()[:25] for u in (url, url2, url3): self.failUnlessRaises(uri.BadURIError, o.__class__.init_from_human_encoding, u) def test_mdmf_from_string(self): # Make sure that the from_string utility function works with # MDMF caps. 
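# The human-encoding tests above feed caps through a gateway-style URL, where
# the WUI may percent-escape ':' as '%3A'.  A standalone stdlib reminder of
# that escaping (the cap body below is made up for shape only; the real
# normalization is whatever init_from_human_encoding implements):

import urllib

cap = "URI:SSK-RO:aaaa:bbbb"
url = "https://127.0.0.1:3456/uri/" + urllib.quote(cap)
assert "%3A" in url                          # ':' was escaped by quote()
assert urllib.unquote(url).endswith(cap)     # unquoting restores the cap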
u1 = uri.WriteableMDMFFileURI(self.writekey, self.fingerprint) cap = u1.to_string() self.failUnless(uri.is_uri(cap)) u2 = uri.from_string(cap) self.failUnlessReallyEqual(u1, u2) u3 = uri.from_string_mutable_filenode(cap) self.failUnlessEqual(u3, u1) u1 = uri.ReadonlyMDMFFileURI(self.readkey, self.fingerprint) cap = u1.to_string() self.failUnless(uri.is_uri(cap)) u2 = uri.from_string(cap) self.failUnlessReallyEqual(u1, u2) u3 = uri.from_string_mutable_filenode(cap) self.failUnlessEqual(u3, u1) u1 = uri.MDMFVerifierURI(self.storage_index, self.fingerprint) cap = u1.to_string() self.failUnless(uri.is_uri(cap)) u2 = uri.from_string(cap) self.failUnlessReallyEqual(u1, u2) u3 = uri.from_string_verifier(cap) self.failUnlessEqual(u3, u1) class Dirnode(testutil.ReallyEqualMixin, unittest.TestCase): def test_pack(self): writekey = "\x01" * 16 fingerprint = "\x02" * 32 n = uri.WriteableSSKFileURI(writekey, fingerprint) u1 = uri.DirectoryURI(n) self.failIf(u1.is_readonly()) self.failUnless(u1.is_mutable()) self.failUnless(IURI.providedBy(u1)) self.failIf(IFileURI.providedBy(u1)) self.failUnless(IDirnodeURI.providedBy(u1)) self.failUnless("DirectoryURI" in str(u1)) u1_filenode = u1.get_filenode_cap() self.failUnless(u1_filenode.is_mutable()) self.failIf(u1_filenode.is_readonly()) u2 = uri.from_string(u1.to_string()) self.failUnlessReallyEqual(u1.to_string(), u2.to_string()) self.failIf(u2.is_readonly()) self.failUnless(u2.is_mutable()) self.failUnless(IURI.providedBy(u2)) self.failIf(IFileURI.providedBy(u2)) self.failUnless(IDirnodeURI.providedBy(u2)) u2i = uri.from_string(u1.to_string(), deep_immutable=True) self.failUnless(isinstance(u2i, uri.UnknownURI)) u3 = u2.get_readonly() self.failUnless(u3.is_readonly()) self.failUnless(u3.is_mutable()) self.failUnless(IURI.providedBy(u3)) self.failIf(IFileURI.providedBy(u3)) self.failUnless(IDirnodeURI.providedBy(u3)) u3i = uri.from_string(u2.to_string(), deep_immutable=True) self.failUnless(isinstance(u3i, uri.UnknownURI)) u3n = u3._filenode_uri self.failUnless(u3n.is_readonly()) self.failUnless(u3n.is_mutable()) u3_filenode = u3.get_filenode_cap() self.failUnless(u3_filenode.is_mutable()) self.failUnless(u3_filenode.is_readonly()) u3a = uri.from_string(u3.to_string()) self.failUnlessIdentical(u3a, u3a.get_readonly()) u4 = uri.ReadonlyDirectoryURI(u2._filenode_uri.get_readonly()) self.failUnlessReallyEqual(u4.to_string(), u3.to_string()) self.failUnless(u4.is_readonly()) self.failUnless(u4.is_mutable()) self.failUnless(IURI.providedBy(u4)) self.failIf(IFileURI.providedBy(u4)) self.failUnless(IDirnodeURI.providedBy(u4)) u4_verifier = u4.get_verify_cap() u4_verifier_filenode = u4_verifier.get_filenode_cap() self.failUnless(isinstance(u4_verifier_filenode, uri.SSKVerifierURI)) verifiers = [u1.get_verify_cap(), u2.get_verify_cap(), u3.get_verify_cap(), u4.get_verify_cap(), uri.DirectoryURIVerifier(n.get_verify_cap()), ] for v in verifiers: self.failUnless(IVerifierURI.providedBy(v)) self.failUnlessReallyEqual(v._filenode_uri, u1.get_verify_cap()._filenode_uri) def test_immutable(self): readkey = "\x01" * 16 uri_extension_hash = hashutil.uri_extension_hash("stuff") needed_shares = 3 total_shares = 10 size = 1234 fnuri = uri.CHKFileURI(key=readkey, uri_extension_hash=uri_extension_hash, needed_shares=needed_shares, total_shares=total_shares, size=size) fncap = fnuri.to_string() self.failUnlessReallyEqual(fncap, "URI:CHK:aeaqcaibaeaqcaibaeaqcaibae:nf3nimquen7aeqm36ekgxomalstenpkvsdmf6fplj7swdatbv5oa:3:10:1234") u1 = uri.ImmutableDirectoryURI(fnuri) 
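# The literal fncap checked just above makes the CHK cap layout visible:
#   URI:CHK:<read key (base32)>:<UEB hash (base32)>:<k>:<N>:<size>
# A tiny standalone parser for that textual shape, checked against the same
# string; illustrative only -- the real parser is uri.CHKFileURI.init_from_string.

def split_chk_cap(cap):
    prefix, scheme, key, ueb_hash, k, n, size = cap.split(":")
    assert (prefix, scheme) == ("URI", "CHK")
    return {"key": key, "UEB_hash": ueb_hash,
            "needed_shares": int(k), "total_shares": int(n), "size": int(size)}

fields = split_chk_cap("URI:CHK:aeaqcaibaeaqcaibaeaqcaibae:"
                       "nf3nimquen7aeqm36ekgxomalstenpkvsdmf6fplj7swdatbv5oa:3:10:1234")
assert (fields["needed_shares"], fields["total_shares"], fields["size"]) == (3, 10, 1234)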
self.failUnless(u1.is_readonly()) self.failIf(u1.is_mutable()) self.failUnless(IURI.providedBy(u1)) self.failIf(IFileURI.providedBy(u1)) self.failUnless(IDirnodeURI.providedBy(u1)) self.failUnless("DirectoryURI" in str(u1)) u1_filenode = u1.get_filenode_cap() self.failIf(u1_filenode.is_mutable()) self.failUnless(u1_filenode.is_readonly()) self.failUnlessReallyEqual(u1_filenode.to_string(), fncap) self.failUnless(str(u1)) u2 = uri.from_string(u1.to_string()) self.failUnlessReallyEqual(u1.to_string(), u2.to_string()) self.failUnless(u2.is_readonly()) self.failIf(u2.is_mutable()) self.failUnless(IURI.providedBy(u2)) self.failIf(IFileURI.providedBy(u2)) self.failUnless(IDirnodeURI.providedBy(u2)) u2i = uri.from_string(u1.to_string(), deep_immutable=True) self.failUnlessReallyEqual(u1.to_string(), u2i.to_string()) u3 = u2.get_readonly() self.failUnlessReallyEqual(u3.to_string(), u2.to_string()) self.failUnless(str(u3)) u3i = uri.from_string(u2.to_string(), deep_immutable=True) self.failUnlessReallyEqual(u2.to_string(), u3i.to_string()) u2_verifier = u2.get_verify_cap() self.failUnless(isinstance(u2_verifier, uri.ImmutableDirectoryURIVerifier), u2_verifier) self.failUnless(IVerifierURI.providedBy(u2_verifier)) u2vs = u2_verifier.to_string() # URI:DIR2-CHK-Verifier:$key:$ueb:$k:$n:$size self.failUnless(u2vs.startswith("URI:DIR2-CHK-Verifier:"), u2vs) u2_verifier_fileuri = u2_verifier.get_filenode_cap() self.failUnless(IVerifierURI.providedBy(u2_verifier_fileuri)) u2vfs = u2_verifier_fileuri.to_string() # URI:CHK-Verifier:$key:$ueb:$k:$n:$size self.failUnlessReallyEqual(u2vfs, fnuri.get_verify_cap().to_string()) self.failUnlessReallyEqual(u2vs[len("URI:DIR2-"):], u2vfs[len("URI:"):]) self.failUnless(str(u2_verifier)) def test_literal(self): u0 = uri.LiteralFileURI("data") u1 = uri.LiteralDirectoryURI(u0) self.failUnless(str(u1)) self.failUnlessReallyEqual(u1.to_string(), "URI:DIR2-LIT:mrqxiyi") self.failUnless(u1.is_readonly()) self.failIf(u1.is_mutable()) self.failUnless(IURI.providedBy(u1)) self.failIf(IFileURI.providedBy(u1)) self.failUnless(IDirnodeURI.providedBy(u1)) self.failUnlessReallyEqual(u1.get_verify_cap(), None) self.failUnlessReallyEqual(u1.get_storage_index(), None) self.failUnlessReallyEqual(u1.abbrev_si(), "") def test_mdmf(self): writekey = "\x01" * 16 fingerprint = "\x02" * 32 uri1 = uri.WriteableMDMFFileURI(writekey, fingerprint) d1 = uri.MDMFDirectoryURI(uri1) self.failIf(d1.is_readonly()) self.failUnless(d1.is_mutable()) self.failUnless(IURI.providedBy(d1)) self.failUnless(IDirnodeURI.providedBy(d1)) d1_uri = d1.to_string() d2 = uri.from_string(d1_uri) self.failUnlessIsInstance(d2, uri.MDMFDirectoryURI) self.failIf(d2.is_readonly()) self.failUnless(d2.is_mutable()) self.failUnless(IURI.providedBy(d2)) self.failUnless(IDirnodeURI.providedBy(d2)) # It doesn't make sense to ask for a deep immutable URI for a # mutable directory, and we should get back a result to that # effect. 
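# The check just above,
#     u2vs[len("URI:DIR2-"):] == u2vfs[len("URI:"):]
# spells out how an immutable directory verifier cap wraps the file verifier
# cap: same body, with "URI:" replaced by "URI:DIR2-".  The same textual
# pattern appears for literal directories in test_literal below
# ("URI:LIT:mrqxiyi" becomes "URI:DIR2-LIT:mrqxiyi").  Standalone illustration
# of just that textual relationship; real code goes through the
# uri.*DirectoryURI* classes instead.

def wrap_as_dir2(filenode_cap):
    assert filenode_cap.startswith("URI:")
    return "URI:DIR2-" + filenode_cap[len("URI:"):]

assert wrap_as_dir2("URI:LIT:mrqxiyi") == "URI:DIR2-LIT:mrqxiyi"
assert wrap_as_dir2("URI:CHK-Verifier:k:u:3:10:1234").startswith("URI:DIR2-CHK-Verifier:")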
d3 = uri.from_string(d2.to_string(), deep_immutable=True) self.failUnlessIsInstance(d3, uri.UnknownURI) def test_mdmf_attenuation(self): writekey = "\x01" * 16 fingerprint = "\x02" * 32 uri1 = uri.WriteableMDMFFileURI(writekey, fingerprint) d1 = uri.MDMFDirectoryURI(uri1) self.failUnless(d1.is_mutable()) self.failIf(d1.is_readonly()) self.failUnless(IURI.providedBy(d1)) self.failUnless(IDirnodeURI.providedBy(d1)) d1_uri = d1.to_string() d1_uri_from_fn = uri.MDMFDirectoryURI(d1.get_filenode_cap()).to_string() self.failUnlessEqual(d1_uri_from_fn, d1_uri) uri2 = uri.from_string(d1_uri) self.failUnlessIsInstance(uri2, uri.MDMFDirectoryURI) self.failUnless(IURI.providedBy(uri2)) self.failUnless(IDirnodeURI.providedBy(uri2)) self.failUnless(uri2.is_mutable()) self.failIf(uri2.is_readonly()) ro = uri2.get_readonly() self.failUnlessIsInstance(ro, uri.ReadonlyMDMFDirectoryURI) self.failUnless(ro.is_mutable()) self.failUnless(ro.is_readonly()) self.failUnless(IURI.providedBy(ro)) self.failUnless(IDirnodeURI.providedBy(ro)) ro_uri = ro.to_string() n = uri.from_string(ro_uri, deep_immutable=True) self.failUnlessIsInstance(n, uri.UnknownURI) fn_cap = ro.get_filenode_cap() fn_ro_cap = fn_cap.get_readonly() d3 = uri.ReadonlyMDMFDirectoryURI(fn_ro_cap) self.failUnlessEqual(ro.to_string(), d3.to_string()) self.failUnless(ro.is_mutable()) self.failUnless(ro.is_readonly()) def test_mdmf_verifier(self): # I'm not sure what I want to write here yet. writekey = "\x01" * 16 fingerprint = "\x02" * 32 uri1 = uri.WriteableMDMFFileURI(writekey, fingerprint) d1 = uri.MDMFDirectoryURI(uri1) v1 = d1.get_verify_cap() self.failUnlessIsInstance(v1, uri.MDMFDirectoryURIVerifier) self.failIf(v1.is_mutable()) d2 = uri.from_string(d1.to_string()) v2 = d2.get_verify_cap() self.failUnlessIsInstance(v2, uri.MDMFDirectoryURIVerifier) self.failIf(v2.is_mutable()) self.failUnlessEqual(v2.to_string(), v1.to_string()) # Now attenuate and make sure that works correctly. 
r3 = d2.get_readonly() v3 = r3.get_verify_cap() self.failUnlessIsInstance(v3, uri.MDMFDirectoryURIVerifier) self.failIf(v3.is_mutable()) self.failUnlessEqual(v3.to_string(), v1.to_string()) r4 = uri.from_string(r3.to_string()) v4 = r4.get_verify_cap() self.failUnlessIsInstance(v4, uri.MDMFDirectoryURIVerifier) self.failIf(v4.is_mutable()) self.failUnlessEqual(v4.to_string(), v3.to_string()) tahoe-lafs-1.10.0/src/allmydata/test/test_util.py000066400000000000000000002424311221140116300217150ustar00rootroot00000000000000 def foo(): pass # keep the line number constant import os, time, sys from StringIO import StringIO from twisted.trial import unittest from twisted.internet import defer, reactor from twisted.python.failure import Failure from twisted.python import log from pycryptopp.hash.sha256 import SHA256 as _hash from allmydata.util import base32, idlib, humanreadable, mathutil, hashutil from allmydata.util import assertutil, fileutil, deferredutil, abbreviate from allmydata.util import limiter, time_format, pollmixin, cachedir from allmydata.util import statistics, dictutil, pipeline from allmydata.util import log as tahoe_log from allmydata.util.spans import Spans, overlap, DataSpans class Base32(unittest.TestCase): def test_b2a_matches_Pythons(self): import base64 y = "\x12\x34\x45\x67\x89\x0a\xbc\xde\xf0" x = base64.b32encode(y) while x and x[-1] == '=': x = x[:-1] x = x.lower() self.failUnlessEqual(base32.b2a(y), x) def test_b2a(self): self.failUnlessEqual(base32.b2a("\x12\x34"), "ci2a") def test_b2a_or_none(self): self.failUnlessEqual(base32.b2a_or_none(None), None) self.failUnlessEqual(base32.b2a_or_none("\x12\x34"), "ci2a") def test_a2b(self): self.failUnlessEqual(base32.a2b("ci2a"), "\x12\x34") self.failUnlessRaises(AssertionError, base32.a2b, "b0gus") class IDLib(unittest.TestCase): def test_nodeid_b2a(self): self.failUnlessEqual(idlib.nodeid_b2a("\x00"*20), "a"*32) class NoArgumentException(Exception): def __init__(self): pass class HumanReadable(unittest.TestCase): def test_repr(self): hr = humanreadable.hr self.failUnlessEqual(hr(foo), "") self.failUnlessEqual(hr(self.test_repr), ">") self.failUnlessEqual(hr(1L), "1") self.failUnlessEqual(hr(10**40), "100000000000000000...000000000000000000") self.failUnlessEqual(hr(self), "") self.failUnlessEqual(hr([1,2]), "[1, 2]") self.failUnlessEqual(hr({1:2}), "{1:2}") try: raise ValueError except Exception, e: self.failUnless( hr(e) == "" # python-2.4 or hr(e) == "ValueError()") # python-2.5 try: raise ValueError("oops") except Exception, e: self.failUnless( hr(e) == "" # python-2.4 or hr(e) == "ValueError('oops',)") # python-2.5 try: raise NoArgumentException except Exception, e: self.failUnless( hr(e) == "" # python-2.4 or hr(e) == "NoArgumentException()") # python-2.5 class MyList(list): pass class Math(unittest.TestCase): def test_div_ceil(self): f = mathutil.div_ceil self.failUnlessEqual(f(0, 1), 0) self.failUnlessEqual(f(0, 2), 0) self.failUnlessEqual(f(0, 3), 0) self.failUnlessEqual(f(1, 3), 1) self.failUnlessEqual(f(2, 3), 1) self.failUnlessEqual(f(3, 3), 1) self.failUnlessEqual(f(4, 3), 2) self.failUnlessEqual(f(5, 3), 2) self.failUnlessEqual(f(6, 3), 2) self.failUnlessEqual(f(7, 3), 3) def test_next_multiple(self): f = mathutil.next_multiple self.failUnlessEqual(f(5, 1), 5) self.failUnlessEqual(f(5, 2), 6) self.failUnlessEqual(f(5, 3), 6) self.failUnlessEqual(f(5, 4), 8) self.failUnlessEqual(f(5, 5), 5) self.failUnlessEqual(f(5, 6), 6) self.failUnlessEqual(f(32, 1), 32) self.failUnlessEqual(f(32, 2), 32) 
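# The expected values above and below pin these two helpers down exactly:
# div_ceil(n, d) is ceiling division, and next_multiple(n, k) rounds n up to
# the nearest multiple of k.  Equivalent standalone one-liners (the real
# versions live in allmydata.util.mathutil, imported at the top of this file):

def div_ceil(n, d):
    return (n + d - 1) // d           # e.g. div_ceil(7, 3) == 3

def next_multiple(n, k):
    return div_ceil(n, k) * k         # e.g. next_multiple(5, 4) == 8

assert [div_ceil(i, 3) for i in range(8)] == [0, 1, 1, 1, 2, 2, 2, 3]
assert (next_multiple(32, 9), next_multiple(32, 589)) == (36, 589)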
self.failUnlessEqual(f(32, 3), 33) self.failUnlessEqual(f(32, 4), 32) self.failUnlessEqual(f(32, 5), 35) self.failUnlessEqual(f(32, 6), 36) self.failUnlessEqual(f(32, 7), 35) self.failUnlessEqual(f(32, 8), 32) self.failUnlessEqual(f(32, 9), 36) self.failUnlessEqual(f(32, 10), 40) self.failUnlessEqual(f(32, 11), 33) self.failUnlessEqual(f(32, 12), 36) self.failUnlessEqual(f(32, 13), 39) self.failUnlessEqual(f(32, 14), 42) self.failUnlessEqual(f(32, 15), 45) self.failUnlessEqual(f(32, 16), 32) self.failUnlessEqual(f(32, 17), 34) self.failUnlessEqual(f(32, 18), 36) self.failUnlessEqual(f(32, 589), 589) def test_pad_size(self): f = mathutil.pad_size self.failUnlessEqual(f(0, 4), 0) self.failUnlessEqual(f(1, 4), 3) self.failUnlessEqual(f(2, 4), 2) self.failUnlessEqual(f(3, 4), 1) self.failUnlessEqual(f(4, 4), 0) self.failUnlessEqual(f(5, 4), 3) def test_is_power_of_k(self): f = mathutil.is_power_of_k for i in range(1, 100): if i in (1, 2, 4, 8, 16, 32, 64): self.failUnless(f(i, 2), "but %d *is* a power of 2" % i) else: self.failIf(f(i, 2), "but %d is *not* a power of 2" % i) for i in range(1, 100): if i in (1, 3, 9, 27, 81): self.failUnless(f(i, 3), "but %d *is* a power of 3" % i) else: self.failIf(f(i, 3), "but %d is *not* a power of 3" % i) def test_next_power_of_k(self): f = mathutil.next_power_of_k self.failUnlessEqual(f(0,2), 1) self.failUnlessEqual(f(1,2), 1) self.failUnlessEqual(f(2,2), 2) self.failUnlessEqual(f(3,2), 4) self.failUnlessEqual(f(4,2), 4) for i in range(5, 8): self.failUnlessEqual(f(i,2), 8, "%d" % i) for i in range(9, 16): self.failUnlessEqual(f(i,2), 16, "%d" % i) for i in range(17, 32): self.failUnlessEqual(f(i,2), 32, "%d" % i) for i in range(33, 64): self.failUnlessEqual(f(i,2), 64, "%d" % i) for i in range(65, 100): self.failUnlessEqual(f(i,2), 128, "%d" % i) self.failUnlessEqual(f(0,3), 1) self.failUnlessEqual(f(1,3), 1) self.failUnlessEqual(f(2,3), 3) self.failUnlessEqual(f(3,3), 3) for i in range(4, 9): self.failUnlessEqual(f(i,3), 9, "%d" % i) for i in range(10, 27): self.failUnlessEqual(f(i,3), 27, "%d" % i) for i in range(28, 81): self.failUnlessEqual(f(i,3), 81, "%d" % i) for i in range(82, 200): self.failUnlessEqual(f(i,3), 243, "%d" % i) def test_ave(self): f = mathutil.ave self.failUnlessEqual(f([1,2,3]), 2) self.failUnlessEqual(f([0,0,0,4]), 1) self.failUnlessAlmostEqual(f([0.0, 1.0, 1.0]), .666666666666) def test_round_sigfigs(self): f = mathutil.round_sigfigs self.failUnlessEqual(f(22.0/3, 4), 7.3330000000000002) class Statistics(unittest.TestCase): def should_assert(self, msg, func, *args, **kwargs): try: func(*args, **kwargs) self.fail(msg) except AssertionError: pass def failUnlessListEqual(self, a, b, msg = None): self.failUnlessEqual(len(a), len(b)) for i in range(len(a)): self.failUnlessEqual(a[i], b[i], msg) def failUnlessListAlmostEqual(self, a, b, places = 7, msg = None): self.failUnlessEqual(len(a), len(b)) for i in range(len(a)): self.failUnlessAlmostEqual(a[i], b[i], places, msg) def test_binomial_coeff(self): f = statistics.binomial_coeff self.failUnlessEqual(f(20, 0), 1) self.failUnlessEqual(f(20, 1), 20) self.failUnlessEqual(f(20, 2), 190) self.failUnlessEqual(f(20, 8), f(20, 12)) self.should_assert("Should assert if n < k", f, 2, 3) def test_binomial_distribution_pmf(self): f = statistics.binomial_distribution_pmf pmf_comp = f(2, .1) pmf_stat = [0.81, 0.18, 0.01] self.failUnlessListAlmostEqual(pmf_comp, pmf_stat) # Summing across a PMF should give the total probability 1 self.failUnlessAlmostEqual(sum(pmf_comp), 1) 
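# The PMF checked above is the binomial distribution
#     P(i) = C(n, i) * p**i * (1 - p)**(n - i),    i = 0 .. n
# so for n=2, p=0.1 the three terms are 0.81, 0.18 and 0.01 and they sum to 1.
# Standalone sketch of the same computation (the project's helper is
# statistics.binomial_distribution_pmf, used above):

def binomial_pmf(n, p):
    def choose(n, k):
        c = 1
        for i in range(k):
            c = c * (n - i) // (i + 1)   # stays integral at every step
        return c
    return [choose(n, i) * p**i * (1.0 - p)**(n - i) for i in range(n + 1)]

pmf = binomial_pmf(2, 0.1)
assert [round(x, 10) for x in pmf] == [0.81, 0.18, 0.01]
assert abs(sum(pmf) - 1.0) < 1e-12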
self.should_assert("Should assert if not 0<=p<=1", f, 1, -1) self.should_assert("Should assert if n < 1", f, 0, .1) out = StringIO() statistics.print_pmf(pmf_comp, out=out) lines = out.getvalue().splitlines() self.failUnlessEqual(lines[0], "i=0: 0.81") self.failUnlessEqual(lines[1], "i=1: 0.18") self.failUnlessEqual(lines[2], "i=2: 0.01") def test_survival_pmf(self): f = statistics.survival_pmf # Cross-check binomial-distribution method against convolution # method. p_list = [.9999] * 100 + [.99] * 50 + [.8] * 20 pmf1 = statistics.survival_pmf_via_conv(p_list) pmf2 = statistics.survival_pmf_via_bd(p_list) self.failUnlessListAlmostEqual(pmf1, pmf2) self.failUnlessTrue(statistics.valid_pmf(pmf1)) self.should_assert("Should assert if p_i > 1", f, [1.1]); self.should_assert("Should assert if p_i < 0", f, [-.1]); def test_repair_count_pmf(self): survival_pmf = statistics.binomial_distribution_pmf(5, .9) repair_pmf = statistics.repair_count_pmf(survival_pmf, 3) # repair_pmf[0] == sum(survival_pmf[0,1,2,5]) # repair_pmf[1] == survival_pmf[4] # repair_pmf[2] = survival_pmf[3] self.failUnlessListAlmostEqual(repair_pmf, [0.00001 + 0.00045 + 0.0081 + 0.59049, .32805, .0729, 0, 0, 0]) def test_repair_cost(self): survival_pmf = statistics.binomial_distribution_pmf(5, .9) bwcost = statistics.bandwidth_cost_function cost = statistics.mean_repair_cost(bwcost, 1000, survival_pmf, 3, ul_dl_ratio=1.0) self.failUnlessAlmostEqual(cost, 558.90) cost = statistics.mean_repair_cost(bwcost, 1000, survival_pmf, 3, ul_dl_ratio=8.0) self.failUnlessAlmostEqual(cost, 1664.55) # I haven't manually checked the math beyond here -warner cost = statistics.eternal_repair_cost(bwcost, 1000, survival_pmf, 3, discount_rate=0, ul_dl_ratio=1.0) self.failUnlessAlmostEqual(cost, 65292.056074766246) cost = statistics.eternal_repair_cost(bwcost, 1000, survival_pmf, 3, discount_rate=0.05, ul_dl_ratio=1.0) self.failUnlessAlmostEqual(cost, 9133.6097158191551) def test_convolve(self): f = statistics.convolve v1 = [ 1, 2, 3 ] v2 = [ 4, 5, 6 ] v3 = [ 7, 8 ] v1v2result = [ 4, 13, 28, 27, 18 ] # Convolution is commutative r1 = f(v1, v2) r2 = f(v2, v1) self.failUnlessListEqual(r1, r2, "Convolution should be commutative") self.failUnlessListEqual(r1, v1v2result, "Didn't match known result") # Convolution is associative r1 = f(f(v1, v2), v3) r2 = f(v1, f(v2, v3)) self.failUnlessListEqual(r1, r2, "Convolution should be associative") # Convolution is distributive r1 = f(v3, [ a + b for a, b in zip(v1, v2) ]) tmp1 = f(v3, v1) tmp2 = f(v3, v2) r2 = [ a + b for a, b in zip(tmp1, tmp2) ] self.failUnlessListEqual(r1, r2, "Convolution should be distributive") # Convolution is scalar multiplication associative tmp1 = f(v1, v2) r1 = [ a * 4 for a in tmp1 ] tmp2 = [ a * 4 for a in v1 ] r2 = f(tmp2, v2) self.failUnlessListEqual(r1, r2, "Convolution should be scalar multiplication associative") def test_find_k(self): f = statistics.find_k g = statistics.pr_file_loss plist = [.9] * 10 + [.8] * 10 # N=20 t = .0001 k = f(plist, t) self.failUnlessEqual(k, 10) self.failUnless(g(plist, k) < t) def test_pr_file_loss(self): f = statistics.pr_file_loss plist = [.5] * 10 self.failUnlessEqual(f(plist, 3), .0546875) def test_pr_backup_file_loss(self): f = statistics.pr_backup_file_loss plist = [.5] * 10 self.failUnlessEqual(f(plist, .5, 3), .02734375) class Asserts(unittest.TestCase): def should_assert(self, func, *args, **kwargs): try: func(*args, **kwargs) except AssertionError, e: return str(e) except Exception, e: self.fail("assert failed with non-AssertionError: 
%s" % e) self.fail("assert was not caught") def should_not_assert(self, func, *args, **kwargs): try: func(*args, **kwargs) except AssertionError, e: self.fail("assertion fired when it should not have: %s" % e) except Exception, e: self.fail("assertion (which shouldn't have failed) failed with non-AssertionError: %s" % e) return # we're happy def test_assert(self): f = assertutil._assert self.should_assert(f) self.should_assert(f, False) self.should_not_assert(f, True) m = self.should_assert(f, False, "message") self.failUnlessEqual(m, "'message' ", m) m = self.should_assert(f, False, "message1", othermsg=12) self.failUnlessEqual("'message1' , othermsg: 12 ", m) m = self.should_assert(f, False, othermsg="message2") self.failUnlessEqual("othermsg: 'message2' ", m) def test_precondition(self): f = assertutil.precondition self.should_assert(f) self.should_assert(f, False) self.should_not_assert(f, True) m = self.should_assert(f, False, "message") self.failUnlessEqual("precondition: 'message' ", m) m = self.should_assert(f, False, "message1", othermsg=12) self.failUnlessEqual("precondition: 'message1' , othermsg: 12 ", m) m = self.should_assert(f, False, othermsg="message2") self.failUnlessEqual("precondition: othermsg: 'message2' ", m) def test_postcondition(self): f = assertutil.postcondition self.should_assert(f) self.should_assert(f, False) self.should_not_assert(f, True) m = self.should_assert(f, False, "message") self.failUnlessEqual("postcondition: 'message' ", m) m = self.should_assert(f, False, "message1", othermsg=12) self.failUnlessEqual("postcondition: 'message1' , othermsg: 12 ", m) m = self.should_assert(f, False, othermsg="message2") self.failUnlessEqual("postcondition: othermsg: 'message2' ", m) class FileUtil(unittest.TestCase): def mkdir(self, basedir, path, mode=0777): fn = os.path.join(basedir, path) fileutil.make_dirs(fn, mode) def touch(self, basedir, path, mode=None, data="touch\n"): fn = os.path.join(basedir, path) f = open(fn, "w") f.write(data) f.close() if mode is not None: os.chmod(fn, mode) def test_rm_dir(self): basedir = "util/FileUtil/test_rm_dir" fileutil.make_dirs(basedir) # create it again to test idempotency fileutil.make_dirs(basedir) d = os.path.join(basedir, "doomed") self.mkdir(d, "a/b") self.touch(d, "a/b/1.txt") self.touch(d, "a/b/2.txt", 0444) self.touch(d, "a/b/3.txt", 0) self.mkdir(d, "a/c") self.touch(d, "a/c/1.txt") self.touch(d, "a/c/2.txt", 0444) self.touch(d, "a/c/3.txt", 0) os.chmod(os.path.join(d, "a/c"), 0444) self.mkdir(d, "a/d") self.touch(d, "a/d/1.txt") self.touch(d, "a/d/2.txt", 0444) self.touch(d, "a/d/3.txt", 0) os.chmod(os.path.join(d, "a/d"), 0) fileutil.rm_dir(d) self.failIf(os.path.exists(d)) # remove it again to test idempotency fileutil.rm_dir(d) def test_remove_if_possible(self): basedir = "util/FileUtil/test_remove_if_possible" fileutil.make_dirs(basedir) self.touch(basedir, "here") fn = os.path.join(basedir, "here") fileutil.remove_if_possible(fn) self.failIf(os.path.exists(fn)) fileutil.remove_if_possible(fn) # should be idempotent fileutil.rm_dir(basedir) fileutil.remove_if_possible(fn) # should survive errors def test_write_atomically(self): basedir = "util/FileUtil/test_write_atomically" fileutil.make_dirs(basedir) fn = os.path.join(basedir, "here") fileutil.write_atomically(fn, "one") self.failUnlessEqual(fileutil.read(fn), "one") fileutil.write_atomically(fn, "two", mode="") # non-binary self.failUnlessEqual(fileutil.read(fn), "two") def test_open_or_create(self): basedir = "util/FileUtil/test_open_or_create" 
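# "Write atomically" conventionally means: write the new contents to a
# temporary file in the same directory, then rename() it over the destination,
# so readers only ever observe the old or the new contents.  A standalone
# sketch of that pattern, assuming fileutil.write_atomically (used above) does
# something similar; details such as fsync and temp-file naming may differ.

import os

def write_atomically(path, data, mode="b"):
    tmp = path + ".tmp"
    f = open(tmp, "w" + mode)
    try:
        f.write(data)
    finally:
        f.close()
    if os.name == "nt" and os.path.exists(path):
        os.remove(path)        # plain os.rename() does not overwrite on Windows
    os.rename(tmp, path)       # atomic replacement on POSIX filesystems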
fileutil.make_dirs(basedir) fn = os.path.join(basedir, "here") f = fileutil.open_or_create(fn) f.write("stuff.") f.close() f = fileutil.open_or_create(fn) f.seek(0, os.SEEK_END) f.write("more.") f.close() f = open(fn, "r") data = f.read() f.close() self.failUnlessEqual(data, "stuff.more.") def test_NamedTemporaryDirectory(self): basedir = "util/FileUtil/test_NamedTemporaryDirectory" fileutil.make_dirs(basedir) td = fileutil.NamedTemporaryDirectory(dir=basedir) name = td.name self.failUnless(basedir in name) self.failUnless(basedir in repr(td)) self.failUnless(os.path.isdir(name)) del td # it is conceivable that we need to force gc here, but I'm not sure self.failIf(os.path.isdir(name)) def test_rename(self): basedir = "util/FileUtil/test_rename" fileutil.make_dirs(basedir) self.touch(basedir, "here") fn = os.path.join(basedir, "here") fn2 = os.path.join(basedir, "there") fileutil.rename(fn, fn2) self.failIf(os.path.exists(fn)) self.failUnless(os.path.exists(fn2)) def test_du(self): basedir = "util/FileUtil/test_du" fileutil.make_dirs(basedir) d = os.path.join(basedir, "space-consuming") self.mkdir(d, "a/b") self.touch(d, "a/b/1.txt", data="a"*10) self.touch(d, "a/b/2.txt", data="b"*11) self.mkdir(d, "a/c") self.touch(d, "a/c/1.txt", data="c"*12) self.touch(d, "a/c/2.txt", data="d"*13) used = fileutil.du(basedir) self.failUnlessEqual(10+11+12+13, used) def test_abspath_expanduser_unicode(self): self.failUnlessRaises(AssertionError, fileutil.abspath_expanduser_unicode, "bytestring") saved_cwd = os.path.normpath(os.getcwdu()) abspath_cwd = fileutil.abspath_expanduser_unicode(u".") self.failUnless(isinstance(saved_cwd, unicode), saved_cwd) self.failUnless(isinstance(abspath_cwd, unicode), abspath_cwd) self.failUnlessEqual(abspath_cwd, saved_cwd) # adapted from self.failUnlessIn(u"foo", fileutil.abspath_expanduser_unicode(u"foo")) self.failIfIn(u"~", fileutil.abspath_expanduser_unicode(u"~")) cwds = ['cwd'] try: cwds.append(u'\xe7w\xf0'.encode(sys.getfilesystemencoding() or 'ascii')) except UnicodeEncodeError: pass # the cwd can't be encoded -- test with ascii cwd only for cwd in cwds: try: os.mkdir(cwd) os.chdir(cwd) for upath in (u'', u'fuu', u'f\xf9\xf9', u'/fuu', u'U:\\', u'~'): uabspath = fileutil.abspath_expanduser_unicode(upath) self.failUnless(isinstance(uabspath, unicode), uabspath) finally: os.chdir(saved_cwd) def test_disk_stats(self): avail = fileutil.get_available_space('.', 2**14) if avail == 0: raise unittest.SkipTest("This test will spuriously fail there is no disk space left.") disk = fileutil.get_disk_stats('.', 2**13) self.failUnless(disk['total'] > 0, disk['total']) self.failUnless(disk['used'] > 0, disk['used']) self.failUnless(disk['free_for_root'] > 0, disk['free_for_root']) self.failUnless(disk['free_for_nonroot'] > 0, disk['free_for_nonroot']) self.failUnless(disk['avail'] > 0, disk['avail']) def test_disk_stats_avail_nonnegative(self): # This test will spuriously fail if you have more than 2^128 # bytes of available space on your filesystem. 
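# The second argument to get_available_space()/get_disk_stats() above is a
# reserved-space amount; the surrounding test_disk_stats_avail_nonnegative pins
# down that the reported 'avail' is clamped at zero once the reservation
# exceeds the real free space.  A POSIX-only standalone sketch in that spirit
# (an assumption about the bookkeeping, not the project's implementation,
# which also handles Windows):

import os

def available_space(path, reserved_space):
    s = os.statvfs(path)
    free_for_nonroot = s.f_frsize * s.f_bavail    # bytes available to non-root
    return max(0, free_for_nonroot - reserved_space)

# e.g. available_space('.', 2**128) == 0 on any real filesystem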
disk = fileutil.get_disk_stats('.', 2**128) self.failUnlessEqual(disk['avail'], 0) class PollMixinTests(unittest.TestCase): def setUp(self): self.pm = pollmixin.PollMixin() def test_PollMixin_True(self): d = self.pm.poll(check_f=lambda : True, pollinterval=0.1) return d def test_PollMixin_False_then_True(self): i = iter([False, True]) d = self.pm.poll(check_f=i.next, pollinterval=0.1) return d def test_timeout(self): d = self.pm.poll(check_f=lambda: False, pollinterval=0.01, timeout=1) def _suc(res): self.fail("poll should have failed, not returned %s" % (res,)) def _err(f): f.trap(pollmixin.TimeoutError) return None # success d.addCallbacks(_suc, _err) return d class DeferredUtilTests(unittest.TestCase): def test_gather_results(self): d1 = defer.Deferred() d2 = defer.Deferred() res = deferredutil.gatherResults([d1, d2]) d1.errback(ValueError("BAD")) def _callb(res): self.fail("Should have errbacked, not resulted in %s" % (res,)) def _errb(thef): thef.trap(ValueError) res.addCallbacks(_callb, _errb) return res def test_success(self): d1, d2 = defer.Deferred(), defer.Deferred() good = [] bad = [] dlss = deferredutil.DeferredListShouldSucceed([d1,d2]) dlss.addCallbacks(good.append, bad.append) d1.callback(1) d2.callback(2) self.failUnlessEqual(good, [[1,2]]) self.failUnlessEqual(bad, []) def test_failure(self): d1, d2 = defer.Deferred(), defer.Deferred() good = [] bad = [] dlss = deferredutil.DeferredListShouldSucceed([d1,d2]) dlss.addCallbacks(good.append, bad.append) d1.addErrback(lambda _ignore: None) d2.addErrback(lambda _ignore: None) d1.callback(1) d2.errback(ValueError()) self.failUnlessEqual(good, []) self.failUnlessEqual(len(bad), 1) f = bad[0] self.failUnless(isinstance(f, Failure)) self.failUnless(f.check(ValueError)) class HashUtilTests(unittest.TestCase): def test_random_key(self): k = hashutil.random_key() self.failUnlessEqual(len(k), hashutil.KEYLEN) def test_sha256d(self): h1 = hashutil.tagged_hash("tag1", "value") h2 = hashutil.tagged_hasher("tag1") h2.update("value") h2a = h2.digest() h2b = h2.digest() self.failUnlessEqual(h1, h2a) self.failUnlessEqual(h2a, h2b) def test_sha256d_truncated(self): h1 = hashutil.tagged_hash("tag1", "value", 16) h2 = hashutil.tagged_hasher("tag1", 16) h2.update("value") h2 = h2.digest() self.failUnlessEqual(len(h1), 16) self.failUnlessEqual(len(h2), 16) self.failUnlessEqual(h1, h2) def test_chk(self): h1 = hashutil.convergence_hash(3, 10, 1000, "data", "secret") h2 = hashutil.convergence_hasher(3, 10, 1000, "secret") h2.update("data") h2 = h2.digest() self.failUnlessEqual(h1, h2) def test_hashers(self): h1 = hashutil.block_hash("foo") h2 = hashutil.block_hasher() h2.update("foo") self.failUnlessEqual(h1, h2.digest()) h1 = hashutil.uri_extension_hash("foo") h2 = hashutil.uri_extension_hasher() h2.update("foo") self.failUnlessEqual(h1, h2.digest()) h1 = hashutil.plaintext_hash("foo") h2 = hashutil.plaintext_hasher() h2.update("foo") self.failUnlessEqual(h1, h2.digest()) h1 = hashutil.crypttext_hash("foo") h2 = hashutil.crypttext_hasher() h2.update("foo") self.failUnlessEqual(h1, h2.digest()) h1 = hashutil.crypttext_segment_hash("foo") h2 = hashutil.crypttext_segment_hasher() h2.update("foo") self.failUnlessEqual(h1, h2.digest()) h1 = hashutil.plaintext_segment_hash("foo") h2 = hashutil.plaintext_segment_hasher() h2.update("foo") self.failUnlessEqual(h1, h2.digest()) def test_constant_time_compare(self): self.failUnless(hashutil.constant_time_compare("a", "a")) self.failUnless(hashutil.constant_time_compare("ab", "ab")) 
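# A constant-time comparison avoids the early exit of ordinary '==' so that
# timing does not reveal how long the matching prefix is.  The usual shape of
# such a check (newer Pythons also ship hmac.compare_digest for this); this is
# an illustrative sketch, not the hashutil implementation being tested here:

def constant_time_compare(a, b):
    if len(a) != len(b):
        return False                 # the length itself is not treated as secret
    acc = 0
    for x, y in zip(a, b):
        acc |= ord(x) ^ ord(y)       # accumulate differences without branching
    return acc == 0

assert constant_time_compare("a", "a")
assert not constant_time_compare("a", "b")
assert not constant_time_compare("a", "aa")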
self.failIf(hashutil.constant_time_compare("a", "b")) self.failIf(hashutil.constant_time_compare("a", "aa")) def _testknown(self, hashf, expected_a, *args): got = hashf(*args) got_a = base32.b2a(got) self.failUnlessEqual(got_a, expected_a) def test_known_answers(self): # assert backwards compatibility self._testknown(hashutil.storage_index_hash, "qb5igbhcc5esa6lwqorsy7e6am", "") self._testknown(hashutil.block_hash, "msjr5bh4evuh7fa3zw7uovixfbvlnstr5b65mrerwfnvjxig2jvq", "") self._testknown(hashutil.uri_extension_hash, "wthsu45q7zewac2mnivoaa4ulh5xvbzdmsbuyztq2a5fzxdrnkka", "") self._testknown(hashutil.plaintext_hash, "5lz5hwz3qj3af7n6e3arblw7xzutvnd3p3fjsngqjcb7utf3x3da", "") self._testknown(hashutil.crypttext_hash, "itdj6e4njtkoiavlrmxkvpreosscssklunhwtvxn6ggho4rkqwga", "") self._testknown(hashutil.crypttext_segment_hash, "aovy5aa7jej6ym5ikgwyoi4pxawnoj3wtaludjz7e2nb5xijb7aa", "") self._testknown(hashutil.plaintext_segment_hash, "4fdgf6qruaisyukhqcmoth4t3li6bkolbxvjy4awwcpprdtva7za", "") self._testknown(hashutil.convergence_hash, "3mo6ni7xweplycin6nowynw2we", 3, 10, 100, "", "converge") self._testknown(hashutil.my_renewal_secret_hash, "ujhr5k5f7ypkp67jkpx6jl4p47pyta7hu5m527cpcgvkafsefm6q", "") self._testknown(hashutil.my_cancel_secret_hash, "rjwzmafe2duixvqy6h47f5wfrokdziry6zhx4smew4cj6iocsfaa", "") self._testknown(hashutil.file_renewal_secret_hash, "hzshk2kf33gzbd5n3a6eszkf6q6o6kixmnag25pniusyaulqjnia", "", "si") self._testknown(hashutil.file_cancel_secret_hash, "bfciwvr6w7wcavsngxzxsxxaszj72dej54n4tu2idzp6b74g255q", "", "si") self._testknown(hashutil.bucket_renewal_secret_hash, "e7imrzgzaoashsncacvy3oysdd2m5yvtooo4gmj4mjlopsazmvuq", "", "\x00"*20) self._testknown(hashutil.bucket_cancel_secret_hash, "dvdujeyxeirj6uux6g7xcf4lvesk632aulwkzjar7srildvtqwma", "", "\x00"*20) self._testknown(hashutil.hmac, "c54ypfi6pevb3nvo6ba42jtglpkry2kbdopqsi7dgrm4r7tw5sra", "tag", "") self._testknown(hashutil.mutable_rwcap_key_hash, "6rvn2iqrghii5n4jbbwwqqsnqu", "iv", "wk") self._testknown(hashutil.ssk_writekey_hash, "ykpgmdbpgbb6yqz5oluw2q26ye", "") self._testknown(hashutil.ssk_write_enabler_master_hash, "izbfbfkoait4dummruol3gy2bnixrrrslgye6ycmkuyujnenzpia", "") self._testknown(hashutil.ssk_write_enabler_hash, "fuu2dvx7g6gqu5x22vfhtyed7p4pd47y5hgxbqzgrlyvxoev62tq", "wk", "\x00"*20) self._testknown(hashutil.ssk_pubkey_fingerprint_hash, "3opzw4hhm2sgncjx224qmt5ipqgagn7h5zivnfzqycvgqgmgz35q", "") self._testknown(hashutil.ssk_readkey_hash, "vugid4as6qbqgeq2xczvvcedai", "") self._testknown(hashutil.ssk_readkey_data_hash, "73wsaldnvdzqaf7v4pzbr2ae5a", "iv", "rk") self._testknown(hashutil.ssk_storage_index_hash, "j7icz6kigb6hxrej3tv4z7ayym", "") class Abbreviate(unittest.TestCase): def test_time(self): a = abbreviate.abbreviate_time self.failUnlessEqual(a(None), "unknown") self.failUnlessEqual(a(0), "0 seconds") self.failUnlessEqual(a(1), "1 second") self.failUnlessEqual(a(2), "2 seconds") self.failUnlessEqual(a(119), "119 seconds") MIN = 60 self.failUnlessEqual(a(2*MIN), "2 minutes") self.failUnlessEqual(a(60*MIN), "60 minutes") self.failUnlessEqual(a(179*MIN), "179 minutes") HOUR = 60*MIN self.failUnlessEqual(a(180*MIN), "3 hours") self.failUnlessEqual(a(4*HOUR), "4 hours") DAY = 24*HOUR MONTH = 30*DAY self.failUnlessEqual(a(2*DAY), "2 days") self.failUnlessEqual(a(2*MONTH), "2 months") YEAR = 365*DAY self.failUnlessEqual(a(5*YEAR), "5 years") def test_space(self): tests_si = [(None, "unknown"), (0, "0 B"), (1, "1 B"), (999, "999 B"), (1000, "1000 B"), (1023, "1023 B"), (1024, "1.02 kB"), (20*1000, "20.00 
kB"), (1024*1024, "1.05 MB"), (1000*1000, "1.00 MB"), (1000*1000*1000, "1.00 GB"), (1000*1000*1000*1000, "1.00 TB"), (1000*1000*1000*1000*1000, "1.00 PB"), (1000*1000*1000*1000*1000*1000, "1.00 EB"), (1234567890123456789, "1.23 EB"), ] for (x, expected) in tests_si: got = abbreviate.abbreviate_space(x, SI=True) self.failUnlessEqual(got, expected) tests_base1024 = [(None, "unknown"), (0, "0 B"), (1, "1 B"), (999, "999 B"), (1000, "1000 B"), (1023, "1023 B"), (1024, "1.00 kiB"), (20*1024, "20.00 kiB"), (1000*1000, "976.56 kiB"), (1024*1024, "1.00 MiB"), (1024*1024*1024, "1.00 GiB"), (1024*1024*1024*1024, "1.00 TiB"), (1000*1000*1000*1000*1000, "909.49 TiB"), (1024*1024*1024*1024*1024, "1.00 PiB"), (1024*1024*1024*1024*1024*1024, "1.00 EiB"), (1234567890123456789, "1.07 EiB"), ] for (x, expected) in tests_base1024: got = abbreviate.abbreviate_space(x, SI=False) self.failUnlessEqual(got, expected) self.failUnlessEqual(abbreviate.abbreviate_space_both(1234567), "(1.23 MB, 1.18 MiB)") def test_parse_space(self): p = abbreviate.parse_abbreviated_size self.failUnlessEqual(p(""), None) self.failUnlessEqual(p(None), None) self.failUnlessEqual(p("123"), 123) self.failUnlessEqual(p("123B"), 123) self.failUnlessEqual(p("2K"), 2000) self.failUnlessEqual(p("2kb"), 2000) self.failUnlessEqual(p("2KiB"), 2048) self.failUnlessEqual(p("10MB"), 10*1000*1000) self.failUnlessEqual(p("10MiB"), 10*1024*1024) self.failUnlessEqual(p("5G"), 5*1000*1000*1000) self.failUnlessEqual(p("4GiB"), 4*1024*1024*1024) self.failUnlessEqual(p("3TB"), 3*1000*1000*1000*1000) self.failUnlessEqual(p("3TiB"), 3*1024*1024*1024*1024) self.failUnlessEqual(p("6PB"), 6*1000*1000*1000*1000*1000) self.failUnlessEqual(p("6PiB"), 6*1024*1024*1024*1024*1024) self.failUnlessEqual(p("9EB"), 9*1000*1000*1000*1000*1000*1000) self.failUnlessEqual(p("9EiB"), 9*1024*1024*1024*1024*1024*1024) e = self.failUnlessRaises(ValueError, p, "12 cubits") self.failUnlessIn("12 cubits", str(e)) e = self.failUnlessRaises(ValueError, p, "1 BB") self.failUnlessIn("1 BB", str(e)) e = self.failUnlessRaises(ValueError, p, "fhtagn") self.failUnlessIn("fhtagn", str(e)) class Limiter(unittest.TestCase): timeout = 480 # This takes longer than 240 seconds on Francois's arm box. 
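# The expected strings above pin down abbreviate_space(): values under 1024
# bytes are printed verbatim as "N B", larger values get two decimals with
# either SI units (kB, MB, ... base 1000) or binary units (kiB, MiB, ...
# base 1024).  A standalone sketch that reproduces the listed cases (the real
# helper is allmydata.util.abbreviate, imported at the top of this file):

def abbreviate_space(x, SI=True):
    if x is None:
        return "unknown"
    if x < 1024:
        return "%d B" % x
    if SI:
        base, units = 1000, ["kB", "MB", "GB", "TB", "PB", "EB"]
    else:
        base, units = 1024, ["kiB", "MiB", "GiB", "TiB", "PiB", "EiB"]
    k = 1
    while k < len(units) and x >= base ** (k + 1):
        k += 1
    return "%.2f %s" % (x / float(base ** k), units[k - 1])

assert abbreviate_space(1024, SI=True) == "1.02 kB"
assert abbreviate_space(1000 * 1000, SI=False) == "976.56 kiB"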
def job(self, i, foo): self.calls.append( (i, foo) ) self.simultaneous += 1 self.peak_simultaneous = max(self.simultaneous, self.peak_simultaneous) d = defer.Deferred() def _done(): self.simultaneous -= 1 d.callback("done %d" % i) reactor.callLater(1.0, _done) return d def bad_job(self, i, foo): raise ValueError("bad_job %d" % i) def test_limiter(self): self.calls = [] self.simultaneous = 0 self.peak_simultaneous = 0 l = limiter.ConcurrencyLimiter() dl = [] for i in range(20): dl.append(l.add(self.job, i, foo=str(i))) d = defer.DeferredList(dl, fireOnOneErrback=True) def _done(res): self.failUnlessEqual(self.simultaneous, 0) self.failUnless(self.peak_simultaneous <= 10) self.failUnlessEqual(len(self.calls), 20) for i in range(20): self.failUnless( (i, str(i)) in self.calls) d.addCallback(_done) return d def test_errors(self): self.calls = [] self.simultaneous = 0 self.peak_simultaneous = 0 l = limiter.ConcurrencyLimiter() dl = [] for i in range(20): dl.append(l.add(self.job, i, foo=str(i))) d2 = l.add(self.bad_job, 21, "21") d = defer.DeferredList(dl, fireOnOneErrback=True) def _most_done(res): results = [] for (success, result) in res: self.failUnlessEqual(success, True) results.append(result) results.sort() expected_results = ["done %d" % i for i in range(20)] expected_results.sort() self.failUnlessEqual(results, expected_results) self.failUnless(self.peak_simultaneous <= 10) self.failUnlessEqual(len(self.calls), 20) for i in range(20): self.failUnless( (i, str(i)) in self.calls) def _good(res): self.fail("should have failed, not got %s" % (res,)) def _err(f): f.trap(ValueError) self.failUnless("bad_job 21" in str(f)) d2.addCallbacks(_good, _err) return d2 d.addCallback(_most_done) def _all_done(res): self.failUnlessEqual(self.simultaneous, 0) self.failUnless(self.peak_simultaneous <= 10) self.failUnlessEqual(len(self.calls), 20) for i in range(20): self.failUnless( (i, str(i)) in self.calls) d.addCallback(_all_done) return d class TimeFormat(unittest.TestCase): def test_epoch(self): return self._help_test_epoch() def test_epoch_in_London(self): # Europe/London is a particularly troublesome timezone. Nowadays, its # offset from GMT is 0. But in 1970, its offset from GMT was 1. # (Apparently in 1970 Britain had redefined standard time to be GMT+1 # and stayed in standard time all year round, whereas today # Europe/London standard time is GMT and Europe/London Daylight # Savings Time is GMT+1.) The current implementation of # time_format.iso_utc_time_to_localseconds() breaks if the timezone is # Europe/London. (As soon as this unit test is done then I'll change # that implementation to something that works even in this case...) 
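# The Europe/London regression described above is the classic pitfall of
# converting a UTC timestamp through the *local* timezone.  The
# timezone-independent route from an ISO8601 UTC string to POSIX seconds is
# calendar.timegm(), the UTC counterpart of time.mktime().  Standalone sketch
# accepting the 'T', '_' and ' ' separators and the fractional seconds used by
# these tests (names here are made up; the real helper is
# time_format.iso_utc_time_to_seconds):

import calendar, time

def iso_utc_to_seconds(s):
    date, sep, clock = s[:10], s[10], s[11:]
    assert sep in ("T", "_", " "), "not a complete ISO8601 timestamp"
    clock, _dot, frac = clock.partition(".")
    st = time.strptime(date + " " + clock, "%Y-%m-%d %H:%M:%S")
    return calendar.timegm(st) + (float("0." + frac) if frac else 0.0)

assert iso_utc_to_seconds("1970-01-01T00:00:01") == 1.0
assert iso_utc_to_seconds("1970-01-01_00:00:01.500") == 1.5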
origtz = os.environ.get('TZ') os.environ['TZ'] = "Europe/London" if hasattr(time, 'tzset'): time.tzset() try: return self._help_test_epoch() finally: if origtz is None: del os.environ['TZ'] else: os.environ['TZ'] = origtz if hasattr(time, 'tzset'): time.tzset() def _help_test_epoch(self): origtzname = time.tzname s = time_format.iso_utc_time_to_seconds("1970-01-01T00:00:01") self.failUnlessEqual(s, 1.0) s = time_format.iso_utc_time_to_seconds("1970-01-01_00:00:01") self.failUnlessEqual(s, 1.0) s = time_format.iso_utc_time_to_seconds("1970-01-01 00:00:01") self.failUnlessEqual(s, 1.0) self.failUnlessEqual(time_format.iso_utc(1.0), "1970-01-01_00:00:01") self.failUnlessEqual(time_format.iso_utc(1.0, sep=" "), "1970-01-01 00:00:01") now = time.time() isostr = time_format.iso_utc(now) timestamp = time_format.iso_utc_time_to_seconds(isostr) self.failUnlessEqual(int(timestamp), int(now)) def my_time(): return 1.0 self.failUnlessEqual(time_format.iso_utc(t=my_time), "1970-01-01_00:00:01") e = self.failUnlessRaises(ValueError, time_format.iso_utc_time_to_seconds, "invalid timestring") self.failUnless("not a complete ISO8601 timestamp" in str(e)) s = time_format.iso_utc_time_to_seconds("1970-01-01_00:00:01.500") self.failUnlessEqual(s, 1.5) # Look for daylight-savings-related errors. thatmomentinmarch = time_format.iso_utc_time_to_seconds("2009-03-20 21:49:02.226536") self.failUnlessEqual(thatmomentinmarch, 1237585742.226536) self.failUnlessEqual(origtzname, time.tzname) def test_iso_utc(self): when = 1266760143.7841301 out = time_format.iso_utc_date(when) self.failUnlessEqual(out, "2010-02-21") out = time_format.iso_utc_date(t=lambda: when) self.failUnlessEqual(out, "2010-02-21") out = time_format.iso_utc(when) self.failUnlessEqual(out, "2010-02-21_13:49:03.784130") out = time_format.iso_utc(when, sep="-") self.failUnlessEqual(out, "2010-02-21-13:49:03.784130") def test_parse_duration(self): p = time_format.parse_duration DAY = 24*60*60 self.failUnlessEqual(p("1 day"), DAY) self.failUnlessEqual(p("2 days"), 2*DAY) self.failUnlessEqual(p("3 months"), 3*31*DAY) self.failUnlessEqual(p("4 mo"), 4*31*DAY) self.failUnlessEqual(p("5 years"), 5*365*DAY) e = self.failUnlessRaises(ValueError, p, "123") self.failUnlessIn("no unit (like day, month, or year) in '123'", str(e)) def test_parse_date(self): self.failUnlessEqual(time_format.parse_date("2010-02-21"), 1266710400) class CacheDir(unittest.TestCase): def test_basic(self): basedir = "test_util/CacheDir/test_basic" def _failIfExists(name): absfn = os.path.join(basedir, name) self.failIf(os.path.exists(absfn), "%s exists but it shouldn't" % absfn) def _failUnlessExists(name): absfn = os.path.join(basedir, name) self.failUnless(os.path.exists(absfn), "%s doesn't exist but it should" % absfn) cdm = cachedir.CacheDirectoryManager(basedir) a = cdm.get_file("a") b = cdm.get_file("b") c = cdm.get_file("c") f = open(a.get_filename(), "wb"); f.write("hi"); f.close(); del f f = open(b.get_filename(), "wb"); f.write("hi"); f.close(); del f f = open(c.get_filename(), "wb"); f.write("hi"); f.close(); del f _failUnlessExists("a") _failUnlessExists("b") _failUnlessExists("c") cdm.check() _failUnlessExists("a") _failUnlessExists("b") _failUnlessExists("c") del a # this file won't be deleted yet, because it isn't old enough cdm.check() _failUnlessExists("a") _failUnlessExists("b") _failUnlessExists("c") # we change the definition of "old" to make everything old cdm.old = -10 cdm.check() _failIfExists("a") _failUnlessExists("b") _failUnlessExists("c") cdm.old = 60*60 del b 
cdm.check() _failIfExists("a") _failUnlessExists("b") _failUnlessExists("c") b2 = cdm.get_file("b") cdm.check() _failIfExists("a") _failUnlessExists("b") _failUnlessExists("c") del b2 ctr = [0] class EqButNotIs: def __init__(self, x): self.x = x self.hash = ctr[0] ctr[0] += 1 def __repr__(self): return "<%s %s>" % (self.__class__.__name__, self.x,) def __hash__(self): return self.hash def __le__(self, other): return self.x <= other def __lt__(self, other): return self.x < other def __ge__(self, other): return self.x >= other def __gt__(self, other): return self.x > other def __ne__(self, other): return self.x != other def __eq__(self, other): return self.x == other class DictUtil(unittest.TestCase): def _help_test_empty_dict(self, klass): d1 = klass() d2 = klass({}) self.failUnless(d1 == d2, "d1: %r, d2: %r" % (d1, d2,)) self.failUnless(len(d1) == 0) self.failUnless(len(d2) == 0) def _help_test_nonempty_dict(self, klass): d1 = klass({'a': 1, 'b': "eggs", 3: "spam",}) d2 = klass({'a': 1, 'b': "eggs", 3: "spam",}) self.failUnless(d1 == d2) self.failUnless(len(d1) == 3, "%s, %s" % (len(d1), d1,)) self.failUnless(len(d2) == 3) def _help_test_eq_but_notis(self, klass): d = klass({'a': 3, 'b': EqButNotIs(3), 'c': 3}) d.pop('b') d.clear() d['a'] = 3 d['b'] = EqButNotIs(3) d['c'] = 3 d.pop('b') d.clear() d['b'] = EqButNotIs(3) d['a'] = 3 d['c'] = 3 d.pop('b') d.clear() d['a'] = EqButNotIs(3) d['c'] = 3 d['a'] = 3 d.clear() fake3 = EqButNotIs(3) fake7 = EqButNotIs(7) d[fake3] = fake7 d[3] = 7 d[3] = 8 self.failUnless(filter(lambda x: x is 8, d.itervalues())) self.failUnless(filter(lambda x: x is fake7, d.itervalues())) # The real 7 should have been ejected by the d[3] = 8. self.failUnless(not filter(lambda x: x is 7, d.itervalues())) self.failUnless(filter(lambda x: x is fake3, d.iterkeys())) self.failUnless(filter(lambda x: x is 3, d.iterkeys())) d[fake3] = 8 d.clear() d[3] = 7 fake3 = EqButNotIs(3) fake7 = EqButNotIs(7) d[fake3] = fake7 d[3] = 8 self.failUnless(filter(lambda x: x is 8, d.itervalues())) self.failUnless(filter(lambda x: x is fake7, d.itervalues())) # The real 7 should have been ejected by the d[3] = 8. 
self.failUnless(not filter(lambda x: x is 7, d.itervalues())) self.failUnless(filter(lambda x: x is fake3, d.iterkeys())) self.failUnless(filter(lambda x: x is 3, d.iterkeys())) d[fake3] = 8 def test_all(self): self._help_test_eq_but_notis(dictutil.UtilDict) self._help_test_eq_but_notis(dictutil.NumDict) self._help_test_eq_but_notis(dictutil.ValueOrderedDict) self._help_test_nonempty_dict(dictutil.UtilDict) self._help_test_nonempty_dict(dictutil.NumDict) self._help_test_nonempty_dict(dictutil.ValueOrderedDict) self._help_test_eq_but_notis(dictutil.UtilDict) self._help_test_eq_but_notis(dictutil.NumDict) self._help_test_eq_but_notis(dictutil.ValueOrderedDict) def test_dict_of_sets(self): ds = dictutil.DictOfSets() ds.add(1, "a") ds.add(2, "b") ds.add(2, "b") ds.add(2, "c") self.failUnlessEqual(ds[1], set(["a"])) self.failUnlessEqual(ds[2], set(["b", "c"])) ds.discard(3, "d") # should not raise an exception ds.discard(2, "b") self.failUnlessEqual(ds[2], set(["c"])) ds.discard(2, "c") self.failIf(2 in ds) ds.add(3, "f") ds2 = dictutil.DictOfSets() ds2.add(3, "f") ds2.add(3, "g") ds2.add(4, "h") ds.update(ds2) self.failUnlessEqual(ds[1], set(["a"])) self.failUnlessEqual(ds[3], set(["f", "g"])) self.failUnlessEqual(ds[4], set(["h"])) def test_move(self): d1 = {1: "a", 2: "b"} d2 = {2: "c", 3: "d"} dictutil.move(1, d1, d2) self.failUnlessEqual(d1, {2: "b"}) self.failUnlessEqual(d2, {1: "a", 2: "c", 3: "d"}) d1 = {1: "a", 2: "b"} d2 = {2: "c", 3: "d"} dictutil.move(2, d1, d2) self.failUnlessEqual(d1, {1: "a"}) self.failUnlessEqual(d2, {2: "b", 3: "d"}) d1 = {1: "a", 2: "b"} d2 = {2: "c", 3: "d"} self.failUnlessRaises(KeyError, dictutil.move, 5, d1, d2, strict=True) def test_subtract(self): d1 = {1: "a", 2: "b"} d2 = {2: "c", 3: "d"} d3 = dictutil.subtract(d1, d2) self.failUnlessEqual(d3, {1: "a"}) d1 = {1: "a", 2: "b"} d2 = {2: "c"} d3 = dictutil.subtract(d1, d2) self.failUnlessEqual(d3, {1: "a"}) def test_utildict(self): d = dictutil.UtilDict({1: "a", 2: "b"}) d.del_if_present(1) d.del_if_present(3) self.failUnlessEqual(d, {2: "b"}) def eq(a, b): return a == b self.failUnlessRaises(TypeError, eq, d, "not a dict") d = dictutil.UtilDict({1: "b", 2: "a"}) self.failUnlessEqual(d.items_sorted_by_value(), [(2, "a"), (1, "b")]) self.failUnlessEqual(d.items_sorted_by_key(), [(1, "b"), (2, "a")]) self.failUnlessEqual(repr(d), "{1: 'b', 2: 'a'}") self.failUnless(1 in d) d2 = dictutil.UtilDict({3: "c", 4: "d"}) self.failUnless(d != d2) self.failUnless(d2 > d) self.failUnless(d2 >= d) self.failUnless(d <= d2) self.failUnless(d < d2) self.failUnlessEqual(d[1], "b") self.failUnlessEqual(sorted(list([k for k in d])), [1,2]) d3 = d.copy() self.failUnlessEqual(d, d3) self.failUnless(isinstance(d3, dictutil.UtilDict)) d4 = d.fromkeys([3,4], "e") self.failUnlessEqual(d4, {3: "e", 4: "e"}) self.failUnlessEqual(d.get(1), "b") self.failUnlessEqual(d.get(3), None) self.failUnlessEqual(d.get(3, "default"), "default") self.failUnlessEqual(sorted(list(d.items())), [(1, "b"), (2, "a")]) self.failUnlessEqual(sorted(list(d.iteritems())), [(1, "b"), (2, "a")]) self.failUnlessEqual(sorted(d.keys()), [1, 2]) self.failUnlessEqual(sorted(d.values()), ["a", "b"]) x = d.setdefault(1, "new") self.failUnlessEqual(x, "b") self.failUnlessEqual(d[1], "b") x = d.setdefault(3, "new") self.failUnlessEqual(x, "new") self.failUnlessEqual(d[3], "new") del d[3] x = d.popitem() self.failUnless(x in [(1, "b"), (2, "a")]) x = d.popitem() self.failUnless(x in [(1, "b"), (2, "a")]) self.failUnlessRaises(KeyError, d.popitem) def test_numdict(self): 
d = dictutil.NumDict({"a": 1, "b": 2}) d.add_num("a", 10, 5) d.add_num("c", 20, 5) d.add_num("d", 30) self.failUnlessEqual(d, {"a": 11, "b": 2, "c": 25, "d": 30}) d.subtract_num("a", 10) d.subtract_num("e", 10) d.subtract_num("f", 10, 15) self.failUnlessEqual(d, {"a": 1, "b": 2, "c": 25, "d": 30, "e": -10, "f": 5}) self.failUnlessEqual(d.sum(), sum([1, 2, 25, 30, -10, 5])) d = dictutil.NumDict() d.inc("a") d.inc("a") d.inc("b", 5) self.failUnlessEqual(d, {"a": 2, "b": 6}) d.dec("a") d.dec("c") d.dec("d", 5) self.failUnlessEqual(d, {"a": 1, "b": 6, "c": -1, "d": 4}) self.failUnlessEqual(d.items_sorted_by_key(), [("a", 1), ("b", 6), ("c", -1), ("d", 4)]) self.failUnlessEqual(d.items_sorted_by_value(), [("c", -1), ("a", 1), ("d", 4), ("b", 6)]) self.failUnlessEqual(d.item_with_largest_value(), ("b", 6)) d = dictutil.NumDict({"a": 1, "b": 2}) self.failUnlessEqual(repr(d), "{'a': 1, 'b': 2}") self.failUnless("a" in d) d2 = dictutil.NumDict({"c": 3, "d": 4}) self.failUnless(d != d2) self.failUnless(d2 > d) self.failUnless(d2 >= d) self.failUnless(d <= d2) self.failUnless(d < d2) self.failUnlessEqual(d["a"], 1) self.failUnlessEqual(sorted(list([k for k in d])), ["a","b"]) def eq(a, b): return a == b self.failUnlessRaises(TypeError, eq, d, "not a dict") d3 = d.copy() self.failUnlessEqual(d, d3) self.failUnless(isinstance(d3, dictutil.NumDict)) d4 = d.fromkeys(["a","b"], 5) self.failUnlessEqual(d4, {"a": 5, "b": 5}) self.failUnlessEqual(d.get("a"), 1) self.failUnlessEqual(d.get("c"), 0) self.failUnlessEqual(d.get("c", 5), 5) self.failUnlessEqual(sorted(list(d.items())), [("a", 1), ("b", 2)]) self.failUnlessEqual(sorted(list(d.iteritems())), [("a", 1), ("b", 2)]) self.failUnlessEqual(sorted(d.keys()), ["a", "b"]) self.failUnlessEqual(sorted(d.values()), [1, 2]) self.failUnless(d.has_key("a")) self.failIf(d.has_key("c")) x = d.setdefault("c", 3) self.failUnlessEqual(x, 3) self.failUnlessEqual(d["c"], 3) x = d.setdefault("c", 5) self.failUnlessEqual(x, 3) self.failUnlessEqual(d["c"], 3) del d["c"] x = d.popitem() self.failUnless(x in [("a", 1), ("b", 2)]) x = d.popitem() self.failUnless(x in [("a", 1), ("b", 2)]) self.failUnlessRaises(KeyError, d.popitem) d.update({"c": 3}) d.update({"c": 4, "d": 5}) self.failUnlessEqual(d, {"c": 4, "d": 5}) def test_del_if_present(self): d = {1: "a", 2: "b"} dictutil.del_if_present(d, 1) dictutil.del_if_present(d, 3) self.failUnlessEqual(d, {2: "b"}) def test_valueordereddict(self): d = dictutil.ValueOrderedDict() d["a"] = 3 d["b"] = 2 d["c"] = 1 self.failUnlessEqual(d, {"a": 3, "b": 2, "c": 1}) self.failUnlessEqual(d.items(), [("c", 1), ("b", 2), ("a", 3)]) self.failUnlessEqual(d.values(), [1, 2, 3]) self.failUnlessEqual(d.keys(), ["c", "b", "a"]) self.failUnlessEqual(repr(d), "") def eq(a, b): return a == b self.failIf(d == {"a": 4}) self.failUnless(d != {"a": 4}) x = d.setdefault("d", 0) self.failUnlessEqual(x, 0) self.failUnlessEqual(d["d"], 0) x = d.setdefault("d", -1) self.failUnlessEqual(x, 0) self.failUnlessEqual(d["d"], 0) x = d.remove("e", "default", False) self.failUnlessEqual(x, "default") self.failUnlessRaises(KeyError, d.remove, "e", "default", True) x = d.remove("d", 5) self.failUnlessEqual(x, 0) x = d.__getitem__("c") self.failUnlessEqual(x, 1) x = d.__getitem__("e", "default", False) self.failUnlessEqual(x, "default") self.failUnlessRaises(KeyError, d.__getitem__, "e", "default", True) self.failUnlessEqual(d.popitem(), ("c", 1)) self.failUnlessEqual(d.popitem(), ("b", 2)) self.failUnlessEqual(d.popitem(), ("a", 3)) self.failUnlessRaises(KeyError, 
d.popitem) d = dictutil.ValueOrderedDict({"a": 3, "b": 2, "c": 1}) x = d.pop("d", "default", False) self.failUnlessEqual(x, "default") self.failUnlessRaises(KeyError, d.pop, "d", "default", True) x = d.pop("b") self.failUnlessEqual(x, 2) self.failUnlessEqual(d.items(), [("c", 1), ("a", 3)]) d = dictutil.ValueOrderedDict({"a": 3, "b": 2, "c": 1}) x = d.pop_from_list(1) # pop the second item, b/2 self.failUnlessEqual(x, "b") self.failUnlessEqual(d.items(), [("c", 1), ("a", 3)]) def test_auxdict(self): d = dictutil.AuxValueDict() # we put the serialized form in the auxdata d.set_with_aux("key", ("filecap", "metadata"), "serialized") self.failUnlessEqual(d.keys(), ["key"]) self.failUnlessEqual(d["key"], ("filecap", "metadata")) self.failUnlessEqual(d.get_aux("key"), "serialized") def _get_missing(key): return d[key] self.failUnlessRaises(KeyError, _get_missing, "nonkey") self.failUnlessEqual(d.get("nonkey"), None) self.failUnlessEqual(d.get("nonkey", "nonvalue"), "nonvalue") self.failUnlessEqual(d.get_aux("nonkey"), None) self.failUnlessEqual(d.get_aux("nonkey", "nonvalue"), "nonvalue") d["key"] = ("filecap2", "metadata2") self.failUnlessEqual(d["key"], ("filecap2", "metadata2")) self.failUnlessEqual(d.get_aux("key"), None) d.set_with_aux("key2", "value2", "aux2") self.failUnlessEqual(sorted(d.keys()), ["key", "key2"]) del d["key2"] self.failUnlessEqual(d.keys(), ["key"]) self.failIf("key2" in d) self.failUnlessRaises(KeyError, _get_missing, "key2") self.failUnlessEqual(d.get("key2"), None) self.failUnlessEqual(d.get_aux("key2"), None) d["key2"] = "newvalue2" self.failUnlessEqual(d.get("key2"), "newvalue2") self.failUnlessEqual(d.get_aux("key2"), None) d = dictutil.AuxValueDict({1:2,3:4}) self.failUnlessEqual(sorted(d.keys()), [1,3]) self.failUnlessEqual(d[1], 2) self.failUnlessEqual(d.get_aux(1), None) d = dictutil.AuxValueDict([ (1,2), (3,4) ]) self.failUnlessEqual(sorted(d.keys()), [1,3]) self.failUnlessEqual(d[1], 2) self.failUnlessEqual(d.get_aux(1), None) d = dictutil.AuxValueDict(one=1, two=2) self.failUnlessEqual(sorted(d.keys()), ["one","two"]) self.failUnlessEqual(d["one"], 1) self.failUnlessEqual(d.get_aux("one"), None) class Pipeline(unittest.TestCase): def pause(self, *args, **kwargs): d = defer.Deferred() self.calls.append( (d, args, kwargs) ) return d def failUnlessCallsAre(self, expected): #print self.calls #print expected self.failUnlessEqual(len(self.calls), len(expected), self.calls) for i,c in enumerate(self.calls): self.failUnlessEqual(c[1:], expected[i], str(i)) def test_basic(self): self.calls = [] finished = [] p = pipeline.Pipeline(100) d = p.flush() # fires immediately d.addCallbacks(finished.append, log.err) self.failUnlessEqual(len(finished), 1) finished = [] d = p.add(10, self.pause, "one") # the call should start right away, and our return Deferred should # fire right away d.addCallbacks(finished.append, log.err) self.failUnlessEqual(len(finished), 1) self.failUnlessEqual(finished[0], None) self.failUnlessCallsAre([ ( ("one",) , {} ) ]) self.failUnlessEqual(p.gauge, 10) # pipeline: [one] finished = [] d = p.add(20, self.pause, "two", kw=2) # pipeline: [one, two] # the call and the Deferred should fire right away d.addCallbacks(finished.append, log.err) self.failUnlessEqual(len(finished), 1) self.failUnlessEqual(finished[0], None) self.failUnlessCallsAre([ ( ("one",) , {} ), ( ("two",) , {"kw": 2} ), ]) self.failUnlessEqual(p.gauge, 30) self.calls[0][0].callback("one-result") # pipeline: [two] self.failUnlessEqual(p.gauge, 20) finished = [] d = p.add(90, 
self.pause, "three", "posarg1") # pipeline: [two, three] flushed = [] fd = p.flush() fd.addCallbacks(flushed.append, log.err) self.failUnlessEqual(flushed, []) # the call will be made right away, but the return Deferred will not, # because the pipeline is now full. d.addCallbacks(finished.append, log.err) self.failUnlessEqual(len(finished), 0) self.failUnlessCallsAre([ ( ("one",) , {} ), ( ("two",) , {"kw": 2} ), ( ("three", "posarg1"), {} ), ]) self.failUnlessEqual(p.gauge, 110) self.failUnlessRaises(pipeline.SingleFileError, p.add, 10, self.pause) # retiring either call will unblock the pipeline, causing the #3 # Deferred to fire self.calls[2][0].callback("three-result") # pipeline: [two] self.failUnlessEqual(len(finished), 1) self.failUnlessEqual(finished[0], None) self.failUnlessEqual(flushed, []) # retiring call#2 will finally allow the flush() Deferred to fire self.calls[1][0].callback("two-result") self.failUnlessEqual(len(flushed), 1) def test_errors(self): self.calls = [] p = pipeline.Pipeline(100) d1 = p.add(200, self.pause, "one") d2 = p.flush() finished = [] d1.addBoth(finished.append) self.failUnlessEqual(finished, []) flushed = [] d2.addBoth(flushed.append) self.failUnlessEqual(flushed, []) self.calls[0][0].errback(ValueError("oops")) self.failUnlessEqual(len(finished), 1) f = finished[0] self.failUnless(isinstance(f, Failure)) self.failUnless(f.check(pipeline.PipelineError)) self.failUnlessIn("PipelineError", str(f.value)) self.failUnlessIn("ValueError", str(f.value)) r = repr(f.value) self.failUnless("ValueError" in r, r) f2 = f.value.error self.failUnless(f2.check(ValueError)) self.failUnlessEqual(len(flushed), 1) f = flushed[0] self.failUnless(isinstance(f, Failure)) self.failUnless(f.check(pipeline.PipelineError)) f2 = f.value.error self.failUnless(f2.check(ValueError)) # now that the pipeline is in the failed state, any new calls will # fail immediately d3 = p.add(20, self.pause, "two") finished = [] d3.addBoth(finished.append) self.failUnlessEqual(len(finished), 1) f = finished[0] self.failUnless(isinstance(f, Failure)) self.failUnless(f.check(pipeline.PipelineError)) r = repr(f.value) self.failUnless("ValueError" in r, r) f2 = f.value.error self.failUnless(f2.check(ValueError)) d4 = p.flush() flushed = [] d4.addBoth(flushed.append) self.failUnlessEqual(len(flushed), 1) f = flushed[0] self.failUnless(isinstance(f, Failure)) self.failUnless(f.check(pipeline.PipelineError)) f2 = f.value.error self.failUnless(f2.check(ValueError)) def test_errors2(self): self.calls = [] p = pipeline.Pipeline(100) d1 = p.add(10, self.pause, "one") d2 = p.add(20, self.pause, "two") d3 = p.add(30, self.pause, "three") d4 = p.flush() # one call fails, then the second one succeeds: make sure # ExpandableDeferredList tolerates the second one flushed = [] d4.addBoth(flushed.append) self.failUnlessEqual(flushed, []) self.calls[0][0].errback(ValueError("oops")) self.failUnlessEqual(len(flushed), 1) f = flushed[0] self.failUnless(isinstance(f, Failure)) self.failUnless(f.check(pipeline.PipelineError)) f2 = f.value.error self.failUnless(f2.check(ValueError)) self.calls[1][0].callback("two-result") self.calls[2][0].errback(ValueError("three-error")) del d1,d2,d3,d4 class SampleError(Exception): pass class Log(unittest.TestCase): def test_err(self): if not hasattr(self, "flushLoggedErrors"): # without flushLoggedErrors, we can't get rid of the # twisted.log.err that tahoe_log records, so we can't keep this # test from [ERROR]ing raise unittest.SkipTest("needs flushLoggedErrors from Twisted-2.5.0") 
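# Illustrative sketch (not part of the original suite): the trial idiom the
# test below depends on.  A Failure handed to log.err() is re-raised by trial
# at the end of the test unless flushLoggedErrors() is called for that
# exception type, which is why the real test is skipped on Twisted versions
# that predate flushLoggedErrors.  This example class is hypothetical and
# only restates that idiom.
class _ExampleFlushLoggedErrors(unittest.TestCase):
    def test_flush_keeps_trial_happy(self):
        try:
            raise SampleError("expected, and logged on purpose")
        except SampleError:
            log.err(Failure())              # trial records this as an error...
        self.flushLoggedErrors(SampleError) # ...and this discards it again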
try: raise SampleError("simple sample") except: f = Failure() tahoe_log.err(format="intentional sample error", failure=f, level=tahoe_log.OPERATIONAL, umid="wO9UoQ") self.flushLoggedErrors(SampleError) class SimpleSpans: # this is a simple+inefficient form of util.spans.Spans . We compare the # behavior of this reference model against the real (efficient) form. def __init__(self, _span_or_start=None, length=None): self._have = set() if length is not None: for i in range(_span_or_start, _span_or_start+length): self._have.add(i) elif _span_or_start: for (start,length) in _span_or_start: self.add(start, length) def add(self, start, length): for i in range(start, start+length): self._have.add(i) return self def remove(self, start, length): for i in range(start, start+length): self._have.discard(i) return self def each(self): return sorted(self._have) def __iter__(self): items = sorted(self._have) prevstart = None prevend = None for i in items: if prevstart is None: prevstart = prevend = i continue if i == prevend+1: prevend = i continue yield (prevstart, prevend-prevstart+1) prevstart = prevend = i if prevstart is not None: yield (prevstart, prevend-prevstart+1) def __nonzero__(self): # this gets us bool() return self.len() def len(self): return len(self._have) def __add__(self, other): s = self.__class__(self) for (start, length) in other: s.add(start, length) return s def __sub__(self, other): s = self.__class__(self) for (start, length) in other: s.remove(start, length) return s def __iadd__(self, other): for (start, length) in other: self.add(start, length) return self def __isub__(self, other): for (start, length) in other: self.remove(start, length) return self def __and__(self, other): s = self.__class__() for i in other.each(): if i in self._have: s.add(i, 1) return s def __contains__(self, (start,length)): for i in range(start, start+length): if i not in self._have: return False return True class ByteSpans(unittest.TestCase): def test_basic(self): s = Spans() self.failUnlessEqual(list(s), []) self.failIf(s) self.failIf((0,1) in s) self.failUnlessEqual(s.len(), 0) s1 = Spans(3, 4) # 3,4,5,6 self._check1(s1) s1 = Spans(3L, 4L) # 3,4,5,6 self._check1(s1) s2 = Spans(s1) self._check1(s2) s2.add(10,2) # 10,11 self._check1(s1) self.failUnless((10,1) in s2) self.failIf((10,1) in s1) self.failUnlessEqual(list(s2.each()), [3,4,5,6,10,11]) self.failUnlessEqual(s2.len(), 6) s2.add(15,2).add(20,2) self.failUnlessEqual(list(s2.each()), [3,4,5,6,10,11,15,16,20,21]) self.failUnlessEqual(s2.len(), 10) s2.remove(4,3).remove(15,1) self.failUnlessEqual(list(s2.each()), [3,10,11,16,20,21]) self.failUnlessEqual(s2.len(), 6) s1 = SimpleSpans(3, 4) # 3 4 5 6 s2 = SimpleSpans(5, 4) # 5 6 7 8 i = s1 & s2 self.failUnlessEqual(list(i.each()), [5, 6]) def _check1(self, s): self.failUnlessEqual(list(s), [(3,4)]) self.failUnless(s) self.failUnlessEqual(s.len(), 4) self.failIf((0,1) in s) self.failUnless((3,4) in s) self.failUnless((3,1) in s) self.failUnless((5,2) in s) self.failUnless((6,1) in s) self.failIf((6,2) in s) self.failIf((7,1) in s) self.failUnlessEqual(list(s.each()), [3,4,5,6]) def test_large(self): s = Spans(4, 2**65) # don't do this with a SimpleSpans self.failUnlessEqual(list(s), [(4, 2**65)]) self.failUnless(s) self.failUnlessEqual(s.len(), 2**65) self.failIf((0,1) in s) self.failUnless((4,2) in s) self.failUnless((2**65,2) in s) def test_math(self): s1 = Spans(0, 10) # 0,1,2,3,4,5,6,7,8,9 s2 = Spans(5, 3) # 5,6,7 s3 = Spans(8, 4) # 8,9,10,11 s = s1 - s2 self.failUnlessEqual(list(s.each()), 
[0,1,2,3,4,8,9]) s = s1 - s3 self.failUnlessEqual(list(s.each()), [0,1,2,3,4,5,6,7]) s = s2 - s3 self.failUnlessEqual(list(s.each()), [5,6,7]) s = s1 & s2 self.failUnlessEqual(list(s.each()), [5,6,7]) s = s2 & s1 self.failUnlessEqual(list(s.each()), [5,6,7]) s = s1 & s3 self.failUnlessEqual(list(s.each()), [8,9]) s = s3 & s1 self.failUnlessEqual(list(s.each()), [8,9]) s = s2 & s3 self.failUnlessEqual(list(s.each()), []) s = s3 & s2 self.failUnlessEqual(list(s.each()), []) s = Spans() & s3 self.failUnlessEqual(list(s.each()), []) s = s3 & Spans() self.failUnlessEqual(list(s.each()), []) s = s1 + s2 self.failUnlessEqual(list(s.each()), [0,1,2,3,4,5,6,7,8,9]) s = s1 + s3 self.failUnlessEqual(list(s.each()), [0,1,2,3,4,5,6,7,8,9,10,11]) s = s2 + s3 self.failUnlessEqual(list(s.each()), [5,6,7,8,9,10,11]) s = Spans(s1) s -= s2 self.failUnlessEqual(list(s.each()), [0,1,2,3,4,8,9]) s = Spans(s1) s -= s3 self.failUnlessEqual(list(s.each()), [0,1,2,3,4,5,6,7]) s = Spans(s2) s -= s3 self.failUnlessEqual(list(s.each()), [5,6,7]) s = Spans(s1) s += s2 self.failUnlessEqual(list(s.each()), [0,1,2,3,4,5,6,7,8,9]) s = Spans(s1) s += s3 self.failUnlessEqual(list(s.each()), [0,1,2,3,4,5,6,7,8,9,10,11]) s = Spans(s2) s += s3 self.failUnlessEqual(list(s.each()), [5,6,7,8,9,10,11]) def test_random(self): # attempt to increase coverage of corner cases by comparing behavior # of a simple-but-slow model implementation against the # complex-but-fast actual implementation, in a large number of random # operations S1 = SimpleSpans S2 = Spans s1 = S1(); s2 = S2() seed = "" def _create(subseed): ns1 = S1(); ns2 = S2() for i in range(10): what = _hash(subseed+str(i)).hexdigest() start = int(what[2:4], 16) length = max(1,int(what[5:6], 16)) ns1.add(start, length); ns2.add(start, length) return ns1, ns2 #print for i in range(1000): what = _hash(seed+str(i)).hexdigest() op = what[0] subop = what[1] start = int(what[2:4], 16) length = max(1,int(what[5:6], 16)) #print what if op in "0": if subop in "01234": s1 = S1(); s2 = S2() elif subop in "5678": s1 = S1(start, length); s2 = S2(start, length) else: s1 = S1(s1); s2 = S2(s2) #print "s2 = %s" % s2.dump() elif op in "123": #print "s2.add(%d,%d)" % (start, length) s1.add(start, length); s2.add(start, length) elif op in "456": #print "s2.remove(%d,%d)" % (start, length) s1.remove(start, length); s2.remove(start, length) elif op in "78": ns1, ns2 = _create(what[7:11]) #print "s2 + %s" % ns2.dump() s1 = s1 + ns1; s2 = s2 + ns2 elif op in "9a": ns1, ns2 = _create(what[7:11]) #print "%s - %s" % (s2.dump(), ns2.dump()) s1 = s1 - ns1; s2 = s2 - ns2 elif op in "bc": ns1, ns2 = _create(what[7:11]) #print "s2 += %s" % ns2.dump() s1 += ns1; s2 += ns2 elif op in "de": ns1, ns2 = _create(what[7:11]) #print "%s -= %s" % (s2.dump(), ns2.dump()) s1 -= ns1; s2 -= ns2 else: ns1, ns2 = _create(what[7:11]) #print "%s &= %s" % (s2.dump(), ns2.dump()) s1 = s1 & ns1; s2 = s2 & ns2 #print "s2 now %s" % s2.dump() self.failUnlessEqual(list(s1.each()), list(s2.each())) self.failUnlessEqual(s1.len(), s2.len()) self.failUnlessEqual(bool(s1), bool(s2)) self.failUnlessEqual(list(s1), list(s2)) for j in range(10): what = _hash(what[12:14]+str(j)).hexdigest() start = int(what[2:4], 16) length = max(1, int(what[5:6], 16)) span = (start, length) self.failUnlessEqual(bool(span in s1), bool(span in s2)) # s() # s(start,length) # s(s0) # s.add(start,length) : returns s # s.remove(start,length) # s.each() -> list of byte offsets, mostly for testing # list(s) -> list of (start,length) tuples, one per span # 
(start,length) in s -> True if (start..start+length-1) are all members # NOT equivalent to x in list(s) # s.len() -> number of bytes, for testing, bool(), and accounting/limiting # bool(s) (__nonzeron__) # s = s1+s2, s1-s2, +=s1, -=s1 def test_overlap(self): for a in range(20): for b in range(10): for c in range(20): for d in range(10): self._test_overlap(a,b,c,d) def _test_overlap(self, a, b, c, d): s1 = set(range(a,a+b)) s2 = set(range(c,c+d)) #print "---" #self._show_overlap(s1, "1") #self._show_overlap(s2, "2") o = overlap(a,b,c,d) expected = s1.intersection(s2) if not expected: self.failUnlessEqual(o, None) else: start,length = o so = set(range(start,start+length)) #self._show(so, "o") self.failUnlessEqual(so, expected) def _show_overlap(self, s, c): import sys out = sys.stdout if s: for i in range(max(s)): if i in s: out.write(c) else: out.write(" ") out.write("\n") def extend(s, start, length, fill): if len(s) >= start+length: return s assert len(fill) == 1 return s + fill*(start+length-len(s)) def replace(s, start, data): assert len(s) >= start+len(data) return s[:start] + data + s[start+len(data):] class SimpleDataSpans: def __init__(self, other=None): self.missing = "" # "1" where missing, "0" where found self.data = "" if other: for (start, data) in other.get_chunks(): self.add(start, data) def __nonzero__(self): # this gets us bool() return self.len() def len(self): return len(self.missing.replace("1", "")) def _dump(self): return [i for (i,c) in enumerate(self.missing) if c == "0"] def _have(self, start, length): m = self.missing[start:start+length] if not m or len(m)= 0.6c6", {"setuptools": ("0.6", "", None)}) check_requirement("setuptools >= 0.6c6", {"setuptools": ("0.6", "", "distribute")}) check_requirement("pycrypto == 2.0.1, == 2.1, >= 2.3", {"pycrypto": ("2.1.0", "", None)}) check_requirement("pycrypto == 2.0.1, == 2.1, >= 2.3", {"pycrypto": ("2.4.0", "", None)}) check_requirement("zope.interface <= 3.6.2, >= 3.6.6", {"zope.interface": ("3.6.1", "", None)}) check_requirement("zope.interface <= 3.6.2, >= 3.6.6", {"zope.interface": ("3.6.6", "", None)}) check_requirement("zope.interface", {"zope.interface": ("unknown", "", None)}) check_requirement("mock", {"mock": ("0.6.0", "", None)}) check_requirement("foo >= 1.0", {"foo": ("1.0", "", None), "bar": ("2.0", "", None)}) check_requirement("foolscap[secure_connections] >= 0.6.0", {"foolscap": ("0.7.0", "", None)}) try: check_requirement("foolscap[secure_connections] >= 0.6.0", {"foolscap": ("0.6.1+", "", None)}) # succeeding is ok except PackagingError, e: self.failUnlessIn("could not parse", str(e)) self.failUnlessRaises(PackagingError, check_requirement, "foolscap[secure_connections] >= 0.6.0", {"foolscap": ("0.5.1", "", None)}) self.failUnlessRaises(PackagingError, check_requirement, "pycrypto == 2.0.1, == 2.1, >= 2.3", {"pycrypto": ("2.2.0", "", None)}) self.failUnlessRaises(PackagingError, check_requirement, "zope.interface <= 3.6.2, >= 3.6.6", {"zope.interface": ("3.6.4", "", None)}) self.failUnlessRaises(PackagingError, check_requirement, "foo >= 1.0", {}) self.failUnlessRaises(PackagingError, check_requirement, "foo >= 1.0", {"foo": ("irrational", "", None)}) self.failUnlessRaises(ImportError, check_requirement, "foo >= 1.0", {"foo": (None, None, "foomodule")}) def test_cross_check_ticket_1355(self): # The bug in #1355 is triggered when a version string from either pkg_resources or import # is not parseable at all by normalized_version. 
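# Illustrative sketch (not part of the original tests), expanding on the
# comment above: cross_check() compares {name: (version, location)} as
# reported by pkg_resources against [(name, (version, location, comment))]
# as discovered by import, and a version string that cannot be normalized at
# all is reported as a human-readable warning string.  The helper name below
# is hypothetical; "walla walla washington" is just a string that will never
# normalize to a version.
def _example_unparseable_version_warning():
    res = cross_check({"foo": ("walla walla washington", "")},
                      [("foo", ("1.0", "", None))])
    # expected: a single warning saying the pkg_resources version string
    # could not be parsed, much like the assertions in the test below
    return res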
        res = cross_check({"foo": ("unparseable", "")}, [("foo", ("1.0", "", None))])
        self.failUnlessEqual(len(res), 1)
        self.failUnlessIn("by pkg_resources could not be parsed", res[0])

        res = cross_check({"foo": ("1.0", "")}, [("foo", ("unparseable", "", None))])
        self.failUnlessEqual(len(res), 1)
        self.failUnlessIn(") could not be parsed", res[0])

    def test_cross_check(self):
        res = cross_check({}, [])
        self.failUnlessEqual(res, [])

        res = cross_check({}, [("allmydata-tahoe", ("1.0", "", "blah"))])
        self.failUnlessEqual(res, [])

        res = cross_check({"foo": ("unparseable", "")}, [])
        self.failUnlessEqual(len(res), 1)
        self.failUnlessIn("not found by import", res[0])

        res = cross_check({"argparse": ("unparseable", "")}, [])
        self.failUnlessEqual(len(res), 0)

        res = cross_check({}, [("foo", ("unparseable", "", None))])
        self.failUnlessEqual(len(res), 1)
        self.failUnlessIn("not found by pkg_resources", res[0])

        res = cross_check({"distribute": ("1.0", "/somewhere")},
                          [("setuptools", ("2.0", "/somewhere", "distribute"))])
        self.failUnlessEqual(len(res), 0)

        res = cross_check({"distribute": ("1.0", "/somewhere")},
                          [("setuptools", ("2.0", "/somewhere", None))])
        self.failUnlessEqual(len(res), 1)
        self.failUnlessIn("location mismatch", res[0])

        res = cross_check({"distribute": ("1.0", "/somewhere")},
                          [("setuptools", ("2.0", "/somewhere_different", None))])
        self.failUnlessEqual(len(res), 1)
        self.failUnlessIn("location mismatch", res[0])

        res = cross_check({"zope.interface": ("1.0", "")},
                          [("zope.interface", ("unknown", "", None))])
        self.failUnlessEqual(len(res), 0)

        res = cross_check({"foo": ("1.0", "")}, [("foo", ("unknown", "", None))])
        self.failUnlessEqual(len(res), 1)
        self.failUnlessIn("could not find a version number", res[0])

        # When pkg_resources and import both find a package, there is only a warning if both
        # the version and the path fail to match.
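# Illustrative sketch (not part of the original tests), restating the rule in
# the comment above: when both pkg_resources and import report a package,
# agreement on either the (normalized) version or the location is enough to
# stay quiet, so only a simultaneous mismatch of both produces a warning --
# e.g. "1.0-r123" and "1.0.post123" count as the same version once
# normalized.  A hedged pure-Python restatement, not the real cross_check
# logic; "same_version" is a hypothetical comparison callable that normalizes
# before comparing.
def _example_needs_warning(pkg_resources_info, import_info, same_version):
    (pr_ver, pr_loc) = pkg_resources_info
    (imp_ver, imp_loc, _comment) = import_info
    if same_version(pr_ver, imp_ver) or pr_loc == imp_loc:
        return False          # either field matching is good enough
    return True               # both differ: worth warning about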
res = cross_check({"foo": ("1.0", "/somewhere")}, [("foo", ("2.0", "/somewhere", None))]) self.failUnlessEqual(len(res), 0) res = cross_check({"foo": ("1.0", "/somewhere")}, [("foo", ("1.0", "/somewhere_different", None))]) self.failUnlessEqual(len(res), 0) res = cross_check({"foo": ("1.0-r123", "/somewhere")}, [("foo", ("1.0.post123", "/somewhere_different", None))]) self.failUnlessEqual(len(res), 0) res = cross_check({"foo": ("1.0", "/somewhere")}, [("foo", ("2.0", "/somewhere_different", None))]) self.failUnlessEqual(len(res), 1) self.failUnlessIn("but version '2.0'", res[0]) # based on https://bitbucket.org/tarek/distutilsversion/src/17df9a7d96ef/test_verlib.py class VersionTestCase(unittest.TestCase): versions = ((V('1.0'), '1.0'), (V('1.1'), '1.1'), (V('1.2.3'), '1.2.3'), (V('1.2'), '1.2'), (V('1.2.3a4'), '1.2.3a4'), (V('1.2c4'), '1.2c4'), (V('1.2.3.4'), '1.2.3.4'), (V('1.2.3.4.0b3'), '1.2.3.4b3'), (V('1.2.0.0.0'), '1.2'), (V('1.0.dev345'), '1.0.dev345'), (V('1.0.post456.dev623'), '1.0.post456.dev623')) def test_basic_versions(self): for v, s in self.versions: self.failUnlessEqual(str(v), s) def test_from_parts(self): for v, s in self.versions: parts = v.parts v2 = V.from_parts(*parts) self.failUnlessEqual(v, v2) self.failUnlessEqual(str(v), str(v2)) def test_irrational_versions(self): irrational = ('1', '1.2a', '1.2.3b', '1.02', '1.2a03', '1.2a3.04', '1.2.dev.2', '1.2dev', '1.2.dev', '1.2.dev2.post2', '1.2.post2.dev3.post4') for s in irrational: self.failUnlessRaises(IrrationalVersionError, V, s) def test_comparison(self): self.failUnlessRaises(TypeError, lambda: V('1.2.0') == '1.2') self.failUnlessEqual(V('1.2.0'), V('1.2')) self.failIfEqual(V('1.2.0'), V('1.2.3')) self.failUnless(V('1.2.0') < V('1.2.3')) self.failUnless(V('1.0') > V('1.0b2')) self.failUnless(V('1.0') > V('1.0c2') > V('1.0c1') > V('1.0b2') > V('1.0b1') > V('1.0a2') > V('1.0a1')) self.failUnless(V('1.0.0') > V('1.0.0c2') > V('1.0.0c1') > V('1.0.0b2') > V('1.0.0b1') > V('1.0.0a2') > V('1.0.0a1')) self.failUnless(V('1.0') < V('1.0.post456.dev623')) self.failUnless(V('1.0.post456.dev623') < V('1.0.post456') < V('1.0.post1234')) self.failUnless(V('1.0a1') < V('1.0a2.dev456') < V('1.0a2') < V('1.0a2.1.dev456') # e.g. 
need to do a quick post release on 1.0a2 < V('1.0a2.1') < V('1.0b1.dev456') < V('1.0b2') < V('1.0c1') < V('1.0c2.dev456') < V('1.0c2') < V('1.0.dev7') < V('1.0.dev18') < V('1.0.dev456') < V('1.0.dev1234') < V('1.0') < V('1.0.post456.dev623') # development version of a post release < V('1.0.post456')) def test_suggest_normalized_version(self): self.failUnlessEqual(suggest('1.0'), '1.0') self.failUnlessEqual(suggest('1.0-alpha1'), '1.0a1') self.failUnlessEqual(suggest('1.0c2'), '1.0c2') self.failUnlessEqual(suggest('walla walla washington'), None) self.failUnlessEqual(suggest('2.4c1'), '2.4c1') # from setuptools self.failUnlessEqual(suggest('0.4a1.r10'), '0.4a1.post10') self.failUnlessEqual(suggest('0.7a1dev-r66608'), '0.7a1.dev66608') self.failUnlessEqual(suggest('0.6a9.dev-r41475'), '0.6a9.dev41475') self.failUnlessEqual(suggest('2.4preview1'), '2.4c1') self.failUnlessEqual(suggest('2.4pre1') , '2.4c1') self.failUnlessEqual(suggest('2.1-rc2'), '2.1c2') # from pypi self.failUnlessEqual(suggest('0.1dev'), '0.1.dev0') self.failUnlessEqual(suggest('0.1.dev'), '0.1.dev0') # we want to be able to parse Twisted # development versions are like post releases in Twisted self.failUnlessEqual(suggest('9.0.0+r2363'), '9.0.0.post2363') # pre-releases are using markers like "pre1" self.failUnlessEqual(suggest('9.0.0pre1'), '9.0.0c1') # we want to be able to parse Tcl-TK # they use "p1" "p2" for post releases self.failUnlessEqual(suggest('1.4p1'), '1.4.post1') # from darcsver self.failUnlessEqual(suggest('1.8.1-r4956'), '1.8.1.post4956') # zetuptoolz self.failUnlessEqual(suggest('0.6c16dev3'), '0.6c16.dev3') tahoe-lafs-1.10.0/src/allmydata/test/test_web.py000066400000000000000000010326571221140116300215250ustar00rootroot00000000000000import os.path, re, urllib, time, cgi import simplejson from StringIO import StringIO from twisted.application import service from twisted.trial import unittest from twisted.internet import defer, reactor from twisted.internet.task import Clock from twisted.web import client, error, http from twisted.python import failure, log from foolscap.api import fireEventually, flushEventualQueue from nevow.util import escapeToXML from nevow import rend from allmydata import interfaces, uri, webish, dirnode from allmydata.storage.shares import get_share_file from allmydata.storage_client import StorageFarmBroker, StubServer from allmydata.immutable import upload from allmydata.immutable.downloader.status import DownloadStatus from allmydata.dirnode import DirectoryNode from allmydata.nodemaker import NodeMaker from allmydata.unknown import UnknownNode from allmydata.web import status, common from allmydata.scripts.debug import CorruptShareOptions, corrupt_share from allmydata.util import fileutil, base32, hashutil from allmydata.util.consumer import download_to_data from allmydata.util.netstring import split_netstring from allmydata.util.encodingutil import to_str from allmydata.test.common import FakeCHKFileNode, FakeMutableFileNode, \ create_chk_filenode, WebErrorMixin, ShouldFailMixin, \ make_mutable_file_uri, create_mutable_filenode from allmydata.interfaces import IMutableFileNode, SDMF_VERSION, MDMF_VERSION from allmydata.mutable import servermap, publish, retrieve import allmydata.test.common_util as testutil from allmydata.test.no_network import GridTestMixin from allmydata.test.common_web import HTTPClientGETFactory, \ HTTPClientHEADFactory from allmydata.client import Client, SecretHolder from allmydata.introducer import IntroducerNode # create a fake uploader/downloader, and 
a couple of fake dirnodes, then # create a webserver that works against them timeout = 480 # Most of these take longer than 240 seconds on Francois's arm box. unknown_rwcap = u"lafs://from_the_future_rw_\u263A".encode('utf-8') unknown_rocap = u"ro.lafs://readonly_from_the_future_ro_\u263A".encode('utf-8') unknown_immcap = u"imm.lafs://immutable_from_the_future_imm_\u263A".encode('utf-8') FAVICON_MARKUP = '' class FakeStatsProvider: def get_stats(self): stats = {'stats': {}, 'counters': {}} return stats class FakeNodeMaker(NodeMaker): encoding_params = { 'k': 3, 'n': 10, 'happy': 7, 'max_segment_size':128*1024 # 1024=KiB } def _create_lit(self, cap): return FakeCHKFileNode(cap, self.all_contents) def _create_immutable(self, cap): return FakeCHKFileNode(cap, self.all_contents) def _create_mutable(self, cap): return FakeMutableFileNode(None, None, self.encoding_params, None, self.all_contents).init_from_cap(cap) def create_mutable_file(self, contents="", keysize=None, version=SDMF_VERSION): n = FakeMutableFileNode(None, None, self.encoding_params, None, self.all_contents) return n.create(contents, version=version) class FakeUploader(service.Service): name = "uploader" helper_furl = None helper_connected = False def upload(self, uploadable): d = uploadable.get_size() d.addCallback(lambda size: uploadable.read(size)) def _got_data(datav): data = "".join(datav) n = create_chk_filenode(data, self.all_contents) ur = upload.UploadResults(file_size=len(data), ciphertext_fetched=0, preexisting_shares=0, pushed_shares=10, sharemap={}, servermap={}, timings={}, uri_extension_data={}, uri_extension_hash="fake", verifycapstr="fakevcap") ur.set_uri(n.get_uri()) return ur d.addCallback(_got_data) return d def get_helper_info(self): return (self.helper_furl, self.helper_connected) def build_one_ds(): ds = DownloadStatus("storage_index", 1234) now = time.time() serverA = StubServer(hashutil.tagged_hash("foo", "serverid_a")[:20]) serverB = StubServer(hashutil.tagged_hash("foo", "serverid_b")[:20]) storage_index = hashutil.storage_index_hash("SI") e0 = ds.add_segment_request(0, now) e0.activate(now+0.5) e0.deliver(now+1, 0, 100, 0.5) # when, start,len, decodetime e1 = ds.add_segment_request(1, now+2) e1.error(now+3) # two outstanding requests e2 = ds.add_segment_request(2, now+4) e3 = ds.add_segment_request(3, now+5) del e2,e3 # hush pyflakes # simulate a segment which gets delivered faster than a system clock tick (ticket #1166) e = ds.add_segment_request(4, now) e.activate(now) e.deliver(now, 0, 140, 0.5) e = ds.add_dyhb_request(serverA, now) e.finished([1,2], now+1) e = ds.add_dyhb_request(serverB, now+2) # left unfinished e = ds.add_read_event(0, 120, now) e.update(60, 0.5, 0.1) # bytes, decrypttime, pausetime e.finished(now+1) e = ds.add_read_event(120, 30, now+2) # left unfinished e = ds.add_block_request(serverA, 1, 100, 20, now) e.finished(20, now+1) e = ds.add_block_request(serverB, 1, 120, 30, now+1) # left unfinished # make sure that add_read_event() can come first too ds1 = DownloadStatus(storage_index, 1234) e = ds1.add_read_event(0, 120, now) e.update(60, 0.5, 0.1) # bytes, decrypttime, pausetime e.finished(now+1) return ds class FakeHistory: _all_upload_status = [upload.UploadStatus()] _all_download_status = [build_one_ds()] _all_mapupdate_statuses = [servermap.UpdateStatus()] _all_publish_statuses = [publish.PublishStatus()] _all_retrieve_statuses = [retrieve.RetrieveStatus()] def list_all_upload_statuses(self): return self._all_upload_status def list_all_download_statuses(self): return 
self._all_download_status def list_all_mapupdate_statuses(self): return self._all_mapupdate_statuses def list_all_publish_statuses(self): return self._all_publish_statuses def list_all_retrieve_statuses(self): return self._all_retrieve_statuses def list_all_helper_statuses(self): return [] class FakeDisplayableServer(StubServer): def __init__(self, serverid, nickname): StubServer.__init__(self, serverid) self.announcement = {"my-version": "allmydata-tahoe-fake", "service-name": "storage", "nickname": nickname} def is_connected(self): return True def get_permutation_seed(self): return "" def get_remote_host(self): return "" def get_last_loss_time(self): return None def get_announcement_time(self): return None def get_announcement(self): return self.announcement def get_nickname(self): return self.announcement["nickname"] class FakeBucketCounter(object): def get_state(self): return {"last-complete-bucket-count": 0} def get_progress(self): return {"estimated-time-per-cycle": 0, "cycle-in-progress": False, "remaining-wait-time": 0} class FakeLeaseChecker(object): def __init__(self): self.expiration_enabled = False self.mode = "age" self.override_lease_duration = None self.sharetypes_to_expire = {} def get_state(self): return {"history": None} def get_progress(self): return {"estimated-time-per-cycle": 0, "cycle-in-progress": False, "remaining-wait-time": 0} class FakeStorageServer(service.MultiService): name = 'storage' def __init__(self, nodeid, nickname): service.MultiService.__init__(self) self.my_nodeid = nodeid self.nickname = nickname self.bucket_counter = FakeBucketCounter() self.lease_checker = FakeLeaseChecker() def get_stats(self): return {"storage_server.accepting_immutable_shares": False} class FakeClient(Client): def __init__(self): # don't upcall to Client.__init__, since we only want to initialize a # minimal subset service.MultiService.__init__(self) self.all_contents = {} self.nodeid = "fake_nodeid" self.nickname = u"fake_nickname \u263A" self.introducer_furl = "None" self.stats_provider = FakeStatsProvider() self._secret_holder = SecretHolder("lease secret", "convergence secret") self.helper = None self.convergence = "some random string" self.storage_broker = StorageFarmBroker(None, permute_peers=True) # fake knowledge of another server self.storage_broker.test_add_server("other_nodeid", FakeDisplayableServer("other_nodeid", u"other_nickname \u263B")) self.introducer_client = None self.history = FakeHistory() self.uploader = FakeUploader() self.uploader.all_contents = self.all_contents self.uploader.setServiceParent(self) self.blacklist = None self.nodemaker = FakeNodeMaker(None, self._secret_holder, None, self.uploader, None, None, None, None) self.nodemaker.all_contents = self.all_contents self.mutable_file_default = SDMF_VERSION self.addService(FakeStorageServer(self.nodeid, self.nickname)) def get_long_nodeid(self): return "v0-nodeid" def get_long_tubid(self): return "tubid" def startService(self): return service.MultiService.startService(self) def stopService(self): return service.MultiService.stopService(self) MUTABLE_SIZELIMIT = FakeMutableFileNode.MUTABLE_SIZELIMIT class WebMixin(object): def setUp(self): self.s = FakeClient() self.s.startService() self.staticdir = self.mktemp() self.clock = Clock() self.ws = webish.WebishServer(self.s, "0", staticdir=self.staticdir, clock=self.clock) self.ws.setServiceParent(self.s) self.webish_port = self.ws.getPortnum() self.webish_url = self.ws.getURL() assert self.webish_url.endswith("/") self.webish_url = self.webish_url[:-1] # 
these tests add their own / l = [ self.s.create_dirnode() for x in range(6) ] d = defer.DeferredList(l) def _then(res): self.public_root = res[0][1] assert interfaces.IDirectoryNode.providedBy(self.public_root), res self.public_url = "/uri/" + self.public_root.get_uri() self.private_root = res[1][1] foo = res[2][1] self._foo_node = foo self._foo_uri = foo.get_uri() self._foo_readonly_uri = foo.get_readonly_uri() self._foo_verifycap = foo.get_verify_cap().to_string() # NOTE: we ignore the deferred on all set_uri() calls, because we # know the fake nodes do these synchronously self.public_root.set_uri(u"foo", foo.get_uri(), foo.get_readonly_uri()) self.BAR_CONTENTS, n, self._bar_txt_uri = self.makefile(0) foo.set_uri(u"bar.txt", self._bar_txt_uri, self._bar_txt_uri) self._bar_txt_verifycap = n.get_verify_cap().to_string() # sdmf # XXX: Do we ever use this? self.BAZ_CONTENTS, n, self._baz_txt_uri, self._baz_txt_readonly_uri = self.makefile_mutable(0) foo.set_uri(u"baz.txt", self._baz_txt_uri, self._baz_txt_readonly_uri) # mdmf self.QUUX_CONTENTS, n, self._quux_txt_uri, self._quux_txt_readonly_uri = self.makefile_mutable(0, mdmf=True) assert self._quux_txt_uri.startswith("URI:MDMF") foo.set_uri(u"quux.txt", self._quux_txt_uri, self._quux_txt_readonly_uri) foo.set_uri(u"empty", res[3][1].get_uri(), res[3][1].get_readonly_uri()) sub_uri = res[4][1].get_uri() self._sub_uri = sub_uri foo.set_uri(u"sub", sub_uri, sub_uri) sub = self.s.create_node_from_uri(sub_uri) self._sub_node = sub _ign, n, blocking_uri = self.makefile(1) foo.set_uri(u"blockingfile", blocking_uri, blocking_uri) # filenode to test for html encoding issues self._htmlname_unicode = u"<&weirdly'named\"file>>>_