ntfs2btrfs-20240115/CMakeLists.txt
cmake_minimum_required(VERSION 3.14.3)
cmake_policy(SET CMP0091 NEW)
set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>")
project(ntfs2btrfs VERSION 20240115)
include(GNUInstallDirs)
option(WITH_ZLIB "Include zlib support" ON)
option(WITH_LZO "Include lzo support" ON)
option(WITH_ZSTD "Include zstd support" ON)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/src/config.h.in ${CMAKE_CURRENT_BINARY_DIR}/config.h)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/ntfs2btrfs.8.in ${CMAKE_CURRENT_BINARY_DIR}/ntfs2btrfs.8)
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
find_package(fmt REQUIRED)
find_package(PkgConfig REQUIRED)
if(WITH_ZLIB)
find_package(ZLIB REQUIRED)
endif()
if(WITH_LZO)
pkg_check_modules(LZO REQUIRED lzo2)
endif()
if(WITH_ZSTD)
pkg_check_modules(ZSTD REQUIRED libzstd)
endif()
set(SRC_FILES src/ntfs2btrfs.cpp
src/ntfs.cpp
src/decomp.cpp
src/compress.cpp
src/rollback.cpp
src/crc32c.c
src/xxhash.c
src/sha256.c
src/blake2b-ref.c
src/ebiggers/lzx_decompress.c
src/ebiggers/lzx_common.c
src/ebiggers/aligned_malloc.c
src/ebiggers/decompress_common.c
src/ebiggers/xpress_decompress.c)
if(MSVC)
enable_language(ASM_MASM)
set(SRC_FILES ${SRC_FILES} src/crc32c-masm.asm)
else()
enable_language(ASM)
set(SRC_FILES ${SRC_FILES} src/crc32c-gas.S)
endif()
add_executable(ntfs2btrfs ${SRC_FILES})
if(CMAKE_BUILD_TYPE MATCHES "Debug")
add_definitions(-D_GLIBCXX_DEBUG)
endif()
target_link_libraries(ntfs2btrfs fmt::fmt-header-only)
if(WITH_ZLIB)
target_link_libraries(ntfs2btrfs ZLIB::ZLIB)
endif()
if(WITH_LZO)
target_link_libraries(ntfs2btrfs ${LZO_LINK_LIBRARIES})
endif()
if(WITH_ZSTD)
target_link_libraries(ntfs2btrfs ${ZSTD_LINK_LIBRARIES})
endif()
include_directories(${CMAKE_CURRENT_BINARY_DIR})
# Work around bug in MSVC version of cmake - see https://gitlab.kitware.com/cmake/cmake/-/merge_requests/4257
set(CMAKE_ASM_MASM_COMPILE_OPTIONS_MSVC_RUNTIME_LIBRARY_MultiThreaded "")
set(CMAKE_ASM_MASM_COMPILE_OPTIONS_MSVC_RUNTIME_LIBRARY_MultiThreadedDLL "")
set(CMAKE_ASM_MASM_COMPILE_OPTIONS_MSVC_RUNTIME_LIBRARY_MultiThreadedDebug "")
set(CMAKE_ASM_MASM_COMPILE_OPTIONS_MSVC_RUNTIME_LIBRARY_MultiThreadedDebugDLL "")
if(MSVC)
target_compile_options(ntfs2btrfs PRIVATE /W4)
else()
target_compile_options(ntfs2btrfs PRIVATE -Wall -Wextra -Wno-address-of-packed-member -Wconversion -Wno-unknown-pragmas -Werror=pointer-arith)
endif()
install(TARGETS ntfs2btrfs DESTINATION ${CMAKE_INSTALL_SBINDIR})
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/ntfs2btrfs.8 DESTINATION ${CMAKE_INSTALL_MANDIR}/man8)
ntfs2btrfs-20240115/LICENCE
GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
License is intended to guarantee your freedom to share and change free
software--to make sure the software is free for all its users. This
General Public License applies to most of the Free Software
Foundation's software and to any other program whose authors commit to
using it. (Some other Free Software Foundation software is covered by
the GNU Lesser General Public License instead.) You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
this service if you wish), that you receive source code or can get it
if you want it, that you can change the software or use pieces of it
in new free programs; and that you know you can do these things.
To protect your rights, we need to make restrictions that forbid
anyone to deny you these rights or to ask you to surrender the rights.
These restrictions translate to certain responsibilities for you if you
distribute copies of the software, or if you modify it.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must give the recipients all the rights that
you have. You must make sure that they, too, receive or can get the
source code. And you must show them these terms so they know their
rights.
We protect your rights with two steps: (1) copyright the software, and
(2) offer you this license which gives you legal permission to copy,
distribute and/or modify the software.
Also, for each author's protection and ours, we want to make certain
that everyone understands that there is no warranty for this free
software. If the software is modified by someone else and passed on, we
want its recipients to know that what they have is not the original, so
that any problems introduced by others will not reflect on the original
authors' reputations.
Finally, any free program is threatened constantly by software
patents. We wish to avoid the danger that redistributors of a free
program will individually obtain patent licenses, in effect making the
program proprietary. To prevent this, we have made it clear that any
patent must be licensed for everyone's free use or not licensed at all.
The precise terms and conditions for copying, distribution and
modification follow.
GNU GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License applies to any program or other work which contains
a notice placed by the copyright holder saying it may be distributed
under the terms of this General Public License. The "Program", below,
refers to any such program or work, and a "work based on the Program"
means either the Program or any derivative work under copyright law:
that is to say, a work containing the Program or a portion of it,
either verbatim or with modifications and/or translated into another
language. (Hereinafter, translation is included without limitation in
the term "modification".) Each licensee is addressed as "you".
Activities other than copying, distribution and modification are not
covered by this License; they are outside its scope. The act of
running the Program is not restricted, and the output from the Program
is covered only if its contents constitute a work based on the
Program (independent of having been made by running the Program).
Whether that is true depends on what the Program does.
1. You may copy and distribute verbatim copies of the Program's
source code as you receive it, in any medium, provided that you
conspicuously and appropriately publish on each copy an appropriate
copyright notice and disclaimer of warranty; keep intact all the
notices that refer to this License and to the absence of any warranty;
and give any other recipients of the Program a copy of this License
along with the Program.
You may charge a fee for the physical act of transferring a copy, and
you may at your option offer warranty protection in exchange for a fee.
2. You may modify your copy or copies of the Program or any portion
of it, thus forming a work based on the Program, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:
a) You must cause the modified files to carry prominent notices
stating that you changed the files and the date of any change.
b) You must cause any work that you distribute or publish, that in
whole or in part contains or is derived from the Program or any
part thereof, to be licensed as a whole at no charge to all third
parties under the terms of this License.
c) If the modified program normally reads commands interactively
when run, you must cause it, when started running for such
interactive use in the most ordinary way, to print or display an
announcement including an appropriate copyright notice and a
notice that there is no warranty (or else, saying that you provide
a warranty) and that users may redistribute the program under
these conditions, and telling the user how to view a copy of this
License. (Exception: if the Program itself is interactive but
does not normally print such an announcement, your work based on
the Program is not required to print an announcement.)
These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the Program,
and can be reasonably considered independent and separate works in
themselves, then this License, and its terms, do not apply to those
sections when you distribute them as separate works. But when you
distribute the same sections as part of a whole which is a work based
on the Program, the distribution of the whole must be on the terms of
this License, whose permissions for other licensees extend to the
entire whole, and thus to each and every part regardless of who wrote it.
Thus, it is not the intent of this section to claim rights or contest
your rights to work written entirely by you; rather, the intent is to
exercise the right to control the distribution of derivative or
collective works based on the Program.
In addition, mere aggregation of another work not based on the Program
with the Program (or with a work based on the Program) on a volume of
a storage or distribution medium does not bring the other work under
the scope of this License.
3. You may copy and distribute the Program (or a work based on it,
under Section 2) in object code or executable form under the terms of
Sections 1 and 2 above provided that you also do one of the following:
a) Accompany it with the complete corresponding machine-readable
source code, which must be distributed under the terms of Sections
1 and 2 above on a medium customarily used for software interchange; or,
b) Accompany it with a written offer, valid for at least three
years, to give any third party, for a charge no more than your
cost of physically performing source distribution, a complete
machine-readable copy of the corresponding source code, to be
distributed under the terms of Sections 1 and 2 above on a medium
customarily used for software interchange; or,
c) Accompany it with the information you received as to the offer
to distribute corresponding source code. (This alternative is
allowed only for noncommercial distribution and only if you
received the program in object code or executable form with such
an offer, in accord with Subsection b above.)
The source code for a work means the preferred form of the work for
making modifications to it. For an executable work, complete source
code means all the source code for all modules it contains, plus any
associated interface definition files, plus the scripts used to
control compilation and installation of the executable. However, as a
special exception, the source code distributed need not include
anything that is normally distributed (in either source or binary
form) with the major components (compiler, kernel, and so on) of the
operating system on which the executable runs, unless that component
itself accompanies the executable.
If distribution of executable or object code is made by offering
access to copy from a designated place, then offering equivalent
access to copy the source code from the same place counts as
distribution of the source code, even though third parties are not
compelled to copy the source along with the object code.
4. You may not copy, modify, sublicense, or distribute the Program
except as expressly provided under this License. Any attempt
otherwise to copy, modify, sublicense or distribute the Program is
void, and will automatically terminate your rights under this License.
However, parties who have received copies, or rights, from you under
this License will not have their licenses terminated so long as such
parties remain in full compliance.
5. You are not required to accept this License, since you have not
signed it. However, nothing else grants you permission to modify or
distribute the Program or its derivative works. These actions are
prohibited by law if you do not accept this License. Therefore, by
modifying or distributing the Program (or any work based on the
Program), you indicate your acceptance of this License to do so, and
all its terms and conditions for copying, distributing or modifying
the Program or works based on it.
6. Each time you redistribute the Program (or any work based on the
Program), the recipient automatically receives a license from the
original licensor to copy, distribute or modify the Program subject to
these terms and conditions. You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties to
this License.
7. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot
distribute so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you
may not distribute the Program at all. For example, if a patent
license would not permit royalty-free redistribution of the Program by
all those who receive copies directly or indirectly through you, then
the only way you could satisfy both it and this License would be to
refrain entirely from distribution of the Program.
If any portion of this section is held invalid or unenforceable under
any particular circumstance, the balance of the section is intended to
apply and the section as a whole is intended to apply in other
circumstances.
It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of any
such claims; this section has the sole purpose of protecting the
integrity of the free software distribution system, which is
implemented by public license practices. Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing
to distribute software through any other system and a licensee cannot
impose that choice.
This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.
8. If the distribution and/or use of the Program is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Program under this License
may add an explicit geographical distribution limitation excluding
those countries, so that distribution is permitted only in or among
countries not thus excluded. In such case, this License incorporates
the limitation as if written in the body of this License.
9. The Free Software Foundation may publish revised and/or new versions
of the General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the Program
specifies a version number of this License which applies to it and "any
later version", you have the option of following the terms and conditions
either of that version or of any later version published by the Free
Software Foundation. If the Program does not specify a version number of
this License, you may choose any version ever published by the Free Software
Foundation.
10. If you wish to incorporate parts of the Program into other free
programs whose distribution conditions are different, write to the author
to ask for permission. For software which is copyrighted by the Free
Software Foundation, write to the Free Software Foundation; we sometimes
make exceptions for this. Our decision will be guided by the two goals
of preserving the free status of all derivatives of our free software and
of promoting the sharing and reuse of software generally.
NO WARRANTY
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
REPAIR OR CORRECTION.
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
convey the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
Copyright (C) <year>  <name of author>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
Also add information on how to contact you by electronic and paper mail.
If the program is interactive, make it output a short notice like this
when it starts in an interactive mode:
Gnomovision version 69, Copyright (C) year name of author
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, the commands you use may
be called something other than `show w' and `show c'; they could even be
mouse-clicks or menu items--whatever suits your program.
You should also get your employer (if you work as a programmer) or your
school, if any, to sign a "copyright disclaimer" for the program, if
necessary. Here is a sample; alter the names:
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
`Gnomovision' (which makes passes at compilers) written by James Hacker.
<signature of Ty Coon>, 1 April 1989
Ty Coon, President of Vice
This General Public License does not permit incorporating your program into
proprietary programs. If your program is a subroutine library, you may
consider it more useful to permit linking proprietary applications with the
library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License.
ntfs2btrfs-20240115/README.md
Ntfs2btrfs
==========
Ntfs2btrfs is a tool which does in-place conversion of Microsoft's NTFS
filesystem to the open-source filesystem Btrfs, much as `btrfs-convert`
does for ext2. The original image is saved as a reflink copy at
`image/ntfs.img`, and if you want to keep the conversion you can delete
this to free up space.
Although I believe this tool to be stable, please note that I take no
responsibility if something goes awry!
You're probably also interested in [WinBtrfs](https://github.com/maharmstone/btrfs),
which is a Btrfs filesystem driver for Windows.
Thanks to [Eric Biggers](https://github.com/ebiggers), who [successfully reverse-engineered](https://github.com/ebiggers/ntfs-3g-system-compression/) Windows 10's
"WOF compressed data", and whose code I've used here.
Usage
-----
On Windows, from an Administrator command prompt:
`ntfs2btrfs.exe D:\`
Bear in mind that it won't work with your boot drive or a drive containing a
pagefile that's currently in use.
On Linux, as root:
`ntfs2btrfs /dev/sda1`
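Any of the options described in the man page can be added before the device name; for example, to convert with zstd compression and xxhash checksums:
`ntfs2btrfs --compress=zstd --hash=xxhash /dev/sda1`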
Installation
------------
On Windows, go to the [Releases page](https://github.com/maharmstone/ntfs2btrfs/releases) and
download the latest Zip file, or use [Scoop](https://github.com/ScoopInstaller/Main/blob/master/bucket/ntfs2btrfs.json).
For Linux:
* [Arch](https://aur.archlinux.org/packages/ntfs2btrfs)
* [Fedora](https://src.fedoraproject.org/rpms/ntfs2btrfs) (thanks to [Conan-Kudo](https://github.com/Conan-Kudo))
* Gentoo - available as sys-fs/ntfs2btrfs in the guru repository
* [Debian](https://packages.debian.org/ntfs2btrfs) (thanks to [alexmyczko](https://github.com/alexmyczko))
* [Ubuntu](https://packages.ubuntu.com/ntfs2btrfs) (thanks to [alexmyczko](https://github.com/alexmyczko))
* [openSUSE](https://build.opensuse.org/package/show/filesystems/ntfs2btrfs) (thanks to David Sterba)
For other distributions or operating systems, you will need to compile it yourself - see
below.
Changelog
---------
* 20240115
* Fixed compilation on GCC 14 (`-Werror=incompatible-pointer-types` now enabled by default)
* 20230501
* Fixed inline extent items being written out of order (not diagnosed by `btrfs check`)
* Fixed metadata items being written with wrong level value (not diagnosed by `btrfs check`)
* ADSes with overly-long names now get skipped
* 20220812
* Added --no-datasum option, to skip calculating checksums
* LXSS / WSL metadata is now preserved
* Fixed lowercase drive letters not being recognized
* Fixed crash due to iterator invalidation (thanks to nyanpasu64)
* Fixed corruption when NTFS places file in last megabyte of disk
* 20210923
* Added (Btrfs) compression support (zlib, lzo, and zstd)
* Added support for other hash algorithms: xxhash, sha256, and blake2
* Added support for rolling back to NTFS
* Added support for NT4-style security descriptors
* Increased conversion speed for volumes with many inodes
* Fixed bug when fragmented file was in superblock location
* Fixed buffer overflow when reading security descriptors
* Fixed bug where filesystems would be corrupted in a way that `btrfs check` doesn't pick up
* 20210523
* Improved handling of large compressed files
* 20210402 (source code only release)
* Fixes for compilation on non-amd64 architectures
* 20210105
* Added support for NTFS compression
* Added support for "WOF compressed data"
* Fixed problems caused by sparse files
* Miscellaneous bug fixes
* 20201108
* Improved error handling
* Added better message if NTFS is corrupted or unclean
* Better handling of relocations
* 20200330
* Initial release
Compilation
-----------
On Windows, open the source directory in a recent version of MSVC, right-click
on CMakeLists.txt, and click Compile.
On Linux:
mkdir build
cd build
cmake ..
make
You'll also need [libfmt](https://github.com/fmtlib/fmt) installed - it should be
in your package manager.
Compression support requires zlib, lzo, and/or zstd - again, they will be in your
package manager. See also the cmake options WITH_ZLIB, WITH_LZO, and WITH_ZSTD,
if you want to disable this.
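To disable one of these compression backends, pass the corresponding option to cmake when configuring; for example, to build without zstd support:
`cmake -DWITH_ZSTD=OFF ..`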
What works
----------
* Files
* Directories
* Symlinks
* Other reparse points
* Security descriptors
* Alternate data streams
* DOS attributes (hidden, system, etc.)
* Rollback to original NTFS image
* Preservation of LXSS metadata
What doesn't work
-----------------
* Windows' old extended attributes (you're not using these)
* Large (i.e. >16KB) ADSes (you're not using these either)
* Preservation of the case-sensitivity flag
* Unusual cluster sizes (i.e. not 4 KB)
* Encrypted files
Can I boot Windows from Btrfs with this?
----------------------------------------
Yes, if the stars are right. See [Quibble](https://github.com/maharmstone/quibble).
ntfs2btrfs-20240115/ntfs2btrfs.8.in
.TH NTFS2BTRFS "8" "January 2024" "ntfs2btrfs @PROJECT_VERSION@" "System Administration"
.SH NAME
ntfs2btrfs \- convert ntfs filesystem to btrfs filesystem
.SH SYNOPSIS
\fBntfs2btrfs\fR [options] \fIdevice\fR
.SH DESCRIPTION
This is a tool which does in-place conversion of Microsoft's NTFS filesystem
to the open-source filesystem Btrfs, much as \fBbtrfs\-convert\fR does for ext2.
.SH OPTIONS
.PP
-c \fIalgorithm\fR, --compress=\fIalgorithm\fR
.RS 4
Uses the specified algorithm to recompress files that are compressed on the
NTFS volume; valid choices are \fIzstd\fR, \fIlzo\fR, \fIzlib\fR, or \fInone\fR.
If you don't specify any value, \fIzstd\fR will be used, assuming it's been
compiled in. Note that this will be ignored if you also select --no-datasum (see
below).
.RE
.PP
-h \fIalgorithm\fR, --hash=\fIalgorithm\fR
.RS 4
Uses the specified checksumming algorithm; valid choices are \fIcrc32c\fR,
\fIxxhash\fR, \fIsha256\fR, and \fIblake2\fR. The first of these will be used by
default, and should be fine for most purposes.
.RE
.PP
-r, --rollback
.RS 4
Tries to restore the original NTFS filesystem. See \fBROLLBACK\fR below.
.RE
.PP
-d, --no-datasum
.RS 4
Skips calculating checksums for existing data. Don't choose this unless you're
sure it's what you want.
.RE
.SH ROLLBACK
The original filesystem image is saved as a reflink copy at \fIimage/ntfs.img\fR. You
can restore this at any time by using the rollback option, provided that you've
not moved the data by doing a balance. Bear in mind that this restores the volume
to how it was when you did the conversion, meaning that any changes you've made
since will be lost.
.PP
If you decide to keep the conversion, you can remove the \fIimage\fR subvolume at
any point to free up space.
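.PP
For example, \fBntfs2btrfs --rollback /dev/sda1\fR restores the original NTFS filesystem
to \fI/dev/sda1\fR, assuming that is the device which was previously converted.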
.SH XATTRS
Various bits of NTFS-specific data are stored as Btrfs xattrs, in a manner that
the Windows btrfs driver understands (\fBhttps://github.com/maharmstone/btrfs\fR). Some
should also be understood by tools such as Wine and Samba, but YMMV.
.IP \[bu] 2
The NTFS attribute value is stored as a hex string at \fIuser.DOSATTRIB\fR.
.IP \[bu] 2
The reparse points on directories are stored at \fIuser.reparse\fR. NTFS symlinks should
be converted into POSIX symlinks. The data for other reparse points will be stored as
the contents of the files.
.IP \[bu] 2
The NT security descriptor is stored as \fIsecurity.NTACL\fR.
.IP \[bu] 2
Alternate data streams on files are stored in the \fIuser\fR namespace, e.g. \fI:Zone.Identifier\fR
becomes \fIuser.Zone.Identifier\fR.
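.PP
On Linux, these xattrs can be inspected with the standard \fBgetfattr\fR(1) tool from the
attr package (not part of ntfs2btrfs); for example, \fBgetfattr -d -m -\fR \fIfile\fR should
list the attributes described above, including those outside the \fIuser\fR namespace.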
.SH SEE ALSO
.BR btrfs (8),
.BR mkfs.btrfs (8).
.SH AUTHOR
Written by Mark Harmstone (\fBmark@harmstone.com\fR).
.SH WEB
.IP https://github.com/maharmstone/ntfs2btrfs
ntfs2btrfs-20240115/src/blake2-impl.h
/*
BLAKE2 reference source code package - reference C implementations
Copyright 2012, Samuel Neves . You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#pragma once
#include <stdint.h>
#include <string.h>
#define NATIVE_LITTLE_ENDIAN
#if !defined(__cplusplus) && (!defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L)
#if defined(_MSC_VER)
#define BLAKE2_INLINE __inline
#elif defined(__GNUC__)
#define BLAKE2_INLINE __inline__
#else
#define BLAKE2_INLINE
#endif
#else
#define BLAKE2_INLINE inline
#endif
static BLAKE2_INLINE uint32_t load32( const void *src )
{
#if defined(NATIVE_LITTLE_ENDIAN)
uint32_t w;
memcpy(&w, src, sizeof w);
return w;
#else
const uint8_t *p = ( const uint8_t * )src;
return (( uint32_t )( p[0] ) << 0) |
(( uint32_t )( p[1] ) << 8) |
(( uint32_t )( p[2] ) << 16) |
(( uint32_t )( p[3] ) << 24) ;
#endif
}
static BLAKE2_INLINE uint64_t load64( const void *src )
{
#if defined(NATIVE_LITTLE_ENDIAN)
uint64_t w;
memcpy(&w, src, sizeof w);
return w;
#else
const uint8_t *p = ( const uint8_t * )src;
return (( uint64_t )( p[0] ) << 0) |
(( uint64_t )( p[1] ) << 8) |
(( uint64_t )( p[2] ) << 16) |
(( uint64_t )( p[3] ) << 24) |
(( uint64_t )( p[4] ) << 32) |
(( uint64_t )( p[5] ) << 40) |
(( uint64_t )( p[6] ) << 48) |
(( uint64_t )( p[7] ) << 56) ;
#endif
}
static BLAKE2_INLINE uint16_t load16( const void *src )
{
#if defined(NATIVE_LITTLE_ENDIAN)
uint16_t w;
memcpy(&w, src, sizeof w);
return w;
#else
const uint8_t *p = ( const uint8_t * )src;
return ( uint16_t )((( uint32_t )( p[0] ) << 0) |
(( uint32_t )( p[1] ) << 8));
#endif
}
static BLAKE2_INLINE void store16( void *dst, uint16_t w )
{
#if defined(NATIVE_LITTLE_ENDIAN)
memcpy(dst, &w, sizeof w);
#else
uint8_t *p = ( uint8_t * )dst;
*p++ = ( uint8_t )w; w >>= 8;
*p++ = ( uint8_t )w;
#endif
}
static BLAKE2_INLINE void store32( void *dst, uint32_t w )
{
#if defined(NATIVE_LITTLE_ENDIAN)
memcpy(dst, &w, sizeof w);
#else
uint8_t *p = ( uint8_t * )dst;
p[0] = (uint8_t)(w >> 0);
p[1] = (uint8_t)(w >> 8);
p[2] = (uint8_t)(w >> 16);
p[3] = (uint8_t)(w >> 24);
#endif
}
static BLAKE2_INLINE void store64( void *dst, uint64_t w )
{
#if defined(NATIVE_LITTLE_ENDIAN)
memcpy(dst, &w, sizeof w);
#else
uint8_t *p = ( uint8_t * )dst;
p[0] = (uint8_t)(w >> 0);
p[1] = (uint8_t)(w >> 8);
p[2] = (uint8_t)(w >> 16);
p[3] = (uint8_t)(w >> 24);
p[4] = (uint8_t)(w >> 32);
p[5] = (uint8_t)(w >> 40);
p[6] = (uint8_t)(w >> 48);
p[7] = (uint8_t)(w >> 56);
#endif
}
static BLAKE2_INLINE uint64_t load48( const void *src )
{
const uint8_t *p = ( const uint8_t * )src;
return (( uint64_t )( p[0] ) << 0) |
(( uint64_t )( p[1] ) << 8) |
(( uint64_t )( p[2] ) << 16) |
(( uint64_t )( p[3] ) << 24) |
(( uint64_t )( p[4] ) << 32) |
(( uint64_t )( p[5] ) << 40) ;
}
static BLAKE2_INLINE void store48( void *dst, uint64_t w )
{
uint8_t *p = ( uint8_t * )dst;
p[0] = (uint8_t)(w >> 0);
p[1] = (uint8_t)(w >> 8);
p[2] = (uint8_t)(w >> 16);
p[3] = (uint8_t)(w >> 24);
p[4] = (uint8_t)(w >> 32);
p[5] = (uint8_t)(w >> 40);
}
static BLAKE2_INLINE uint32_t rotr32( const uint32_t w, const unsigned c )
{
return ( w >> c ) | ( w << ( 32 - c ) );
}
static BLAKE2_INLINE uint64_t rotr64( const uint64_t w, const unsigned c )
{
return ( w >> c ) | ( w << ( 64 - c ) );
}
#if defined(_MSC_VER)
#define BLAKE2_PACKED(x) __pragma(pack(push, 1)) x __pragma(pack(pop))
#else
#define BLAKE2_PACKED(x) x __attribute__((packed))
#endif
enum blake2b_constant
{
BLAKE2B_BLOCKBYTES = 128,
BLAKE2B_OUTBYTES = 64,
BLAKE2B_KEYBYTES = 64,
BLAKE2B_SALTBYTES = 16,
BLAKE2B_PERSONALBYTES = 16
};
typedef struct blake2b_state__
{
uint64_t h[8];
uint64_t t[2];
uint64_t f[2];
uint8_t buf[BLAKE2B_BLOCKBYTES];
size_t buflen;
size_t outlen;
uint8_t last_node;
} blake2b_state;
BLAKE2_PACKED(struct blake2b_param__
{
uint8_t digest_length; /* 1 */
uint8_t key_length; /* 2 */
uint8_t fanout; /* 3 */
uint8_t depth; /* 4 */
uint32_t leaf_length; /* 8 */
uint32_t node_offset; /* 12 */
uint32_t xof_length; /* 16 */
uint8_t node_depth; /* 17 */
uint8_t inner_length; /* 18 */
uint8_t reserved[14]; /* 32 */
uint8_t salt[BLAKE2B_SALTBYTES]; /* 48 */
uint8_t personal[BLAKE2B_PERSONALBYTES]; /* 64 */
});
typedef struct blake2b_param__ blake2b_param;
ntfs2btrfs-20240115/src/blake2b-ref.c
/*
BLAKE2 reference source code package - reference C implementations
Copyright 2012, Samuel Neves . You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include "blake2-impl.h"
static const uint64_t blake2b_IV[8] =
{
0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL,
0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL,
0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
};
static const uint8_t blake2b_sigma[12][16] =
{
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } ,
{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } ,
{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } ,
{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } ,
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } ,
{ 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } ,
{ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } ,
{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } ,
{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } ,
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }
};
static int blake2b_update(blake2b_state* S, const void* in, size_t inlen);
static void blake2b_set_lastnode( blake2b_state *S )
{
S->f[1] = (uint64_t)-1;
}
/* Some helper functions, not necessarily useful */
static int blake2b_is_lastblock( const blake2b_state *S )
{
return S->f[0] != 0;
}
static void blake2b_set_lastblock( blake2b_state *S )
{
if( S->last_node ) blake2b_set_lastnode( S );
S->f[0] = (uint64_t)-1;
}
static void blake2b_increment_counter( blake2b_state *S, const uint64_t inc )
{
S->t[0] += inc;
S->t[1] += ( S->t[0] < inc );
}
static void blake2b_init0( blake2b_state *S )
{
size_t i;
memset( S, 0, sizeof( blake2b_state ) );
for( i = 0; i < 8; ++i ) S->h[i] = blake2b_IV[i];
}
/* init xors IV with input parameter block */
static void blake2b_init_param( blake2b_state *S, const blake2b_param *P )
{
const uint8_t *p = ( const uint8_t * )( P );
size_t i;
blake2b_init0( S );
/* IV XOR ParamBlock */
for( i = 0; i < 8; ++i )
S->h[i] ^= load64( p + sizeof( S->h[i] ) * i );
S->outlen = P->digest_length;
}
static void blake2b_init( blake2b_state *S, size_t outlen )
{
blake2b_param P[1];
P->digest_length = (uint8_t)outlen;
P->key_length = 0;
P->fanout = 1;
P->depth = 1;
store32( &P->leaf_length, 0 );
store32( &P->node_offset, 0 );
store32( &P->xof_length, 0 );
P->node_depth = 0;
P->inner_length = 0;
memset( P->reserved, 0, sizeof( P->reserved ) );
memset( P->salt, 0, sizeof( P->salt ) );
memset( P->personal, 0, sizeof( P->personal ) );
blake2b_init_param( S, P );
}
#define G(r,i,a,b,c,d) \
do { \
a = a + b + m[blake2b_sigma[r][2*i+0]]; \
d = rotr64(d ^ a, 32); \
c = c + d; \
b = rotr64(b ^ c, 24); \
a = a + b + m[blake2b_sigma[r][2*i+1]]; \
d = rotr64(d ^ a, 16); \
c = c + d; \
b = rotr64(b ^ c, 63); \
} while(0)
#define ROUND(r) \
do { \
G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
G(r,2,v[ 2],v[ 6],v[10],v[14]); \
G(r,3,v[ 3],v[ 7],v[11],v[15]); \
G(r,4,v[ 0],v[ 5],v[10],v[15]); \
G(r,5,v[ 1],v[ 6],v[11],v[12]); \
G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \
} while(0)
static void blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] )
{
uint64_t m[16];
uint64_t v[16];
size_t i;
for( i = 0; i < 16; ++i ) {
m[i] = load64( block + i * sizeof( m[i] ) );
}
for( i = 0; i < 8; ++i ) {
v[i] = S->h[i];
}
v[ 8] = blake2b_IV[0];
v[ 9] = blake2b_IV[1];
v[10] = blake2b_IV[2];
v[11] = blake2b_IV[3];
v[12] = blake2b_IV[4] ^ S->t[0];
v[13] = blake2b_IV[5] ^ S->t[1];
v[14] = blake2b_IV[6] ^ S->f[0];
v[15] = blake2b_IV[7] ^ S->f[1];
ROUND( 0 );
ROUND( 1 );
ROUND( 2 );
ROUND( 3 );
ROUND( 4 );
ROUND( 5 );
ROUND( 6 );
ROUND( 7 );
ROUND( 8 );
ROUND( 9 );
ROUND( 10 );
ROUND( 11 );
for( i = 0; i < 8; ++i ) {
S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];
}
}
#undef G
#undef ROUND
static int blake2b_update( blake2b_state *S, const void *pin, size_t inlen )
{
const unsigned char * in = (const unsigned char *)pin;
if( inlen > 0 )
{
size_t left = S->buflen;
size_t fill = BLAKE2B_BLOCKBYTES - left;
if( inlen > fill )
{
S->buflen = 0;
memcpy( S->buf + left, in, fill ); /* Fill buffer */
blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES );
blake2b_compress( S, S->buf ); /* Compress */
in += fill; inlen -= fill;
while(inlen > BLAKE2B_BLOCKBYTES) {
blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
blake2b_compress( S, in );
in += BLAKE2B_BLOCKBYTES;
inlen -= BLAKE2B_BLOCKBYTES;
}
}
memcpy( S->buf + S->buflen, in, inlen );
S->buflen += inlen;
}
return 0;
}
static int blake2b_final( blake2b_state *S, void *out, size_t outlen )
{
uint8_t buffer[BLAKE2B_OUTBYTES] = {0};
size_t i;
if( out == NULL || outlen < S->outlen )
return -1;
if( blake2b_is_lastblock( S ) )
return -1;
blake2b_increment_counter( S, S->buflen );
blake2b_set_lastblock( S );
memset( S->buf + S->buflen, 0, BLAKE2B_BLOCKBYTES - S->buflen ); /* Padding */
blake2b_compress( S, S->buf );
for( i = 0; i < 8; ++i ) /* Output full hash to temp buffer */
store64( buffer + sizeof( S->h[i] ) * i, S->h[i] );
memcpy( out, buffer, S->outlen );
return 0;
}
/* inlen, at least, should be uint64_t. Others can be size_t. */
void blake2b( void *out, size_t outlen, const void *in, size_t inlen )
{
blake2b_state S[1];
blake2b_init( S, outlen );
blake2b_update( S, ( const uint8_t * )in, inlen );
blake2b_final( S, out, outlen );
}
ntfs2btrfs-20240115/src/btrfs.h
/* btrfs.h
* Generic btrfs header file. Thanks to whoever it was who wrote
* https://btrfs.wiki.kernel.org/index.php/On-disk_Format - you saved me a lot of time!
*
* I release this file, and this file only, into the public domain - do whatever
* you want with it. You don't have to, but I'd appreciate if you let me know if you
* use it anything cool - mark@harmstone.com. */
#pragma once
#include <stdint.h>
static const uint64_t superblock_addrs[] = { 0x10000, 0x4000000, 0x4000000000, 0x4000000000000, 0 };
#define BTRFS_MAGIC 0x4d5f53665248425f
#define MAX_LABEL_SIZE 0x100
#define SUBVOL_ROOT_INODE 0x100
enum class btrfs_key_type : uint8_t {
INODE_ITEM = 0x01,
INODE_REF = 0x0C,
INODE_EXTREF = 0x0D,
XATTR_ITEM = 0x18,
ORPHAN_INODE = 0x30,
DIR_ITEM = 0x54,
DIR_INDEX = 0x60,
EXTENT_DATA = 0x6C,
EXTENT_CSUM = 0x80,
ROOT_ITEM = 0x84,
ROOT_BACKREF = 0x90,
ROOT_REF = 0x9C,
EXTENT_ITEM = 0xA8,
METADATA_ITEM = 0xA9,
TREE_BLOCK_REF = 0xB0,
EXTENT_DATA_REF = 0xB2,
EXTENT_REF_V0 = 0xB4,
SHARED_BLOCK_REF = 0xB6,
SHARED_DATA_REF = 0xB8,
BLOCK_GROUP_ITEM = 0xC0,
FREE_SPACE_INFO = 0xC6,
FREE_SPACE_EXTENT = 0xC7,
FREE_SPACE_BITMAP = 0xC8,
DEV_EXTENT = 0xCC,
DEV_ITEM = 0xD8,
CHUNK_ITEM = 0xE4,
TEMP_ITEM = 0xF8,
DEV_STATS = 0xF9,
SUBVOL_UUID = 0xFB,
SUBVOL_REC_UUID = 0xFC
};
#define BTRFS_ROOT_ROOT 1
#define BTRFS_ROOT_EXTENT 2
#define BTRFS_ROOT_CHUNK 3
#define BTRFS_ROOT_DEVTREE 4
#define BTRFS_ROOT_FSTREE 5
#define BTRFS_ROOT_TREEDIR 6
#define BTRFS_ROOT_CHECKSUM 7
#define BTRFS_ROOT_UUID 9
#define BTRFS_ROOT_FREE_SPACE 0xa
#define BTRFS_ROOT_DATA_RELOC 0xFFFFFFFFFFFFFFF7
enum class btrfs_compression : uint8_t {
none = 0,
zlib = 1,
lzo = 2,
zstd = 3
};
#define BTRFS_ENCRYPTION_NONE 0
#define BTRFS_ENCODING_NONE 0
enum class btrfs_extent_type : uint8_t {
inline_extent = 0,
regular = 1,
prealloc = 2
};
#define BLOCK_FLAG_DATA 0x001
#define BLOCK_FLAG_SYSTEM 0x002
#define BLOCK_FLAG_METADATA 0x004
#define BLOCK_FLAG_RAID0 0x008
#define BLOCK_FLAG_RAID1 0x010
#define BLOCK_FLAG_DUPLICATE 0x020
#define BLOCK_FLAG_RAID10 0x040
#define BLOCK_FLAG_RAID5 0x080
#define BLOCK_FLAG_RAID6 0x100
#define BLOCK_FLAG_RAID1C3 0x200
#define BLOCK_FLAG_RAID1C4 0x400
#define FREE_SPACE_CACHE_ID 0xFFFFFFFFFFFFFFF5
#define EXTENT_CSUM_ID 0xFFFFFFFFFFFFFFF6
#define BALANCE_ITEM_ID 0xFFFFFFFFFFFFFFFC
#define BTRFS_INODE_NODATASUM 0x001
#define BTRFS_INODE_NODATACOW 0x002
#define BTRFS_INODE_READONLY 0x004
#define BTRFS_INODE_NOCOMPRESS 0x008
#define BTRFS_INODE_PREALLOC 0x010
#define BTRFS_INODE_SYNC 0x020
#define BTRFS_INODE_IMMUTABLE 0x040
#define BTRFS_INODE_APPEND 0x080
#define BTRFS_INODE_NODUMP 0x100
#define BTRFS_INODE_NOATIME 0x200
#define BTRFS_INODE_DIRSYNC 0x400
#define BTRFS_INODE_COMPRESS 0x800
#define BTRFS_SUBVOL_READONLY 0x1
#define BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE 0x1
#define BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE_VALID 0x2
#define BTRFS_INCOMPAT_FLAGS_MIXED_BACKREF 0x0001
#define BTRFS_INCOMPAT_FLAGS_DEFAULT_SUBVOL 0x0002
#define BTRFS_INCOMPAT_FLAGS_MIXED_GROUPS 0x0004
#define BTRFS_INCOMPAT_FLAGS_COMPRESS_LZO 0x0008
#define BTRFS_INCOMPAT_FLAGS_COMPRESS_ZSTD 0x0010
#define BTRFS_INCOMPAT_FLAGS_BIG_METADATA 0x0020
#define BTRFS_INCOMPAT_FLAGS_EXTENDED_IREF 0x0040
#define BTRFS_INCOMPAT_FLAGS_RAID56 0x0080
#define BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA 0x0100
#define BTRFS_INCOMPAT_FLAGS_NO_HOLES 0x0200
#define BTRFS_INCOMPAT_FLAGS_METADATA_UUID 0x0400
#define BTRFS_INCOMPAT_FLAGS_RAID1C34 0x0800
#define BTRFS_SUPERBLOCK_FLAGS_SEEDING 0x100000000
#define BTRFS_ORPHAN_INODE_OBJID 0xFFFFFFFFFFFFFFFB
enum class btrfs_csum_type : uint16_t {
crc32c = 0,
xxhash = 1,
sha256 = 2,
blake2 = 3
};
#pragma pack(push, 1)
typedef struct {
uint8_t uuid[16];
} BTRFS_UUID;
typedef struct {
uint64_t obj_id;
btrfs_key_type obj_type;
uint64_t offset;
} KEY;
#define HEADER_FLAG_WRITTEN 0x000000000000001
#define HEADER_FLAG_SHARED_BACKREF 0x000000000000002
#define HEADER_FLAG_MIXED_BACKREF 0x100000000000000
typedef struct {
uint8_t csum[32];
BTRFS_UUID fs_uuid;
uint64_t address;
uint64_t flags;
BTRFS_UUID chunk_tree_uuid;
uint64_t generation;
uint64_t tree_id;
uint32_t num_items;
uint8_t level;
} tree_header;
typedef struct {
KEY key;
uint32_t offset;
uint32_t size;
} leaf_node;
typedef struct {
KEY key;
uint64_t address;
uint64_t generation;
} internal_node;
typedef struct {
uint64_t dev_id;
uint64_t num_bytes;
uint64_t bytes_used;
uint32_t optimal_io_align;
uint32_t optimal_io_width;
uint32_t minimal_io_size;
uint64_t type;
uint64_t generation;
uint64_t start_offset;
uint32_t dev_group;
uint8_t seek_speed;
uint8_t bandwidth;
BTRFS_UUID device_uuid;
BTRFS_UUID fs_uuid;
} DEV_ITEM;
#define SYS_CHUNK_ARRAY_SIZE 0x800
#define BTRFS_NUM_BACKUP_ROOTS 4
typedef struct {
uint64_t root_tree_addr;
uint64_t root_tree_generation;
uint64_t chunk_tree_addr;
uint64_t chunk_tree_generation;
uint64_t extent_tree_addr;
uint64_t extent_tree_generation;
uint64_t fs_tree_addr;
uint64_t fs_tree_generation;
uint64_t dev_root_addr;
uint64_t dev_root_generation;
uint64_t csum_root_addr;
uint64_t csum_root_generation;
uint64_t total_bytes;
uint64_t bytes_used;
uint64_t num_devices;
uint64_t reserved[4];
uint8_t root_level;
uint8_t chunk_root_level;
uint8_t extent_root_level;
uint8_t fs_root_level;
uint8_t dev_root_level;
uint8_t csum_root_level;
uint8_t reserved2[10];
} superblock_backup;
typedef struct {
uint8_t checksum[32];
BTRFS_UUID uuid;
uint64_t sb_phys_addr;
uint64_t flags;
uint64_t magic;
uint64_t generation;
uint64_t root_tree_addr;
uint64_t chunk_tree_addr;
uint64_t log_tree_addr;
uint64_t log_root_transid;
uint64_t total_bytes;
uint64_t bytes_used;
uint64_t root_dir_objectid;
uint64_t num_devices;
uint32_t sector_size;
uint32_t node_size;
uint32_t leaf_size;
uint32_t stripe_size;
uint32_t n;
uint64_t chunk_root_generation;
uint64_t compat_flags;
uint64_t compat_ro_flags;
uint64_t incompat_flags;
enum btrfs_csum_type csum_type;
uint8_t root_level;
uint8_t chunk_root_level;
uint8_t log_root_level;
DEV_ITEM dev_item;
char label[MAX_LABEL_SIZE];
uint64_t cache_generation;
uint64_t uuid_tree_generation;
uint64_t reserved[30];
uint8_t sys_chunk_array[SYS_CHUNK_ARRAY_SIZE];
superblock_backup backup[BTRFS_NUM_BACKUP_ROOTS];
uint8_t reserved2[565];
} superblock;
enum class btrfs_inode_type : uint8_t {
unknown = 0,
file = 1,
directory = 2,
chardev = 3,
blockdev = 4,
fifo = 5,
socket = 6,
symlink = 7,
ea = 8
};
typedef struct {
KEY key;
uint64_t transid;
uint16_t m;
uint16_t n;
enum btrfs_inode_type type;
char name[1];
} DIR_ITEM;
typedef struct {
uint64_t seconds;
uint32_t nanoseconds;
} BTRFS_TIME;
typedef struct {
uint64_t generation;
uint64_t transid;
uint64_t st_size;
uint64_t st_blocks;
uint64_t block_group;
uint32_t st_nlink;
uint32_t st_uid;
uint32_t st_gid;
uint32_t st_mode;
uint64_t st_rdev;
uint64_t flags;
uint64_t sequence;
uint8_t reserved[32];
BTRFS_TIME st_atime;
BTRFS_TIME st_ctime;
BTRFS_TIME st_mtime;
BTRFS_TIME otime;
} INODE_ITEM;
typedef struct {
INODE_ITEM inode;
uint64_t generation;
uint64_t objid;
uint64_t block_number;
uint64_t byte_limit;
uint64_t bytes_used;
uint64_t last_snapshot_generation;
uint64_t flags;
uint32_t num_references;
KEY drop_progress;
uint8_t drop_level;
uint8_t root_level;
uint64_t generation2;
BTRFS_UUID uuid;
BTRFS_UUID parent_uuid;
BTRFS_UUID received_uuid;
uint64_t ctransid;
uint64_t otransid;
uint64_t stransid;
uint64_t rtransid;
BTRFS_TIME ctime;
BTRFS_TIME otime;
BTRFS_TIME stime;
BTRFS_TIME rtime;
uint64_t reserved[8];
} ROOT_ITEM;
typedef struct {
uint64_t size;
uint64_t root_id;
uint64_t stripe_length;
uint64_t type;
uint32_t opt_io_alignment;
uint32_t opt_io_width;
uint32_t sector_size;
uint16_t num_stripes;
uint16_t sub_stripes;
} CHUNK_ITEM;
typedef struct {
uint64_t dev_id;
uint64_t offset;
BTRFS_UUID dev_uuid;
} CHUNK_ITEM_STRIPE;
typedef struct {
uint64_t generation;
uint64_t decoded_size;
enum btrfs_compression compression;
uint8_t encryption;
uint16_t encoding;
enum btrfs_extent_type type;
uint8_t data[1];
} EXTENT_DATA;
typedef struct {
uint64_t address;
uint64_t size;
uint64_t offset;
uint64_t num_bytes;
} EXTENT_DATA2;
typedef struct {
uint64_t index;
uint16_t n;
char name[1];
} INODE_REF;
typedef struct {
uint64_t dir;
uint64_t index;
uint16_t n;
char name[1];
} INODE_EXTREF;
#define EXTENT_ITEM_DATA 0x001
#define EXTENT_ITEM_TREE_BLOCK 0x002
#define EXTENT_ITEM_SHARED_BACKREFS 0x100
typedef struct {
uint64_t refcount;
uint64_t generation;
uint64_t flags;
} EXTENT_ITEM;
typedef struct {
KEY firstitem;
uint8_t level;
} EXTENT_ITEM2;
typedef struct {
uint32_t refcount;
} EXTENT_ITEM_V0;
typedef struct {
EXTENT_ITEM extent_item;
KEY firstitem;
uint8_t level;
} EXTENT_ITEM_TREE;
typedef struct {
uint64_t offset;
} TREE_BLOCK_REF;
typedef struct {
uint64_t root;
uint64_t objid;
uint64_t offset;
uint32_t count;
} EXTENT_DATA_REF;
typedef struct {
uint64_t used;
uint64_t chunk_tree;
uint64_t flags;
} BLOCK_GROUP_ITEM;
typedef struct {
uint64_t root;
uint64_t gen;
uint64_t objid;
uint32_t count;
} EXTENT_REF_V0;
typedef struct {
uint64_t offset;
} SHARED_BLOCK_REF;
typedef struct {
uint64_t offset;
uint32_t count;
} SHARED_DATA_REF;
static const uint8_t FREE_SPACE_EXTENT = 1;
static const uint8_t FREE_SPACE_BITMAP = 2;
typedef struct {
uint64_t offset;
uint64_t size;
uint8_t type;
} FREE_SPACE_ENTRY;
typedef struct {
KEY key;
uint64_t generation;
uint64_t num_entries;
uint64_t num_bitmaps;
} FREE_SPACE_ITEM;
typedef struct {
uint64_t dir;
uint64_t index;
uint16_t n;
char name[1];
} ROOT_REF;
typedef struct {
uint64_t chunktree;
uint64_t objid;
uint64_t address;
uint64_t length;
BTRFS_UUID chunktree_uuid;
} DEV_EXTENT;
#define BALANCE_FLAGS_DATA 0x1
#define BALANCE_FLAGS_SYSTEM 0x2
#define BALANCE_FLAGS_METADATA 0x4
#define BALANCE_ARGS_FLAGS_PROFILES 0x001
#define BALANCE_ARGS_FLAGS_USAGE 0x002
#define BALANCE_ARGS_FLAGS_DEVID 0x004
#define BALANCE_ARGS_FLAGS_DRANGE 0x008
#define BALANCE_ARGS_FLAGS_VRANGE 0x010
#define BALANCE_ARGS_FLAGS_LIMIT 0x020
#define BALANCE_ARGS_FLAGS_LIMIT_RANGE 0x040
#define BALANCE_ARGS_FLAGS_STRIPES_RANGE 0x080
#define BALANCE_ARGS_FLAGS_CONVERT 0x100
#define BALANCE_ARGS_FLAGS_SOFT 0x200
#define BALANCE_ARGS_FLAGS_USAGE_RANGE 0x400
typedef struct {
uint64_t profiles;
union {
uint64_t usage;
struct {
uint32_t usage_start;
uint32_t usage_end;
} s;
} u1;
uint64_t devid;
uint64_t drange_start;
uint64_t drange_end;
uint64_t vrange_start;
uint64_t vrange_end;
uint64_t convert;
uint64_t flags;
union {
uint64_t limit;
struct {
uint32_t limit_start;
uint32_t limit_end;
} s;
} u2;
uint32_t stripes_start;
uint32_t stripes_end;
uint8_t reserved[48];
} BALANCE_ARGS;
typedef struct {
uint64_t flags;
BALANCE_ARGS data;
BALANCE_ARGS metadata;
BALANCE_ARGS system;
uint8_t reserved[32];
} BALANCE_ITEM;
#define BTRFS_FREE_SPACE_USING_BITMAPS 1
typedef struct {
uint32_t count;
uint32_t flags;
} FREE_SPACE_INFO;
#define BTRFS_DEV_STAT_WRITE_ERRORS 0
#define BTRFS_DEV_STAT_READ_ERRORS 1
#define BTRFS_DEV_STAT_FLUSH_ERRORS 2
#define BTRFS_DEV_STAT_CORRUPTION_ERRORS 3
#define BTRFS_DEV_STAT_GENERATION_ERRORS 4
#define BTRFS_SEND_CMD_SUBVOL 1
#define BTRFS_SEND_CMD_SNAPSHOT 2
#define BTRFS_SEND_CMD_MKFILE 3
#define BTRFS_SEND_CMD_MKDIR 4
#define BTRFS_SEND_CMD_MKNOD 5
#define BTRFS_SEND_CMD_MKFIFO 6
#define BTRFS_SEND_CMD_MKSOCK 7
#define BTRFS_SEND_CMD_SYMLINK 8
#define BTRFS_SEND_CMD_RENAME 9
#define BTRFS_SEND_CMD_LINK 10
#define BTRFS_SEND_CMD_UNLINK 11
#define BTRFS_SEND_CMD_RMDIR 12
#define BTRFS_SEND_CMD_SET_XATTR 13
#define BTRFS_SEND_CMD_REMOVE_XATTR 14
#define BTRFS_SEND_CMD_WRITE 15
#define BTRFS_SEND_CMD_CLONE 16
#define BTRFS_SEND_CMD_TRUNCATE 17
#define BTRFS_SEND_CMD_CHMOD 18
#define BTRFS_SEND_CMD_CHOWN 19
#define BTRFS_SEND_CMD_UTIMES 20
#define BTRFS_SEND_CMD_END 21
#define BTRFS_SEND_CMD_UPDATE_EXTENT 22
#define BTRFS_SEND_TLV_UUID 1
#define BTRFS_SEND_TLV_TRANSID 2
#define BTRFS_SEND_TLV_INODE 3
#define BTRFS_SEND_TLV_SIZE 4
#define BTRFS_SEND_TLV_MODE 5
#define BTRFS_SEND_TLV_UID 6
#define BTRFS_SEND_TLV_GID 7
#define BTRFS_SEND_TLV_RDEV 8
#define BTRFS_SEND_TLV_CTIME 9
#define BTRFS_SEND_TLV_MTIME 10
#define BTRFS_SEND_TLV_ATIME 11
#define BTRFS_SEND_TLV_OTIME 12
#define BTRFS_SEND_TLV_XATTR_NAME 13
#define BTRFS_SEND_TLV_XATTR_DATA 14
#define BTRFS_SEND_TLV_PATH 15
#define BTRFS_SEND_TLV_PATH_TO 16
#define BTRFS_SEND_TLV_PATH_LINK 17
#define BTRFS_SEND_TLV_OFFSET 18
#define BTRFS_SEND_TLV_DATA 19
#define BTRFS_SEND_TLV_CLONE_UUID 20
#define BTRFS_SEND_TLV_CLONE_CTRANSID 21
#define BTRFS_SEND_TLV_CLONE_PATH 22
#define BTRFS_SEND_TLV_CLONE_OFFSET 23
#define BTRFS_SEND_TLV_CLONE_LENGTH 24
#define BTRFS_SEND_MAGIC "btrfs-stream"
typedef struct {
uint8_t magic[13];
uint32_t version;
} btrfs_send_header;
typedef struct {
uint32_t length;
uint16_t cmd;
uint32_t csum;
} btrfs_send_command;
typedef struct {
uint16_t type;
uint16_t length;
} btrfs_send_tlv;
#pragma pack(pop)
ntfs2btrfs-20240115/src/compress.cpp
/* Copyright (c) Mark Harmstone 2021
*
* This file is part of ntfs2btrfs.
*
* Ntfs2btrfs is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public Licence as published by
* the Free Software Foundation, either version 2 of the Licence, or
* (at your option) any later version.
*
* Ntfs2btrfs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public Licence for more details.
*
* You should have received a copy of the GNU General Public Licence
* along with Ntfs2btrfs. If not, see <https://www.gnu.org/licenses/>. */
#include "ntfs2btrfs.h"
#ifdef WITH_ZLIB
#include <zlib.h>
#endif
#ifdef WITH_LZO
#include <lzo/lzo1x.h>
#endif
#ifdef WITH_ZSTD
#include <zstd.h>
#endif
using namespace std;
#ifdef WITH_ZLIB
optional<buffer_t> zlib_compress(string_view data, uint32_t cluster_size) {
z_stream c_stream;
int ret;
buffer_t out(data.length());
c_stream.zalloc = Z_NULL;
c_stream.zfree = Z_NULL;
c_stream.opaque = (voidpf)0;
ret = deflateInit(&c_stream, Z_DEFAULT_COMPRESSION);
if (ret != Z_OK)
throw formatted_error("deflateInit returned {}", ret);
c_stream.next_in = (uint8_t*)data.data();
c_stream.avail_in = (unsigned int)data.length();
c_stream.next_out = (uint8_t*)out.data();
c_stream.avail_out = (unsigned int)out.size();
do {
ret = deflate(&c_stream, Z_FINISH);
if (ret != Z_OK && ret != Z_STREAM_END) {
deflateEnd(&c_stream);
throw formatted_error("deflate returned {}", ret);
}
if (c_stream.avail_in == 0 || c_stream.avail_out == 0)
break;
} while (ret != Z_STREAM_END);
deflateEnd(&c_stream);
if (c_stream.avail_in > 0) // compressed version would be longer than uncompressed
return nullopt;
if (c_stream.total_out > data.length() - cluster_size) // space saving less than one sector
return nullopt;
// round to sector, and zero end
out.resize((c_stream.total_out + cluster_size - 1) & ~(cluster_size - 1), 0);
return out;
}
#endif
#ifdef WITH_LZO
static __inline size_t lzo_max_outlen(size_t inlen) {
return inlen + (inlen / 16) + 64 + 3; // formula comes from LZO.FAQ
}
optional<buffer_t> lzo_compress(string_view data, uint32_t cluster_size) {
size_t num_pages;
num_pages = data.length() / cluster_size;
// Four-byte overall header
// Another four-byte header per page
// Each page has a maximum size of lzo_max_outlen(cluster_size)
// Plus another four bytes for possible padding
buffer_t outbuf(sizeof(uint32_t) + ((lzo_max_outlen(cluster_size) + (2 * sizeof(uint32_t))) * num_pages));
buffer_t wrkmem(LZO1X_MEM_COMPRESS);
auto out_size = (uint32_t*)outbuf.data();
*out_size = sizeof(uint32_t);
auto in = (lzo_bytep)data.data();
auto out = (lzo_bytep)(outbuf.data() + (2 * sizeof(uint32_t)));
for (unsigned int i = 0; i < num_pages; i++) {
auto pagelen = (uint32_t*)(out - sizeof(uint32_t));
lzo_uint outlen;
auto ret = lzo1x_1_compress(in, cluster_size, out, &outlen, wrkmem.data());
if (ret != LZO_E_OK)
throw formatted_error("lzo1x_1_compress returned {}", ret);
*pagelen = (uint32_t)outlen;
*out_size += (uint32_t)(outlen + sizeof(uint32_t));
in += cluster_size;
out += outlen + sizeof(uint32_t);
// new page needs to start at a 32-bit boundary
if (cluster_size - (*out_size % cluster_size) < sizeof(uint32_t)) {
memset(out, 0, cluster_size - (*out_size % cluster_size));
out += cluster_size - (*out_size % cluster_size);
*out_size += cluster_size - (*out_size % cluster_size);
}
if (*out_size >= data.length())
return nullopt;
}
outbuf.resize(*out_size);
if (outbuf.size() > data.length() - cluster_size)
return nullopt;
outbuf.resize((outbuf.size() + cluster_size - 1) & ~((uint64_t)cluster_size - 1), 0);
return outbuf;
}
#endif
#ifdef WITH_ZSTD
optional<buffer_t> zstd_compress(string_view data, uint32_t cluster_size) {
buffer_t out(ZSTD_compressBound(data.length()));
auto ret = ZSTD_compress(out.data(), out.size(), data.data(), data.length(), 1);
if (ZSTD_isError(ret))
throw formatted_error("ZSTD_compress returned {}", ret);
if (ret > data.length() - cluster_size)
return nullopt;
out.resize(ret);
out.resize((out.size() + cluster_size - 1) & ~((uint64_t)cluster_size - 1), 0);
return out;
}
#endif
ntfs2btrfs-20240115/src/config.h.in
#pragma once
#define PROJECT_VER "@PROJECT_VERSION@"
#cmakedefine WITH_ZLIB 1
#cmakedefine WITH_LZO 1
#cmakedefine WITH_ZSTD 1
ntfs2btrfs-20240115/src/crc32c-gas.S
/* Copyright (c) Mark Harmstone 2020
*
* This file is part of WinBtrfs.
*
* WinBtrfs is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public Licence as published by
* the Free Software Foundation, either version 3 of the Licence, or
* (at your option) any later version.
*
* WinBtrfs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public Licence for more details.
*
* You should have received a copy of the GNU Lesser General Public Licence
 * along with WinBtrfs. If not, see <https://www.gnu.org/licenses/>. */
#ifdef __i386__
.intel_syntax noprefix
#ifdef __MINGW32__
.extern _crctable
.global _calc_crc32c_sw@12
.global _calc_crc32c_hw@12
#else
.extern crctable
.global calc_crc32c_sw
.global calc_crc32c_hw
#endif
/* uint32_t __stdcall calc_crc32c_sw(uint32_t seed, uint8_t* msg, uint32_t msglen); */
#ifdef __MINGW32__
_calc_crc32c_sw@12:
#else
calc_crc32c_sw:
#endif
push ebp
mov ebp, esp
push esi
push ebx
mov eax, [ebp+8]
mov edx, [ebp+12]
mov ebx, [ebp+16]
/* eax = crc / seed
* ebx = len
* esi = tmp
* edx = buf
* ecx = tmp2 */
crcloop:
test ebx, ebx
jz crcend
mov esi, eax
shr esi, 8
mov cl, byte ptr [edx]
xor al, cl
and eax, 255
shl eax, 2
#ifdef __MINGW32__
mov eax, [_crctable + eax]
#else
mov eax, [crctable + eax]
#endif
xor eax, esi
inc edx
dec ebx
jmp crcloop
crcend:
pop ebx
pop esi
pop ebp
ret 12
/****************************************************/
/* uint32_t __stdcall calc_crc32c_hw(uint32_t seed, uint8_t* msg, uint32_t msglen); */
#ifdef __MINGW32__
_calc_crc32c_hw@12:
#else
calc_crc32c_hw:
#endif
push ebp
mov ebp, esp
mov eax, [ebp+8]
mov edx, [ebp+12]
mov ecx, [ebp+16]
/* eax = crc / seed
* ecx = len
* edx = buf */
crchw_loop:
cmp ecx, 4
jl crchw_stragglers
crc32 eax, dword ptr [edx]
add edx, 4
sub ecx, 4
jmp crchw_loop
crchw_stragglers:
cmp ecx, 2
jl crchw_stragglers2
crc32 eax, word ptr [edx]
add edx, 2
sub ecx, 2
crchw_stragglers2:
test ecx, ecx
jz crchw_end
crc32 eax, byte ptr [edx]
inc edx
dec ecx
jmp crchw_stragglers2
crchw_end:
pop ebp
ret 12
#elif defined(__x86_64__)
.intel_syntax noprefix
.extern crctable
.global calc_crc32c_sw
.global calc_crc32c_hw
/* uint32_t __stdcall calc_crc32c_sw(uint32_t seed, uint8_t* msg, uint32_t msglen); */
calc_crc32c_sw:
/* rax = crc / seed
* rdx = buf
* r8 = len
* rcx = tmp
* r10 = tmp2
* r11 = crctable */
lea r11, [rip + crctable]
mov rax, rcx
crcloop:
test r8, r8
jz crcend
mov rcx, rax
shr rcx, 8
mov r10b, byte ptr [rdx]
xor al, r10b
and rax, 255
shl rax, 2
mov eax, [r11 + rax]
xor rax, rcx
inc rdx
dec r8
jmp crcloop
crcend:
ret
/****************************************************/
/* uint32_t __stdcall calc_crc32c_hw(uint32_t seed, uint8_t* msg, uint32_t msglen); */
calc_crc32c_hw:
/* rax = crc / seed
* rdx = buf
* r8 = len */
mov rax, rcx
crchw_loop:
cmp r8, 8
jl crchw_stragglers
crc32 rax, qword ptr [rdx]
add rdx, 8
sub r8, 8
jmp crchw_loop
crchw_stragglers:
cmp r8, 4
jl crchw_stragglers2
crc32 eax, dword ptr [rdx]
add rdx, 4
sub r8, 4
crchw_stragglers2:
cmp r8, 2
jl crchw_stragglers3
crc32 eax, word ptr [rdx]
add rdx, 2
sub r8, 2
crchw_stragglers3:
test r8, r8
jz crchw_end
crc32 eax, byte ptr [rdx]
inc rdx
dec r8
jmp crchw_stragglers3
crchw_end:
ret
#endif
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif
ntfs2btrfs-20240115/src/crc32c-masm.asm000077500000000000000000000061031455127722500173670ustar00rootroot00000000000000; Copyright (c) Mark Harmstone 2020
;
; This file is part of WinBtrfs.
;
; WinBtrfs is free software: you can redistribute it and/or modify
; it under the terms of the GNU Lesser General Public Licence as published by
; the Free Software Foundation, either version 3 of the Licence, or
; (at your option) any later version.
;
; WinBtrfs is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
; GNU Lesser General Public Licence for more details.
;
; You should have received a copy of the GNU Lesser General Public Licence
; along with WinBtrfs. If not, see <https://www.gnu.org/licenses/>.
IFDEF RAX
ELSE
.686P
ENDIF
_TEXT SEGMENT
IFDEF RAX
EXTERN crctable:qword
PUBLIC calc_crc32c_sw
; uint32_t __stdcall calc_crc32c_sw(uint32_t seed, uint8_t* msg, uint32_t msglen);
calc_crc32c_sw:
; rax = crc / seed
; rdx = buf
; r8 = len
; rcx = tmp
; r10 = tmp2
mov rax, rcx
crcloop:
test r8, r8
jz crcend
mov rcx, rax
shr rcx, 8
mov r10b, byte ptr [rdx]
xor al, r10b
and rax, 255
shl rax, 2
mov r10, offset crctable
mov eax, dword ptr [r10 + rax]
xor rax, rcx
inc rdx
dec r8
jmp crcloop
crcend:
ret
; ****************************************************
; uint32_t __stdcall calc_crc32c_hw(uint32_t seed, uint8_t* msg, uint32_t msglen);
PUBLIC calc_crc32c_hw
calc_crc32c_hw:
; rax = crc / seed
; rdx = buf
; r8 = len
mov rax, rcx
crchw_loop:
cmp r8, 8
jl crchw_stragglers
crc32 rax, qword ptr [rdx]
add rdx, 8
sub r8, 8
jmp crchw_loop
crchw_stragglers:
cmp r8, 4
jl crchw_stragglers2
crc32 eax, dword ptr [rdx]
add rdx, 4
sub r8, 4
crchw_stragglers2:
cmp r8, 2
jl crchw_stragglers3
crc32 eax, word ptr [rdx]
add rdx, 2
sub r8, 2
crchw_stragglers3:
test r8, r8
jz crchw_end
crc32 eax, byte ptr [rdx]
inc rdx
dec r8
jmp crchw_stragglers3
crchw_end:
ret
ELSE
EXTERN _crctable:ABS
; uint32_t __stdcall calc_crc32c_sw(uint32_t seed, uint8_t* msg, uint32_t msglen);
PUBLIC _calc_crc32c_sw@12
_calc_crc32c_sw@12:
push ebp
mov ebp, esp
push esi
push ebx
mov eax, [ebp+8]
mov edx, [ebp+12]
mov ebx, [ebp+16]
; eax = crc / seed
; ebx = len
; esi = tmp
; edx = buf
; ecx = tmp2
crcloop:
test ebx, ebx
jz crcend
mov esi, eax
shr esi, 8
mov cl, byte ptr [edx]
xor al, cl
and eax, 255
shl eax, 2
mov eax, [_crctable + eax]
xor eax, esi
inc edx
dec ebx
jmp crcloop
crcend:
pop ebx
pop esi
pop ebp
ret 12
; ****************************************************
; uint32_t __stdcall calc_crc32c_hw(uint32_t seed, uint8_t* msg, uint32_t msglen);
PUBLIC _calc_crc32c_hw@12
_calc_crc32c_hw@12:
push ebp
mov ebp, esp
mov eax, [ebp+8]
mov edx, [ebp+12]
mov ecx, [ebp+16]
; eax = crc / seed
; ecx = len
; edx = buf
crchw_loop:
cmp ecx, 4
jl crchw_stragglers
crc32 eax, dword ptr [edx]
add edx, 4
sub ecx, 4
jmp crchw_loop
crchw_stragglers:
cmp ecx, 2
jl crchw_stragglers2
crc32 eax, word ptr [edx]
add edx, 2
sub ecx, 2
crchw_stragglers2:
test ecx, ecx
jz crchw_end
crc32 eax, byte ptr [edx]
inc edx
dec ecx
jmp crchw_stragglers2
crchw_end:
pop ebp
ret 12
ENDIF
_TEXT ENDS
end
ntfs2btrfs-20240115/src/crc32c.c000066400000000000000000000106561455127722500161030ustar00rootroot00000000000000/* Copyright (c) Mark Harmstone 2016-17
*
* This file is part of WinBtrfs.
*
* WinBtrfs is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public Licence as published by
* the Free Software Foundation, either version 3 of the Licence, or
* (at your option) any later version.
*
* WinBtrfs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public Licence for more details.
*
* You should have received a copy of the GNU Lesser General Public Licence
 * along with WinBtrfs. If not, see <https://www.gnu.org/licenses/>. */
#include "crc32c.h"
#include
#include
crc_func calc_crc32c = calc_crc32c_sw;
#ifdef __cplusplus
extern "C"
{
#endif
const uint32_t crctable[] = {
0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4, 0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb,
0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b, 0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24,
0x105ec76f, 0xe235446c, 0xf165b798, 0x030e349b, 0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384,
0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54, 0x5d1d08bf, 0xaf768bbc, 0xbc267848, 0x4e4dfb4b,
0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a, 0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35,
0xaa64d611, 0x580f5512, 0x4b5fa6e6, 0xb93425e5, 0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa,
0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45, 0xf779deae, 0x05125dad, 0x1642ae59, 0xe4292d5a,
0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a, 0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595,
0x417b1dbc, 0xb3109ebf, 0xa0406d4b, 0x522bee48, 0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957,
0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687, 0x0c38d26c, 0xfe53516f, 0xed03a29b, 0x1f682198,
0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927, 0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38,
0xdbfc821c, 0x2997011f, 0x3ac7f2eb, 0xc8ac71e8, 0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7,
0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096, 0xa65c047d, 0x5437877e, 0x4767748a, 0xb50cf789,
0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859, 0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46,
0x7198540d, 0x83f3d70e, 0x90a324fa, 0x62c8a7f9, 0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6,
0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36, 0x3cdb9bdd, 0xceb018de, 0xdde0eb2a, 0x2f8b6829,
0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c, 0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93,
0x082f63b7, 0xfa44e0b4, 0xe9141340, 0x1b7f9043, 0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c,
0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3, 0x55326b08, 0xa759e80b, 0xb4091bff, 0x466298fc,
0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c, 0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033,
0xa24bb5a6, 0x502036a5, 0x4370c551, 0xb11b4652, 0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d,
0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d, 0xef087a76, 0x1d63f975, 0x0e330a81, 0xfc588982,
0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d, 0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622,
0x38cc2a06, 0xcaa7a905, 0xd9f75af1, 0x2b9cd9f2, 0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed,
0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530, 0x0417b1db, 0xf67c32d8, 0xe52cc12c, 0x1747422f,
0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff, 0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0,
0xd3d3e1ab, 0x21b862a8, 0x32e8915c, 0xc083125f, 0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540,
0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90, 0x9e902e7b, 0x6cfbad78, 0x7fab5e8c, 0x8dc0dd8f,
0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee, 0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1,
0x69e9f0d5, 0x9b8273d6, 0x88d28022, 0x7ab90321, 0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e,
0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81, 0x34f4f86a, 0xc69f7b69, 0xd5cf889d, 0x27a40b9e,
0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e, 0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351,
};
// x86 and amd64 versions live in asm files
#if !defined(__i386__) && !defined(__x86_64__) && !defined(_M_IX86) && !defined(_M_X64)
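/* Portable fallback: byte-at-a-time, table-driven CRC-32C (Castagnoli,
 * reflected polynomial 0x82F63B78) - each message byte is folded into the
 * low byte of the running remainder, then eight bits are shifted out via
 * the lookup table. */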
uint32_t __stdcall calc_crc32c_sw(uint32_t seed, const uint8_t* msg, uint32_t msglen) {
uint32_t rem = seed;
for (uint32_t i = 0; i < msglen; i++) {
rem = crctable[(rem ^ msg[i]) & 0xff] ^ (rem >> 8);
}
return rem;
}
#endif
#ifdef __cplusplus
}
#endif
ntfs2btrfs-20240115/src/crc32c.h000066400000000000000000000025151455127722500161030ustar00rootroot00000000000000/* Copyright (c) Mark Harmstone 2020
*
* This file is part of ntfs2btrfs.
*
* Ntfs2btrfs is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public Licence as published by
* the Free Software Foundation, either version 2 of the Licence, or
* (at your option) any later version.
*
* Ntfs2btrfs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public Licence for more details.
*
* You should have received a copy of the GNU General Public Licence
 * along with Ntfs2btrfs. If not, see <https://www.gnu.org/licenses/>. */
#pragma once
#include <stdint.h>
#ifndef _WIN32
#ifdef __i386__
#define __stdcall __attribute__((stdcall))
#elif defined(__x86_64__)
#define __stdcall __attribute__((ms_abi))
#else
#define __stdcall
#endif
#endif
#ifdef __cplusplus
extern "C"
{
#endif
#if defined(__i386__) || defined(__x86_64__)
uint32_t __stdcall calc_crc32c_hw(uint32_t seed, const uint8_t* msg, uint32_t msglen);
#endif
uint32_t __stdcall calc_crc32c_sw(uint32_t seed, const uint8_t* msg, uint32_t msglen);
typedef uint32_t (__stdcall *crc_func)(uint32_t seed, const uint8_t* msg, uint32_t msglen);
extern crc_func calc_crc32c;
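/* calc_crc32c initially points at the table-driven software routine; callers
 * that detect SSE4.2 (the crc32 instruction) can repoint it at calc_crc32c_hw. */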
#ifdef __cplusplus
}
#endif
ntfs2btrfs-20240115/src/decomp.cpp000066400000000000000000000170451455127722500166320ustar00rootroot00000000000000/* Copyright (c) Mark Harmstone 2020
*
* This file is part of ntfs2btrfs.
*
* Ntfs2btrfs is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public Licence as published by
* the Free Software Foundation, either version 2 of the Licence, or
* (at your option) any later version.
*
* Ntfs2btrfs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public Licence for more details.
*
* You should have received a copy of the GNU General Public Licence
 * along with Ntfs2btrfs. If not, see <https://www.gnu.org/licenses/>. */
#include "ntfs2btrfs.h"
#include "ebiggers/system_compression.h"
#define LZX_CHUNK_SIZE 32768
using namespace std;
static buffer_t lznt1_decompress_chunk(string_view data) {
buffer_t s;
while (!data.empty()) {
auto fg = (uint8_t)data[0];
data = data.substr(1);
if (fg == 0) {
if (data.length() < 8) {
s.insert(s.end(), data.begin(), data.end());
return s;
} else {
s.insert(s.end(), data.begin(), data.begin() + 8);
data = data.substr(8);
}
} else {
for (unsigned int i = 0; i < 8; i++) {
if (data.empty())
return s;
if (!(fg & 1)) {
s.insert(s.end(), data.begin(), data.begin() + 1);
data = data.substr(1);
} else {
if (data.length() < sizeof(uint16_t))
throw formatted_error("Compressed chunk was {} bytes, expected at least 2.", data.length());
// See https://docs.microsoft.com/en-us/openspecs/windows_protocols/ms-xca/90fc6a28-f627-4ee5-82ce-445a6cf98b22
auto v = *(uint16_t*)data.data();
data = data.substr(2);
// Shamelessly stolen from https://github.com/you0708/lznt1 - thank you!
uint64_t u = s.size() - 1;
uint64_t lm = 0xfff;
uint64_t os = 12;
while (u >= 0x10) {
lm >>= 1;
os--;
u >>= 1;
}
auto l = (v & lm) + 3;
auto d = (v >> os) + 1;
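                    // Worked example: once 0x101 bytes of this chunk have been
                    // output, u = 0x100 takes five shifts to drop below 0x10,
                    // leaving lm = 0x7f and os = 7: the low 7 bits of v hold
                    // (length - 3) and the high 9 bits hold (displacement - 1).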
s.reserve((uint32_t)(s.size() + l));
while (l > 0) {
s.resize(s.size() + 1);
s[s.size() - 1] = s[s.size() - d - 1];
l--;
}
}
fg >>= 1;
}
}
}
return s;
}
buffer_t lznt1_decompress(string_view compdata, uint32_t size) {
buffer_t ret(size);
uint8_t* ptr;
memset(ret.data(), 0, ret.size());
ptr = ret.data();
while (true) {
if (compdata.length() < sizeof(uint16_t))
throw formatted_error("compdata was {} bytes, expected at least 2.", compdata.length());
auto h = *(uint16_t*)compdata.data();
if (h == 0)
return ret;
compdata = compdata.substr(2);
auto sig = (h & 0x7000) >> 12;
if (sig != 3)
throw formatted_error("Compression signature was {}, expected 3.", sig);
auto len = (uint32_t)(((uint64_t)h & 0xfff) + 1);
if (compdata.length() < len)
throw formatted_error("compdata was {} bytes, expected at least {}.", compdata.length(), len);
auto data = string_view(compdata.data(), len);
compdata = compdata.substr(len);
if (h & 0x8000) {
auto c = lznt1_decompress_chunk(data);
if (ptr + c.size() >= ret.data() + size) {
memcpy(ptr, c.data(), size - (ptr - ret.data()));
return ret;
} else {
memcpy(ptr, c.data(), c.size());
ptr += c.size();
}
} else {
if (ptr + data.length() >= ret.data() + size) {
memcpy(ptr, data.data(), size - (ptr - ret.data()));
return ret;
} else {
memcpy(ptr, data.data(), data.length());
ptr += data.length();
}
}
}
return ret;
}
buffer_t do_lzx_decompress(string_view compdata, uint32_t size) {
auto ctx = lzx_allocate_decompressor(LZX_CHUNK_SIZE);
if (!ctx)
throw formatted_error("lzx_allocate_decompressor returned NULL.");
uint64_t num_chunks = (size + LZX_CHUNK_SIZE - 1) / LZX_CHUNK_SIZE;
auto offsets = (uint32_t*)compdata.data();
buffer_t ret(size);
auto data = string_view(compdata.data() + ((num_chunks - 1) * sizeof(uint32_t)),
(uint32_t)(compdata.length() - ((num_chunks - 1) * sizeof(uint32_t))));
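    // compdata begins with (num_chunks - 1) little-endian uint32s: offsets[i]
    // is where chunk i+1 starts, relative to the end of this table, so
    // consecutive entries give each chunk's compressed length.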
for (uint64_t i = 0; i < num_chunks; i++) {
uint64_t off = i == 0 ? 0 : offsets[i - 1];
uint32_t complen;
if (i == 0)
complen = num_chunks > 1 ? offsets[0] : (uint32_t)data.length();
else if (i == num_chunks - 1)
complen = (uint32_t)data.length() - offsets[i - 1];
else
complen = offsets[i] - offsets[i - 1];
if (complen == (i == num_chunks - 1 ? (ret.size() - (i * LZX_CHUNK_SIZE)) : LZX_CHUNK_SIZE)) {
// stored uncompressed
memcpy(ret.data() + (i * LZX_CHUNK_SIZE), data.data() + off, complen);
} else {
auto err = lzx_decompress(ctx, data.data() + off, complen, ret.data() + (i * LZX_CHUNK_SIZE),
(uint32_t)(i == num_chunks - 1 ? (ret.size() - (i * LZX_CHUNK_SIZE)) : LZX_CHUNK_SIZE));
if (err != 0) {
lzx_free_decompressor(ctx);
throw formatted_error("lzx_decompress returned {}.", err);
}
}
}
lzx_free_decompressor(ctx);
return ret;
}
buffer_t do_xpress_decompress(string_view compdata, uint32_t size, uint32_t chunk_size) {
auto ctx = xpress_allocate_decompressor();
if (!ctx)
throw formatted_error("xpress_allocate_decompressor returned NULL.");
uint64_t num_chunks = (size + chunk_size - 1) / chunk_size;
auto offsets = (uint32_t*)compdata.data();
buffer_t ret(size);
auto data = string_view(compdata.data() + ((num_chunks - 1) * sizeof(uint32_t)),
(uint32_t)(compdata.length() - ((num_chunks - 1) * sizeof(uint32_t))));
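    // chunk offset table laid out exactly as in do_lzx_decompress above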
for (uint64_t i = 0; i < num_chunks; i++) {
uint64_t off = i == 0 ? 0 : offsets[i - 1];
uint32_t complen;
if (i == 0)
complen = num_chunks > 1 ? offsets[0] : (uint32_t)data.length();
else if (i == num_chunks - 1)
complen = (uint32_t)data.length() - offsets[i - 1];
else
complen = offsets[i] - offsets[i - 1];
if (complen == (i == num_chunks - 1 ? (ret.size() - (i * chunk_size)) : chunk_size)) {
// stored uncompressed
memcpy(ret.data() + (i * chunk_size), data.data() + off, complen);
} else {
auto err = xpress_decompress(ctx, data.data() + off, complen, ret.data() + (i * chunk_size),
(size_t)(i == num_chunks - 1 ? (ret.size() - (i * chunk_size)) : chunk_size));
if (err != 0) {
xpress_free_decompressor(ctx);
throw formatted_error("xpress_decompress returned {}.", err);
}
}
}
xpress_free_decompressor(ctx);
return ret;
}
ntfs2btrfs-20240115/src/ebiggers/000077500000000000000000000000001455127722500164375ustar00rootroot00000000000000ntfs2btrfs-20240115/src/ebiggers/aligned_malloc.c000066400000000000000000000013271455127722500215400ustar00rootroot00000000000000/*
* aligned_malloc.c - aligned memory allocation
*
* This file provides portable aligned memory allocation functions that only use
* malloc() and free(). This avoids portability problems with posix_memalign(),
* aligned_alloc(), etc.
 */
#include <stdlib.h>
#include "common_defs.h"
void *
aligned_malloc(size_t size, size_t alignment)
{
const uintptr_t mask = alignment - 1;
char *ptr = NULL;
char *raw_ptr;
raw_ptr = malloc(mask + sizeof(size_t) + size);
if (raw_ptr) {
ptr = (char *)raw_ptr + sizeof(size_t);
ptr = (void *)(((uintptr_t)ptr + mask) & ~mask);
*((size_t *)ptr - 1) = ptr - raw_ptr;
}
return ptr;
}
void
aligned_free(void *ptr)
{
if (ptr)
free((char *)ptr - *((size_t *)ptr - 1));
}
ntfs2btrfs-20240115/src/ebiggers/common_defs.h000066400000000000000000000155001455127722500211020ustar00rootroot00000000000000#ifndef _COMMON_DEFS_H
#define _COMMON_DEFS_H
// #include
// #include
#include <stddef.h>
#include <stdint.h>
typedef uint8_t u8;
typedef uint16_t u16;
typedef uint32_t u32;
typedef uint64_t u64;
typedef int32_t s32;
/* ========================================================================== */
/* Type definitions */
/* ========================================================================== */
/*
* Type of a machine word. 'unsigned long' would be logical, but that is only
* 32 bits on x86_64 Windows. The same applies to 'uint_fast32_t'. So the best
* we can do without a bunch of #ifdefs appears to be 'size_t'.
*/
typedef size_t machine_word_t;
#define WORDBYTES sizeof(machine_word_t)
#define WORDBITS (8 * WORDBYTES)
/* ========================================================================== */
/* Compiler-specific definitions */
/* ========================================================================== */
#ifdef __GNUC__ /* GCC, or GCC-compatible compiler such as clang */
# define forceinline inline __attribute__((always_inline))
# define likely(expr) __builtin_expect(!!(expr), 1)
# define unlikely(expr) __builtin_expect(!!(expr), 0)
# define _aligned_attribute(n) __attribute__((aligned(n)))
# define bsr32(n) (31 - __builtin_clz(n))
# define bsr64(n) (63 - __builtin_clzll(n))
# define bsf32(n) __builtin_ctz(n)
# define bsf64(n) __builtin_ctzll(n)
# ifndef min
# define min(a, b) ({ __typeof__(a) _a = (a); __typeof__(b) _b = (b); \
(_a < _b) ? _a : _b; })
# endif
# ifndef max
# define max(a, b) ({ __typeof__(a) _a = (a); __typeof__(b) _b = (b); \
(_a > _b) ? _a : _b; })
# endif
# define DEFINE_UNALIGNED_TYPE(type) \
struct type##_unaligned { \
type v; \
} __attribute__((packed)); \
\
static inline type \
load_##type##_unaligned(const void *p) \
{ \
return ((const struct type##_unaligned *)p)->v; \
} \
\
static inline void \
store_##type##_unaligned(type val, void *p) \
{ \
((struct type##_unaligned *)p)->v = val; \
}
#endif /* __GNUC__ */
/* Declare that the annotated function should always be inlined. This might be
* desirable in highly tuned code, e.g. compression codecs */
#ifndef forceinline
# define forceinline inline
#endif
/* Hint that the expression is usually true */
#ifndef likely
# define likely(expr) (expr)
#endif
/* Hint that the expression is usually false */
#ifndef unlikely
# define unlikely(expr) (expr)
#endif
/* Declare that the annotated variable, or variables of the annotated type, are
* to be aligned on n-byte boundaries */
#ifndef _aligned_attribute
# define _aligned_attribute(n)
#endif
/* min() and max() macros */
#ifndef min
# define min(a, b) ((a) < (b) ? (a) : (b))
#endif
#ifndef max
# define max(a, b) ((a) > (b) ? (a) : (b))
#endif
/* STATIC_ASSERT() - verify the truth of an expression at compilation time */
#define STATIC_ASSERT(expr) ((void)sizeof(char[1 - 2 * !(expr)]))
/* STATIC_ASSERT_ZERO() - verify the truth of an expression at compilation time
* and also produce a result of value '0' to be used in constant expressions */
#define STATIC_ASSERT_ZERO(expr) ((int)sizeof(char[-!(expr)]))
/* UNALIGNED_ACCESS_IS_FAST should be defined to 1 if unaligned memory accesses
* can be performed efficiently on the target platform. */
#if defined(__x86_64__) || defined(__i386__) || defined(__ARM_FEATURE_UNALIGNED)
# define UNALIGNED_ACCESS_IS_FAST 1
#else
# define UNALIGNED_ACCESS_IS_FAST 0
#endif
/*
* DEFINE_UNALIGNED_TYPE(type) - a macro that, given an integer type 'type',
* defines load_type_unaligned(addr) and store_type_unaligned(v, addr) functions
* which load and store variables of type 'type' from/to unaligned memory
* addresses.
*/
#ifndef DEFINE_UNALIGNED_TYPE
#include <string.h>
/*
* Although memcpy() may seem inefficient, it *usually* gets optimized
* appropriately by modern compilers. It's portable and may be the best we can
* do for a fallback...
*/
#define DEFINE_UNALIGNED_TYPE(type) \
\
static forceinline type \
load_##type##_unaligned(const void *p) \
{ \
type v; \
memcpy(&v, p, sizeof(v)); \
return v; \
} \
\
static forceinline void \
store_##type##_unaligned(type v, void *p) \
{ \
memcpy(p, &v, sizeof(v)); \
}
#endif /* !DEFINE_UNALIGNED_TYPE */
/* ========================================================================== */
/* Unaligned memory accesses */
/* ========================================================================== */
#define load_word_unaligned load_machine_word_t_unaligned
#define store_word_unaligned store_machine_word_t_unaligned
/* ========================================================================== */
/* Bit scan functions */
/* ========================================================================== */
/*
* Bit Scan Reverse (BSR) - find the 0-based index (relative to the least
* significant end) of the *most* significant 1 bit in the input value. The
* input value must be nonzero!
*/
#ifndef bsr32
static forceinline unsigned
bsr32(u32 v)
{
unsigned bit = 0;
while ((v >>= 1) != 0)
bit++;
return bit;
}
#endif
#ifndef bsr64
static forceinline unsigned
bsr64(u64 v)
{
unsigned bit = 0;
while ((v >>= 1) != 0)
bit++;
return bit;
}
#endif
static forceinline unsigned
bsrw(machine_word_t v)
{
STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
if (WORDBITS == 32)
return bsr32(v);
else
return bsr64(v);
}
/*
* Bit Scan Forward (BSF) - find the 0-based index (relative to the least
* significant end) of the *least* significant 1 bit in the input value. The
* input value must be nonzero!
*/
#ifndef bsf32
static forceinline unsigned
bsf32(u32 v)
{
unsigned bit;
for (bit = 0; !(v & 1); bit++, v >>= 1)
;
return bit;
}
#endif
#ifndef bsf64
static forceinline unsigned
bsf64(u64 v)
{
unsigned bit;
for (bit = 0; !(v & 1); bit++, v >>= 1)
;
return bit;
}
#endif
static forceinline unsigned
bsfw(machine_word_t v)
{
STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
if (WORDBITS == 32)
return bsf32(v);
else
return bsf64(v);
}
/* Return the log base 2 of 'n', rounded up to the nearest integer. */
static forceinline unsigned
ilog2_ceil(size_t n)
{
if (n <= 1)
return 0;
return 1 + bsrw(n - 1);
}
/* ========================================================================== */
/* Aligned memory allocation */
/* ========================================================================== */
extern void *aligned_malloc(size_t size, size_t alignment);
extern void aligned_free(void *ptr);
#endif /* _COMMON_DEFS_H */
ntfs2btrfs-20240115/src/ebiggers/decompress_common.c000066400000000000000000000313641455127722500223260ustar00rootroot00000000000000/*
* decompress_common.c
*
* Code for decompression shared among multiple compression formats.
*
* The following copying information applies to this specific source code file:
*
* Written in 2012-2016 by Eric Biggers
*
* To the extent possible under law, the author(s) have dedicated all copyright
* and related and neighboring rights to this software to the public domain
* worldwide via the Creative Commons Zero 1.0 Universal Public Domain
* Dedication (the "CC0").
*
* This software is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the CC0 for more details.
*
* You should have received a copy of the CC0 along with this software; if not
 * see <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include <string.h>
#ifdef __SSE2__
# include <emmintrin.h>
#endif
#include "decompress_common.h"
/*
* make_huffman_decode_table() -
*
* Given an alphabet of symbols and the length of each symbol's codeword in a
* canonical prefix code, build a table for quickly decoding symbols that were
* encoded with that code.
*
* A _prefix code_ is an assignment of bitstrings called _codewords_ to symbols
* such that no whole codeword is a prefix of any other. A prefix code might be
* a _Huffman code_, which means that it is an optimum prefix code for a given
* list of symbol frequencies and was generated by the Huffman algorithm.
* Although the prefix codes processed here will ordinarily be "Huffman codes",
* strictly speaking the decoder cannot know whether a given code was actually
* generated by the Huffman algorithm or not.
*
* A prefix code is _canonical_ if and only if a longer codeword never
* lexicographically precedes a shorter codeword, and the lexicographic ordering
* of codewords of equal length is the same as the lexicographic ordering of the
* corresponding symbols. The advantage of using a canonical prefix code is
* that the codewords can be reconstructed from only the symbol => codeword
* length mapping. This eliminates the need to transmit the codewords
* explicitly. Instead, they can be enumerated in lexicographic order after
* sorting the symbols primarily by increasing codeword length and secondarily
* by increasing symbol value.
*
* However, the decoder's real goal is to decode symbols with the code, not just
* generate the list of codewords. Consequently, this function directly builds
* a table for efficiently decoding symbols using the code. The basic idea is
* that given the next 'max_codeword_len' bits of input, the decoder can look up
* the next decoded symbol by indexing a table containing '2^max_codeword_len'
* entries. A codeword with length 'max_codeword_len' will have exactly one
* entry in this table, whereas a codeword shorter than 'max_codeword_len' will
* have multiple entries in this table. Precisely, a codeword of length 'n'
* will have '2^(max_codeword_len - n)' entries. The index of each such entry,
* considered as a bitstring of length 'max_codeword_len', will contain the
* corresponding codeword as a prefix.
*
* That's the basic idea, but we extend it in two ways:
*
* - Often the maximum codeword length is too long for it to be efficient to
* build the full decode table whenever a new code is used. Instead, we build
* a "root" table using only '2^table_bits' entries, where 'table_bits <=
* max_codeword_len'. Then, a lookup of 'table_bits' bits produces either a
* symbol directly (for codewords not longer than 'table_bits'), or the index
* of a subtable which must be indexed with additional bits of input to fully
* decode the symbol (for codewords longer than 'table_bits').
*
* - Whenever the decoder decodes a symbol, it needs to know the codeword length
* so that it can remove the appropriate number of input bits. The obvious
* solution would be to simply retain the codeword lengths array and use the
* decoded symbol as an index into it. However, that would require two array
* accesses when decoding each symbol. Our strategy is to instead store the
* codeword length directly in the decode table entry along with the symbol.
*
* See MAKE_DECODE_TABLE_ENTRY() for full details on the format of decode table
* entries, and see read_huffsym() for full details on how symbols are decoded.
*
* @decode_table:
* The array in which to build the decode table. This must have been
* declared by the DECODE_TABLE() macro. This may alias @lens, since all
* @lens are consumed before the decode table is written to.
*
* @num_syms:
* The number of symbols in the alphabet.
*
* @table_bits:
* The log base 2 of the number of entries in the root table.
*
* @lens:
* An array of length @num_syms, indexed by symbol, that gives the length
* of the codeword, in bits, for each symbol. The length can be 0, which
* means that the symbol does not have a codeword assigned. In addition,
* @lens may alias @decode_table, as noted above.
*
* @max_codeword_len:
* The maximum codeword length permitted for this code. All entries in
* 'lens' must be less than or equal to this value.
*
* @working_space
* A temporary array that was declared with DECODE_TABLE_WORKING_SPACE().
*
* Returns 0 on success, or -1 if the lengths do not form a valid prefix code.
*/
int
make_huffman_decode_table(u16 decode_table[], unsigned num_syms,
unsigned table_bits, const u8 lens[],
unsigned max_codeword_len, u16 working_space[])
{
u16 * const len_counts = &working_space[0];
u16 * const offsets = &working_space[1 * (max_codeword_len + 1)];
u16 * const sorted_syms = &working_space[2 * (max_codeword_len + 1)];
s32 remainder = 1;
uint8_t *entry_ptr = (uint8_t *)decode_table;
unsigned codeword_len = 1;
unsigned sym_idx;
unsigned codeword;
unsigned subtable_pos;
unsigned subtable_bits;
unsigned subtable_prefix;
/* Count how many codewords have each length, including 0. */
for (unsigned len = 0; len <= max_codeword_len; len++)
len_counts[len] = 0;
for (unsigned sym = 0; sym < num_syms; sym++)
len_counts[lens[sym]]++;
/* It is already guaranteed that all lengths are <= max_codeword_len,
* but it cannot be assumed they form a complete prefix code. A
* codeword of length n should require a proportion of the codespace
* equaling (1/2)^n. The code is complete if and only if, by this
* measure, the codespace is exactly filled by the lengths. */
for (unsigned len = 1; len <= max_codeword_len; len++) {
remainder = (remainder << 1) - len_counts[len];
/* Do the lengths overflow the codespace? */
if (unlikely(remainder < 0))
return -1;
}
if (remainder != 0) {
/* The lengths do not fill the codespace; that is, they form an
* incomplete code. This is permitted only if the code is empty
* (contains no symbols). */
if (unlikely(remainder != 1U << max_codeword_len))
return -1;
/* The code is empty. When processing a well-formed stream, the
* decode table need not be initialized in this case. However,
* we cannot assume the stream is well-formed, so we must
* initialize the decode table anyway. Setting all entries to 0
* makes the decode table always produce symbol '0' without
* consuming any bits, which is good enough. */
memset(decode_table, 0, sizeof(decode_table[0]) << table_bits);
return 0;
}
/* Sort the symbols primarily by increasing codeword length and
* secondarily by increasing symbol value. */
/* Initialize 'offsets' so that 'offsets[len]' is the number of
* codewords shorter than 'len' bits, including length 0. */
offsets[0] = 0;
for (unsigned len = 0; len < max_codeword_len; len++)
offsets[len + 1] = offsets[len] + len_counts[len];
/* Use the 'offsets' array to sort the symbols. */
for (unsigned sym = 0; sym < num_syms; sym++)
sorted_syms[offsets[lens[sym]]++] = sym;
/*
* Fill the root table entries for codewords no longer than table_bits.
*
* The table will start with entries for the shortest codeword(s), which
* will have the most entries. From there, the number of entries per
* codeword will decrease. As an optimization, we may begin filling
* entries with SSE2 vector accesses (8 entries/store), then change to
* word accesses (2 or 4 entries/store), then change to 16-bit accesses
* (1 entry/store).
*/
sym_idx = offsets[0];
#ifdef __SSE2__
/* Fill entries one 128-bit vector (8 entries) at a time. */
for (unsigned stores_per_loop = (1U << (table_bits - codeword_len)) /
(sizeof(__m128i) / sizeof(decode_table[0]));
stores_per_loop != 0; codeword_len++, stores_per_loop >>= 1)
{
unsigned end_sym_idx = sym_idx + len_counts[codeword_len];
for (; sym_idx < end_sym_idx; sym_idx++) {
/* Note: unlike in the "word" version below, the __m128i
* type already has __attribute__((may_alias)), so using
* it to access an array of u16 will not violate strict
* aliasing. */
__m128i v = _mm_set1_epi16(
MAKE_DECODE_TABLE_ENTRY(sorted_syms[sym_idx],
codeword_len));
unsigned n = stores_per_loop;
do {
*(__m128i *)entry_ptr = v;
entry_ptr += sizeof(v);
} while (--n);
}
}
#endif /* __SSE2__ */
#ifdef __GNUC__
/* Fill entries one word (2 or 4 entries) at a time. */
for (unsigned stores_per_loop = (1U << (table_bits - codeword_len)) /
(WORDBYTES / sizeof(decode_table[0]));
stores_per_loop != 0; codeword_len++, stores_per_loop >>= 1)
{
unsigned end_sym_idx = sym_idx + len_counts[codeword_len];
for (; sym_idx < end_sym_idx; sym_idx++) {
/* Accessing the array of u16 as u32 or u64 would
* violate strict aliasing and would require compiling
* the code with -fno-strict-aliasing to guarantee
* correctness. To work around this problem, use the
* gcc 'may_alias' extension. */
typedef machine_word_t
__attribute__((may_alias)) aliased_word_t;
aliased_word_t v = repeat_u16(
MAKE_DECODE_TABLE_ENTRY(sorted_syms[sym_idx],
codeword_len));
unsigned n = stores_per_loop;
do {
*(aliased_word_t *)entry_ptr = v;
entry_ptr += sizeof(v);
} while (--n);
}
}
#endif /* __GNUC__ */
/* Fill entries one at a time. */
for (unsigned stores_per_loop = (1U << (table_bits - codeword_len));
stores_per_loop != 0; codeword_len++, stores_per_loop >>= 1)
{
unsigned end_sym_idx = sym_idx + len_counts[codeword_len];
for (; sym_idx < end_sym_idx; sym_idx++) {
u16 v = MAKE_DECODE_TABLE_ENTRY(sorted_syms[sym_idx],
codeword_len);
unsigned n = stores_per_loop;
do {
*(u16 *)entry_ptr = v;
entry_ptr += sizeof(v);
} while (--n);
}
}
/* If all symbols were processed, then no subtables are required. */
if (sym_idx == num_syms)
return 0;
/* At least one subtable is required. Process the remaining symbols. */
codeword = ((u16 *)entry_ptr - decode_table) << 1;
subtable_pos = 1U << table_bits;
subtable_bits = table_bits;
subtable_prefix = -1;
do {
while (len_counts[codeword_len] == 0) {
codeword_len++;
codeword <<= 1;
}
unsigned prefix = codeword >> (codeword_len - table_bits);
/* Start a new subtable if the first 'table_bits' bits of the
* codeword don't match the prefix for the previous subtable, or
* if this will be the first subtable. */
if (prefix != subtable_prefix) {
subtable_prefix = prefix;
/*
* Calculate the subtable length. If the codeword
* length exceeds 'table_bits' by n, then the subtable
* needs at least 2^n entries. But it may need more; if
* there are fewer than 2^n codewords of length
* 'table_bits + n' remaining, then n will need to be
* incremented to bring in longer codewords until the
* subtable can be filled completely. Note that it
* always will, eventually, be possible to fill the
* subtable, since it was previously verified that the
* code is complete.
*/
subtable_bits = codeword_len - table_bits;
remainder = (s32)1 << subtable_bits;
for (;;) {
remainder -= len_counts[table_bits +
subtable_bits];
if (remainder <= 0)
break;
subtable_bits++;
remainder <<= 1;
}
/* Create the entry that points from the root table to
* the subtable. This entry contains the index of the
* start of the subtable and the number of bits with
* which the subtable is indexed (the log base 2 of the
* number of entries it contains). */
decode_table[subtable_prefix] =
MAKE_DECODE_TABLE_ENTRY(subtable_pos,
subtable_bits);
}
/* Fill the subtable entries for this symbol. */
u16 entry = MAKE_DECODE_TABLE_ENTRY(sorted_syms[sym_idx],
codeword_len - table_bits);
unsigned n = 1U << (subtable_bits - (codeword_len -
table_bits));
do {
decode_table[subtable_pos++] = entry;
} while (--n);
len_counts[codeword_len]--;
codeword++;
} while (++sym_idx < num_syms);
return 0;
}
ntfs2btrfs-20240115/src/ebiggers/decompress_common.h000066400000000000000000000474331455127722500223370ustar00rootroot00000000000000/*
* decompress_common.h
*
* Header for decompression code shared by multiple compression formats.
*
* The following copying information applies to this specific source code file:
*
* Written in 2012-2016 by Eric Biggers
*
* To the extent possible under law, the author(s) have dedicated all copyright
* and related and neighboring rights to this software to the public domain
* worldwide via the Creative Commons Zero 1.0 Universal Public Domain
* Dedication (the "CC0").
*
* This software is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the CC0 for more details.
*
* You should have received a copy of the CC0 along with this software; if not
 * see <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#ifndef _DECOMPRESS_COMMON_H
#define _DECOMPRESS_COMMON_H
#include <string.h>
#include <stdint.h>
#include "common_defs.h"
/******************************************************************************/
/* Input bitstream for XPRESS and LZX */
/*----------------------------------------------------------------------------*/
/* Structure that encapsulates a block of in-memory data being interpreted as a
* stream of bits, optionally with interwoven literal bytes. Bits are assumed
* to be stored in little endian 16-bit coding units, with the bits ordered high
* to low. */
struct input_bitstream {
/* Bits that have been read from the input buffer. The bits are
* left-justified; the next bit is always bit 31. */
u32 bitbuf;
/* Number of bits currently held in @bitbuf. */
u32 bitsleft;
/* Pointer to the next byte to be retrieved from the input buffer. */
const u8 *next;
/* Pointer past the end of the input buffer. */
const u8 *end;
};
/* Initialize a bitstream to read from the specified input buffer. */
static forceinline void
init_input_bitstream(struct input_bitstream *is, const void *buffer, u32 size)
{
is->bitbuf = 0;
is->bitsleft = 0;
is->next = buffer;
is->end = is->next + size;
}
/* Note: for performance reasons, the following methods don't return error codes
* to the caller if the input buffer is overrun. Instead, they just assume that
* all overrun data is zeroes. This has no effect on well-formed compressed
* data. The only disadvantage is that bad compressed data may go undetected,
* but even this is irrelevant if higher level code checksums the uncompressed
* data anyway. */
/* Ensure the bit buffer variable for the bitstream contains at least @num_bits
* bits. Following this, bitstream_peek_bits() and/or bitstream_remove_bits()
* may be called on the bitstream to peek or remove up to @num_bits bits. */
static forceinline void
bitstream_ensure_bits(struct input_bitstream *is, const unsigned num_bits)
{
/* This currently works for at most 17 bits. */
if (is->bitsleft >= num_bits)
return;
if (unlikely(is->end - is->next < 2))
goto overflow;
is->bitbuf |= (u32)*((uint16_t*)is->next) << (16 - is->bitsleft);
is->next += 2;
is->bitsleft += 16;
if (unlikely(num_bits == 17 && is->bitsleft == 16)) {
if (unlikely(is->end - is->next < 2))
goto overflow;
is->bitbuf |= (u32)*((uint16_t*)(is->next));
is->next += 2;
is->bitsleft = 32;
}
return;
overflow:
is->bitsleft = 32;
}
/* Return the next @num_bits bits from the bitstream, without removing them.
* There must be at least @num_bits remaining in the buffer variable, from a
* previous call to bitstream_ensure_bits(). */
static forceinline u32
bitstream_peek_bits(const struct input_bitstream *is, const unsigned num_bits)
{
return (is->bitbuf >> 1) >> (sizeof(is->bitbuf) * 8 - num_bits - 1);
}
/* Remove @num_bits from the bitstream. There must be at least @num_bits
* remaining in the buffer variable, from a previous call to
* bitstream_ensure_bits(). */
static forceinline void
bitstream_remove_bits(struct input_bitstream *is, unsigned num_bits)
{
is->bitbuf <<= num_bits;
is->bitsleft -= num_bits;
}
/* Remove and return @num_bits bits from the bitstream. There must be at least
* @num_bits remaining in the buffer variable, from a previous call to
* bitstream_ensure_bits(). */
static forceinline u32
bitstream_pop_bits(struct input_bitstream *is, unsigned num_bits)
{
u32 bits = bitstream_peek_bits(is, num_bits);
bitstream_remove_bits(is, num_bits);
return bits;
}
/* Read and return the next @num_bits bits from the bitstream. */
static forceinline u32
bitstream_read_bits(struct input_bitstream *is, unsigned num_bits)
{
bitstream_ensure_bits(is, num_bits);
return bitstream_pop_bits(is, num_bits);
}
/* Read and return the next literal byte embedded in the bitstream. */
static forceinline u8
bitstream_read_byte(struct input_bitstream *is)
{
if (unlikely(is->end == is->next))
return 0;
return *is->next++;
}
/* Read and return the next 16-bit integer embedded in the bitstream. */
static forceinline u16
bitstream_read_u16(struct input_bitstream *is)
{
u16 v;
if (unlikely(is->end - is->next < 2))
return 0;
v = *(uint16_t*)is->next;
is->next += 2;
return v;
}
/* Read and return the next 32-bit integer embedded in the bitstream. */
static forceinline u32
bitstream_read_u32(struct input_bitstream *is)
{
u32 v;
if (unlikely(is->end - is->next < 4))
return 0;
v = *(uint32_t*)is->next;
is->next += 4;
return v;
}
/* Read into @dst_buffer an array of literal bytes embedded in the bitstream.
* Return 0 if there were enough bytes remaining in the input, otherwise -1. */
static forceinline int
bitstream_read_bytes(struct input_bitstream *is, void *dst_buffer, size_t count)
{
if (unlikely(is->end - is->next < count))
return -1;
memcpy(dst_buffer, is->next, count);
is->next += count;
return 0;
}
/* Align the input bitstream on a coding-unit boundary. */
static forceinline void
bitstream_align(struct input_bitstream *is)
{
is->bitsleft = 0;
is->bitbuf = 0;
}
/******************************************************************************/
/* Huffman decoding */
/*----------------------------------------------------------------------------*/
/*
* Required alignment for the Huffman decode tables. We require this alignment
* so that we can fill the entries with vector or word instructions and not have
* to deal with misaligned buffers.
*/
#define DECODE_TABLE_ALIGNMENT 16
/*
* Each decode table entry is 16 bits divided into two fields: 'symbol' (high 12
* bits) and 'length' (low 4 bits). The precise meaning of these fields depends
* on the type of entry:
*
* Root table entries which are *not* subtable pointers:
* symbol: symbol to decode
* length: codeword length in bits
*
* Root table entries which are subtable pointers:
* symbol: index of start of subtable
* length: number of bits with which the subtable is indexed
*
* Subtable entries:
* symbol: symbol to decode
* length: codeword length in bits, minus the number of bits with which the
* root table is indexed
*/
#define DECODE_TABLE_SYMBOL_SHIFT 4
#define DECODE_TABLE_MAX_SYMBOL ((1 << (16 - DECODE_TABLE_SYMBOL_SHIFT)) - 1)
#define DECODE_TABLE_MAX_LENGTH ((1 << DECODE_TABLE_SYMBOL_SHIFT) - 1)
#define DECODE_TABLE_LENGTH_MASK DECODE_TABLE_MAX_LENGTH
#define MAKE_DECODE_TABLE_ENTRY(symbol, length) \
(((symbol) << DECODE_TABLE_SYMBOL_SHIFT) | (length))
/*
* Read and return the next Huffman-encoded symbol from the given bitstream
* using the given decode table.
*
* If the input data is exhausted, then the Huffman symbol will be decoded as if
* the missing bits were all zeroes.
*
* XXX: This is mostly duplicated in lzms_decode_huffman_symbol() in
* lzms_decompress.c; keep them in sync!
*/
static forceinline unsigned
read_huffsym(struct input_bitstream *is, const u16 decode_table[],
unsigned table_bits, unsigned max_codeword_len)
{
unsigned entry;
unsigned symbol;
unsigned length;
/* Preload the bitbuffer with 'max_codeword_len' bits so that we're
* guaranteed to be able to fully decode a codeword. */
bitstream_ensure_bits(is, max_codeword_len);
/* Index the root table by the next 'table_bits' bits of input. */
entry = decode_table[bitstream_peek_bits(is, table_bits)];
/* Extract the "symbol" and "length" from the entry. */
symbol = entry >> DECODE_TABLE_SYMBOL_SHIFT;
length = entry & DECODE_TABLE_LENGTH_MASK;
/* If the root table is indexed by the full 'max_codeword_len' bits,
* then there cannot be any subtables, and this will be known at compile
* time. Otherwise, we must check whether the decoded symbol is really
* a subtable pointer. If so, we must discard the bits with which the
* root table was indexed, then index the subtable by the next 'length'
* bits of input to get the real entry. */
if (max_codeword_len > table_bits &&
entry >= (1U << (table_bits + DECODE_TABLE_SYMBOL_SHIFT)))
{
/* Subtable required */
bitstream_remove_bits(is, table_bits);
entry = decode_table[symbol + bitstream_peek_bits(is, length)];
symbol = entry >> DECODE_TABLE_SYMBOL_SHIFT;
length = entry & DECODE_TABLE_LENGTH_MASK;
}
/* Discard the bits (or the remaining bits, if a subtable was required)
* of the codeword. */
bitstream_remove_bits(is, length);
/* Return the decoded symbol. */
return symbol;
}
/*
* The DECODE_TABLE_ENOUGH() macro evaluates to the maximum number of decode
* table entries, including all subtable entries, that may be required for
* decoding a given Huffman code. This depends on three parameters:
*
* num_syms: the maximum number of symbols in the code
* table_bits: the number of bits with which the root table will be indexed
* max_codeword_len: the maximum allowed codeword length in the code
*
* Given these parameters, the utility program 'enough' from zlib, when passed
* the three arguments 'num_syms', 'table_bits', and 'max_codeword_len', will
* compute the maximum number of entries required. This has already been done
* for the combinations we need and incorporated into the macro below so that
* the mapping can be done at compilation time. If an unknown combination is
* used, then a compilation error will result. To fix this, use 'enough' to
* find the missing value and add it below. If that still doesn't fix the
* compilation error, then most likely a constraint would be violated by the
* requested parameters, so they cannot be used, at least without other changes
* to the decode table --- see DECODE_TABLE_SIZE().
*/
#define DECODE_TABLE_ENOUGH(num_syms, table_bits, max_codeword_len) ( \
((num_syms) == 8 && (table_bits) == 7 && (max_codeword_len) == 15) ? 128 : \
((num_syms) == 8 && (table_bits) == 5 && (max_codeword_len) == 7) ? 36 : \
((num_syms) == 8 && (table_bits) == 6 && (max_codeword_len) == 7) ? 66 : \
((num_syms) == 8 && (table_bits) == 7 && (max_codeword_len) == 7) ? 128 : \
((num_syms) == 20 && (table_bits) == 5 && (max_codeword_len) == 15) ? 1062 : \
((num_syms) == 20 && (table_bits) == 6 && (max_codeword_len) == 15) ? 582 : \
((num_syms) == 20 && (table_bits) == 7 && (max_codeword_len) == 15) ? 390 : \
((num_syms) == 54 && (table_bits) == 9 && (max_codeword_len) == 15) ? 618 : \
((num_syms) == 54 && (table_bits) == 10 && (max_codeword_len) == 15) ? 1098 : \
((num_syms) == 249 && (table_bits) == 9 && (max_codeword_len) == 16) ? 878 : \
((num_syms) == 249 && (table_bits) == 10 && (max_codeword_len) == 16) ? 1326 : \
((num_syms) == 249 && (table_bits) == 11 && (max_codeword_len) == 16) ? 2318 : \
((num_syms) == 256 && (table_bits) == 9 && (max_codeword_len) == 15) ? 822 : \
((num_syms) == 256 && (table_bits) == 10 && (max_codeword_len) == 15) ? 1302 : \
((num_syms) == 256 && (table_bits) == 11 && (max_codeword_len) == 15) ? 2310 : \
((num_syms) == 512 && (table_bits) == 10 && (max_codeword_len) == 15) ? 1558 : \
((num_syms) == 512 && (table_bits) == 11 && (max_codeword_len) == 15) ? 2566 : \
((num_syms) == 512 && (table_bits) == 12 && (max_codeword_len) == 15) ? 4606 : \
((num_syms) == 656 && (table_bits) == 10 && (max_codeword_len) == 16) ? 1734 : \
((num_syms) == 656 && (table_bits) == 11 && (max_codeword_len) == 16) ? 2726 : \
((num_syms) == 656 && (table_bits) == 12 && (max_codeword_len) == 16) ? 4758 : \
((num_syms) == 799 && (table_bits) == 9 && (max_codeword_len) == 15) ? 1366 : \
((num_syms) == 799 && (table_bits) == 10 && (max_codeword_len) == 15) ? 1846 : \
((num_syms) == 799 && (table_bits) == 11 && (max_codeword_len) == 15) ? 2854 : \
-1)
/* Wrapper around DECODE_TABLE_ENOUGH() that does additional compile-time
* validation. */
#define DECODE_TABLE_SIZE(num_syms, table_bits, max_codeword_len) ( \
\
/* All values must be positive. */ \
STATIC_ASSERT_ZERO((num_syms) > 0) + \
STATIC_ASSERT_ZERO((table_bits) > 0) + \
STATIC_ASSERT_ZERO((max_codeword_len) > 0) + \
\
/* There cannot be more symbols than possible codewords. */ \
STATIC_ASSERT_ZERO((num_syms) <= 1U << (max_codeword_len)) + \
\
/* There is no reason for the root table to be indexed with
* more bits than the maximum codeword length. */ \
STATIC_ASSERT_ZERO((table_bits) <= (max_codeword_len)) + \
\
/* The maximum symbol value must fit in the 'symbol' field. */ \
STATIC_ASSERT_ZERO((num_syms) - 1 <= DECODE_TABLE_MAX_SYMBOL) + \
\
/* The maximum codeword length in the root table must fit in
* the 'length' field. */ \
STATIC_ASSERT_ZERO((table_bits) <= DECODE_TABLE_MAX_LENGTH) + \
\
/* The maximum codeword length in a subtable must fit in the
* 'length' field. */ \
STATIC_ASSERT_ZERO((max_codeword_len) - (table_bits) <= \
DECODE_TABLE_MAX_LENGTH) + \
\
/* The minimum subtable index must be greater than the maximum
* symbol value. If this were not the case, then there would
* be no way to tell whether a given root table entry is a
* "subtable pointer" or not. (An alternate solution would be
* to reserve a flag bit specifically for this purpose.) */ \
STATIC_ASSERT_ZERO((1U << table_bits) > (num_syms) - 1) + \
\
/* The needed 'enough' value must have been defined. */ \
STATIC_ASSERT_ZERO(DECODE_TABLE_ENOUGH( \
(num_syms), (table_bits), \
(max_codeword_len)) > 0) + \
\
/* The maximum subtable index must fit in the 'symbol' field. */\
STATIC_ASSERT_ZERO(DECODE_TABLE_ENOUGH( \
(num_syms), (table_bits), \
(max_codeword_len)) - 1 <= \
DECODE_TABLE_MAX_SYMBOL) + \
\
/* Finally, make the macro evaluate to the needed maximum
* number of decode table entries. */ \
DECODE_TABLE_ENOUGH((num_syms), (table_bits), \
(max_codeword_len)) \
)
/*
* Declare the decode table for a Huffman code, given several compile-time
* constants that describe the code. See DECODE_TABLE_ENOUGH() for details.
*
* Decode tables must be aligned to a DECODE_TABLE_ALIGNMENT-byte boundary.
* This implies that if a decode table is nested inside a dynamically allocated
* structure, then the outer structure must be allocated on a
* DECODE_TABLE_ALIGNMENT-byte aligned boundary as well.
*/
#define DECODE_TABLE(name, num_syms, table_bits, max_codeword_len) \
u16 name[DECODE_TABLE_SIZE((num_syms), (table_bits), \
(max_codeword_len))] \
_aligned_attribute(DECODE_TABLE_ALIGNMENT)
/*
* Declare the temporary "working_space" array needed for building the decode
* table for a Huffman code.
*/
#define DECODE_TABLE_WORKING_SPACE(name, num_syms, max_codeword_len) \
u16 name[2 * ((max_codeword_len) + 1) + (num_syms)]
extern int
make_huffman_decode_table(u16 decode_table[], unsigned num_syms,
unsigned table_bits, const u8 lens[],
unsigned max_codeword_len, u16 working_space[]);
/******************************************************************************/
/* LZ match copying */
/*----------------------------------------------------------------------------*/
static forceinline void
copy_word_unaligned(const void *src, void *dst)
{
*(machine_word_t*)dst = *(machine_word_t*)src;
}
static forceinline machine_word_t
repeat_u16(u16 b)
{
machine_word_t v = b;
STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
v |= v << 16;
v |= v << ((WORDBITS == 64) ? 32 : 0);
return v;
}
static forceinline machine_word_t
repeat_byte(u8 b)
{
return repeat_u16(((u16)b << 8) | b);
}
/*
* Copy an LZ77 match of 'length' bytes from the match source at 'out_next -
* offset' to the match destination at 'out_next'. The source and destination
* may overlap.
*
* This handles validating the length and offset. It is validated that the
* beginning of the match source is '>= out_begin' and that end of the match
* destination is '<= out_end'. The return value is 0 if the match was valid
* (and was copied), otherwise -1.
*
* 'min_length' is a hint which specifies the minimum possible match length.
* This should be a compile-time constant.
*/
static forceinline int
lz_copy(u32 length, u32 offset, u8 *out_begin, u8 *out_next, u8 *out_end,
u32 min_length)
{
const u8 *src;
u8 *end;
/* Validate the offset. */
if (unlikely(offset > out_next - out_begin))
return -1;
/*
* Fast path: copy a match which is no longer than a few words, is not
* overlapped such that copying a word at a time would produce incorrect
* results, and is not too close to the end of the buffer. Note that
* this might copy more than the length of the match, but that's okay in
* this scenario.
*/
src = out_next - offset;
if (UNALIGNED_ACCESS_IS_FAST && length <= 3 * WORDBYTES &&
offset >= WORDBYTES && out_end - out_next >= 3 * WORDBYTES)
{
copy_word_unaligned(src + WORDBYTES*0, out_next + WORDBYTES*0);
copy_word_unaligned(src + WORDBYTES*1, out_next + WORDBYTES*1);
copy_word_unaligned(src + WORDBYTES*2, out_next + WORDBYTES*2);
return 0;
}
/* Validate the length. This isn't needed in the fast path above, due
* to the additional conditions tested, but we do need it here. */
if (unlikely(length > out_end - out_next))
return -1;
end = out_next + length;
/*
* Try to copy one word at a time. On i386 and x86_64 this is faster
* than copying one byte at a time, unless the data is near-random and
* all the matches have very short lengths. Note that since this
* requires unaligned memory accesses, it won't necessarily be faster on
* every architecture.
*
* Also note that we might copy more than the length of the match. For
* example, if a word is 8 bytes and the match is of length 5, then
* we'll simply copy 8 bytes. This is okay as long as we don't write
* beyond the end of the output buffer, hence the check for (out_end -
* end >= WORDBYTES - 1).
*/
if (UNALIGNED_ACCESS_IS_FAST && likely(out_end - end >= WORDBYTES - 1))
{
if (offset >= WORDBYTES) {
/* The source and destination words don't overlap. */
do {
copy_word_unaligned(src, out_next);
src += WORDBYTES;
out_next += WORDBYTES;
} while (out_next < end);
return 0;
} else if (offset == 1) {
/* Offset 1 matches are equivalent to run-length
* encoding of the previous byte. This case is common
* if the data contains many repeated bytes. */
machine_word_t v = repeat_byte(*(out_next - 1));
do {
*(machine_word_t*)out_next = v;
src += WORDBYTES;
out_next += WORDBYTES;
} while (out_next < end);
return 0;
}
/*
* We don't bother with special cases for other 'offset <
* WORDBYTES', which are usually rarer than 'offset == 1'.
* Extra checks will just slow things down. Actually, it's
* possible to handle all the 'offset < WORDBYTES' cases using
* the same code, but it still becomes more complicated doesn't
* seem any faster overall; it definitely slows down the more
* common 'offset == 1' case.
*/
}
/* Fall back to a bytewise copy. */
if (min_length >= 2)
*out_next++ = *src++;
if (min_length >= 3)
*out_next++ = *src++;
if (min_length >= 4)
*out_next++ = *src++;
do {
*out_next++ = *src++;
} while (out_next != end);
return 0;
}
#endif /* _DECOMPRESS_COMMON_H */
ntfs2btrfs-20240115/src/ebiggers/lzx_common.c000066400000000000000000000233661455127722500210020ustar00rootroot00000000000000/*
* lzx_common.c - Common code for LZX compression and decompression.
*/
/*
* Copyright (C) 2012-2016 Eric Biggers
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 2 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include <string.h>
#ifdef __SSE2__
# include <emmintrin.h>
#endif
#ifdef __AVX2__
# include <immintrin.h>
#endif
#include "common_defs.h"
#include "lzx_common.h"
/* Mapping: offset slot => first match offset that uses that offset slot.
* The offset slots for repeat offsets map to "fake" offsets < 1. */
const s32 lzx_offset_slot_base[LZX_MAX_OFFSET_SLOTS + 1] = {
-2 , -1 , 0 , 1 , 2 , /* 0 --- 4 */
4 , 6 , 10 , 14 , 22 , /* 5 --- 9 */
30 , 46 , 62 , 94 , 126 , /* 10 --- 14 */
190 , 254 , 382 , 510 , 766 , /* 15 --- 19 */
1022 , 1534 , 2046 , 3070 , 4094 , /* 20 --- 24 */
6142 , 8190 , 12286 , 16382 , 24574 , /* 25 --- 29 */
32766 , 49150 , 65534 , 98302 , 131070 , /* 30 --- 34 */
196606 , 262142 , 393214 , 524286 , 655358 , /* 35 --- 39 */
786430 , 917502 , 1048574, 1179646, 1310718, /* 40 --- 44 */
1441790, 1572862, 1703934, 1835006, 1966078, /* 45 --- 49 */
2097150 /* extra */
};
/* Mapping: offset slot => how many extra bits must be read and added to the
* corresponding offset slot base to decode the match offset. */
const u8 lzx_extra_offset_bits[LZX_MAX_OFFSET_SLOTS] = {
0 , 0 , 0 , 0 , 1 ,
1 , 2 , 2 , 3 , 3 ,
4 , 4 , 5 , 5 , 6 ,
6 , 7 , 7 , 8 , 8 ,
9 , 9 , 10, 10, 11,
11, 12, 12, 13, 13,
14, 14, 15, 15, 16,
16, 17, 17, 17, 17,
17, 17, 17, 17, 17,
17, 17, 17, 17, 17,
};
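/*
 * Worked example (illustrative, not part of the original source): offset slot
 * 9 has base 22 and 3 extra offset bits, so it covers match offsets 22
 * through 29; slot 10 then begins at base 30, as in the table above. The
 * decompressor recovers an explicit offset as lzx_offset_slot_base[slot] plus
 * the extra bits read from the bitstream.
 */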
/* Round the specified buffer size up to the next valid LZX window size, and
* return its order (log2). Or, if the buffer size is 0 or greater than the
* largest valid LZX window size, return 0. */
unsigned
lzx_get_window_order(size_t max_bufsize)
{
if (max_bufsize == 0 || max_bufsize > LZX_MAX_WINDOW_SIZE)
return 0;
return max(ilog2_ceil(max_bufsize), LZX_MIN_WINDOW_ORDER);
}
/* Given a valid LZX window order, return the number of symbols that will exist
* in the main Huffman code. */
unsigned
lzx_get_num_main_syms(unsigned window_order)
{
/* Note: one would expect that the maximum match offset would be
* 'window_size - LZX_MIN_MATCH_LEN', which would occur if the first two
* bytes were to match the last two bytes. However, the format
* disallows this case. This reduces the number of needed offset slots
* by 1. */
u32 window_size = (u32)1 << window_order;
u32 max_offset = window_size - LZX_MIN_MATCH_LEN - 1;
unsigned num_offset_slots = 30;
while (max_offset >= lzx_offset_slot_base[num_offset_slots])
num_offset_slots++;
return LZX_NUM_CHARS + (num_offset_slots * LZX_NUM_LEN_HEADERS);
}
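/*
 * Illustrative example (not part of the original source): for the 2^15-byte
 * window used by WIM chunks, max_offset = 32768 - 2 - 1 = 32765, which is
 * less than lzx_offset_slot_base[30] = 32766, so there are 30 offset slots
 * and the main code has 256 + (30 * 8) = 496 symbols.
 */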
static void
do_translate_target(void *target, s32 input_pos)
{
s32 abs_offset, rel_offset;
rel_offset = *(int32_t*)target;
if (rel_offset >= -input_pos && rel_offset < LZX_WIM_MAGIC_FILESIZE) {
if (rel_offset < LZX_WIM_MAGIC_FILESIZE - input_pos) {
/* "good translation" */
abs_offset = rel_offset + input_pos;
} else {
/* "compensating translation" */
abs_offset = rel_offset - LZX_WIM_MAGIC_FILESIZE;
}
*(uint32_t*)target = abs_offset;
}
}
static void
undo_translate_target(void *target, s32 input_pos)
{
s32 abs_offset, rel_offset;
abs_offset = *(int32_t*)target;
if (abs_offset >= 0) {
if (abs_offset < LZX_WIM_MAGIC_FILESIZE) {
/* "good translation" */
rel_offset = abs_offset - input_pos;
*(uint32_t*)target = rel_offset;
}
} else {
if (abs_offset >= -input_pos) {
/* "compensating translation" */
rel_offset = abs_offset + LZX_WIM_MAGIC_FILESIZE;
*(uint32_t*)target = rel_offset;
}
}
}
/*
* Do or undo the 'E8' preprocessing used in LZX. Before compression, the
* uncompressed data is preprocessed by changing the targets of x86 CALL
* instructions from relative offsets to absolute offsets. After decompression,
* the translation is undone by changing the targets of x86 CALL instructions
* from absolute offsets to relative offsets.
*
* Note that despite its intent, E8 preprocessing can be done on any data even
* if it is not actually x86 machine code. In fact, E8 preprocessing appears to
* always be used in LZX-compressed resources in WIM files; there is no bit to
* indicate whether it is used or not, unlike in the LZX compressed format as
* used in cabinet files, where a bit is reserved for that purpose.
*
* E8 preprocessing is disabled in the last 6 bytes of the uncompressed data,
* which really means the 5-byte call instruction cannot start in the last 10
* bytes of the uncompressed data. This is one of the errors in the LZX
* documentation.
*
* E8 preprocessing does not appear to be disabled after the 32768th chunk of a
* WIM resource, which apparently is another difference from the LZX compression
* used in cabinet files.
*
* E8 processing is supposed to take the file size as a parameter, as it is used
* in calculating the translated jump targets. But in WIM files, this file size
* is always the same (LZX_WIM_MAGIC_FILESIZE == 12000000).
*/
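/*
 * Illustrative example (not part of the original source): if an E8 byte lies
 * at input position 0x1000 and its 32-bit operand holds the relative offset
 * 0x234, do_translate_target() replaces it with the absolute target 0x1234
 * (= 0x234 + 0x1000); undo_translate_target() performs the inverse and
 * restores 0x234.
 */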
static void
lzx_e8_filter(u8 *data, u32 size, void (*process_target)(void *, s32))
{
#if !defined(__SSE2__) && !defined(__AVX2__)
/*
* A worthwhile optimization is to push the end-of-buffer check into the
* relatively rare E8 case. This is possible if we replace the last six
* bytes of data with E8 bytes; then we are guaranteed to hit an E8 byte
* before reaching end-of-buffer. In addition, this scheme guarantees
* that no translation can begin following an E8 byte in the last 10
* bytes because a 4-byte offset containing E8 as its high byte is a
* large negative number that is not valid for translation. That is
* exactly what we need.
*/
u8 *tail;
u8 saved_bytes[6];
u8 *p;
if (size <= 10)
return;
tail = &data[size - 6];
memcpy(saved_bytes, tail, 6);
memset(tail, 0xE8, 6);
p = data;
for (;;) {
while (*p != 0xE8)
p++;
if (p >= tail)
break;
(*process_target)(p + 1, p - data);
p += 5;
}
memcpy(tail, saved_bytes, 6);
#else
/* SSE2 or AVX-2 optimized version for x86_64 */
u8 *p = data;
u64 valid_mask = ~0;
if (size <= 10)
return;
#ifdef __AVX2__
# define ALIGNMENT_REQUIRED 32
#else
# define ALIGNMENT_REQUIRED 16
#endif
/* Process one byte at a time until the pointer is properly aligned. */
while ((uintptr_t)p % ALIGNMENT_REQUIRED != 0) {
if (p >= data + size - 10)
return;
if (*p == 0xE8 && (valid_mask & 1)) {
(*process_target)(p + 1, p - data);
valid_mask &= ~0x1F;
}
p++;
valid_mask >>= 1;
valid_mask |= (u64)1 << 63;
}
if (data + size - p >= 64) {
/* Vectorized processing */
/* Note: we use a "trap" E8 byte to eliminate the need to check
* for end-of-buffer in the inner loop. This byte is carefully
* positioned so that it will never be changed by a previous
* translation before it is detected. */
u8 *trap = p + ((data + size - p) & ~31) - 32 + 4;
u8 saved_byte = *trap;
*trap = 0xE8;
for (;;) {
u32 e8_mask;
u8 *orig_p = p;
#ifdef __AVX2__
const __m256i e8_bytes = _mm256_set1_epi8(0xE8);
for (;;) {
__m256i bytes = *(const __m256i *)p;
__m256i cmpresult = _mm256_cmpeq_epi8(bytes, e8_bytes);
e8_mask = _mm256_movemask_epi8(cmpresult);
if (e8_mask)
break;
p += 32;
}
#else
const __m128i e8_bytes = _mm_set1_epi8(0xE8);
for (;;) {
/* Read the next 32 bytes of data and test them
* for E8 bytes. */
__m128i bytes1 = *(const __m128i *)p;
__m128i bytes2 = *(const __m128i *)(p + 16);
__m128i cmpresult1 = _mm_cmpeq_epi8(bytes1, e8_bytes);
__m128i cmpresult2 = _mm_cmpeq_epi8(bytes2, e8_bytes);
u32 mask1 = _mm_movemask_epi8(cmpresult1);
u32 mask2 = _mm_movemask_epi8(cmpresult2);
/* The masks have a bit set for each E8 byte.
* We stay in this fast inner loop as long as
* there are no E8 bytes. */
if (mask1 | mask2) {
e8_mask = mask1 | (mask2 << 16);
break;
}
p += 32;
}
#endif
/* Did we pass over data with no E8 bytes? */
if (p != orig_p)
valid_mask = ~0;
/* Are we nearing end-of-buffer? */
if (p == trap - 4)
break;
/* Process the E8 bytes. However, the AND with
* 'valid_mask' ensures we never process an E8 byte that
* was itself part of a translation target. */
while ((e8_mask &= valid_mask)) {
unsigned bit = bsf32(e8_mask);
(*process_target)(p + bit + 1, p + bit - data);
valid_mask &= ~((u64)0x1F << bit);
}
valid_mask >>= 32;
valid_mask |= 0xFFFFFFFF00000000;
p += 32;
}
*trap = saved_byte;
}
/* Approaching the end of the buffer; process one byte at a time. */
while (p < data + size - 10) {
if (*p == 0xE8 && (valid_mask & 1)) {
(*process_target)(p + 1, p - data);
valid_mask &= ~0x1F;
}
p++;
valid_mask >>= 1;
valid_mask |= (u64)1 << 63;
}
#endif /* __SSE2__ || __AVX2__ */
}
void
lzx_preprocess(u8 *data, u32 size)
{
lzx_e8_filter(data, size, do_translate_target);
}
void
lzx_postprocess(u8 *data, u32 size)
{
lzx_e8_filter(data, size, undo_translate_target);
}
ntfs2btrfs-20240115/src/ebiggers/lzx_common.h000066400000000000000000000010671455127722500210010ustar00rootroot00000000000000/*
* lzx_common.h
*
* Declarations shared between LZX compression and decompression.
*/
#ifndef _LZX_COMMON_H
#define _LZX_COMMON_H
#include "lzx_constants.h"
#include "common_defs.h"
extern const s32 lzx_offset_slot_base[LZX_MAX_OFFSET_SLOTS + 1];
extern const u8 lzx_extra_offset_bits[LZX_MAX_OFFSET_SLOTS];
extern unsigned
lzx_get_window_order(size_t max_bufsize);
extern unsigned
lzx_get_num_main_syms(unsigned window_order);
extern void
lzx_preprocess(u8 *data, u32 size);
extern void
lzx_postprocess(u8 *data, u32 size);
#endif /* _LZX_COMMON_H */
ntfs2btrfs-20240115/src/ebiggers/lzx_constants.h000066400000000000000000000074751455127722500215360ustar00rootroot00000000000000/*
* lzx_constants.h
*
* Constants for the LZX compression format.
*/
#ifndef _LZX_CONSTANTS_H
#define _LZX_CONSTANTS_H
/* Number of literal byte values. */
#define LZX_NUM_CHARS 256
/* The smallest and largest allowed match lengths. */
#define LZX_MIN_MATCH_LEN 2
#define LZX_MAX_MATCH_LEN 257
/* Number of distinct match lengths that can be represented. */
#define LZX_NUM_LENS (LZX_MAX_MATCH_LEN - LZX_MIN_MATCH_LEN + 1)
/* Number of match lengths for which no length symbol is required. */
#define LZX_NUM_PRIMARY_LENS 7
#define LZX_NUM_LEN_HEADERS (LZX_NUM_PRIMARY_LENS + 1)
/* Valid values of the 3-bit block type field. */
#define LZX_BLOCKTYPE_VERBATIM 1
#define LZX_BLOCKTYPE_ALIGNED 2
#define LZX_BLOCKTYPE_UNCOMPRESSED 3
/* 'LZX_MIN_WINDOW_SIZE' and 'LZX_MAX_WINDOW_SIZE' are the minimum and maximum
* sizes of the sliding window. */
#define LZX_MIN_WINDOW_ORDER 15
#define LZX_MAX_WINDOW_ORDER 21
#define LZX_MIN_WINDOW_SIZE (1UL << LZX_MIN_WINDOW_ORDER) /* 32768 */
#define LZX_MAX_WINDOW_SIZE (1UL << LZX_MAX_WINDOW_ORDER) /* 2097152 */
/* Maximum number of offset slots. (The actual number of offset slots depends
* on the window size.) */
#define LZX_MAX_OFFSET_SLOTS 50
/* Maximum number of symbols in the main code. (The actual number of symbols in
* the main code depends on the window size.) */
#define LZX_MAINCODE_MAX_NUM_SYMBOLS \
(LZX_NUM_CHARS + (LZX_MAX_OFFSET_SLOTS * LZX_NUM_LEN_HEADERS))
/* Number of symbols in the length code. */
#define LZX_LENCODE_NUM_SYMBOLS (LZX_NUM_LENS - LZX_NUM_PRIMARY_LENS)
/* Number of symbols in the pre-code. */
#define LZX_PRECODE_NUM_SYMBOLS 20
/* Number of bits in which each pre-code codeword length is represented. */
#define LZX_PRECODE_ELEMENT_SIZE 4
/* Number of low-order bits of each match offset that are entropy-encoded in
* aligned offset blocks. */
#define LZX_NUM_ALIGNED_OFFSET_BITS 3
/* Number of symbols in the aligned offset code. */
#define LZX_ALIGNEDCODE_NUM_SYMBOLS (1 << LZX_NUM_ALIGNED_OFFSET_BITS)
/* Mask for the match offset bits that are entropy-encoded in aligned offset
* blocks. */
#define LZX_ALIGNED_OFFSET_BITMASK ((1 << LZX_NUM_ALIGNED_OFFSET_BITS) - 1)
/* Number of bits in which each aligned offset codeword length is represented. */
#define LZX_ALIGNEDCODE_ELEMENT_SIZE 3
/* The first offset slot which requires an aligned offset symbol in aligned
* offset blocks. */
#define LZX_MIN_ALIGNED_OFFSET_SLOT 8
/* The offset slot base for LZX_MIN_ALIGNED_OFFSET_SLOT. */
#define LZX_MIN_ALIGNED_OFFSET 14
/* The maximum number of extra offset bits in verbatim blocks. (One would need
* to subtract LZX_NUM_ALIGNED_OFFSET_BITS to get the number of extra offset
* bits in *aligned* blocks.) */
#define LZX_MAX_NUM_EXTRA_BITS 17
/* Maximum lengths (in bits) for length-limited Huffman code construction. */
#define LZX_MAX_MAIN_CODEWORD_LEN 16
#define LZX_MAX_LEN_CODEWORD_LEN 16
#define LZX_MAX_PRE_CODEWORD_LEN ((1 << LZX_PRECODE_ELEMENT_SIZE) - 1)
#define LZX_MAX_ALIGNED_CODEWORD_LEN ((1 << LZX_ALIGNEDCODE_ELEMENT_SIZE) - 1)
/* For LZX-compressed blocks in WIM resources, this value is always used as the
* filesize parameter for the call instruction (0xe8 byte) preprocessing, even
* though the blocks themselves are not this size, and the size of the actual
* file resource in the WIM file is very likely to be something entirely
* different as well. */
#define LZX_WIM_MAGIC_FILESIZE 12000000
/* Assumed LZX block size when the encoded block size begins with a 0 bit.
* This is probably WIM-specific. */
#define LZX_DEFAULT_BLOCK_SIZE 32768
/* Number of offsets in the recent (or "repeat") offsets queue. */
#define LZX_NUM_RECENT_OFFSETS 3
/* An offset of n bytes is actually encoded as (n + LZX_OFFSET_ADJUSTMENT). */
#define LZX_OFFSET_ADJUSTMENT (LZX_NUM_RECENT_OFFSETS - 1)
#endif /* _LZX_CONSTANTS_H */
ntfs2btrfs-20240115/src/ebiggers/lzx_decompress.c000066400000000000000000000403051455127722500216460ustar00rootroot00000000000000/*
* lzx_decompress.c
*
* A decompressor for the LZX compression format, as used in WIM files.
*/
/*
* Copyright (C) 2012-2016 Eric Biggers
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 2 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see .
*/
/*
* LZX is an LZ77 and Huffman-code based compression format that has many
* similarities to DEFLATE (the format used by zlib/gzip). The compression
* ratio is as good or better than DEFLATE. See lzx_compress.c for a format
* overview, and see https://en.wikipedia.org/wiki/LZX_(algorithm) for a
* historical overview. Here I make some pragmatic notes.
*
* The old specification for LZX is the document "Microsoft LZX Data Compression
* Format" (1997). It defines the LZX format as used in cabinet files. Allowed
* window sizes are 2^n where 15 <= n <= 21. However, this document contains
* several errors, so don't read too much into it...
*
* The new specification for LZX is the document "[MS-PATCH]: LZX DELTA
* Compression and Decompression" (2014). It defines the LZX format as used by
* Microsoft's binary patcher. It corrects several errors in the 1997 document
* and extends the format in several ways --- namely, optional reference data,
* up to 2^25 byte windows, and longer match lengths.
*
* WIM files use a more restricted form of LZX. No LZX DELTA extensions are
* present, the window is not "sliding", E8 preprocessing is done
* unconditionally with a fixed file size, and the maximum window size is always
* 2^15 bytes (equal to the size of each "chunk" in a compressed WIM resource).
* This code is primarily intended to implement this form of LZX. But although
* not compatible with WIMGAPI, this code also supports maximum window sizes up
* to 2^21 bytes.
*
* TODO: Add support for window sizes up to 2^25 bytes.
*/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include <errno.h>
#include <string.h>
#include "decompress_common.h"
#include "lzx_common.h"
#include "system_compression.h"
/* These values are chosen for fast decompression. */
#define LZX_MAINCODE_TABLEBITS 11
#define LZX_LENCODE_TABLEBITS 9
#define LZX_PRECODE_TABLEBITS 6
#define LZX_ALIGNEDCODE_TABLEBITS 7
#define LZX_READ_LENS_MAX_OVERRUN 50
struct lzx_decompressor {
DECODE_TABLE(maincode_decode_table, LZX_MAINCODE_MAX_NUM_SYMBOLS,
LZX_MAINCODE_TABLEBITS, LZX_MAX_MAIN_CODEWORD_LEN);
u8 maincode_lens[LZX_MAINCODE_MAX_NUM_SYMBOLS + LZX_READ_LENS_MAX_OVERRUN];
DECODE_TABLE(lencode_decode_table, LZX_LENCODE_NUM_SYMBOLS,
LZX_LENCODE_TABLEBITS, LZX_MAX_LEN_CODEWORD_LEN);
u8 lencode_lens[LZX_LENCODE_NUM_SYMBOLS + LZX_READ_LENS_MAX_OVERRUN];
union {
DECODE_TABLE(alignedcode_decode_table, LZX_ALIGNEDCODE_NUM_SYMBOLS,
LZX_ALIGNEDCODE_TABLEBITS, LZX_MAX_ALIGNED_CODEWORD_LEN);
u8 alignedcode_lens[LZX_ALIGNEDCODE_NUM_SYMBOLS];
};
union {
DECODE_TABLE(precode_decode_table, LZX_PRECODE_NUM_SYMBOLS,
LZX_PRECODE_TABLEBITS, LZX_MAX_PRE_CODEWORD_LEN);
u8 precode_lens[LZX_PRECODE_NUM_SYMBOLS];
u8 extra_offset_bits[LZX_MAX_OFFSET_SLOTS];
};
union {
DECODE_TABLE_WORKING_SPACE(maincode_working_space,
LZX_MAINCODE_MAX_NUM_SYMBOLS,
LZX_MAX_MAIN_CODEWORD_LEN);
DECODE_TABLE_WORKING_SPACE(lencode_working_space,
LZX_LENCODE_NUM_SYMBOLS,
LZX_MAX_LEN_CODEWORD_LEN);
DECODE_TABLE_WORKING_SPACE(alignedcode_working_space,
LZX_ALIGNEDCODE_NUM_SYMBOLS,
LZX_MAX_ALIGNED_CODEWORD_LEN);
DECODE_TABLE_WORKING_SPACE(precode_working_space,
LZX_PRECODE_NUM_SYMBOLS,
LZX_MAX_PRE_CODEWORD_LEN);
};
unsigned window_order;
unsigned num_main_syms;
/* Like lzx_extra_offset_bits[], but does not include the entropy-coded
* bits of aligned offset blocks */
u8 extra_offset_bits_minus_aligned[LZX_MAX_OFFSET_SLOTS];
} _aligned_attribute(DECODE_TABLE_ALIGNMENT);
/* Read a Huffman-encoded symbol using the precode. */
static forceinline unsigned
read_presym(const struct lzx_decompressor *d, struct input_bitstream *is)
{
return read_huffsym(is, d->precode_decode_table,
LZX_PRECODE_TABLEBITS, LZX_MAX_PRE_CODEWORD_LEN);
}
/* Read a Huffman-encoded symbol using the main code. */
static forceinline unsigned
read_mainsym(const struct lzx_decompressor *d, struct input_bitstream *is)
{
return read_huffsym(is, d->maincode_decode_table,
LZX_MAINCODE_TABLEBITS, LZX_MAX_MAIN_CODEWORD_LEN);
}
/* Read a Huffman-encoded symbol using the length code. */
static forceinline unsigned
read_lensym(const struct lzx_decompressor *d, struct input_bitstream *is)
{
return read_huffsym(is, d->lencode_decode_table,
LZX_LENCODE_TABLEBITS, LZX_MAX_LEN_CODEWORD_LEN);
}
/* Read a Huffman-encoded symbol using the aligned offset code. */
static forceinline unsigned
read_alignedsym(const struct lzx_decompressor *d, struct input_bitstream *is)
{
return read_huffsym(is, d->alignedcode_decode_table,
LZX_ALIGNEDCODE_TABLEBITS, LZX_MAX_ALIGNED_CODEWORD_LEN);
}
/*
* Read a precode from the compressed input bitstream, then use it to decode
* @num_lens codeword length values and write them to @lens.
*/
static int
lzx_read_codeword_lens(struct lzx_decompressor *d, struct input_bitstream *is,
u8 *lens, unsigned num_lens)
{
u8 *len_ptr = lens;
u8 *lens_end = lens + num_lens;
/* Read the lengths of the precode codewords. These are stored
* explicitly. */
for (int i = 0; i < LZX_PRECODE_NUM_SYMBOLS; i++) {
d->precode_lens[i] =
bitstream_read_bits(is, LZX_PRECODE_ELEMENT_SIZE);
}
/* Build the decoding table for the precode. */
if (make_huffman_decode_table(d->precode_decode_table,
LZX_PRECODE_NUM_SYMBOLS,
LZX_PRECODE_TABLEBITS,
d->precode_lens,
LZX_MAX_PRE_CODEWORD_LEN,
d->precode_working_space))
return -1;
/* Decode the codeword lengths. */
do {
unsigned presym;
u8 len;
/* Read the next precode symbol. */
presym = read_presym(d, is);
if (presym < 17) {
/* Difference from old length */
len = *len_ptr - presym;
if ((int8_t)len < 0)
len += 17;
*len_ptr++ = len;
} else {
/* Special RLE values */
unsigned run_len;
if (presym == 17) {
/* Run of 0's */
run_len = 4 + bitstream_read_bits(is, 4);
len = 0;
} else if (presym == 18) {
/* Longer run of 0's */
run_len = 20 + bitstream_read_bits(is, 5);
len = 0;
} else {
/* Run of identical lengths */
run_len = 4 + bitstream_read_bits(is, 1);
presym = read_presym(d, is);
if (unlikely(presym > 17))
return -1;
len = *len_ptr - presym;
if ((int8_t)len < 0)
len += 17;
}
do {
*len_ptr++ = len;
} while (--run_len);
/*
* The worst case overrun is when presym == 18,
* run_len == 20 + 31, and only 1 length was remaining.
* So LZX_READ_LENS_MAX_OVERRUN == 50.
*
* Overrun while reading the first half of maincode_lens
* can corrupt the previous values in the second half.
* This doesn't really matter because the resulting
* lengths will still be in range, and data that
* generates overruns is invalid anyway.
*/
}
} while (len_ptr < lens_end);
return 0;
}
/*
* Read the header of an LZX block. For all block types, the block type and
* size is saved in *block_type_ret and *block_size_ret, respectively. For
* compressed blocks, the codeword lengths are also saved. For uncompressed
* blocks, the recent offsets queue is also updated.
*/
static int
lzx_read_block_header(struct lzx_decompressor *d, struct input_bitstream *is,
u32 recent_offsets[], int *block_type_ret,
u32 *block_size_ret)
{
int block_type;
u32 block_size;
bitstream_ensure_bits(is, 4);
/* Read the block type. */
block_type = bitstream_pop_bits(is, 3);
/* Read the block size. */
if (bitstream_pop_bits(is, 1)) {
block_size = LZX_DEFAULT_BLOCK_SIZE;
} else {
block_size = bitstream_read_bits(is, 16);
if (d->window_order >= 16) {
block_size <<= 8;
block_size |= bitstream_read_bits(is, 8);
}
}
switch (block_type) {
case LZX_BLOCKTYPE_ALIGNED:
/* Read the aligned offset codeword lengths. */
for (int i = 0; i < LZX_ALIGNEDCODE_NUM_SYMBOLS; i++) {
d->alignedcode_lens[i] =
bitstream_read_bits(is,
LZX_ALIGNEDCODE_ELEMENT_SIZE);
}
/* Fall through, since the rest of the header for aligned offset
* blocks is the same as that for verbatim blocks. */
case LZX_BLOCKTYPE_VERBATIM:
/* Read the main codeword lengths, which are divided into two
* parts: literal symbols and match headers. */
if (lzx_read_codeword_lens(d, is, d->maincode_lens,
LZX_NUM_CHARS))
return -1;
if (lzx_read_codeword_lens(d, is, d->maincode_lens + LZX_NUM_CHARS,
d->num_main_syms - LZX_NUM_CHARS))
return -1;
/* Read the length codeword lengths. */
if (lzx_read_codeword_lens(d, is, d->lencode_lens,
LZX_LENCODE_NUM_SYMBOLS))
return -1;
break;
case LZX_BLOCKTYPE_UNCOMPRESSED:
/*
* The header of an uncompressed block contains new values for
* the recent offsets queue, starting on the next 16-bit
* boundary in the bitstream. Careful: if the stream is
* *already* aligned, the correct thing to do is to throw away
* the next 16 bits (this is probably a mistake in the format).
*/
bitstream_ensure_bits(is, 1);
bitstream_align(is);
recent_offsets[0] = bitstream_read_u32(is);
recent_offsets[1] = bitstream_read_u32(is);
recent_offsets[2] = bitstream_read_u32(is);
/* Offsets of 0 are invalid. */
if (recent_offsets[0] == 0 || recent_offsets[1] == 0 ||
recent_offsets[2] == 0)
return -1;
break;
default:
/* Unrecognized block type. */
return -1;
}
*block_type_ret = block_type;
*block_size_ret = block_size;
return 0;
}
/* Decompress a block of LZX-compressed data. */
static int
lzx_decompress_block(struct lzx_decompressor *d, struct input_bitstream *is,
int block_type, u32 block_size,
u8 * const out_begin, u8 *out_next, u32 recent_offsets[])
{
u8 * const block_end = out_next + block_size;
unsigned min_aligned_offset_slot;
/*
* Build the Huffman decode tables. We always need to build the main
* and length decode tables. For aligned blocks we additionally need to
* build the aligned offset decode table.
*/
if (make_huffman_decode_table(d->maincode_decode_table,
d->num_main_syms,
LZX_MAINCODE_TABLEBITS,
d->maincode_lens,
LZX_MAX_MAIN_CODEWORD_LEN,
d->maincode_working_space))
return -1;
if (make_huffman_decode_table(d->lencode_decode_table,
LZX_LENCODE_NUM_SYMBOLS,
LZX_LENCODE_TABLEBITS,
d->lencode_lens,
LZX_MAX_LEN_CODEWORD_LEN,
d->lencode_working_space))
return -1;
if (block_type == LZX_BLOCKTYPE_ALIGNED) {
if (make_huffman_decode_table(d->alignedcode_decode_table,
LZX_ALIGNEDCODE_NUM_SYMBOLS,
LZX_ALIGNEDCODE_TABLEBITS,
d->alignedcode_lens,
LZX_MAX_ALIGNED_CODEWORD_LEN,
d->alignedcode_working_space))
return -1;
min_aligned_offset_slot = LZX_MIN_ALIGNED_OFFSET_SLOT;
memcpy(d->extra_offset_bits, d->extra_offset_bits_minus_aligned,
sizeof(lzx_extra_offset_bits));
} else {
min_aligned_offset_slot = LZX_MAX_OFFSET_SLOTS;
memcpy(d->extra_offset_bits, lzx_extra_offset_bits,
sizeof(lzx_extra_offset_bits));
}
/* Decode the literals and matches. */
do {
unsigned mainsym;
unsigned length;
u32 offset;
unsigned offset_slot;
mainsym = read_mainsym(d, is);
if (mainsym < LZX_NUM_CHARS) {
/* Literal */
*out_next++ = mainsym;
continue;
}
/* Match */
/* Decode the length header and offset slot. */
STATIC_ASSERT(LZX_NUM_CHARS % LZX_NUM_LEN_HEADERS == 0);
length = mainsym % LZX_NUM_LEN_HEADERS;
offset_slot = (mainsym - LZX_NUM_CHARS) / LZX_NUM_LEN_HEADERS;
/* If needed, read a length symbol to decode the full length. */
if (length == LZX_NUM_PRIMARY_LENS)
length += read_lensym(d, is);
length += LZX_MIN_MATCH_LEN;
if (offset_slot < LZX_NUM_RECENT_OFFSETS) {
/* Repeat offset */
/* Note: This isn't a real LRU queue, since using the R2
* offset doesn't bump the R1 offset down to R2. */
offset = recent_offsets[offset_slot];
recent_offsets[offset_slot] = recent_offsets[0];
} else {
/* Explicit offset */
offset = bitstream_read_bits(is, d->extra_offset_bits[offset_slot]);
if (offset_slot >= min_aligned_offset_slot) {
offset = (offset << LZX_NUM_ALIGNED_OFFSET_BITS) |
read_alignedsym(d, is);
}
offset += lzx_offset_slot_base[offset_slot];
/* Update the match offset LRU queue. */
STATIC_ASSERT(LZX_NUM_RECENT_OFFSETS == 3);
recent_offsets[2] = recent_offsets[1];
recent_offsets[1] = recent_offsets[0];
}
recent_offsets[0] = offset;
/* Validate the match and copy it to the current position. */
if (unlikely(lz_copy(length, offset, out_begin,
out_next, block_end, LZX_MIN_MATCH_LEN)))
return -1;
out_next += length;
} while (out_next != block_end);
return 0;
}
int
lzx_decompress(struct lzx_decompressor *d,
const void *compressed_data, size_t compressed_size,
void *uncompressed_data, size_t uncompressed_size)
{
u8 * const out_begin = uncompressed_data;
u8 *out_next = out_begin;
u8 * const out_end = out_begin + uncompressed_size;
struct input_bitstream is;
STATIC_ASSERT(LZX_NUM_RECENT_OFFSETS == 3);
u32 recent_offsets[LZX_NUM_RECENT_OFFSETS] = {1, 1, 1};
unsigned may_have_e8_byte = 0;
init_input_bitstream(&is, compressed_data, compressed_size);
/* Codeword lengths begin as all 0's for delta encoding purposes. */
memset(d->maincode_lens, 0, d->num_main_syms);
memset(d->lencode_lens, 0, LZX_LENCODE_NUM_SYMBOLS);
/* Decompress blocks until we have all the uncompressed data. */
while (out_next != out_end) {
int block_type;
u32 block_size;
if (lzx_read_block_header(d, &is, recent_offsets,
&block_type, &block_size))
return -1;
if (block_size < 1 || block_size > out_end - out_next)
return -1;
if (likely(block_type != LZX_BLOCKTYPE_UNCOMPRESSED)) {
/* Compressed block */
if (lzx_decompress_block(d, &is, block_type, block_size,
out_begin, out_next,
recent_offsets))
return -1;
/* If the first E8 byte was in this block, then it must
* have been encoded as a literal using mainsym E8. */
may_have_e8_byte |= d->maincode_lens[0xE8];
} else {
/* Uncompressed block */
if (bitstream_read_bytes(&is, out_next, block_size))
return -1;
/* Re-align the bitstream if needed. */
if (block_size & 1)
bitstream_read_byte(&is);
/* There may have been an E8 byte in the block. */
may_have_e8_byte = 1;
}
out_next += block_size;
}
/* Postprocess the data unless it cannot possibly contain E8 bytes. */
if (may_have_e8_byte)
lzx_postprocess(uncompressed_data, uncompressed_size);
return 0;
}
struct lzx_decompressor *
lzx_allocate_decompressor(size_t max_block_size)
{
unsigned window_order;
struct lzx_decompressor *d;
window_order = lzx_get_window_order(max_block_size);
if (window_order == 0) {
errno = EINVAL;
return NULL;
}
d = aligned_malloc(sizeof(*d), DECODE_TABLE_ALIGNMENT);
if (!d)
return NULL;
d->window_order = window_order;
d->num_main_syms = lzx_get_num_main_syms(window_order);
/* Initialize 'd->extra_offset_bits_minus_aligned'. */
STATIC_ASSERT(sizeof(d->extra_offset_bits_minus_aligned) ==
sizeof(lzx_extra_offset_bits));
STATIC_ASSERT(sizeof(d->extra_offset_bits) ==
sizeof(lzx_extra_offset_bits));
memcpy(d->extra_offset_bits_minus_aligned, lzx_extra_offset_bits,
sizeof(lzx_extra_offset_bits));
for (unsigned offset_slot = LZX_MIN_ALIGNED_OFFSET_SLOT;
offset_slot < LZX_MAX_OFFSET_SLOTS; offset_slot++)
{
d->extra_offset_bits_minus_aligned[offset_slot] -=
LZX_NUM_ALIGNED_OFFSET_BITS;
}
return d;
}
void
lzx_free_decompressor(struct lzx_decompressor *d)
{
aligned_free(d);
}
ntfs2btrfs-20240115/src/ebiggers/system_compression.h000066400000000000000000000035101455127722500225540ustar00rootroot00000000000000/*
* system_compression.h - declarations for accessing System Compressed files
*
* Copyright (C) 2015 Eric Biggers
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 2 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see .
*/
#pragma once
#ifdef __cplusplus
extern "C" {
#endif
#include <stddef.h>
#include <stdint.h>
/* System compressed file access */
struct ntfs_system_decompression_ctx;
extern void
ntfs_close_system_decompression_ctx(struct ntfs_system_decompression_ctx *ctx);
/* XPRESS decompression */
struct xpress_decompressor;
extern struct xpress_decompressor *xpress_allocate_decompressor(void);
extern int xpress_decompress(struct xpress_decompressor *decompressor,
const void *compressed_data, size_t compressed_size,
void *uncompressed_data, size_t uncompressed_size);
extern void xpress_free_decompressor(struct xpress_decompressor *decompressor);
/* LZX decompression */
struct lzx_decompressor;
extern struct lzx_decompressor *
lzx_allocate_decompressor(size_t max_block_size);
extern int lzx_decompress(struct lzx_decompressor *decompressor,
const void *compressed_data, size_t compressed_size,
void *uncompressed_data, size_t uncompressed_size);
extern void lzx_free_decompressor(struct lzx_decompressor *decompressor);
#ifdef __cplusplus
}
#endif
ntfs2btrfs-20240115/src/ebiggers/xpress_constants.h000066400000000000000000000006531455127722500222340ustar00rootroot00000000000000/*
* xpress_constants.h
*
* Constants for the XPRESS compression format.
*/
#ifndef _XPRESS_CONSTANTS_H
#define _XPRESS_CONSTANTS_H
#define XPRESS_NUM_CHARS 256
#define XPRESS_NUM_SYMBOLS 512
#define XPRESS_MAX_CODEWORD_LEN 15
#define XPRESS_END_OF_DATA 256
#define XPRESS_MIN_OFFSET 1
#define XPRESS_MAX_OFFSET 65535
#define XPRESS_MIN_MATCH_LEN 3
#define XPRESS_MAX_MATCH_LEN 65538
#endif /* _XPRESS_CONSTANTS_H */
ntfs2btrfs-20240115/src/ebiggers/xpress_decompress.c000066400000000000000000000126511455127722500223600ustar00rootroot00000000000000/*
* xpress_decompress.c
*
* A decompressor for the XPRESS compression format (Huffman variant).
*/
/*
*
* Copyright (C) 2012-2016 Eric Biggers
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 2 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see .
*/
/*
* The XPRESS compression format is an LZ77 and Huffman-code based algorithm.
* That means it is fairly similar to LZX compression, but XPRESS is simpler, so
* it is a little faster to compress and decompress.
*
* The XPRESS compression format is mostly documented in a file called "[MS-XCA]
* Xpress Compression Algorithm". In the MSDN library, it can currently be
* found under Open Specifications => Protocols => Windows Protocols => Windows
* Server Protocols => [MS-XCA] Xpress Compression Algorithm". The format in
* WIMs is specifically the algorithm labeled as the "LZ77+Huffman Algorithm"
* (there apparently are some other versions of XPRESS as well).
*
* If you are already familiar with the LZ77 algorithm and Huffman coding, the
* XPRESS format is fairly simple. The compressed data begins with 256 bytes
* that contain 512 4-bit integers that are the lengths of the symbols in the
* Huffman code used for match/literal headers. In contrast with more
* complicated formats such as DEFLATE and LZX, this is the only Huffman code
* that is used for the entirety of the XPRESS compressed data, and the codeword
* lengths are not encoded with a pretree.
*
* The rest of the compressed data is Huffman-encoded symbols. Values 0 through
* 255 represent the corresponding literal bytes. Values 256 through 511
* represent matches and may require extra bits or bytes to be read to get the
* match offset and match length.
*
* The trickiest part is probably the way in which literal bytes for match
* lengths are interleaved in the bitstream.
*
* Also, a caveat--- according to Microsoft's documentation for XPRESS,
*
* "Some implementation of the decompression algorithm expect an extra
* symbol to mark the end of the data. Specifically, some implementations
* fail during decompression if the Huffman symbol 256 is not found after
* the actual data."
*
* This is the case with Microsoft's implementation in WIMGAPI, for example. So
* although our implementation doesn't currently check for this extra symbol,
* compressors would be wise to add it.
*/
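/*
 * Illustrative example (not part of the original source): the Huffman symbol
 * 0x153 (339) is a match symbol, since it is >= 256. Its low nibble (3) is
 * the length header and the next nibble (5) is log2 of the offset, so the
 * decoder reads 5 further offset bits (giving an offset in 32..63) and the
 * match length is 3 + XPRESS_MIN_MATCH_LEN = 6. See xpress_decompress() below.
 */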
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include "decompress_common.h"
#include "system_compression.h"
#include "xpress_constants.h"
/* This value is chosen for fast decompression. */
#define XPRESS_TABLEBITS 11
struct xpress_decompressor {
union {
DECODE_TABLE(decode_table, XPRESS_NUM_SYMBOLS,
XPRESS_TABLEBITS, XPRESS_MAX_CODEWORD_LEN);
u8 lens[XPRESS_NUM_SYMBOLS];
};
DECODE_TABLE_WORKING_SPACE(working_space, XPRESS_NUM_SYMBOLS,
XPRESS_MAX_CODEWORD_LEN);
} _aligned_attribute(DECODE_TABLE_ALIGNMENT);
int
xpress_decompress(struct xpress_decompressor * d,
const void *compressed_data, size_t compressed_size,
void *uncompressed_data, size_t uncompressed_size)
{
const u8 * const in_begin = compressed_data;
u8 * const out_begin = uncompressed_data;
u8 *out_next = out_begin;
u8 * const out_end = out_begin + uncompressed_size;
struct input_bitstream is;
/* Read the Huffman codeword lengths. */
if (compressed_size < XPRESS_NUM_SYMBOLS / 2)
return -1;
for (int i = 0; i < XPRESS_NUM_SYMBOLS / 2; i++) {
d->lens[2 * i + 0] = in_begin[i] & 0xf;
d->lens[2 * i + 1] = in_begin[i] >> 4;
}
/* Build a decoding table for the Huffman code. */
if (make_huffman_decode_table(d->decode_table, XPRESS_NUM_SYMBOLS,
XPRESS_TABLEBITS, d->lens,
XPRESS_MAX_CODEWORD_LEN,
d->working_space))
return -1;
/* Decode the matches and literals. */
init_input_bitstream(&is, in_begin + XPRESS_NUM_SYMBOLS / 2,
compressed_size - XPRESS_NUM_SYMBOLS / 2);
while (out_next != out_end) {
unsigned sym;
unsigned log2_offset;
u32 length;
u32 offset;
sym = read_huffsym(&is, d->decode_table,
XPRESS_TABLEBITS, XPRESS_MAX_CODEWORD_LEN);
if (sym < XPRESS_NUM_CHARS) {
/* Literal */
*out_next++ = sym;
} else {
/* Match */
length = sym & 0xf;
log2_offset = (sym >> 4) & 0xf;
bitstream_ensure_bits(&is, 16);
offset = ((u32)1 << log2_offset) |
bitstream_pop_bits(&is, log2_offset);
if (length == 0xf) {
length += bitstream_read_byte(&is);
if (length == 0xf + 0xff)
length = bitstream_read_u16(&is);
}
length += XPRESS_MIN_MATCH_LEN;
if (unlikely(lz_copy(length, offset,
out_begin, out_next, out_end,
XPRESS_MIN_MATCH_LEN)))
return -1;
out_next += length;
}
}
return 0;
}
struct xpress_decompressor *
xpress_allocate_decompressor(void)
{
return aligned_malloc(sizeof(struct xpress_decompressor),
DECODE_TABLE_ALIGNMENT);
}
void
xpress_free_decompressor(struct xpress_decompressor *d)
{
aligned_free(d);
}
ntfs2btrfs-20240115/src/ntfs.cpp000066400000000000000000000703101455127722500163270ustar00rootroot00000000000000/* Copyright (c) Mark Harmstone 2020
*
* This file is part of ntfs2btrfs.
*
* Ntfs2btrfs is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public Licence as published by
* the Free Software Foundation, either version 2 of the Licence, or
* (at your option) any later version.
*
* Ntfs2btrfs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public Licence for more details.
*
* You should have received a copy of the GNU General Public Licence
* along with Ntfs2btrfs. If not, see <https://www.gnu.org/licenses/>. */
#define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING
#include "ntfs2btrfs.h"
#include "ntfs.h"
#include <algorithm>
#include <string.h>
#include <locale>
#include <codecvt>
#ifndef _WIN32
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#endif
using namespace std;
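/* NTFS multi-sector structures (file records, index records) end each sector
 * with a copy of the update sequence number, with the displaced bytes saved in
 * the update sequence array. This checks the sequence number at the end of
 * every sector and puts the original bytes back. */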
static void process_fixups(MULTI_SECTOR_HEADER* header, uint64_t length, unsigned int sector_size) {
uint64_t sectors;
uint16_t* seq;
uint8_t* ptr;
if (length % sector_size != 0)
throw formatted_error("Length was not a multiple of sector_size.");
sectors = length / sector_size;
if (header->UpdateSequenceArraySize < sectors + 1)
throw formatted_error("UpdateSequenceArraySize was {:x}, expected {:x}", header->UpdateSequenceArraySize, sectors + 1);
seq = (uint16_t*)((uint8_t*)header + header->UpdateSequenceArrayOffset);
ptr = (uint8_t*)header + sector_size - sizeof(uint16_t);
for (unsigned int i = 0; i < sectors; i++) {
if (*(uint16_t*)ptr != seq[0])
throw formatted_error("Update sequence mismatch.");
*(uint16_t*)ptr = seq[i + 1];
ptr += sector_size;
}
}
ntfs_file::ntfs_file(ntfs& dev, uint64_t inode) : dev(dev), inode(inode) {
file_record_buf.resize((size_t)dev.file_record_size);
if (inode == 0) {
dev.seek(dev.boot_sector->MFT * dev.boot_sector->BytesPerSector * dev.boot_sector->SectorsPerCluster);
dev.read(file_record_buf.data(), (uint32_t)dev.file_record_size);
} else { // read from MFT
auto str = dev.mft->read(inode * dev.file_record_size, (uint32_t)dev.file_record_size);
memcpy(file_record_buf.data(), str.data(), (uint32_t)dev.file_record_size); // FIXME - can we avoid copy?
}
file_record = reinterpret_cast<FILE_RECORD_SEGMENT_HEADER*>(file_record_buf.data());
if (file_record->MultiSectorHeader.Signature != NTFS_FILE_SIGNATURE) {
throw formatted_error("Invalid file signature ({:08x}, expected {:08x}).",
file_record->MultiSectorHeader.Signature, NTFS_FILE_SIGNATURE);
}
process_fixups(&file_record->MultiSectorHeader, dev.file_record_size, dev.boot_sector->BytesPerSector);
}
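/* Parse an NTFS "mapping pairs" (data run) array. Each run begins with a
 * header byte: the low nibble is the byte length of the run-length field, the
 * high nibble the byte length of the signed, relative LCN field; a zero-sized
 * LCN field means a sparse run. Illustrative example (not from the original
 * source): the bytes 21 18 34 56 describe a run of 0x18 clusters whose LCN is
 * 0x5634 higher than that of the previous run. */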
void read_nonresident_mappings(const ATTRIBUTE_RECORD_HEADER& att, list<mapping>& mappings,
uint32_t cluster_size, uint64_t vdl) {
uint64_t next_vcn = att.Form.Nonresident.LowestVcn, current_lcn = 0, current_vcn;
uint8_t* stream = (uint8_t*)&att + att.Form.Nonresident.MappingPairsOffset;
uint64_t max_cluster = vdl / cluster_size;
if (vdl & (cluster_size - 1))
max_cluster++;
if (max_cluster == 0)
return;
while (true) {
uint64_t v, l;
int64_t v_val, l_val;
current_vcn = next_vcn;
if (*stream == 0)
break;
v = *stream & 0xf;
l = *stream >> 4;
stream++;
if (v > 8)
throw formatted_error("Error: v > 8");
if (l > 8)
throw formatted_error("Error: l > 8");
// FIXME - do we need to make sure that int64_t pointers don't go past end of buffer?
v_val = *(int64_t*)stream;
v_val &= (1ull << (v * 8)) - 1;
if ((uint64_t)v_val & (1ull << ((v * 8) - 1))) // sign-extend if negative
v_val |= 0xffffffffffffffff & ~((1ull << (v * 8)) - 1);
stream += v;
next_vcn += v_val;
if (l != 0) {
l_val = *(int64_t*)stream;
l_val &= (1ull << (l * 8)) - 1;
if ((uint64_t)l_val & (1ull << ((l * 8) - 1))) // sign-extend if negative
l_val |= 0xffffffffffffffff & ~((1ull << (l * 8)) - 1);
stream += l;
current_lcn += l_val;
if (next_vcn > max_cluster)
next_vcn = max_cluster;
mappings.emplace_back(current_lcn, current_vcn, next_vcn - current_vcn);
} else
mappings.emplace_back(0, current_vcn, next_vcn - current_vcn);
if (next_vcn == max_cluster)
break;
}
}
buffer_t ntfs_file::read_nonresident_attribute(uint64_t offset, uint32_t length, const ATTRIBUTE_RECORD_HEADER* att) {
list<mapping> mappings;
uint32_t cluster_size = dev.boot_sector->BytesPerSector * dev.boot_sector->SectorsPerCluster;
read_nonresident_mappings(*att, mappings, cluster_size, att->Form.Nonresident.ValidDataLength);
// FIXME - do we need to check that mappings is contiguous and in order?
if (offset >= (uint64_t)att->Form.Nonresident.FileSize)
return {};
if (offset + length > (uint64_t)att->Form.Nonresident.FileSize || length == 0)
length = (uint32_t)(att->Form.Nonresident.FileSize - offset);
buffer_t ret(length);
memset(ret.data(), 0, length);
for (const auto& m : mappings) {
if (offset + length >= m.vcn * cluster_size && offset < (m.vcn + m.length) * cluster_size) {
uint32_t buf_start, buf_end;
uint64_t read_start, read_end;
unsigned int skip_start, skip_end;
if (offset < m.vcn * cluster_size)
buf_start = (uint32_t)((m.vcn * cluster_size) - offset);
else
buf_start = 0;
if (offset + length > (m.vcn + m.length) * cluster_size)
buf_end = min((uint32_t)((m.vcn + m.length) * cluster_size), length);
else
buf_end = length;
if (buf_end == buf_start)
continue;
read_start = m.lcn * cluster_size;
if (offset > m.vcn * cluster_size)
read_start += offset - (m.vcn * cluster_size);
read_end = read_start + buf_end - buf_start;
if ((read_start % dev.boot_sector->BytesPerSector) != 0) {
skip_start = (unsigned int)(read_start % dev.boot_sector->BytesPerSector);
read_start -= skip_start;
} else
skip_start = 0;
if ((read_end % dev.boot_sector->BytesPerSector) != 0) {
skip_end = (unsigned int)(dev.boot_sector->BytesPerSector - (read_end % dev.boot_sector->BytesPerSector));
read_end += skip_end;
} else
skip_end = 0;
dev.seek(read_start);
if (skip_start != 0 || skip_end != 0) {
buffer_t tmp(read_end - read_start);
dev.read(tmp.data(), tmp.size());
memcpy(&ret[buf_start], &tmp[skip_start], buf_end - buf_start);
} else
dev.read(&ret[buf_start], buf_end - buf_start);
}
}
// FIXME - zero end if ValidDataLength < FileSize
return ret;
}
buffer_t ntfs_file::read(uint64_t offset, uint32_t length, enum ntfs_attribute type, u16string_view name) {
buffer_t ret;
bool found = false;
loop_through_atts([&](const ATTRIBUTE_RECORD_HEADER& att, string_view res_data, u16string_view att_name) -> bool {
if (att.TypeCode != type || name != att_name)
return true;
if (att.Flags & ATTRIBUTE_FLAG_ENCRYPTED)
throw formatted_error("Cannot read encrypted attribute");
if (att.Flags & ATTRIBUTE_FLAG_COMPRESSION_MASK)
throw formatted_error("FIXME - handle reading compressed attribute"); // FIXME
if (att.FormCode == NTFS_ATTRIBUTE_FORM::NONRESIDENT_FORM)
ret = read_nonresident_attribute(offset, length, &att);
else {
if (offset >= res_data.length())
ret.clear();
else {
if (offset + length > res_data.length() || length == 0)
length = (uint32_t)(res_data.length() - offset);
ret.resize(length);
memcpy(ret.data(), &res_data[(uint32_t)offset], length);
}
}
found = true;
return false;
});
if (!found)
throw formatted_error("Attribute not found.");
return ret;
}
list<mapping> ntfs_file::read_mappings(enum ntfs_attribute type, u16string_view name) {
list<mapping> mappings;
loop_through_atts([&](const ATTRIBUTE_RECORD_HEADER& att, string_view, u16string_view att_name) -> bool {
if (att.TypeCode != type || name != att_name)
return true;
if (att.FormCode == NTFS_ATTRIBUTE_FORM::RESIDENT_FORM)
throw formatted_error("Attribute is resident");
uint32_t cluster_size = dev.boot_sector->BytesPerSector * dev.boot_sector->SectorsPerCluster;
read_nonresident_mappings(att, mappings, cluster_size, att.Form.Nonresident.ValidDataLength);
return false;
});
return mappings;
}
ntfs::ntfs(const string& fn) {
unsigned int sector_size = 512; // FIXME - find from device
#ifdef _WIN32
bool drive = false;
DWORD ret;
wstring_convert<codecvt_utf8_utf16<char16_t>, char16_t> convert;
u16string namew;
if ((fn.length() == 2 || fn.length() == 3) && ((fn[0] >= 'A' && fn[0] <= 'Z') || (fn[0] >= 'a' && fn[0] <= 'z')) && fn[1] == ':' && (fn.length() == 2 || fn[2] == '\\')) {
namew = u"\\\\.\\X:";
namew[4] = fn[0];
drive = true;
} else
namew = convert.from_bytes(fn.data(), fn.data() + fn.length());
h = CreateFileW((WCHAR*)namew.c_str(), GENERIC_READ | GENERIC_WRITE, FILE_SHARE_READ | FILE_SHARE_WRITE,
nullptr, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, nullptr);
if (h == INVALID_HANDLE_VALUE)
throw last_error("CreateFile", GetLastError());
if (drive) {
if (!DeviceIoControl(h, FSCTL_LOCK_VOLUME, nullptr, 0, nullptr, 0, &ret, nullptr)) {
auto le = GetLastError();
CloseHandle(h);
throw last_error("FSCTL_LOCK_VOLUME", le);
}
}
#else
fd = open(fn.c_str(), O_RDWR | O_EXCL);
if (fd < 0)
throw formatted_error("open returned {} (errno = {}).", fd, errno);
#endif
// read NTFS_BOOT_SECTOR
boot_sector_buf.resize((size_t)sector_align(sizeof(NTFS_BOOT_SECTOR), sector_size));
seek(0);
read(boot_sector_buf.data(), boot_sector_buf.size());
boot_sector = reinterpret_cast<NTFS_BOOT_SECTOR*>(boot_sector_buf.data());
// make sure is NTFS
if (memcmp(boot_sector->FsName, NTFS_FS_NAME, sizeof(NTFS_FS_NAME) - 1))
throw formatted_error("Device was not an NTFS volume.");
if (boot_sector->ClustersPerMFTRecord < 0)
file_record_size = 1ull << -boot_sector->ClustersPerMFTRecord;
else
file_record_size = (uint64_t)boot_sector->BytesPerSector * (uint64_t)boot_sector->SectorsPerCluster * (uint64_t)boot_sector->ClustersPerMFTRecord;
mft.reset(new ntfs_file(*this, 0));
ntfs_file vol_file(*this, NTFS_VOLUME_INODE);
auto vi_str = vol_file.read(0, 0, ntfs_attribute::VOLUME_INFORMATION);
auto vi = reinterpret_cast<const VOLUME_INFORMATION*>(vi_str.data());
if (vi->MajorVersion > 3 || (vi->MajorVersion == 3 && vi->MinorVersion > 1))
throw formatted_error("Unsupported NTFS version {}.{}.", vi->MajorVersion, vi->MinorVersion);
if (vi->Flags & NTFS_VOLUME_DIRTY)
throw formatted_error("Cannot convert volume with dirty bit set.");
}
static buffer_t read_from_mappings(const list<mapping>& mappings, uint64_t start, uint32_t length, ntfs& dev) {
uint32_t sector_size = dev.boot_sector->BytesPerSector;
uint32_t cluster_size = sector_size * dev.boot_sector->SectorsPerCluster;
buffer_t s(length);
uint64_t cluster_start = start / cluster_size;
uint64_t cluster_end = sector_align(start + length, cluster_size) / cluster_size;
for (const auto& m : mappings) {
if (m.vcn <= cluster_end && m.vcn + m.length >= cluster_start) {
uint64_t read_start = max(start - (start % dev.boot_sector->BytesPerSector), m.vcn * cluster_size);
uint64_t read_end = min(sector_align(start + length, dev.boot_sector->BytesPerSector), (m.vcn + m.length) * cluster_size);
if (read_end == read_start)
continue;
buffer_t buf((uint32_t)(read_end - read_start));
dev.seek(read_start + ((m.lcn - m.vcn) * cluster_size));
dev.read(buf.data(), (uint32_t)(read_end - read_start));
memcpy(s.data(), buf.data() + read_start - start, (size_t)min(read_end - read_start, length - read_start + start));
}
}
return s;
}
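/* Search a B-tree index (the $SII index of $Secure, in practice) for a 32-bit
 * key, descending into subnode index records where necessary. Returns the
 * entry's data if found, or nullopt otherwise. */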
static optional<buffer_t> btree_search(const index_root& ir, const list<mapping>& mappings, const index_node_header& inh,
ntfs& dev, uint32_t key) {
auto ent = reinterpret_cast<const index_entry*>((uint8_t*)&inh + inh.first_entry);
do {
if (ent->flags & INDEX_ENTRY_SUBNODE) {
bool skip = false;
if (!(ent->flags & INDEX_ENTRY_LAST)) {
uint32_t v1 = *(uint32_t*)((uint8_t*)ent + sizeof(index_entry));
if (v1 == key)
return buffer_t((uint8_t*)ent + sizeof(index_entry) + ent->stream_length, (uint8_t*)ent + ent->entry_length - sizeof(uint64_t));
skip = key > v1;
}
if (!skip) {
uint64_t vcn = ((MFT_SEGMENT_REFERENCE*)((uint8_t*)ent + ent->entry_length - sizeof(uint64_t)))->SegmentNumber;
if (ir.bytes_per_index_record < dev.boot_sector->BytesPerSector * dev.boot_sector->SectorsPerCluster)
vcn *= dev.boot_sector->BytesPerSector;
else
vcn *= (uint64_t)dev.boot_sector->BytesPerSector * (uint64_t)dev.boot_sector->SectorsPerCluster;
auto data = read_from_mappings(mappings, vcn, ir.bytes_per_index_record, dev);
auto& rec = *reinterpret_cast<index_record*>(data.data());
if (rec.MultiSectorHeader.Signature != INDEX_RECORD_MAGIC)
throw formatted_error("Index record magic was not INDX.");
process_fixups(&rec.MultiSectorHeader, ir.bytes_per_index_record, dev.boot_sector->BytesPerSector);
return btree_search(ir, mappings, rec.header, dev, key);
}
} else if (!(ent->flags & INDEX_ENTRY_LAST)) {
uint32_t v = *(uint32_t*)((uint8_t*)ent + sizeof(index_entry));
if (v == key)
return buffer_t((uint8_t*)ent + sizeof(index_entry) + ent->stream_length, (uint8_t*)ent + ent->entry_length);
else if (v > key)
break;
}
if (ent->flags & INDEX_ENTRY_LAST)
break;
ent = reinterpret_cast<const index_entry*>((uint8_t*)ent + ent->entry_length);
} while (true);
return nullopt;
}
string_view ntfs::find_sd(uint32_t id, ntfs_file& secure) {
if (sd_list.count(id) > 0) {
const auto& sd = sd_list.at(id);
return {(char*)sd.data(), sd.size()};
}
auto ir_str = secure.read(0, 0, ntfs_attribute::INDEX_ROOT, u"$SII");
auto ia = secure.read_mappings(ntfs_attribute::INDEX_ALLOCATION, u"$SII");
const auto& ir = *reinterpret_cast<const index_root*>(ir_str.data());
auto ret = btree_search(ir, ia, ir.node_header, *this, id);
if (!ret.has_value())
return "";
const auto& sde = *reinterpret_cast<const sd_entry*>(ret.value().data());
auto sde2 = secure.read(sde.offset, sde.length, ntfs_attribute::DATA, u"$SDS");
if (memcmp(&sde, sde2.data(), sizeof(sd_entry)))
throw formatted_error("SD headers do not match.");
auto sv = string_view((char*)sde2.data(), sde2.size()).substr(sizeof(sd_entry));
buffer_t buf(sv.data(), sv.data() + sv.length());
auto [it, success] = sd_list.emplace(make_pair(id, buffer_t{}));
it->second.swap(buf);
return string_view((char*)it->second.data(), it->second.size());
}
static void walk_btree(const index_root& ir, const list<mapping>& mappings, const index_node_header& inh, ntfs& dev,
const invocable<const index_entry&, string_view> auto& func, unsigned int level) {
auto ent = reinterpret_cast<const index_entry*>((uint8_t*)&inh + inh.first_entry);
do {
if (ent->flags & INDEX_ENTRY_SUBNODE) {
uint64_t vcn = ((MFT_SEGMENT_REFERENCE*)((uint8_t*)ent + ent->entry_length - sizeof(uint64_t)))->SegmentNumber;
if (ir.bytes_per_index_record < dev.boot_sector->BytesPerSector * dev.boot_sector->SectorsPerCluster)
vcn *= dev.boot_sector->BytesPerSector;
else
vcn *= (uint64_t)dev.boot_sector->BytesPerSector * (uint64_t)dev.boot_sector->SectorsPerCluster;
auto data = read_from_mappings(mappings, vcn, ir.bytes_per_index_record, dev);
auto rec = reinterpret_cast<index_record*>(data.data());
if (rec->MultiSectorHeader.Signature != INDEX_RECORD_MAGIC)
throw formatted_error("Index record magic was not INDX.");
process_fixups(&rec->MultiSectorHeader, ir.bytes_per_index_record, dev.boot_sector->BytesPerSector);
walk_btree(ir, mappings, rec->header, dev, func, level + 1);
} else
func(*ent, string_view((const char*)ent + sizeof(index_entry), ent->stream_length));
if (ent->flags & INDEX_ENTRY_LAST)
break;
ent = reinterpret_cast<const index_entry*>((uint8_t*)ent + ent->entry_length);
} while (true);
}
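/* Recursively add inode and every directory below it (found via its $I30
 * index) to skiplist. */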
void populate_skip_list(ntfs& dev, uint64_t inode, list<uint64_t>& skiplist) {
ntfs_file file(dev, inode);
if (!file.is_directory())
return;
auto ir_str = file.read(0, 0, ntfs_attribute::INDEX_ROOT, u"$I30");
auto ia = file.read_mappings(ntfs_attribute::INDEX_ALLOCATION, u"$I30");
const auto& ir = *reinterpret_cast<const index_root*>(ir_str.data());
skiplist.emplace_back(inode);
walk_btree(ir, ia, ir.node_header, dev, [&](const index_entry& ent, string_view data) {
if (data.empty())
return;
auto fn = reinterpret_cast<const FILE_NAME*>(data.data());
if (fn->FileAttributes & FILE_ATTRIBUTE_DIRECTORY_MFT) {
bool found = false;
uint64_t dir_inode = ent.file_reference.SegmentNumber;
for (auto n : skiplist) {
if (n == dir_inode) {
found = true;
break;
}
}
if (!found)
populate_skip_list(dev, dir_inode, skiplist);
}
}, 0);
}
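/* Invoke func for each attribute record of this file, following any
 * ATTRIBUTE_LIST into the other file records it references. func receives the
 * attribute header, its resident data (if any) and its name; returning false
 * stops the iteration. */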
void ntfs_file::loop_through_atts(const function<bool(const ATTRIBUTE_RECORD_HEADER&, string_view, u16string_view)>& func) {
auto att = reinterpret_cast<const ATTRIBUTE_RECORD_HEADER*>((uint8_t*)file_record + file_record->FirstAttributeOffset);
size_t offset = file_record->FirstAttributeOffset;
buffer_t attlist;
while (true) {
if (att->TypeCode == (enum ntfs_attribute)0xffffffff || att->RecordLength == 0)
break;
if (att->TypeCode == ntfs_attribute::ATTRIBUTE_LIST) {
if (att->FormCode == NTFS_ATTRIBUTE_FORM::NONRESIDENT_FORM)
attlist = read_nonresident_attribute(0, (uint32_t)att->Form.Nonresident.FileSize, att);
else {
attlist.resize(att->Form.Resident.ValueLength);
memcpy(attlist.data(), (uint8_t*)att + att->Form.Resident.ValueOffset, att->Form.Resident.ValueLength);
}
break;
}
offset += att->RecordLength;
att = reinterpret_cast<const ATTRIBUTE_RECORD_HEADER*>((uint8_t*)att + att->RecordLength);
}
if (!attlist.empty()) {
vector<uint64_t> other_inodes;
{
auto ent = (const attribute_list_entry*)attlist.data();
size_t left = attlist.size();
while (true) {
uint64_t file_reference = ent->file_reference.SegmentNumber;
if (file_reference == inode) { // contained elsewhere in this inode
att = reinterpret_cast<const ATTRIBUTE_RECORD_HEADER*>((uint8_t*)file_record + file_record->FirstAttributeOffset);
offset = file_record->FirstAttributeOffset;
while (true) {
if (att->TypeCode == (enum ntfs_attribute)0xffffffff || att->RecordLength == 0)
break;
if (att->TypeCode == ent->type && att->NameLength == ent->name_length && att->Instance == ent->instance) {
if (att->NameLength == 0 || !memcmp((uint8_t*)file_record + offset + att->NameOffset, (uint8_t*)ent + ent->name_offset, att->NameLength * sizeof(char16_t))) {
string_view data;
u16string_view name;
if (att->FormCode == NTFS_ATTRIBUTE_FORM::RESIDENT_FORM)
data = string_view((const char*)file_record + offset + att->Form.Resident.ValueOffset, att->Form.Resident.ValueLength);
if (att->NameLength != 0)
name = u16string_view((char16_t*)((uint8_t*)file_record + offset + att->NameOffset), att->NameLength);
if (!func(*att, data, name))
return;
break;
}
}
offset += att->RecordLength;
att = reinterpret_cast<const ATTRIBUTE_RECORD_HEADER*>((uint8_t*)att + att->RecordLength);
}
} else {
bool found = false;
for (auto n : other_inodes) {
if (n == file_reference) {
found = true;
break;
}
}
if (!found)
other_inodes.push_back(file_reference);
}
if (left <= ent->record_length)
break;
left -= ent->record_length;
ent = (const attribute_list_entry*)((uint8_t*)ent + ent->record_length);
}
}
if (!other_inodes.empty()) {
for (auto file_reference : other_inodes) {
ntfs_file oth(dev, file_reference);
auto ent = (const attribute_list_entry*)attlist.data();
auto left = attlist.size();
while (true) {
if (ent->file_reference.SegmentNumber == file_reference) {
att = reinterpret_cast<const ATTRIBUTE_RECORD_HEADER*>((uint8_t*)oth.file_record + oth.file_record->FirstAttributeOffset);
offset = oth.file_record->FirstAttributeOffset;
while (true) {
if (att->TypeCode == (enum ntfs_attribute)0xffffffff || att->RecordLength == 0)
break;
if (att->TypeCode == ent->type && att->NameLength == ent->name_length && att->Instance == ent->instance) {
if (att->NameLength == 0 || !memcmp((uint8_t*)oth.file_record + offset + att->NameOffset, (uint8_t*)ent + ent->name_offset, att->NameLength * sizeof(char16_t))) {
string_view data;
u16string_view name;
if (att->FormCode == NTFS_ATTRIBUTE_FORM::RESIDENT_FORM)
data = string_view((const char*)oth.file_record + offset + att->Form.Resident.ValueOffset, att->Form.Resident.ValueLength);
if (att->NameLength != 0)
name = u16string_view((char16_t*)((uint8_t*)oth.file_record + offset + att->NameOffset), att->NameLength);
if (!func(*att, data, name))
return;
break;
}
}
offset += att->RecordLength;
att = reinterpret_cast<const ATTRIBUTE_RECORD_HEADER*>((uint8_t*)att + att->RecordLength);
}
}
if (left <= ent->record_length)
break;
left -= ent->record_length;
ent = (const attribute_list_entry*)((uint8_t*)ent + ent->record_length);
}
}
}
return;
}
att = reinterpret_cast<const ATTRIBUTE_RECORD_HEADER*>((uint8_t*)file_record + file_record->FirstAttributeOffset);
offset = file_record->FirstAttributeOffset;
while (true) {
if (att->TypeCode == (enum ntfs_attribute)0xffffffff || att->RecordLength == 0)
break;
string_view data;
u16string_view name;
if (att->FormCode == NTFS_ATTRIBUTE_FORM::RESIDENT_FORM)
data = string_view((const char*)file_record + offset + att->Form.Resident.ValueOffset, att->Form.Resident.ValueLength);
if (att->NameLength != 0)
name = u16string_view((char16_t*)((uint8_t*)file_record + offset + att->NameOffset), att->NameLength);
if (!func(*att, data, name))
return;
offset += att->RecordLength;
att = reinterpret_cast<const ATTRIBUTE_RECORD_HEADER*>((uint8_t*)att + att->RecordLength);
}
}
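// Reconstructs the full path of this file by walking FILE_NAME attributes up
// the parent-directory chain (skipping DOS 8.3 names) until the root
// directory is reached, then joining the components with backslashes.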
string ntfs_file::get_filename() {
list<u16string> parts;
ntfs_file* f = this;
do {
uint64_t dir_num = 0;
f->loop_through_atts([&](const ATTRIBUTE_RECORD_HEADER& att, string_view res_data, u16string_view) -> bool {
if (att.TypeCode != ntfs_attribute::FILE_NAME || att.FormCode != NTFS_ATTRIBUTE_FORM::RESIDENT_FORM)
return true;
auto fn = reinterpret_cast<const FILE_NAME*>(res_data.data());
if (fn->Namespace == file_name_type::DOS)
return true;
if (fn->Parent.SegmentNumber != NTFS_ROOT_DIR_INODE)
dir_num = fn->Parent.SegmentNumber;
auto name = u16string_view(fn->FileName, fn->FileNameLength);
parts.emplace_back(name);
return false;
});
if (f != this)
delete f;
if (dir_num != 0)
f = new ntfs_file(dev, dir_num);
else
break;
} while (true);
u16string retw;
while (!parts.empty()) {
retw += u"\\" + parts.back();
parts.pop_back();
}
return utf16_to_utf8(retw);
}
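// Low-level device I/O helpers: seek/read/write on the underlying volume,
// using a Win32 handle on Windows and a POSIX file descriptor elsewhere.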
void ntfs::seek(uint64_t pos) {
#ifdef _WIN32
LARGE_INTEGER posli;
posli.QuadPart = pos;
if (!SetFilePointerEx(h, posli, nullptr, FILE_BEGIN))
throw last_error("SetFilePointerEx", GetLastError());
#else
if (lseek(fd, pos, SEEK_SET) == -1)
throw formatted_error("Error seeking to {:x} (errno = {}).", pos, errno);
#endif
}
void ntfs::read(uint8_t* buf, size_t length) {
#ifdef _WIN32
DWORD read;
if (!ReadFile(h, buf, (DWORD)length, &read, nullptr))
throw last_error("ReadFile", GetLastError());
#else
auto pos = lseek(fd, 0, SEEK_CUR);
auto orig_length = length;
do {
auto ret = ::read(fd, buf, length);
if (ret < 0)
throw formatted_error("Error reading {:x} bytes at {:x} (errno {}).", orig_length, pos, errno);
if ((size_t)ret == length)
break;
buf += ret;
length -= ret;
} while (true);
#endif
}
void ntfs::write(const uint8_t* buf, size_t length) {
#ifdef _WIN32
DWORD written;
if (!WriteFile(h, buf, (DWORD)length, &written, nullptr))
throw last_error("WriteFile", GetLastError());
#else
auto pos = lseek(fd, 0, SEEK_CUR);
auto orig_length = length;
do {
auto ret = ::write(fd, buf, length);
if (ret < 0)
throw formatted_error("Error writing {:x} bytes at {:x} (errno {}).", orig_length, pos, errno);
if ((size_t)ret == length)
break;
buf += ret;
length -= ret;
} while (true);
#endif
}
ntfs2btrfs-20240115/src/ntfs.h000066400000000000000000000361161455127722500160020ustar00rootroot00000000000000/* Copyright (c) Mark Harmstone 2020
*
* This file is part of ntfs2btrfs.
*
* Ntfs2btrfs is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public Licence as published by
* the Free Software Foundation, either version 2 of the Licence, or
* (at your option) any later version.
*
* Ntfs2btrfs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public Licence for more details.
*
* You should have received a copy of the GNU General Public Licence
* along with Ntfs2btrfs. If not, see <https://www.gnu.org/licenses/>. */
#pragma once
#include "ntfs2btrfs.h"
#include
#include
#include
#include
#include
#ifdef _WIN32
#include <windows.h>
#else
#include <unistd.h>
#endif
#pragma pack(push,1)
typedef struct {
uint8_t Jmp[3];
uint8_t FsName[8];
uint16_t BytesPerSector;
uint8_t SectorsPerCluster;
uint16_t ReservedSectors;
uint8_t Unused1[5];
uint8_t Media;
uint8_t Unused2[2];
uint16_t SectorsPerTrack;
uint16_t Heads;
uint32_t HiddenSectors;
uint32_t Unused3;
uint32_t Unknown;
uint64_t TotalSectors;
uint64_t MFT;
uint64_t MFTMirr;
int8_t ClustersPerMFTRecord;
uint8_t Padding1[3];
int8_t ClustersPerIndexRecord;
uint8_t Padding2[3];
uint64_t SerialNumber;
uint32_t Checksum;
} NTFS_BOOT_SECTOR;
#define NTFS_FS_NAME "NTFS "
// https://docs.microsoft.com/en-us/windows/win32/devnotes/attribute-record-header
#define ATTRIBUTE_FLAG_COMPRESSION_MASK 0x00ff
#define ATTRIBUTE_FLAG_SPARSE 0x8000
#define ATTRIBUTE_FLAG_ENCRYPTED 0x4000
enum class NTFS_ATTRIBUTE_FORM : uint8_t {
RESIDENT_FORM = 0,
NONRESIDENT_FORM = 1
};
enum class ntfs_attribute : uint32_t {
STANDARD_INFORMATION = 0x10,
ATTRIBUTE_LIST = 0x20,
FILE_NAME = 0x30,
VOLUME_VERSION = 0x40,
SECURITY_DESCRIPTOR = 0x50,
VOLUME_NAME = 0x60,
VOLUME_INFORMATION = 0x70,
DATA = 0x80,
INDEX_ROOT = 0x90,
INDEX_ALLOCATION = 0xA0,
BITMAP = 0xB0,
REPARSE_POINT = 0xC0,
EA_INFORMATION = 0xD0,
EA = 0xE0,
PROPERTY_SET = 0xF0,
LOGGED_UTILITY_STREAM = 0x100,
};
template<>
struct fmt::formatter<enum ntfs_attribute> {
constexpr auto parse(format_parse_context& ctx) {
auto it = ctx.begin();
if (it != ctx.end() && *it != '}')
throw format_error("invalid format");
return it;
}
template<typename format_context>
auto format(enum ntfs_attribute att, format_context& ctx) const {
switch (att) {
case ntfs_attribute::STANDARD_INFORMATION:
return fmt::format_to(ctx.out(), "STANDARD_INFORMATION");
case ntfs_attribute::ATTRIBUTE_LIST:
return fmt::format_to(ctx.out(), "ATTRIBUTE_LIST");
case ntfs_attribute::FILE_NAME:
return fmt::format_to(ctx.out(), "FILE_NAME");
case ntfs_attribute::VOLUME_VERSION:
return fmt::format_to(ctx.out(), "VOLUME_VERSION");
case ntfs_attribute::SECURITY_DESCRIPTOR:
return fmt::format_to(ctx.out(), "SECURITY_DESCRIPTOR");
case ntfs_attribute::VOLUME_NAME:
return fmt::format_to(ctx.out(), "VOLUME_NAME");
case ntfs_attribute::VOLUME_INFORMATION:
return fmt::format_to(ctx.out(), "VOLUME_INFORMATION");
case ntfs_attribute::DATA:
return fmt::format_to(ctx.out(), "DATA");
case ntfs_attribute::INDEX_ROOT:
return fmt::format_to(ctx.out(), "INDEX_ROOT");
case ntfs_attribute::INDEX_ALLOCATION:
return fmt::format_to(ctx.out(), "INDEX_ALLOCATION");
case ntfs_attribute::BITMAP:
return fmt::format_to(ctx.out(), "BITMAP");
case ntfs_attribute::REPARSE_POINT:
return fmt::format_to(ctx.out(), "REPARSE_POINT");
case ntfs_attribute::EA_INFORMATION:
return fmt::format_to(ctx.out(), "EA_INFORMATION");
case ntfs_attribute::EA:
return fmt::format_to(ctx.out(), "EA");
case ntfs_attribute::PROPERTY_SET:
return fmt::format_to(ctx.out(), "PROPERTY_SET");
case ntfs_attribute::LOGGED_UTILITY_STREAM:
return fmt::format_to(ctx.out(), "LOGGED_UTILITY_STREAM");
default:
return fmt::format_to(ctx.out(), "{:x}", (uint32_t)att);
}
}
};
typedef struct _ATTRIBUTE_RECORD_HEADER {
enum ntfs_attribute TypeCode;
uint16_t RecordLength;
uint16_t Unknown;
NTFS_ATTRIBUTE_FORM FormCode;
uint8_t NameLength;
uint16_t NameOffset;
uint16_t Flags;
uint16_t Instance;
union {
struct {
uint32_t ValueLength;
uint16_t ValueOffset;
uint8_t Reserved[2];
} Resident;
struct {
uint64_t LowestVcn;
uint64_t HighestVcn;
uint16_t MappingPairsOffset;
uint16_t CompressionUnit;
uint32_t Padding;
uint64_t AllocatedLength;
uint64_t FileSize;
uint64_t ValidDataLength;
uint64_t TotalAllocated;
} Nonresident;
} Form;
} ATTRIBUTE_RECORD_HEADER;
// https://docs.microsoft.com/en-us/windows/win32/devnotes/multi-sector-header
typedef struct {
uint32_t Signature;
uint16_t UpdateSequenceArrayOffset;
uint16_t UpdateSequenceArraySize;
} MULTI_SECTOR_HEADER;
// https://docs.microsoft.com/en-us/windows/win32/devnotes/mft-segment-reference
typedef struct {
uint64_t SegmentNumber : 48;
uint64_t SequenceNumber : 16;
} MFT_SEGMENT_REFERENCE;
// based on https://docs.microsoft.com/en-us/windows/win32/devnotes/file-record-segment-header and
// http://www.cse.scu.edu/~tschwarz/coen252_07Fall/Lectures/NTFS.html
typedef struct {
MULTI_SECTOR_HEADER MultiSectorHeader;
uint64_t LogFileSequenceNumber;
uint16_t SequenceNumber;
uint16_t HardLinkCount;
uint16_t FirstAttributeOffset;
uint16_t Flags;
uint32_t EntryUsedSize;
uint32_t EntryAllocatedSize;
MFT_SEGMENT_REFERENCE BaseFileRecordSegment;
uint16_t NextAttributeID;
} FILE_RECORD_SEGMENT_HEADER;
#define FILE_RECORD_SEGMENT_IN_USE 1
#define FILE_RECORD_IS_DIRECTORY 2
static const uint32_t NTFS_FILE_SIGNATURE = 0x454c4946; // "FILE"
#define NTFS_VOLUME_INODE 3
#define NTFS_ROOT_DIR_INODE 5
#define NTFS_BITMAP_INODE 6
#define NTFS_SECURE_INODE 9
// https://flatcap.org/linux-ntfs/ntfs/attributes/standard_information.html
typedef struct {
int64_t CreationTime;
int64_t LastAccessTime;
int64_t LastWriteTime;
int64_t ChangeTime;
uint32_t FileAttributes;
uint32_t MaximumVersions;
uint32_t VersionNumber;
uint32_t ClassId;
uint32_t OwnerId;
uint32_t SecurityId;
uint64_t QuotaCharged;
uint64_t USN;
} STANDARD_INFORMATION;
#define FILE_ATTRIBUTE_READONLY 0x00000001
#define FILE_ATTRIBUTE_HIDDEN 0x00000002
#define FILE_ATTRIBUTE_SYSTEM 0x00000004
#define FILE_ATTRIBUTE_DIRECTORY 0x00000010
#define FILE_ATTRIBUTE_ARCHIVE 0x00000020
#define FILE_ATTRIBUTE_DEVICE 0x00000040
#define FILE_ATTRIBUTE_NORMAL 0x00000080
#define FILE_ATTRIBUTE_TEMPORARY 0x00000100
#define FILE_ATTRIBUTE_SPARSE_FILE 0x00000200
#define FILE_ATTRIBUTE_REPARSE_POINT 0x00000400
#define FILE_ATTRIBUTE_COMPRESSED 0x00000800
#define FILE_ATTRIBUTE_OFFLINE 0x00001000
#define FILE_ATTRIBUTE_NOT_CONTENT_INDEXED 0x00002000
#define FILE_ATTRIBUTE_ENCRYPTED 0x00004000
#define FILE_ATTRIBUTE_VIRTUAL 0x00010000
#define FILE_ATTRIBUTE_DIRECTORY_MFT 0x10000000
// https://flatcap.org/linux-ntfs/ntfs/attributes/file_name.html
enum class file_name_type : uint8_t {
POSIX = 0,
WINDOWS = 1,
DOS = 2,
WINDOWS_AND_DOS = 3
};
typedef struct {
MFT_SEGMENT_REFERENCE Parent;
int64_t CreationTime;
int64_t LastAccessTime;
int64_t LastWriteTime;
int64_t ChangeTime;
uint64_t AllocationSize;
uint64_t EndOfFile;
uint32_t FileAttributes;
uint32_t EaSize;
uint8_t FileNameLength;
file_name_type Namespace;
char16_t FileName[1];
} FILE_NAME;
// https://flatcap.org/linux-ntfs/ntfs/concepts/node_header.html
typedef struct {
uint32_t first_entry;
uint32_t total_size;
uint32_t allocated_size;
uint32_t flags;
} index_node_header;
// https://flatcap.org/linux-ntfs/ntfs/concepts/index_entry.html
#define INDEX_ENTRY_SUBNODE 1
#define INDEX_ENTRY_LAST 2
typedef struct {
MFT_SEGMENT_REFERENCE file_reference;
uint16_t entry_length;
uint16_t stream_length;
uint32_t flags;
} index_entry;
// https://flatcap.org/linux-ntfs/ntfs/attributes/index_root.html
typedef struct {
uint32_t attribute_type;
uint32_t collation_rule;
uint32_t bytes_per_index_record;
uint8_t clusters_per_index_record;
uint8_t padding[3];
index_node_header node_header;
index_entry entries[1];
} index_root;
// https://flatcap.org/linux-ntfs/ntfs/concepts/index_record.html
typedef struct {
MULTI_SECTOR_HEADER MultiSectorHeader;
uint64_t sequence_number;
uint64_t vcn;
index_node_header header;
uint16_t update_sequence;
} index_record;
#define INDEX_RECORD_MAGIC 0x58444e49 // "INDX"
// https://flatcap.org/linux-ntfs/ntfs/files/secure.html
typedef struct {
uint32_t hash;
uint32_t id;
uint64_t offset;
uint32_t length;
} sd_entry;
// https://docs.microsoft.com/en-us/windows-hardware/drivers/ddi/ntifs/ns-ntifs-_reparse_data_buffer
typedef struct {
uint32_t ReparseTag;
uint16_t ReparseDataLength;
uint16_t Reserved;
union {
struct {
uint16_t SubstituteNameOffset;
uint16_t SubstituteNameLength;
uint16_t PrintNameOffset;
uint16_t PrintNameLength;
uint32_t Flags;
char16_t PathBuffer[1];
} SymbolicLinkReparseBuffer;
struct {
uint16_t SubstituteNameOffset;
uint16_t SubstituteNameLength;
uint16_t PrintNameOffset;
uint16_t PrintNameLength;
char16_t PathBuffer[1];
} MountPointReparseBuffer;
struct {
uint8_t DataBuffer[1];
} GenericReparseBuffer;
struct {
uint32_t unknown;
char name[1];
} LxSymlink; // undocumented
};
} REPARSE_DATA_BUFFER;
typedef struct {
uint32_t unknown;
char name[1];
} REPARSE_DATA_BUFFER_LX_SYMLINK;
#ifndef IO_REPARSE_TAG_SYMLINK
#define IO_REPARSE_TAG_SYMLINK 0xa000000c
#endif
#define IO_REPARSE_TAG_LX_SYMLINK 0xa000001d
#ifndef IO_REPARSE_TAG_WOF
#define IO_REPARSE_TAG_WOF 0x80000017
#endif
#ifndef SYMLINK_FLAG_RELATIVE
#define SYMLINK_FLAG_RELATIVE 0x00000001
#endif
// https://flatcap.org/linux-ntfs/ntfs/attributes/volume_information.html
typedef struct {
uint64_t Unknown1;
uint8_t MajorVersion;
uint8_t MinorVersion;
uint16_t Flags;
uint32_t Unknown2;
} VOLUME_INFORMATION;
#define NTFS_VOLUME_DIRTY 0x0001
#define NTFS_VOLUME_RESIZE_JOURNAL 0x0002
#define NTFS_VOLUME_UPGRADE_ON_MOUNT 0x0004
#define NTFS_VOLUME_MOUNTED_ON_NT4 0x0008
#define NTFS_VOLUME_DELETE_USN_UNDERWAY 0x0010
#define NTFS_VOLUME_REPAIR_OBJECT_IDS 0x0020
#define NTFS_VOLUME_MODIFIED_BY_CHKDSK 0x8000
// https://flatcap.org/linux-ntfs/ntfs/attributes/attribute_list.html
typedef struct {
enum ntfs_attribute type;
uint16_t record_length;
uint8_t name_length;
uint8_t name_offset;
uint64_t starting_vcn;
MFT_SEGMENT_REFERENCE file_reference;
uint16_t instance;
} attribute_list_entry;
#define WOF_CURRENT_VERSION 1
#define WOF_PROVIDER_WIM 1
#define WOF_PROVIDER_FILE 2
typedef struct {
uint32_t ReparseTag;
uint16_t ReparseDataLength;
uint16_t Reserved;
uint8_t DataBuffer[1];
} reparse_point_header; // edited form of REPARSE_DATA_BUFFER
typedef struct {
uint32_t Version;
uint32_t Provider;
} wof_external_info; // WOF_EXTERNAL_INFO in winioctl.h
#define FILE_PROVIDER_CURRENT_VERSION 1
#define FILE_PROVIDER_COMPRESSION_XPRESS4K 0
#define FILE_PROVIDER_COMPRESSION_LZX 1
#define FILE_PROVIDER_COMPRESSION_XPRESS8K 2
#define FILE_PROVIDER_COMPRESSION_XPRESS16K 3
typedef struct {
uint32_t Version;
uint32_t Algorithm;
} file_provider_external_info_v0; // FILE_PROVIDER_EXTERNAL_INFO_V0 in winioctl.h
// cf. https://docs.microsoft.com/en-us/windows-hardware/drivers/ddi/wdm/ns-wdm-_file_full_ea_information
typedef struct {
uint32_t NextEntryOffset;
uint8_t Flags;
uint8_t EaNameLength;
uint16_t EaValueLength;
char EaName[1];
} ea_data;
typedef struct {
uint32_t major;
uint32_t minor;
} lxdev;
// https://dfir.ru/2019/01/19/ntfs-today/
typedef struct {
uint16_t format;
uint16_t version;
uint32_t mode;
uint32_t uid;
uint32_t gid;
uint32_t rdev;
uint32_t atime_ns;
uint32_t mtime_ns;
uint32_t ctime_ns;
uint64_t atime;
uint64_t mtime;
uint64_t ctime;
} lxattrb;
#pragma pack(pop)
class ntfs;
struct mapping {
mapping(uint64_t lcn, uint64_t vcn, uint64_t length) : lcn(lcn), vcn(vcn), length(length) { }
uint64_t lcn;
uint64_t vcn;
uint64_t length;
};
class ntfs_file {
public:
ntfs_file(ntfs& dev, uint64_t inode);
buffer_t read(uint64_t offset = 0, uint32_t length = 0, enum ntfs_attribute type = ntfs_attribute::DATA, std::u16string_view name = u"");
std::list<mapping> read_mappings(enum ntfs_attribute type = ntfs_attribute::DATA, std::u16string_view name = u"");
bool is_directory() const {
return file_record->Flags & FILE_RECORD_IS_DIRECTORY;
}
void loop_through_atts(const std::function<bool(const ATTRIBUTE_RECORD_HEADER&, std::string_view, std::u16string_view)>& func);
std::string get_filename();
FILE_RECORD_SEGMENT_HEADER* file_record;
private:
buffer_t read_nonresident_attribute(uint64_t offset, uint32_t length, const ATTRIBUTE_RECORD_HEADER* att);
buffer_t file_record_buf;
ntfs& dev;
uint64_t inode;
};
class ntfs {
public:
ntfs(const std::string& fn);
~ntfs() {
#ifdef _WIN32
CloseHandle(h);
#else
close(fd);
#endif
}
void seek(uint64_t pos);
void read(uint8_t* buf, size_t length);
void write(const uint8_t* buf, size_t length);
std::string_view find_sd(uint32_t id, ntfs_file& secure);
std::unique_ptr<ntfs_file> mft;
buffer_t boot_sector_buf;
NTFS_BOOT_SECTOR* boot_sector;
uint64_t file_record_size;
std::map sd_list;
#ifdef _WIN32
HANDLE h;
#else
int fd;
#endif
};
// ntfs.cpp
void read_nonresident_mappings(const ATTRIBUTE_RECORD_HEADER& att, std::list<mapping>& mappings,
uint32_t cluster_size, uint64_t vdl);
void populate_skip_list(ntfs& dev, uint64_t inode, std::list<uint64_t>& skiplist);
ntfs2btrfs-20240115/src/ntfs2btrfs.cpp000077500000000000000000004113231455127722500174600ustar00rootroot00000000000000/* Copyright (c) Mark Harmstone 2020
*
* This file is part of ntfs2btrfs.
*
* Ntfs2btrfs is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public Licence as published by
* the Free Software Foundation, either version 2 of the Licence, or
* (at your option) any later version.
*
* Ntfs2btrfs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public Licence for more details.
*
* You should have received a copy of the GNU General Public Licence
* along with Ntfs2btrfs. If not, see <https://www.gnu.org/licenses/>. */
#include "ntfs.h"
#include "ntfs2btrfs.h"
#include "crc32c.h"
#include "xxhash.h"
#if defined(__i386__) || defined(__x86_64__)
#ifndef _MSC_VER
#include <cpuid.h>
#else
#include <intrin.h>
#endif
#endif
#include
#include
#include
#include
#include
#include
#include
#ifdef _WIN32
#include <windows.h>
#else
#include
#endif
#if !defined(_WIN32) && !defined(__FreeBSD__)
#define USE_MMAP
#endif
#include "config.h"
using namespace std;
static list<chunk> chunks;
static list<root> roots;
static uint32_t tree_size = 0x4000; // FIXME
static list<space> space_list;
static bool chunks_changed;
static uint64_t data_size = 0;
static BTRFS_UUID fs_uuid, chunk_uuid, dev_uuid, subvol_uuid;
static list relocs;
static uint64_t device_size, orig_device_size;
static bool reloc_last_sector = false;
static uint64_t mapped_inodes = 0, rewritten_inodes = 0, inline_inodes = 0;
static uint64_t last_chunk_end;
static const uint64_t stripe_length = 0x10000;
static const uint64_t chunk_virt_offset = 0x100000;
static const uint64_t dummy_inode = 0xffffffffffffffff; // protected data
static const uint64_t first_ntfs_inode = 24;
static const uint64_t data_chunk_size = 128 * 1024 * 1024; // FIXME
static const uint64_t inode_offset = 0x101;
static const uint16_t max_inline = 2048;
static const uint64_t max_extent_size = 0x8000000; // 128 MB
static const uint64_t max_comp_extent_size = 0x20000; // 128 KB
static constexpr char chunk_error_message[] = "Could not find enough space to create new chunk. Try clearing a few gigabytes of space, or defragging.";
#define EA_NTACL "security.NTACL"
#define EA_NTACL_HASH 0x45922146
#define EA_DOSATTRIB "user.DOSATTRIB"
#define EA_DOSATTRIB_HASH 0x914f9939
#define EA_REPARSE "user.reparse"
#define EA_REPARSE_HASH 0xfabad1fe
#define EA_CAP "security.capability"
#define EA_CAP_HASH 0x7c3650b1
using runs_t = map>;
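// Returns the number of bytes needed to encode the UTF-16 string as UTF-8,
// counting three bytes (the U+FFFD replacement character) for each unpaired
// surrogate.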
static constexpr size_t utf16_to_utf8_len(u16string_view sv) noexcept {
size_t ret = 0;
while (!sv.empty()) {
if (sv[0] < 0x80)
ret++;
else if (sv[0] < 0x800)
ret += 2;
else if (sv[0] < 0xd800)
ret += 3;
else if (sv[0] < 0xdc00) {
if (sv.length() < 2 || (sv[1] & 0xdc00) != 0xdc00) {
ret += 3;
sv = sv.substr(1);
continue;
}
ret += 4;
sv = sv.substr(1);
} else
ret += 3;
sv = sv.substr(1);
}
return ret;
}
static constexpr void utf16_to_utf8_span(u16string_view sv, span<char> t) noexcept {
auto ptr = t.begin();
if (ptr == t.end())
return;
while (!sv.empty()) {
if (sv[0] < 0x80) {
*ptr = (uint8_t)sv[0];
ptr++;
if (ptr == t.end())
return;
} else if (sv[0] < 0x800) {
*ptr = (uint8_t)(0xc0 | (sv[0] >> 6));
ptr++;
if (ptr == t.end())
return;
*ptr = (uint8_t)(0x80 | (sv[0] & 0x3f));
ptr++;
if (ptr == t.end())
return;
} else if (sv[0] < 0xd800) {
*ptr = (uint8_t)(0xe0 | (sv[0] >> 12));
ptr++;
if (ptr == t.end())
return;
*ptr = (uint8_t)(0x80 | ((sv[0] >> 6) & 0x3f));
ptr++;
if (ptr == t.end())
return;
*ptr = (uint8_t)(0x80 | (sv[0] & 0x3f));
ptr++;
if (ptr == t.end())
return;
} else if (sv[0] < 0xdc00) {
if (sv.length() < 2 || (sv[1] & 0xdc00) != 0xdc00) {
*ptr = (uint8_t)0xef;
ptr++;
if (ptr == t.end())
return;
*ptr = (uint8_t)0xbf;
ptr++;
if (ptr == t.end())
return;
*ptr = (uint8_t)0xbd;
ptr++;
if (ptr == t.end())
return;
sv = sv.substr(1);
continue;
}
char32_t cp = 0x10000 | ((sv[0] & ~0xd800) << 10) | (sv[1] & ~0xdc00);
*ptr = (uint8_t)(0xf0 | (cp >> 18));
ptr++;
if (ptr == t.end())
return;
*ptr = (uint8_t)(0x80 | ((cp >> 12) & 0x3f));
ptr++;
if (ptr == t.end())
return;
*ptr = (uint8_t)(0x80 | ((cp >> 6) & 0x3f));
ptr++;
if (ptr == t.end())
return;
*ptr = (uint8_t)(0x80 | (cp & 0x3f));
ptr++;
if (ptr == t.end())
return;
sv = sv.substr(1);
} else if (sv[0] < 0xe000) {
*ptr = (uint8_t)0xef;
ptr++;
if (ptr == t.end())
return;
*ptr = (uint8_t)0xbf;
ptr++;
if (ptr == t.end())
return;
*ptr = (uint8_t)0xbd;
ptr++;
if (ptr == t.end())
return;
} else {
*ptr = (uint8_t)(0xe0 | (sv[0] >> 12));
ptr++;
if (ptr == t.end())
return;
*ptr = (uint8_t)(0x80 | ((sv[0] >> 6) & 0x3f));
ptr++;
if (ptr == t.end())
return;
*ptr = (uint8_t)(0x80 | (sv[0] & 0x3f));
ptr++;
if (ptr == t.end())
return;
}
sv = sv.substr(1);
}
}
string utf16_to_utf8(u16string_view sv) {
if (sv.empty())
return "";
string ret(utf16_to_utf8_len(sv), 0);
utf16_to_utf8_span(sv, ret);
return ret;
}
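// Removes the range [offset, offset + length) from a free-space list,
// trimming or splitting any entries that overlap it.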
static void space_list_remove(list<space>& space_list, uint64_t offset, uint64_t length) {
auto it = space_list.begin();
while (it != space_list.end()) {
if (it->offset > offset + length)
return;
if (it->offset >= offset && it->offset + it->length <= offset + length) { // remove entry entirely
auto it2 = it;
it2++;
space_list.erase(it);
it = it2;
continue;
} else if (offset + length > it->offset && offset + length < it->offset + it->length) {
if (offset > it->offset) { // cut out hole
space_list.insert(it, space(it->offset, offset - it->offset));
it->length = it->offset + it->length - offset - length;
it->offset = offset + length;
return;
} else { // remove start of entry
it->length -= offset + length - it->offset;
it->offset = offset + length;
}
} else if (offset > it->offset && offset < it->offset + it->length) // remove end of entry
it->length = offset - it->offset;
it++;
}
}
static void remove_superblocks(chunk& c) {
unsigned int i = 0;
// FIXME - DUP
while (superblock_addrs[i] != 0) {
if (c.disk_start + c.length > superblock_addrs[i] && c.disk_start < superblock_addrs[i] + stripe_length) {
uint64_t start = max(c.offset, superblock_addrs[i] - c.disk_start + c.offset);
uint64_t end = min(c.offset + c.length, superblock_addrs[i] + stripe_length - c.disk_start + c.offset);
space_list_remove(c.space_list, start, end - start);
}
i++;
}
}
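// Walks the NTFS volume bitmap and creates a btrfs data chunk for each region
// of the disk containing allocated clusters, building each chunk's free-space
// list from the gaps between used runs and excluding superblock locations.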
static void create_data_chunks(ntfs& dev, const buffer_t& bmpdata) {
uint64_t cluster_size = (uint64_t)dev.boot_sector->BytesPerSector * (uint64_t)dev.boot_sector->SectorsPerCluster;
uint64_t addr = 0;
// FIXME - make sure clusters_per_chunk is multiple of 8
string_view bdsv{(char*)bmpdata.data(), bmpdata.size()};
while (bdsv.length() > 0 && addr < device_size) {
uint64_t chunk_length = min(device_size - addr, data_chunk_size);
uint64_t clusters_per_chunk = chunk_length / cluster_size;
string_view csv = bdsv.substr(0, (size_t)(clusters_per_chunk / 8));
size_t len = csv.length();
uint64_t run_start = 0, pos = 0;
bool set = false;
list<space> used;
if (chunk_length % stripe_length != 0)
chunk_length -= chunk_length % stripe_length;
// FIXME - do by uint64_t if 64-bit processor?
while (csv.size() >= sizeof(uint32_t)) {
auto v = *(uint32_t*)csv.data();
if ((!set && v == 0) || (set && v == 0xffffffff)) {
pos += sizeof(uint32_t) * 8;
csv = csv.substr(sizeof(uint32_t));
continue;
}
if (!set && v == 0xffffffff) {
run_start = pos;
set = true;
pos += sizeof(uint32_t) * 8;
} else if (set && v == 0) {
if (pos != run_start)
used.emplace_back(run_start, pos - run_start);
set = false;
pos += sizeof(uint32_t) * 8;
} else {
for (unsigned int i = 0; i < sizeof(uint32_t) * 8; i++) {
if (v & 1) {
if (!set) {
run_start = pos;
set = true;
}
} else {
if (set) {
if (pos != run_start)
used.emplace_back(run_start, pos - run_start);
set = false;
}
}
v >>= 1;
pos++;
}
}
csv = csv.substr(sizeof(uint32_t));
}
while (!csv.empty()) {
auto v = *(uint8_t*)csv.data();
if ((!set && v == 0) || (set && v == 0xff)) {
pos++;
csv = csv.substr(1);
continue;
}
if (!set && v == 0xff) {
run_start = pos;
set = true;
pos += 8;
} else if (set && v == 0) {
if (pos != run_start)
used.emplace_back(run_start, pos - run_start);
set = false;
pos += 8;
} else {
for (unsigned int i = 0; i < 8; i++) {
if (v & 1) {
if (!set) {
run_start = pos;
set = true;
}
} else {
if (set) {
if (pos != run_start)
used.emplace_back(run_start, pos - run_start);
set = false;
}
}
v >>= 1;
pos++;
}
}
csv = csv.substr(1);
}
if (set && run_start != pos)
used.emplace_back(run_start, pos - run_start);
if (!used.empty()) {
space_list_remove(space_list, addr, chunk_length);
chunks.emplace_back(addr + chunk_virt_offset, chunk_length, addr, BLOCK_FLAG_DATA);
auto& c = chunks.back();
uint64_t last = 0;
for (const auto& u : used) {
if (u.offset > last)
c.space_list.emplace_back(c.offset + (last * cluster_size), (u.offset - last) * cluster_size);
last = u.offset + u.length;
}
if (last * cluster_size < chunk_length)
c.space_list.emplace_back(c.offset + (last * cluster_size), chunk_length - (last * cluster_size));
remove_superblocks(c);
}
addr += data_chunk_size;
bdsv = bdsv.substr(len);
}
last_chunk_end = chunks.back().offset - chunk_virt_offset + chunks.back().length;
}
static void add_item(root& r, uint64_t obj_id, btrfs_key_type obj_type, uint64_t offset, const buffer_t& buf) {
auto ret = r.items.emplace(KEY{obj_id, obj_type, offset}, buf);
if (!ret.second)
throw formatted_error("Could not insert entry ({:x}, {}, {:x}) into root items list.", obj_id, obj_type, offset);
}
static void add_item_move(root& r, uint64_t obj_id, btrfs_key_type obj_type, uint64_t offset, buffer_t& buf) {
auto ret = r.items.emplace(KEY{obj_id, obj_type, offset}, buffer_t{});
if (!ret.second)
throw formatted_error("Could not insert entry ({:x}, {}, {:x}) into root items list.", obj_id, obj_type, offset);
auto& it = ret.first->second;
it.swap(buf);
}
static void add_item(root& r, uint64_t obj_id, btrfs_key_type obj_type, uint64_t offset, const void* data, uint16_t len) {
auto ret = r.items.emplace(KEY{obj_id, obj_type, offset}, buffer_t(len));
if (!ret.second)
throw formatted_error("Could not insert entry ({:x}, {}, {:x}) into root items list.", obj_id, obj_type, offset);
auto& it = ret.first->second;
memcpy(it.data(), data, len);
}
static void add_chunk(root& chunk_root, root& devtree_root, root& extent_root, const chunk& c) {
chunk_item_one_stripe ci1s;
DEV_EXTENT de;
BLOCK_GROUP_ITEM bgi;
memset(&ci1s, 0, sizeof(chunk_item_one_stripe));
ci1s.chunk_item.size = c.length;
ci1s.chunk_item.root_id = BTRFS_ROOT_EXTENT;
ci1s.chunk_item.stripe_length = 0x10000;
ci1s.chunk_item.type = c.type;
ci1s.chunk_item.opt_io_alignment = 0x10000;
ci1s.chunk_item.opt_io_width = 0x10000;
ci1s.chunk_item.sector_size = 0x1000; // FIXME - get from superblock
ci1s.chunk_item.num_stripes = 1;
ci1s.chunk_item.sub_stripes = 1;
ci1s.stripe.dev_id = 1;
ci1s.stripe.offset = c.disk_start;
ci1s.stripe.dev_uuid = dev_uuid;
add_item(chunk_root, 0x100, btrfs_key_type::CHUNK_ITEM, c.offset, &ci1s, sizeof(ci1s));
de.chunktree = BTRFS_ROOT_CHUNK;
de.objid = 0x100;
de.address = c.offset;
de.length = c.length;
de.chunktree_uuid = chunk_uuid;
add_item(devtree_root, 1, btrfs_key_type::DEV_EXTENT, c.disk_start, &de, sizeof(DEV_EXTENT));
bgi.chunk_tree = 0x100;
bgi.flags = c.type;
// bgi.used gets set in update_extent_root
add_item(extent_root, c.offset, btrfs_key_type::BLOCK_GROUP_ITEM, c.length, &bgi, sizeof(BLOCK_GROUP_ITEM));
}
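// Allocates a tree-sized block for metadata from an existing metadata or
// system chunk, creating a new chunk if none has enough free space, and
// records a METADATA_ITEM for the block in the extent tree.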
static uint64_t allocate_metadata(uint64_t r, root& extent_root, uint8_t level) {
bool system_chunk = r == BTRFS_ROOT_CHUNK;
uint64_t chunk_size, disk_offset;
bool found = false;
metadata_item mi;
mi.extent_item.refcount = 1;
mi.extent_item.generation = 1;
mi.extent_item.flags = EXTENT_ITEM_TREE_BLOCK;
mi.type = btrfs_key_type::TREE_BLOCK_REF;
mi.tbr.offset = r;
for (auto& c : chunks) {
if ((system_chunk && c.type & BLOCK_FLAG_SYSTEM) || (!system_chunk && c.type & BLOCK_FLAG_METADATA)) {
for (auto it = c.space_list.begin(); it != c.space_list.end(); it++) {
if (it->length >= tree_size) {
uint64_t addr = it->offset;
if (it->length == tree_size)
c.space_list.erase(it);
else {
it->offset += tree_size;
it->length -= tree_size;
}
c.used += tree_size;
add_item(extent_root, addr, btrfs_key_type::METADATA_ITEM, level, &mi, sizeof(metadata_item));
return addr;
}
}
}
}
// create new chunk
chunks_changed = true;
if (system_chunk)
chunk_size = 32 * 1024 * 1024;
else
chunk_size = 128 * 1024 * 1024; // FIXME
for (const auto& s : space_list) {
if (s.length >= chunk_size) {
disk_offset = s.offset;
space_list_remove(space_list, disk_offset, chunk_size);
found = true;
break;
}
}
if (!found)
throw formatted_error(chunk_error_message);
chunks.emplace_back(disk_offset + chunk_virt_offset, chunk_size, disk_offset, system_chunk ? BLOCK_FLAG_SYSTEM : BLOCK_FLAG_METADATA);
chunk& c = chunks.back();
c.space_list.emplace_back(c.offset, c.length);
remove_superblocks(c);
for (auto it = c.space_list.begin(); it != c.space_list.end(); it++) {
if (it->length >= tree_size) {
uint64_t addr = it->offset;
if (it->length == tree_size)
c.space_list.erase(it);
else {
it->offset += tree_size;
it->length -= tree_size;
}
c.used = tree_size;
add_item(extent_root, addr, btrfs_key_type::METADATA_ITEM, level, &mi, sizeof(metadata_item));
return addr;
}
}
throw formatted_error("Could not allocate metadata address");
}
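// Allocates length bytes from a data chunk, creating a new data chunk if
// necessary, and returns the virtual (btrfs) address of the allocation.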
static uint64_t allocate_data(uint64_t length, bool change_used) {
uint64_t disk_offset;
bool found = false;
for (auto& c : chunks) {
if (c.type & BLOCK_FLAG_DATA) {
for (auto it = c.space_list.begin(); it != c.space_list.end(); it++) {
if (it->length >= length) {
uint64_t addr = it->offset;
if (it->length == length)
c.space_list.erase(it);
else {
it->offset += length;
it->length -= length;
}
if (change_used)
c.used += length;
return addr;
}
}
}
}
// create new chunk
chunks_changed = true;
for (const auto& s : space_list) {
if (s.length >= data_chunk_size) {
disk_offset = s.offset;
space_list_remove(space_list, disk_offset, data_chunk_size);
found = true;
break;
}
}
if (!found)
throw formatted_error(chunk_error_message);
chunks.emplace_back(disk_offset + chunk_virt_offset, data_chunk_size, disk_offset, BLOCK_FLAG_DATA);
chunk& c = chunks.back();
c.space_list.emplace_back(c.offset, c.length);
remove_superblocks(c);
for (auto it = c.space_list.begin(); it != c.space_list.end(); it++) {
if (it->length >= length) {
uint64_t addr = it->offset;
if (it->length == length)
c.space_list.erase(it);
else {
it->offset += length;
it->length -= length;
}
if (change_used)
c.used = length;
return addr;
}
}
throw formatted_error("Could not allocate data address");
}
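// Computes the checksum of a tree block over everything after the csum field,
// using whichever hash algorithm the filesystem is being created with.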
static void calc_tree_hash(tree_header& th, enum btrfs_csum_type csum_type) {
switch (csum_type) {
case btrfs_csum_type::crc32c:
*(uint32_t*)th.csum = ~calc_crc32c(0xffffffff, (uint8_t*)&th.fs_uuid, tree_size - (uint32_t)sizeof(th.csum));
break;
case btrfs_csum_type::xxhash:
*(uint64_t*)th.csum = XXH64((uint8_t*)&th.fs_uuid, tree_size - sizeof(th.csum), 0);
break;
case btrfs_csum_type::sha256:
calc_sha256((uint8_t*)&th, &th.fs_uuid, tree_size - sizeof(th.csum));
break;
case btrfs_csum_type::blake2:
blake2b(&th, 32, &th.fs_uuid, tree_size - sizeof(th.csum));
break;
default:
break;
}
}
void root::create_trees(root& extent_root, enum btrfs_csum_type csum_type) {
uint32_t space_left, num_items;
buffer_t buf(tree_size);
memset(buf.data(), 0, tree_size);
space_left = tree_size - (uint32_t)sizeof(tree_header);
num_items = 0;
auto& th = *(tree_header*)buf.data();
th.fs_uuid = fs_uuid;
th.flags = HEADER_FLAG_MIXED_BACKREF | HEADER_FLAG_WRITTEN;
th.chunk_tree_uuid = chunk_uuid;
th.generation = 1;
th.tree_id = id;
th.level = 0;
auto get_address = [this](root& extent_root, uint8_t level) {
uint64_t addr;
if (!old_addresses.empty()) {
addr = old_addresses.front().first;
if (level != old_addresses.front().second) { // change metadata level in extent tree
if (auto f = extent_root.items.find(KEY{addr, btrfs_key_type::METADATA_ITEM, old_addresses.front().second}); f != extent_root.items.end()) {
auto d = move(f->second);
extent_root.items.erase(f);
extent_root.items.emplace(make_pair(KEY{addr, btrfs_key_type::METADATA_ITEM, level}, d));
}
}
old_addresses.pop_front();
} else {
addr = allocate_metadata(id, extent_root, level);
allocations_done = true;
}
addresses.emplace_back(addr, level);
return addr;
};
{
auto ln = (leaf_node*)((uint8_t*)buf.data() + sizeof(tree_header));
uint32_t data_off = tree_size - (uint32_t)sizeof(tree_header);
for (const auto& i : items) {
if (sizeof(leaf_node) + i.second.size() > space_left) { // tree complete, add to list
th.address = get_address(extent_root, 0);
th.num_items = num_items;
calc_tree_hash(th, csum_type);
trees.push_back(buf);
metadata_size += tree_size;
memset(buf.data(), 0, tree_size);
th.fs_uuid = fs_uuid;
th.flags = HEADER_FLAG_MIXED_BACKREF | HEADER_FLAG_WRITTEN;
th.chunk_tree_uuid = chunk_uuid;
th.generation = 1;
th.tree_id = id;
space_left = data_off = tree_size - (uint32_t)sizeof(tree_header);
num_items = 0;
ln = (leaf_node*)((uint8_t*)buf.data() + sizeof(tree_header));
}
if (sizeof(leaf_node) + i.second.size() + sizeof(tree_header) > tree_size)
throw formatted_error("Item too large for tree.");
ln->key = i.first;
ln->size = (uint32_t)i.second.size();
if (!i.second.empty()) {
data_off -= (uint32_t)i.second.size();
memcpy((uint8_t*)buf.data() + sizeof(tree_header) + data_off, i.second.data(), i.second.size());
}
ln->offset = data_off;
ln++;
num_items++;
space_left -= (uint32_t)(sizeof(leaf_node) + i.second.size());
}
}
if (num_items > 0 || items.size() == 0) { // flush remaining tree
th.address = get_address(extent_root, 0);
th.num_items = num_items;
calc_tree_hash(th, csum_type);
trees.push_back(buf);
metadata_size += tree_size;
}
level = 0;
if (trees.size() == 1) { // no internal trees needed
tree_addr = ((tree_header*)trees.back().data())->address;
return;
}
// create internal trees if necessary
do {
unsigned int trees_added = 0;
level++;
memset(buf.data(), 0, tree_size);
auto& th = *(tree_header*)buf.data();
th.fs_uuid = fs_uuid;
th.flags = HEADER_FLAG_MIXED_BACKREF | HEADER_FLAG_WRITTEN;
th.chunk_tree_uuid = chunk_uuid;
th.generation = 1;
th.tree_id = id;
th.level = level;
num_items = 0;
space_left = tree_size - (uint32_t)sizeof(tree_header);
auto in = (internal_node*)((uint8_t*)buf.data() + sizeof(tree_header));
for (const auto& t : trees) {
auto th2 = (tree_header*)t.data();
if (th2->level >= level)
break;
if (th2->level < level - 1)
continue;
if (sizeof(internal_node) > space_left) { // tree complete, add to list
th.address = get_address(extent_root, level);
th.num_items = num_items;
calc_tree_hash(th, csum_type);
trees.push_back(buf);
metadata_size += tree_size;
memset(buf.data(), 0, tree_size);
th.fs_uuid = fs_uuid;
th.flags = HEADER_FLAG_MIXED_BACKREF | HEADER_FLAG_WRITTEN;
th.chunk_tree_uuid = chunk_uuid;
th.generation = 1;
th.tree_id = id;
th.level = level;
space_left = tree_size - (uint32_t)sizeof(tree_header);
num_items = 0;
in = (internal_node*)((uint8_t*)buf.data() + sizeof(tree_header));
trees_added++;
}
auto ln = (leaf_node*)((uint8_t*)t.data() + sizeof(tree_header));
in->key = ln->key;
in->address = th2->address;
in->generation = 1;
in++;
num_items++;
space_left -= (uint32_t)sizeof(internal_node);
}
if (num_items > 0) { // flush remaining tree
th.address = get_address(extent_root, level);
th.num_items = num_items;
calc_tree_hash(th, csum_type);
trees.push_back(buf);
metadata_size += tree_size;
trees_added++;
}
if (trees_added == 1)
break;
} while (true);
tree_addr = ((tree_header*)trees.back().data())->address;
}
void root::write_trees(ntfs& dev) {
for (const auto& t : trees) {
auto& th = *(tree_header*)t.data();
uint64_t addr = th.address;
bool found = false;
for (const auto& c : chunks) {
if (c.offset <= addr && c.offset + c.length >= addr + tree_size) {
uint64_t physaddr = th.address - c.offset + c.disk_start;
// FIXME - handle DUP
dev.seek(physaddr);
dev.write(t.data(), t.size());
found = true;
break;
}
}
if (!found)
throw formatted_error("Could not find chunk containing address."); // FIXME - include number
}
}
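// Copies the NTFS volume label into the btrfs superblock, truncating it to
// MAX_LABEL_SIZE bytes without splitting a UTF-8 code point; failure here is
// reported but not fatal.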
static void set_volume_label(superblock& sb, ntfs& dev) {
try {
ntfs_file vol_file(dev, NTFS_VOLUME_INODE);
auto vnw = vol_file.read(0, 0, ntfs_attribute::VOLUME_NAME);
if (vnw.empty())
return;
auto vn = utf16_to_utf8(u16string_view((char16_t*)vnw.data(), vnw.size()));
if (vn.length() > MAX_LABEL_SIZE) {
vn = vn.substr(0, MAX_LABEL_SIZE);
// remove whole code point
while (!vn.empty() && vn[vn.length() - 1] & 0x80) {
vn.pop_back();
}
cerr << "Truncating volume label to \"" << vn << "\"" << endl;
}
// FIXME - check label doesn't contain slash or backslash
if (vn.empty())
return;
memcpy(sb.label, vn.data(), vn.length());
} catch (const exception& e) { // shouldn't be fatal
cerr << "Error while setting volume label: " << e.what() << endl;
}
}
static void write_superblocks(ntfs& dev, root& chunk_root, root& root_root, enum btrfs_compression compression,
enum btrfs_csum_type csum_type) {
uint32_t sector_size = 0x1000; // FIXME
buffer_t buf((size_t)sector_align(sizeof(superblock), sector_size));
unsigned int i;
uint32_t sys_chunk_size;
uint64_t total_used;
auto& sb = *(superblock*)buf.data();
memset(buf.data(), 0, buf.size());
sys_chunk_size = 0;
for (const auto& c : chunk_root.items) {
if (c.first.obj_type == btrfs_key_type::CHUNK_ITEM) {
auto& ci = *(CHUNK_ITEM*)c.second.data();
if (ci.type & BLOCK_FLAG_SYSTEM) {
sys_chunk_size += sizeof(KEY);
sys_chunk_size += (uint32_t)c.second.size();
}
}
}
if (sys_chunk_size > SYS_CHUNK_ARRAY_SIZE)
throw formatted_error("System chunk list was too long ({} > {}).", sys_chunk_size, SYS_CHUNK_ARRAY_SIZE);
total_used = 0;
for (const auto& c : chunks) {
total_used += c.used;
}
sb.uuid = fs_uuid;
sb.magic = BTRFS_MAGIC;
sb.generation = 1;
sb.root_tree_addr = root_root.tree_addr;
sb.chunk_tree_addr = chunk_root.tree_addr;
sb.total_bytes = device_size;
sb.bytes_used = total_used;
sb.root_dir_objectid = BTRFS_ROOT_TREEDIR;
sb.num_devices = 1;
sb.sector_size = sector_size;
sb.node_size = tree_size;
sb.leaf_size = tree_size;
sb.stripe_size = sector_size;
sb.n = sys_chunk_size;
sb.chunk_root_generation = 1;
sb.incompat_flags = BTRFS_INCOMPAT_FLAGS_MIXED_BACKREF | BTRFS_INCOMPAT_FLAGS_BIG_METADATA | BTRFS_INCOMPAT_FLAGS_EXTENDED_IREF |
BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA | BTRFS_INCOMPAT_FLAGS_NO_HOLES;
sb.csum_type = csum_type;
sb.root_level = root_root.level;
sb.chunk_root_level = chunk_root.level;
if (compression == btrfs_compression::lzo)
sb.incompat_flags |= BTRFS_INCOMPAT_FLAGS_COMPRESS_LZO;
else if (compression == btrfs_compression::zstd)
sb.incompat_flags |= BTRFS_INCOMPAT_FLAGS_COMPRESS_ZSTD;
set_volume_label(sb, dev);
for (const auto& c : chunk_root.items) {
if (c.first.obj_type == btrfs_key_type::DEV_ITEM) {
memcpy(&sb.dev_item, c.second.data(), sizeof(DEV_ITEM));
break;
}
}
sb.uuid_tree_generation = 1;
{
uint8_t* ptr = sb.sys_chunk_array;
for (const auto& c : chunk_root.items) {
if (c.first.obj_type == btrfs_key_type::CHUNK_ITEM) {
auto& ci = *(CHUNK_ITEM*)c.second.data();
if (ci.type & BLOCK_FLAG_SYSTEM) {
auto& key = *(KEY*)ptr;
key = c.first;
ptr += sizeof(KEY);
memcpy(ptr, c.second.data(), c.second.size());
ptr += c.second.size();
}
}
}
}
i = 0;
while (superblock_addrs[i] != 0) {
if (superblock_addrs[i] > device_size - buf.size())
return;
sb.sb_phys_addr = superblock_addrs[i];
switch (csum_type) {
case btrfs_csum_type::crc32c:
*(uint32_t*)sb.checksum = ~calc_crc32c(0xffffffff, (uint8_t*)&sb.uuid, sizeof(superblock) - sizeof(sb.checksum));
break;
case btrfs_csum_type::xxhash:
*(uint64_t*)sb.checksum = XXH64(&sb.uuid, sizeof(superblock) - sizeof(sb.checksum), 0);
break;
case btrfs_csum_type::sha256:
calc_sha256((uint8_t*)&sb, &sb.uuid, sizeof(superblock) - sizeof(sb.checksum));
break;
case btrfs_csum_type::blake2:
blake2b(&sb, 32, &sb.uuid, sizeof(superblock) - sizeof(sb.checksum));
break;
default:
break;
}
dev.seek(superblock_addrs[i]);
dev.write(buf.data(), buf.size());
i++;
}
}
static void add_dev_item(root& chunk_root) {
DEV_ITEM di;
uint32_t sector_size = 0x1000; // FIXME - get from superblock
memset(&di, 0, sizeof(DEV_ITEM));
di.dev_id = 1;
di.num_bytes = device_size;
//uint64_t bytes_used; // FIXME
di.optimal_io_align = sector_size;
di.optimal_io_width = sector_size;
di.minimal_io_size = sector_size;
di.device_uuid = dev_uuid;
di.fs_uuid = fs_uuid;
add_item(chunk_root, 1, btrfs_key_type::DEV_ITEM, 1, &di, sizeof(DEV_ITEM));
}
static void add_to_root_root(const root& r, root& root_root) {
ROOT_ITEM ri;
memset(&ri, 0, sizeof(ROOT_ITEM));
ri.inode.generation = 1;
ri.inode.st_blocks = tree_size;
ri.inode.st_size = 3;
ri.inode.st_nlink = 1;
ri.inode.st_mode = __S_IFDIR | S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH;
ri.generation = 1;
ri.objid = (r.id == BTRFS_ROOT_FSTREE || r.id >= 0x100) ? SUBVOL_ROOT_INODE : 0;
ri.flags = r.readonly ? BTRFS_SUBVOL_READONLY : 0;
ri.num_references = 1;
ri.generation2 = 1;
if (r.id == image_subvol_id)
ri.uuid = subvol_uuid;
// block_number, bytes_used, and root_level are set in update_root_root
add_item(root_root, r.id, btrfs_key_type::ROOT_ITEM, 0, &ri, sizeof(ROOT_ITEM));
}
static void update_root_root(root& root_root, enum btrfs_csum_type csum_type) {
for (auto& t : root_root.trees) {
auto& th = *(tree_header*)t.data();
if (th.level > 0)
return;
auto ln = (leaf_node*)((uint8_t*)t.data() + sizeof(tree_header));
bool changed = true;
for (unsigned int i = 0; i < th.num_items; i++) {
if (ln[i].key.obj_type == btrfs_key_type::ROOT_ITEM) {
auto& ri = *(ROOT_ITEM*)((uint8_t*)t.data() + sizeof(tree_header) + ln[i].offset);
for (const auto& r : roots) {
if (r.id == ln[i].key.obj_id) {
ri.block_number = r.tree_addr;
ri.root_level = r.level;
ri.bytes_used = r.metadata_size;
changed = true;
}
}
}
}
if (changed)
calc_tree_hash(th, csum_type);
}
}
static void add_dev_stats(root& r) {
uint64_t ds[5];
memset(ds, 0, sizeof(ds));
add_item(r, 0, btrfs_key_type::DEV_STATS, 1, &ds, sizeof(ds));
}
static BTRFS_UUID generate_uuid(default_random_engine& gen) {
BTRFS_UUID uuid;
uniform_int_distribution<uint32_t> dist(0,0xffffffff);
for (unsigned int i = 0; i < 4; i++) {
*(uint32_t*)&uuid.uuid[i * sizeof(uint32_t)] = dist(gen);
}
return uuid;
}
static void update_extent_root(root& extent_root, enum btrfs_csum_type csum_type) {
for (auto& t : extent_root.trees) {
auto& th = *(tree_header*)t.data();
if (th.level > 0)
return;
auto ln = (leaf_node*)((uint8_t*)t.data() + sizeof(tree_header));
bool changed = true;
for (unsigned int i = 0; i < th.num_items; i++) {
if (ln[i].key.obj_type == btrfs_key_type::BLOCK_GROUP_ITEM) {
auto& bgi = *(BLOCK_GROUP_ITEM*)((uint8_t*)t.data() + sizeof(tree_header) + ln[i].offset);
for (const auto& c : chunks) {
if (c.offset == ln[i].key.obj_id) {
bgi.used = c.used;
changed = true;
}
}
}
}
if (changed)
calc_tree_hash(th, csum_type);
}
}
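// Adds an INODE_REF for (inode, parent); if an INODE_REF item already exists
// for that parent, the new (index, name) entry is appended to it.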
static void add_inode_ref(root& r, uint64_t inode, uint64_t parent, uint64_t index, string_view name) {
if (r.items.count(KEY{inode, btrfs_key_type::INODE_REF, parent}) != 0) { // collision, append to the end
auto& old = r.items.at(KEY{inode, btrfs_key_type::INODE_REF, parent});
size_t irlen = offsetof(INODE_REF, name[0]) + name.length();
// FIXME - check if too long for tree, and create INODE_EXTREF instead
old.resize(old.size() + irlen);
auto& ir = *(INODE_REF*)((uint8_t*)old.data() + old.size() - irlen);
ir.index = index;
ir.n = (uint16_t)name.length();
memcpy(ir.name, name.data(), name.length());
return;
}
buffer_t buf(offsetof(INODE_REF, name[0]) + name.length());
auto& ir = *(INODE_REF*)buf.data();
ir.index = index;
ir.n = (uint16_t)name.length();
memcpy(ir.name, name.data(), name.length());
add_item_move(r, inode, btrfs_key_type::INODE_REF, parent, buf);
}
static void populate_fstree(root& r) {
INODE_ITEM ii;
memset(&ii, 0, sizeof(INODE_ITEM));
ii.generation = 1;
ii.transid = 1;
ii.st_nlink = 1;
ii.st_mode = __S_IFDIR | S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH;
ii.sequence = 1;
add_item(r, SUBVOL_ROOT_INODE, btrfs_key_type::INODE_ITEM, 0, &ii, sizeof(INODE_ITEM));
add_inode_ref(r, SUBVOL_ROOT_INODE, SUBVOL_ROOT_INODE, 0, "..");
}
static void update_chunk_root(root& chunk_root, enum btrfs_csum_type csum_type) {
for (auto& t : chunk_root.trees) {
auto& th = *(tree_header*)t.data();
if (th.level > 0)
return;
auto ln = (leaf_node*)((uint8_t*)t.data() + sizeof(tree_header));
for (unsigned int i = 0; i < th.num_items; i++) {
if (ln[i].key.obj_id == 1 && ln[i].key.obj_type == btrfs_key_type::DEV_ITEM && ln[i].key.offset == 1) {
auto& di = *(DEV_ITEM*)((uint8_t*)t.data() + sizeof(tree_header) + ln[i].offset);
di.bytes_used = 0;
for (const auto& c : chunks) {
di.bytes_used += c.length;
}
calc_tree_hash(th, csum_type);
return;
}
}
}
}
static root& add_image_subvol(root& root_root, root& fstree_root) {
static const char subvol_name[] = "image";
roots.emplace_back(image_subvol_id);
root& r = roots.back();
r.readonly = true;
// add ROOT_REF and ROOT_BACKREF
{
buffer_t buf(offsetof(ROOT_REF, name[0]) + sizeof(subvol_name) - 1);
auto& rr = *(ROOT_REF*)buf.data();
rr.dir = SUBVOL_ROOT_INODE;
rr.index = 2;
rr.n = sizeof(subvol_name) - 1;
memcpy(rr.name, subvol_name, sizeof(subvol_name) - 1);
add_item(root_root, BTRFS_ROOT_FSTREE, btrfs_key_type::ROOT_REF, image_subvol_id, buf);
add_item_move(root_root, image_subvol_id, btrfs_key_type::ROOT_BACKREF, BTRFS_ROOT_FSTREE, buf);
}
// add DIR_ITEM and DIR_INDEX
{
buffer_t buf(offsetof(DIR_ITEM, name[0]) + sizeof(subvol_name) - 1);
auto& di = *(DIR_ITEM*)buf.data();
di.key.obj_id = image_subvol_id;
di.key.obj_type = btrfs_key_type::ROOT_ITEM;
di.key.offset = 0xffffffffffffffff;
di.transid = 1;
di.m = 0;
di.n = sizeof(subvol_name) - 1;
di.type = btrfs_inode_type::directory;
memcpy(di.name, subvol_name, sizeof(subvol_name) - 1);
auto hash = calc_crc32c(0xfffffffe, (const uint8_t*)subvol_name, sizeof(subvol_name) - 1);
add_item(fstree_root, SUBVOL_ROOT_INODE, btrfs_key_type::DIR_ITEM, hash, buf);
add_item_move(fstree_root, SUBVOL_ROOT_INODE, btrfs_key_type::DIR_INDEX, 2, buf);
}
// increase st_size in parent dir
if (fstree_root.dir_size.count(SUBVOL_ROOT_INODE) == 0)
fstree_root.dir_size[SUBVOL_ROOT_INODE] = (sizeof(subvol_name) - 1) * 2;
else
fstree_root.dir_size.at(SUBVOL_ROOT_INODE) += (sizeof(subvol_name) - 1) * 2;
populate_fstree(r);
return r;
}
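// Creates the image file in the image subvolume: an inode whose EXTENT_DATA
// items reference the original NTFS clusters in place, so the source
// filesystem's data is preserved inside the new btrfs volume.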
static void create_image(root& r, ntfs& dev, const runs_t& runs, uint64_t inode, bool nocsum) {
INODE_ITEM ii;
uint64_t cluster_size = (uint64_t)dev.boot_sector->BytesPerSector * (uint64_t)dev.boot_sector->SectorsPerCluster;
// add INODE_ITEM
memset(&ii, 0, sizeof(INODE_ITEM));
ii.generation = 1;
ii.transid = 1;
ii.st_size = orig_device_size;
ii.st_nlink = 1;
ii.st_mode = __S_IFREG | S_IRUSR | S_IWUSR;
ii.sequence = 1;
if (nocsum)
ii.flags = BTRFS_INODE_NODATACOW | BTRFS_INODE_NODATASUM;
// FIXME - use current time for the following
// BTRFS_TIME st_atime;
// BTRFS_TIME st_ctime;
// BTRFS_TIME st_mtime;
// BTRFS_TIME otime;
for (const auto& rs : runs) {
for (const auto& run : rs.second) {
if (!run.relocated && !run.not_in_img)
ii.st_blocks += run.length * cluster_size;
}
}
add_item(r, inode, btrfs_key_type::INODE_ITEM, 0, &ii, sizeof(INODE_ITEM));
// add DIR_ITEM and DIR_INDEX
{
buffer_t buf(offsetof(DIR_ITEM, name[0]) + sizeof(image_filename) - 1);
auto& di = *(DIR_ITEM*)buf.data();
di.key.obj_id = inode;
di.key.obj_type = btrfs_key_type::INODE_ITEM;
di.key.offset = 0;
di.transid = 1;
di.m = 0;
di.n = sizeof(image_filename) - 1;
di.type = btrfs_inode_type::file;
memcpy(di.name, image_filename, sizeof(image_filename) - 1);
auto hash = calc_crc32c(0xfffffffe, (const uint8_t*)image_filename, sizeof(image_filename) - 1);
add_item(r, SUBVOL_ROOT_INODE, btrfs_key_type::DIR_ITEM, hash, buf);
add_item_move(r, SUBVOL_ROOT_INODE, btrfs_key_type::DIR_INDEX, 2, buf);
}
// add INODE_REF
add_inode_ref(r, inode, SUBVOL_ROOT_INODE, 2, image_filename);
// increase st_size in parent dir
for (auto& it : r.items) {
if (it.first.obj_id == SUBVOL_ROOT_INODE && it.first.obj_type == btrfs_key_type::INODE_ITEM) {
auto& ii2 = *(INODE_ITEM*)it.second.data();
ii2.st_size += (sizeof(image_filename) - 1) * 2;
break;
}
}
// add extents
buffer_t buf(offsetof(EXTENT_DATA, data[0]) + sizeof(EXTENT_DATA2));
auto& ed = *(EXTENT_DATA*)buf.data();
auto& ed2 = *(EXTENT_DATA2*)&ed.data;
ed.generation = 1;
ed.compression = btrfs_compression::none;
ed.encryption = 0;
ed.encoding = 0;
ed.type = btrfs_extent_type::regular;
for (const auto& rs : runs) {
for (const auto& run : rs.second) {
uint64_t addr;
if (run.relocated || run.not_in_img)
continue;
ed.decoded_size = ed2.size = ed2.num_bytes = run.length * cluster_size;
addr = run.offset * cluster_size;
if (run.inode == dummy_inode) {
for (const auto& reloc : relocs) {
if (reloc.old_start == run.offset) {
ed2.address = (reloc.new_start * cluster_size) + chunk_virt_offset;
break;
}
}
} else
ed2.address = addr + chunk_virt_offset;
ed2.offset = 0;
add_item(r, inode, btrfs_key_type::EXTENT_DATA, addr, buf);
data_size += ed2.size;
}
}
}
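// Converts an NTFS allocation bitmap into a list of runs of set bits,
// scanning a 32-bit word at a time and falling back to byte-by-byte.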
template<typename T>
static void parse_bitmap(const buffer_t& bmpdata, list<T>& runs) {
uint64_t run_start = 0, pos = 0;
bool set = false;
string_view bdsv{(char*)bmpdata.data(), bmpdata.size()};
// FIXME - by 64-bits if 64-bit processor (use typedef for uint64_t/uint32_t?)
while (bdsv.size() >= sizeof(uint32_t)) {
auto v = *(uint32_t*)bdsv.data();
if ((!set && v == 0) || (set && v == 0xffffffff)) {
pos += sizeof(uint32_t) * 8;
bdsv = bdsv.substr(sizeof(uint32_t));
continue;
}
if (!set && v == 0xffffffff) {
run_start = pos;
set = true;
pos += sizeof(uint32_t) * 8;
} else if (set && v == 0) {
if (pos != run_start)
runs.emplace_back(run_start, pos - run_start);
set = false;
pos += sizeof(uint32_t) * 8;
} else {
for (unsigned int i = 0; i < sizeof(uint32_t) * 8; i++) {
if (v & 1) {
if (!set) {
run_start = pos;
set = true;
}
} else {
if (set) {
if (pos != run_start)
runs.emplace_back(run_start, pos - run_start);
set = false;
}
}
v >>= 1;
pos++;
}
}
bdsv = bdsv.substr(sizeof(uint32_t));
}
while (!bdsv.empty()) {
auto v = *(uint8_t*)bdsv.data();
if ((!set && v == 0) || (set && v == 0xff)) {
pos++;
bdsv = bdsv.substr(1);
continue;
}
if (!set && v == 0xff) {
run_start = pos;
set = true;
pos += 8;
} else if (set && v == 0) {
if (pos != run_start)
runs.emplace_back(run_start, pos - run_start);
set = false;
pos += 8;
} else {
for (unsigned int i = 0; i < 8; i++) {
if (v & 1) {
if (!set) {
run_start = pos;
set = true;
}
} else {
if (set) {
if (pos != run_start)
runs.emplace_back(run_start, pos - run_start);
set = false;
}
}
v >>= 1;
pos++;
}
}
bdsv = bdsv.substr(1);
}
if (set && run_start != pos)
runs.emplace_back(run_start, pos - run_start);
// FIXME - remove any bits after end of volume
}
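// Like parse_bitmap, but groups the runs of used clusters by the data chunk
// they fall in, splitting any run that crosses a chunk boundary.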
static void parse_data_bitmap(ntfs& dev, const buffer_t& bmpdata, runs_t& runs) {
uint64_t run_start = 0, pos = 0;
bool set = false;
string_view bdsv{(char*)bmpdata.data(), bmpdata.size()};
uint64_t clusters_per_chunk = data_chunk_size / ((uint64_t)dev.boot_sector->BytesPerSector * (uint64_t)dev.boot_sector->SectorsPerCluster);
// FIXME - by 64-bits if 64-bit processor (use typedef for uint64_t/uint32_t?)
auto add_run = [&]() {
while (true) {
uint64_t chunk = run_start / clusters_per_chunk;
auto& r = runs[chunk];
if (pos / clusters_per_chunk != chunk) {
uint64_t len = clusters_per_chunk - (run_start % clusters_per_chunk);
r.emplace_back(run_start, len);
run_start += len;
if (pos == run_start)
break;
} else {
r.emplace_back(run_start, pos - run_start);
break;
}
}
};
while (bdsv.size() >= sizeof(uint32_t)) {
auto v = *(uint32_t*)bdsv.data();
if ((!set && v == 0) || (set && v == 0xffffffff)) {
pos += sizeof(uint32_t) * 8;
bdsv = bdsv.substr(sizeof(uint32_t));
continue;
}
if (!set && v == 0xffffffff) {
run_start = pos;
set = true;
pos += sizeof(uint32_t) * 8;
} else if (set && v == 0) {
if (pos != run_start)
add_run();
set = false;
pos += sizeof(uint32_t) * 8;
} else {
for (unsigned int i = 0; i < sizeof(uint32_t) * 8; i++) {
if (v & 1) {
if (!set) {
run_start = pos;
set = true;
}
} else {
if (set) {
if (pos != run_start)
add_run();
set = false;
}
}
v >>= 1;
pos++;
}
}
bdsv = bdsv.substr(sizeof(uint32_t));
}
while (!bdsv.empty()) {
auto v = *(uint8_t*)bdsv.data();
if ((!set && v == 0) || (set && v == 0xff)) {
pos++;
bdsv = bdsv.substr(1);
continue;
}
if (!set && v == 0xff) {
run_start = pos;
set = true;
pos += 8;
} else if (set && v == 0) {
if (pos != run_start)
add_run();
set = false;
pos += 8;
} else {
for (unsigned int i = 0; i < 8; i++) {
if (v & 1) {
if (!set) {
run_start = pos;
set = true;
}
} else {
if (set) {
if (pos != run_start)
add_run();
set = false;
}
}
v >>= 1;
pos++;
}
}
bdsv = bdsv.substr(1);
}
if (set && run_start != pos)
add_run();
// FIXME - remove any bits after end of volume
}
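// Converts a Windows FILETIME (100ns intervals since 1601-01-01) to a btrfs
// timestamp (seconds and nanoseconds since the Unix epoch).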
static BTRFS_TIME win_time_to_unix(int64_t time) {
uint64_t l = (uint64_t)time - 116444736000000000ULL;
BTRFS_TIME bt;
bt.seconds = l / 10000000;
bt.nanoseconds = (uint32_t)((l % 10000000) * 100);
return bt;
}
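// Links an inode into a directory: adds DIR_ITEM (handling hash collisions by
// appending), DIR_INDEX, and INODE_REF items, and updates the directory's
// size and sequence counters.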
static void link_inode(root& r, uint64_t inode, uint64_t dir, string_view name,
enum btrfs_inode_type type) {
uint64_t seq;
// add DIR_ITEM and DIR_INDEX
if (r.dir_seqs.count(dir) == 0)
r.dir_seqs[dir] = 2;
seq = r.dir_seqs.at(dir);
{
buffer_t buf(offsetof(DIR_ITEM, name[0]) + name.length());
auto& di = *(DIR_ITEM*)buf.data();
di.key.obj_id = inode;
di.key.obj_type = btrfs_key_type::INODE_ITEM;
di.key.offset = 0;
di.transid = 1;
di.m = 0;
di.n = (uint16_t)name.length();
di.type = type;
memcpy(di.name, name.data(), name.length());
auto hash = calc_crc32c(0xfffffffe, (const uint8_t*)name.data(), (uint32_t)name.length());
if (r.items.count(KEY{dir, btrfs_key_type::DIR_ITEM, hash}) == 0)
add_item(r, dir, btrfs_key_type::DIR_ITEM, hash, buf);
else { // hash collision
auto& ent = r.items.at(KEY{dir, btrfs_key_type::DIR_ITEM, hash});
if (!ent.empty()) {
ent.resize(ent.size() + buf.size());
memcpy(ent.data() + ent.size() - buf.size(), &di, buf.size());
} else {
ent.resize(buf.size());
memcpy(ent.data(), &di, buf.size());
}
}
add_item_move(r, dir, btrfs_key_type::DIR_INDEX, seq, buf);
}
// add INODE_REF
add_inode_ref(r, inode, dir, seq, name);
// increase st_size in parent dir
if (r.dir_size.count(dir) == 0)
r.dir_size[dir] = name.length() * 2;
else
r.dir_size.at(dir) += name.length() * 2;
r.dir_seqs[dir]++;
}
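// Assigns the cluster range [offset, offset + length) to the given inode by
// splitting the matching entries in the per-chunk runs map; returns true if
// any run was split.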
static bool split_runs(const ntfs& dev, runs_t& runs, uint64_t offset, uint64_t length, uint64_t inode, uint64_t file_offset) {
uint64_t clusters_per_chunk = data_chunk_size / ((uint64_t)dev.boot_sector->BytesPerSector * (uint64_t)dev.boot_sector->SectorsPerCluster);
bool ret = false;
while (true) {
uint64_t chunk = offset / clusters_per_chunk;
uint64_t length2 = min(length, clusters_per_chunk - (offset % clusters_per_chunk));
if (runs.count(chunk) != 0) {
auto& rl = runs[chunk];
for (auto it = rl.begin(); it != rl.end(); it++) {
auto& r = *it;
if (r.offset > offset + length2)
break;
if (offset + length2 > r.offset && offset < r.offset + r.length) {
if (offset >= r.offset && offset + length2 <= r.offset + r.length) { // cut out middle
if (offset > r.offset)
rl.emplace(it, r.offset, offset - r.offset);
rl.emplace(it, offset, length2, inode, file_offset, r.relocated);
if (offset + length2 < r.offset + r.length) {
r.length = r.offset + r.length - offset - length2;
r.offset = offset + length2;
} else
rl.erase(it);
ret = true;
break;
}
throw formatted_error("Error assigning space to file. This can occur if the space bitmap has become corrupted. Run chkdsk and try again.");
}
}
}
if (length2 == length)
return ret;
offset += length2;
length -= length2;
}
}
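// Takes a file's NTFS cluster mappings, splits them at data-chunk boundaries,
// rewrites any parts that overlap relocated ranges (such as superblock
// locations), and then records the clusters as belonging to this inode.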
static void process_mappings(const ntfs& dev, uint64_t inode, list<mapping>& mappings, runs_t& runs) {
uint64_t cluster_size = (uint64_t)dev.boot_sector->BytesPerSector * (uint64_t)dev.boot_sector->SectorsPerCluster;
uint64_t clusters_per_chunk = data_chunk_size / cluster_size;
list<mapping> mappings2;
// avoid chunk boundaries
for (const auto& m : mappings) {
if (m.lcn == 0) // sparse
continue;
uint64_t chunk_start = m.lcn / clusters_per_chunk;
uint64_t chunk_end = ((m.lcn + m.length) - 1) / clusters_per_chunk;
if (chunk_end > chunk_start) {
uint64_t start = m.lcn, vcn = m.vcn;
do {
uint64_t end = min((((start / clusters_per_chunk) + 1) * clusters_per_chunk), m.lcn + m.length);
if (end == start)
break;
mappings2.emplace_back(start, vcn, end - start);
vcn += end - start;
start = end;
} while (true);
} else
mappings2.emplace_back(m.lcn, m.vcn, m.length);
}
mappings.clear();
mappings.splice(mappings.begin(), mappings2);
// change to avoid superblocks
for (auto& r : relocs) {
for (auto it = mappings.begin(); it != mappings.end(); it++) {
auto& m = *it;
if (m.lcn + m.length > r.old_start && m.lcn < r.old_start + r.length) {
if (m.lcn >= r.old_start && m.lcn + m.length <= r.old_start + r.length) { // change whole mapping
if (r.old_start < m.lcn) { // reloc starts before mapping
for (auto& rs : runs) { // FIXME - optimize
for (auto it2 = rs.second.begin(); it2 != rs.second.end(); it2++) {
auto& r2 = *it2;
if (r2.offset == r.old_start) {
rs.second.emplace(it2, r2.offset, m.lcn - r2.offset, dummy_inode);
r2.length -= m.lcn - r2.offset;
r2.offset = m.lcn;
}
if (r2.offset == r.new_start) {
rs.second.emplace(it2, r2.offset, m.lcn - r.old_start, 0, 0, true);
r2.offset += m.lcn - r.old_start;
r2.length -= m.lcn - r.old_start;
}
}
}
relocs.emplace_back(r.old_start, m.lcn - r.old_start, r.new_start);
r.length -= m.lcn - r.old_start;
r.new_start += m.lcn - r.old_start;
r.old_start = m.lcn;
}
if (r.old_start + r.length > m.lcn + m.length) { // reloc goes beyond end of mapping
relocs.emplace_back(m.lcn + m.length, r.old_start + r.length - m.lcn - m.length,
r.new_start + m.lcn + m.length - r.old_start);
r.length = m.lcn + m.length - r.old_start;
for (auto& rs : runs) { // FIXME - optimize
for (auto it2 = rs.second.begin(); it2 != rs.second.end(); it2++) {
auto& r2 = *it2;
if (r2.offset == r.old_start) {
rs.second.emplace(it2, r.old_start, m.lcn + m.length - r.old_start, dummy_inode);
r2.length -= m.lcn + m.length - r2.offset;
r2.offset = m.lcn + m.length;
}
if (r2.offset == r.new_start) {
rs.second.emplace(it2, r2.offset, m.lcn + m.length - r.old_start, 0, 0, true);
r2.offset += m.lcn + m.length - r.old_start;
r2.length -= m.lcn + m.length - r.old_start;
}
}
}
}
m.lcn -= r.old_start;
m.lcn += r.new_start;
} else if (m.lcn <= r.old_start && m.lcn + m.length >= r.old_start + r.length) { // change middle
if (m.lcn < r.old_start) {
mappings.emplace(it, m.lcn, m.vcn, r.old_start - m.lcn);
m.vcn += r.old_start - m.lcn;
m.length -= r.old_start - m.lcn;
m.lcn = r.old_start;
}
if (m.lcn + m.length > r.old_start + r.length) {
mappings.emplace(it, r.new_start, m.vcn, r.length);
m.lcn = r.old_start + r.length;
m.length -= r.length;
m.vcn += r.length;
} else {
m.lcn -= r.old_start;
m.lcn += r.new_start;
}
} else if (m.lcn < r.old_start && m.lcn + m.length <= r.old_start + r.length) { // change end
mappings.emplace(it, m.lcn, m.vcn, r.old_start - m.lcn);
m.vcn += r.old_start - m.lcn;
m.length -= r.old_start - m.lcn;
m.lcn = r.new_start;
if (r.length > m.length) {
relocs.emplace_back(r.old_start + m.length, r.length - m.length, r.new_start + m.length);
r.length = m.length;
for (auto& rs : runs) { // FIXME - optimize
bool found = false;
for (auto it2 = rs.second.begin(); it2 != rs.second.end(); it2++) {
auto& r2 = *it2;
if (r2.offset == r.old_start) {
rs.second.emplace(it2, r2.offset, m.length, dummy_inode);
r2.offset += m.length;
r2.length -= m.length;
found = true;
break;
}
}
if (found)
break;
}
}
} else if (m.lcn > r.old_start && m.lcn + m.length > r.old_start + r.length) { // change beginning
auto orig_r = r;
if (r.old_start < m.lcn) {
for (auto& rs : runs) { // FIXME - optimize
for (auto it2 = rs.second.begin(); it2 != rs.second.end(); it2++) {
auto& r2 = *it2;
if (r2.offset == r.old_start) {
rs.second.emplace(it2, r2.offset, m.lcn - r2.offset, dummy_inode);
r2.length -= m.lcn - r2.offset;
r2.offset = m.lcn;
}
if (r2.offset == r.new_start) {
rs.second.emplace(it2, r2.offset, m.lcn - r.old_start, 0, 0, true);
r2.offset += m.lcn - r.old_start;
r2.length -= m.lcn - r.old_start;
}
}
}
relocs.emplace_back(m.lcn, r.old_start + r.length - m.lcn, r.new_start + m.lcn - r.old_start);
r.length = m.lcn - r.old_start;
}
mappings.emplace(it, m.lcn - orig_r.old_start + orig_r.new_start, m.vcn, orig_r.old_start + orig_r.length - m.lcn);
m.vcn += orig_r.old_start + orig_r.length - m.lcn;
m.length -= orig_r.old_start + orig_r.length - m.lcn;
m.lcn = orig_r.old_start + orig_r.length;
}
}
}
}
for (const auto& m : mappings) {
split_runs(dev, runs, m.lcn, m.length, inode, m.vcn);
}
}
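// Adds an xattr for the given inode: the name and value are packed into a DIR_ITEM
// structure and stored as an XATTR_ITEM keyed on the CRC32C hash of the name.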
static void set_xattr(root& r, uint64_t inode, string_view name, uint32_t hash, const buffer_t& data) {
buffer_t buf(offsetof(DIR_ITEM, name[0]) + name.size() + data.size());
auto& di = *(DIR_ITEM*)buf.data();
di.key.obj_id = di.key.offset = 0;
di.key.obj_type = (btrfs_key_type)0;
di.transid = 1;
di.m = (uint16_t)data.size();
di.n = (uint16_t)name.size();
di.type = btrfs_inode_type::ea;
memcpy(di.name, name.data(), name.size());
memcpy(di.name + name.size(), data.data(), data.size());
add_item_move(r, inode, btrfs_key_type::XATTR_ITEM, hash, buf);
}
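// Blanks the current console line so progress output written with a trailing \r
// doesn't leave stale characters behind.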
static void clear_line() {
#ifdef _WIN32
CONSOLE_SCREEN_BUFFER_INFO csbi;
HANDLE console = GetStdHandle(STD_OUTPUT_HANDLE);
// FIXME - flickers on Windows?
if (GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &csbi)) {
DWORD written;
SetConsoleCursorPosition(console, { 0, csbi.dwCursorPosition.Y });
u16string spaces(csbi.dwSize.X, u' ');
WriteConsoleW(console, spaces.data(), (DWORD)spaces.length(), &written, nullptr);
SetConsoleCursorPosition(console, { 0, csbi.dwCursorPosition.Y });
}
#else
fmt::print("\33[2K");
fflush(stdout);
#endif
}
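// ASCII-only case-insensitive string comparison, used below to recognize
// pagefile.sys etc. whatever their case on disk.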
static bool string_eq_ci(string_view s1, string_view s2) {
if (s1.length() != s2.length())
return false;
auto c1 = &s1[0];
auto c2 = &s2[0];
for (size_t i = 0; i < s1.length(); i++) {
auto c1a = *c1;
auto c2a = *c2;
if (c1a >= 'A' && c1a <= 'Z')
c1a = c1a - 'A' + 'a';
if (c2a >= 'A' && c2a <= 'Z')
c2a = c2a - 'A' + 'a';
if (c1a != c2a)
return false;
c1++;
c2++;
}
return true;
}
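// If a filename was cut to 255 bytes the final UTF-8 code point may have been split;
// drop any trailing partial sequence so the truncated name remains valid UTF-8.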
static void fix_truncated_utf8(string& s) {
if (!((uint8_t)s.back() & 0x80)) // one-byte sequence
return;
if (((uint8_t)s.back() & 0xC0) == 0xC0) { // first byte of CP, nothing following
s.pop_back();
return;
}
if (((uint8_t)s[s.length() - 2] & 0xE0) == 0xC0) // full two-byte sequence
return;
if (((uint8_t)s[s.length() - 2] & 0xE0) == 0xE0) { // three- or four-byte CP, two bytes
s.pop_back();
s.pop_back();
return;
}
if (((uint8_t)s[s.length() - 3] & 0xF0) == 0xE0) // full three-byte sequence
return;
if (((uint8_t)s[s.length() - 3] & 0xF0) == 0xF0) { // four-byte CP, three bytes
s.pop_back();
s.pop_back();
s.pop_back();
return;
}
}
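// Converts one NTFS file record into a btrfs inode: walks its attributes
// (STANDARD_INFORMATION, DATA and alternate data streams, FILE_NAME, REPARSE_POINT,
// SECURITY_DESCRIPTOR, EA), translates WSL/LXSS EAs into POSIX metadata, decompresses
// LZNT1- and WOF-compressed data where necessary, then emits the INODE_ITEM,
// EXTENT_DATA items, directory links, and xattrs.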
static void add_inode(root& r, uint64_t inode, uint64_t ntfs_inode, bool& is_dir, runs_t& runs,
ntfs_file& secure, ntfs& dev, const list<uint64_t>& skiplist, enum btrfs_compression opt_compression,
bool nocsum) {
INODE_ITEM ii;
uint64_t file_size = 0;
list<mapping> mappings, wof_mappings;
vector<tuple<uint64_t, string>> links;
buffer_t standard_info, sd, reparse_point, inline_data;
string symlink;
uint32_t atts;
bool atts_set = false;
map<string, pair<uint32_t, buffer_t>> xattrs;
string filename;
buffer_t wof_compressed_data;
uint32_t cluster_size = dev.boot_sector->BytesPerSector * dev.boot_sector->SectorsPerCluster;
bool processed_data = false, processed_wof_data = false, skipping = false;
uint16_t compression_unit = 0;
uint64_t vdl, wof_vdl;
vector<string> warnings;
map<string, buffer_t> eas;
static const uint32_t sector_size = 0x1000; // FIXME
ntfs_file f(dev, ntfs_inode);
if (f.file_record->BaseFileRecordSegment.SegmentNumber != 0)
return;
is_dir = f.is_directory();
auto add_warning = [&]<typename... Args>(fmt::format_string<Args...> s, Args&&... args) {
if (filename.empty())
filename = f.get_filename();
warnings.emplace_back(filename + ": " + fmt::format(s, forward<Args>(args)...));
};
f.loop_through_atts([&](const ATTRIBUTE_RECORD_HEADER& att, string_view res_data, u16string_view name) -> bool {
switch (att.TypeCode) {
case ntfs_attribute::STANDARD_INFORMATION:
if (att.FormCode == NTFS_ATTRIBUTE_FORM::NONRESIDENT_FORM)
throw formatted_error("Error - STANDARD_INFORMATION is non-resident"); // FIXME - can this happen?
standard_info.resize(res_data.length());
memcpy(standard_info.data(), res_data.data(), res_data.length());
break;
case ntfs_attribute::DATA:
if (name.empty()) { // main file data
if (att.Flags & ATTRIBUTE_FLAG_ENCRYPTED) {
clear_line();
if (filename.empty())
filename = f.get_filename();
fmt::print(stderr, "Skipping encrypted inode {:x} ({})\n", inode - inode_offset, filename);
skipping = true;
return true;
}
if (att.FormCode == NTFS_ATTRIBUTE_FORM::RESIDENT_FORM && !processed_data) {
file_size = vdl = att.Form.Resident.ValueLength;
inline_data.resize(res_data.size());
memcpy(inline_data.data(), res_data.data(), res_data.size());
} else {
if (!processed_data) {
file_size = att.Form.Nonresident.FileSize;
compression_unit = att.Form.Nonresident.CompressionUnit;
vdl = att.Form.Nonresident.ValidDataLength;
if (!(att.Flags & ATTRIBUTE_FLAG_COMPRESSION_MASK))
compression_unit = 0;
}
list<mapping> mappings2;
uint64_t last_vcn;
if (mappings.empty())
last_vcn = 0;
else
last_vcn = mappings.back().vcn + mappings.back().length;
if (last_vcn < att.Form.Nonresident.LowestVcn)
mappings.emplace_back(0, last_vcn, att.Form.Nonresident.LowestVcn - last_vcn);
read_nonresident_mappings(att, mappings2, cluster_size, vdl);
mappings.splice(mappings.end(), mappings2);
}
processed_data = true;
} else { // ADS
static const char xattr_prefix[] = "user.";
auto ads_name = utf16_to_utf8(name);
auto max_xattr_size = (uint32_t)(tree_size - sizeof(tree_header) - sizeof(leaf_node) - offsetof(DIR_ITEM, name[0]) - ads_name.length() - (sizeof(xattr_prefix) - 1));
// FIXME - check xattr_name not reserved
if (att.Flags & ATTRIBUTE_FLAG_ENCRYPTED) {
add_warning("Skipping encrypted ADS :{}", ads_name);
break;
}
if (att.Flags & ATTRIBUTE_FLAG_COMPRESSION_MASK) {
add_warning("Skipping compressed ADS :{}", ads_name); // FIXME
break;
}
auto name2 = xattr_prefix + ads_name;
if (name2.size() > 255) {
add_warning("Skipping ADS :{} as name too long", ads_name);
break;
}
uint32_t hash = calc_crc32c(0xfffffffe, (const uint8_t*)name2.data(), (uint32_t)name2.length());
if (att.FormCode == NTFS_ATTRIBUTE_FORM::RESIDENT_FORM && (ads_name != "WofCompressedData" || !processed_wof_data)) {
if (ads_name == "WofCompressedData") {
wof_compressed_data.resize(res_data.length());
memcpy(wof_compressed_data.data(), res_data.data(), res_data.length());
} else {
if (att.Form.Resident.ValueLength > max_xattr_size) {
add_warning("Skipping overly large ADS :{} ({} > {})", ads_name, att.Form.Resident.ValueLength, max_xattr_size);
break;
}
buffer_t buf(res_data.size());
memcpy(buf.data(), res_data.data(), res_data.size());
xattrs.emplace(name2, make_pair(hash, buf));
}
} else {
if (att.Form.Nonresident.FileSize > max_xattr_size && ads_name != "WofCompressedData") {
add_warning("Skipping overly large ADS :{} ({} > {})", ads_name, att.Form.Nonresident.FileSize, max_xattr_size);
break;
}
if (ads_name == "WofCompressedData") {
if (!processed_wof_data) {
wof_compressed_data.resize(att.Form.Nonresident.FileSize);
wof_vdl = att.Form.Nonresident.ValidDataLength;
}
list<mapping> mappings2;
uint64_t last_vcn;
if (wof_mappings.empty())
last_vcn = 0;
else
last_vcn = wof_mappings.back().vcn + wof_mappings.back().length;
if (last_vcn < att.Form.Nonresident.LowestVcn)
wof_mappings.emplace_back(0, last_vcn, att.Form.Nonresident.LowestVcn - last_vcn);
read_nonresident_mappings(att, mappings2, cluster_size, wof_vdl);
wof_mappings.splice(wof_mappings.end(), mappings2);
processed_wof_data = true;
} else {
list<mapping> ads_mappings;
read_nonresident_mappings(att, ads_mappings, cluster_size, att.Form.Nonresident.ValidDataLength);
buffer_t ads_data((size_t)sector_align(att.Form.Nonresident.FileSize, cluster_size));
memset(ads_data.data(), 0, ads_data.size());
for (const auto& m : ads_mappings) {
dev.seek(m.lcn * cluster_size);
dev.read(ads_data.data() + (m.vcn * cluster_size), (size_t)(m.length * cluster_size));
}
ads_data.resize((size_t)att.Form.Nonresident.FileSize);
xattrs.emplace(name2, make_pair(hash, ads_data));
}
}
}
break;
case ntfs_attribute::FILE_NAME: {
if (att.FormCode == NTFS_ATTRIBUTE_FORM::NONRESIDENT_FORM)
throw formatted_error("Error - FILE_NAME is non-resident"); // FIXME - can this happen?
if (att.Form.Resident.ValueLength < offsetof(FILE_NAME, FileName[0]))
throw formatted_error("FILE_NAME was truncated");
auto fn = reinterpret_cast<const FILE_NAME*>(res_data.data());
if (fn->Namespace != file_name_type::DOS) {
if (att.Form.Resident.ValueLength < offsetof(FILE_NAME, FileName[0]) + (fn->FileNameLength * sizeof(char16_t)))
throw formatted_error("FILE_NAME was truncated");
auto name2 = utf16_to_utf8(u16string_view((char16_t*)fn->FileName, fn->FileNameLength));
if (name2.length() > 255) {
// FIXME - make sure no collision with existing file
name2 = name2.substr(0, 255);
fix_truncated_utf8(name2);
add_warning("Name was too long, truncating to {}.", name2);
}
uint64_t parent = fn->Parent.SegmentNumber;
if (!is_dir || links.empty()) {
bool skip = false;
for (auto n : skiplist) {
if (n == parent) {
skip = true;
break;
}
}
if (!skip) {
for (const auto& l : links) {
if (get<0>(l) == parent && get<1>(l) == name2) {
skip = true;
break;
}
}
}
if (!skip)
links.emplace_back(parent, name2);
}
}
break;
}
case ntfs_attribute::REPARSE_POINT: {
if (att.FormCode == NTFS_ATTRIBUTE_FORM::RESIDENT_FORM) {
reparse_point.resize(res_data.size());
memcpy(reparse_point.data(), res_data.data(), res_data.size());
} else {
list<mapping> rp_mappings;
read_nonresident_mappings(att, rp_mappings, cluster_size, att.Form.Nonresident.ValidDataLength);
reparse_point.resize((size_t)sector_align(att.Form.Nonresident.FileSize, cluster_size));
memset(reparse_point.data(), 0, reparse_point.size());
for (const auto& m : rp_mappings) {
dev.seek(m.lcn * cluster_size);
dev.read(reparse_point.data() + (m.vcn * cluster_size), (size_t)(m.length * cluster_size));
}
reparse_point.resize((size_t)att.Form.Nonresident.FileSize);
}
symlink.clear();
if (is_dir)
break;
const auto& rpb = *reinterpret_cast<const REPARSE_DATA_BUFFER*>(reparse_point.data());
if (reparse_point.size() < offsetof(REPARSE_DATA_BUFFER, Reserved) ||
reparse_point.size() < rpb.ReparseDataLength + offsetof(REPARSE_DATA_BUFFER, GenericReparseBuffer.DataBuffer)) {
add_warning("Reparse point buffer was truncated.");
break;
}
auto len = rpb.ReparseDataLength + offsetof(REPARSE_DATA_BUFFER, GenericReparseBuffer.DataBuffer);
switch (rpb.ReparseTag) {
case IO_REPARSE_TAG_SYMLINK:
if (len < offsetof(REPARSE_DATA_BUFFER, SymbolicLinkReparseBuffer.PathBuffer) ||
(len < offsetof(REPARSE_DATA_BUFFER, SymbolicLinkReparseBuffer.PathBuffer) +
rpb.SymbolicLinkReparseBuffer.PrintNameOffset +
rpb.SymbolicLinkReparseBuffer.PrintNameLength)) {
add_warning("Symlink reparse point buffer was truncated.");
} else if (rpb.SymbolicLinkReparseBuffer.Flags & SYMLINK_FLAG_RELATIVE) {
u16string_view sv(&rpb.SymbolicLinkReparseBuffer.PathBuffer[rpb.SymbolicLinkReparseBuffer.PrintNameOffset / sizeof(char16_t)],
rpb.SymbolicLinkReparseBuffer.PrintNameLength / sizeof(char16_t));
symlink = utf16_to_utf8(sv);
for (auto& c : symlink) {
if (c == '\\')
c = '/';
}
reparse_point.clear();
}
break;
case IO_REPARSE_TAG_LX_SYMLINK:
if (len < offsetof(REPARSE_DATA_BUFFER, LxSymlink.name))
add_warning("LXSS reparse point buffer was truncated.");
else {
symlink = string_view(rpb.LxSymlink.name, len - offsetof(REPARSE_DATA_BUFFER, LxSymlink.name));
reparse_point.clear();
}
break;
}
break;
}
case ntfs_attribute::SECURITY_DESCRIPTOR: {
auto max_sd_size = (uint32_t)(tree_size - sizeof(tree_header) - sizeof(leaf_node) - offsetof(DIR_ITEM, name[0]) - sizeof(EA_NTACL) + 1);
if (att.FormCode == NTFS_ATTRIBUTE_FORM::RESIDENT_FORM) {
if (att.Form.Resident.ValueLength > max_sd_size) {
add_warning("Skipping overly large SD ({} > {})", att.Form.Resident.ValueLength, max_sd_size);
break;
}
sd.resize(res_data.size());
memcpy(sd.data(), res_data.data(), res_data.size());
} else {
if (att.Form.Nonresident.FileSize > max_sd_size) {
add_warning("Skipping overly large SD ({} > {})", att.Form.Nonresident.FileSize, max_sd_size);
break;
}
list<mapping> sd_mappings;
read_nonresident_mappings(att, sd_mappings, cluster_size, att.Form.Nonresident.ValidDataLength);
sd.resize((size_t)sector_align(att.Form.Nonresident.FileSize, cluster_size));
memset(sd.data(), 0, sd.size());
for (const auto& m : sd_mappings) {
dev.seek(m.lcn * cluster_size);
dev.read(sd.data() + (m.vcn * cluster_size), (size_t)(m.length * cluster_size));
}
sd.resize((size_t)att.Form.Nonresident.FileSize);
}
break;
}
case ntfs_attribute::EA: {
buffer_t eabuf;
string_view sv;
if (att.FormCode == NTFS_ATTRIBUTE_FORM::NONRESIDENT_FORM) {
list<mapping> ea_mappings;
read_nonresident_mappings(att, ea_mappings, cluster_size, att.Form.Nonresident.ValidDataLength);
eabuf.resize((size_t)sector_align(att.Form.Nonresident.FileSize, cluster_size));
memset(eabuf.data(), 0, eabuf.size());
for (const auto& m : ea_mappings) {
dev.seek(m.lcn * cluster_size);
dev.read(eabuf.data() + (m.vcn * cluster_size), (size_t)(m.length * cluster_size));
}
sv = string_view((char*)eabuf.data(), (size_t)att.Form.Nonresident.FileSize);
} else
sv = res_data;
do {
auto& ead = *(ea_data*)sv.data();
if (sv.length() < offsetof(ea_data, EaName)) {
add_warning("truncated EA ({} bytes, expected at least {})", sv.length(), offsetof(ea_data, EaName));
break;
}
if (ead.NextEntryOffset > sv.length()) {
add_warning("truncated EA ({} > {})", ead.NextEntryOffset, sv.length());
break;
}
if (offsetof(ea_data, EaName) + ead.EaNameLength + 1 + ead.EaValueLength > ead.NextEntryOffset) {
add_warning("EA overflow ({} + {} + 1 + {} > {})", offsetof(ea_data, EaName), ead.EaNameLength,
ead.EaValueLength, ead.NextEntryOffset);
break;
}
auto ea_name = string_view(ead.EaName, ead.EaNameLength);
buffer_t ea_value(ead.EaValueLength);
memcpy(ea_value.data(), &ead.EaName[ead.EaNameLength + 1], ea_value.size());
eas.emplace(ea_name, move(ea_value));
sv = sv.substr(ead.NextEntryOffset);
} while (!sv.empty());
break;
}
default:
break;
}
return true;
});
if (skipping)
return;
// skip page files
if (links.size() == 1 && get<0>(links.front()) == NTFS_ROOT_DIR_INODE) {
if (string_eq_ci(get<1>(links.front()), "pagefile.sys") || string_eq_ci(get<1>(links.front()), "hiberfil.sys") ||
string_eq_ci(get<1>(links.front()), "swapfile.sys"))
return;
}
if (links.empty())
return; // don't create orphaned inodes
// FIXME - form user.EA xattr from EAs we don't recognize
memset(&ii, 0, sizeof(INODE_ITEM));
optional<uint32_t> mode;
enum btrfs_inode_type item_type = btrfs_inode_type::unknown;
bool has_lxattrb = false;
auto set_mode = [&](uint32_t& m) {
if (is_dir && !__S_ISTYPE(m, __S_IFDIR)) {
add_warning("st_mode did not have S_IFDIR set, setting.");
m &= ~__S_IFMT;
m |= __S_IFDIR;
} else if (!is_dir && __S_ISTYPE(m, __S_IFDIR)) {
add_warning("st_mode had S_IFDIR set, clearing.");
m &= ~__S_IFMT;
m |= __S_IFREG;
}
switch (m & __S_IFMT) {
case __S_IFREG:
item_type = btrfs_inode_type::file;
break;
case __S_IFDIR:
item_type = btrfs_inode_type::directory;
break;
case __S_IFCHR:
item_type = btrfs_inode_type::chardev;
break;
case __S_IFBLK:
item_type = btrfs_inode_type::blockdev;
break;
case __S_IFIFO:
item_type = btrfs_inode_type::fifo;
break;
case __S_IFSOCK:
item_type = btrfs_inode_type::socket;
break;
case __S_IFLNK:
item_type = btrfs_inode_type::symlink;
break;
default:
add_warning("Unrecognized inode type {:o}.", m & __S_IFMT);
}
};
for (const auto& ea : eas) {
const auto& n = ea.first;
const auto& v = ea.second;
if (n == "$LXUID") {
if (v.size() != sizeof(uint32_t)) {
add_warning("$LXUID EA was {} bytes, expected {}", v.size(), sizeof(uint32_t));
continue;
}
ii.st_uid = *(uint32_t*)v.data();
} else if (n == "$LXGID") {
if (v.size() != sizeof(uint32_t)) {
add_warning("$LXGID EA was {} bytes, expected {}", v.size(), sizeof(uint32_t));
continue;
}
ii.st_gid = *(uint32_t*)v.data();
} else if (n == "$LXMOD") {
if (v.size() != sizeof(uint32_t)) {
add_warning("$LXMOD EA was {} bytes, expected {}", v.size(), sizeof(uint32_t));
continue;
}
mode = *(uint32_t*)v.data();
set_mode(mode.value());
} else if (n == "$LXDEV") {
if (v.size() != sizeof(lxdev)) {
add_warning("$LXDEV EA was {} bytes, expected {}", v.size(), sizeof(lxdev));
continue;
}
const auto& d = *(lxdev*)v.data();
if (d.minor >= 0x100000) {
add_warning("minor value {} is too large for Btrfs", d.minor);
continue;
}
ii.st_rdev = (d.major << 20) | (d.minor & 0xfffff);
} else if (n == "LXATTRB") {
if (v.size() != sizeof(lxattrb)) {
add_warning("LXATTRB EA was {} bytes, expected {}", v.size(), sizeof(lxattrb));
continue;
}
const auto& l = *(lxattrb*)v.data();
if (l.format != 0) {
add_warning("LXATTRB format was {}, expected 0", l.format);
continue;
}
if (l.version != 1) {
add_warning("LXATTRB version was {}, expected 1", l.version);
continue;
}
mode = l.mode;
set_mode(mode.value());
ii.st_uid = l.uid;
ii.st_gid = l.gid;
ii.st_rdev = l.rdev;
ii.st_atime.seconds = l.atime;
ii.st_atime.nanoseconds = l.atime_ns;
ii.st_mtime.seconds = l.mtime;
ii.st_mtime.nanoseconds = l.mtime_ns;
ii.st_ctime.seconds = l.ctime;
ii.st_ctime.nanoseconds = l.ctime_ns;
has_lxattrb = true;
} else if (n == "LX.SECURITY.CAPABILITY") {
static const string_view lxea = "lxea";
if (v.size() < lxea.length()) {
add_warning("LX.SECURITY.CAPABILITY EA was {} bytes, expected at least {}", v.size(), lxea.length());
continue;
}
if (string_view((char*)v.data(), lxea.length()) != lxea) {
add_warning("LX.SECURITY.CAPABILITY EA prefix was not \"{}\"", lxea);
continue;
}
buffer_t v2(v.size() - lxea.length());
memcpy(v2.data(), v.data() + lxea.length(), v2.size());
xattrs.emplace(EA_CAP, make_pair(EA_CAP_HASH, v2));
} else {
static const string_view recognized_eas[] = {
"$KERNEL.PURGE.APPXFICACHE",
"$KERNEL.PURGE.ESBCACHE",
"$CI.CATALOGHINT",
"C8A05BC0-3FA8-49E9-8148-61EE14A67687.CSC.DATABASE",
"C8A05BC0-3FA8-49E9-8148-61EE14A67687.CSC.DATABASEEX1",
"C8A05BC0-3FA8-49E9-8148-61EE14A67687.CSC.EPOCHEA",
"APPLICENSING",
"C8A05BC0-3FA8-49E9-8148-61EE14A67687.CSC.COMMON",
"C8A05BC0-3FA8-49E9-8148-61EE14A67687.CSC.COMMONEX",
"C8A05BC0-3FA8-49E9-8148-61EE14A67687.CSC.COMMONEX_1"
"C8A05BC0-3FA8-49E9-8148-61EE14A67687.CSC.USER",
"$KERNEL.PURGE.SMARTLOCKER.VALID",
"$KERNEL.SMARTLOCKER.ORIGINCLAIM",
"$KERNEL.PURGE.APPID.HASHINFO",
"$KERNEL.SMARTLOCKER.HASH",
"$KERNEL.PURGE.CIPCACHE",
"$KERNEL.SMARTLOCKER.UNINSTALLSTRINGS"
};
bool found = false;
for (const auto& r : recognized_eas) {
if (r == n) {
found = true;
break;
}
}
if (!found)
add_warning("Unrecognized EA {}", n);
}
}
if (!wof_mappings.empty()) {
auto len = wof_compressed_data.size();
wof_compressed_data.resize(sector_align(len, cluster_size));
memset(wof_compressed_data.data(), 0, wof_compressed_data.size());
for (const auto& m : wof_mappings) {
dev.seek(m.lcn * cluster_size);
dev.read(wof_compressed_data.data() + (m.vcn * cluster_size), (size_t)(m.length * cluster_size));
}
wof_compressed_data.resize(len);
}
if (compression_unit != 0) {
uint64_t cus = 1ull << compression_unit;
buffer_t compdata((size_t)(cus * cluster_size));
try {
while (!mappings.empty()) {
uint64_t clusters = 0, compsize;
bool compressed = false;
while (clusters < cus) {
if (mappings.empty()) {
compressed = true;
memset(compdata.data() + (clusters * cluster_size), 0, (size_t)((cus - clusters) * cluster_size));
break;
}
auto& m = mappings.front();
auto l = min(m.length, cus - clusters);
if (m.lcn == 0) {
memset(compdata.data() + (clusters * cluster_size), 0, (size_t)(l * cluster_size));
if (l < m.length) {
m.vcn += l;
m.length -= l;
} else
mappings.pop_front();
compressed = true;
} else {
dev.seek(m.lcn * cluster_size);
dev.read(compdata.data() + (clusters * cluster_size), (size_t)(l * cluster_size));
if (l < m.length) {
m.lcn += l;
m.vcn += l;
m.length -= l;
} else
mappings.pop_front();
}
clusters += l;
}
if (!compressed) {
if (filename.empty())
filename = f.get_filename();
inline_data.insert(inline_data.end(), compdata.begin(), compdata.end());
} else {
compsize = compdata.size();
if (file_size - inline_data.size() < compsize)
compsize = file_size - inline_data.size();
auto decomp = lznt1_decompress(string_view((char*)compdata.data(), compdata.size()), (uint32_t)compsize);
inline_data.insert(inline_data.end(), decomp.begin(), decomp.end());
}
if (inline_data.size() >= file_size) {
inline_data.resize((size_t)file_size);
mappings.clear();
break;
}
}
} catch (const exception& e) {
if (filename.empty())
filename = f.get_filename();
throw formatted_error("{}: {}", filename, e.what());
}
}
for (const auto& w : warnings) {
fmt::print(stderr, "{}\n", w);
}
#undef add_warning
const auto& si = *(const STANDARD_INFORMATION*)standard_info.data();
if (standard_info.size() >= offsetof(STANDARD_INFORMATION, MaximumVersions)) {
uint32_t defda = 0;
atts = si.FileAttributes;
if (links.size() == 1 && get<1>(links[0])[0] == '.')
defda |= FILE_ATTRIBUTE_HIDDEN;
if (is_dir) {
defda |= FILE_ATTRIBUTE_DIRECTORY;
atts |= FILE_ATTRIBUTE_DIRECTORY;
} else {
defda |= FILE_ATTRIBUTE_ARCHIVE;
atts &= ~FILE_ATTRIBUTE_DIRECTORY;
}
if (!reparse_point.empty() || !symlink.empty())
atts |= FILE_ATTRIBUTE_REPARSE_POINT;
else
atts &= ~FILE_ATTRIBUTE_REPARSE_POINT;
if (atts != defda)
atts_set = true;
}
if (standard_info.size() >= offsetof(STANDARD_INFORMATION, OwnerId)) {
ii.otime = win_time_to_unix(si.CreationTime);
if (!has_lxattrb) {
ii.st_atime = win_time_to_unix(si.LastAccessTime);
ii.st_mtime = win_time_to_unix(si.LastWriteTime);
ii.st_ctime = win_time_to_unix(si.ChangeTime);
}
}
if (sd.empty() && standard_info.size() >= offsetof(STANDARD_INFORMATION, QuotaCharged)) {
auto sv = dev.find_sd(si.SecurityId, secure);
if (sv.empty()) {
clear_line();
if (filename.empty())
filename = f.get_filename();
fmt::print(stderr, "Could not find SecurityId {} ({})\n", si.SecurityId, filename);
} else {
sd.resize(sv.size());
memcpy(sd.data(), sv.data(), sv.size());
}
}
if (reparse_point.size() > sizeof(uint32_t) && *(uint32_t*)reparse_point.data() == IO_REPARSE_TAG_WOF) {
try {
if (reparse_point.size() < offsetof(reparse_point_header, DataBuffer)) {
throw formatted_error("IO_REPARSE_TAG_WOF reparse point buffer was {} bytes, expected at least {}.",
reparse_point.size(), offsetof(reparse_point_header, DataBuffer));
}
auto rph = (reparse_point_header*)reparse_point.data();
if (reparse_point.size() < offsetof(reparse_point_header, DataBuffer) + rph->ReparseDataLength) {
throw formatted_error("IO_REPARSE_TAG_WOF reparse point buffer was {} bytes, expected {}.",
reparse_point.size(), offsetof(reparse_point_header, DataBuffer) + rph->ReparseDataLength);
}
if (rph->ReparseDataLength < sizeof(wof_external_info)) {
throw formatted_error("rph->ReparseDataLength was {} bytes, expected at least {}.",
rph->ReparseDataLength, sizeof(wof_external_info));
}
auto wofei = (wof_external_info*)rph->DataBuffer;
if (wofei->Version != WOF_CURRENT_VERSION)
throw formatted_error("Unsupported WOF version {}.", wofei->Version);
if (wofei->Provider == WOF_PROVIDER_WIM)
throw formatted_error("Unsupported WOF provider WOF_PROVIDER_WIM.");
else if (wofei->Provider != WOF_PROVIDER_FILE)
throw formatted_error("Unsupported WOF provider {}.", wofei->Provider);
if (rph->ReparseDataLength < sizeof(wof_external_info) + sizeof(file_provider_external_info_v0)) {
throw formatted_error("rph->ReparseDataLength was {} bytes, expected {}.",
rph->ReparseDataLength, sizeof(wof_external_info) + sizeof(file_provider_external_info_v0));
}
const auto& fpei = *(file_provider_external_info_v0*)&wofei[1];
if (fpei.Version != FILE_PROVIDER_CURRENT_VERSION) {
throw formatted_error("rph->FILE_PROVIDER_EXTERNAL_INFO_V0 Version was {}, expected {}.",
fpei.Version, FILE_PROVIDER_CURRENT_VERSION);
}
reparse_point.clear();
mappings.clear();
auto wcdsv = string_view((char*)wof_compressed_data.data(), wof_compressed_data.size());
switch (fpei.Algorithm) {
case FILE_PROVIDER_COMPRESSION_XPRESS4K:
inline_data = do_xpress_decompress(wcdsv, (uint32_t)file_size, 4096);
break;
case FILE_PROVIDER_COMPRESSION_LZX:
inline_data = do_lzx_decompress(wcdsv, (uint32_t)file_size);
break;
case FILE_PROVIDER_COMPRESSION_XPRESS8K:
inline_data = do_xpress_decompress(wcdsv, (uint32_t)file_size, 8192);
break;
case FILE_PROVIDER_COMPRESSION_XPRESS16K:
inline_data = do_xpress_decompress(wcdsv, (uint32_t)file_size, 16384);
break;
default:
throw formatted_error("Unrecognized WOF compression algorithm {}", fpei.Algorithm);
}
} catch (const exception& e) {
if (filename.empty())
filename = f.get_filename();
fmt::print(stderr, "{}: {}\n", filename, e.what());
}
}
ii.generation = 1;
ii.transid = 1;
if (!is_dir && !reparse_point.empty()) {
inline_data = reparse_point;
file_size = reparse_point.size();
vdl = inline_data.size();
} else if (!symlink.empty()) {
mappings.clear();
file_size = symlink.size();
inline_data.resize(symlink.size());
memcpy(inline_data.data(), symlink.data(), symlink.size());
vdl = inline_data.size();
}
if (!is_dir)
ii.st_size = file_size;
ii.st_nlink = (uint32_t)links.size();
if (mode.has_value())
ii.st_mode = mode.value();
else {
if (is_dir)
ii.st_mode = __S_IFDIR | S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH;
else
ii.st_mode = __S_IFREG | S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
if (!symlink.empty())
ii.st_mode |= __S_IFLNK;
}
ii.sequence = 1;
if (nocsum && !is_dir)
ii.flags = BTRFS_INODE_NODATACOW | BTRFS_INODE_NODATASUM;
if (!mappings.empty()) {
buffer_t buf(offsetof(EXTENT_DATA, data[0]) + sizeof(EXTENT_DATA2));
auto& ed = *(EXTENT_DATA*)buf.data();
mapped_inodes++;
auto& ed2 = *(EXTENT_DATA2*)&ed.data;
ed.generation = 1;
ed.compression = btrfs_compression::none;
ed.encryption = 0;
ed.encoding = 0;
ed.type = btrfs_extent_type::regular;
process_mappings(dev, inode, mappings, runs);
if (vdl < file_size) {
uint64_t alloc_size = sector_align(file_size, sector_size);
uint64_t alloc_vdl = sector_align(vdl, sector_size);
if (!mappings.empty() && (mappings.back().vcn + mappings.back().length) < alloc_size / sector_size) {
mappings.emplace_back(0, mappings.back().vcn + mappings.back().length,
(alloc_size / sector_size) - mappings.back().vcn - mappings.back().length);
}
while (alloc_vdl < alloc_size) { // for whole sectors, replace with sparse extents
if (!mappings.empty()) {
auto& m = mappings.back();
if (m.length * sector_size > alloc_size - alloc_vdl) {
uint64_t sub = (alloc_size - alloc_vdl) / sector_size;
if (sub > 0) {
m.length -= sub;
alloc_size -= sub * sector_size;
}
break;
} else {
alloc_size -= m.length * sector_size;
mappings.pop_back();
}
} else {
alloc_size = alloc_vdl;
break;
}
}
if (vdl < alloc_size) { // zero end of final sector if necessary
buffer_t sector(sector_size);
dev.seek((mappings.back().lcn + mappings.back().length - 1) * cluster_size);
dev.read(sector.data(), sector.size());
memset(sector.data() + (vdl % sector_size), 0, sector_size - (vdl % sector_size));
dev.seek((mappings.back().lcn + mappings.back().length - 1) * cluster_size);
dev.write(sector.data(), sector.size());
}
}
for (const auto& m : mappings) {
if (m.lcn != 0) { // not sparse
ed.decoded_size = ed2.size = ed2.num_bytes = m.length * dev.boot_sector->BytesPerSector * dev.boot_sector->SectorsPerCluster;
ii.st_blocks += ed.decoded_size;
ed2.address = (m.lcn * dev.boot_sector->BytesPerSector * dev.boot_sector->SectorsPerCluster) + chunk_virt_offset;
ed2.offset = 0;
add_item(r, inode, btrfs_key_type::EXTENT_DATA, m.vcn * dev.boot_sector->BytesPerSector * dev.boot_sector->SectorsPerCluster, buf);
}
}
} else if (!inline_data.empty()) {
if (inline_data.size() > max_inline) {
buffer_t buf(offsetof(EXTENT_DATA, data[0]) + sizeof(EXTENT_DATA2));
auto compression = opt_compression;
auto& ed = *(EXTENT_DATA*)buf.data();
auto& ed2 = *(EXTENT_DATA2*)&ed.data;
rewritten_inodes++;
ed.generation = 1;
ed.compression = btrfs_compression::none;
ed.encryption = 0;
ed.encoding = 0;
ed.type = btrfs_extent_type::regular;
// round to nearest sector, and zero end
if (inline_data.size() & (sector_size - 1)) {
auto oldlen = inline_data.size();
inline_data.resize((size_t)sector_align(inline_data.size(), sector_size));
memset(inline_data.data() + oldlen, 0, inline_data.size() - oldlen);
}
// FIXME - do by sparse extents, if longer than a sector
if (vdl < inline_data.size())
memset(inline_data.data() + vdl, 0, (size_t)(inline_data.size() - vdl));
uint64_t pos = 0;
string_view data{(char*)inline_data.data(), inline_data.size()};
while (!data.empty()) {
uint64_t len, lcn, cl;
bool inserted = false;
buffer_t compdata;
if (compression == btrfs_compression::none)
len = min((size_t)max_extent_size, data.length());
#if defined(WITH_ZLIB) || defined(WITH_LZO) || defined(WITH_ZSTD)
else if (data.length() <= cluster_size) {
len = min((size_t)max_extent_size, data.length());
ed.compression = btrfs_compression::none;
} else {
optional<buffer_t> c;
len = min((size_t)max_comp_extent_size, data.length());
switch (compression) {
#ifdef WITH_ZLIB
case btrfs_compression::zlib:
c = zlib_compress(data.substr(0, len), cluster_size);
break;
#endif
#ifdef WITH_LZO
case btrfs_compression::lzo:
c = lzo_compress(data.substr(0, len), cluster_size);
break;
#endif
#ifdef WITH_ZSTD
case btrfs_compression::zstd:
c = zstd_compress(data.substr(0, len), cluster_size);
break;
#endif
default:
break;
}
if (c.has_value()) {
compdata = c.value();
ed.compression = compression;
ii.flags |= BTRFS_INODE_COMPRESS;
} else // incompressible
ed.compression = btrfs_compression::none;
// if first part of file incompressible, give up on rest and add nocomp flag
if (pos == 0 && ed.compression == btrfs_compression::none) {
ii.flags |= BTRFS_INODE_NOCOMPRESS;
compression = btrfs_compression::none;
len = min((size_t)max_extent_size, data.length());
}
// FIXME - set xattr for compression type?
}
#endif
ed.decoded_size = ed2.num_bytes = len;
ed2.size = ed.compression == btrfs_compression::none ? len : compdata.size();
ii.st_blocks += ed.decoded_size;
ed2.address = allocate_data(ed2.size, true);
ed2.offset = 0;
dev.seek(ed2.address - chunk_virt_offset);
if (ed.compression == btrfs_compression::none)
dev.write((uint8_t*)data.data(), (size_t)len);
else
dev.write(compdata.data(), compdata.size());
add_item(r, inode, btrfs_key_type::EXTENT_DATA, pos, buf);
lcn = (ed2.address - chunk_virt_offset) / cluster_size;
cl = ed2.size / cluster_size;
auto& rl = runs[(ed2.address - chunk_virt_offset) / data_chunk_size];
for (auto it = rl.begin(); it != rl.end(); it++) {
auto& r = *it;
if (r.offset >= lcn + cl) {
rl.emplace(it, lcn, cl, inode, pos / cluster_size, false, true);
inserted = true;
break;
}
}
if (!inserted)
rl.emplace_back(lcn, cl, inode, pos / cluster_size, false, true);
if (data.length() > len) {
pos += len;
data = data.substr((size_t)len);
} else
break;
}
inline_data.clear();
} else {
buffer_t buf(offsetof(EXTENT_DATA, data[0]) + inline_data.size());
auto& ed = *(EXTENT_DATA*)buf.data();
inline_inodes++;
// FIXME - compress inline extents?
ed.generation = 1;
ed.decoded_size = inline_data.size();
ed.compression = btrfs_compression::none;
ed.encryption = 0;
ed.encoding = 0;
ed.type = btrfs_extent_type::inline_extent;
memcpy(ed.data, inline_data.data(), inline_data.size());
if (vdl < inline_data.size())
memset(ed.data + vdl, 0, (size_t)(inline_data.size() - vdl));
add_item_move(r, inode, btrfs_key_type::EXTENT_DATA, 0, buf);
ii.st_blocks = inline_data.size();
}
}
add_item(r, inode, btrfs_key_type::INODE_ITEM, 0, &ii, sizeof(INODE_ITEM));
if (item_type == btrfs_inode_type::unknown) {
if (is_dir)
item_type = btrfs_inode_type::directory;
else if (!symlink.empty())
item_type = btrfs_inode_type::symlink;
else
item_type = btrfs_inode_type::file;
}
for (const auto& l : links) {
if (get<0>(l) == NTFS_ROOT_DIR_INODE)
link_inode(r, inode, SUBVOL_ROOT_INODE, get<1>(l), item_type);
else
link_inode(r, inode, get<0>(l) + inode_offset, get<1>(l), item_type);
}
if (!sd.empty()) {
// FIXME - omit SD if only one hard link and implied from parent?
xattrs.emplace(EA_NTACL, make_pair(EA_NTACL_HASH, sd));
}
if (atts_set) {
char val[16], *val2;
val2 = &val[sizeof(val) - 1];
do {
uint8_t c = atts % 16;
*val2 = (char)(c <= 9 ? (c + '0') : (c - 0xa + 'a'));
val2--;
atts >>= 4;
} while (atts != 0);
*val2 = 'x';
val2--;
*val2 = '0';
auto sv = string_view(val2, val + sizeof(val) - val2);
buffer_t buf(sv.size());
memcpy(buf.data(), sv.data(), sv.size());
xattrs.emplace(EA_DOSATTRIB, make_pair(EA_DOSATTRIB_HASH, buf));
}
if (!reparse_point.empty() && is_dir)
xattrs.emplace(EA_REPARSE, make_pair(EA_REPARSE_HASH, reparse_point));
for (const auto& xa : xattrs) {
// FIXME - collisions (make hash key of map?)
set_xattr(r, inode, xa.first, get<0>(xa.second), get<1>(xa.second));
}
}
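// Iterates over the MFT bitmap and calls add_inode() for every in-use file record at
// or above first_ntfs_inode; NTFS metadata files below that (other than the root
// directory) are added to the skip list so no links are created for them.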
static void create_inodes(root& r, const buffer_t& mftbmp, ntfs& dev, runs_t& runs, ntfs_file& secure,
enum btrfs_compression compression, bool nocsum) {
list<space> inodes;
list<uint64_t> skiplist;
uint64_t total = 0, num = 0;
r.dir_seqs[SUBVOL_ROOT_INODE] = 3;
parse_bitmap(mftbmp, inodes);
for (const auto& l : inodes) {
total += l.length;
}
while (!inodes.empty()) {
auto& run = inodes.front();
uint64_t ntfs_inode = run.offset;
uint64_t inode = ntfs_inode + inode_offset;
bool dir;
try {
if (ntfs_inode >= first_ntfs_inode)
add_inode(r, inode, ntfs_inode, dir, runs, secure, dev, skiplist, compression, nocsum);
else if (ntfs_inode != NTFS_ROOT_DIR_INODE)
populate_skip_list(dev, ntfs_inode, skiplist);
} catch (...) {
clear_line();
throw;
}
num++;
fmt::print("Processing inode {} / {} ({:1.1f}%)\r", num, total, (float)num * 100.0f / (float)total);
fflush(stdout);
if (run.length == 1)
inodes.pop_front();
else {
run.offset++;
run.length--;
}
}
fmt::print("\n");
}
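// Matches the kernel's hash_extent_data_ref(): the CRC32C of the root id is shifted
// left by 31 bits and XORed with the CRC32C of the object id followed by the offset.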
static uint64_t get_extent_data_ref_hash2(uint64_t root, uint64_t objid, uint64_t offset) {
uint32_t high_crc = 0xffffffff, low_crc = 0xffffffff;
high_crc = calc_crc32c(high_crc, (uint8_t*)&root, sizeof(uint64_t));
low_crc = calc_crc32c(low_crc, (uint8_t*)&objid, sizeof(uint64_t));
low_crc = calc_crc32c(low_crc, (uint8_t*)&offset, sizeof(uint64_t));
return ((uint64_t)high_crc << 31) ^ (uint64_t)low_crc;
}
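// Creates an EXTENT_ITEM for each data run: runs referenced only by the image file
// get a single EXTENT_DATA_REF, runs not present in the image (rewritten data) get a
// single ref to the owning file, and shared runs get two refs, ordered by their
// back-reference hash as btrfs expects.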
static void create_data_extent_items(root& extent_root, const runs_t& runs, uint32_t cluster_size, uint64_t image_subvol_id,
uint64_t image_inode) {
for (const auto& rs : runs) {
for (const auto& r : rs.second) {
uint64_t img_addr;
if (r.inode == dummy_inode)
continue;
if (r.relocated) {
for (const auto& reloc : relocs) {
if (reloc.new_start == r.offset) {
img_addr = reloc.old_start * cluster_size;
break;
}
}
} else
img_addr = r.offset * cluster_size;
if (r.inode == 0) {
data_item di;
di.extent_item.refcount = 1;
di.extent_item.generation = 1;
di.extent_item.flags = EXTENT_ITEM_DATA;
di.type = btrfs_key_type::EXTENT_DATA_REF;
di.edr.root = image_subvol_id;
di.edr.objid = image_inode;
di.edr.count = 1;
di.edr.offset = img_addr;
add_item(extent_root, (r.offset * cluster_size) + chunk_virt_offset, btrfs_key_type::EXTENT_ITEM, r.length * cluster_size,
&di, sizeof(data_item));
} else if (r.not_in_img) {
data_item di;
di.extent_item.refcount = 1;
di.extent_item.generation = 1;
di.extent_item.flags = EXTENT_ITEM_DATA;
di.type = btrfs_key_type::EXTENT_DATA_REF;
di.edr.root = BTRFS_ROOT_FSTREE;
di.edr.objid = r.inode;
di.edr.count = 1;
di.edr.offset = r.file_offset * cluster_size;
add_item(extent_root, (r.offset * cluster_size) + chunk_virt_offset, btrfs_key_type::EXTENT_ITEM, r.length * cluster_size,
&di, sizeof(data_item));
} else {
data_item2 di2;
EXTENT_DATA_REF* e1;
EXTENT_DATA_REF* e2;
di2.extent_item.refcount = 2;
di2.extent_item.generation = 1;
di2.extent_item.flags = EXTENT_ITEM_DATA;
di2.type1 = btrfs_key_type::EXTENT_DATA_REF;
di2.type2 = btrfs_key_type::EXTENT_DATA_REF;
auto hash1 = get_extent_data_ref_hash2(image_subvol_id, image_inode, img_addr);
auto hash2 = get_extent_data_ref_hash2(BTRFS_ROOT_FSTREE, r.inode, r.file_offset * cluster_size);
if (hash2 > hash1) {
e1 = &di2.edr2;
e2 = &di2.edr1;
} else {
e1 = &di2.edr1;
e2 = &di2.edr2;
}
e1->root = image_subvol_id;
e1->objid = image_inode;
e1->count = 1;
e1->offset = img_addr;
e2->root = BTRFS_ROOT_FSTREE;
e2->objid = r.inode;
e2->count = 1;
e2->offset = r.file_offset * cluster_size;
add_item(extent_root, (r.offset * cluster_size) + chunk_virt_offset, btrfs_key_type::EXTENT_ITEM, r.length * cluster_size,
&di2, sizeof(data_item2));
}
}
}
}
#ifdef USE_MMAP
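// Small RAII wrapper around mmap(), used below so checksumming can read a whole data
// chunk without copying it into a buffer first.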
class memory_map {
public:
memory_map(int fd, uint64_t off, size_t length) : length(length) {
ptr = (uint8_t*)mmap(nullptr, length, PROT_READ, MAP_SHARED, fd, off);
if (ptr == MAP_FAILED)
throw formatted_error("mmap failed (errno = {})", errno);
}
~memory_map() {
munmap(ptr, length);
}
uint8_t* ptr;
size_t length;
};
#endif
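// Computes per-sector checksums for all file data and stores them as EXTENT_CSUM
// items; runs are first merged and split so that each item fits within a tree block
// (see __MAX_CSUM_ITEMS in the kernel).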
static void calc_checksums(root& csum_root, runs_t runs, ntfs& dev, enum btrfs_csum_type csum_type) {
uint32_t sector_size = 0x1000; // FIXME
uint32_t cluster_size = dev.boot_sector->BytesPerSector * dev.boot_sector->SectorsPerCluster;
list<space> runs2;
uint64_t total = 0, num = 0;
uint32_t csum_size;
switch (csum_type) {
case btrfs_csum_type::crc32c:
csum_size = sizeof(uint32_t);
break;
case btrfs_csum_type::xxhash:
csum_size = sizeof(uint64_t);
break;
case btrfs_csum_type::sha256:
case btrfs_csum_type::blake2:
csum_size = 32;
break;
}
// See __MAX_CSUM_ITEMS in kernel
auto max_run = (uint32_t)((tree_size - sizeof(tree_header) - (sizeof(leaf_node) * 2)) / csum_size) - 1;
// FIXME - these are clusters, when they should be sectors
// split and merge runs
for (auto& r2 : runs) {
auto& rs = r2.second;
bool first = true;
while (!rs.empty()) {
auto& r = rs.front();
if (r.inode == dummy_inode) {
rs.pop_front();
continue;
}
if (first || runs2.back().offset + runs2.back().length < r.offset || runs2.back().length == max_run) {
// create new run
if (r.length > max_run) {
runs2.emplace_back(r.offset, max_run);
r.offset += max_run;
r.length -= max_run;
} else {
runs2.emplace_back(r.offset, r.length);
rs.pop_front();
}
first = false;
continue;
}
// continue existing run
if (runs2.back().length + r.length <= max_run) {
runs2.back().length += r.length;
rs.pop_front();
continue;
}
r.offset += max_run - runs2.back().length;
r.length -= max_run - runs2.back().length;
runs2.back().length = max_run;
}
}
for (const auto& r : runs2) {
total += r.length;
}
#ifdef USE_MMAP
unique_ptr<memory_map> mm;
uint64_t old_chunk = 0;
#endif
for (const auto& r : runs2) {
buffer_t csums;
if (r.offset * cluster_size >= orig_device_size)
break;
csums.resize((size_t)(r.length * cluster_size * csum_size / sector_size));
#ifdef USE_MMAP
uint64_t chunk = (r.offset * cluster_size) / data_chunk_size;
if (!mm || old_chunk != chunk) {
mm.reset(new memory_map(dev.fd, chunk * data_chunk_size, min(data_chunk_size, orig_device_size - (chunk * data_chunk_size))));
old_chunk = chunk;
}
string_view sv((char*)mm->ptr + ((r.offset * cluster_size) % data_chunk_size), r.length * cluster_size);
#else
buffer_t data((size_t)(r.length * cluster_size));
dev.seek(r.offset * cluster_size);
dev.read(data.data(), data.size());
auto sv = string_view((char*)data.data(), data.size());
#endif
auto msg = [&]() {
num++;
if (num % 1000 == 0 || num == total) {
fmt::print("Calculating checksums {} / {} ({:1.1f}%)\r", num, total, (float)num * 100.0f / (float)total);
fflush(stdout);
}
};
switch (csum_type) {
case btrfs_csum_type::crc32c: {
auto csum = (uint32_t*)&csums[0];
while (sv.length() > 0) {
*csum = ~calc_crc32c(0xffffffff, (const uint8_t*)sv.data(), sector_size);
csum++;
sv = sv.substr(sector_size);
msg();
}
break;
}
case btrfs_csum_type::xxhash: {
auto csum = (uint64_t*)&csums[0];
while (sv.length() > 0) {
*csum = XXH64(sv.data(), sector_size, 0);
csum++;
sv = sv.substr(sector_size);
msg();
}
break;
}
case btrfs_csum_type::sha256: {
auto csum = (uint8_t*)&csums[0];
while (sv.length() > 0) {
calc_sha256(csum, sv.data(), sector_size);
csum += csum_size;
sv = sv.substr(sector_size);
msg();
}
break;
}
case btrfs_csum_type::blake2: {
auto csum = (uint8_t*)&csums[0];
while (sv.length() > 0) {
blake2b(csum, csum_size, sv.data(), sector_size);
csum += csum_size;
sv = sv.substr(sector_size);
msg();
}
break;
}
}
add_item(csum_root, EXTENT_CSUM_ID, btrfs_key_type::EXTENT_CSUM, (r.offset * cluster_size) + chunk_virt_offset, &csums[0], (uint16_t)(r.length * cluster_size * csum_size / sector_size));
}
fmt::print("\n");
}
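// Moves a single cluster out of the way: the data is copied to newly allocated space,
// a relocation is recorded so the image file still refers to the original contents,
// and the new location is inserted into the runs map marked as relocated.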
static void protect_cluster(ntfs& dev, runs_t& runs, uint64_t cluster) {
if (!split_runs(dev, runs, cluster, 1, dummy_inode, 0))
return;
buffer_t data;
uint32_t cluster_size = dev.boot_sector->BytesPerSector * dev.boot_sector->SectorsPerCluster;
uint64_t addr = allocate_data(cluster_size, false) - chunk_virt_offset;
if ((cluster + 1) * cluster_size > orig_device_size)
data.resize((size_t)(orig_device_size - (cluster * cluster_size)));
else
data.resize((size_t)cluster_size);
dev.seek(cluster * cluster_size);
dev.read(data.data(), data.size());
dev.seek(addr);
dev.write(data.data(), data.size());
relocs.emplace_back(cluster, 1, addr / cluster_size);
uint64_t clusters_per_chunk = data_chunk_size / (uint64_t)cluster_size;
uint64_t cluster_addr = (cluster * cluster_size) + chunk_virt_offset;
for (auto& c : chunks) {
if (c.offset <= cluster_addr && c.offset + c.length > cluster_addr) {
c.used -= cluster_size;
break;
}
}
uint64_t chunk = (addr / (uint64_t)cluster_size) / clusters_per_chunk;
if (runs.count(chunk) != 0) {
auto& r = runs.at(chunk);
for (auto it = r.begin(); it != r.end(); it++) {
if (it->offset > addr / cluster_size) {
r.emplace(it, addr / cluster_size, 1, 0, 0, true);
return;
}
}
}
auto& r = runs[chunk];
r.emplace_back(addr / cluster_size, 1, 0, 0, true);
}
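// Relocates any clusters that would be overwritten by the btrfs superblocks, plus the
// first cluster, anything beyond the last data chunk, and (if the device size isn't
// sector-aligned) the final partial sector.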
static void protect_superblocks(ntfs& dev, runs_t& runs) {
uint32_t cluster_size = dev.boot_sector->BytesPerSector * dev.boot_sector->SectorsPerCluster;
unsigned int i = 0;
while (superblock_addrs[i] != 0) {
if (superblock_addrs[i] > device_size - sizeof(superblock))
break;
uint64_t cluster_start = (superblock_addrs[i] - (superblock_addrs[i] % stripe_length)) / cluster_size;
uint64_t cluster_end = sector_align(superblock_addrs[i] - (superblock_addrs[i] % stripe_length) + stripe_length, cluster_size) / cluster_size;
for (uint64_t j = cluster_start; j < cluster_end; j++) {
protect_cluster(dev, runs, j);
}
i++;
}
// also relocate first cluster
protect_cluster(dev, runs, 0);
if (reloc_last_sector)
protect_cluster(dev, runs, device_size / cluster_size);
if (last_chunk_end < device_size) {
uint64_t cluster_start = last_chunk_end / cluster_size;
uint64_t cluster_end = device_size / cluster_size;
if (reloc_last_sector)
cluster_end--;
for (auto i = cluster_start; i <= cluster_end; i++) {
protect_cluster(dev, runs, i);
}
}
}
static void clear_first_cluster(ntfs& dev) {
uint32_t cluster_size = dev.boot_sector->BytesPerSector * dev.boot_sector->SectorsPerCluster;
buffer_t data(cluster_size);
memset(data.data(), 0, data.size());
dev.seek(0);
dev.write(data.data(), data.size());
}
static void calc_used_space(const runs_t& runs, uint32_t cluster_size) {
for (const auto& rl : runs) {
uint64_t offset = (rl.first * data_chunk_size) + chunk_virt_offset;
for (auto& c : chunks) {
if (offset == c.offset) {
for (const auto& r : rl.second) {
if ((r.offset * cluster_size) + chunk_virt_offset - c.offset < c.length)
c.used += r.length * cluster_size;
}
break;
}
}
}
}
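// Populates the root tree: ROOT_ITEMs for the other trees, an inode for the top-level
// tree directory, and a "default" DIR_ITEM pointing at the FS tree so the filesystem
// has a default subvolume.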
static void populate_root_root(root& root_root) {
INODE_ITEM ii;
static const char default_subvol[] = "default";
static const uint32_t default_hash = 0x8dbfc2d2;
for (const auto& r : roots) {
if (r.id != BTRFS_ROOT_ROOT && r.id != BTRFS_ROOT_CHUNK)
add_to_root_root(r, root_root);
}
add_inode_ref(root_root, BTRFS_ROOT_FSTREE, BTRFS_ROOT_TREEDIR, 0, "default");
memset(&ii, 0, sizeof(INODE_ITEM));
ii.generation = 1;
ii.transid = 1;
ii.st_nlink = 1;
ii.st_mode = __S_IFDIR | S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH;
add_item(root_root, BTRFS_ROOT_TREEDIR, btrfs_key_type::INODE_ITEM, 0, &ii, sizeof(INODE_ITEM));
add_inode_ref(root_root, BTRFS_ROOT_TREEDIR, BTRFS_ROOT_TREEDIR, 0, "..");
buffer_t buf(offsetof(DIR_ITEM, name[0]) + sizeof(default_subvol) - 1);
auto& di = *(DIR_ITEM*)buf.data();
di.key.obj_id = BTRFS_ROOT_FSTREE;
di.key.obj_type = btrfs_key_type::ROOT_ITEM;
di.key.offset = 0xffffffffffffffff;
di.transid = 0;
di.m = 0;
di.n = sizeof(default_subvol) - 1;
di.type = btrfs_inode_type::directory;
memcpy(di.name, default_subvol, sizeof(default_subvol) - 1);
add_item_move(root_root, BTRFS_ROOT_TREEDIR, btrfs_key_type::DIR_ITEM, default_hash, buf);
}
static void add_subvol_uuid(root& r) {
add_item(r, *(uint64_t*)&subvol_uuid, btrfs_key_type::SUBVOL_UUID, *(uint64_t*)&subvol_uuid.uuid[sizeof(uint64_t)],
&image_subvol_id, sizeof(image_subvol_id));
}
static void update_dir_sizes(root& r) {
for (auto& it : r.items) {
if (it.first.obj_type == btrfs_key_type::INODE_ITEM && r.dir_size.count(it.first.obj_id) != 0) {
auto& ii = *(INODE_ITEM*)it.second.data();
// FIXME - would it speed things up if we removed the entry from dir_size map here?
ii.st_size = r.dir_size.at(it.first.obj_id);
}
}
}
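// Top-level conversion routine: generates the filesystem UUIDs, lays out data chunks
// from the NTFS bitmap, relocates clusters that clash with superblock positions,
// creates the system trees and the image subvolume, converts every inode, builds the
// ntfs.img image file, computes checksums, then repeatedly rebuilds the
// chunk/extent/device trees until their allocations stabilize before writing
// everything out and zeroing the first cluster so the volume is no longer recognized
// as NTFS.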
static void convert(ntfs& dev, enum btrfs_compression compression, enum btrfs_csum_type csum_type, bool nocsum) {
uint32_t sector_size = 0x1000; // FIXME
uint64_t cluster_size = (uint64_t)dev.boot_sector->BytesPerSector * (uint64_t)dev.boot_sector->SectorsPerCluster;
runs_t runs;
static const uint64_t image_inode = 0x101;
// FIXME - die if cluster size not multiple of 4096
{
default_random_engine generator;
generator.seed((unsigned int)chrono::high_resolution_clock::now().time_since_epoch().count());
fs_uuid = generate_uuid(generator);
chunk_uuid = generate_uuid(generator);
dev_uuid = generate_uuid(generator);
subvol_uuid = generate_uuid(generator);
}
device_size = orig_device_size = dev.boot_sector->TotalSectors * dev.boot_sector->BytesPerSector;
if (device_size % sector_size != 0) {
device_size -= device_size % sector_size;
reloc_last_sector = true;
}
space_list.emplace_back(0, device_size);
ntfs_file bitmap(dev, NTFS_BITMAP_INODE);
auto bmpdata = bitmap.read();
create_data_chunks(dev, bmpdata);
roots.emplace_back(BTRFS_ROOT_ROOT);
root& root_root = roots.back();
roots.emplace_back(BTRFS_ROOT_EXTENT);
root& extent_root = roots.back();
roots.emplace_back(BTRFS_ROOT_CHUNK);
root& chunk_root = roots.back();
add_dev_item(chunk_root);
roots.emplace_back(BTRFS_ROOT_DEVTREE);
root& devtree_root = roots.back();
add_dev_stats(devtree_root);
roots.emplace_back(BTRFS_ROOT_FSTREE);
root& fstree_root = roots.back();
populate_fstree(fstree_root);
roots.emplace_back(BTRFS_ROOT_DATA_RELOC);
populate_fstree(roots.back());
roots.emplace_back(BTRFS_ROOT_CHECKSUM);
root& csum_root = roots.back();
root& image_subvol = add_image_subvol(root_root, fstree_root);
parse_data_bitmap(dev, bmpdata, runs);
// make sure runs don't go beyond end of device
while (!runs.empty() && (runs.rbegin()->second.back().offset * cluster_size) + runs.rbegin()->second.back().length > device_size) {
auto& r = runs.rbegin()->second;
if (r.back().offset * cluster_size >= orig_device_size) {
r.pop_back();
if (r.empty())
runs.erase(prev(runs.end()));
} else {
uint64_t len = orig_device_size - (r.back().offset * cluster_size);
if (len % cluster_size)
r.back().length = (len / cluster_size) + 1;
else
r.back().length = len / cluster_size;
break;
}
}
protect_superblocks(dev, runs);
calc_used_space(runs, dev.boot_sector->BytesPerSector * dev.boot_sector->SectorsPerCluster);
auto mftbmp = dev.mft->read(0, 0, ntfs_attribute::BITMAP);
{
ntfs_file secure(dev, NTFS_SECURE_INODE);
create_inodes(fstree_root, mftbmp, dev, runs, secure, compression, nocsum);
}
fmt::print("Mapped {} inodes directly.\n", mapped_inodes);
fmt::print("Rewrote {} inodes.\n", rewritten_inodes);
fmt::print("Inlined {} inodes.\n", inline_inodes);
create_image(image_subvol, dev, runs, image_inode, nocsum);
roots.emplace_back(BTRFS_ROOT_UUID);
add_subvol_uuid(roots.back());
create_data_extent_items(extent_root, runs, dev.boot_sector->BytesPerSector * dev.boot_sector->SectorsPerCluster,
image_subvol.id, image_inode);
fmt::print("Updating directory sizes\n");
for (auto& r : roots) {
if (!r.dir_size.empty())
update_dir_sizes(r);
}
if (!nocsum)
calc_checksums(csum_root, runs, dev, csum_type);
populate_root_root(root_root);
for (auto& r : roots) {
if (r.id != BTRFS_ROOT_EXTENT && r.id != BTRFS_ROOT_CHUNK && r.id != BTRFS_ROOT_DEVTREE)
r.create_trees(extent_root, csum_type);
}
do {
bool extents_changed = false;
chunks_changed = false;
for (auto& c : chunks) {
if (!c.added) {
add_chunk(chunk_root, devtree_root, extent_root, c);
c.added = true;
}
}
for (auto& r : roots) {
if (r.id == BTRFS_ROOT_EXTENT || r.id == BTRFS_ROOT_CHUNK || r.id == BTRFS_ROOT_DEVTREE) {
r.old_addresses.swap(r.addresses);
r.addresses.clear();
// FIXME - unallocate metadata and changed used value in chunks
r.metadata_size -= r.trees.size() * tree_size;
r.trees.clear();
r.allocations_done = false;
r.create_trees(extent_root, csum_type);
if (r.allocations_done)
extents_changed = true;
}
}
if (!chunks_changed && !extents_changed)
break;
} while (true);
// update tree addresses and levels in-place in root 1
update_root_root(root_root, csum_type);
// update used value in BLOCK_GROUP_ITEMs
update_extent_root(extent_root, csum_type);
// update bytes_used in DEV_ITEM in root 3
update_chunk_root(chunk_root, csum_type);
for (auto& r : roots) {
r.write_trees(dev);
}
write_superblocks(dev, chunk_root, root_root, compression, csum_type);
clear_first_cluster(dev);
}
#if defined(__i386__) || defined(__x86_64__)
static void check_cpu() noexcept {
#ifndef _MSC_VER
unsigned int cpuInfo[4];
__get_cpuid(1, &cpuInfo[0], &cpuInfo[1], &cpuInfo[2], &cpuInfo[3]);
if (cpuInfo[2] & bit_SSE4_2)
calc_crc32c = calc_crc32c_hw;
#else
int cpuInfo[4];
__cpuid(cpuInfo, 1);
if (cpuInfo[2] & (1 << 20))
calc_crc32c = calc_crc32c_hw;
#endif
}
#endif
static enum btrfs_compression parse_compression_type(string_view s) {
if (s == "none")
return btrfs_compression::none;
else if (s == "zlib")
return btrfs_compression::zlib;
else if (s == "lzo")
return btrfs_compression::lzo;
else if (s == "zstd")
return btrfs_compression::zstd;
else
throw formatted_error("Unrecognized compression type {}.", s);
}
static enum btrfs_csum_type parse_csum_type(string_view s) {
if (s == "crc32c")
return btrfs_csum_type::crc32c;
else if (s == "xxhash")
return btrfs_csum_type::xxhash;
else if (s == "sha256")
return btrfs_csum_type::sha256;
else if (s == "blake2")
return btrfs_csum_type::blake2;
else
throw formatted_error("Unrecognized hash type {}.", s);
}
static vector<string> read_args(int argc, char* argv[]) {
vector<string> ret;
for (int i = 0; i < argc; i++) {
ret.emplace_back(argv[i]);
}
return ret;
}
int main(int argc, char* argv[]) {
try {
auto args = read_args(argc, argv);
if (args.size() == 2 && args[1] == "--version") {
fmt::print("ntfs2btrfs " PROJECT_VER "\n");
return 1;
}
if (args.size() < 2 || (args.size() == 2 && (args[1] == "--help" || args[1] == "/?"))) {
fmt::print(R"(Usage: ntfs2btrfs [OPTION]... device
Convert an NTFS filesystem to Btrfs.
-c, --compress=ALGO recompress compressed files; ALGO can be 'zlib',
'lzo', 'zstd', or 'none'.
-h, --hash=ALGO checksum algorithm to use; ALGO can be 'crc32c'
(default), 'xxhash', 'sha256', or 'blake2'
-r, --rollback rollback to the original filesystem
-d, --no-datasum disable data checksums
)");
return 1;
}
string fn;
enum btrfs_compression compression;
enum btrfs_csum_type csum_type;
bool do_rollback = false, nocsum = false;
#ifdef WITH_ZSTD
compression = btrfs_compression::zstd;
#elif defined(WITH_LZO)
compression = btrfs_compression::lzo;
#elif defined(WITH_ZLIB)
compression = btrfs_compression::zlib;
#else
compression = btrfs_compression::none;
#endif
csum_type = btrfs_csum_type::crc32c;
for (size_t i = 1; i < args.size(); i++) {
const auto& arg = args[i];
if (!arg.empty() && arg[0] == '-') {
if (arg == "-c") {
if (i == args.size() - 1)
throw runtime_error("No value given for -c option.");
compression = parse_compression_type(args[i+1]);
i++;
} else if (arg.substr(0, 11) == "--compress=")
compression = parse_compression_type(arg.substr(11));
else if (arg == "-h") {
if (i == args.size() - 1)
throw runtime_error("No value given for -h option.");
csum_type = parse_csum_type(args[i+1]);
i++;
} else if (arg.substr(0, 7) == "--hash=")
csum_type = parse_csum_type(arg.substr(7));
else if (arg == "-r" || arg == "--rollback")
do_rollback = true;
else if (arg == "-d" || arg == "--no-datasum")
nocsum = true;
else
throw formatted_error("Unrecognized option {}.", arg);
} else {
if (!fn.empty())
throw runtime_error("Multiple devices given.");
fn = arg;
}
}
if (fn.empty())
throw runtime_error("No device given.");
#if defined(__i386__) || defined(__x86_64__)
check_cpu();
#endif
if (do_rollback) {
rollback(fn);
return 0;
}
if (nocsum && compression != btrfs_compression::none) {
compression = btrfs_compression::none;
fmt::print("Disabling compression as it requires checksums to be enabled.\n");
} else {
#ifndef WITH_ZLIB
if (compression == btrfs_compression::zlib)
throw runtime_error("Zlib compression not compiled in.");
#endif
#ifndef WITH_LZO
if (compression == btrfs_compression::lzo)
throw runtime_error("LZO compression not compiled in.");
#endif
#ifndef WITH_ZSTD
if (compression == btrfs_compression::zstd)
throw runtime_error("Zstd compression not compiled in.");
#endif
switch (compression) {
case btrfs_compression::zlib:
fmt::print("Using Zlib compression.\n");
break;
case btrfs_compression::lzo:
fmt::print("Using LZO compression.\n");
break;
case btrfs_compression::zstd:
fmt::print("Using Zstd compression.\n");
break;
case btrfs_compression::none:
fmt::print("Not using compression.\n");
break;
}
}
switch (csum_type) {
case btrfs_csum_type::crc32c:
fmt::print("Using CRC32C for checksums.\n");
break;
case btrfs_csum_type::xxhash:
fmt::print("Using xxHash for checksums.\n");
break;
case btrfs_csum_type::sha256:
fmt::print("Using SHA256 for checksums.\n");
break;
case btrfs_csum_type::blake2:
fmt::print("Using Blake2 for checksums.\n");
break;
}
if (nocsum)
fmt::print("Not calculating checksums.\n");
ntfs dev(fn);
convert(dev, compression, csum_type, nocsum);
} catch (const exception& e) {
cerr << e.what() << endl;
return 1;
}
return 0;
}
ntfs2btrfs-20240115/src/ntfs2btrfs.h000066400000000000000000000301701455127722500171170ustar00rootroot00000000000000/* Copyright (c) Mark Harmstone 2020
*
* This file is part of ntfs2btrfs.
*
* Ntfs2btrfs is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public Licence as published by
* the Free Software Foundation, either version 2 of the Licence, or
* (at your option) any later version.
*
* Ntfs2btrfs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public Licence for more details.
*
* You should have received a copy of the GNU General Public Licence
* along with Ntfs2btrfs. If not, see <https://www.gnu.org/licenses/>. */
#pragma once
#include "btrfs.h"
#include "config.h"
#include
#include