ntfs2btrfs-20240115/CMakeLists.txt
cmake_minimum_required(VERSION 3.14.3)
cmake_policy(SET CMP0091 NEW)
set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>")
project(ntfs2btrfs VERSION 20240115)
include(GNUInstallDirs)
option(WITH_ZLIB "Include zlib support" ON)
option(WITH_LZO "Include lzo support" ON)
option(WITH_ZSTD "Include zstd support" ON)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/src/config.h.in ${CMAKE_CURRENT_BINARY_DIR}/config.h)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/ntfs2btrfs.8.in ${CMAKE_CURRENT_BINARY_DIR}/ntfs2btrfs.8)
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
find_package(fmt REQUIRED)
find_package(PkgConfig REQUIRED)
if(WITH_ZLIB)
find_package(ZLIB REQUIRED)
endif()
if(WITH_LZO)
pkg_check_modules(LZO REQUIRED lzo2)
endif()
if(WITH_ZSTD)
pkg_check_modules(ZSTD REQUIRED libzstd)
endif()
set(SRC_FILES src/ntfs2btrfs.cpp
src/ntfs.cpp
src/decomp.cpp
src/compress.cpp
src/rollback.cpp
src/crc32c.c
src/xxhash.c
src/sha256.c
src/blake2b-ref.c
src/ebiggers/lzx_decompress.c
src/ebiggers/lzx_common.c
src/ebiggers/aligned_malloc.c
src/ebiggers/decompress_common.c
src/ebiggers/xpress_decompress.c)
if(MSVC)
enable_language(ASM_MASM)
set(SRC_FILES ${SRC_FILES} src/crc32c-masm.asm)
else()
enable_language(ASM)
set(SRC_FILES ${SRC_FILES} src/crc32c-gas.S)
endif()
add_executable(ntfs2btrfs ${SRC_FILES})
if(CMAKE_BUILD_TYPE MATCHES "Debug")
add_definitions(-D_GLIBCXX_DEBUG)
endif()
target_link_libraries(ntfs2btrfs fmt::fmt-header-only)
if(WITH_ZLIB)
target_link_libraries(ntfs2btrfs ZLIB::ZLIB)
endif()
if(WITH_LZO)
target_link_libraries(ntfs2btrfs ${LZO_LINK_LIBRARIES})
endif()
if(WITH_ZSTD)
target_link_libraries(ntfs2btrfs ${ZSTD_LINK_LIBRARIES})
endif()
include_directories(${CMAKE_CURRENT_BINARY_DIR})
# Work around bug in MSVC version of cmake - see https://gitlab.kitware.com/cmake/cmake/-/merge_requests/4257
set(CMAKE_ASM_MASM_COMPILE_OPTIONS_MSVC_RUNTIME_LIBRARY_MultiThreaded "")
set(CMAKE_ASM_MASM_COMPILE_OPTIONS_MSVC_RUNTIME_LIBRARY_MultiThreadedDLL "")
set(CMAKE_ASM_MASM_COMPILE_OPTIONS_MSVC_RUNTIME_LIBRARY_MultiThreadedDebug "")
set(CMAKE_ASM_MASM_COMPILE_OPTIONS_MSVC_RUNTIME_LIBRARY_MultiThreadedDebugDLL "")
if(MSVC)
target_compile_options(ntfs2btrfs PRIVATE /W4)
else()
target_compile_options(ntfs2btrfs PRIVATE -Wall -Wextra -Wno-address-of-packed-member -Wconversion -Wno-unknown-pragmas -Werror=pointer-arith)
endif()
install(TARGETS ntfs2btrfs DESTINATION ${CMAKE_INSTALL_SBINDIR})
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/ntfs2btrfs.8 DESTINATION ${CMAKE_INSTALL_MANDIR}/man8)
ntfs2btrfs-20240115/LICENCE
GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
License is intended to guarantee your freedom to share and change free
software--to make sure the software is free for all its users. This
General Public License applies to most of the Free Software
Foundation's software and to any other program whose authors commit to
using it. (Some other Free Software Foundation software is covered by
the GNU Lesser General Public License instead.) You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
this service if you wish), that you receive source code or can get it
if you want it, that you can change the software or use pieces of it
in new free programs; and that you know you can do these things.
To protect your rights, we need to make restrictions that forbid
anyone to deny you these rights or to ask you to surrender the rights.
These restrictions translate to certain responsibilities for you if you
distribute copies of the software, or if you modify it.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must give the recipients all the rights that
you have. You must make sure that they, too, receive or can get the
source code. And you must show them these terms so they know their
rights.
We protect your rights with two steps: (1) copyright the software, and
(2) offer you this license which gives you legal permission to copy,
distribute and/or modify the software.
Also, for each author's protection and ours, we want to make certain
that everyone understands that there is no warranty for this free
software. If the software is modified by someone else and passed on, we
want its recipients to know that what they have is not the original, so
that any problems introduced by others will not reflect on the original
authors' reputations.
Finally, any free program is threatened constantly by software
patents. We wish to avoid the danger that redistributors of a free
program will individually obtain patent licenses, in effect making the
program proprietary. To prevent this, we have made it clear that any
patent must be licensed for everyone's free use or not licensed at all.
The precise terms and conditions for copying, distribution and
modification follow.
GNU GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License applies to any program or other work which contains
a notice placed by the copyright holder saying it may be distributed
under the terms of this General Public License. The "Program", below,
refers to any such program or work, and a "work based on the Program"
means either the Program or any derivative work under copyright law:
that is to say, a work containing the Program or a portion of it,
either verbatim or with modifications and/or translated into another
language. (Hereinafter, translation is included without limitation in
the term "modification".) Each licensee is addressed as "you".
Activities other than copying, distribution and modification are not
covered by this License; they are outside its scope. The act of
running the Program is not restricted, and the output from the Program
is covered only if its contents constitute a work based on the
Program (independent of having been made by running the Program).
Whether that is true depends on what the Program does.
1. You may copy and distribute verbatim copies of the Program's
source code as you receive it, in any medium, provided that you
conspicuously and appropriately publish on each copy an appropriate
copyright notice and disclaimer of warranty; keep intact all the
notices that refer to this License and to the absence of any warranty;
and give any other recipients of the Program a copy of this License
along with the Program.
You may charge a fee for the physical act of transferring a copy, and
you may at your option offer warranty protection in exchange for a fee.
2. You may modify your copy or copies of the Program or any portion
of it, thus forming a work based on the Program, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:
a) You must cause the modified files to carry prominent notices
stating that you changed the files and the date of any change.
b) You must cause any work that you distribute or publish, that in
whole or in part contains or is derived from the Program or any
part thereof, to be licensed as a whole at no charge to all third
parties under the terms of this License.
c) If the modified program normally reads commands interactively
when run, you must cause it, when started running for such
interactive use in the most ordinary way, to print or display an
announcement including an appropriate copyright notice and a
notice that there is no warranty (or else, saying that you provide
a warranty) and that users may redistribute the program under
these conditions, and telling the user how to view a copy of this
License. (Exception: if the Program itself is interactive but
does not normally print such an announcement, your work based on
the Program is not required to print an announcement.)
These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the Program,
and can be reasonably considered independent and separate works in
themselves, then this License, and its terms, do not apply to those
sections when you distribute them as separate works. But when you
distribute the same sections as part of a whole which is a work based
on the Program, the distribution of the whole must be on the terms of
this License, whose permissions for other licensees extend to the
entire whole, and thus to each and every part regardless of who wrote it.
Thus, it is not the intent of this section to claim rights or contest
your rights to work written entirely by you; rather, the intent is to
exercise the right to control the distribution of derivative or
collective works based on the Program.
In addition, mere aggregation of another work not based on the Program
with the Program (or with a work based on the Program) on a volume of
a storage or distribution medium does not bring the other work under
the scope of this License.
3. You may copy and distribute the Program (or a work based on it,
under Section 2) in object code or executable form under the terms of
Sections 1 and 2 above provided that you also do one of the following:
a) Accompany it with the complete corresponding machine-readable
source code, which must be distributed under the terms of Sections
1 and 2 above on a medium customarily used for software interchange; or,
b) Accompany it with a written offer, valid for at least three
years, to give any third party, for a charge no more than your
cost of physically performing source distribution, a complete
machine-readable copy of the corresponding source code, to be
distributed under the terms of Sections 1 and 2 above on a medium
customarily used for software interchange; or,
c) Accompany it with the information you received as to the offer
to distribute corresponding source code. (This alternative is
allowed only for noncommercial distribution and only if you
received the program in object code or executable form with such
an offer, in accord with Subsection b above.)
The source code for a work means the preferred form of the work for
making modifications to it. For an executable work, complete source
code means all the source code for all modules it contains, plus any
associated interface definition files, plus the scripts used to
control compilation and installation of the executable. However, as a
special exception, the source code distributed need not include
anything that is normally distributed (in either source or binary
form) with the major components (compiler, kernel, and so on) of the
operating system on which the executable runs, unless that component
itself accompanies the executable.
If distribution of executable or object code is made by offering
access to copy from a designated place, then offering equivalent
access to copy the source code from the same place counts as
distribution of the source code, even though third parties are not
compelled to copy the source along with the object code.
4. You may not copy, modify, sublicense, or distribute the Program
except as expressly provided under this License. Any attempt
otherwise to copy, modify, sublicense or distribute the Program is
void, and will automatically terminate your rights under this License.
However, parties who have received copies, or rights, from you under
this License will not have their licenses terminated so long as such
parties remain in full compliance.
5. You are not required to accept this License, since you have not
signed it. However, nothing else grants you permission to modify or
distribute the Program or its derivative works. These actions are
prohibited by law if you do not accept this License. Therefore, by
modifying or distributing the Program (or any work based on the
Program), you indicate your acceptance of this License to do so, and
all its terms and conditions for copying, distributing or modifying
the Program or works based on it.
6. Each time you redistribute the Program (or any work based on the
Program), the recipient automatically receives a license from the
original licensor to copy, distribute or modify the Program subject to
these terms and conditions. You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties to
this License.
7. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot
distribute so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you
may not distribute the Program at all. For example, if a patent
license would not permit royalty-free redistribution of the Program by
all those who receive copies directly or indirectly through you, then
the only way you could satisfy both it and this License would be to
refrain entirely from distribution of the Program.
If any portion of this section is held invalid or unenforceable under
any particular circumstance, the balance of the section is intended to
apply and the section as a whole is intended to apply in other
circumstances.
It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of any
such claims; this section has the sole purpose of protecting the
integrity of the free software distribution system, which is
implemented by public license practices. Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing
to distribute software through any other system and a licensee cannot
impose that choice.
This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.
8. If the distribution and/or use of the Program is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Program under this License
may add an explicit geographical distribution limitation excluding
those countries, so that distribution is permitted only in or among
countries not thus excluded. In such case, this License incorporates
the limitation as if written in the body of this License.
9. The Free Software Foundation may publish revised and/or new versions
of the General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the Program
specifies a version number of this License which applies to it and "any
later version", you have the option of following the terms and conditions
either of that version or of any later version published by the Free
Software Foundation. If the Program does not specify a version number of
this License, you may choose any version ever published by the Free Software
Foundation.
10. If you wish to incorporate parts of the Program into other free
programs whose distribution conditions are different, write to the author
to ask for permission. For software which is copyrighted by the Free
Software Foundation, write to the Free Software Foundation; we sometimes
make exceptions for this. Our decision will be guided by the two goals
of preserving the free status of all derivatives of our free software and
of promoting the sharing and reuse of software generally.
NO WARRANTY
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
REPAIR OR CORRECTION.
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
convey the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
Copyright (C) <year>  <name of author>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
Also add information on how to contact you by electronic and paper mail.
If the program is interactive, make it output a short notice like this
when it starts in an interactive mode:
Gnomovision version 69, Copyright (C) year name of author
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, the commands you use may
be called something other than `show w' and `show c'; they could even be
mouse-clicks or menu items--whatever suits your program.
You should also get your employer (if you work as a programmer) or your
school, if any, to sign a "copyright disclaimer" for the program, if
necessary. Here is a sample; alter the names:
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
`Gnomovision' (which makes passes at compilers) written by James Hacker.
<signature of Ty Coon>, 1 April 1989
Ty Coon, President of Vice
This General Public License does not permit incorporating your program into
proprietary programs. If your program is a subroutine library, you may
consider it more useful to permit linking proprietary applications with the
library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License.
ntfs2btrfs-20240115/README.md
Ntfs2btrfs
==========
Ntfs2btrfs is a tool which does in-place conversion of Microsoft's NTFS
filesystem to the open-source filesystem Btrfs, much as `btrfs-convert`
does for ext2. The original image is saved as a reflink copy at
`image/ntfs.img`, and if you want to keep the conversion you can delete
this to free up space.
Although I believe this tool to be stable, please note that I take no
responsibility if something goes awry!
You're probably also interested in [WinBtrfs](https://github.com/maharmstone/btrfs),
which is a Btrfs filesystem driver for Windows.
Thanks to [Eric Biggers](https://github.com/ebiggers), who [successfully reverse-engineered](https://github.com/ebiggers/ntfs-3g-system-compression/) Windows 10's
"WOF compressed data", and whose code I've used here.
Usage
-----
On Windows, from an Administrator command prompt:
`ntfs2btrfs.exe D:\`
Bear in mind that it won't work with your boot drive or a drive containing a
pagefile that's currently in use.
On Linux, as root:
`ntfs2btrfs /dev/sda1`
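Any of the options described in the man page can be added before the device name; for example, to convert with zstd compression and xxhash checksums:
`ntfs2btrfs --compress=zstd --hash=xxhash /dev/sda1`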
Installation
------------
On Windows, go to the [Releases page](https://github.com/maharmstone/ntfs2btrfs/releases) and
download the latest Zip file, or use [Scoop](https://github.com/ScoopInstaller/Main/blob/master/bucket/ntfs2btrfs.json).
For Linux:
* [Arch](https://aur.archlinux.org/packages/ntfs2btrfs)
* [Fedora](https://src.fedoraproject.org/rpms/ntfs2btrfs) (thanks to [Conan-Kudo](https://github.com/Conan-Kudo))
* Gentoo - available as sys-fs/ntfs2btrfs in the guru repository
* [Debian](https://packages.debian.org/ntfs2btrfs) (thanks to [alexmyczko](https://github.com/alexmyczko))
* [Ubuntu](https://packages.ubuntu.com/ntfs2btrfs) (thanks to [alexmyczko](https://github.com/alexmyczko))
* [openSUSE](https://build.opensuse.org/package/show/filesystems/ntfs2btrfs) (thanks to David Sterba)
For other distributions or operating systems, you will need to compile it yourself - see
below.
Changelog
---------
* 20240115
* Fixed compilation on GCC 14 (`-Werror=incompatible-pointer-types` now enabled by default)
* 20230501
* Fixed inline extent items being written out of order (not diagnosed by `btrfs check`)
* Fixed metadata items being written with wrong level value (not diagnosed by `btrfs check`)
* ADSes with overly-long names now get skipped
* 20220812
* Added --no-datasum option, to skip calculating checksums
* LXSS / WSL metadata is now preserved
* Fixed lowercase drive letters not being recognized
* Fixed crash due to iterator invalidation (thanks to nyanpasu64)
* Fixed corruption when NTFS places file in last megabyte of disk
* 20210923
* Added (Btrfs) compression support (zlib, lzo, and zstd)
* Added support for other hash algorithms: xxhash, sha256, and blake2
* Added support for rolling back to NTFS
* Added support for NT4-style security descriptors
* Increased conversion speed for volumes with many inodes
* Fixed bug when fragmented file was in superblock location
* Fixed buffer overflow when reading security descriptors
* Fixed bug where filesystems would be corrupted in a way that `btrfs check` doesn't pick up
* 20210523
* Improved handling of large compressed files
* 20210402 (source code only release)
* Fixes for compilation on non-amd64 architectures
* 20210105
* Added support for NTFS compression
* Added support for "WOF compressed data"
* Fixed problems caused by sparse files
* Miscellaneous bug fixes
* 20201108
* Improved error handling
* Added better message if NTFS is corrupted or unclean
* Better handling of relocations
* 20200330
* Initial release
Compilation
-----------
On Windows, open the source directory in a recent version of MSVC, right-click
on CMakeLists.txt, and click Compile.
On Linux:
mkdir build
cd build
cmake ..
make
You'll also need [libfmt](https://github.com/fmtlib/fmt) installed - it should be
in your package manager.
Compression support requires zlib, lzo, and/or zstd - again, they will be in your
package manager. See also the cmake options WITH_ZLIB, WITH_LZO, and WITH_ZSTD,
if you want to disable this.
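To disable one of these compression backends, pass the corresponding option to cmake when configuring; for example, to build without zstd support:
`cmake -DWITH_ZSTD=OFF ..`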
What works
----------
* Files
* Directories
* Symlinks
* Other reparse points
* Security descriptors
* Alternate data streams
* DOS attributes (hidden, system, etc.)
* Rollback to original NTFS image
* Preservation of LXSS metadata
What doesn't work
-----------------
* Windows' old extended attributes (you're not using these)
* Large (i.e. >16KB) ADSes (you're not using these either)
* Preservation of the case-sensitivity flag
* Unusual cluster sizes (i.e. not 4 KB)
* Encrypted files
Can I boot Windows from Btrfs with this?
----------------------------------------
Yes, if the stars are right. See [Quibble](https://github.com/maharmstone/quibble).
ntfs2btrfs-20240115/ntfs2btrfs.8.in
.TH NTFS2BTRFS "8" "January 2024" "ntfs2btrfs @PROJECT_VERSION@" "System Administration"
.SH NAME
ntfs2btrfs \- convert ntfs filesystem to btrfs filesystem
.SH SYNOPSIS
\fBntfs2btrfs\fR [options] \fIdevice\fR
.SH DESCRIPTION
This is a tool which does in-place conversion of Microsoft's NTFS filesystem
to the open-source filesystem Btrfs, much as \fBbtrfs\-convert\fR does for ext2.
.SH OPTIONS
.PP
-c \fIalgorithm\fR, --compress=\fIalgorithm\fR
.RS 4
Uses the specified algorithm to recompress files that are compressed on the
NTFS volume; valid choices are \fIzstd\fR, \fIlzo\fR, \fIzlib\fR, or \fInone\fR.
If you don't specify any value, \fIzstd\fR will be used, assuming it's been
compiled in. Note that this will be ignored if you also select --no-datasum (see
below).
.RE
.PP
-h \fIalgorithm\fR, --hash=\fIalgorithm\fR
.RS 4
Uses the specified checksumming algorithm; valid choices are \fIcrc32c\fR,
\fIxxhash\fR, \fIsha256\fR, and \fIblake2\fR. The first of these will be used by
default, and should be fine for most purposes.
.RE
.PP
-r, --rollback
.RS 4
Tries to restore the original NTFS filesystem. See \fBROLLBACK\fR below.
.RE
.PP
-d, --no-datasum
.RS 4
Skips calculating checksums for existing data. Don't choose this unless you're
sure it's what you want.
.RE
.SH ROLLBACK
The original filesystem image is saved as a reflink copy at \fIimage/ntfs.img\fR. You
can restore this at any time by using the rollback option, provided that you've
not moved the data by doing a balance. Bear in mind that this restores the volume
to how it was when you did the conversion, meaning that any changes you've made
since will be lost.
.PP
If you decide to keep the conversion, you can remove the \fIimage\fR subvolume at
any point to free up space.
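.PP
For example, \fBntfs2btrfs --rollback /dev/sda1\fR restores the original NTFS filesystem
to \fI/dev/sda1\fR, assuming that is the device which was previously converted.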
.SH XATTRS
Various bits of NTFS-specific data are stored as Btrfs xattrs, in a manner that
the Windows btrfs driver understands (\fBhttps://github.com/maharmstone/btrfs\fR). Some
should also be understood by tools such as Wine and Samba, but YMMV.
.IP \[bu] 2
The NTFS attribute value is stored as a hex string at \fIuser.DOSATTRIB\fR.
.IP \[bu] 2
The reparse points on directories are stored at \fIuser.reparse\fR. NTFS symlinks should
be converted into POSIX symlinks. The data for other reparse points will be stored as
the contents of the files.
.IP \[bu] 2
The NT security descriptor is stored as \fIsecurity.NTACL\fR.
.IP \[bu] 2
Alternate data streams on files are stored in the \fIuser\fR namespace, e.g. \fI:Zone.Identifier\fR
becomes \fIuser.Zone.Identifier\fR.
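.PP
On Linux, these xattrs can be inspected with the standard \fBgetfattr\fR(1) tool from the
attr package (not part of ntfs2btrfs); for example, \fBgetfattr -d -m -\fR \fIfile\fR should
list the attributes described above, including those outside the \fIuser\fR namespace.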
.SH SEE ALSO
.BR btrfs (8),
.BR mkfs.btrfs (8).
.SH AUTHOR
Written by Mark Harmstone (\fBmark@harmstone.com\fR).
.SH WEB
.IP https://github.com/maharmstone/ntfs2btrfs
ntfs2btrfs-20240115/src/blake2-impl.h
/*
BLAKE2 reference source code package - reference C implementations
Copyright 2012, Samuel Neves . You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#pragma once
#include <stdint.h>
#include <string.h>
#define NATIVE_LITTLE_ENDIAN
#if !defined(__cplusplus) && (!defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L)
#if defined(_MSC_VER)
#define BLAKE2_INLINE __inline
#elif defined(__GNUC__)
#define BLAKE2_INLINE __inline__
#else
#define BLAKE2_INLINE
#endif
#else
#define BLAKE2_INLINE inline
#endif
static BLAKE2_INLINE uint32_t load32( const void *src )
{
#if defined(NATIVE_LITTLE_ENDIAN)
uint32_t w;
memcpy(&w, src, sizeof w);
return w;
#else
const uint8_t *p = ( const uint8_t * )src;
return (( uint32_t )( p[0] ) << 0) |
(( uint32_t )( p[1] ) << 8) |
(( uint32_t )( p[2] ) << 16) |
(( uint32_t )( p[3] ) << 24) ;
#endif
}
static BLAKE2_INLINE uint64_t load64( const void *src )
{
#if defined(NATIVE_LITTLE_ENDIAN)
uint64_t w;
memcpy(&w, src, sizeof w);
return w;
#else
const uint8_t *p = ( const uint8_t * )src;
return (( uint64_t )( p[0] ) << 0) |
(( uint64_t )( p[1] ) << 8) |
(( uint64_t )( p[2] ) << 16) |
(( uint64_t )( p[3] ) << 24) |
(( uint64_t )( p[4] ) << 32) |
(( uint64_t )( p[5] ) << 40) |
(( uint64_t )( p[6] ) << 48) |
(( uint64_t )( p[7] ) << 56) ;
#endif
}
static BLAKE2_INLINE uint16_t load16( const void *src )
{
#if defined(NATIVE_LITTLE_ENDIAN)
uint16_t w;
memcpy(&w, src, sizeof w);
return w;
#else
const uint8_t *p = ( const uint8_t * )src;
return ( uint16_t )((( uint32_t )( p[0] ) << 0) |
(( uint32_t )( p[1] ) << 8));
#endif
}
static BLAKE2_INLINE void store16( void *dst, uint16_t w )
{
#if defined(NATIVE_LITTLE_ENDIAN)
memcpy(dst, &w, sizeof w);
#else
uint8_t *p = ( uint8_t * )dst;
*p++ = ( uint8_t )w; w >>= 8;
*p++ = ( uint8_t )w;
#endif
}
static BLAKE2_INLINE void store32( void *dst, uint32_t w )
{
#if defined(NATIVE_LITTLE_ENDIAN)
memcpy(dst, &w, sizeof w);
#else
uint8_t *p = ( uint8_t * )dst;
p[0] = (uint8_t)(w >> 0);
p[1] = (uint8_t)(w >> 8);
p[2] = (uint8_t)(w >> 16);
p[3] = (uint8_t)(w >> 24);
#endif
}
static BLAKE2_INLINE void store64( void *dst, uint64_t w )
{
#if defined(NATIVE_LITTLE_ENDIAN)
memcpy(dst, &w, sizeof w);
#else
uint8_t *p = ( uint8_t * )dst;
p[0] = (uint8_t)(w >> 0);
p[1] = (uint8_t)(w >> 8);
p[2] = (uint8_t)(w >> 16);
p[3] = (uint8_t)(w >> 24);
p[4] = (uint8_t)(w >> 32);
p[5] = (uint8_t)(w >> 40);
p[6] = (uint8_t)(w >> 48);
p[7] = (uint8_t)(w >> 56);
#endif
}
static BLAKE2_INLINE uint64_t load48( const void *src )
{
const uint8_t *p = ( const uint8_t * )src;
return (( uint64_t )( p[0] ) << 0) |
(( uint64_t )( p[1] ) << 8) |
(( uint64_t )( p[2] ) << 16) |
(( uint64_t )( p[3] ) << 24) |
(( uint64_t )( p[4] ) << 32) |
(( uint64_t )( p[5] ) << 40) ;
}
static BLAKE2_INLINE void store48( void *dst, uint64_t w )
{
uint8_t *p = ( uint8_t * )dst;
p[0] = (uint8_t)(w >> 0);
p[1] = (uint8_t)(w >> 8);
p[2] = (uint8_t)(w >> 16);
p[3] = (uint8_t)(w >> 24);
p[4] = (uint8_t)(w >> 32);
p[5] = (uint8_t)(w >> 40);
}
static BLAKE2_INLINE uint32_t rotr32( const uint32_t w, const unsigned c )
{
return ( w >> c ) | ( w << ( 32 - c ) );
}
static BLAKE2_INLINE uint64_t rotr64( const uint64_t w, const unsigned c )
{
return ( w >> c ) | ( w << ( 64 - c ) );
}
#if defined(_MSC_VER)
#define BLAKE2_PACKED(x) __pragma(pack(push, 1)) x __pragma(pack(pop))
#else
#define BLAKE2_PACKED(x) x __attribute__((packed))
#endif
enum blake2b_constant
{
BLAKE2B_BLOCKBYTES = 128,
BLAKE2B_OUTBYTES = 64,
BLAKE2B_KEYBYTES = 64,
BLAKE2B_SALTBYTES = 16,
BLAKE2B_PERSONALBYTES = 16
};
typedef struct blake2b_state__
{
uint64_t h[8];
uint64_t t[2];
uint64_t f[2];
uint8_t buf[BLAKE2B_BLOCKBYTES];
size_t buflen;
size_t outlen;
uint8_t last_node;
} blake2b_state;
BLAKE2_PACKED(struct blake2b_param__
{
uint8_t digest_length; /* 1 */
uint8_t key_length; /* 2 */
uint8_t fanout; /* 3 */
uint8_t depth; /* 4 */
uint32_t leaf_length; /* 8 */
uint32_t node_offset; /* 12 */
uint32_t xof_length; /* 16 */
uint8_t node_depth; /* 17 */
uint8_t inner_length; /* 18 */
uint8_t reserved[14]; /* 32 */
uint8_t salt[BLAKE2B_SALTBYTES]; /* 48 */
uint8_t personal[BLAKE2B_PERSONALBYTES]; /* 64 */
});
typedef struct blake2b_param__ blake2b_param;
ntfs2btrfs-20240115/src/blake2b-ref.c
/*
BLAKE2 reference source code package - reference C implementations
Copyright 2012, Samuel Neves . You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include "blake2-impl.h"
static const uint64_t blake2b_IV[8] =
{
0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL,
0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL,
0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
};
static const uint8_t blake2b_sigma[12][16] =
{
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } ,
{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } ,
{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } ,
{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } ,
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } ,
{ 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } ,
{ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } ,
{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } ,
{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } ,
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }
};
static int blake2b_update(blake2b_state* S, const void* in, size_t inlen);
static void blake2b_set_lastnode( blake2b_state *S )
{
S->f[1] = (uint64_t)-1;
}
/* Some helper functions, not necessarily useful */
static int blake2b_is_lastblock( const blake2b_state *S )
{
return S->f[0] != 0;
}
static void blake2b_set_lastblock( blake2b_state *S )
{
if( S->last_node ) blake2b_set_lastnode( S );
S->f[0] = (uint64_t)-1;
}
static void blake2b_increment_counter( blake2b_state *S, const uint64_t inc )
{
S->t[0] += inc;
S->t[1] += ( S->t[0] < inc );
}
static void blake2b_init0( blake2b_state *S )
{
size_t i;
memset( S, 0, sizeof( blake2b_state ) );
for( i = 0; i < 8; ++i ) S->h[i] = blake2b_IV[i];
}
/* init xors IV with input parameter block */
static void blake2b_init_param( blake2b_state *S, const blake2b_param *P )
{
const uint8_t *p = ( const uint8_t * )( P );
size_t i;
blake2b_init0( S );
/* IV XOR ParamBlock */
for( i = 0; i < 8; ++i )
S->h[i] ^= load64( p + sizeof( S->h[i] ) * i );
S->outlen = P->digest_length;
}
static void blake2b_init( blake2b_state *S, size_t outlen )
{
blake2b_param P[1];
P->digest_length = (uint8_t)outlen;
P->key_length = 0;
P->fanout = 1;
P->depth = 1;
store32( &P->leaf_length, 0 );
store32( &P->node_offset, 0 );
store32( &P->xof_length, 0 );
P->node_depth = 0;
P->inner_length = 0;
memset( P->reserved, 0, sizeof( P->reserved ) );
memset( P->salt, 0, sizeof( P->salt ) );
memset( P->personal, 0, sizeof( P->personal ) );
blake2b_init_param( S, P );
}
#define G(r,i,a,b,c,d) \
do { \
a = a + b + m[blake2b_sigma[r][2*i+0]]; \
d = rotr64(d ^ a, 32); \
c = c + d; \
b = rotr64(b ^ c, 24); \
a = a + b + m[blake2b_sigma[r][2*i+1]]; \
d = rotr64(d ^ a, 16); \
c = c + d; \
b = rotr64(b ^ c, 63); \
} while(0)
#define ROUND(r) \
do { \
G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
G(r,2,v[ 2],v[ 6],v[10],v[14]); \
G(r,3,v[ 3],v[ 7],v[11],v[15]); \
G(r,4,v[ 0],v[ 5],v[10],v[15]); \
G(r,5,v[ 1],v[ 6],v[11],v[12]); \
G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \
} while(0)
static void blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] )
{
uint64_t m[16];
uint64_t v[16];
size_t i;
for( i = 0; i < 16; ++i ) {
m[i] = load64( block + i * sizeof( m[i] ) );
}
for( i = 0; i < 8; ++i ) {
v[i] = S->h[i];
}
v[ 8] = blake2b_IV[0];
v[ 9] = blake2b_IV[1];
v[10] = blake2b_IV[2];
v[11] = blake2b_IV[3];
v[12] = blake2b_IV[4] ^ S->t[0];
v[13] = blake2b_IV[5] ^ S->t[1];
v[14] = blake2b_IV[6] ^ S->f[0];
v[15] = blake2b_IV[7] ^ S->f[1];
ROUND( 0 );
ROUND( 1 );
ROUND( 2 );
ROUND( 3 );
ROUND( 4 );
ROUND( 5 );
ROUND( 6 );
ROUND( 7 );
ROUND( 8 );
ROUND( 9 );
ROUND( 10 );
ROUND( 11 );
for( i = 0; i < 8; ++i ) {
S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];
}
}
#undef G
#undef ROUND
static int blake2b_update( blake2b_state *S, const void *pin, size_t inlen )
{
const unsigned char * in = (const unsigned char *)pin;
if( inlen > 0 )
{
size_t left = S->buflen;
size_t fill = BLAKE2B_BLOCKBYTES - left;
if( inlen > fill )
{
S->buflen = 0;
memcpy( S->buf + left, in, fill ); /* Fill buffer */
blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES );
blake2b_compress( S, S->buf ); /* Compress */
in += fill; inlen -= fill;
while(inlen > BLAKE2B_BLOCKBYTES) {
blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
blake2b_compress( S, in );
in += BLAKE2B_BLOCKBYTES;
inlen -= BLAKE2B_BLOCKBYTES;
}
}
memcpy( S->buf + S->buflen, in, inlen );
S->buflen += inlen;
}
return 0;
}
static int blake2b_final( blake2b_state *S, void *out, size_t outlen )
{
uint8_t buffer[BLAKE2B_OUTBYTES] = {0};
size_t i;
if( out == NULL || outlen < S->outlen )
return -1;
if( blake2b_is_lastblock( S ) )
return -1;
blake2b_increment_counter( S, S->buflen );
blake2b_set_lastblock( S );
memset( S->buf + S->buflen, 0, BLAKE2B_BLOCKBYTES - S->buflen ); /* Padding */
blake2b_compress( S, S->buf );
for( i = 0; i < 8; ++i ) /* Output full hash to temp buffer */
store64( buffer + sizeof( S->h[i] ) * i, S->h[i] );
memcpy( out, buffer, S->outlen );
return 0;
}
/* inlen, at least, should be uint64_t. Others can be size_t. */
void blake2b( void *out, size_t outlen, const void *in, size_t inlen )
{
blake2b_state S[1];
blake2b_init( S, outlen );
blake2b_update( S, ( const uint8_t * )in, inlen );
blake2b_final( S, out, outlen );
}
ntfs2btrfs-20240115/src/btrfs.h
/* btrfs.h
* Generic btrfs header file. Thanks to whoever it was who wrote
* https://btrfs.wiki.kernel.org/index.php/On-disk_Format - you saved me a lot of time!
*
* I release this file, and this file only, into the public domain - do whatever
* you want with it. You don't have to, but I'd appreciate if you let me know if you
* use it anything cool - mark@harmstone.com. */
#pragma once
#include <stdint.h>
static const uint64_t superblock_addrs[] = { 0x10000, 0x4000000, 0x4000000000, 0x4000000000000, 0 };
#define BTRFS_MAGIC 0x4d5f53665248425f
#define MAX_LABEL_SIZE 0x100
#define SUBVOL_ROOT_INODE 0x100
enum class btrfs_key_type : uint8_t {
INODE_ITEM = 0x01,
INODE_REF = 0x0C,
INODE_EXTREF = 0x0D,
XATTR_ITEM = 0x18,
ORPHAN_INODE = 0x30,
DIR_ITEM = 0x54,
DIR_INDEX = 0x60,
EXTENT_DATA = 0x6C,
EXTENT_CSUM = 0x80,
ROOT_ITEM = 0x84,
ROOT_BACKREF = 0x90,
ROOT_REF = 0x9C,
EXTENT_ITEM = 0xA8,
METADATA_ITEM = 0xA9,
TREE_BLOCK_REF = 0xB0,
EXTENT_DATA_REF = 0xB2,
EXTENT_REF_V0 = 0xB4,
SHARED_BLOCK_REF = 0xB6,
SHARED_DATA_REF = 0xB8,
BLOCK_GROUP_ITEM = 0xC0,
FREE_SPACE_INFO = 0xC6,
FREE_SPACE_EXTENT = 0xC7,
FREE_SPACE_BITMAP = 0xC8,
DEV_EXTENT = 0xCC,
DEV_ITEM = 0xD8,
CHUNK_ITEM = 0xE4,
TEMP_ITEM = 0xF8,
DEV_STATS = 0xF9,
SUBVOL_UUID = 0xFB,
SUBVOL_REC_UUID = 0xFC
};
#define BTRFS_ROOT_ROOT 1
#define BTRFS_ROOT_EXTENT 2
#define BTRFS_ROOT_CHUNK 3
#define BTRFS_ROOT_DEVTREE 4
#define BTRFS_ROOT_FSTREE 5
#define BTRFS_ROOT_TREEDIR 6
#define BTRFS_ROOT_CHECKSUM 7
#define BTRFS_ROOT_UUID 9
#define BTRFS_ROOT_FREE_SPACE 0xa
#define BTRFS_ROOT_DATA_RELOC 0xFFFFFFFFFFFFFFF7
enum class btrfs_compression : uint8_t {
none = 0,
zlib = 1,
lzo = 2,
zstd = 3
};
#define BTRFS_ENCRYPTION_NONE 0
#define BTRFS_ENCODING_NONE 0
enum class btrfs_extent_type : uint8_t {
inline_extent = 0,
regular = 1,
prealloc = 2
};
#define BLOCK_FLAG_DATA 0x001
#define BLOCK_FLAG_SYSTEM 0x002
#define BLOCK_FLAG_METADATA 0x004
#define BLOCK_FLAG_RAID0 0x008
#define BLOCK_FLAG_RAID1 0x010
#define BLOCK_FLAG_DUPLICATE 0x020
#define BLOCK_FLAG_RAID10 0x040
#define BLOCK_FLAG_RAID5 0x080
#define BLOCK_FLAG_RAID6 0x100
#define BLOCK_FLAG_RAID1C3 0x200
#define BLOCK_FLAG_RAID1C4 0x400
#define FREE_SPACE_CACHE_ID 0xFFFFFFFFFFFFFFF5
#define EXTENT_CSUM_ID 0xFFFFFFFFFFFFFFF6
#define BALANCE_ITEM_ID 0xFFFFFFFFFFFFFFFC
#define BTRFS_INODE_NODATASUM 0x001
#define BTRFS_INODE_NODATACOW 0x002
#define BTRFS_INODE_READONLY 0x004
#define BTRFS_INODE_NOCOMPRESS 0x008
#define BTRFS_INODE_PREALLOC 0x010
#define BTRFS_INODE_SYNC 0x020
#define BTRFS_INODE_IMMUTABLE 0x040
#define BTRFS_INODE_APPEND 0x080
#define BTRFS_INODE_NODUMP 0x100
#define BTRFS_INODE_NOATIME 0x200
#define BTRFS_INODE_DIRSYNC 0x400
#define BTRFS_INODE_COMPRESS 0x800
#define BTRFS_SUBVOL_READONLY 0x1
#define BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE 0x1
#define BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE_VALID 0x2
#define BTRFS_INCOMPAT_FLAGS_MIXED_BACKREF 0x0001
#define BTRFS_INCOMPAT_FLAGS_DEFAULT_SUBVOL 0x0002
#define BTRFS_INCOMPAT_FLAGS_MIXED_GROUPS 0x0004
#define BTRFS_INCOMPAT_FLAGS_COMPRESS_LZO 0x0008
#define BTRFS_INCOMPAT_FLAGS_COMPRESS_ZSTD 0x0010
#define BTRFS_INCOMPAT_FLAGS_BIG_METADATA 0x0020
#define BTRFS_INCOMPAT_FLAGS_EXTENDED_IREF 0x0040
#define BTRFS_INCOMPAT_FLAGS_RAID56 0x0080
#define BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA 0x0100
#define BTRFS_INCOMPAT_FLAGS_NO_HOLES 0x0200
#define BTRFS_INCOMPAT_FLAGS_METADATA_UUID 0x0400
#define BTRFS_INCOMPAT_FLAGS_RAID1C34 0x0800
#define BTRFS_SUPERBLOCK_FLAGS_SEEDING 0x100000000
#define BTRFS_ORPHAN_INODE_OBJID 0xFFFFFFFFFFFFFFFB
enum class btrfs_csum_type : uint16_t {
crc32c = 0,
xxhash = 1,
sha256 = 2,
blake2 = 3
};
#pragma pack(push, 1)
typedef struct {
uint8_t uuid[16];
} BTRFS_UUID;
typedef struct {
uint64_t obj_id;
btrfs_key_type obj_type;
uint64_t offset;
} KEY;
#define HEADER_FLAG_WRITTEN 0x000000000000001
#define HEADER_FLAG_SHARED_BACKREF 0x000000000000002
#define HEADER_FLAG_MIXED_BACKREF 0x100000000000000
typedef struct {
uint8_t csum[32];
BTRFS_UUID fs_uuid;
uint64_t address;
uint64_t flags;
BTRFS_UUID chunk_tree_uuid;
uint64_t generation;
uint64_t tree_id;
uint32_t num_items;
uint8_t level;
} tree_header;
typedef struct {
KEY key;
uint32_t offset;
uint32_t size;
} leaf_node;
typedef struct {
KEY key;
uint64_t address;
uint64_t generation;
} internal_node;
typedef struct {
uint64_t dev_id;
uint64_t num_bytes;
uint64_t bytes_used;
uint32_t optimal_io_align;
uint32_t optimal_io_width;
uint32_t minimal_io_size;
uint64_t type;
uint64_t generation;
uint64_t start_offset;
uint32_t dev_group;
uint8_t seek_speed;
uint8_t bandwidth;
BTRFS_UUID device_uuid;
BTRFS_UUID fs_uuid;
} DEV_ITEM;
#define SYS_CHUNK_ARRAY_SIZE 0x800
#define BTRFS_NUM_BACKUP_ROOTS 4
typedef struct {
uint64_t root_tree_addr;
uint64_t root_tree_generation;
uint64_t chunk_tree_addr;
uint64_t chunk_tree_generation;
uint64_t extent_tree_addr;
uint64_t extent_tree_generation;
uint64_t fs_tree_addr;
uint64_t fs_tree_generation;
uint64_t dev_root_addr;
uint64_t dev_root_generation;
uint64_t csum_root_addr;
uint64_t csum_root_generation;
uint64_t total_bytes;
uint64_t bytes_used;
uint64_t num_devices;
uint64_t reserved[4];
uint8_t root_level;
uint8_t chunk_root_level;
uint8_t extent_root_level;
uint8_t fs_root_level;
uint8_t dev_root_level;
uint8_t csum_root_level;
uint8_t reserved2[10];
} superblock_backup;
typedef struct {
uint8_t checksum[32];
BTRFS_UUID uuid;
uint64_t sb_phys_addr;
uint64_t flags;
uint64_t magic;
uint64_t generation;
uint64_t root_tree_addr;
uint64_t chunk_tree_addr;
uint64_t log_tree_addr;
uint64_t log_root_transid;
uint64_t total_bytes;
uint64_t bytes_used;
uint64_t root_dir_objectid;
uint64_t num_devices;
uint32_t sector_size;
uint32_t node_size;
uint32_t leaf_size;
uint32_t stripe_size;
uint32_t n;
uint64_t chunk_root_generation;
uint64_t compat_flags;
uint64_t compat_ro_flags;
uint64_t incompat_flags;
enum btrfs_csum_type csum_type;
uint8_t root_level;
uint8_t chunk_root_level;
uint8_t log_root_level;
DEV_ITEM dev_item;
char label[MAX_LABEL_SIZE];
uint64_t cache_generation;
uint64_t uuid_tree_generation;
uint64_t reserved[30];
uint8_t sys_chunk_array[SYS_CHUNK_ARRAY_SIZE];
superblock_backup backup[BTRFS_NUM_BACKUP_ROOTS];
uint8_t reserved2[565];
} superblock;
enum class btrfs_inode_type : uint8_t {
unknown = 0,
file = 1,
directory = 2,
chardev = 3,
blockdev = 4,
fifo = 5,
socket = 6,
symlink = 7,
ea = 8
};
typedef struct {
KEY key;
uint64_t transid;
uint16_t m;
uint16_t n;
enum btrfs_inode_type type;
char name[1];
} DIR_ITEM;
typedef struct {
uint64_t seconds;
uint32_t nanoseconds;
} BTRFS_TIME;
typedef struct {
uint64_t generation;
uint64_t transid;
uint64_t st_size;
uint64_t st_blocks;
uint64_t block_group;
uint32_t st_nlink;
uint32_t st_uid;
uint32_t st_gid;
uint32_t st_mode;
uint64_t st_rdev;
uint64_t flags;
uint64_t sequence;
uint8_t reserved[32];
BTRFS_TIME st_atime;
BTRFS_TIME st_ctime;
BTRFS_TIME st_mtime;
BTRFS_TIME otime;
} INODE_ITEM;
typedef struct {
INODE_ITEM inode;
uint64_t generation;
uint64_t objid;
uint64_t block_number;
uint64_t byte_limit;
uint64_t bytes_used;
uint64_t last_snapshot_generation;
uint64_t flags;
uint32_t num_references;
KEY drop_progress;
uint8_t drop_level;
uint8_t root_level;
uint64_t generation2;
BTRFS_UUID uuid;
BTRFS_UUID parent_uuid;
BTRFS_UUID received_uuid;
uint64_t ctransid;
uint64_t otransid;
uint64_t stransid;
uint64_t rtransid;
BTRFS_TIME ctime;
BTRFS_TIME otime;
BTRFS_TIME stime;
BTRFS_TIME rtime;
uint64_t reserved[8];
} ROOT_ITEM;
typedef struct {
uint64_t size;
uint64_t root_id;
uint64_t stripe_length;
uint64_t type;
uint32_t opt_io_alignment;
uint32_t opt_io_width;
uint32_t sector_size;
uint16_t num_stripes;
uint16_t sub_stripes;
} CHUNK_ITEM;
typedef struct {
uint64_t dev_id;
uint64_t offset;
BTRFS_UUID dev_uuid;
} CHUNK_ITEM_STRIPE;
typedef struct {
uint64_t generation;
uint64_t decoded_size;
enum btrfs_compression compression;
uint8_t encryption;
uint16_t encoding;
enum btrfs_extent_type type;
uint8_t data[1];
} EXTENT_DATA;
typedef struct {
uint64_t address;
uint64_t size;
uint64_t offset;
uint64_t num_bytes;
} EXTENT_DATA2;
typedef struct {
uint64_t index;
uint16_t n;
char name[1];
} INODE_REF;
typedef struct {
uint64_t dir;
uint64_t index;
uint16_t n;
char name[1];
} INODE_EXTREF;
#define EXTENT_ITEM_DATA 0x001
#define EXTENT_ITEM_TREE_BLOCK 0x002
#define EXTENT_ITEM_SHARED_BACKREFS 0x100
typedef struct {
uint64_t refcount;
uint64_t generation;
uint64_t flags;
} EXTENT_ITEM;
typedef struct {
KEY firstitem;
uint8_t level;
} EXTENT_ITEM2;
typedef struct {
uint32_t refcount;
} EXTENT_ITEM_V0;
typedef struct {
EXTENT_ITEM extent_item;
KEY firstitem;
uint8_t level;
} EXTENT_ITEM_TREE;
typedef struct {
uint64_t offset;
} TREE_BLOCK_REF;
typedef struct {
uint64_t root;
uint64_t objid;
uint64_t offset;
uint32_t count;
} EXTENT_DATA_REF;
typedef struct {
uint64_t used;
uint64_t chunk_tree;
uint64_t flags;
} BLOCK_GROUP_ITEM;
typedef struct {
uint64_t root;
uint64_t gen;
uint64_t objid;
uint32_t count;
} EXTENT_REF_V0;
typedef struct {
uint64_t offset;
} SHARED_BLOCK_REF;
typedef struct {
uint64_t offset;
uint32_t count;
} SHARED_DATA_REF;
static const uint8_t FREE_SPACE_EXTENT = 1;
static const uint8_t FREE_SPACE_BITMAP = 2;
typedef struct {
uint64_t offset;
uint64_t size;
uint8_t type;
} FREE_SPACE_ENTRY;
typedef struct {
KEY key;
uint64_t generation;
uint64_t num_entries;
uint64_t num_bitmaps;
} FREE_SPACE_ITEM;
typedef struct {
uint64_t dir;
uint64_t index;
uint16_t n;
char name[1];
} ROOT_REF;
typedef struct {
uint64_t chunktree;
uint64_t objid;
uint64_t address;
uint64_t length;
BTRFS_UUID chunktree_uuid;
} DEV_EXTENT;
#define BALANCE_FLAGS_DATA 0x1
#define BALANCE_FLAGS_SYSTEM 0x2
#define BALANCE_FLAGS_METADATA 0x4
#define BALANCE_ARGS_FLAGS_PROFILES 0x001
#define BALANCE_ARGS_FLAGS_USAGE 0x002
#define BALANCE_ARGS_FLAGS_DEVID 0x004
#define BALANCE_ARGS_FLAGS_DRANGE 0x008
#define BALANCE_ARGS_FLAGS_VRANGE 0x010
#define BALANCE_ARGS_FLAGS_LIMIT 0x020
#define BALANCE_ARGS_FLAGS_LIMIT_RANGE 0x040
#define BALANCE_ARGS_FLAGS_STRIPES_RANGE 0x080
#define BALANCE_ARGS_FLAGS_CONVERT 0x100
#define BALANCE_ARGS_FLAGS_SOFT 0x200
#define BALANCE_ARGS_FLAGS_USAGE_RANGE 0x400
typedef struct {
uint64_t profiles;
union {
uint64_t usage;
struct {
uint32_t usage_start;
uint32_t usage_end;
} s;
} u1;
uint64_t devid;
uint64_t drange_start;
uint64_t drange_end;
uint64_t vrange_start;
uint64_t vrange_end;
uint64_t convert;
uint64_t flags;
union {
uint64_t limit;
struct {
uint32_t limit_start;
uint32_t limit_end;
} s;
} u2;
uint32_t stripes_start;
uint32_t stripes_end;
uint8_t reserved[48];
} BALANCE_ARGS;
typedef struct {
uint64_t flags;
BALANCE_ARGS data;
BALANCE_ARGS metadata;
BALANCE_ARGS system;
uint8_t reserved[32];
} BALANCE_ITEM;
#define BTRFS_FREE_SPACE_USING_BITMAPS 1
typedef struct {
uint32_t count;
uint32_t flags;
} FREE_SPACE_INFO;
#define BTRFS_DEV_STAT_WRITE_ERRORS 0
#define BTRFS_DEV_STAT_READ_ERRORS 1
#define BTRFS_DEV_STAT_FLUSH_ERRORS 2
#define BTRFS_DEV_STAT_CORRUPTION_ERRORS 3
#define BTRFS_DEV_STAT_GENERATION_ERRORS 4
#define BTRFS_SEND_CMD_SUBVOL 1
#define BTRFS_SEND_CMD_SNAPSHOT 2
#define BTRFS_SEND_CMD_MKFILE 3
#define BTRFS_SEND_CMD_MKDIR 4
#define BTRFS_SEND_CMD_MKNOD 5
#define BTRFS_SEND_CMD_MKFIFO 6
#define BTRFS_SEND_CMD_MKSOCK 7
#define BTRFS_SEND_CMD_SYMLINK 8
#define BTRFS_SEND_CMD_RENAME 9
#define BTRFS_SEND_CMD_LINK 10
#define BTRFS_SEND_CMD_UNLINK 11
#define BTRFS_SEND_CMD_RMDIR 12
#define BTRFS_SEND_CMD_SET_XATTR 13
#define BTRFS_SEND_CMD_REMOVE_XATTR 14
#define BTRFS_SEND_CMD_WRITE 15
#define BTRFS_SEND_CMD_CLONE 16
#define BTRFS_SEND_CMD_TRUNCATE 17
#define BTRFS_SEND_CMD_CHMOD 18
#define BTRFS_SEND_CMD_CHOWN 19
#define BTRFS_SEND_CMD_UTIMES 20
#define BTRFS_SEND_CMD_END 21
#define BTRFS_SEND_CMD_UPDATE_EXTENT 22
#define BTRFS_SEND_TLV_UUID 1
#define BTRFS_SEND_TLV_TRANSID 2
#define BTRFS_SEND_TLV_INODE 3
#define BTRFS_SEND_TLV_SIZE 4
#define BTRFS_SEND_TLV_MODE 5
#define BTRFS_SEND_TLV_UID 6
#define BTRFS_SEND_TLV_GID 7
#define BTRFS_SEND_TLV_RDEV 8
#define BTRFS_SEND_TLV_CTIME 9
#define BTRFS_SEND_TLV_MTIME 10
#define BTRFS_SEND_TLV_ATIME 11
#define BTRFS_SEND_TLV_OTIME 12
#define BTRFS_SEND_TLV_XATTR_NAME 13
#define BTRFS_SEND_TLV_XATTR_DATA 14
#define BTRFS_SEND_TLV_PATH 15
#define BTRFS_SEND_TLV_PATH_TO 16
#define BTRFS_SEND_TLV_PATH_LINK 17
#define BTRFS_SEND_TLV_OFFSET 18
#define BTRFS_SEND_TLV_DATA 19
#define BTRFS_SEND_TLV_CLONE_UUID 20
#define BTRFS_SEND_TLV_CLONE_CTRANSID 21
#define BTRFS_SEND_TLV_CLONE_PATH 22
#define BTRFS_SEND_TLV_CLONE_OFFSET 23
#define BTRFS_SEND_TLV_CLONE_LENGTH 24
#define BTRFS_SEND_MAGIC "btrfs-stream"
typedef struct {
uint8_t magic[13];
uint32_t version;
} btrfs_send_header;
typedef struct {
uint32_t length;
uint16_t cmd;
uint32_t csum;
} btrfs_send_command;
typedef struct {
uint16_t type;
uint16_t length;
} btrfs_send_tlv;
#pragma pack(pop)
ntfs2btrfs-20240115/src/compress.cpp
/* Copyright (c) Mark Harmstone 2021
*
* This file is part of ntfs2btrfs.
*
* Ntfs2btrfs is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public Licence as published by
* the Free Software Foundation, either version 2 of the Licence, or
* (at your option) any later version.
*
* Ntfs2btrfs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public Licence for more details.
*
* You should have received a copy of the GNU General Public Licence
* along with Ntfs2btrfs. If not, see <https://www.gnu.org/licenses/>. */
#include "ntfs2btrfs.h"
#ifdef WITH_ZLIB
#include <zlib.h>
#endif
#ifdef WITH_LZO
#include <lzo/lzo1x.h>
#endif
#ifdef WITH_ZSTD
#include <zstd.h>
#endif
using namespace std;
#ifdef WITH_ZLIB
optional<buffer_t> zlib_compress(string_view data, uint32_t cluster_size) {
z_stream c_stream;
int ret;
buffer_t out(data.length());
c_stream.zalloc = Z_NULL;
c_stream.zfree = Z_NULL;
c_stream.opaque = (voidpf)0;
ret = deflateInit(&c_stream, Z_DEFAULT_COMPRESSION);
if (ret != Z_OK)
throw formatted_error("deflateInit returned {}", ret);
c_stream.next_in = (uint8_t*)data.data();
c_stream.avail_in = (unsigned int)data.length();
c_stream.next_out = (uint8_t*)out.data();
c_stream.avail_out = (unsigned int)out.size();
do {
ret = deflate(&c_stream, Z_FINISH);
if (ret != Z_OK && ret != Z_STREAM_END) {
deflateEnd(&c_stream);
throw formatted_error("deflate returned {}", ret);
}
if (c_stream.avail_in == 0 || c_stream.avail_out == 0)
break;
} while (ret != Z_STREAM_END);
deflateEnd(&c_stream);
if (c_stream.avail_in > 0) // compressed version would be longer than uncompressed
return nullopt;
if (c_stream.total_out > data.length() - cluster_size) // space saving less than one sector
return nullopt;
// round to sector, and zero end
out.resize((c_stream.total_out + cluster_size - 1) & ~(cluster_size - 1), 0);
return out;
}
#endif
#ifdef WITH_LZO
static __inline size_t lzo_max_outlen(size_t inlen) {
return inlen + (inlen / 16) + 64 + 3; // formula comes from LZO.FAQ
}
optional<buffer_t> lzo_compress(string_view data, uint32_t cluster_size) {
size_t num_pages;
num_pages = data.length() / cluster_size;
// Four-byte overall header
// Another four-byte header per page
// Each page has a maximum size of lzo_max_outlen(cluster_size)
// Plus another four bytes for possible padding
buffer_t outbuf(sizeof(uint32_t) + ((lzo_max_outlen(cluster_size) + (2 * sizeof(uint32_t))) * num_pages));
buffer_t wrkmem(LZO1X_MEM_COMPRESS);
auto out_size = (uint32_t*)outbuf.data();
*out_size = sizeof(uint32_t);
auto in = (lzo_bytep)data.data();
auto out = (lzo_bytep)(outbuf.data() + (2 * sizeof(uint32_t)));
for (unsigned int i = 0; i < num_pages; i++) {
auto pagelen = (uint32_t*)(out - sizeof(uint32_t));
lzo_uint outlen;
auto ret = lzo1x_1_compress(in, cluster_size, out, &outlen, wrkmem.data());
if (ret != LZO_E_OK)
throw formatted_error("lzo1x_1_compress returned {}", ret);
*pagelen = (uint32_t)outlen;
*out_size += (uint32_t)(outlen + sizeof(uint32_t));
in += cluster_size;
out += outlen + sizeof(uint32_t);
// new page needs to start at a 32-bit boundary
if (cluster_size - (*out_size % cluster_size) < sizeof(uint32_t)) {
memset(out, 0, cluster_size - (*out_size % cluster_size));
out += cluster_size - (*out_size % cluster_size);
*out_size += cluster_size - (*out_size % cluster_size);
}
if (*out_size >= data.length())
return nullopt;
}
outbuf.resize(*out_size);
if (outbuf.size() > data.length() - cluster_size)
return nullopt;
outbuf.resize((outbuf.size() + cluster_size - 1) & ~((uint64_t)cluster_size - 1), 0);
return outbuf;
}
#endif
#ifdef WITH_ZSTD
optional<buffer_t> zstd_compress(string_view data, uint32_t cluster_size) {
buffer_t out(ZSTD_compressBound(data.length()));
auto ret = ZSTD_compress(out.data(), out.size(), data.data(), data.length(), 1);
if (ZSTD_isError(ret))
throw formatted_error("ZSTD_compress returned {}", ret);
if (ret > data.length() - cluster_size)
return nullopt;
out.resize(ret);
out.resize((out.size() + cluster_size - 1) & ~((uint64_t)cluster_size - 1), 0);
return out;
}
#endif
ntfs2btrfs-20240115/src/config.h.in
#pragma once
#define PROJECT_VER "@PROJECT_VERSION@"
#cmakedefine WITH_ZLIB 1
#cmakedefine WITH_LZO 1
#cmakedefine WITH_ZSTD 1
ntfs2btrfs-20240115/src/crc32c-gas.S
/* Copyright (c) Mark Harmstone 2020
*
* This file is part of WinBtrfs.
*
* WinBtrfs is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public Licence as published by
* the Free Software Foundation, either version 3 of the Licence, or
* (at your option) any later version.
*
* WinBtrfs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public Licence for more details.
*
* You should have received a copy of the GNU Lesser General Public Licence
 * along with WinBtrfs. If not, see <https://www.gnu.org/licenses/>. */
#ifdef __i386__
.intel_syntax noprefix
#ifdef __MINGW32__
.extern _crctable
.global _calc_crc32c_sw@12
.global _calc_crc32c_hw@12
#else
.extern crctable
.global calc_crc32c_sw
.global calc_crc32c_hw
#endif
/* uint32_t __stdcall calc_crc32c_sw(uint32_t seed, uint8_t* msg, uint32_t msglen); */
#ifdef __MINGW32__
_calc_crc32c_sw@12:
#else
calc_crc32c_sw:
#endif
push ebp
mov ebp, esp
push esi
push ebx
mov eax, [ebp+8]
mov edx, [ebp+12]
mov ebx, [ebp+16]
/* eax = crc / seed
* ebx = len
* esi = tmp
* edx = buf
* ecx = tmp2 */
crcloop:
test ebx, ebx
jz crcend
mov esi, eax
shr esi, 8
mov cl, byte ptr [edx]
xor al, cl
and eax, 255
shl eax, 2
#ifdef __MINGW32__
mov eax, [_crctable + eax]
#else
mov eax, [crctable + eax]
#endif
xor eax, esi
inc edx
dec ebx
jmp crcloop
crcend:
pop ebx
pop esi
pop ebp
ret 12
/****************************************************/
/* uint32_t __stdcall calc_crc32c_hw(uint32_t seed, uint8_t* msg, uint32_t msglen); */
#ifdef __MINGW32__
_calc_crc32c_hw@12:
#else
calc_crc32c_hw:
#endif
push ebp
mov ebp, esp
mov eax, [ebp+8]
mov edx, [ebp+12]
mov ecx, [ebp+16]
/* eax = crc / seed
* ecx = len
* edx = buf */
crchw_loop:
cmp ecx, 4
jl crchw_stragglers
crc32 eax, dword ptr [edx]
add edx, 4
sub ecx, 4
jmp crchw_loop
crchw_stragglers:
cmp ecx, 2
jl crchw_stragglers2
crc32 eax, word ptr [edx]
add edx, 2
sub ecx, 2
crchw_stragglers2:
test ecx, ecx
jz crchw_end
crc32 eax, byte ptr [edx]
inc edx
dec ecx
jmp crchw_stragglers2
crchw_end:
pop ebp
ret 12
#elif defined(__x86_64__)
.intel_syntax noprefix
.extern crctable
.global calc_crc32c_sw
.global calc_crc32c_hw
/* uint32_t __stdcall calc_crc32c_sw(uint32_t seed, uint8_t* msg, uint32_t msglen); */
calc_crc32c_sw:
/* rax = crc / seed
* rdx = buf
* r8 = len
* rcx = tmp
* r10 = tmp2
* r11 = crctable */
lea r11, [rip + crctable]
mov rax, rcx
crcloop:
test r8, r8
jz crcend
mov rcx, rax
shr rcx, 8
mov r10b, byte ptr [rdx]
xor al, r10b
and rax, 255
shl rax, 2
mov eax, [r11 + rax]
xor rax, rcx
inc rdx
dec r8
jmp crcloop
crcend:
ret
/****************************************************/
/* uint32_t __stdcall calc_crc32c_hw(uint32_t seed, uint8_t* msg, uint32_t msglen); */
calc_crc32c_hw:
/* rax = crc / seed
* rdx = buf
* r8 = len */
mov rax, rcx
crchw_loop:
cmp r8, 8
jl crchw_stragglers
crc32 rax, qword ptr [rdx]
add rdx, 8
sub r8, 8
jmp crchw_loop
crchw_stragglers:
cmp r8, 4
jl crchw_stragglers2
crc32 eax, dword ptr [rdx]
add rdx, 4
sub r8, 4
crchw_stragglers2:
cmp r8, 2
jl crchw_stragglers3
crc32 eax, word ptr [rdx]
add rdx, 2
sub r8, 2
crchw_stragglers3:
test r8, r8
jz crchw_end
crc32 eax, byte ptr [rdx]
inc rdx
dec r8
jmp crchw_stragglers3
crchw_end:
ret
#endif
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif
ntfs2btrfs-20240115/src/crc32c-masm.asm000077500000000000000000000061031455127722500173670ustar00rootroot00000000000000; Copyright (c) Mark Harmstone 2020
;
; This file is part of WinBtrfs.
;
; WinBtrfs is free software: you can redistribute it and/or modify
; it under the terms of the GNU Lesser General Public Licence as published by
; the Free Software Foundation, either version 3 of the Licence, or
; (at your option) any later version.
;
; WinBtrfs is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
; GNU Lesser General Public Licence for more details.
;
; You should have received a copy of the GNU Lesser General Public Licence
; along with WinBtrfs. If not, see <https://www.gnu.org/licenses/>.
IFDEF RAX
ELSE
.686P
ENDIF
_TEXT SEGMENT
IFDEF RAX
EXTERN crctable:qword
PUBLIC calc_crc32c_sw
; uint32_t __stdcall calc_crc32c_sw(uint32_t seed, uint8_t* msg, uint32_t msglen);
calc_crc32c_sw:
; rax = crc / seed
; rdx = buf
; r8 = len
; rcx = tmp
; r10 = tmp2
mov rax, rcx
crcloop:
test r8, r8
jz crcend
mov rcx, rax
shr rcx, 8
mov r10b, byte ptr [rdx]
xor al, r10b
and rax, 255
shl rax, 2
mov r10, offset crctable
mov eax, dword ptr [r10 + rax]
xor rax, rcx
inc rdx
dec r8
jmp crcloop
crcend:
ret
; ****************************************************
; uint32_t __stdcall calc_crc32c_hw(uint32_t seed, uint8_t* msg, uint32_t msglen);
PUBLIC calc_crc32c_hw
calc_crc32c_hw:
; rax = crc / seed
; rdx = buf
; r8 = len
mov rax, rcx
crchw_loop:
cmp r8, 8
jl crchw_stragglers
crc32 rax, qword ptr [rdx]
add rdx, 8
sub r8, 8
jmp crchw_loop
crchw_stragglers:
cmp r8, 4
jl crchw_stragglers2
crc32 eax, dword ptr [rdx]
add rdx, 4
sub r8, 4
crchw_stragglers2:
cmp r8, 2
jl crchw_stragglers3
crc32 eax, word ptr [rdx]
add rdx, 2
sub r8, 2
crchw_stragglers3:
test r8, r8
jz crchw_end
crc32 eax, byte ptr [rdx]
inc rdx
dec r8
jmp crchw_stragglers3
crchw_end:
ret
ELSE
EXTERN _crctable:ABS
; uint32_t __stdcall calc_crc32c_sw(uint32_t seed, uint8_t* msg, uint32_t msglen);
PUBLIC _calc_crc32c_sw@12
_calc_crc32c_sw@12:
push ebp
mov ebp, esp
push esi
push ebx
mov eax, [ebp+8]
mov edx, [ebp+12]
mov ebx, [ebp+16]
; eax = crc / seed
; ebx = len
; esi = tmp
; edx = buf
; ecx = tmp2
crcloop:
test ebx, ebx
jz crcend
mov esi, eax
shr esi, 8
mov cl, byte ptr [edx]
xor al, cl
and eax, 255
shl eax, 2
mov eax, [_crctable + eax]
xor eax, esi
inc edx
dec ebx
jmp crcloop
crcend:
pop ebx
pop esi
pop ebp
ret 12
; ****************************************************
; uint32_t __stdcall calc_crc32c_hw(uint32_t seed, uint8_t* msg, uint32_t msglen);
PUBLIC _calc_crc32c_hw@12
_calc_crc32c_hw@12:
push ebp
mov ebp, esp
mov eax, [ebp+8]
mov edx, [ebp+12]
mov ecx, [ebp+16]
; eax = crc / seed
; ecx = len
; edx = buf
crchw_loop:
cmp ecx, 4
jl crchw_stragglers
crc32 eax, dword ptr [edx]
add edx, 4
sub ecx, 4
jmp crchw_loop
crchw_stragglers:
cmp ecx, 2
jl crchw_stragglers2
crc32 eax, word ptr [edx]
add edx, 2
sub ecx, 2
crchw_stragglers2:
test ecx, ecx
jz crchw_end
crc32 eax, byte ptr [edx]
inc edx
dec ecx
jmp crchw_stragglers2
crchw_end:
pop ebp
ret 12
ENDIF
_TEXT ENDS
end
ntfs2btrfs-20240115/src/crc32c.c000066400000000000000000000106561455127722500161030ustar00rootroot00000000000000/* Copyright (c) Mark Harmstone 2016-17
*
* This file is part of WinBtrfs.
*
* WinBtrfs is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public Licence as published by
* the Free Software Foundation, either version 3 of the Licence, or
* (at your option) any later version.
*
* WinBtrfs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public Licence for more details.
*
* You should have received a copy of the GNU Lesser General Public Licence
 * along with WinBtrfs. If not, see <https://www.gnu.org/licenses/>. */
#include "crc32c.h"
#include
#include
crc_func calc_crc32c = calc_crc32c_sw;
#ifdef __cplusplus
extern "C"
{
#endif
const uint32_t crctable[] = {
0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4, 0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb,
0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b, 0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24,
0x105ec76f, 0xe235446c, 0xf165b798, 0x030e349b, 0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384,
0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54, 0x5d1d08bf, 0xaf768bbc, 0xbc267848, 0x4e4dfb4b,
0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a, 0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35,
0xaa64d611, 0x580f5512, 0x4b5fa6e6, 0xb93425e5, 0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa,
0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45, 0xf779deae, 0x05125dad, 0x1642ae59, 0xe4292d5a,
0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a, 0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595,
0x417b1dbc, 0xb3109ebf, 0xa0406d4b, 0x522bee48, 0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957,
0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687, 0x0c38d26c, 0xfe53516f, 0xed03a29b, 0x1f682198,
0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927, 0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38,
0xdbfc821c, 0x2997011f, 0x3ac7f2eb, 0xc8ac71e8, 0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7,
0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096, 0xa65c047d, 0x5437877e, 0x4767748a, 0xb50cf789,
0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859, 0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46,
0x7198540d, 0x83f3d70e, 0x90a324fa, 0x62c8a7f9, 0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6,
0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36, 0x3cdb9bdd, 0xceb018de, 0xdde0eb2a, 0x2f8b6829,
0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c, 0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93,
0x082f63b7, 0xfa44e0b4, 0xe9141340, 0x1b7f9043, 0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c,
0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3, 0x55326b08, 0xa759e80b, 0xb4091bff, 0x466298fc,
0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c, 0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033,
0xa24bb5a6, 0x502036a5, 0x4370c551, 0xb11b4652, 0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d,
0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d, 0xef087a76, 0x1d63f975, 0x0e330a81, 0xfc588982,
0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d, 0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622,
0x38cc2a06, 0xcaa7a905, 0xd9f75af1, 0x2b9cd9f2, 0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed,
0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530, 0x0417b1db, 0xf67c32d8, 0xe52cc12c, 0x1747422f,
0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff, 0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0,
0xd3d3e1ab, 0x21b862a8, 0x32e8915c, 0xc083125f, 0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540,
0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90, 0x9e902e7b, 0x6cfbad78, 0x7fab5e8c, 0x8dc0dd8f,
0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee, 0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1,
0x69e9f0d5, 0x9b8273d6, 0x88d28022, 0x7ab90321, 0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e,
0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81, 0x34f4f86a, 0xc69f7b69, 0xd5cf889d, 0x27a40b9e,
0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e, 0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351,
};
// x86 and amd64 versions live in asm files
#if !defined(__i386__) && !defined(__x86_64__) && !defined(_M_IX86) && !defined(_M_X64)
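/* Portable fallback: byte-at-a-time, table-driven CRC-32C (Castagnoli,
 * reflected polynomial 0x82F63B78) - each message byte is folded into the
 * low byte of the running remainder, then eight bits are shifted out via
 * the lookup table. */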
uint32_t __stdcall calc_crc32c_sw(uint32_t seed, const uint8_t* msg, uint32_t msglen) {
uint32_t rem = seed;
for (uint32_t i = 0; i < msglen; i++) {
rem = crctable[(rem ^ msg[i]) & 0xff] ^ (rem >> 8);
}
return rem;
}
#endif
#ifdef __cplusplus
}
#endif
ntfs2btrfs-20240115/src/crc32c.h000066400000000000000000000025151455127722500161030ustar00rootroot00000000000000/* Copyright (c) Mark Harmstone 2020
*
* This file is part of ntfs2btrfs.
*
* Ntfs2btrfs is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public Licence as published by
* the Free Software Foundation, either version 2 of the Licence, or
* (at your option) any later version.
*
* Ntfs2btrfs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public Licence for more details.
*
* You should have received a copy of the GNU General Public Licence
 * along with Ntfs2btrfs. If not, see <https://www.gnu.org/licenses/>. */
#pragma once
#include <stdint.h>
#ifndef _WIN32
#ifdef __i386__
#define __stdcall __attribute__((stdcall))
#elif defined(__x86_64__)
#define __stdcall __attribute__((ms_abi))
#else
#define __stdcall
#endif
#endif
#ifdef __cplusplus
extern "C"
{
#endif
#if defined(__i386__) || defined(__x86_64__)
uint32_t __stdcall calc_crc32c_hw(uint32_t seed, const uint8_t* msg, uint32_t msglen);
#endif
uint32_t __stdcall calc_crc32c_sw(uint32_t seed, const uint8_t* msg, uint32_t msglen);
typedef uint32_t (__stdcall *crc_func)(uint32_t seed, const uint8_t* msg, uint32_t msglen);
extern crc_func calc_crc32c;
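/* calc_crc32c initially points at the table-driven software routine; callers
 * that detect SSE4.2 (the crc32 instruction) can repoint it at calc_crc32c_hw. */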
#ifdef __cplusplus
}
#endif
ntfs2btrfs-20240115/src/decomp.cpp000066400000000000000000000170451455127722500166320ustar00rootroot00000000000000/* Copyright (c) Mark Harmstone 2020
*
* This file is part of ntfs2btrfs.
*
* Ntfs2btrfs is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public Licence as published by
* the Free Software Foundation, either version 2 of the Licence, or
* (at your option) any later version.
*
* Ntfs2btrfs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public Licence for more details.
*
* You should have received a copy of the GNU General Public Licence
 * along with Ntfs2btrfs. If not, see <https://www.gnu.org/licenses/>. */
#include "ntfs2btrfs.h"
#include "ebiggers/system_compression.h"
#define LZX_CHUNK_SIZE 32768
using namespace std;
static buffer_t lznt1_decompress_chunk(string_view data) {
buffer_t s;
while (!data.empty()) {
auto fg = (uint8_t)data[0];
data = data.substr(1);
if (fg == 0) {
if (data.length() < 8) {
s.insert(s.end(), data.begin(), data.end());
return s;
} else {
s.insert(s.end(), data.begin(), data.begin() + 8);
data = data.substr(8);
}
} else {
for (unsigned int i = 0; i < 8; i++) {
if (data.empty())
return s;
if (!(fg & 1)) {
s.insert(s.end(), data.begin(), data.begin() + 1);
data = data.substr(1);
} else {
if (data.length() < sizeof(uint16_t))
throw formatted_error("Compressed chunk was {} bytes, expected at least 2.", data.length());
// See https://docs.microsoft.com/en-us/openspecs/windows_protocols/ms-xca/90fc6a28-f627-4ee5-82ce-445a6cf98b22
auto v = *(uint16_t*)data.data();
data = data.substr(2);
// Shamelessly stolen from https://github.com/you0708/lznt1 - thank you!
uint64_t u = s.size() - 1;
uint64_t lm = 0xfff;
uint64_t os = 12;
while (u >= 0x10) {
lm >>= 1;
os--;
u >>= 1;
}
auto l = (v & lm) + 3;
auto d = (v >> os) + 1;
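                    // Worked example: once 0x101 bytes of this chunk have been
                    // output, u = 0x100 takes five shifts to drop below 0x10,
                    // leaving lm = 0x7f and os = 7: the low 7 bits of v hold
                    // (length - 3) and the high 9 bits hold (displacement - 1).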
s.reserve((uint32_t)(s.size() + l));
while (l > 0) {
s.resize(s.size() + 1);
s[s.size() - 1] = s[s.size() - d - 1];
l--;
}
}
fg >>= 1;
}
}
}
return s;
}
buffer_t lznt1_decompress(string_view compdata, uint32_t size) {
buffer_t ret(size);
uint8_t* ptr;
memset(ret.data(), 0, ret.size());
ptr = ret.data();
while (true) {
if (compdata.length() < sizeof(uint16_t))
throw formatted_error("compdata was {} bytes, expected at least 2.", compdata.length());
auto h = *(uint16_t*)compdata.data();
if (h == 0)
return ret;
compdata = compdata.substr(2);
auto sig = (h & 0x7000) >> 12;
if (sig != 3)
throw formatted_error("Compression signature was {}, expected 3.", sig);
auto len = (uint32_t)(((uint64_t)h & 0xfff) + 1);
if (compdata.length() < len)
throw formatted_error("compdata was {} bytes, expected at least {}.", compdata.length(), len);
auto data = string_view(compdata.data(), len);
compdata = compdata.substr(len);
if (h & 0x8000) {
auto c = lznt1_decompress_chunk(data);
if (ptr + c.size() >= ret.data() + size) {
memcpy(ptr, c.data(), size - (ptr - ret.data()));
return ret;
} else {
memcpy(ptr, c.data(), c.size());
ptr += c.size();
}
} else {
if (ptr + data.length() >= ret.data() + size) {
memcpy(ptr, data.data(), size - (ptr - ret.data()));
return ret;
} else {
memcpy(ptr, data.data(), data.length());
ptr += data.length();
}
}
}
return ret;
}
buffer_t do_lzx_decompress(string_view compdata, uint32_t size) {
auto ctx = lzx_allocate_decompressor(LZX_CHUNK_SIZE);
if (!ctx)
throw formatted_error("lzx_allocate_decompressor returned NULL.");
uint64_t num_chunks = (size + LZX_CHUNK_SIZE - 1) / LZX_CHUNK_SIZE;
auto offsets = (uint32_t*)compdata.data();
buffer_t ret(size);
auto data = string_view(compdata.data() + ((num_chunks - 1) * sizeof(uint32_t)),
(uint32_t)(compdata.length() - ((num_chunks - 1) * sizeof(uint32_t))));
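    // compdata begins with (num_chunks - 1) little-endian uint32s: offsets[i]
    // is where chunk i+1 starts, relative to the end of this table, so
    // consecutive entries give each chunk's compressed length.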
for (uint64_t i = 0; i < num_chunks; i++) {
uint64_t off = i == 0 ? 0 : offsets[i - 1];
uint32_t complen;
if (i == 0)
complen = num_chunks > 1 ? offsets[0] : (uint32_t)data.length();
else if (i == num_chunks - 1)
complen = (uint32_t)data.length() - offsets[i - 1];
else
complen = offsets[i] - offsets[i - 1];
if (complen == (i == num_chunks - 1 ? (ret.size() - (i * LZX_CHUNK_SIZE)) : LZX_CHUNK_SIZE)) {
// stored uncompressed
memcpy(ret.data() + (i * LZX_CHUNK_SIZE), data.data() + off, complen);
} else {
auto err = lzx_decompress(ctx, data.data() + off, complen, ret.data() + (i * LZX_CHUNK_SIZE),
(uint32_t)(i == num_chunks - 1 ? (ret.size() - (i * LZX_CHUNK_SIZE)) : LZX_CHUNK_SIZE));
if (err != 0) {
lzx_free_decompressor(ctx);
throw formatted_error("lzx_decompress returned {}.", err);
}
}
}
lzx_free_decompressor(ctx);
return ret;
}
buffer_t do_xpress_decompress(string_view compdata, uint32_t size, uint32_t chunk_size) {
auto ctx = xpress_allocate_decompressor();
if (!ctx)
throw formatted_error("xpress_allocate_decompressor returned NULL.");
uint64_t num_chunks = (size + chunk_size - 1) / chunk_size;
auto offsets = (uint32_t*)compdata.data();
buffer_t ret(size);
auto data = string_view(compdata.data() + ((num_chunks - 1) * sizeof(uint32_t)),
(uint32_t)(compdata.length() - ((num_chunks - 1) * sizeof(uint32_t))));
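    // chunk offset table laid out exactly as in do_lzx_decompress above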
for (uint64_t i = 0; i < num_chunks; i++) {
uint64_t off = i == 0 ? 0 : offsets[i - 1];
uint32_t complen;
if (i == 0)
complen = num_chunks > 1 ? offsets[0] : (uint32_t)data.length();
else if (i == num_chunks - 1)
complen = (uint32_t)data.length() - offsets[i - 1];
else
complen = offsets[i] - offsets[i - 1];
if (complen == (i == num_chunks - 1 ? (ret.size() - (i * chunk_size)) : chunk_size)) {
// stored uncompressed
memcpy(ret.data() + (i * chunk_size), data.data() + off, complen);
} else {
auto err = xpress_decompress(ctx, data.data() + off, complen, ret.data() + (i * chunk_size),
(size_t)(i == num_chunks - 1 ? (ret.size() - (i * chunk_size)) : chunk_size));
if (err != 0) {
xpress_free_decompressor(ctx);
throw formatted_error("xpress_decompress returned {}.", err);
}
}
}
xpress_free_decompressor(ctx);
return ret;
}
ntfs2btrfs-20240115/src/ebiggers/000077500000000000000000000000001455127722500164375ustar00rootroot00000000000000ntfs2btrfs-20240115/src/ebiggers/aligned_malloc.c000066400000000000000000000013271455127722500215400ustar00rootroot00000000000000/*
* aligned_malloc.c - aligned memory allocation
*
* This file provides portable aligned memory allocation functions that only use
* malloc() and free(). This avoids portability problems with posix_memalign(),
* aligned_alloc(), etc.
 */
#include <stdlib.h>
#include "common_defs.h"
void *
aligned_malloc(size_t size, size_t alignment)
{
const uintptr_t mask = alignment - 1;
char *ptr = NULL;
char *raw_ptr;
raw_ptr = malloc(mask + sizeof(size_t) + size);
if (raw_ptr) {
ptr = (char *)raw_ptr + sizeof(size_t);
ptr = (void *)(((uintptr_t)ptr + mask) & ~mask);
*((size_t *)ptr - 1) = ptr - raw_ptr;
}
return ptr;
}
void
aligned_free(void *ptr)
{
if (ptr)
free((char *)ptr - *((size_t *)ptr - 1));
}
ntfs2btrfs-20240115/src/ebiggers/common_defs.h000066400000000000000000000155001455127722500211020ustar00rootroot00000000000000#ifndef _COMMON_DEFS_H
#define _COMMON_DEFS_H
// #include
// #include
#include <stddef.h>
#include <stdint.h>
typedef uint8_t u8;
typedef uint16_t u16;
typedef uint32_t u32;
typedef uint64_t u64;
typedef int32_t s32;
/* ========================================================================== */
/* Type definitions */
/* ========================================================================== */
/*
* Type of a machine word. 'unsigned long' would be logical, but that is only
* 32 bits on x86_64 Windows. The same applies to 'uint_fast32_t'. So the best
* we can do without a bunch of #ifdefs appears to be 'size_t'.
*/
typedef size_t machine_word_t;
#define WORDBYTES sizeof(machine_word_t)
#define WORDBITS (8 * WORDBYTES)
/* ========================================================================== */
/* Compiler-specific definitions */
/* ========================================================================== */
#ifdef __GNUC__ /* GCC, or GCC-compatible compiler such as clang */
# define forceinline inline __attribute__((always_inline))
# define likely(expr) __builtin_expect(!!(expr), 1)
# define unlikely(expr) __builtin_expect(!!(expr), 0)
# define _aligned_attribute(n) __attribute__((aligned(n)))
# define bsr32(n) (31 - __builtin_clz(n))
# define bsr64(n) (63 - __builtin_clzll(n))
# define bsf32(n) __builtin_ctz(n)
# define bsf64(n) __builtin_ctzll(n)
# ifndef min
# define min(a, b) ({ __typeof__(a) _a = (a); __typeof__(b) _b = (b); \
(_a < _b) ? _a : _b; })
# endif
# ifndef max
# define max(a, b) ({ __typeof__(a) _a = (a); __typeof__(b) _b = (b); \
(_a > _b) ? _a : _b; })
# endif
# define DEFINE_UNALIGNED_TYPE(type) \
struct type##_unaligned { \
type v; \
} __attribute__((packed)); \
\
static inline type \
load_##type##_unaligned(const void *p) \
{ \
return ((const struct type##_unaligned *)p)->v; \
} \
\
static inline void \
store_##type##_unaligned(type val, void *p) \
{ \
((struct type##_unaligned *)p)->v = val; \
}
#endif /* __GNUC__ */
/* Declare that the annotated function should always be inlined. This might be
* desirable in highly tuned code, e.g. compression codecs */
#ifndef forceinline
# define forceinline inline
#endif
/* Hint that the expression is usually true */
#ifndef likely
# define likely(expr) (expr)
#endif
/* Hint that the expression is usually false */
#ifndef unlikely
# define unlikely(expr) (expr)
#endif
/* Declare that the annotated variable, or variables of the annotated type, are
* to be aligned on n-byte boundaries */
#ifndef _aligned_attribute
# define _aligned_attribute(n)
#endif
/* min() and max() macros */
#ifndef min
# define min(a, b) ((a) < (b) ? (a) : (b))
#endif
#ifndef max
# define max(a, b) ((a) > (b) ? (a) : (b))
#endif
/* STATIC_ASSERT() - verify the truth of an expression at compilation time */
#define STATIC_ASSERT(expr) ((void)sizeof(char[1 - 2 * !(expr)]))
/* STATIC_ASSERT_ZERO() - verify the truth of an expression at compilation time
* and also produce a result of value '0' to be used in constant expressions */
#define STATIC_ASSERT_ZERO(expr) ((int)sizeof(char[-!(expr)]))
/* UNALIGNED_ACCESS_IS_FAST should be defined to 1 if unaligned memory accesses
* can be performed efficiently on the target platform. */
#if defined(__x86_64__) || defined(__i386__) || defined(__ARM_FEATURE_UNALIGNED)
# define UNALIGNED_ACCESS_IS_FAST 1
#else
# define UNALIGNED_ACCESS_IS_FAST 0
#endif
/*
* DEFINE_UNALIGNED_TYPE(type) - a macro that, given an integer type 'type',
* defines load_type_unaligned(addr) and store_type_unaligned(v, addr) functions
* which load and store variables of type 'type' from/to unaligned memory
* addresses.
*/
#ifndef DEFINE_UNALIGNED_TYPE
#include <string.h>
/*
* Although memcpy() may seem inefficient, it *usually* gets optimized
* appropriately by modern compilers. It's portable and may be the best we can
* do for a fallback...
*/
#define DEFINE_UNALIGNED_TYPE(type) \
\
static forceinline type \
load_##type##_unaligned(const void *p) \
{ \
type v; \
memcpy(&v, p, sizeof(v)); \
return v; \
} \
\
static forceinline void \
store_##type##_unaligned(type v, void *p) \
{ \
memcpy(p, &v, sizeof(v)); \
}
#endif /* !DEFINE_UNALIGNED_TYPE */
/* ========================================================================== */
/* Unaligned memory accesses */
/* ========================================================================== */
#define load_word_unaligned load_machine_word_t_unaligned
#define store_word_unaligned store_machine_word_t_unaligned
/* ========================================================================== */
/* Bit scan functions */
/* ========================================================================== */
/*
* Bit Scan Reverse (BSR) - find the 0-based index (relative to the least
* significant end) of the *most* significant 1 bit in the input value. The
* input value must be nonzero!
*/
#ifndef bsr32
static forceinline unsigned
bsr32(u32 v)
{
unsigned bit = 0;
while ((v >>= 1) != 0)
bit++;
return bit;
}
#endif
#ifndef bsr64
static forceinline unsigned
bsr64(u64 v)
{
unsigned bit = 0;
while ((v >>= 1) != 0)
bit++;
return bit;
}
#endif
static forceinline unsigned
bsrw(machine_word_t v)
{
STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
if (WORDBITS == 32)
return bsr32(v);
else
return bsr64(v);
}
/*
* Bit Scan Forward (BSF) - find the 0-based index (relative to the least
* significant end) of the *least* significant 1 bit in the input value. The
* input value must be nonzero!
*/
#ifndef bsf32
static forceinline unsigned
bsf32(u32 v)
{
unsigned bit;
for (bit = 0; !(v & 1); bit++, v >>= 1)
;
return bit;
}
#endif
#ifndef bsf64
static forceinline unsigned
bsf64(u64 v)
{
unsigned bit;
for (bit = 0; !(v & 1); bit++, v >>= 1)
;
return bit;
}
#endif
static forceinline unsigned
bsfw(machine_word_t v)
{
STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
if (WORDBITS == 32)
return bsf32(v);
else
return bsf64(v);
}
/* Return the log base 2 of 'n', rounded up to the nearest integer. */
static forceinline unsigned
ilog2_ceil(size_t n)
{
if (n <= 1)
return 0;
return 1 + bsrw(n - 1);
}
/* ========================================================================== */
/* Aligned memory allocation */
/* ========================================================================== */
extern void *aligned_malloc(size_t size, size_t alignment);
extern void aligned_free(void *ptr);
#endif /* _COMMON_DEFS_H */
ntfs2btrfs-20240115/src/ebiggers/decompress_common.c000066400000000000000000000313641455127722500223260ustar00rootroot00000000000000/*
* decompress_common.c
*
* Code for decompression shared among multiple compression formats.
*
* The following copying information applies to this specific source code file:
*
* Written in 2012-2016 by Eric Biggers
*
* To the extent possible under law, the author(s) have dedicated all copyright
* and related and neighboring rights to this software to the public domain
* worldwide via the Creative Commons Zero 1.0 Universal Public Domain
* Dedication (the "CC0").
*
* This software is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the CC0 for more details.
*
* You should have received a copy of the CC0 along with this software; if not
 * see <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include <string.h>
#ifdef __SSE2__
# include <emmintrin.h>
#endif
#include "decompress_common.h"
/*
* make_huffman_decode_table() -
*
* Given an alphabet of symbols and the length of each symbol's codeword in a
* canonical prefix code, build a table for quickly decoding symbols that were
* encoded with that code.
*
* A _prefix code_ is an assignment of bitstrings called _codewords_ to symbols
* such that no whole codeword is a prefix of any other. A prefix code might be
* a _Huffman code_, which means that it is an optimum prefix code for a given
* list of symbol frequencies and was generated by the Huffman algorithm.
* Although the prefix codes processed here will ordinarily be "Huffman codes",
* strictly speaking the decoder cannot know whether a given code was actually
* generated by the Huffman algorithm or not.
*
* A prefix code is _canonical_ if and only if a longer codeword never
* lexicographically precedes a shorter codeword, and the lexicographic ordering
* of codewords of equal length is the same as the lexicographic ordering of the
* corresponding symbols. The advantage of using a canonical prefix code is
* that the codewords can be reconstructed from only the symbol => codeword
* length mapping. This eliminates the need to transmit the codewords
* explicitly. Instead, they can be enumerated in lexicographic order after
* sorting the symbols primarily by increasing codeword length and secondarily
* by increasing symbol value.
*
* However, the decoder's real goal is to decode symbols with the code, not just
* generate the list of codewords. Consequently, this function directly builds
* a table for efficiently decoding symbols using the code. The basic idea is
* that given the next 'max_codeword_len' bits of input, the decoder can look up
* the next decoded symbol by indexing a table containing '2^max_codeword_len'
* entries. A codeword with length 'max_codeword_len' will have exactly one
* entry in this table, whereas a codeword shorter than 'max_codeword_len' will
* have multiple entries in this table. Precisely, a codeword of length 'n'
* will have '2^(max_codeword_len - n)' entries. The index of each such entry,
* considered as a bitstring of length 'max_codeword_len', will contain the
* corresponding codeword as a prefix.
*
* That's the basic idea, but we extend it in two ways:
*
* - Often the maximum codeword length is too long for it to be efficient to
* build the full decode table whenever a new code is used. Instead, we build
* a "root" table using only '2^table_bits' entries, where 'table_bits <=
* max_codeword_len'. Then, a lookup of 'table_bits' bits produces either a
* symbol directly (for codewords not longer than 'table_bits'), or the index
* of a subtable which must be indexed with additional bits of input to fully
* decode the symbol (for codewords longer than 'table_bits').
*
* - Whenever the decoder decodes a symbol, it needs to know the codeword length
* so that it can remove the appropriate number of input bits. The obvious
* solution would be to simply retain the codeword lengths array and use the
* decoded symbol as an index into it. However, that would require two array
* accesses when decoding each symbol. Our strategy is to instead store the
* codeword length directly in the decode table entry along with the symbol.
*
* See MAKE_DECODE_TABLE_ENTRY() for full details on the format of decode table
* entries, and see read_huffsym() for full details on how symbols are decoded.
*
* @decode_table:
* The array in which to build the decode table. This must have been
* declared by the DECODE_TABLE() macro. This may alias @lens, since all
* @lens are consumed before the decode table is written to.
*
* @num_syms:
* The number of symbols in the alphabet.
*
* @table_bits:
* The log base 2 of the number of entries in the root table.
*
* @lens:
* An array of length @num_syms, indexed by symbol, that gives the length
* of the codeword, in bits, for each symbol. The length can be 0, which
* means that the symbol does not have a codeword assigned. In addition,
* @lens may alias @decode_table, as noted above.
*
* @max_codeword_len:
* The maximum codeword length permitted for this code. All entries in
* 'lens' must be less than or equal to this value.
*
* @working_space
* A temporary array that was declared with DECODE_TABLE_WORKING_SPACE().
*
* Returns 0 on success, or -1 if the lengths do not form a valid prefix code.
*/
int
make_huffman_decode_table(u16 decode_table[], unsigned num_syms,
unsigned table_bits, const u8 lens[],
unsigned max_codeword_len, u16 working_space[])
{
u16 * const len_counts = &working_space[0];
u16 * const offsets = &working_space[1 * (max_codeword_len + 1)];
u16 * const sorted_syms = &working_space[2 * (max_codeword_len + 1)];
s32 remainder = 1;
uint8_t *entry_ptr = (uint8_t *)decode_table;
unsigned codeword_len = 1;
unsigned sym_idx;
unsigned codeword;
unsigned subtable_pos;
unsigned subtable_bits;
unsigned subtable_prefix;
/* Count how many codewords have each length, including 0. */
for (unsigned len = 0; len <= max_codeword_len; len++)
len_counts[len] = 0;
for (unsigned sym = 0; sym < num_syms; sym++)
len_counts[lens[sym]]++;
/* It is already guaranteed that all lengths are <= max_codeword_len,
* but it cannot be assumed they form a complete prefix code. A
* codeword of length n should require a proportion of the codespace
* equaling (1/2)^n. The code is complete if and only if, by this
* measure, the codespace is exactly filled by the lengths. */
for (unsigned len = 1; len <= max_codeword_len; len++) {
remainder = (remainder << 1) - len_counts[len];
/* Do the lengths overflow the codespace? */
if (unlikely(remainder < 0))
return -1;
}
if (remainder != 0) {
/* The lengths do not fill the codespace; that is, they form an
* incomplete code. This is permitted only if the code is empty
* (contains no symbols). */
if (unlikely(remainder != 1U << max_codeword_len))
return -1;
/* The code is empty. When processing a well-formed stream, the
* decode table need not be initialized in this case. However,
* we cannot assume the stream is well-formed, so we must
* initialize the decode table anyway. Setting all entries to 0
* makes the decode table always produce symbol '0' without
* consuming any bits, which is good enough. */
memset(decode_table, 0, sizeof(decode_table[0]) << table_bits);
return 0;
}
/* Sort the symbols primarily by increasing codeword length and
* secondarily by increasing symbol value. */
/* Initialize 'offsets' so that 'offsets[len]' is the number of
* codewords shorter than 'len' bits, including length 0. */
offsets[0] = 0;
for (unsigned len = 0; len < max_codeword_len; len++)
offsets[len + 1] = offsets[len] + len_counts[len];
/* Use the 'offsets' array to sort the symbols. */
for (unsigned sym = 0; sym < num_syms; sym++)
sorted_syms[offsets[lens[sym]]++] = sym;
/*
* Fill the root table entries for codewords no longer than table_bits.
*
* The table will start with entries for the shortest codeword(s), which
* will have the most entries. From there, the number of entries per
* codeword will decrease. As an optimization, we may begin filling
* entries with SSE2 vector accesses (8 entries/store), then change to
* word accesses (2 or 4 entries/store), then change to 16-bit accesses
* (1 entry/store).
*/
sym_idx = offsets[0];
#ifdef __SSE2__
/* Fill entries one 128-bit vector (8 entries) at a time. */
for (unsigned stores_per_loop = (1U << (table_bits - codeword_len)) /
(sizeof(__m128i) / sizeof(decode_table[0]));
stores_per_loop != 0; codeword_len++, stores_per_loop >>= 1)
{
unsigned end_sym_idx = sym_idx + len_counts[codeword_len];
for (; sym_idx < end_sym_idx; sym_idx++) {
/* Note: unlike in the "word" version below, the __m128i
* type already has __attribute__((may_alias)), so using
* it to access an array of u16 will not violate strict
* aliasing. */
__m128i v = _mm_set1_epi16(
MAKE_DECODE_TABLE_ENTRY(sorted_syms[sym_idx],
codeword_len));
unsigned n = stores_per_loop;
do {
*(__m128i *)entry_ptr = v;
entry_ptr += sizeof(v);
} while (--n);
}
}
#endif /* __SSE2__ */
#ifdef __GNUC__
/* Fill entries one word (2 or 4 entries) at a time. */
for (unsigned stores_per_loop = (1U << (table_bits - codeword_len)) /
(WORDBYTES / sizeof(decode_table[0]));
stores_per_loop != 0; codeword_len++, stores_per_loop >>= 1)
{
unsigned end_sym_idx = sym_idx + len_counts[codeword_len];
for (; sym_idx < end_sym_idx; sym_idx++) {
/* Accessing the array of u16 as u32 or u64 would
* violate strict aliasing and would require compiling
* the code with -fno-strict-aliasing to guarantee
* correctness. To work around this problem, use the
* gcc 'may_alias' extension. */
typedef machine_word_t
__attribute__((may_alias)) aliased_word_t;
aliased_word_t v = repeat_u16(
MAKE_DECODE_TABLE_ENTRY(sorted_syms[sym_idx],
codeword_len));
unsigned n = stores_per_loop;
do {
*(aliased_word_t *)entry_ptr = v;
entry_ptr += sizeof(v);
} while (--n);
}
}
#endif /* __GNUC__ */
/* Fill entries one at a time. */
for (unsigned stores_per_loop = (1U << (table_bits - codeword_len));
stores_per_loop != 0; codeword_len++, stores_per_loop >>= 1)
{
unsigned end_sym_idx = sym_idx + len_counts[codeword_len];
for (; sym_idx < end_sym_idx; sym_idx++) {
u16 v = MAKE_DECODE_TABLE_ENTRY(sorted_syms[sym_idx],
codeword_len);
unsigned n = stores_per_loop;
do {
*(u16 *)entry_ptr = v;
entry_ptr += sizeof(v);
} while (--n);
}
}
/* If all symbols were processed, then no subtables are required. */
if (sym_idx == num_syms)
return 0;
/* At least one subtable is required. Process the remaining symbols. */
codeword = ((u16 *)entry_ptr - decode_table) << 1;
subtable_pos = 1U << table_bits;
subtable_bits = table_bits;
subtable_prefix = -1;
do {
while (len_counts[codeword_len] == 0) {
codeword_len++;
codeword <<= 1;
}
unsigned prefix = codeword >> (codeword_len - table_bits);
/* Start a new subtable if the first 'table_bits' bits of the
* codeword don't match the prefix for the previous subtable, or
* if this will be the first subtable. */
if (prefix != subtable_prefix) {
subtable_prefix = prefix;
/*
* Calculate the subtable length. If the codeword
* length exceeds 'table_bits' by n, then the subtable
* needs at least 2^n entries. But it may need more; if
* there are fewer than 2^n codewords of length
* 'table_bits + n' remaining, then n will need to be
* incremented to bring in longer codewords until the
* subtable can be filled completely. Note that it
* always will, eventually, be possible to fill the
* subtable, since it was previously verified that the
* code is complete.
*/
subtable_bits = codeword_len - table_bits;
remainder = (s32)1 << subtable_bits;
for (;;) {
remainder -= len_counts[table_bits +
subtable_bits];
if (remainder <= 0)
break;
subtable_bits++;
remainder <<= 1;
}
/* Create the entry that points from the root table to
* the subtable. This entry contains the index of the
* start of the subtable and the number of bits with
* which the subtable is indexed (the log base 2 of the
* number of entries it contains). */
decode_table[subtable_prefix] =
MAKE_DECODE_TABLE_ENTRY(subtable_pos,
subtable_bits);
}
/* Fill the subtable entries for this symbol. */
u16 entry = MAKE_DECODE_TABLE_ENTRY(sorted_syms[sym_idx],
codeword_len - table_bits);
unsigned n = 1U << (subtable_bits - (codeword_len -
table_bits));
do {
decode_table[subtable_pos++] = entry;
} while (--n);
len_counts[codeword_len]--;
codeword++;
} while (++sym_idx < num_syms);
return 0;
}
ntfs2btrfs-20240115/src/ebiggers/decompress_common.h000066400000000000000000000474331455127722500223370ustar00rootroot00000000000000/*
* decompress_common.h
*
* Header for decompression code shared by multiple compression formats.
*
* The following copying information applies to this specific source code file:
*
* Written in 2012-2016 by Eric Biggers
*
* To the extent possible under law, the author(s) have dedicated all copyright
* and related and neighboring rights to this software to the public domain
* worldwide via the Creative Commons Zero 1.0 Universal Public Domain
* Dedication (the "CC0").
*
* This software is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the CC0 for more details.
*
* You should have received a copy of the CC0 along with this software; if not
 * see <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#ifndef _DECOMPRESS_COMMON_H
#define _DECOMPRESS_COMMON_H
#include <string.h>
#include <stdint.h>
#include "common_defs.h"
/******************************************************************************/
/* Input bitstream for XPRESS and LZX */
/*----------------------------------------------------------------------------*/
/* Structure that encapsulates a block of in-memory data being interpreted as a
* stream of bits, optionally with interwoven literal bytes. Bits are assumed
* to be stored in little endian 16-bit coding units, with the bits ordered high
* to low. */
struct input_bitstream {
/* Bits that have been read from the input buffer. The bits are
* left-justified; the next bit is always bit 31. */
u32 bitbuf;
/* Number of bits currently held in @bitbuf. */
u32 bitsleft;
/* Pointer to the next byte to be retrieved from the input buffer. */
const u8 *next;
/* Pointer past the end of the input buffer. */
const u8 *end;
};
/* Initialize a bitstream to read from the specified input buffer. */
static forceinline void
init_input_bitstream(struct input_bitstream *is, const void *buffer, u32 size)
{
is->bitbuf = 0;
is->bitsleft = 0;
is->next = buffer;
is->end = is->next + size;
}
/* Note: for performance reasons, the following methods don't return error codes
* to the caller if the input buffer is overrun. Instead, they just assume that
* all overrun data is zeroes. This has no effect on well-formed compressed
* data. The only disadvantage is that bad compressed data may go undetected,
* but even this is irrelevant if higher level code checksums the uncompressed
* data anyway. */
/* Ensure the bit buffer variable for the bitstream contains at least @num_bits
* bits. Following this, bitstream_peek_bits() and/or bitstream_remove_bits()
* may be called on the bitstream to peek or remove up to @num_bits bits. */
static forceinline void
bitstream_ensure_bits(struct input_bitstream *is, const unsigned num_bits)
{
/* This currently works for at most 17 bits. */
if (is->bitsleft >= num_bits)
return;
if (unlikely(is->end - is->next < 2))
goto overflow;
is->bitbuf |= (u32)*((uint16_t*)is->next) << (16 - is->bitsleft);
is->next += 2;
is->bitsleft += 16;
if (unlikely(num_bits == 17 && is->bitsleft == 16)) {
if (unlikely(is->end - is->next < 2))
goto overflow;
is->bitbuf |= (u32)*((uint16_t*)(is->next));
is->next += 2;
is->bitsleft = 32;
}
return;
overflow:
is->bitsleft = 32;
}
/* Return the next @num_bits bits from the bitstream, without removing them.
* There must be at least @num_bits remaining in the buffer variable, from a
* previous call to bitstream_ensure_bits(). */
static forceinline u32
bitstream_peek_bits(const struct input_bitstream *is, const unsigned num_bits)
{
return (is->bitbuf >> 1) >> (sizeof(is->bitbuf) * 8 - num_bits - 1);
}
/* Remove @num_bits from the bitstream. There must be at least @num_bits
* remaining in the buffer variable, from a previous call to
* bitstream_ensure_bits(). */
static forceinline void
bitstream_remove_bits(struct input_bitstream *is, unsigned num_bits)
{
is->bitbuf <<= num_bits;
is->bitsleft -= num_bits;
}
/* Remove and return @num_bits bits from the bitstream. There must be at least
* @num_bits remaining in the buffer variable, from a previous call to
* bitstream_ensure_bits(). */
static forceinline u32
bitstream_pop_bits(struct input_bitstream *is, unsigned num_bits)
{
u32 bits = bitstream_peek_bits(is, num_bits);
bitstream_remove_bits(is, num_bits);
return bits;
}
/* Read and return the next @num_bits bits from the bitstream. */
static forceinline u32
bitstream_read_bits(struct input_bitstream *is, unsigned num_bits)
{
bitstream_ensure_bits(is, num_bits);
return bitstream_pop_bits(is, num_bits);
}
/* Read and return the next literal byte embedded in the bitstream. */
static forceinline u8
bitstream_read_byte(struct input_bitstream *is)
{
if (unlikely(is->end == is->next))
return 0;
return *is->next++;
}
/* Read and return the next 16-bit integer embedded in the bitstream. */
static forceinline u16
bitstream_read_u16(struct input_bitstream *is)
{
u16 v;
if (unlikely(is->end - is->next < 2))
return 0;
v = *(uint16_t*)is->next;
is->next += 2;
return v;
}
/* Read and return the next 32-bit integer embedded in the bitstream. */
static forceinline u32
bitstream_read_u32(struct input_bitstream *is)
{
u32 v;
if (unlikely(is->end - is->next < 4))
return 0;
v = *(uint32_t*)is->next;
is->next += 4;
return v;
}
/* Read into @dst_buffer an array of literal bytes embedded in the bitstream.
* Return 0 if there were enough bytes remaining in the input, otherwise -1. */
static forceinline int
bitstream_read_bytes(struct input_bitstream *is, void *dst_buffer, size_t count)
{
if (unlikely(is->end - is->next < count))
return -1;
memcpy(dst_buffer, is->next, count);
is->next += count;
return 0;
}
/* Align the input bitstream on a coding-unit boundary. */
static forceinline void
bitstream_align(struct input_bitstream *is)
{
is->bitsleft = 0;
is->bitbuf = 0;
}
/******************************************************************************/
/* Huffman decoding */
/*----------------------------------------------------------------------------*/
/*
* Required alignment for the Huffman decode tables. We require this alignment
* so that we can fill the entries with vector or word instructions and not have
* to deal with misaligned buffers.
*/
#define DECODE_TABLE_ALIGNMENT 16
/*
* Each decode table entry is 16 bits divided into two fields: 'symbol' (high 12
* bits) and 'length' (low 4 bits). The precise meaning of these fields depends
* on the type of entry:
*
* Root table entries which are *not* subtable pointers:
* symbol: symbol to decode
* length: codeword length in bits
*
* Root table entries which are subtable pointers:
* symbol: index of start of subtable
* length: number of bits with which the subtable is indexed
*
* Subtable entries:
* symbol: symbol to decode
* length: codeword length in bits, minus the number of bits with which the
* root table is indexed
*/
#define DECODE_TABLE_SYMBOL_SHIFT 4
#define DECODE_TABLE_MAX_SYMBOL ((1 << (16 - DECODE_TABLE_SYMBOL_SHIFT)) - 1)
#define DECODE_TABLE_MAX_LENGTH ((1 << DECODE_TABLE_SYMBOL_SHIFT) - 1)
#define DECODE_TABLE_LENGTH_MASK DECODE_TABLE_MAX_LENGTH
#define MAKE_DECODE_TABLE_ENTRY(symbol, length) \
(((symbol) << DECODE_TABLE_SYMBOL_SHIFT) | (length))
/*
* Read and return the next Huffman-encoded symbol from the given bitstream
* using the given decode table.
*
* If the input data is exhausted, then the Huffman symbol will be decoded as if
* the missing bits were all zeroes.
*
* XXX: This is mostly duplicated in lzms_decode_huffman_symbol() in
* lzms_decompress.c; keep them in sync!
*/
static forceinline unsigned
read_huffsym(struct input_bitstream *is, const u16 decode_table[],
unsigned table_bits, unsigned max_codeword_len)
{
unsigned entry;
unsigned symbol;
unsigned length;
/* Preload the bitbuffer with 'max_codeword_len' bits so that we're
* guaranteed to be able to fully decode a codeword. */
bitstream_ensure_bits(is, max_codeword_len);
/* Index the root table by the next 'table_bits' bits of input. */
entry = decode_table[bitstream_peek_bits(is, table_bits)];
/* Extract the "symbol" and "length" from the entry. */
symbol = entry >> DECODE_TABLE_SYMBOL_SHIFT;
length = entry & DECODE_TABLE_LENGTH_MASK;
/* If the root table is indexed by the full 'max_codeword_len' bits,
* then there cannot be any subtables, and this will be known at compile
* time. Otherwise, we must check whether the decoded symbol is really
* a subtable pointer. If so, we must discard the bits with which the
* root table was indexed, then index the subtable by the next 'length'
* bits of input to get the real entry. */
if (max_codeword_len > table_bits &&
entry >= (1U << (table_bits + DECODE_TABLE_SYMBOL_SHIFT)))
{
/* Subtable required */
bitstream_remove_bits(is, table_bits);
entry = decode_table[symbol + bitstream_peek_bits(is, length)];
symbol = entry >> DECODE_TABLE_SYMBOL_SHIFT;
length = entry & DECODE_TABLE_LENGTH_MASK;
}
/* Discard the bits (or the remaining bits, if a subtable was required)
* of the codeword. */
bitstream_remove_bits(is, length);
/* Return the decoded symbol. */
return symbol;
}
/*
* The DECODE_TABLE_ENOUGH() macro evaluates to the maximum number of decode
* table entries, including all subtable entries, that may be required for
* decoding a given Huffman code. This depends on three parameters:
*
* num_syms: the maximum number of symbols in the code
* table_bits: the number of bits with which the root table will be indexed
* max_codeword_len: the maximum allowed codeword length in the code
*
* Given these parameters, the utility program 'enough' from zlib, when passed
* the three arguments 'num_syms', 'table_bits', and 'max_codeword_len', will
* compute the maximum number of entries required. This has already been done
* for the combinations we need and incorporated into the macro below so that
* the mapping can be done at compilation time. If an unknown combination is
* used, then a compilation error will result. To fix this, use 'enough' to
* find the missing value and add it below. If that still doesn't fix the
* compilation error, then most likely a constraint would be violated by the
* requested parameters, so they cannot be used, at least without other changes
* to the decode table --- see DECODE_TABLE_SIZE().
*/
#define DECODE_TABLE_ENOUGH(num_syms, table_bits, max_codeword_len) ( \
((num_syms) == 8 && (table_bits) == 7 && (max_codeword_len) == 15) ? 128 : \
((num_syms) == 8 && (table_bits) == 5 && (max_codeword_len) == 7) ? 36 : \
((num_syms) == 8 && (table_bits) == 6 && (max_codeword_len) == 7) ? 66 : \
((num_syms) == 8 && (table_bits) == 7 && (max_codeword_len) == 7) ? 128 : \
((num_syms) == 20 && (table_bits) == 5 && (max_codeword_len) == 15) ? 1062 : \
((num_syms) == 20 && (table_bits) == 6 && (max_codeword_len) == 15) ? 582 : \
((num_syms) == 20 && (table_bits) == 7 && (max_codeword_len) == 15) ? 390 : \
((num_syms) == 54 && (table_bits) == 9 && (max_codeword_len) == 15) ? 618 : \
((num_syms) == 54 && (table_bits) == 10 && (max_codeword_len) == 15) ? 1098 : \
((num_syms) == 249 && (table_bits) == 9 && (max_codeword_len) == 16) ? 878 : \
((num_syms) == 249 && (table_bits) == 10 && (max_codeword_len) == 16) ? 1326 : \
((num_syms) == 249 && (table_bits) == 11 && (max_codeword_len) == 16) ? 2318 : \
((num_syms) == 256 && (table_bits) == 9 && (max_codeword_len) == 15) ? 822 : \
((num_syms) == 256 && (table_bits) == 10 && (max_codeword_len) == 15) ? 1302 : \
((num_syms) == 256 && (table_bits) == 11 && (max_codeword_len) == 15) ? 2310 : \
((num_syms) == 512 && (table_bits) == 10 && (max_codeword_len) == 15) ? 1558 : \
((num_syms) == 512 && (table_bits) == 11 && (max_codeword_len) == 15) ? 2566 : \
((num_syms) == 512 && (table_bits) == 12 && (max_codeword_len) == 15) ? 4606 : \
((num_syms) == 656 && (table_bits) == 10 && (max_codeword_len) == 16) ? 1734 : \
((num_syms) == 656 && (table_bits) == 11 && (max_codeword_len) == 16) ? 2726 : \
((num_syms) == 656 && (table_bits) == 12 && (max_codeword_len) == 16) ? 4758 : \
((num_syms) == 799 && (table_bits) == 9 && (max_codeword_len) == 15) ? 1366 : \
((num_syms) == 799 && (table_bits) == 10 && (max_codeword_len) == 15) ? 1846 : \
((num_syms) == 799 && (table_bits) == 11 && (max_codeword_len) == 15) ? 2854 : \
-1)
/* Wrapper around DECODE_TABLE_ENOUGH() that does additional compile-time
* validation. */
#define DECODE_TABLE_SIZE(num_syms, table_bits, max_codeword_len) ( \
\
/* All values must be positive. */ \
STATIC_ASSERT_ZERO((num_syms) > 0) + \
STATIC_ASSERT_ZERO((table_bits) > 0) + \
STATIC_ASSERT_ZERO((max_codeword_len) > 0) + \
\
/* There cannot be more symbols than possible codewords. */ \
STATIC_ASSERT_ZERO((num_syms) <= 1U << (max_codeword_len)) + \
\
/* There is no reason for the root table to be indexed with
* more bits than the maximum codeword length. */ \
STATIC_ASSERT_ZERO((table_bits) <= (max_codeword_len)) + \
\
/* The maximum symbol value must fit in the 'symbol' field. */ \
STATIC_ASSERT_ZERO((num_syms) - 1 <= DECODE_TABLE_MAX_SYMBOL) + \
\
/* The maximum codeword length in the root table must fit in
* the 'length' field. */ \
STATIC_ASSERT_ZERO((table_bits) <= DECODE_TABLE_MAX_LENGTH) + \
\
/* The maximum codeword length in a subtable must fit in the
* 'length' field. */ \
STATIC_ASSERT_ZERO((max_codeword_len) - (table_bits) <= \
DECODE_TABLE_MAX_LENGTH) + \
\
/* The minimum subtable index must be greater than the maximum
* symbol value. If this were not the case, then there would
* be no way to tell whether a given root table entry is a
* "subtable pointer" or not. (An alternate solution would be
* to reserve a flag bit specifically for this purpose.) */ \
STATIC_ASSERT_ZERO((1U << table_bits) > (num_syms) - 1) + \
\
/* The needed 'enough' value must have been defined. */ \
STATIC_ASSERT_ZERO(DECODE_TABLE_ENOUGH( \
(num_syms), (table_bits), \
(max_codeword_len)) > 0) + \
\
/* The maximum subtable index must fit in the 'symbol' field. */\
STATIC_ASSERT_ZERO(DECODE_TABLE_ENOUGH( \
(num_syms), (table_bits), \
(max_codeword_len)) - 1 <= \
DECODE_TABLE_MAX_SYMBOL) + \
\
/* Finally, make the macro evaluate to the needed maximum
* number of decode table entries. */ \
DECODE_TABLE_ENOUGH((num_syms), (table_bits), \
(max_codeword_len)) \
)
/*
* Declare the decode table for a Huffman code, given several compile-time
* constants that describe the code. See DECODE_TABLE_ENOUGH() for details.
*
* Decode tables must be aligned to a DECODE_TABLE_ALIGNMENT-byte boundary.
* This implies that if a decode table is nested inside a dynamically allocated
* structure, then the outer structure must be allocated on a
* DECODE_TABLE_ALIGNMENT-byte aligned boundary as well.
*/
#define DECODE_TABLE(name, num_syms, table_bits, max_codeword_len) \
u16 name[DECODE_TABLE_SIZE((num_syms), (table_bits), \
(max_codeword_len))] \
_aligned_attribute(DECODE_TABLE_ALIGNMENT)
/*
* Declare the temporary "working_space" array needed for building the decode
* table for a Huffman code.
*/
#define DECODE_TABLE_WORKING_SPACE(name, num_syms, max_codeword_len) \
u16 name[2 * ((max_codeword_len) + 1) + (num_syms)]
extern int
make_huffman_decode_table(u16 decode_table[], unsigned num_syms,
unsigned table_bits, const u8 lens[],
unsigned max_codeword_len, u16 working_space[]);
/******************************************************************************/
/* LZ match copying */
/*----------------------------------------------------------------------------*/
static forceinline void
copy_word_unaligned(const void *src, void *dst)
{
*(machine_word_t*)dst = *(machine_word_t*)src;
}
static forceinline machine_word_t
repeat_u16(u16 b)
{
machine_word_t v = b;
STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
v |= v << 16;
v |= v << ((WORDBITS == 64) ? 32 : 0);
return v;
}
static forceinline machine_word_t
repeat_byte(u8 b)
{
return repeat_u16(((u16)b << 8) | b);
}
/*
* Copy an LZ77 match of 'length' bytes from the match source at 'out_next -
* offset' to the match destination at 'out_next'. The source and destination
* may overlap.
*
* This handles validating the length and offset. It is validated that the
* beginning of the match source is '>= out_begin' and that end of the match
* destination is '<= out_end'. The return value is 0 if the match was valid
* (and was copied), otherwise -1.
*
* 'min_length' is a hint which specifies the minimum possible match length.
* This should be a compile-time constant.
*/
static forceinline int
lz_copy(u32 length, u32 offset, u8 *out_begin, u8 *out_next, u8 *out_end,
u32 min_length)
{
const u8 *src;
u8 *end;
/* Validate the offset. */
if (unlikely(offset > out_next - out_begin))
return -1;
/*
* Fast path: copy a match which is no longer than a few words, is not
* overlapped such that copying a word at a time would produce incorrect
* results, and is not too close to the end of the buffer. Note that
* this might copy more than the length of the match, but that's okay in
* this scenario.
*/
src = out_next - offset;
if (UNALIGNED_ACCESS_IS_FAST && length <= 3 * WORDBYTES &&
offset >= WORDBYTES && out_end - out_next >= 3 * WORDBYTES)
{
copy_word_unaligned(src + WORDBYTES*0, out_next + WORDBYTES*0);
copy_word_unaligned(src + WORDBYTES*1, out_next + WORDBYTES*1);
copy_word_unaligned(src + WORDBYTES*2, out_next + WORDBYTES*2);
return 0;
}
/* Validate the length. This isn't needed in the fast path above, due
* to the additional conditions tested, but we do need it here. */
if (unlikely(length > out_end - out_next))
return -1;
end = out_next + length;
/*
* Try to copy one word at a time. On i386 and x86_64 this is faster
* than copying one byte at a time, unless the data is near-random and
* all the matches have very short lengths. Note that since this
* requires unaligned memory accesses, it won't necessarily be faster on
* every architecture.
*
* Also note that we might copy more than the length of the match. For
* example, if a word is 8 bytes and the match is of length 5, then
* we'll simply copy 8 bytes. This is okay as long as we don't write
* beyond the end of the output buffer, hence the check for (out_end -
* end >= WORDBYTES - 1).
*/
if (UNALIGNED_ACCESS_IS_FAST && likely(out_end - end >= WORDBYTES - 1))
{
if (offset >= WORDBYTES) {
/* The source and destination words don't overlap. */
do {
copy_word_unaligned(src, out_next);
src += WORDBYTES;
out_next += WORDBYTES;
} while (out_next < end);
return 0;
} else if (offset == 1) {
/* Offset 1 matches are equivalent to run-length
* encoding of the previous byte. This case is common
* if the data contains many repeated bytes. */
machine_word_t v = repeat_byte(*(out_next - 1));
do {
*(machine_word_t*)out_next = v;
src += WORDBYTES;
out_next += WORDBYTES;
} while (out_next < end);
return 0;
}
/*
* We don't bother with special cases for other 'offset <
* WORDBYTES', which are usually rarer than 'offset == 1'.
* Extra checks will just slow things down. Actually, it's
* possible to handle all the 'offset < WORDBYTES' cases using
* the same code, but it still becomes more complicated doesn't
* seem any faster overall; it definitely slows down the more
* common 'offset == 1' case.
*/
}
/* Fall back to a bytewise copy. */
if (min_length >= 2)
*out_next++ = *src++;
if (min_length >= 3)
*out_next++ = *src++;
if (min_length >= 4)
*out_next++ = *src++;
do {
*out_next++ = *src++;
} while (out_next != end);
return 0;
}
#endif /* _DECOMPRESS_COMMON_H */
ntfs2btrfs-20240115/src/ebiggers/lzx_common.c000066400000000000000000000233661455127722500210020ustar00rootroot00000000000000/*
* lzx_common.c - Common code for LZX compression and decompression.
*/
/*
* Copyright (C) 2012-2016 Eric Biggers
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 2 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include <string.h>
#ifdef __SSE2__
# include <emmintrin.h>
#endif
#ifdef __AVX2__
# include <immintrin.h>
#endif
#include "common_defs.h"
#include "lzx_common.h"
/* Mapping: offset slot => first match offset that uses that offset slot.
* The offset slots for repeat offsets map to "fake" offsets < 1. */
const s32 lzx_offset_slot_base[LZX_MAX_OFFSET_SLOTS + 1] = {
-2 , -1 , 0 , 1 , 2 , /* 0 --- 4 */
4 , 6 , 10 , 14 , 22 , /* 5 --- 9 */
30 , 46 , 62 , 94 , 126 , /* 10 --- 14 */
190 , 254 , 382 , 510 , 766 , /* 15 --- 19 */
1022 , 1534 , 2046 , 3070 , 4094 , /* 20 --- 24 */
6142 , 8190 , 12286 , 16382 , 24574 , /* 25 --- 29 */
32766 , 49150 , 65534 , 98302 , 131070 , /* 30 --- 34 */
196606 , 262142 , 393214 , 524286 , 655358 , /* 35 --- 39 */
786430 , 917502 , 1048574, 1179646, 1310718, /* 40 --- 44 */
1441790, 1572862, 1703934, 1835006, 1966078, /* 45 --- 49 */
2097150 /* extra */
};
/* Mapping: offset slot => how many extra bits must be read and added to the
* corresponding offset slot base to decode the match offset. */
const u8 lzx_extra_offset_bits[LZX_MAX_OFFSET_SLOTS] = {
0 , 0 , 0 , 0 , 1 ,
1 , 2 , 2 , 3 , 3 ,
4 , 4 , 5 , 5 , 6 ,
6 , 7 , 7 , 8 , 8 ,
9 , 9 , 10, 10, 11,
11, 12, 12, 13, 13,
14, 14, 15, 15, 16,
16, 17, 17, 17, 17,
17, 17, 17, 17, 17,
17, 17, 17, 17, 17,
};
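/*
 * Worked example (illustrative, not part of the original source): offset slot
 * 9 has base 22 and 3 extra offset bits, so it covers match offsets 22
 * through 29; slot 10 then begins at base 30, as in the table above. The
 * decompressor recovers an explicit offset as lzx_offset_slot_base[slot] plus
 * the extra bits read from the bitstream.
 */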
/* Round the specified buffer size up to the next valid LZX window size, and
* return its order (log2). Or, if the buffer size is 0 or greater than the
* largest valid LZX window size, return 0. */
unsigned
lzx_get_window_order(size_t max_bufsize)
{
if (max_bufsize == 0 || max_bufsize > LZX_MAX_WINDOW_SIZE)
return 0;
return max(ilog2_ceil(max_bufsize), LZX_MIN_WINDOW_ORDER);
}
/* Given a valid LZX window order, return the number of symbols that will exist
* in the main Huffman code. */
unsigned
lzx_get_num_main_syms(unsigned window_order)
{
/* Note: one would expect that the maximum match offset would be
* 'window_size - LZX_MIN_MATCH_LEN', which would occur if the first two
* bytes were to match the last two bytes. However, the format
* disallows this case. This reduces the number of needed offset slots
* by 1. */
u32 window_size = (u32)1 << window_order;
u32 max_offset = window_size - LZX_MIN_MATCH_LEN - 1;
unsigned num_offset_slots = 30;
while (max_offset >= lzx_offset_slot_base[num_offset_slots])
num_offset_slots++;
return LZX_NUM_CHARS + (num_offset_slots * LZX_NUM_LEN_HEADERS);
}
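/*
 * Illustrative example (not part of the original source): for the 2^15-byte
 * window used by WIM chunks, max_offset = 32768 - 2 - 1 = 32765, which is
 * less than lzx_offset_slot_base[30] = 32766, so there are 30 offset slots
 * and the main code has 256 + (30 * 8) = 496 symbols.
 */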
static void
do_translate_target(void *target, s32 input_pos)
{
s32 abs_offset, rel_offset;
rel_offset = *(int32_t*)target;
if (rel_offset >= -input_pos && rel_offset < LZX_WIM_MAGIC_FILESIZE) {
if (rel_offset < LZX_WIM_MAGIC_FILESIZE - input_pos) {
/* "good translation" */
abs_offset = rel_offset + input_pos;
} else {
/* "compensating translation" */
abs_offset = rel_offset - LZX_WIM_MAGIC_FILESIZE;
}
*(uint32_t*)target = abs_offset;
}
}
static void
undo_translate_target(void *target, s32 input_pos)
{
s32 abs_offset, rel_offset;
abs_offset = *(int32_t*)target;
if (abs_offset >= 0) {
if (abs_offset < LZX_WIM_MAGIC_FILESIZE) {
/* "good translation" */
rel_offset = abs_offset - input_pos;
*(uint32_t*)target = rel_offset;
}
} else {
if (abs_offset >= -input_pos) {
/* "compensating translation" */
rel_offset = abs_offset + LZX_WIM_MAGIC_FILESIZE;
*(uint32_t*)target = rel_offset;
}
}
}
/*
* Do or undo the 'E8' preprocessing used in LZX. Before compression, the
* uncompressed data is preprocessed by changing the targets of x86 CALL
* instructions from relative offsets to absolute offsets. After decompression,
* the translation is undone by changing the targets of x86 CALL instructions
* from absolute offsets to relative offsets.
*
* Note that despite its intent, E8 preprocessing can be done on any data even
* if it is not actually x86 machine code. In fact, E8 preprocessing appears to
* always be used in LZX-compressed resources in WIM files; there is no bit to
* indicate whether it is used or not, unlike in the LZX compressed format as
* used in cabinet files, where a bit is reserved for that purpose.
*
* E8 preprocessing is disabled in the last 6 bytes of the uncompressed data,
* which really means the 5-byte call instruction cannot start in the last 10
* bytes of the uncompressed data. This is one of the errors in the LZX
* documentation.
*
* E8 preprocessing does not appear to be disabled after the 32768th chunk of a
* WIM resource, which apparently is another difference from the LZX compression
* used in cabinet files.
*
* E8 processing is supposed to take the file size as a parameter, as it is used
* in calculating the translated jump targets. But in WIM files, this file size
* is always the same (LZX_WIM_MAGIC_FILESIZE == 12000000).
*/
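/*
 * Illustrative example (not part of the original source): if an E8 byte lies
 * at input position 0x1000 and its 32-bit operand holds the relative offset
 * 0x234, do_translate_target() replaces it with the absolute target 0x1234
 * (= 0x234 + 0x1000); undo_translate_target() performs the inverse and
 * restores 0x234.
 */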
static void
lzx_e8_filter(u8 *data, u32 size, void (*process_target)(void *, s32))
{
#if !defined(__SSE2__) && !defined(__AVX2__)
/*
* A worthwhile optimization is to push the end-of-buffer check into the
* relatively rare E8 case. This is possible if we replace the last six
* bytes of data with E8 bytes; then we are guaranteed to hit an E8 byte
* before reaching end-of-buffer. In addition, this scheme guarantees
* that no translation can begin following an E8 byte in the last 10
* bytes because a 4-byte offset containing E8 as its high byte is a
* large negative number that is not valid for translation. That is
* exactly what we need.
*/
u8 *tail;
u8 saved_bytes[6];
u8 *p;
if (size <= 10)
return;
tail = &data[size - 6];
memcpy(saved_bytes, tail, 6);
memset(tail, 0xE8, 6);
p = data;
for (;;) {
while (*p != 0xE8)
p++;
if (p >= tail)
break;
(*process_target)(p + 1, p - data);
p += 5;
}
memcpy(tail, saved_bytes, 6);
#else
/* SSE2 or AVX-2 optimized version for x86_64 */
u8 *p = data;
u64 valid_mask = ~0;
if (size <= 10)
return;
#ifdef __AVX2__
# define ALIGNMENT_REQUIRED 32
#else
# define ALIGNMENT_REQUIRED 16
#endif
/* Process one byte at a time until the pointer is properly aligned. */
while ((uintptr_t)p % ALIGNMENT_REQUIRED != 0) {
if (p >= data + size - 10)
return;
if (*p == 0xE8 && (valid_mask & 1)) {
(*process_target)(p + 1, p - data);
valid_mask &= ~0x1F;
}
p++;
valid_mask >>= 1;
valid_mask |= (u64)1 << 63;
}
if (data + size - p >= 64) {
/* Vectorized processing */
/* Note: we use a "trap" E8 byte to eliminate the need to check
* for end-of-buffer in the inner loop. This byte is carefully
* positioned so that it will never be changed by a previous
* translation before it is detected. */
u8 *trap = p + ((data + size - p) & ~31) - 32 + 4;
u8 saved_byte = *trap;
*trap = 0xE8;
for (;;) {
u32 e8_mask;
u8 *orig_p = p;
#ifdef __AVX2__
const __m256i e8_bytes = _mm256_set1_epi8(0xE8);
for (;;) {
__m256i bytes = *(const __m256i *)p;
__m256i cmpresult = _mm256_cmpeq_epi8(bytes, e8_bytes);
e8_mask = _mm256_movemask_epi8(cmpresult);
if (e8_mask)
break;
p += 32;
}
#else
const __m128i e8_bytes = _mm_set1_epi8(0xE8);
for (;;) {
/* Read the next 32 bytes of data and test them
* for E8 bytes. */
__m128i bytes1 = *(const __m128i *)p;
__m128i bytes2 = *(const __m128i *)(p + 16);
__m128i cmpresult1 = _mm_cmpeq_epi8(bytes1, e8_bytes);
__m128i cmpresult2 = _mm_cmpeq_epi8(bytes2, e8_bytes);
u32 mask1 = _mm_movemask_epi8(cmpresult1);
u32 mask2 = _mm_movemask_epi8(cmpresult2);
/* The masks have a bit set for each E8 byte.
* We stay in this fast inner loop as long as
* there are no E8 bytes. */
if (mask1 | mask2) {
e8_mask = mask1 | (mask2 << 16);
break;
}
p += 32;
}
#endif
/* Did we pass over data with no E8 bytes? */
if (p != orig_p)
valid_mask = ~0;
/* Are we nearing end-of-buffer? */
if (p == trap - 4)
break;
/* Process the E8 bytes. However, the AND with
* 'valid_mask' ensures we never process an E8 byte that
* was itself part of a translation target. */
while ((e8_mask &= valid_mask)) {
unsigned bit = bsf32(e8_mask);
(*process_target)(p + bit + 1, p + bit - data);
valid_mask &= ~((u64)0x1F << bit);
}
valid_mask >>= 32;
valid_mask |= 0xFFFFFFFF00000000;
p += 32;
}
*trap = saved_byte;
}
/* Approaching the end of the buffer; process one byte at a time. */
while (p < data + size - 10) {
if (*p == 0xE8 && (valid_mask & 1)) {
(*process_target)(p + 1, p - data);
valid_mask &= ~0x1F;
}
p++;
valid_mask >>= 1;
valid_mask |= (u64)1 << 63;
}
#endif /* __SSE2__ || __AVX2__ */
}
void
lzx_preprocess(u8 *data, u32 size)
{
lzx_e8_filter(data, size, do_translate_target);
}
void
lzx_postprocess(u8 *data, u32 size)
{
lzx_e8_filter(data, size, undo_translate_target);
}
ntfs2btrfs-20240115/src/ebiggers/lzx_common.h000066400000000000000000000010671455127722500210010ustar00rootroot00000000000000/*
* lzx_common.h
*
* Declarations shared between LZX compression and decompression.
*/
#ifndef _LZX_COMMON_H
#define _LZX_COMMON_H
#include "lzx_constants.h"
#include "common_defs.h"
extern const s32 lzx_offset_slot_base[LZX_MAX_OFFSET_SLOTS + 1];
extern const u8 lzx_extra_offset_bits[LZX_MAX_OFFSET_SLOTS];
extern unsigned
lzx_get_window_order(size_t max_bufsize);
extern unsigned
lzx_get_num_main_syms(unsigned window_order);
extern void
lzx_preprocess(u8 *data, u32 size);
extern void
lzx_postprocess(u8 *data, u32 size);
#endif /* _LZX_COMMON_H */
ntfs2btrfs-20240115/src/ebiggers/lzx_constants.h000066400000000000000000000074751455127722500215360ustar00rootroot00000000000000/*
* lzx_constants.h
*
* Constants for the LZX compression format.
*/
#ifndef _LZX_CONSTANTS_H
#define _LZX_CONSTANTS_H
/* Number of literal byte values. */
#define LZX_NUM_CHARS 256
/* The smallest and largest allowed match lengths. */
#define LZX_MIN_MATCH_LEN 2
#define LZX_MAX_MATCH_LEN 257
/* Number of distinct match lengths that can be represented. */
#define LZX_NUM_LENS (LZX_MAX_MATCH_LEN - LZX_MIN_MATCH_LEN + 1)
/* Number of match lengths for which no length symbol is required. */
#define LZX_NUM_PRIMARY_LENS 7
#define LZX_NUM_LEN_HEADERS (LZX_NUM_PRIMARY_LENS + 1)
/* Valid values of the 3-bit block type field. */
#define LZX_BLOCKTYPE_VERBATIM 1
#define LZX_BLOCKTYPE_ALIGNED 2
#define LZX_BLOCKTYPE_UNCOMPRESSED 3
/* 'LZX_MIN_WINDOW_SIZE' and 'LZX_MAX_WINDOW_SIZE' are the minimum and maximum
* sizes of the sliding window. */
#define LZX_MIN_WINDOW_ORDER 15
#define LZX_MAX_WINDOW_ORDER 21
#define LZX_MIN_WINDOW_SIZE (1UL << LZX_MIN_WINDOW_ORDER) /* 32768 */
#define LZX_MAX_WINDOW_SIZE (1UL << LZX_MAX_WINDOW_ORDER) /* 2097152 */
/* Maximum number of offset slots. (The actual number of offset slots depends
* on the window size.) */
#define LZX_MAX_OFFSET_SLOTS 50
/* Maximum number of symbols in the main code. (The actual number of symbols in
* the main code depends on the window size.) */
#define LZX_MAINCODE_MAX_NUM_SYMBOLS \
(LZX_NUM_CHARS + (LZX_MAX_OFFSET_SLOTS * LZX_NUM_LEN_HEADERS))
/* Number of symbols in the length code. */
#define LZX_LENCODE_NUM_SYMBOLS (LZX_NUM_LENS - LZX_NUM_PRIMARY_LENS)
/* Number of symbols in the pre-code. */
#define LZX_PRECODE_NUM_SYMBOLS 20
/* Number of bits in which each pre-code codeword length is represented. */
#define LZX_PRECODE_ELEMENT_SIZE 4
/* Number of low-order bits of each match offset that are entropy-encoded in
* aligned offset blocks. */
#define LZX_NUM_ALIGNED_OFFSET_BITS 3
/* Number of symbols in the aligned offset code. */
#define LZX_ALIGNEDCODE_NUM_SYMBOLS (1 << LZX_NUM_ALIGNED_OFFSET_BITS)
/* Mask for the match offset bits that are entropy-encoded in aligned offset
* blocks. */
#define LZX_ALIGNED_OFFSET_BITMASK ((1 << LZX_NUM_ALIGNED_OFFSET_BITS) - 1)
/* Number of bits in which each aligned offset codeword length is represented. */
#define LZX_ALIGNEDCODE_ELEMENT_SIZE 3
/* The first offset slot which requires an aligned offset symbol in aligned
* offset blocks. */
#define LZX_MIN_ALIGNED_OFFSET_SLOT 8
/* The offset slot base for LZX_MIN_ALIGNED_OFFSET_SLOT. */
#define LZX_MIN_ALIGNED_OFFSET 14
/* The maximum number of extra offset bits in verbatim blocks. (One would need
* to subtract LZX_NUM_ALIGNED_OFFSET_BITS to get the number of extra offset
* bits in *aligned* blocks.) */
#define LZX_MAX_NUM_EXTRA_BITS 17
/* Maximum lengths (in bits) for length-limited Huffman code construction. */
#define LZX_MAX_MAIN_CODEWORD_LEN 16
#define LZX_MAX_LEN_CODEWORD_LEN 16
#define LZX_MAX_PRE_CODEWORD_LEN ((1 << LZX_PRECODE_ELEMENT_SIZE) - 1)
#define LZX_MAX_ALIGNED_CODEWORD_LEN ((1 << LZX_ALIGNEDCODE_ELEMENT_SIZE) - 1)
/* For LZX-compressed blocks in WIM resources, this value is always used as the
* filesize parameter for the call instruction (0xe8 byte) preprocessing, even
* though the blocks themselves are not this size, and the size of the actual
* file resource in the WIM file is very likely to be something entirely
* different as well. */
#define LZX_WIM_MAGIC_FILESIZE 12000000
/* Assumed LZX block size when the encoded block size begins with a 0 bit.
* This is probably WIM-specific. */
#define LZX_DEFAULT_BLOCK_SIZE 32768
/* Number of offsets in the recent (or "repeat") offsets queue. */
#define LZX_NUM_RECENT_OFFSETS 3
/* An offset of n bytes is actually encoded as (n + LZX_OFFSET_ADJUSTMENT). */
#define LZX_OFFSET_ADJUSTMENT (LZX_NUM_RECENT_OFFSETS - 1)
#endif /* _LZX_CONSTANTS_H */
ntfs2btrfs-20240115/src/ebiggers/lzx_decompress.c000066400000000000000000000403051455127722500216460ustar00rootroot00000000000000/*
* lzx_decompress.c
*
* A decompressor for the LZX compression format, as used in WIM files.
*/
/*
* Copyright (C) 2012-2016 Eric Biggers
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 2 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see .
*/
/*
* LZX is an LZ77 and Huffman-code based compression format that has many
* similarities to DEFLATE (the format used by zlib/gzip). The compression
* ratio is as good or better than DEFLATE. See lzx_compress.c for a format
* overview, and see https://en.wikipedia.org/wiki/LZX_(algorithm) for a
* historical overview. Here I make some pragmatic notes.
*
* The old specification for LZX is the document "Microsoft LZX Data Compression
* Format" (1997). It defines the LZX format as used in cabinet files. Allowed
* window sizes are 2^n where 15 <= n <= 21. However, this document contains
* several errors, so don't read too much into it...
*
* The new specification for LZX is the document "[MS-PATCH]: LZX DELTA
* Compression and Decompression" (2014). It defines the LZX format as used by
* Microsoft's binary patcher. It corrects several errors in the 1997 document
* and extends the format in several ways --- namely, optional reference data,
* up to 2^25 byte windows, and longer match lengths.
*
* WIM files use a more restricted form of LZX. No LZX DELTA extensions are
* present, the window is not "sliding", E8 preprocessing is done
* unconditionally with a fixed file size, and the maximum window size is always
* 2^15 bytes (equal to the size of each "chunk" in a compressed WIM resource).
* This code is primarily intended to implement this form of LZX. But although
* not compatible with WIMGAPI, this code also supports maximum window sizes up
* to 2^21 bytes.
*
* TODO: Add support for window sizes up to 2^25 bytes.
*/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include <errno.h>
#include <string.h>
#include "decompress_common.h"
#include "lzx_common.h"
#include "system_compression.h"
/* These values are chosen for fast decompression. */
#define LZX_MAINCODE_TABLEBITS 11
#define LZX_LENCODE_TABLEBITS 9
#define LZX_PRECODE_TABLEBITS 6
#define LZX_ALIGNEDCODE_TABLEBITS 7
#define LZX_READ_LENS_MAX_OVERRUN 50
struct lzx_decompressor {
DECODE_TABLE(maincode_decode_table, LZX_MAINCODE_MAX_NUM_SYMBOLS,
LZX_MAINCODE_TABLEBITS, LZX_MAX_MAIN_CODEWORD_LEN);
u8 maincode_lens[LZX_MAINCODE_MAX_NUM_SYMBOLS + LZX_READ_LENS_MAX_OVERRUN];
DECODE_TABLE(lencode_decode_table, LZX_LENCODE_NUM_SYMBOLS,
LZX_LENCODE_TABLEBITS, LZX_MAX_LEN_CODEWORD_LEN);
u8 lencode_lens[LZX_LENCODE_NUM_SYMBOLS + LZX_READ_LENS_MAX_OVERRUN];
union {
DECODE_TABLE(alignedcode_decode_table, LZX_ALIGNEDCODE_NUM_SYMBOLS,
LZX_ALIGNEDCODE_TABLEBITS, LZX_MAX_ALIGNED_CODEWORD_LEN);
u8 alignedcode_lens[LZX_ALIGNEDCODE_NUM_SYMBOLS];
};
union {
DECODE_TABLE(precode_decode_table, LZX_PRECODE_NUM_SYMBOLS,
LZX_PRECODE_TABLEBITS, LZX_MAX_PRE_CODEWORD_LEN);
u8 precode_lens[LZX_PRECODE_NUM_SYMBOLS];
u8 extra_offset_bits[LZX_MAX_OFFSET_SLOTS];
};
union {
DECODE_TABLE_WORKING_SPACE(maincode_working_space,
LZX_MAINCODE_MAX_NUM_SYMBOLS,
LZX_MAX_MAIN_CODEWORD_LEN);
DECODE_TABLE_WORKING_SPACE(lencode_working_space,
LZX_LENCODE_NUM_SYMBOLS,
LZX_MAX_LEN_CODEWORD_LEN);
DECODE_TABLE_WORKING_SPACE(alignedcode_working_space,
LZX_ALIGNEDCODE_NUM_SYMBOLS,
LZX_MAX_ALIGNED_CODEWORD_LEN);
DECODE_TABLE_WORKING_SPACE(precode_working_space,
LZX_PRECODE_NUM_SYMBOLS,
LZX_MAX_PRE_CODEWORD_LEN);
};
unsigned window_order;
unsigned num_main_syms;
/* Like lzx_extra_offset_bits[], but does not include the entropy-coded
* bits of aligned offset blocks */
u8 extra_offset_bits_minus_aligned[LZX_MAX_OFFSET_SLOTS];
} _aligned_attribute(DECODE_TABLE_ALIGNMENT);
/* Read a Huffman-encoded symbol using the precode. */
static forceinline unsigned
read_presym(const struct lzx_decompressor *d, struct input_bitstream *is)
{
return read_huffsym(is, d->precode_decode_table,
LZX_PRECODE_TABLEBITS, LZX_MAX_PRE_CODEWORD_LEN);
}
/* Read a Huffman-encoded symbol using the main code. */
static forceinline unsigned
read_mainsym(const struct lzx_decompressor *d, struct input_bitstream *is)
{
return read_huffsym(is, d->maincode_decode_table,
LZX_MAINCODE_TABLEBITS, LZX_MAX_MAIN_CODEWORD_LEN);
}
/* Read a Huffman-encoded symbol using the length code. */
static forceinline unsigned
read_lensym(const struct lzx_decompressor *d, struct input_bitstream *is)
{
return read_huffsym(is, d->lencode_decode_table,
LZX_LENCODE_TABLEBITS, LZX_MAX_LEN_CODEWORD_LEN);
}
/* Read a Huffman-encoded symbol using the aligned offset code. */
static forceinline unsigned
read_alignedsym(const struct lzx_decompressor *d, struct input_bitstream *is)
{
return read_huffsym(is, d->alignedcode_decode_table,
LZX_ALIGNEDCODE_TABLEBITS, LZX_MAX_ALIGNED_CODEWORD_LEN);
}
/*
* Read a precode from the compressed input bitstream, then use it to decode
* @num_lens codeword length values and write them to @lens.
*/
static int
lzx_read_codeword_lens(struct lzx_decompressor *d, struct input_bitstream *is,
u8 *lens, unsigned num_lens)
{
u8 *len_ptr = lens;
u8 *lens_end = lens + num_lens;
/* Read the lengths of the precode codewords. These are stored
* explicitly. */
for (int i = 0; i < LZX_PRECODE_NUM_SYMBOLS; i++) {
d->precode_lens[i] =
bitstream_read_bits(is, LZX_PRECODE_ELEMENT_SIZE);
}
/* Build the decoding table for the precode. */
if (make_huffman_decode_table(d->precode_decode_table,
LZX_PRECODE_NUM_SYMBOLS,
LZX_PRECODE_TABLEBITS,
d->precode_lens,
LZX_MAX_PRE_CODEWORD_LEN,
d->precode_working_space))
return -1;
/* Decode the codeword lengths. */
do {
unsigned presym;
u8 len;
/* Read the next precode symbol. */
presym = read_presym(d, is);
if (presym < 17) {
/* Difference from old length */
len = *len_ptr - presym;
if ((int8_t)len < 0)
len += 17;
*len_ptr++ = len;
} else {
/* Special RLE values */
unsigned run_len;
if (presym == 17) {
/* Run of 0's */
run_len = 4 + bitstream_read_bits(is, 4);
len = 0;
} else if (presym == 18) {
/* Longer run of 0's */
run_len = 20 + bitstream_read_bits(is, 5);
len = 0;
} else {
/* Run of identical lengths */
run_len = 4 + bitstream_read_bits(is, 1);
presym = read_presym(d, is);
if (unlikely(presym > 17))
return -1;
len = *len_ptr - presym;
if ((int8_t)len < 0)
len += 17;
}
do {
*len_ptr++ = len;
} while (--run_len);
/*
* The worst case overrun is when presym == 18,
* run_len == 20 + 31, and only 1 length was remaining.
* So LZX_READ_LENS_MAX_OVERRUN == 50.
*
* Overrun while reading the first half of maincode_lens
* can corrupt the previous values in the second half.
* This doesn't really matter because the resulting
* lengths will still be in range, and data that
* generates overruns is invalid anyway.
*/
}
} while (len_ptr < lens_end);
return 0;
}
/*
* Read the header of an LZX block. For all block types, the block type and
* size is saved in *block_type_ret and *block_size_ret, respectively. For
* compressed blocks, the codeword lengths are also saved. For uncompressed
* blocks, the recent offsets queue is also updated.
*/
static int
lzx_read_block_header(struct lzx_decompressor *d, struct input_bitstream *is,
u32 recent_offsets[], int *block_type_ret,
u32 *block_size_ret)
{
int block_type;
u32 block_size;
bitstream_ensure_bits(is, 4);
/* Read the block type. */
block_type = bitstream_pop_bits(is, 3);
/* Read the block size. */
if (bitstream_pop_bits(is, 1)) {
block_size = LZX_DEFAULT_BLOCK_SIZE;
} else {
block_size = bitstream_read_bits(is, 16);
if (d->window_order >= 16) {
block_size <<= 8;
block_size |= bitstream_read_bits(is, 8);
}
}
switch (block_type) {
case LZX_BLOCKTYPE_ALIGNED:
/* Read the aligned offset codeword lengths. */
for (int i = 0; i < LZX_ALIGNEDCODE_NUM_SYMBOLS; i++) {
d->alignedcode_lens[i] =
bitstream_read_bits(is,
LZX_ALIGNEDCODE_ELEMENT_SIZE);
}
/* Fall through, since the rest of the header for aligned offset
* blocks is the same as that for verbatim blocks. */
case LZX_BLOCKTYPE_VERBATIM:
/* Read the main codeword lengths, which are divided into two
* parts: literal symbols and match headers. */
if (lzx_read_codeword_lens(d, is, d->maincode_lens,
LZX_NUM_CHARS))
return -1;
if (lzx_read_codeword_lens(d, is, d->maincode_lens + LZX_NUM_CHARS,
d->num_main_syms - LZX_NUM_CHARS))
return -1;
/* Read the length codeword lengths. */
if (lzx_read_codeword_lens(d, is, d->lencode_lens,
LZX_LENCODE_NUM_SYMBOLS))
return -1;
break;
case LZX_BLOCKTYPE_UNCOMPRESSED:
/*
* The header of an uncompressed block contains new values for
* the recent offsets queue, starting on the next 16-bit
* boundary in the bitstream. Careful: if the stream is
* *already* aligned, the correct thing to do is to throw away
* the next 16 bits (this is probably a mistake in the format).
*/
bitstream_ensure_bits(is, 1);
bitstream_align(is);
recent_offsets[0] = bitstream_read_u32(is);
recent_offsets[1] = bitstream_read_u32(is);
recent_offsets[2] = bitstream_read_u32(is);
/* Offsets of 0 are invalid. */
if (recent_offsets[0] == 0 || recent_offsets[1] == 0 ||
recent_offsets[2] == 0)
return -1;
break;
default:
/* Unrecognized block type. */
return -1;
}
*block_type_ret = block_type;
*block_size_ret = block_size;
return 0;
}
/* Decompress a block of LZX-compressed data. */
static int
lzx_decompress_block(struct lzx_decompressor *d, struct input_bitstream *is,
int block_type, u32 block_size,
u8 * const out_begin, u8 *out_next, u32 recent_offsets[])
{
u8 * const block_end = out_next + block_size;
unsigned min_aligned_offset_slot;
/*
* Build the Huffman decode tables. We always need to build the main
* and length decode tables. For aligned blocks we additionally need to
* build the aligned offset decode table.
*/
if (make_huffman_decode_table(d->maincode_decode_table,
d->num_main_syms,
LZX_MAINCODE_TABLEBITS,
d->maincode_lens,
LZX_MAX_MAIN_CODEWORD_LEN,
d->maincode_working_space))
return -1;
if (make_huffman_decode_table(d->lencode_decode_table,
LZX_LENCODE_NUM_SYMBOLS,
LZX_LENCODE_TABLEBITS,
d->lencode_lens,
LZX_MAX_LEN_CODEWORD_LEN,
d->lencode_working_space))
return -1;
if (block_type == LZX_BLOCKTYPE_ALIGNED) {
if (make_huffman_decode_table(d->alignedcode_decode_table,
LZX_ALIGNEDCODE_NUM_SYMBOLS,
LZX_ALIGNEDCODE_TABLEBITS,
d->alignedcode_lens,
LZX_MAX_ALIGNED_CODEWORD_LEN,
d->alignedcode_working_space))
return -1;
min_aligned_offset_slot = LZX_MIN_ALIGNED_OFFSET_SLOT;
memcpy(d->extra_offset_bits, d->extra_offset_bits_minus_aligned,
sizeof(lzx_extra_offset_bits));
} else {
min_aligned_offset_slot = LZX_MAX_OFFSET_SLOTS;
memcpy(d->extra_offset_bits, lzx_extra_offset_bits,
sizeof(lzx_extra_offset_bits));
}
/* Decode the literals and matches. */
do {
unsigned mainsym;
unsigned length;
u32 offset;
unsigned offset_slot;
mainsym = read_mainsym(d, is);
if (mainsym < LZX_NUM_CHARS) {
/* Literal */
*out_next++ = mainsym;
continue;
}
/* Match */
/* Decode the length header and offset slot. */
STATIC_ASSERT(LZX_NUM_CHARS % LZX_NUM_LEN_HEADERS == 0);
length = mainsym % LZX_NUM_LEN_HEADERS;
offset_slot = (mainsym - LZX_NUM_CHARS) / LZX_NUM_LEN_HEADERS;
/* If needed, read a length symbol to decode the full length. */
if (length == LZX_NUM_PRIMARY_LENS)
length += read_lensym(d, is);
length += LZX_MIN_MATCH_LEN;
if (offset_slot < LZX_NUM_RECENT_OFFSETS) {
/* Repeat offset */
/* Note: This isn't a real LRU queue, since using the R2
* offset doesn't bump the R1 offset down to R2. */
offset = recent_offsets[offset_slot];
recent_offsets[offset_slot] = recent_offsets[0];
} else {
/* Explicit offset */
offset = bitstream_read_bits(is, d->extra_offset_bits[offset_slot]);
if (offset_slot >= min_aligned_offset_slot) {
offset = (offset << LZX_NUM_ALIGNED_OFFSET_BITS) |
read_alignedsym(d, is);
}
offset += lzx_offset_slot_base[offset_slot];
/* Update the match offset LRU queue. */
STATIC_ASSERT(LZX_NUM_RECENT_OFFSETS == 3);
recent_offsets[2] = recent_offsets[1];
recent_offsets[1] = recent_offsets[0];
}
recent_offsets[0] = offset;
/* Validate the match and copy it to the current position. */
if (unlikely(lz_copy(length, offset, out_begin,
out_next, block_end, LZX_MIN_MATCH_LEN)))
return -1;
out_next += length;
} while (out_next != block_end);
return 0;
}
int
lzx_decompress(struct lzx_decompressor *d,
const void *compressed_data, size_t compressed_size,
void *uncompressed_data, size_t uncompressed_size)
{
u8 * const out_begin = uncompressed_data;
u8 *out_next = out_begin;
u8 * const out_end = out_begin + uncompressed_size;
struct input_bitstream is;
STATIC_ASSERT(LZX_NUM_RECENT_OFFSETS == 3);
u32 recent_offsets[LZX_NUM_RECENT_OFFSETS] = {1, 1, 1};
unsigned may_have_e8_byte = 0;
init_input_bitstream(&is, compressed_data, compressed_size);
/* Codeword lengths begin as all 0's for delta encoding purposes. */
memset(d->maincode_lens, 0, d->num_main_syms);
memset(d->lencode_lens, 0, LZX_LENCODE_NUM_SYMBOLS);
/* Decompress blocks until we have all the uncompressed data. */
while (out_next != out_end) {
int block_type;
u32 block_size;
if (lzx_read_block_header(d, &is, recent_offsets,
&block_type, &block_size))
return -1;
if (block_size < 1 || block_size > out_end - out_next)
return -1;
if (likely(block_type != LZX_BLOCKTYPE_UNCOMPRESSED)) {
/* Compressed block */
if (lzx_decompress_block(d, &is, block_type, block_size,
out_begin, out_next,
recent_offsets))
return -1;
/* If the first E8 byte was in this block, then it must
* have been encoded as a literal using mainsym E8. */
may_have_e8_byte |= d->maincode_lens[0xE8];
} else {
/* Uncompressed block */
if (bitstream_read_bytes(&is, out_next, block_size))
return -1;
/* Re-align the bitstream if needed. */
if (block_size & 1)
bitstream_read_byte(&is);
/* There may have been an E8 byte in the block. */
may_have_e8_byte = 1;
}
out_next += block_size;
}
/* Postprocess the data unless it cannot possibly contain E8 bytes. */
if (may_have_e8_byte)
lzx_postprocess(uncompressed_data, uncompressed_size);
return 0;
}
struct lzx_decompressor *
lzx_allocate_decompressor(size_t max_block_size)
{
unsigned window_order;
struct lzx_decompressor *d;
window_order = lzx_get_window_order(max_block_size);
if (window_order == 0) {
errno = EINVAL;
return NULL;
}
d = aligned_malloc(sizeof(*d), DECODE_TABLE_ALIGNMENT);
if (!d)
return NULL;
d->window_order = window_order;
d->num_main_syms = lzx_get_num_main_syms(window_order);
/* Initialize 'd->extra_offset_bits_minus_aligned'. */
STATIC_ASSERT(sizeof(d->extra_offset_bits_minus_aligned) ==
sizeof(lzx_extra_offset_bits));
STATIC_ASSERT(sizeof(d->extra_offset_bits) ==
sizeof(lzx_extra_offset_bits));
memcpy(d->extra_offset_bits_minus_aligned, lzx_extra_offset_bits,
sizeof(lzx_extra_offset_bits));
for (unsigned offset_slot = LZX_MIN_ALIGNED_OFFSET_SLOT;
offset_slot < LZX_MAX_OFFSET_SLOTS; offset_slot++)
{
d->extra_offset_bits_minus_aligned[offset_slot] -=
LZX_NUM_ALIGNED_OFFSET_BITS;
}
return d;
}
void
lzx_free_decompressor(struct lzx_decompressor *d)
{
aligned_free(d);
}
ntfs2btrfs-20240115/src/ebiggers/system_compression.h000066400000000000000000000035101455127722500225540ustar00rootroot00000000000000/*
* system_compression.h - declarations for accessing System Compressed files
*
* Copyright (C) 2015 Eric Biggers
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 2 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see .
*/
#pragma once
#ifdef __cplusplus
extern "C" {
#endif
#include <stddef.h>
#include <stdint.h>
/* System compressed file access */
struct ntfs_system_decompression_ctx;
extern void
ntfs_close_system_decompression_ctx(struct ntfs_system_decompression_ctx *ctx);
/* XPRESS decompression */
struct xpress_decompressor;
extern struct xpress_decompressor *xpress_allocate_decompressor(void);
extern int xpress_decompress(struct xpress_decompressor *decompressor,
const void *compressed_data, size_t compressed_size,
void *uncompressed_data, size_t uncompressed_size);
extern void xpress_free_decompressor(struct xpress_decompressor *decompressor);
/* LZX decompression */
struct lzx_decompressor;
extern struct lzx_decompressor *
lzx_allocate_decompressor(size_t max_block_size);
extern int lzx_decompress(struct lzx_decompressor *decompressor,
const void *compressed_data, size_t compressed_size,
void *uncompressed_data, size_t uncompressed_size);
extern void lzx_free_decompressor(struct lzx_decompressor *decompressor);
#ifdef __cplusplus
}
#endif
ntfs2btrfs-20240115/src/ebiggers/xpress_constants.h000066400000000000000000000006531455127722500222340ustar00rootroot00000000000000/*
* xpress_constants.h
*
* Constants for the XPRESS compression format.
*/
#ifndef _XPRESS_CONSTANTS_H
#define _XPRESS_CONSTANTS_H
#define XPRESS_NUM_CHARS 256
#define XPRESS_NUM_SYMBOLS 512
#define XPRESS_MAX_CODEWORD_LEN 15
#define XPRESS_END_OF_DATA 256
#define XPRESS_MIN_OFFSET 1
#define XPRESS_MAX_OFFSET 65535
#define XPRESS_MIN_MATCH_LEN 3
#define XPRESS_MAX_MATCH_LEN 65538
#endif /* _XPRESS_CONSTANTS_H */
ntfs2btrfs-20240115/src/ebiggers/xpress_decompress.c000066400000000000000000000126511455127722500223600ustar00rootroot00000000000000/*
* xpress_decompress.c
*
* A decompressor for the XPRESS compression format (Huffman variant).
*/
/*
*
* Copyright (C) 2012-2016 Eric Biggers
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 2 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see .
*/
/*
* The XPRESS compression format is an LZ77 and Huffman-code based algorithm.
* That means it is fairly similar to LZX compression, but XPRESS is simpler, so
* it is a little faster to compress and decompress.
*
* The XPRESS compression format is mostly documented in a file called "[MS-XCA]
* Xpress Compression Algorithm". In the MSDN library, it can currently be
* found under Open Specifications => Protocols => Windows Protocols => Windows
* Server Protocols => [MS-XCA] Xpress Compression Algorithm". The format in
* WIMs is specifically the algorithm labeled as the "LZ77+Huffman Algorithm"
* (there apparently are some other versions of XPRESS as well).
*
* If you are already familiar with the LZ77 algorithm and Huffman coding, the
* XPRESS format is fairly simple. The compressed data begins with 256 bytes
* that contain 512 4-bit integers that are the lengths of the symbols in the
* Huffman code used for match/literal headers. In contrast with more
* complicated formats such as DEFLATE and LZX, this is the only Huffman code
* that is used for the entirety of the XPRESS compressed data, and the codeword
* lengths are not encoded with a pretree.
*
* The rest of the compressed data is Huffman-encoded symbols. Values 0 through
* 255 represent the corresponding literal bytes. Values 256 through 511
* represent matches and may require extra bits or bytes to be read to get the
* match offset and match length.
*
* The trickiest part is probably the way in which literal bytes for match
* lengths are interleaved in the bitstream.
*
* Also, a caveat--- according to Microsoft's documentation for XPRESS,
*
* "Some implementation of the decompression algorithm expect an extra
* symbol to mark the end of the data. Specifically, some implementations
* fail during decompression if the Huffman symbol 256 is not found after
* the actual data."
*
* This is the case with Microsoft's implementation in WIMGAPI, for example. So
* although our implementation doesn't currently check for this extra symbol,
* compressors would be wise to add it.
*/
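/*
 * Illustrative example (not part of the original source): the Huffman symbol
 * 0x153 (339) is a match symbol, since it is >= 256. Its low nibble (3) is
 * the length header and the next nibble (5) is log2 of the offset, so the
 * decoder reads 5 further offset bits (giving an offset in 32..63) and the
 * match length is 3 + XPRESS_MIN_MATCH_LEN = 6. See xpress_decompress() below.
 */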
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include "decompress_common.h"
#include "system_compression.h"
#include "xpress_constants.h"
/* This value is chosen for fast decompression. */
#define XPRESS_TABLEBITS 11
struct xpress_decompressor {
union {
DECODE_TABLE(decode_table, XPRESS_NUM_SYMBOLS,
XPRESS_TABLEBITS, XPRESS_MAX_CODEWORD_LEN);
u8 lens[XPRESS_NUM_SYMBOLS];
};
DECODE_TABLE_WORKING_SPACE(working_space, XPRESS_NUM_SYMBOLS,
XPRESS_MAX_CODEWORD_LEN);
} _aligned_attribute(DECODE_TABLE_ALIGNMENT);
int
xpress_decompress(struct xpress_decompressor * d,
const void *compressed_data, size_t compressed_size,
void *uncompressed_data, size_t uncompressed_size)
{
const u8 * const in_begin = compressed_data;
u8 * const out_begin = uncompressed_data;
u8 *out_next = out_begin;
u8 * const out_end = out_begin + uncompressed_size;
struct input_bitstream is;
/* Read the Huffman codeword lengths. */
if (compressed_size < XPRESS_NUM_SYMBOLS / 2)
return -1;
for (int i = 0; i < XPRESS_NUM_SYMBOLS / 2; i++) {
d->lens[2 * i + 0] = in_begin[i] & 0xf;
d->lens[2 * i + 1] = in_begin[i] >> 4;
}
/* Build a decoding table for the Huffman code. */
if (make_huffman_decode_table(d->decode_table, XPRESS_NUM_SYMBOLS,
XPRESS_TABLEBITS, d->lens,
XPRESS_MAX_CODEWORD_LEN,
d->working_space))
return -1;
/* Decode the matches and literals. */
init_input_bitstream(&is, in_begin + XPRESS_NUM_SYMBOLS / 2,
compressed_size - XPRESS_NUM_SYMBOLS / 2);
while (out_next != out_end) {
unsigned sym;
unsigned log2_offset;
u32 length;
u32 offset;
sym = read_huffsym(&is, d->decode_table,
XPRESS_TABLEBITS, XPRESS_MAX_CODEWORD_LEN);
if (sym < XPRESS_NUM_CHARS) {
/* Literal */
*out_next++ = sym;
} else {
/* Match */
length = sym & 0xf;
log2_offset = (sym >> 4) & 0xf;
bitstream_ensure_bits(&is, 16);
offset = ((u32)1 << log2_offset) |
bitstream_pop_bits(&is, log2_offset);
if (length == 0xf) {
length += bitstream_read_byte(&is);
if (length == 0xf + 0xff)
length = bitstream_read_u16(&is);
}
length += XPRESS_MIN_MATCH_LEN;
if (unlikely(lz_copy(length, offset,
out_begin, out_next, out_end,
XPRESS_MIN_MATCH_LEN)))
return -1;
out_next += length;
}
}
return 0;
}
struct xpress_decompressor *
xpress_allocate_decompressor(void)
{
return aligned_malloc(sizeof(struct xpress_decompressor),
DECODE_TABLE_ALIGNMENT);
}
void
xpress_free_decompressor(struct xpress_decompressor *d)
{
aligned_free(d);
}
ntfs2btrfs-20240115/src/ntfs.cpp000066400000000000000000000703101455127722500163270ustar00rootroot00000000000000/* Copyright (c) Mark Harmstone 2020
*
* This file is part of ntfs2btrfs.
*
* Ntfs2btrfs is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public Licence as published by
* the Free Software Foundation, either version 2 of the Licence, or
* (at your option) any later version.
*
* Ntfs2btrfs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public Licence for more details.
*
* You should have received a copy of the GNU General Public Licence
* along with Ntfs2btrfs. If not, see <https://www.gnu.org/licenses/>. */
#define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING
#include "ntfs2btrfs.h"
#include "ntfs.h"
#include <algorithm>
#include <string.h>
#include <locale>
#include <codecvt>
#ifndef _WIN32
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#endif
using namespace std;
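/* NTFS multi-sector structures (file records, index records) end each sector
 * with a copy of the update sequence number, with the displaced bytes saved in
 * the update sequence array. This checks the sequence number at the end of
 * every sector and puts the original bytes back. */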
static void process_fixups(MULTI_SECTOR_HEADER* header, uint64_t length, unsigned int sector_size) {
uint64_t sectors;
uint16_t* seq;
uint8_t* ptr;
if (length % sector_size != 0)
throw formatted_error("Length was not a multiple of sector_size.");
sectors = length / sector_size;
if (header->UpdateSequenceArraySize < sectors + 1)
throw formatted_error("UpdateSequenceArraySize was {:x}, expected {:x}", header->UpdateSequenceArraySize, sectors + 1);
seq = (uint16_t*)((uint8_t*)header + header->UpdateSequenceArrayOffset);
ptr = (uint8_t*)header + sector_size - sizeof(uint16_t);
for (unsigned int i = 0; i < sectors; i++) {
if (*(uint16_t*)ptr != seq[0])
throw formatted_error("Update sequence mismatch.");
*(uint16_t*)ptr = seq[i + 1];
ptr += sector_size;
}
}
ntfs_file::ntfs_file(ntfs& dev, uint64_t inode) : dev(dev), inode(inode) {
file_record_buf.resize((size_t)dev.file_record_size);
if (inode == 0) {
dev.seek(dev.boot_sector->MFT * dev.boot_sector->BytesPerSector * dev.boot_sector->SectorsPerCluster);
dev.read(file_record_buf.data(), (uint32_t)dev.file_record_size);
} else { // read from MFT
auto str = dev.mft->read(inode * dev.file_record_size, (uint32_t)dev.file_record_size);
memcpy(file_record_buf.data(), str.data(), (uint32_t)dev.file_record_size); // FIXME - can we avoid copy?
}
file_record = reinterpret_cast<FILE_RECORD_SEGMENT_HEADER*>(file_record_buf.data());
if (file_record->MultiSectorHeader.Signature != NTFS_FILE_SIGNATURE) {
throw formatted_error("Invalid file signature ({:08x}, expected {:08x}).",
file_record->MultiSectorHeader.Signature, NTFS_FILE_SIGNATURE);
}
process_fixups(&file_record->MultiSectorHeader, dev.file_record_size, dev.boot_sector->BytesPerSector);
}
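/* Parse an NTFS "mapping pairs" (data run) array. Each run begins with a
 * header byte: the low nibble is the byte length of the run-length field, the
 * high nibble the byte length of the signed, relative LCN field; a zero-sized
 * LCN field means a sparse run. Illustrative example (not from the original
 * source): the bytes 21 18 34 56 describe a run of 0x18 clusters whose LCN is
 * 0x5634 higher than that of the previous run. */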
void read_nonresident_mappings(const ATTRIBUTE_RECORD_HEADER& att, list<mapping>& mappings,
uint32_t cluster_size, uint64_t vdl) {
uint64_t next_vcn = att.Form.Nonresident.LowestVcn, current_lcn = 0, current_vcn;
uint8_t* stream = (uint8_t*)&att + att.Form.Nonresident.MappingPairsOffset;
uint64_t max_cluster = vdl / cluster_size;
if (vdl & (cluster_size - 1))
max_cluster++;
if (max_cluster == 0)
return;
while (true) {
uint64_t v, l;
int64_t v_val, l_val;
current_vcn = next_vcn;
if (*stream == 0)
break;
v = *stream & 0xf;
l = *stream >> 4;
stream++;
if (v > 8)
throw formatted_error("Error: v > 8");
if (l > 8)
throw formatted_error("Error: l > 8");
// FIXME - do we need to make sure that int64_t pointers don't go past end of buffer?
v_val = *(int64_t*)stream;
v_val &= (1ull << (v * 8)) - 1;
if ((uint64_t)v_val & (1ull << ((v * 8) - 1))) // sign-extend if negative
v_val |= 0xffffffffffffffff & ~((1ull << (v * 8)) - 1);
stream += v;
next_vcn += v_val;
if (l != 0) {
l_val = *(int64_t*)stream;
l_val &= (1ull << (l * 8)) - 1;
if ((uint64_t)l_val & (1ull << ((l * 8) - 1))) // sign-extend if negative
l_val |= 0xffffffffffffffff & ~((1ull << (l * 8)) - 1);
stream += l;
current_lcn += l_val;
if (next_vcn > max_cluster)
next_vcn = max_cluster;
mappings.emplace_back(current_lcn, current_vcn, next_vcn - current_vcn);
} else
mappings.emplace_back(0, current_vcn, next_vcn - current_vcn);
if (next_vcn == max_cluster)
break;
}
}
buffer_t ntfs_file::read_nonresident_attribute(uint64_t offset, uint32_t length, const ATTRIBUTE_RECORD_HEADER* att) {
list<mapping> mappings;
uint32_t cluster_size = dev.boot_sector->BytesPerSector * dev.boot_sector->SectorsPerCluster;
read_nonresident_mappings(*att, mappings, cluster_size, att->Form.Nonresident.ValidDataLength);
// FIXME - do we need to check that mappings is contiguous and in order?
if (offset >= (uint64_t)att->Form.Nonresident.FileSize)
return {};
if (offset + length > (uint64_t)att->Form.Nonresident.FileSize || length == 0)
length = (uint32_t)(att->Form.Nonresident.FileSize - offset);
buffer_t ret(length);
memset(ret.data(), 0, length);
for (const auto& m : mappings) {
if (offset + length >= m.vcn * cluster_size && offset < (m.vcn + m.length) * cluster_size) {
uint32_t buf_start, buf_end;
uint64_t read_start, read_end;
unsigned int skip_start, skip_end;
if (offset < m.vcn * cluster_size)
buf_start = (uint32_t)((m.vcn * cluster_size) - offset);
else
buf_start = 0;
if (offset + length > (m.vcn + m.length) * cluster_size)
buf_end = min((uint32_t)((m.vcn + m.length) * cluster_size), length);
else
buf_end = length;
if (buf_end == buf_start)
continue;
read_start = m.lcn * cluster_size;
if (offset > m.vcn * cluster_size)
read_start += offset - (m.vcn * cluster_size);
read_end = read_start + buf_end - buf_start;
if ((read_start % dev.boot_sector->BytesPerSector) != 0) {
skip_start = (unsigned int)(read_start % dev.boot_sector->BytesPerSector);
read_start -= skip_start;
} else
skip_start = 0;
if ((read_end % dev.boot_sector->BytesPerSector) != 0) {
skip_end = (unsigned int)(dev.boot_sector->BytesPerSector - (read_end % dev.boot_sector->BytesPerSector));
read_end += skip_end;
} else
skip_end = 0;
dev.seek(read_start);
if (skip_start != 0 || skip_end != 0) {
buffer_t tmp(read_end - read_start);
dev.read(tmp.data(), tmp.size());
memcpy(&ret[buf_start], &tmp[skip_start], buf_end - buf_start);
} else
dev.read(&ret[buf_start], buf_end - buf_start);
}
}
// FIXME - zero end if ValidDataLength < FileSize
return ret;
}
buffer_t ntfs_file::read(uint64_t offset, uint32_t length, enum ntfs_attribute type, u16string_view name) {
buffer_t ret;
bool found = false;
loop_through_atts([&](const ATTRIBUTE_RECORD_HEADER& att, string_view res_data, u16string_view att_name) -> bool {
if (att.TypeCode != type || name != att_name)
return true;
if (att.Flags & ATTRIBUTE_FLAG_ENCRYPTED)
throw formatted_error("Cannot read encrypted attribute");
if (att.Flags & ATTRIBUTE_FLAG_COMPRESSION_MASK)
throw formatted_error("FIXME - handle reading compressed attribute"); // FIXME
if (att.FormCode == NTFS_ATTRIBUTE_FORM::NONRESIDENT_FORM)
ret = read_nonresident_attribute(offset, length, &att);
else {
if (offset >= res_data.length())
ret.clear();
else {
if (offset + length > res_data.length() || length == 0)
length = (uint32_t)(res_data.length() - offset);
ret.resize(length);
memcpy(ret.data(), &res_data[(uint32_t)offset], length);
}
}
found = true;
return false;
});
if (!found)
throw formatted_error("Attribute not found.");
return ret;
}
list<mapping> ntfs_file::read_mappings(enum ntfs_attribute type, u16string_view name) {
list<mapping> mappings;
loop_through_atts([&](const ATTRIBUTE_RECORD_HEADER& att, string_view, u16string_view att_name) -> bool {
if (att.TypeCode != type || name != att_name)
return true;
if (att.FormCode == NTFS_ATTRIBUTE_FORM::RESIDENT_FORM)
throw formatted_error("Attribute is resident");
uint32_t cluster_size = dev.boot_sector->BytesPerSector * dev.boot_sector->SectorsPerCluster;
read_nonresident_mappings(att, mappings, cluster_size, att.Form.Nonresident.ValidDataLength);
return false;
});
return mappings;
}
ntfs::ntfs(const string& fn) {
unsigned int sector_size = 512; // FIXME - find from device
#ifdef _WIN32
bool drive = false;
DWORD ret;
wstring_convert<codecvt_utf8_utf16<char16_t>, char16_t> convert;
u16string namew;
if ((fn.length() == 2 || fn.length() == 3) && ((fn[0] >= 'A' && fn[0] <= 'Z') || (fn[0] >= 'a' && fn[0] <= 'z')) && fn[1] == ':' && (fn.length() == 2 || fn[2] == '\\')) {
namew = u"\\\\.\\X:";
namew[4] = fn[0];
drive = true;
} else
namew = convert.from_bytes(fn.data(), fn.data() + fn.length());
h = CreateFileW((WCHAR*)namew.c_str(), GENERIC_READ | GENERIC_WRITE, FILE_SHARE_READ | FILE_SHARE_WRITE,
nullptr, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, nullptr);
if (h == INVALID_HANDLE_VALUE)
throw last_error("CreateFile", GetLastError());
if (drive) {
if (!DeviceIoControl(h, FSCTL_LOCK_VOLUME, nullptr, 0, nullptr, 0, &ret, nullptr)) {
auto le = GetLastError();
CloseHandle(h);
throw last_error("FSCTL_LOCK_VOLUME", le);
}
}
#else
fd = open(fn.c_str(), O_RDWR | O_EXCL);
if (fd < 0)
throw formatted_error("open returned {} (errno = {}).", fd, errno);
#endif
// read NTFS_BOOT_SECTOR
boot_sector_buf.resize((size_t)sector_align(sizeof(NTFS_BOOT_SECTOR), sector_size));
seek(0);
read(boot_sector_buf.data(), boot_sector_buf.size());
boot_sector = reinterpret_cast<NTFS_BOOT_SECTOR*>(boot_sector_buf.data());
// make sure is NTFS
if (memcmp(boot_sector->FsName, NTFS_FS_NAME, sizeof(NTFS_FS_NAME) - 1))
throw formatted_error("Device was not an NTFS volume.");
if (boot_sector->ClustersPerMFTRecord < 0)
file_record_size = 1ull << -boot_sector->ClustersPerMFTRecord;
else
file_record_size = (uint64_t)boot_sector->BytesPerSector * (uint64_t)boot_sector->SectorsPerCluster * (uint64_t)boot_sector->ClustersPerMFTRecord;
mft.reset(new ntfs_file(*this, 0));
ntfs_file vol_file(*this, NTFS_VOLUME_INODE);
auto vi_str = vol_file.read(0, 0, ntfs_attribute::VOLUME_INFORMATION);
auto vi = reinterpret_cast<const VOLUME_INFORMATION*>(vi_str.data());
if (vi->MajorVersion > 3 || (vi->MajorVersion == 3 && vi->MinorVersion > 1))
throw formatted_error("Unsupported NTFS version {}.{}.", vi->MajorVersion, vi->MinorVersion);
if (vi->Flags & NTFS_VOLUME_DIRTY)
throw formatted_error("Cannot convert volume with dirty bit set.");
}
static buffer_t read_from_mappings(const list<mapping>& mappings, uint64_t start, uint32_t length, ntfs& dev) {
uint32_t sector_size = dev.boot_sector->BytesPerSector;
uint32_t cluster_size = sector_size * dev.boot_sector->SectorsPerCluster;
buffer_t s(length);
uint64_t cluster_start = start / cluster_size;
uint64_t cluster_end = sector_align(start + length, cluster_size) / cluster_size;
for (const auto& m : mappings) {
if (m.vcn <= cluster_end && m.vcn + m.length >= cluster_start) {
uint64_t read_start = max(start - (start % dev.boot_sector->BytesPerSector), m.vcn * cluster_size);
uint64_t read_end = min(sector_align(start + length, dev.boot_sector->BytesPerSector), (m.vcn + m.length) * cluster_size);
if (read_end == read_start)
continue;
buffer_t buf((uint32_t)(read_end - read_start));
dev.seek(read_start + ((m.lcn - m.vcn) * cluster_size));
dev.read(buf.data(), (uint32_t)(read_end - read_start));
memcpy(s.data(), buf.data() + read_start - start, (size_t)min(read_end - read_start, length - read_start + start));
}
}
return s;
}
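/* Search a B-tree index (the $SII index of $Secure, in practice) for a 32-bit
 * key, descending into subnode index records where necessary. Returns the
 * entry's data if found, or nullopt otherwise. */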
static optional<buffer_t> btree_search(const index_root& ir, const list<mapping>& mappings, const index_node_header& inh,
ntfs& dev, uint32_t key) {
auto ent = reinterpret_cast<const index_entry*>((uint8_t*)&inh + inh.first_entry);
do {
if (ent->flags & INDEX_ENTRY_SUBNODE) {
bool skip = false;
if (!(ent->flags & INDEX_ENTRY_LAST)) {
uint32_t v1 = *(uint32_t*)((uint8_t*)ent + sizeof(index_entry));
if (v1 == key)
return buffer_t((uint8_t*)ent + sizeof(index_entry) + ent->stream_length, (uint8_t*)ent + ent->entry_length - sizeof(uint64_t));
skip = key > v1;
}
if (!skip) {
uint64_t vcn = ((MFT_SEGMENT_REFERENCE*)((uint8_t*)ent + ent->entry_length - sizeof(uint64_t)))->SegmentNumber;
if (ir.bytes_per_index_record < dev.boot_sector->BytesPerSector * dev.boot_sector->SectorsPerCluster)
vcn *= dev.boot_sector->BytesPerSector;
else
vcn *= (uint64_t)dev.boot_sector->BytesPerSector * (uint64_t)dev.boot_sector->SectorsPerCluster;
auto data = read_from_mappings(mappings, vcn, ir.bytes_per_index_record, dev);
auto& rec = *reinterpret_cast<index_record*>(data.data());
if (rec.MultiSectorHeader.Signature != INDEX_RECORD_MAGIC)
throw formatted_error("Index record magic was not INDX.");
process_fixups(&rec.MultiSectorHeader, ir.bytes_per_index_record, dev.boot_sector->BytesPerSector);
return btree_search(ir, mappings, rec.header, dev, key);
}
} else if (!(ent->flags & INDEX_ENTRY_LAST)) {
uint32_t v = *(uint32_t*)((uint8_t*)ent + sizeof(index_entry));
if (v == key)
return buffer_t((uint8_t*)ent + sizeof(index_entry) + ent->stream_length, (uint8_t*)ent + ent->entry_length);
else if (v > key)
break;
}
if (ent->flags & INDEX_ENTRY_LAST)
break;
ent = reinterpret_cast<const index_entry*>((uint8_t*)ent + ent->entry_length);
} while (true);
return nullopt;
}
string_view ntfs::find_sd(uint32_t id, ntfs_file& secure) {
if (sd_list.count(id) > 0) {
const auto& sd = sd_list.at(id);
return {(char*)sd.data(), sd.size()};
}
auto ir_str = secure.read(0, 0, ntfs_attribute::INDEX_ROOT, u"$SII");
auto ia = secure.read_mappings(ntfs_attribute::INDEX_ALLOCATION, u"$SII");
const auto& ir = *reinterpret_cast<const index_root*>(ir_str.data());
auto ret = btree_search(ir, ia, ir.node_header, *this, id);
if (!ret.has_value())
return "";
const auto& sde = *reinterpret_cast<const sd_entry*>(ret.value().data());
auto sde2 = secure.read(sde.offset, sde.length, ntfs_attribute::DATA, u"$SDS");
if (memcmp(&sde, sde2.data(), sizeof(sd_entry)))
throw formatted_error("SD headers do not match.");
auto sv = string_view((char*)sde2.data(), sde2.size()).substr(sizeof(sd_entry));
buffer_t buf(sv.data(), sv.data() + sv.length());
auto [it, success] = sd_list.emplace(make_pair(id, buffer_t{}));
it->second.swap(buf);
return string_view((char*)it->second.data(), it->second.size());
}
static void walk_btree(const index_root& ir, const list<mapping>& mappings, const index_node_header& inh, ntfs& dev,
const invocable<const index_entry&, string_view> auto& func, unsigned int level) {
auto ent = reinterpret_cast<const index_entry*>((uint8_t*)&inh + inh.first_entry);
do {
if (ent->flags & INDEX_ENTRY_SUBNODE) {
uint64_t vcn = ((MFT_SEGMENT_REFERENCE*)((uint8_t*)ent + ent->entry_length - sizeof(uint64_t)))->SegmentNumber;
if (ir.bytes_per_index_record < dev.boot_sector->BytesPerSector * dev.boot_sector->SectorsPerCluster)
vcn *= dev.boot_sector->BytesPerSector;
else
vcn *= (uint64_t)dev.boot_sector->BytesPerSector * (uint64_t)dev.boot_sector->SectorsPerCluster;
auto data = read_from_mappings(mappings, vcn, ir.bytes_per_index_record, dev);
auto rec = reinterpret_cast<index_record*>(data.data());
if (rec->MultiSectorHeader.Signature != INDEX_RECORD_MAGIC)
throw formatted_error("Index record magic was not INDX.");
process_fixups(&rec->MultiSectorHeader, ir.bytes_per_index_record, dev.boot_sector->BytesPerSector);
walk_btree(ir, mappings, rec->header, dev, func, level + 1);
} else
func(*ent, string_view((const char*)ent + sizeof(index_entry), ent->stream_length));
if (ent->flags & INDEX_ENTRY_LAST)
break;
ent = reinterpret_cast<const index_entry*>((uint8_t*)ent + ent->entry_length);
} while (true);
}
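/* Recursively add inode and every directory below it (found via its $I30
 * index) to skiplist. */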
void populate_skip_list(ntfs& dev, uint64_t inode, list<uint64_t>& skiplist) {
ntfs_file file(dev, inode);
if (!file.is_directory())
return;
auto ir_str = file.read(0, 0, ntfs_attribute::INDEX_ROOT, u"$I30");
auto ia = file.read_mappings(ntfs_attribute::INDEX_ALLOCATION, u"$I30");
const auto& ir = *reinterpret_cast<const index_root*>(ir_str.data());
skiplist.emplace_back(inode);
walk_btree(ir, ia, ir.node_header, dev, [&](const index_entry& ent, string_view data) {
if (data.empty())
return;
auto fn = reinterpret_cast<const FILE_NAME*>(data.data());
if (fn->FileAttributes & FILE_ATTRIBUTE_DIRECTORY_MFT) {
bool found = false;
uint64_t dir_inode = ent.file_reference.SegmentNumber;
for (auto n : skiplist) {
if (n == dir_inode) {
found = true;
break;
}
}
if (!found)
populate_skip_list(dev, dir_inode, skiplist);
}
}, 0);
}
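/* Invoke func for each attribute record of this file, following any
 * ATTRIBUTE_LIST into the other file records it references. func receives the
 * attribute header, its resident data (if any) and its name; returning false
 * stops the iteration. */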
void ntfs_file::loop_through_atts(const function<bool(const ATTRIBUTE_RECORD_HEADER&, string_view, u16string_view)>& func) {
auto att = reinterpret_cast<const ATTRIBUTE_RECORD_HEADER*>((uint8_t*)file_record + file_record->FirstAttributeOffset);
size_t offset = file_record->FirstAttributeOffset;
buffer_t attlist;
while (true) {
if (att->TypeCode == (enum ntfs_attribute)0xffffffff || att->RecordLength == 0)
break;
if (att->TypeCode == ntfs_attribute::ATTRIBUTE_LIST) {
if (att->FormCode == NTFS_ATTRIBUTE_FORM::NONRESIDENT_FORM)
attlist = read_nonresident_attribute(0, (uint32_t)att->Form.Nonresident.FileSize, att);
else {
attlist.resize(att->Form.Resident.ValueLength);
memcpy(attlist.data(), (uint8_t*)att + att->Form.Resident.ValueOffset, att->Form.Resident.ValueLength);
}
break;
}
offset += att->RecordLength;
att = reinterpret_cast<const ATTRIBUTE_RECORD_HEADER*>((uint8_t*)att + att->RecordLength);
}
if (!attlist.empty()) {
vector<uint64_t> other_inodes;
{
auto ent = (const attribute_list_entry*)attlist.data();
size_t left = attlist.size();
while (true) {
uint64_t file_reference = ent->file_reference.SegmentNumber;
if (file_reference == inode) { // contained elsewhere in this inode
att = reinterpret_cast<const ATTRIBUTE_RECORD_HEADER*>((uint8_t*)file_record + file_record->FirstAttributeOffset);
offset = file_record->FirstAttributeOffset;
while (true) {
if (att->TypeCode == (enum ntfs_attribute)0xffffffff || att->RecordLength == 0)
break;
if (att->TypeCode == ent->type && att->NameLength == ent->name_length && att->Instance == ent->instance) {
if (att->NameLength == 0 || !memcmp((uint8_t*)file_record + offset + att->NameOffset, (uint8_t*)ent + ent->name_offset, att->NameLength * sizeof(char16_t))) {
string_view data;
u16string_view name;
if (att->FormCode == NTFS_ATTRIBUTE_FORM::RESIDENT_FORM)
data = string_view((const char*)file_record + offset + att->Form.Resident.ValueOffset, att->Form.Resident.ValueLength);
if (att->NameLength != 0)
name = u16string_view((char16_t*)((uint8_t*)file_record + offset + att->NameOffset), att->NameLength);
if (!func(*att, data, name))
return;
break;
}
}
offset += att->RecordLength;
att = reinterpret_cast<const ATTRIBUTE_RECORD_HEADER*>((uint8_t*)att + att->RecordLength);
}
} else {
bool found = false;
for (auto n : other_inodes) {
if (n == file_reference) {
found = true;
break;
}
}
if (!found)
other_inodes.push_back(file_reference);
}
if (left <= ent->record_length)
break;
left -= ent->record_length;
ent = (const attribute_list_entry*)((uint8_t*)ent + ent->record_length);
}
}
if (!other_inodes.empty()) {
for (auto file_reference : other_inodes) {
ntfs_file oth(dev, file_reference);
auto ent = (const attribute_list_entry*)attlist.data();
auto left = attlist.size();
while (true) {
if (ent->file_reference.SegmentNumber == file_reference) {
att = reinterpret_cast<const ATTRIBUTE_RECORD_HEADER*>((uint8_t*)oth.file_record + oth.file_record->FirstAttributeOffset);
offset = oth.file_record->FirstAttributeOffset;
while (true) {
if (att->TypeCode == (enum ntfs_attribute)0xffffffff || att->RecordLength == 0)
break;
if (att->TypeCode == ent->type && att->NameLength == ent->name_length && att->Instance == ent->instance) {
if (att->NameLength == 0 || !memcmp((uint8_t*)oth.file_record + offset + att->NameOffset, (uint8_t*)ent + ent->name_offset, att->NameLength * sizeof(char16_t))) {
string_view data;
u16string_view name;
if (att->FormCode == NTFS_ATTRIBUTE_FORM::RESIDENT_FORM)
data = string_view((const char*)oth.file_record + offset + att->Form.Resident.ValueOffset, att->Form.Resident.ValueLength);
if (att->NameLength != 0)
name = u16string_view((char16_t*)((uint8_t*)oth.file_record + offset + att->NameOffset), att->NameLength);
if (!func(*att, data, name))
return;
break;
}
}
offset += att->RecordLength;
att = reinterpret_cast<const ATTRIBUTE_RECORD_HEADER*>((uint8_t*)att + att->RecordLength);
}
}
if (left <= ent->record_length)
break;
left -= ent->record_length;
ent = (const attribute_list_entry*)((uint8_t*)ent + ent->record_length);
}
}
}
return;
}
att = reinterpret_cast<const ATTRIBUTE_RECORD_HEADER*>((uint8_t*)file_record + file_record->FirstAttributeOffset);
offset = file_record->FirstAttributeOffset;
while (true) {
if (att->TypeCode == (enum ntfs_attribute)0xffffffff || att->RecordLength == 0)
break;
string_view data;
u16string_view name;
if (att->FormCode == NTFS_ATTRIBUTE_FORM::RESIDENT_FORM)
data = string_view((const char*)file_record + offset + att->Form.Resident.ValueOffset, att->Form.Resident.ValueLength);
if (att->NameLength != 0)
name = u16string_view((char16_t*)((uint8_t*)file_record + offset + att->NameOffset), att->NameLength);
if (!func(*att, data, name))
return;
offset += att->RecordLength;
att = reinterpret_cast<const ATTRIBUTE_RECORD_HEADER*>((uint8_t*)att + att->RecordLength);
}
}
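// Reconstructs the full path of this file by walking FILE_NAME attributes up
// the parent-directory chain (skipping DOS 8.3 names) until the root
// directory is reached, then joining the components with backslashes.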
string ntfs_file::get_filename() {
list<u16string> parts;
ntfs_file* f = this;
do {
uint64_t dir_num = 0;
f->loop_through_atts([&](const ATTRIBUTE_RECORD_HEADER& att, string_view res_data, u16string_view) -> bool {
if (att.TypeCode != ntfs_attribute::FILE_NAME || att.FormCode != NTFS_ATTRIBUTE_FORM::RESIDENT_FORM)
return true;
auto fn = reinterpret_cast<const FILE_NAME*>(res_data.data());
if (fn->Namespace == file_name_type::DOS)
return true;
if (fn->Parent.SegmentNumber != NTFS_ROOT_DIR_INODE)
dir_num = fn->Parent.SegmentNumber;
auto name = u16string_view(fn->FileName, fn->FileNameLength);
parts.emplace_back(name);
return false;
});
if (f != this)
delete f;
if (dir_num != 0)
f = new ntfs_file(dev, dir_num);
else
break;
} while (true);
u16string retw;
while (!parts.empty()) {
retw += u"\\" + parts.back();
parts.pop_back();
}
return utf16_to_utf8(retw);
}
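// Low-level device I/O helpers: seek/read/write on the underlying volume,
// using a Win32 handle on Windows and a POSIX file descriptor elsewhere.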
void ntfs::seek(uint64_t pos) {
#ifdef _WIN32
LARGE_INTEGER posli;
posli.QuadPart = pos;
if (!SetFilePointerEx(h, posli, nullptr, FILE_BEGIN))
throw last_error("SetFilePointerEx", GetLastError());
#else
if (lseek(fd, pos, SEEK_SET) == -1)
throw formatted_error("Error seeking to {:x} (errno = {}).", pos, errno);
#endif
}
void ntfs::read(uint8_t* buf, size_t length) {
#ifdef _WIN32
DWORD read;
if (!ReadFile(h, buf, (DWORD)length, &read, nullptr))
throw last_error("ReadFile", GetLastError());
#else
auto pos = lseek(fd, 0, SEEK_CUR);
auto orig_length = length;
do {
auto ret = ::read(fd, buf, length);
if (ret < 0)
throw formatted_error("Error reading {:x} bytes at {:x} (errno {}).", orig_length, pos, errno);
if ((size_t)ret == length)
break;
buf += ret;
length -= ret;
} while (true);
#endif
}
void ntfs::write(const uint8_t* buf, size_t length) {
#ifdef _WIN32
DWORD written;
if (!WriteFile(h, buf, (DWORD)length, &written, nullptr))
throw last_error("WriteFile", GetLastError());
#else
auto pos = lseek(fd, 0, SEEK_CUR);
auto orig_length = length;
do {
auto ret = ::write(fd, buf, length);
if (ret < 0)
throw formatted_error("Error writing {:x} bytes at {:x} (errno {}).", orig_length, pos, errno);
if ((size_t)ret == length)
break;
buf += ret;
length -= ret;
} while (true);
#endif
}
ntfs2btrfs-20240115/src/ntfs.h000066400000000000000000000361161455127722500160020ustar00rootroot00000000000000/* Copyright (c) Mark Harmstone 2020
*
* This file is part of ntfs2btrfs.
*
* Ntfs2btrfs is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public Licence as published by
* the Free Software Foundation, either version 2 of the Licence, or
* (at your option) any later version.
*
* Ntfs2btrfs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public Licence for more details.
*
* You should have received a copy of the GNU General Public Licence
* along with Ntfs2btrfs. If not, see <https://www.gnu.org/licenses/>. */
#pragma once
#include "ntfs2btrfs.h"
#include
#include
#include
#include
#include
#ifdef _WIN32
#include <windows.h>
#else
#include <unistd.h>
#endif
#pragma pack(push,1)
typedef struct {
uint8_t Jmp[3];
uint8_t FsName[8];
uint16_t BytesPerSector;
uint8_t SectorsPerCluster;
uint16_t ReservedSectors;
uint8_t Unused1[5];
uint8_t Media;
uint8_t Unused2[2];
uint16_t SectorsPerTrack;
uint16_t Heads;
uint32_t HiddenSectors;
uint32_t Unused3;
uint32_t Unknown;
uint64_t TotalSectors;
uint64_t MFT;
uint64_t MFTMirr;
int8_t ClustersPerMFTRecord;
uint8_t Padding1[3];
int8_t ClustersPerIndexRecord;
uint8_t Padding2[3];
uint64_t SerialNumber;
uint32_t Checksum;
} NTFS_BOOT_SECTOR;
#define NTFS_FS_NAME "NTFS "
// https://docs.microsoft.com/en-us/windows/win32/devnotes/attribute-record-header
#define ATTRIBUTE_FLAG_COMPRESSION_MASK 0x00ff
#define ATTRIBUTE_FLAG_SPARSE 0x8000
#define ATTRIBUTE_FLAG_ENCRYPTED 0x4000
enum class NTFS_ATTRIBUTE_FORM : uint8_t {
RESIDENT_FORM = 0,
NONRESIDENT_FORM = 1
};
enum class ntfs_attribute : uint32_t {
STANDARD_INFORMATION = 0x10,
ATTRIBUTE_LIST = 0x20,
FILE_NAME = 0x30,
VOLUME_VERSION = 0x40,
SECURITY_DESCRIPTOR = 0x50,
VOLUME_NAME = 0x60,
VOLUME_INFORMATION = 0x70,
DATA = 0x80,
INDEX_ROOT = 0x90,
INDEX_ALLOCATION = 0xA0,
BITMAP = 0xB0,
REPARSE_POINT = 0xC0,
EA_INFORMATION = 0xD0,
EA = 0xE0,
PROPERTY_SET = 0xF0,
LOGGED_UTILITY_STREAM = 0x100,
};
template<>
struct fmt::formatter<enum ntfs_attribute> {
constexpr auto parse(format_parse_context& ctx) {
auto it = ctx.begin();
if (it != ctx.end() && *it != '}')
throw format_error("invalid format");
return it;
}
template<typename format_context>
auto format(enum ntfs_attribute att, format_context& ctx) const {
switch (att) {
case ntfs_attribute::STANDARD_INFORMATION:
return fmt::format_to(ctx.out(), "STANDARD_INFORMATION");
case ntfs_attribute::ATTRIBUTE_LIST:
return fmt::format_to(ctx.out(), "ATTRIBUTE_LIST");
case ntfs_attribute::FILE_NAME:
return fmt::format_to(ctx.out(), "FILE_NAME");
case ntfs_attribute::VOLUME_VERSION:
return fmt::format_to(ctx.out(), "VOLUME_VERSION");
case ntfs_attribute::SECURITY_DESCRIPTOR:
return fmt::format_to(ctx.out(), "SECURITY_DESCRIPTOR");
case ntfs_attribute::VOLUME_NAME:
return fmt::format_to(ctx.out(), "VOLUME_NAME");
case ntfs_attribute::VOLUME_INFORMATION:
return fmt::format_to(ctx.out(), "VOLUME_INFORMATION");
case ntfs_attribute::DATA:
return fmt::format_to(ctx.out(), "DATA");
case ntfs_attribute::INDEX_ROOT:
return fmt::format_to(ctx.out(), "INDEX_ROOT");
case ntfs_attribute::INDEX_ALLOCATION:
return fmt::format_to(ctx.out(), "INDEX_ALLOCATION");
case ntfs_attribute::BITMAP:
return fmt::format_to(ctx.out(), "BITMAP");
case ntfs_attribute::REPARSE_POINT:
return fmt::format_to(ctx.out(), "REPARSE_POINT");
case ntfs_attribute::EA_INFORMATION:
return fmt::format_to(ctx.out(), "EA_INFORMATION");
case ntfs_attribute::EA:
return fmt::format_to(ctx.out(), "EA");
case ntfs_attribute::PROPERTY_SET:
return fmt::format_to(ctx.out(), "PROPERTY_SET");
case ntfs_attribute::LOGGED_UTILITY_STREAM:
return fmt::format_to(ctx.out(), "LOGGED_UTILITY_STREAM");
default:
return fmt::format_to(ctx.out(), "{:x}", (uint32_t)att);
}
}
};
typedef struct _ATTRIBUTE_RECORD_HEADER {
enum ntfs_attribute TypeCode;
uint16_t RecordLength;
uint16_t Unknown;
NTFS_ATTRIBUTE_FORM FormCode;
uint8_t NameLength;
uint16_t NameOffset;
uint16_t Flags;
uint16_t Instance;
union {
struct {
uint32_t ValueLength;
uint16_t ValueOffset;
uint8_t Reserved[2];
} Resident;
struct {
uint64_t LowestVcn;
uint64_t HighestVcn;
uint16_t MappingPairsOffset;
uint16_t CompressionUnit;
uint32_t Padding;
uint64_t AllocatedLength;
uint64_t FileSize;
uint64_t ValidDataLength;
uint64_t TotalAllocated;
} Nonresident;
} Form;
} ATTRIBUTE_RECORD_HEADER;
// https://docs.microsoft.com/en-us/windows/win32/devnotes/multi-sector-header
typedef struct {
uint32_t Signature;
uint16_t UpdateSequenceArrayOffset;
uint16_t UpdateSequenceArraySize;
} MULTI_SECTOR_HEADER;
// https://docs.microsoft.com/en-us/windows/win32/devnotes/mft-segment-reference
typedef struct {
uint64_t SegmentNumber : 48;
uint64_t SequenceNumber : 16;
} MFT_SEGMENT_REFERENCE;
// based on https://docs.microsoft.com/en-us/windows/win32/devnotes/file-record-segment-header and
// http://www.cse.scu.edu/~tschwarz/coen252_07Fall/Lectures/NTFS.html
typedef struct {
MULTI_SECTOR_HEADER MultiSectorHeader;
uint64_t LogFileSequenceNumber;
uint16_t SequenceNumber;
uint16_t HardLinkCount;
uint16_t FirstAttributeOffset;
uint16_t Flags;
uint32_t EntryUsedSize;
uint32_t EntryAllocatedSize;
MFT_SEGMENT_REFERENCE BaseFileRecordSegment;
uint16_t NextAttributeID;
} FILE_RECORD_SEGMENT_HEADER;
#define FILE_RECORD_SEGMENT_IN_USE 1
#define FILE_RECORD_IS_DIRECTORY 2
static const uint32_t NTFS_FILE_SIGNATURE = 0x454c4946; // "FILE"
#define NTFS_VOLUME_INODE 3
#define NTFS_ROOT_DIR_INODE 5
#define NTFS_BITMAP_INODE 6
#define NTFS_SECURE_INODE 9
// https://flatcap.org/linux-ntfs/ntfs/attributes/standard_information.html
typedef struct {
int64_t CreationTime;
int64_t LastAccessTime;
int64_t LastWriteTime;
int64_t ChangeTime;
uint32_t FileAttributes;
uint32_t MaximumVersions;
uint32_t VersionNumber;
uint32_t ClassId;
uint32_t OwnerId;
uint32_t SecurityId;
uint64_t QuotaCharged;
uint64_t USN;
} STANDARD_INFORMATION;
#define FILE_ATTRIBUTE_READONLY 0x00000001
#define FILE_ATTRIBUTE_HIDDEN 0x00000002
#define FILE_ATTRIBUTE_SYSTEM 0x00000004
#define FILE_ATTRIBUTE_DIRECTORY 0x00000010
#define FILE_ATTRIBUTE_ARCHIVE 0x00000020
#define FILE_ATTRIBUTE_DEVICE 0x00000040
#define FILE_ATTRIBUTE_NORMAL 0x00000080
#define FILE_ATTRIBUTE_TEMPORARY 0x00000100
#define FILE_ATTRIBUTE_SPARSE_FILE 0x00000200
#define FILE_ATTRIBUTE_REPARSE_POINT 0x00000400
#define FILE_ATTRIBUTE_COMPRESSED 0x00000800
#define FILE_ATTRIBUTE_OFFLINE 0x00001000
#define FILE_ATTRIBUTE_NOT_CONTENT_INDEXED 0x00002000
#define FILE_ATTRIBUTE_ENCRYPTED 0x00004000
#define FILE_ATTRIBUTE_VIRTUAL 0x00010000
#define FILE_ATTRIBUTE_DIRECTORY_MFT 0x10000000
// https://flatcap.org/linux-ntfs/ntfs/attributes/file_name.html
enum class file_name_type : uint8_t {
POSIX = 0,
WINDOWS = 1,
DOS = 2,
WINDOWS_AND_DOS = 3
};
typedef struct {
MFT_SEGMENT_REFERENCE Parent;
int64_t CreationTime;
int64_t LastAccessTime;
int64_t LastWriteTime;
int64_t ChangeTime;
uint64_t AllocationSize;
uint64_t EndOfFile;
uint32_t FileAttributes;
uint32_t EaSize;
uint8_t FileNameLength;
file_name_type Namespace;
char16_t FileName[1];
} FILE_NAME;
// https://flatcap.org/linux-ntfs/ntfs/concepts/node_header.html
typedef struct {
uint32_t first_entry;
uint32_t total_size;
uint32_t allocated_size;
uint32_t flags;
} index_node_header;
// https://flatcap.org/linux-ntfs/ntfs/concepts/index_entry.html
#define INDEX_ENTRY_SUBNODE 1
#define INDEX_ENTRY_LAST 2
typedef struct {
MFT_SEGMENT_REFERENCE file_reference;
uint16_t entry_length;
uint16_t stream_length;
uint32_t flags;
} index_entry;
// https://flatcap.org/linux-ntfs/ntfs/attributes/index_root.html
typedef struct {
uint32_t attribute_type;
uint32_t collation_rule;
uint32_t bytes_per_index_record;
uint8_t clusters_per_index_record;
uint8_t padding[3];
index_node_header node_header;
index_entry entries[1];
} index_root;
// https://flatcap.org/linux-ntfs/ntfs/concepts/index_record.html
typedef struct {
MULTI_SECTOR_HEADER MultiSectorHeader;
uint64_t sequence_number;
uint64_t vcn;
index_node_header header;
uint16_t update_sequence;
} index_record;
#define INDEX_RECORD_MAGIC 0x58444e49 // "INDX"
// https://flatcap.org/linux-ntfs/ntfs/files/secure.html
typedef struct {
uint32_t hash;
uint32_t id;
uint64_t offset;
uint32_t length;
} sd_entry;
// https://docs.microsoft.com/en-us/windows-hardware/drivers/ddi/ntifs/ns-ntifs-_reparse_data_buffer
typedef struct {
uint32_t ReparseTag;
uint16_t ReparseDataLength;
uint16_t Reserved;
union {
struct {
uint16_t SubstituteNameOffset;
uint16_t SubstituteNameLength;
uint16_t PrintNameOffset;
uint16_t PrintNameLength;
uint32_t Flags;
char16_t PathBuffer[1];
} SymbolicLinkReparseBuffer;
struct {
uint16_t SubstituteNameOffset;
uint16_t SubstituteNameLength;
uint16_t PrintNameOffset;
uint16_t PrintNameLength;
char16_t PathBuffer[1];
} MountPointReparseBuffer;
struct {
uint8_t DataBuffer[1];
} GenericReparseBuffer;
struct {
uint32_t unknown;
char name[1];
} LxSymlink; // undocumented
};
} REPARSE_DATA_BUFFER;
typedef struct {
uint32_t unknown;
char name[1];
} REPARSE_DATA_BUFFER_LX_SYMLINK;
#ifndef IO_REPARSE_TAG_SYMLINK
#define IO_REPARSE_TAG_SYMLINK 0xa000000c
#endif
#define IO_REPARSE_TAG_LX_SYMLINK 0xa000001d
#ifndef IO_REPARSE_TAG_WOF
#define IO_REPARSE_TAG_WOF 0x80000017
#endif
#ifndef SYMLINK_FLAG_RELATIVE
#define SYMLINK_FLAG_RELATIVE 0x00000001
#endif
// https://flatcap.org/linux-ntfs/ntfs/attributes/volume_information.html
typedef struct {
uint64_t Unknown1;
uint8_t MajorVersion;
uint8_t MinorVersion;
uint16_t Flags;
uint32_t Unknown2;
} VOLUME_INFORMATION;
#define NTFS_VOLUME_DIRTY 0x0001
#define NTFS_VOLUME_RESIZE_JOURNAL 0x0002
#define NTFS_VOLUME_UPGRADE_ON_MOUNT 0x0004
#define NTFS_VOLUME_MOUNTED_ON_NT4 0x0008
#define NTFS_VOLUME_DELETE_USN_UNDERWAY 0x0010
#define NTFS_VOLUME_REPAIR_OBJECT_IDS 0x0020
#define NTFS_VOLUME_MODIFIED_BY_CHKDSK 0x8000
// https://flatcap.org/linux-ntfs/ntfs/attributes/attribute_list.html
typedef struct {
enum ntfs_attribute type;
uint16_t record_length;
uint8_t name_length;
uint8_t name_offset;
uint64_t starting_vcn;
MFT_SEGMENT_REFERENCE file_reference;
uint16_t instance;
} attribute_list_entry;
#define WOF_CURRENT_VERSION 1
#define WOF_PROVIDER_WIM 1
#define WOF_PROVIDER_FILE 2
typedef struct {
uint32_t ReparseTag;
uint16_t ReparseDataLength;
uint16_t Reserved;
uint8_t DataBuffer[1];
} reparse_point_header; // edited form of REPARSE_DATA_BUFFER
typedef struct {
uint32_t Version;
uint32_t Provider;
} wof_external_info; // WOF_EXTERNAL_INFO in winioctl.h
#define FILE_PROVIDER_CURRENT_VERSION 1
#define FILE_PROVIDER_COMPRESSION_XPRESS4K 0
#define FILE_PROVIDER_COMPRESSION_LZX 1
#define FILE_PROVIDER_COMPRESSION_XPRESS8K 2
#define FILE_PROVIDER_COMPRESSION_XPRESS16K 3
typedef struct {
uint32_t Version;
uint32_t Algorithm;
} file_provider_external_info_v0; // FILE_PROVIDER_EXTERNAL_INFO_V0 in winioctl.h
// cf. https://docs.microsoft.com/en-us/windows-hardware/drivers/ddi/wdm/ns-wdm-_file_full_ea_information
typedef struct {
uint32_t NextEntryOffset;
uint8_t Flags;
uint8_t EaNameLength;
uint16_t EaValueLength;
char EaName[1];
} ea_data;
typedef struct {
uint32_t major;
uint32_t minor;
} lxdev;
// https://dfir.ru/2019/01/19/ntfs-today/
typedef struct {
uint16_t format;
uint16_t version;
uint32_t mode;
uint32_t uid;
uint32_t gid;
uint32_t rdev;
uint32_t atime_ns;
uint32_t mtime_ns;
uint32_t ctime_ns;
uint64_t atime;
uint64_t mtime;
uint64_t ctime;
} lxattrb;
#pragma pack(pop)
class ntfs;
struct mapping {
mapping(uint64_t lcn, uint64_t vcn, uint64_t length) : lcn(lcn), vcn(vcn), length(length) { }
uint64_t lcn;
uint64_t vcn;
uint64_t length;
};
class ntfs_file {
public:
ntfs_file(ntfs& dev, uint64_t inode);
buffer_t read(uint64_t offset = 0, uint32_t length = 0, enum ntfs_attribute type = ntfs_attribute::DATA, std::u16string_view name = u"");
std::list<mapping> read_mappings(enum ntfs_attribute type = ntfs_attribute::DATA, std::u16string_view name = u"");
bool is_directory() const {
return file_record->Flags & FILE_RECORD_IS_DIRECTORY;
}
void loop_through_atts(const std::function<bool(const ATTRIBUTE_RECORD_HEADER&, std::string_view, std::u16string_view)>& func);
std::string get_filename();
FILE_RECORD_SEGMENT_HEADER* file_record;
private:
buffer_t read_nonresident_attribute(uint64_t offset, uint32_t length, const ATTRIBUTE_RECORD_HEADER* att);
buffer_t file_record_buf;
ntfs& dev;
uint64_t inode;
};
class ntfs {
public:
ntfs(const std::string& fn);
~ntfs() {
#ifdef _WIN32
CloseHandle(h);
#else
close(fd);
#endif
}
void seek(uint64_t pos);
void read(uint8_t* buf, size_t length);
void write(const uint8_t* buf, size_t length);
std::string_view find_sd(uint32_t id, ntfs_file& secure);
std::unique_ptr<ntfs_file> mft;
buffer_t boot_sector_buf;
NTFS_BOOT_SECTOR* boot_sector;
uint64_t file_record_size;
std::map sd_list;
#ifdef _WIN32
HANDLE h;
#else
int fd;
#endif
};
// ntfs.cpp
void read_nonresident_mappings(const ATTRIBUTE_RECORD_HEADER& att, std::list<mapping>& mappings,
uint32_t cluster_size, uint64_t vdl);
void populate_skip_list(ntfs& dev, uint64_t inode, std::list<uint64_t>& skiplist);
ntfs2btrfs-20240115/src/ntfs2btrfs.cpp000077500000000000000000004113231455127722500174600ustar00rootroot00000000000000/* Copyright (c) Mark Harmstone 2020
*
* This file is part of ntfs2btrfs.
*
* Ntfs2btrfs is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public Licence as published by
* the Free Software Foundation, either version 2 of the Licence, or
* (at your option) any later version.
*
* Ntfs2btrfs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public Licence for more details.
*
* You should have received a copy of the GNU General Public Licence
* along with Ntfs2btrfs. If not, see <https://www.gnu.org/licenses/>. */
#include "ntfs.h"
#include "ntfs2btrfs.h"
#include "crc32c.h"
#include "xxhash.h"
#if defined(__i386__) || defined(__x86_64__)
#ifndef _MSC_VER
#include <cpuid.h>
#else
#include <intrin.h>
#endif
#endif
#include
#include
#include
#include
#include
#include
#include
#ifdef _WIN32
#include <windows.h>
#else
#include
#endif
#if !defined(_WIN32) && !defined(__FreeBSD__)
#define USE_MMAP
#endif
#include "config.h"
using namespace std;
static list<chunk> chunks;
static list<root> roots;
static uint32_t tree_size = 0x4000; // FIXME
static list<space> space_list;
static bool chunks_changed;
static uint64_t data_size = 0;
static BTRFS_UUID fs_uuid, chunk_uuid, dev_uuid, subvol_uuid;
static list relocs;
static uint64_t device_size, orig_device_size;
static bool reloc_last_sector = false;
static uint64_t mapped_inodes = 0, rewritten_inodes = 0, inline_inodes = 0;
static uint64_t last_chunk_end;
static const uint64_t stripe_length = 0x10000;
static const uint64_t chunk_virt_offset = 0x100000;
static const uint64_t dummy_inode = 0xffffffffffffffff; // protected data
static const uint64_t first_ntfs_inode = 24;
static const uint64_t data_chunk_size = 128 * 1024 * 1024; // FIXME
static const uint64_t inode_offset = 0x101;
static const uint16_t max_inline = 2048;
static const uint64_t max_extent_size = 0x8000000; // 128 MB
static const uint64_t max_comp_extent_size = 0x20000; // 128 KB
static constexpr char chunk_error_message[] = "Could not find enough space to create new chunk. Try clearing a few gigabytes of space, or defragging.";
#define EA_NTACL "security.NTACL"
#define EA_NTACL_HASH 0x45922146
#define EA_DOSATTRIB "user.DOSATTRIB"
#define EA_DOSATTRIB_HASH 0x914f9939
#define EA_REPARSE "user.reparse"
#define EA_REPARSE_HASH 0xfabad1fe
#define EA_CAP "security.capability"
#define EA_CAP_HASH 0x7c3650b1
using runs_t = map>;
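// Returns the number of bytes needed to encode the UTF-16 string as UTF-8,
// counting three bytes (the U+FFFD replacement character) for each unpaired
// surrogate.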
static constexpr size_t utf16_to_utf8_len(u16string_view sv) noexcept {
size_t ret = 0;
while (!sv.empty()) {
if (sv[0] < 0x80)
ret++;
else if (sv[0] < 0x800)
ret += 2;
else if (sv[0] < 0xd800)
ret += 3;
else if (sv[0] < 0xdc00) {
if (sv.length() < 2 || (sv[1] & 0xdc00) != 0xdc00) {
ret += 3;
sv = sv.substr(1);
continue;
}
ret += 4;
sv = sv.substr(1);
} else
ret += 3;
sv = sv.substr(1);
}
return ret;
}
static constexpr void utf16_to_utf8_span(u16string_view sv, span<char> t) noexcept {
auto ptr = t.begin();
if (ptr == t.end())
return;
while (!sv.empty()) {
if (sv[0] < 0x80) {
*ptr = (uint8_t)sv[0];
ptr++;
if (ptr == t.end())
return;
} else if (sv[0] < 0x800) {
*ptr = (uint8_t)(0xc0 | (sv[0] >> 6));
ptr++;
if (ptr == t.end())
return;
*ptr = (uint8_t)(0x80 | (sv[0] & 0x3f));
ptr++;
if (ptr == t.end())
return;
} else if (sv[0] < 0xd800) {
*ptr = (uint8_t)(0xe0 | (sv[0] >> 12));
ptr++;
if (ptr == t.end())
return;
*ptr = (uint8_t)(0x80 | ((sv[0] >> 6) & 0x3f));
ptr++;
if (ptr == t.end())
return;
*ptr = (uint8_t)(0x80 | (sv[0] & 0x3f));
ptr++;
if (ptr == t.end())
return;
} else if (sv[0] < 0xdc00) {
if (sv.length() < 2 || (sv[1] & 0xdc00) != 0xdc00) {
*ptr = (uint8_t)0xef;
ptr++;
if (ptr == t.end())
return;
*ptr = (uint8_t)0xbf;
ptr++;
if (ptr == t.end())
return;
*ptr = (uint8_t)0xbd;
ptr++;
if (ptr == t.end())
return;
sv = sv.substr(1);
continue;
}
char32_t cp = 0x10000 | ((sv[0] & ~0xd800) << 10) | (sv[1] & ~0xdc00);
*ptr = (uint8_t)(0xf0 | (cp >> 18));
ptr++;
if (ptr == t.end())
return;
*ptr = (uint8_t)(0x80 | ((cp >> 12) & 0x3f));
ptr++;
if (ptr == t.end())
return;
*ptr = (uint8_t)(0x80 | ((cp >> 6) & 0x3f));
ptr++;
if (ptr == t.end())
return;
*ptr = (uint8_t)(0x80 | (cp & 0x3f));
ptr++;
if (ptr == t.end())
return;
sv = sv.substr(1);
} else if (sv[0] < 0xe000) {
*ptr = (uint8_t)0xef;
ptr++;
if (ptr == t.end())
return;
*ptr = (uint8_t)0xbf;
ptr++;
if (ptr == t.end())
return;
*ptr = (uint8_t)0xbd;
ptr++;
if (ptr == t.end())
return;
} else {
*ptr = (uint8_t)(0xe0 | (sv[0] >> 12));
ptr++;
if (ptr == t.end())
return;
*ptr = (uint8_t)(0x80 | ((sv[0] >> 6) & 0x3f));
ptr++;
if (ptr == t.end())
return;
*ptr = (uint8_t)(0x80 | (sv[0] & 0x3f));
ptr++;
if (ptr == t.end())
return;
}
sv = sv.substr(1);
}
}
string utf16_to_utf8(u16string_view sv) {
if (sv.empty())
return "";
string ret(utf16_to_utf8_len(sv), 0);
utf16_to_utf8_span(sv, ret);
return ret;
}
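// Removes the range [offset, offset + length) from a free-space list,
// trimming or splitting any entries that overlap it.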
static void space_list_remove(list<space>& space_list, uint64_t offset, uint64_t length) {
auto it = space_list.begin();
while (it != space_list.end()) {
if (it->offset > offset + length)
return;
if (it->offset >= offset && it->offset + it->length <= offset + length) { // remove entry entirely
auto it2 = it;
it2++;
space_list.erase(it);
it = it2;
continue;
} else if (offset + length > it->offset && offset + length < it->offset + it->length) {
if (offset > it->offset) { // cut out hole
space_list.insert(it, space(it->offset, offset - it->offset));
it->length = it->offset + it->length - offset - length;
it->offset = offset + length;
return;
} else { // remove start of entry
it->length -= offset + length - it->offset;
it->offset = offset + length;
}
} else if (offset > it->offset && offset < it->offset + it->length) // remove end of entry
it->length = offset - it->offset;
it++;
}
}
static void remove_superblocks(chunk& c) {
unsigned int i = 0;
// FIXME - DUP
while (superblock_addrs[i] != 0) {
if (c.disk_start + c.length > superblock_addrs[i] && c.disk_start < superblock_addrs[i] + stripe_length) {
uint64_t start = max(c.offset, superblock_addrs[i] - c.disk_start + c.offset);
uint64_t end = min(c.offset + c.length, superblock_addrs[i] + stripe_length - c.disk_start + c.offset);
space_list_remove(c.space_list, start, end - start);
}
i++;
}
}
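// Walks the NTFS volume bitmap and creates a btrfs data chunk for each region
// of the disk containing allocated clusters, building each chunk's free-space
// list from the gaps between used runs and excluding superblock locations.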
static void create_data_chunks(ntfs& dev, const buffer_t& bmpdata) {
uint64_t cluster_size = (uint64_t)dev.boot_sector->BytesPerSector * (uint64_t)dev.boot_sector->SectorsPerCluster;
uint64_t addr = 0;
// FIXME - make sure clusters_per_chunk is multiple of 8
string_view bdsv{(char*)bmpdata.data(), bmpdata.size()};
while (bdsv.length() > 0 && addr < device_size) {
uint64_t chunk_length = min(device_size - addr, data_chunk_size);
uint64_t clusters_per_chunk = chunk_length / cluster_size;
string_view csv = bdsv.substr(0, (size_t)(clusters_per_chunk / 8));
size_t len = csv.length();
uint64_t run_start = 0, pos = 0;
bool set = false;
list<space> used;
if (chunk_length % stripe_length != 0)
chunk_length -= chunk_length % stripe_length;
// FIXME - do by uint64_t if 64-bit processor?
while (csv.size() >= sizeof(uint32_t)) {
auto v = *(uint32_t*)csv.data();
if ((!set && v == 0) || (set && v == 0xffffffff)) {
pos += sizeof(uint32_t) * 8;
csv = csv.substr(sizeof(uint32_t));
continue;
}
if (!set && v == 0xffffffff) {
run_start = pos;
set = true;
pos += sizeof(uint32_t) * 8;
} else if (set && v == 0) {
if (pos != run_start)
used.emplace_back(run_start, pos - run_start);
set = false;
pos += sizeof(uint32_t) * 8;
} else {
for (unsigned int i = 0; i < sizeof(uint32_t) * 8; i++) {
if (v & 1) {
if (!set) {
run_start = pos;
set = true;
}
} else {
if (set) {
if (pos != run_start)
used.emplace_back(run_start, pos - run_start);
set = false;
}
}
v >>= 1;
pos++;
}
}
csv = csv.substr(sizeof(uint32_t));
}
while (!csv.empty()) {
auto v = *(uint8_t*)csv.data();
if ((!set && v == 0) || (set && v == 0xff)) {
pos++;
csv = csv.substr(1);
continue;
}
if (!set && v == 0xff) {
run_start = pos;
set = true;
pos += 8;
} else if (set && v == 0) {
if (pos != run_start)
used.emplace_back(run_start, pos - run_start);
set = false;
pos += 8;
} else {
for (unsigned int i = 0; i < 8; i++) {
if (v & 1) {
if (!set) {
run_start = pos;
set = true;
}
} else {
if (set) {
if (pos != run_start)
used.emplace_back(run_start, pos - run_start);
set = false;
}
}
v >>= 1;
pos++;
}
}
csv = csv.substr(1);
}
if (set && run_start != pos)
used.emplace_back(run_start, pos - run_start);
if (!used.empty()) {
space_list_remove(space_list, addr, chunk_length);
chunks.emplace_back(addr + chunk_virt_offset, chunk_length, addr, BLOCK_FLAG_DATA);
auto& c = chunks.back();
uint64_t last = 0;
for (const auto& u : used) {
if (u.offset > last)
c.space_list.emplace_back(c.offset + (last * cluster_size), (u.offset - last) * cluster_size);
last = u.offset + u.length;
}
if (last * cluster_size < chunk_length)
c.space_list.emplace_back(c.offset + (last * cluster_size), chunk_length - (last * cluster_size));
remove_superblocks(c);
}
addr += data_chunk_size;
bdsv = bdsv.substr(len);
}
last_chunk_end = chunks.back().offset - chunk_virt_offset + chunks.back().length;
}
static void add_item(root& r, uint64_t obj_id, btrfs_key_type obj_type, uint64_t offset, const buffer_t& buf) {
auto ret = r.items.emplace(KEY{obj_id, obj_type, offset}, buf);
if (!ret.second)
throw formatted_error("Could not insert entry ({:x}, {}, {:x}) into root items list.", obj_id, obj_type, offset);
}
static void add_item_move(root& r, uint64_t obj_id, btrfs_key_type obj_type, uint64_t offset, buffer_t& buf) {
auto ret = r.items.emplace(KEY{obj_id, obj_type, offset}, buffer_t{});
if (!ret.second)
throw formatted_error("Could not insert entry ({:x}, {}, {:x}) into root items list.", obj_id, obj_type, offset);
auto& it = ret.first->second;
it.swap(buf);
}
static void add_item(root& r, uint64_t obj_id, btrfs_key_type obj_type, uint64_t offset, const void* data, uint16_t len) {
auto ret = r.items.emplace(KEY{obj_id, obj_type, offset}, buffer_t(len));
if (!ret.second)
throw formatted_error("Could not insert entry ({:x}, {}, {:x}) into root items list.", obj_id, obj_type, offset);
auto& it = ret.first->second;
memcpy(it.data(), data, len);
}
static void add_chunk(root& chunk_root, root& devtree_root, root& extent_root, const chunk& c) {
chunk_item_one_stripe ci1s;
DEV_EXTENT de;
BLOCK_GROUP_ITEM bgi;
memset(&ci1s, 0, sizeof(chunk_item_one_stripe));
ci1s.chunk_item.size = c.length;
ci1s.chunk_item.root_id = BTRFS_ROOT_EXTENT;
ci1s.chunk_item.stripe_length = 0x10000;
ci1s.chunk_item.type = c.type;
ci1s.chunk_item.opt_io_alignment = 0x10000;
ci1s.chunk_item.opt_io_width = 0x10000;
ci1s.chunk_item.sector_size = 0x1000; // FIXME - get from superblock
ci1s.chunk_item.num_stripes = 1;
ci1s.chunk_item.sub_stripes = 1;
ci1s.stripe.dev_id = 1;
ci1s.stripe.offset = c.disk_start;
ci1s.stripe.dev_uuid = dev_uuid;
add_item(chunk_root, 0x100, btrfs_key_type::CHUNK_ITEM, c.offset, &ci1s, sizeof(ci1s));
de.chunktree = BTRFS_ROOT_CHUNK;
de.objid = 0x100;
de.address = c.offset;
de.length = c.length;
de.chunktree_uuid = chunk_uuid;
add_item(devtree_root, 1, btrfs_key_type::DEV_EXTENT, c.disk_start, &de, sizeof(DEV_EXTENT));
bgi.chunk_tree = 0x100;
bgi.flags = c.type;
// bgi.used gets set in update_extent_root
add_item(extent_root, c.offset, btrfs_key_type::BLOCK_GROUP_ITEM, c.length, &bgi, sizeof(BLOCK_GROUP_ITEM));
}
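// Allocates a tree-sized block for metadata from an existing metadata or
// system chunk, creating a new chunk if none has enough free space, and
// records a METADATA_ITEM for the block in the extent tree.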
static uint64_t allocate_metadata(uint64_t r, root& extent_root, uint8_t level) {
bool system_chunk = r == BTRFS_ROOT_CHUNK;
uint64_t chunk_size, disk_offset;
bool found = false;
metadata_item mi;
mi.extent_item.refcount = 1;
mi.extent_item.generation = 1;
mi.extent_item.flags = EXTENT_ITEM_TREE_BLOCK;
mi.type = btrfs_key_type::TREE_BLOCK_REF;
mi.tbr.offset = r;
for (auto& c : chunks) {
if ((system_chunk && c.type & BLOCK_FLAG_SYSTEM) || (!system_chunk && c.type & BLOCK_FLAG_METADATA)) {
for (auto it = c.space_list.begin(); it != c.space_list.end(); it++) {
if (it->length >= tree_size) {
uint64_t addr = it->offset;
if (it->length == tree_size)
c.space_list.erase(it);
else {
it->offset += tree_size;
it->length -= tree_size;
}
c.used += tree_size;
add_item(extent_root, addr, btrfs_key_type::METADATA_ITEM, level, &mi, sizeof(metadata_item));
return addr;
}
}
}
}
// create new chunk
chunks_changed = true;
if (system_chunk)
chunk_size = 32 * 1024 * 1024;
else
chunk_size = 128 * 1024 * 1024; // FIXME
for (const auto& s : space_list) {
if (s.length >= chunk_size) {
disk_offset = s.offset;
space_list_remove(space_list, disk_offset, chunk_size);
found = true;
break;
}
}
if (!found)
throw formatted_error(chunk_error_message);
chunks.emplace_back(disk_offset + chunk_virt_offset, chunk_size, disk_offset, system_chunk ? BLOCK_FLAG_SYSTEM : BLOCK_FLAG_METADATA);
chunk& c = chunks.back();
c.space_list.emplace_back(c.offset, c.length);
remove_superblocks(c);
for (auto it = c.space_list.begin(); it != c.space_list.end(); it++) {
if (it->length >= tree_size) {
uint64_t addr = it->offset;
if (it->length == tree_size)
c.space_list.erase(it);
else {
it->offset += tree_size;
it->length -= tree_size;
}
c.used = tree_size;
add_item(extent_root, addr, btrfs_key_type::METADATA_ITEM, level, &mi, sizeof(metadata_item));
return addr;
}
}
throw formatted_error("Could not allocate metadata address");
}
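// Allocates length bytes from a data chunk, creating a new data chunk if
// necessary, and returns the virtual (btrfs) address of the allocation.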
static uint64_t allocate_data(uint64_t length, bool change_used) {
uint64_t disk_offset;
bool found = false;
for (auto& c : chunks) {
if (c.type & BLOCK_FLAG_DATA) {
for (auto it = c.space_list.begin(); it != c.space_list.end(); it++) {
if (it->length >= length) {
uint64_t addr = it->offset;
if (it->length == length)
c.space_list.erase(it);
else {
it->offset += length;
it->length -= length;
}
if (change_used)
c.used += length;
return addr;
}
}
}
}
// create new chunk
chunks_changed = true;
for (const auto& s : space_list) {
if (s.length >= data_chunk_size) {
disk_offset = s.offset;
space_list_remove(space_list, disk_offset, data_chunk_size);
found = true;
break;
}
}
if (!found)
throw formatted_error(chunk_error_message);
chunks.emplace_back(disk_offset + chunk_virt_offset, data_chunk_size, disk_offset, BLOCK_FLAG_DATA);
chunk& c = chunks.back();
c.space_list.emplace_back(c.offset, c.length);
remove_superblocks(c);
for (auto it = c.space_list.begin(); it != c.space_list.end(); it++) {
if (it->length >= length) {
uint64_t addr = it->offset;
if (it->length == length)
c.space_list.erase(it);
else {
it->offset += length;
it->length -= length;
}
if (change_used)
c.used = length;
return addr;
}
}
throw formatted_error("Could not allocate data address");
}
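// Computes the checksum of a tree block over everything after the csum field,
// using whichever hash algorithm the filesystem is being created with.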
static void calc_tree_hash(tree_header& th, enum btrfs_csum_type csum_type) {
switch (csum_type) {
case btrfs_csum_type::crc32c:
*(uint32_t*)th.csum = ~calc_crc32c(0xffffffff, (uint8_t*)&th.fs_uuid, tree_size - (uint32_t)sizeof(th.csum));
break;
case btrfs_csum_type::xxhash:
*(uint64_t*)th.csum = XXH64((uint8_t*)&th.fs_uuid, tree_size - sizeof(th.csum), 0);
break;
case btrfs_csum_type::sha256:
calc_sha256((uint8_t*)&th, &th.fs_uuid, tree_size - sizeof(th.csum));
break;
case btrfs_csum_type::blake2:
blake2b(&th, 32, &th.fs_uuid, tree_size - sizeof(th.csum));
break;
default:
break;
}
}
void root::create_trees(root& extent_root, enum btrfs_csum_type csum_type) {
uint32_t space_left, num_items;
buffer_t buf(tree_size);
memset(buf.data(), 0, tree_size);
space_left = tree_size - (uint32_t)sizeof(tree_header);
num_items = 0;
auto& th = *(tree_header*)buf.data();
th.fs_uuid = fs_uuid;
th.flags = HEADER_FLAG_MIXED_BACKREF | HEADER_FLAG_WRITTEN;
th.chunk_tree_uuid = chunk_uuid;
th.generation = 1;
th.tree_id = id;
th.level = 0;
auto get_address = [this](root& extent_root, uint8_t level) {
uint64_t addr;
if (!old_addresses.empty()) {
addr = old_addresses.front().first;
if (level != old_addresses.front().second) { // change metadata level in extent tree
if (auto f = extent_root.items.find(KEY{addr, btrfs_key_type::METADATA_ITEM, old_addresses.front().second}); f != extent_root.items.end()) {
auto d = move(f->second);
extent_root.items.erase(f);
extent_root.items.emplace(make_pair(KEY{addr, btrfs_key_type::METADATA_ITEM, level}, d));
}
}
old_addresses.pop_front();
} else {
addr = allocate_metadata(id, extent_root, level);
allocations_done = true;
}
addresses.emplace_back(addr, level);
return addr;
};
{
auto ln = (leaf_node*)((uint8_t*)buf.data() + sizeof(tree_header));
uint32_t data_off = tree_size - (uint32_t)sizeof(tree_header);
for (const auto& i : items) {
if (sizeof(leaf_node) + i.second.size() > space_left) { // tree complete, add to list
th.address = get_address(extent_root, 0);
th.num_items = num_items;
calc_tree_hash(th, csum_type);
trees.push_back(buf);
metadata_size += tree_size;
memset(buf.data(), 0, tree_size);
th.fs_uuid = fs_uuid;
th.flags = HEADER_FLAG_MIXED_BACKREF | HEADER_FLAG_WRITTEN;
th.chunk_tree_uuid = chunk_uuid;
th.generation = 1;
th.tree_id = id;
space_left = data_off = tree_size - (uint32_t)sizeof(tree_header);
num_items = 0;
ln = (leaf_node*)((uint8_t*)buf.data() + sizeof(tree_header));
}
if (sizeof(leaf_node) + i.second.size() + sizeof(tree_header) > tree_size)
throw formatted_error("Item too large for tree.");
ln->key = i.first;
ln->size = (uint32_t)i.second.size();
if (!i.second.empty()) {
data_off -= (uint32_t)i.second.size();
memcpy((uint8_t*)buf.data() + sizeof(tree_header) + data_off, i.second.data(), i.second.size());
}
ln->offset = data_off;
ln++;
num_items++;
space_left -= (uint32_t)(sizeof(leaf_node) + i.second.size());
}
}
if (num_items > 0 || items.size() == 0) { // flush remaining tree
th.address = get_address(extent_root, 0);
th.num_items = num_items;
calc_tree_hash(th, csum_type);
trees.push_back(buf);
metadata_size += tree_size;
}
level = 0;
if (trees.size() == 1) { // no internal trees needed
tree_addr = ((tree_header*)trees.back().data())->address;
return;
}
// create internal trees if necessary
do {
unsigned int trees_added = 0;
level++;
memset(buf.data(), 0, tree_size);
auto& th = *(tree_header*)buf.data();
th.fs_uuid = fs_uuid;
th.flags = HEADER_FLAG_MIXED_BACKREF | HEADER_FLAG_WRITTEN;
th.chunk_tree_uuid = chunk_uuid;
th.generation = 1;
th.tree_id = id;
th.level = level;
num_items = 0;
space_left = tree_size - (uint32_t)sizeof(tree_header);
auto in = (internal_node*)((uint8_t*)buf.data() + sizeof(tree_header));
for (const auto& t : trees) {
auto th2 = (tree_header*)t.data();
if (th2->level >= level)
break;
if (th2->level < level - 1)
continue;
if (sizeof(internal_node) > space_left) { // tree complete, add to list
th.address = get_address(extent_root, level);
th.num_items = num_items;
calc_tree_hash(th, csum_type);
trees.push_back(buf);
metadata_size += tree_size;
memset(buf.data(), 0, tree_size);
th.fs_uuid = fs_uuid;
th.flags = HEADER_FLAG_MIXED_BACKREF | HEADER_FLAG_WRITTEN;
th.chunk_tree_uuid = chunk_uuid;
th.generation = 1;
th.tree_id = id;
th.level = level;
space_left = tree_size - (uint32_t)sizeof(tree_header);
num_items = 0;
in = (internal_node*)((uint8_t*)buf.data() + sizeof(tree_header));
trees_added++;
}
auto ln = (leaf_node*)((uint8_t*)t.data() + sizeof(tree_header));
in->key = ln->key;
in->address = th2->address;
in->generation = 1;
in++;
num_items++;
space_left -= (uint32_t)sizeof(internal_node);
}
if (num_items > 0) { // flush remaining tree
th.address = get_address(extent_root, level);
th.num_items = num_items;
calc_tree_hash(th, csum_type);
trees.push_back(buf);
metadata_size += tree_size;
trees_added++;
}
if (trees_added == 1)
break;
} while (true);
tree_addr = ((tree_header*)trees.back().data())->address;
}
void root::write_trees(ntfs& dev) {
for (const auto& t : trees) {
auto& th = *(tree_header*)t.data();
uint64_t addr = th.address;
bool found = false;
for (const auto& c : chunks) {
if (c.offset <= addr && c.offset + c.length >= addr + tree_size) {
uint64_t physaddr = th.address - c.offset + c.disk_start;
// FIXME - handle DUP
dev.seek(physaddr);
dev.write(t.data(), t.size());
found = true;
break;
}
}
if (!found)
throw formatted_error("Could not find chunk containing address."); // FIXME - include number
}
}
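// Copies the NTFS volume label into the btrfs superblock, truncating it to
// MAX_LABEL_SIZE bytes without splitting a UTF-8 code point; failure here is
// reported but not fatal.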
static void set_volume_label(superblock& sb, ntfs& dev) {
try {
ntfs_file vol_file(dev, NTFS_VOLUME_INODE);
auto vnw = vol_file.read(0, 0, ntfs_attribute::VOLUME_NAME);
if (vnw.empty())
return;
auto vn = utf16_to_utf8(u16string_view((char16_t*)vnw.data(), vnw.size()));
if (vn.length() > MAX_LABEL_SIZE) {
vn = vn.substr(0, MAX_LABEL_SIZE);
// remove whole code point
while (!vn.empty() && vn[vn.length() - 1] & 0x80) {
vn.pop_back();
}
cerr << "Truncating volume label to \"" << vn << "\"" << endl;
}
// FIXME - check label doesn't contain slash or backslash
if (vn.empty())
return;
memcpy(sb.label, vn.data(), vn.length());
} catch (const exception& e) { // shouldn't be fatal
cerr << "Error while setting volume label: " << e.what() << endl;
}
}
static void write_superblocks(ntfs& dev, root& chunk_root, root& root_root, enum btrfs_compression compression,
enum btrfs_csum_type csum_type) {
uint32_t sector_size = 0x1000; // FIXME
buffer_t buf((size_t)sector_align(sizeof(superblock), sector_size));
unsigned int i;
uint32_t sys_chunk_size;
uint64_t total_used;
auto& sb = *(superblock*)buf.data();
memset(buf.data(), 0, buf.size());
sys_chunk_size = 0;
for (const auto& c : chunk_root.items) {
if (c.first.obj_type == btrfs_key_type::CHUNK_ITEM) {
auto& ci = *(CHUNK_ITEM*)c.second.data();
if (ci.type & BLOCK_FLAG_SYSTEM) {
sys_chunk_size += sizeof(KEY);
sys_chunk_size += (uint32_t)c.second.size();
}
}
}
if (sys_chunk_size > SYS_CHUNK_ARRAY_SIZE)
throw formatted_error("System chunk list was too long ({} > {}).", sys_chunk_size, SYS_CHUNK_ARRAY_SIZE);
total_used = 0;
for (const auto& c : chunks) {
total_used += c.used;
}
sb.uuid = fs_uuid;
sb.magic = BTRFS_MAGIC;
sb.generation = 1;
sb.root_tree_addr = root_root.tree_addr;
sb.chunk_tree_addr = chunk_root.tree_addr;
sb.total_bytes = device_size;
sb.bytes_used = total_used;
sb.root_dir_objectid = BTRFS_ROOT_TREEDIR;
sb.num_devices = 1;
sb.sector_size = sector_size;
sb.node_size = tree_size;
sb.leaf_size = tree_size;
sb.stripe_size = sector_size;
sb.n = sys_chunk_size;
sb.chunk_root_generation = 1;
sb.incompat_flags = BTRFS_INCOMPAT_FLAGS_MIXED_BACKREF | BTRFS_INCOMPAT_FLAGS_BIG_METADATA | BTRFS_INCOMPAT_FLAGS_EXTENDED_IREF |
BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA | BTRFS_INCOMPAT_FLAGS_NO_HOLES;
sb.csum_type = csum_type;
sb.root_level = root_root.level;
sb.chunk_root_level = chunk_root.level;
if (compression == btrfs_compression::lzo)
sb.incompat_flags |= BTRFS_INCOMPAT_FLAGS_COMPRESS_LZO;
else if (compression == btrfs_compression::zstd)
sb.incompat_flags |= BTRFS_INCOMPAT_FLAGS_COMPRESS_ZSTD;
set_volume_label(sb, dev);
for (const auto& c : chunk_root.items) {
if (c.first.obj_type == btrfs_key_type::DEV_ITEM) {
memcpy(&sb.dev_item, c.second.data(), sizeof(DEV_ITEM));
break;
}
}
sb.uuid_tree_generation = 1;
{
uint8_t* ptr = sb.sys_chunk_array;
for (const auto& c : chunk_root.items) {
if (c.first.obj_type == btrfs_key_type::CHUNK_ITEM) {
auto& ci = *(CHUNK_ITEM*)c.second.data();
if (ci.type & BLOCK_FLAG_SYSTEM) {
auto& key = *(KEY*)ptr;
key = c.first;
ptr += sizeof(KEY);
memcpy(ptr, c.second.data(), c.second.size());
ptr += c.second.size();
}
}
}
}
i = 0;
while (superblock_addrs[i] != 0) {
if (superblock_addrs[i] > device_size - buf.size())
return;
sb.sb_phys_addr = superblock_addrs[i];
switch (csum_type) {
case btrfs_csum_type::crc32c:
*(uint32_t*)sb.checksum = ~calc_crc32c(0xffffffff, (uint8_t*)&sb.uuid, sizeof(superblock) - sizeof(sb.checksum));
break;
case btrfs_csum_type::xxhash:
*(uint64_t*)sb.checksum = XXH64(&sb.uuid, sizeof(superblock) - sizeof(sb.checksum), 0);
break;
case btrfs_csum_type::sha256:
calc_sha256((uint8_t*)&sb, &sb.uuid, sizeof(superblock) - sizeof(sb.checksum));
break;
case btrfs_csum_type::blake2:
blake2b(&sb, 32, &sb.uuid, sizeof(superblock) - sizeof(sb.checksum));
break;
default:
break;
}
dev.seek(superblock_addrs[i]);
dev.write(buf.data(), buf.size());
i++;
}
}
static void add_dev_item(root& chunk_root) {
DEV_ITEM di;
uint32_t sector_size = 0x1000; // FIXME - get from superblock
memset(&di, 0, sizeof(DEV_ITEM));
di.dev_id = 1;
di.num_bytes = device_size;
//uint64_t bytes_used; // FIXME
di.optimal_io_align = sector_size;
di.optimal_io_width = sector_size;
di.minimal_io_size = sector_size;
di.device_uuid = dev_uuid;
di.fs_uuid = fs_uuid;
add_item(chunk_root, 1, btrfs_key_type::DEV_ITEM, 1, &di, sizeof(DEV_ITEM));
}
static void add_to_root_root(const root& r, root& root_root) {
ROOT_ITEM ri;
memset(&ri, 0, sizeof(ROOT_ITEM));
ri.inode.generation = 1;
ri.inode.st_blocks = tree_size;
ri.inode.st_size = 3;
ri.inode.st_nlink = 1;
ri.inode.st_mode = __S_IFDIR | S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH;
ri.generation = 1;
ri.objid = (r.id == BTRFS_ROOT_FSTREE || r.id >= 0x100) ? SUBVOL_ROOT_INODE : 0;
ri.flags = r.readonly ? BTRFS_SUBVOL_READONLY : 0;
ri.num_references = 1;
ri.generation2 = 1;
if (r.id == image_subvol_id)
ri.uuid = subvol_uuid;
// block_number, bytes_used, and root_level are set in update_root_root
add_item(root_root, r.id, btrfs_key_type::ROOT_ITEM, 0, &ri, sizeof(ROOT_ITEM));
}
static void update_root_root(root& root_root, enum btrfs_csum_type csum_type) {
for (auto& t : root_root.trees) {
auto& th = *(tree_header*)t.data();
if (th.level > 0)
return;
auto ln = (leaf_node*)((uint8_t*)t.data() + sizeof(tree_header));
bool changed = true;
for (unsigned int i = 0; i < th.num_items; i++) {
if (ln[i].key.obj_type == btrfs_key_type::ROOT_ITEM) {
auto& ri = *(ROOT_ITEM*)((uint8_t*)t.data() + sizeof(tree_header) + ln[i].offset);
for (const auto& r : roots) {
if (r.id == ln[i].key.obj_id) {
ri.block_number = r.tree_addr;
ri.root_level = r.level;
ri.bytes_used = r.metadata_size;
changed = true;
}
}
}
}
if (changed)
calc_tree_hash(th, csum_type);
}
}
static void add_dev_stats(root& r) {
uint64_t ds[5];
memset(ds, 0, sizeof(ds));
add_item(r, 0, btrfs_key_type::DEV_STATS, 1, &ds, sizeof(ds));
}
static BTRFS_UUID generate_uuid(default_random_engine& gen) {
BTRFS_UUID uuid;
uniform_int_distribution<uint32_t> dist(0,0xffffffff);
for (unsigned int i = 0; i < 4; i++) {
*(uint32_t*)&uuid.uuid[i * sizeof(uint32_t)] = dist(gen);
}
return uuid;
}
static void update_extent_root(root& extent_root, enum btrfs_csum_type csum_type) {
for (auto& t : extent_root.trees) {
auto& th = *(tree_header*)t.data();
if (th.level > 0)
return;
auto ln = (leaf_node*)((uint8_t*)t.data() + sizeof(tree_header));
bool changed = true;
for (unsigned int i = 0; i < th.num_items; i++) {
if (ln[i].key.obj_type == btrfs_key_type::BLOCK_GROUP_ITEM) {
auto& bgi = *(BLOCK_GROUP_ITEM*)((uint8_t*)t.data() + sizeof(tree_header) + ln[i].offset);
for (const auto& c : chunks) {
if (c.offset == ln[i].key.obj_id) {
bgi.used = c.used;
changed = true;
}
}
}
}
if (changed)
calc_tree_hash(th, csum_type);
}
}
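// Adds an INODE_REF for (inode, parent); if an INODE_REF item already exists
// for that parent, the new (index, name) entry is appended to it.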
static void add_inode_ref(root& r, uint64_t inode, uint64_t parent, uint64_t index, string_view name) {
if (r.items.count(KEY{inode, btrfs_key_type::INODE_REF, parent}) != 0) { // collision, append to the end
auto& old = r.items.at(KEY{inode, btrfs_key_type::INODE_REF, parent});
size_t irlen = offsetof(INODE_REF, name[0]) + name.length();
// FIXME - check if too long for tree, and create INODE_EXTREF instead
old.resize(old.size() + irlen);
auto& ir = *(INODE_REF*)((uint8_t*)old.data() + old.size() - irlen);
ir.index = index;
ir.n = (uint16_t)name.length();
memcpy(ir.name, name.data(), name.length());
return;
}
buffer_t buf(offsetof(INODE_REF, name[0]) + name.length());
auto& ir = *(INODE_REF*)buf.data();
ir.index = index;
ir.n = (uint16_t)name.length();
memcpy(ir.name, name.data(), name.length());
add_item_move(r, inode, btrfs_key_type::INODE_REF, parent, buf);
}
static void populate_fstree(root& r) {
INODE_ITEM ii;
memset(&ii, 0, sizeof(INODE_ITEM));
ii.generation = 1;
ii.transid = 1;
ii.st_nlink = 1;
ii.st_mode = __S_IFDIR | S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH;
ii.sequence = 1;
add_item(r, SUBVOL_ROOT_INODE, btrfs_key_type::INODE_ITEM, 0, &ii, sizeof(INODE_ITEM));
add_inode_ref(r, SUBVOL_ROOT_INODE, SUBVOL_ROOT_INODE, 0, "..");
}
static void update_chunk_root(root& chunk_root, enum btrfs_csum_type csum_type) {
for (auto& t : chunk_root.trees) {
auto& th = *(tree_header*)t.data();
if (th.level > 0)
return;
auto ln = (leaf_node*)((uint8_t*)t.data() + sizeof(tree_header));
for (unsigned int i = 0; i < th.num_items; i++) {
if (ln[i].key.obj_id == 1 && ln[i].key.obj_type == btrfs_key_type::DEV_ITEM && ln[i].key.offset == 1) {
auto& di = *(DEV_ITEM*)((uint8_t*)t.data() + sizeof(tree_header) + ln[i].offset);
di.bytes_used = 0;
for (const auto& c : chunks) {
di.bytes_used += c.length;
}
calc_tree_hash(th, csum_type);
return;
}
}
}
}
static root& add_image_subvol(root& root_root, root& fstree_root) {
static const char subvol_name[] = "image";
roots.emplace_back(image_subvol_id);
root& r = roots.back();
r.readonly = true;
// add ROOT_REF and ROOT_BACKREF
{
buffer_t buf(offsetof(ROOT_REF, name[0]) + sizeof(subvol_name) - 1);
auto& rr = *(ROOT_REF*)buf.data();
rr.dir = SUBVOL_ROOT_INODE;
rr.index = 2;
rr.n = sizeof(subvol_name) - 1;
memcpy(rr.name, subvol_name, sizeof(subvol_name) - 1);
add_item(root_root, BTRFS_ROOT_FSTREE, btrfs_key_type::ROOT_REF, image_subvol_id, buf);
add_item_move(root_root, image_subvol_id, btrfs_key_type::ROOT_BACKREF, BTRFS_ROOT_FSTREE, buf);
}
// add DIR_ITEM and DIR_INDEX
{
buffer_t buf(offsetof(DIR_ITEM, name[0]) + sizeof(subvol_name) - 1);
auto& di = *(DIR_ITEM*)buf.data();
di.key.obj_id = image_subvol_id;
di.key.obj_type = btrfs_key_type::ROOT_ITEM;
di.key.offset = 0xffffffffffffffff;
di.transid = 1;
di.m = 0;
di.n = sizeof(subvol_name) - 1;
di.type = btrfs_inode_type::directory;
memcpy(di.name, subvol_name, sizeof(subvol_name) - 1);
auto hash = calc_crc32c(0xfffffffe, (const uint8_t*)subvol_name, sizeof(subvol_name) - 1);
add_item(fstree_root, SUBVOL_ROOT_INODE, btrfs_key_type::DIR_ITEM, hash, buf);
add_item_move(fstree_root, SUBVOL_ROOT_INODE, btrfs_key_type::DIR_INDEX, 2, buf);
}
// increase st_size in parent dir
if (fstree_root.dir_size.count(SUBVOL_ROOT_INODE) == 0)
fstree_root.dir_size[SUBVOL_ROOT_INODE] = (sizeof(subvol_name) - 1) * 2;
else
fstree_root.dir_size.at(SUBVOL_ROOT_INODE) += (sizeof(subvol_name) - 1) * 2;
populate_fstree(r);
return r;
}
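// Creates the image file in the image subvolume: an inode whose EXTENT_DATA
// items reference the original NTFS clusters in place, so the source
// filesystem's data is preserved inside the new btrfs volume.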
static void create_image(root& r, ntfs& dev, const runs_t& runs, uint64_t inode, bool nocsum) {
INODE_ITEM ii;
uint64_t cluster_size = (uint64_t)dev.boot_sector->BytesPerSector * (uint64_t)dev.boot_sector->SectorsPerCluster;
// add INODE_ITEM
memset(&ii, 0, sizeof(INODE_ITEM));
ii.generation = 1;
ii.transid = 1;
ii.st_size = orig_device_size;
ii.st_nlink = 1;
ii.st_mode = __S_IFREG | S_IRUSR | S_IWUSR;
ii.sequence = 1;
if (nocsum)
ii.flags = BTRFS_INODE_NODATACOW | BTRFS_INODE_NODATASUM;
// FIXME - use current time for the following
// BTRFS_TIME st_atime;
// BTRFS_TIME st_ctime;
// BTRFS_TIME st_mtime;
// BTRFS_TIME otime;
for (const auto& rs : runs) {
for (const auto& run : rs.second) {
if (!run.relocated && !run.not_in_img)
ii.st_blocks += run.length * cluster_size;
}
}
add_item(r, inode, btrfs_key_type::INODE_ITEM, 0, &ii, sizeof(INODE_ITEM));
// add DIR_ITEM and DIR_INDEX
{
buffer_t buf(offsetof(DIR_ITEM, name[0]) + sizeof(image_filename) - 1);
auto& di = *(DIR_ITEM*)buf.data();
di.key.obj_id = inode;
di.key.obj_type = btrfs_key_type::INODE_ITEM;
di.key.offset = 0;
di.transid = 1;
di.m = 0;
di.n = sizeof(image_filename) - 1;
di.type = btrfs_inode_type::file;
memcpy(di.name, image_filename, sizeof(image_filename) - 1);
auto hash = calc_crc32c(0xfffffffe, (const uint8_t*)image_filename, sizeof(image_filename) - 1);
add_item(r, SUBVOL_ROOT_INODE, btrfs_key_type::DIR_ITEM, hash, buf);
add_item_move(r, SUBVOL_ROOT_INODE, btrfs_key_type::DIR_INDEX, 2, buf);
}
// add INODE_REF
add_inode_ref(r, inode, SUBVOL_ROOT_INODE, 2, image_filename);
// increase st_size in parent dir
for (auto& it : r.items) {
if (it.first.obj_id == SUBVOL_ROOT_INODE && it.first.obj_type == btrfs_key_type::INODE_ITEM) {
auto& ii2 = *(INODE_ITEM*)it.second.data();
ii2.st_size += (sizeof(image_filename) - 1) * 2;
break;
}
}
// add extents
buffer_t buf(offsetof(EXTENT_DATA, data[0]) + sizeof(EXTENT_DATA2));
auto& ed = *(EXTENT_DATA*)buf.data();
auto& ed2 = *(EXTENT_DATA2*)&ed.data;
ed.generation = 1;
ed.compression = btrfs_compression::none;
ed.encryption = 0;
ed.encoding = 0;
ed.type = btrfs_extent_type::regular;
for (const auto& rs : runs) {
for (const auto& run : rs.second) {
uint64_t addr;
if (run.relocated || run.not_in_img)
continue;
ed.decoded_size = ed2.size = ed2.num_bytes = run.length * cluster_size;
addr = run.offset * cluster_size;
if (run.inode == dummy_inode) {
for (const auto& reloc : relocs) {
if (reloc.old_start == run.offset) {
ed2.address = (reloc.new_start * cluster_size) + chunk_virt_offset;
break;
}
}
} else
ed2.address = addr + chunk_virt_offset;
ed2.offset = 0;
add_item(r, inode, btrfs_key_type::EXTENT_DATA, addr, buf);
data_size += ed2.size;
}
}
}
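// Converts an NTFS allocation bitmap into a list of runs of set bits,
// scanning a 32-bit word at a time and falling back to byte-by-byte.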
template<typename T>
static void parse_bitmap(const buffer_t& bmpdata, list<T>& runs) {
uint64_t run_start = 0, pos = 0;
bool set = false;
string_view bdsv{(char*)bmpdata.data(), bmpdata.size()};
// FIXME - by 64-bits if 64-bit processor (use typedef for uint64_t/uint32_t?)
while (bdsv.size() >= sizeof(uint32_t)) {
auto v = *(uint32_t*)bdsv.data();
if ((!set && v == 0) || (set && v == 0xffffffff)) {
pos += sizeof(uint32_t) * 8;
bdsv = bdsv.substr(sizeof(uint32_t));
continue;
}
if (!set && v == 0xffffffff) {
run_start = pos;
set = true;
pos += sizeof(uint32_t) * 8;
} else if (set && v == 0) {
if (pos != run_start)
runs.emplace_back(run_start, pos - run_start);
set = false;
pos += sizeof(uint32_t) * 8;
} else {
for (unsigned int i = 0; i < sizeof(uint32_t) * 8; i++) {
if (v & 1) {
if (!set) {
run_start = pos;
set = true;
}
} else {
if (set) {
if (pos != run_start)
runs.emplace_back(run_start, pos - run_start);
set = false;
}
}
v >>= 1;
pos++;
}
}
bdsv = bdsv.substr(sizeof(uint32_t));
}
while (!bdsv.empty()) {
auto v = *(uint8_t*)bdsv.data();
if ((!set && v == 0) || (set && v == 0xff)) {
pos++;
bdsv = bdsv.substr(1);
continue;
}
if (!set && v == 0xff) {
run_start = pos;
set = true;
pos += 8;
} else if (set && v == 0) {
if (pos != run_start)
runs.emplace_back(run_start, pos - run_start);
set = false;
pos += 8;
} else {
for (unsigned int i = 0; i < 8; i++) {
if (v & 1) {
if (!set) {
run_start = pos;
set = true;
}
} else {
if (set) {
if (pos != run_start)
runs.emplace_back(run_start, pos - run_start);
set = false;
}
}
v >>= 1;
pos++;
}
}
bdsv = bdsv.substr(1);
}
if (set && run_start != pos)
runs.emplace_back(run_start, pos - run_start);
// FIXME - remove any bits after end of volume
}
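// Like parse_bitmap, but groups the runs of used clusters by the data chunk
// they fall in, splitting any run that crosses a chunk boundary.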
static void parse_data_bitmap(ntfs& dev, const buffer_t& bmpdata, runs_t& runs) {
uint64_t run_start = 0, pos = 0;
bool set = false;
string_view bdsv{(char*)bmpdata.data(), bmpdata.size()};
uint64_t clusters_per_chunk = data_chunk_size / ((uint64_t)dev.boot_sector->BytesPerSector * (uint64_t)dev.boot_sector->SectorsPerCluster);
// FIXME - by 64-bits if 64-bit processor (use typedef for uint64_t/uint32_t?)
auto add_run = [&]() {
while (true) {
uint64_t chunk = run_start / clusters_per_chunk;
auto& r = runs[chunk];
if (pos / clusters_per_chunk != chunk) {
uint64_t len = clusters_per_chunk - (run_start % clusters_per_chunk);
r.emplace_back(run_start, len);
run_start += len;
if (pos == run_start)
break;
} else {
r.emplace_back(run_start, pos - run_start);
break;
}
}
};
while (bdsv.size() >= sizeof(uint32_t)) {
auto v = *(uint32_t*)bdsv.data();
if ((!set && v == 0) || (set && v == 0xffffffff)) {
pos += sizeof(uint32_t) * 8;
bdsv = bdsv.substr(sizeof(uint32_t));
continue;
}
if (!set && v == 0xffffffff) {
run_start = pos;
set = true;
pos += sizeof(uint32_t) * 8;
} else if (set && v == 0) {
if (pos != run_start)
add_run();
set = false;
pos += sizeof(uint32_t) * 8;
} else {
for (unsigned int i = 0; i < sizeof(uint32_t) * 8; i++) {
if (v & 1) {
if (!set) {
run_start = pos;
set = true;
}
} else {
if (set) {
if (pos != run_start)
add_run();
set = false;
}
}
v >>= 1;
pos++;
}
}
bdsv = bdsv.substr(sizeof(uint32_t));
}
while (!bdsv.empty()) {
auto v = *(uint8_t*)bdsv.data();
if ((!set && v == 0) || (set && v == 0xff)) {
pos++;
bdsv = bdsv.substr(1);
continue;
}
if (!set && v == 0xff) {
run_start = pos;
set = true;
pos += 8;
} else if (set && v == 0) {
if (pos != run_start)
add_run();
set = false;
pos += 8;
} else {
for (unsigned int i = 0; i < 8; i++) {
if (v & 1) {
if (!set) {
run_start = pos;
set = true;
}
} else {
if (set) {
if (pos != run_start)
add_run();
set = false;
}
}
v >>= 1;
pos++;
}
}
bdsv = bdsv.substr(1);
}
if (set && run_start != pos)
add_run();
// FIXME - remove any bits after end of volume
}
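// Converts a Windows FILETIME (100ns intervals since 1601-01-01) to a btrfs
// timestamp (seconds and nanoseconds since the Unix epoch).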
static BTRFS_TIME win_time_to_unix(int64_t time) {
uint64_t l = (uint64_t)time - 116444736000000000ULL;
BTRFS_TIME bt;
bt.seconds = l / 10000000;
bt.nanoseconds = (uint32_t)((l % 10000000) * 100);
return bt;
}
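// Links an inode into a directory: adds DIR_ITEM (handling hash collisions by
// appending), DIR_INDEX, and INODE_REF items, and updates the directory's
// size and sequence counters.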
static void link_inode(root& r, uint64_t inode, uint64_t dir, string_view name,
enum btrfs_inode_type type) {
uint64_t seq;
// add DIR_ITEM and DIR_INDEX
if (r.dir_seqs.count(dir) == 0)
r.dir_seqs[dir] = 2;
seq = r.dir_seqs.at(dir);
{
buffer_t buf(offsetof(DIR_ITEM, name[0]) + name.length());
auto& di = *(DIR_ITEM*)buf.data();
di.key.obj_id = inode;
di.key.obj_type = btrfs_key_type::INODE_ITEM;
di.key.offset = 0;
di.transid = 1;
di.m = 0;
di.n = (uint16_t)name.length();
di.type = type;
memcpy(di.name, name.data(), name.length());
auto hash = calc_crc32c(0xfffffffe, (const uint8_t*)name.data(), (uint32_t)name.length());
if (r.items.count(KEY{dir, btrfs_key_type::DIR_ITEM, hash}) == 0)
add_item(r, dir, btrfs_key_type::DIR_ITEM, hash, buf);
else { // hash collision
auto& ent = r.items.at(KEY{dir, btrfs_key_type::DIR_ITEM, hash});
if (!ent.empty()) {
ent.resize(ent.size() + buf.size());
memcpy(ent.data() + ent.size() - buf.size(), &di, buf.size());
} else {
ent.resize(buf.size());
memcpy(ent.data(), &di, buf.size());
}
}
add_item_move(r, dir, btrfs_key_type::DIR_INDEX, seq, buf);
}
// add INODE_REF
add_inode_ref(r, inode, dir, seq, name);
// increase st_size in parent dir
if (r.dir_size.count(dir) == 0)
r.dir_size[dir] = name.length() * 2;
else
r.dir_size.at(dir) += name.length() * 2;
r.dir_seqs[dir]++;
}
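// Assigns the cluster range [offset, offset + length) to the given inode by
// splitting the matching entries in the per-chunk runs map; returns true if
// any run was split.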
static bool split_runs(const ntfs& dev, runs_t& runs, uint64_t offset, uint64_t length, uint64_t inode, uint64_t file_offset) {
uint64_t clusters_per_chunk = data_chunk_size / ((uint64_t)dev.boot_sector->BytesPerSector * (uint64_t)dev.boot_sector->SectorsPerCluster);
bool ret = false;
while (true) {
uint64_t chunk = offset / clusters_per_chunk;
uint64_t length2 = min(length, clusters_per_chunk - (offset % clusters_per_chunk));
if (runs.count(chunk) != 0) {
auto& rl = runs[chunk];
for (auto it = rl.begin(); it != rl.end(); it++) {
auto& r = *it;
if (r.offset > offset + length2)
break;
if (offset + length2 > r.offset && offset < r.offset + r.length) {
if (offset >= r.offset && offset + length2 <= r.offset + r.length) { // cut out middle
if (offset > r.offset)
rl.emplace(it, r.offset, offset - r.offset);
rl.emplace(it, offset, length2, inode, file_offset, r.relocated);
if (offset + length2 < r.offset + r.length) {
r.length = r.offset + r.length - offset - length2;
r.offset = offset + length2;
} else
rl.erase(it);
ret = true;
break;
}
throw formatted_error("Error assigning space to file. This can occur if the space bitmap has become corrupted. Run chkdsk and try again.");
}
}
}
if (length2 == length)
return ret;
offset += length2;
length -= length2;
}
}
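// Takes a file's NTFS cluster mappings, splits them at data-chunk boundaries,
// rewrites any parts that overlap relocated ranges (such as superblock
// locations), and then records the clusters as belonging to this inode.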
static void process_mappings(const ntfs& dev, uint64_t inode, list<mapping>& mappings, runs_t& runs) {
uint64_t cluster_size = (uint64_t)dev.boot_sector->BytesPerSector * (uint64_t)dev.boot_sector->SectorsPerCluster;
uint64_t clusters_per_chunk = data_chunk_size / cluster_size;
list<mapping> mappings2;
// avoid chunk boundaries
for (const auto& m : mappings) {
if (m.lcn == 0) // sparse
continue;
uint64_t chunk_start = m.lcn / clusters_per_chunk;
uint64_t chunk_end = ((m.lcn + m.length) - 1) / clusters_per_chunk;
if (chunk_end > chunk_start) {
uint64_t start = m.lcn, vcn = m.vcn;
do {
uint64_t end = min((((start / clusters_per_chunk) + 1) * clusters_per_chunk), m.lcn + m.length);
if (end == start)
break;
mappings2.emplace_back(start, vcn, end - start);
vcn += end - start;
start = end;
} while (true);
} else
mappings2.emplace_back(m.lcn, m.vcn, m.length);
}
mappings.clear();
mappings.splice(mappings.begin(), mappings2);
// change to avoid superblocks
for (auto& r : relocs) {
for (auto it = mappings.begin(); it != mappings.end(); it++) {
auto& m = *it;
if (m.lcn + m.length > r.old_start && m.lcn < r.old_start + r.length) {
if (m.lcn >= r.old_start && m.lcn + m.length <= r.old_start + r.length) { // change whole mapping
if (r.old_start < m.lcn) { // reloc starts before mapping
for (auto& rs : runs) { // FIXME - optimize
for (auto it2 = rs.second.begin(); it2 != rs.second.end(); it2++) {
auto& r2 = *it2;
if (r2.offset == r.old_start) {
rs.second.emplace(it2, r2.offset, m.lcn - r2.offset, dummy_inode);
r2.length -= m.lcn - r2.offset;
r2.offset = m.lcn;
}
if (r2.offset == r.new_start) {
rs.second.emplace(it2, r2.offset, m.lcn - r.old_start, 0, 0, true);
r2.offset += m.lcn - r.old_start;
r2.length -= m.lcn - r.old_start;
}
}
}
relocs.emplace_back(r.old_start, m.lcn - r.old_start, r.new_start);
r.length -= m.lcn - r.old_start;
r.new_start += m.lcn - r.old_start;
r.old_start = m.lcn;
}
if (r.old_start + r.length > m.lcn + m.length) { // reloc goes beyond end of mapping
relocs.emplace_back(m.lcn + m.length, r.old_start + r.length - m.lcn - m.length,
r.new_start + m.lcn + m.length - r.old_start);
r.length = m.lcn + m.length - r.old_start;
for (auto& rs : runs) { // FIXME - optimize
for (auto it2 = rs.second.begin(); it2 != rs.second.end(); it2++) {
auto& r2 = *it2;
if (r2.offset == r.old_start) {
rs.second.emplace(it2, r.old_start, m.lcn + m.length - r.old_start, dummy_inode);
r2.length -= m.lcn + m.length - r2.offset;
r2.offset = m.lcn + m.length;
}
if (r2.offset == r.new_start) {
rs.second.emplace(it2, r2.offset, m.lcn + m.length - r.old_start, 0, 0, true);
r2.offset += m.lcn + m.length - r.old_start;
r2.length -= m.lcn + m.length - r.old_start;
}
}
}
}
m.lcn -= r.old_start;
m.lcn += r.new_start;
} else if (m.lcn <= r.old_start && m.lcn + m.length >= r.old_start + r.length) { // change middle
if (m.lcn < r.old_start) {
mappings.emplace(it, m.lcn, m.vcn, r.old_start - m.lcn);
m.vcn += r.old_start - m.lcn;
m.length -= r.old_start - m.lcn;
m.lcn = r.old_start;
}
if (m.lcn + m.length > r.old_start + r.length) {
mappings.emplace(it, r.new_start, m.vcn, r.length);
m.lcn = r.old_start + r.length;
m.length -= r.length;
m.vcn += r.length;
} else {
m.lcn -= r.old_start;
m.lcn += r.new_start;
}
} else if (m.lcn < r.old_start && m.lcn + m.length <= r.old_start + r.length) { // change end
mappings.emplace(it, m.lcn, m.vcn, r.old_start - m.lcn);
m.vcn += r.old_start - m.lcn;
m.length -= r.old_start - m.lcn;
m.lcn = r.new_start;
if (r.length > m.length) {
relocs.emplace_back(r.old_start + m.length, r.length - m.length, r.new_start + m.length);
r.length = m.length;
for (auto& rs : runs) { // FIXME - optimize
bool found = false;
for (auto it2 = rs.second.begin(); it2 != rs.second.end(); it2++) {
auto& r2 = *it2;
if (r2.offset == r.old_start) {
rs.second.emplace(it2, r2.offset, m.length, dummy_inode);
r2.offset += m.length;
r2.length -= m.length;
found = true;
break;
}
}
if (found)
break;
}
}
} else if (m.lcn > r.old_start && m.lcn + m.length > r.old_start + r.length) { // change beginning
auto orig_r = r;
if (r.old_start < m.lcn) {
for (auto& rs : runs) { // FIXME - optimize
for (auto it2 = rs.second.begin(); it2 != rs.second.end(); it2++) {
auto& r2 = *it2;
if (r2.offset == r.old_start) {
rs.second.emplace(it2, r2.offset, m.lcn - r2.offset, dummy_inode);
r2.length -= m.lcn - r2.offset;
r2.offset = m.lcn;
}
if (r2.offset == r.new_start) {
rs.second.emplace(it2, r2.offset, m.lcn - r.old_start, 0, 0, true);
r2.offset += m.lcn - r.old_start;
r2.length -= m.lcn - r.old_start;
}
}
}
relocs.emplace_back(m.lcn, r.old_start + r.length - m.lcn, r.new_start + m.lcn - r.old_start);
r.length = m.lcn - r.old_start;
}
mappings.emplace(it, m.lcn - orig_r.old_start + orig_r.new_start, m.vcn, orig_r.old_start + orig_r.length - m.lcn);
m.vcn += orig_r.old_start + orig_r.length - m.lcn;
m.length -= orig_r.old_start + orig_r.length - m.lcn;
m.lcn = orig_r.old_start + orig_r.length;
}
}
}
}
for (const auto& m : mappings) {
split_runs(dev, runs, m.lcn, m.length, inode, m.vcn);
}
}
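// Adds an xattr for the given inode: the name and value are packed into a DIR_ITEM
// structure and stored as an XATTR_ITEM keyed on the CRC32C hash of the name.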
static void set_xattr(root& r, uint64_t inode, string_view name, uint32_t hash, const buffer_t& data) {
buffer_t buf(offsetof(DIR_ITEM, name[0]) + name.size() + data.size());
auto& di = *(DIR_ITEM*)buf.data();
di.key.obj_id = di.key.offset = 0;
di.key.obj_type = (btrfs_key_type)0;
di.transid = 1;
di.m = (uint16_t)data.size();
di.n = (uint16_t)name.size();
di.type = btrfs_inode_type::ea;
memcpy(di.name, name.data(), name.size());
memcpy(di.name + name.size(), data.data(), data.size());
add_item_move(r, inode, btrfs_key_type::XATTR_ITEM, hash, buf);
}
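// Blanks the current console line so progress output written with a trailing \r
// doesn't leave stale characters behind.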
static void clear_line() {
#ifdef _WIN32
CONSOLE_SCREEN_BUFFER_INFO csbi;
HANDLE console = GetStdHandle(STD_OUTPUT_HANDLE);
// FIXME - flickers on Windows?
if (GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &csbi)) {
DWORD written;
SetConsoleCursorPosition(console, { 0, csbi.dwCursorPosition.Y });
u16string spaces(csbi.dwSize.X, u' ');
WriteConsoleW(console, spaces.data(), (DWORD)spaces.length(), &written, nullptr);
SetConsoleCursorPosition(console, { 0, csbi.dwCursorPosition.Y });
}
#else
fmt::print("\33[2K");
fflush(stdout);
#endif
}
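// ASCII-only case-insensitive string comparison, used below to recognize
// pagefile.sys etc. whatever their case on disk.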
static bool string_eq_ci(string_view s1, string_view s2) {
if (s1.length() != s2.length())
return false;
auto c1 = &s1[0];
auto c2 = &s2[0];
for (size_t i = 0; i < s1.length(); i++) {
auto c1a = *c1;
auto c2a = *c2;
if (c1a >= 'A' && c1a <= 'Z')
c1a = c1a - 'A' + 'a';
if (c2a >= 'A' && c2a <= 'Z')
c2a = c2a - 'A' + 'a';
if (c1a != c2a)
return false;
c1++;
c2++;
}
return true;
}
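// If a filename was cut to 255 bytes the final UTF-8 code point may have been split;
// drop any trailing partial sequence so the truncated name remains valid UTF-8.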
static void fix_truncated_utf8(string& s) {
if (!((uint8_t)s.back() & 0x80)) // one-byte sequence
return;
if (((uint8_t)s.back() & 0xC0) == 0xC0) { // first byte of CP, nothing following
s.pop_back();
return;
}
if (((uint8_t)s[s.length() - 2] & 0xE0) == 0xC0) // full two-byte sequence
return;
if (((uint8_t)s[s.length() - 2] & 0xE0) == 0xE0) { // three- or four-byte CP, two bytes
s.pop_back();
s.pop_back();
return;
}
if (((uint8_t)s[s.length() - 3] & 0xF0) == 0xE0) // full three-byte sequence
return;
if (((uint8_t)s[s.length() - 3] & 0xF0) == 0xF0) { // four-byte CP, three bytes
s.pop_back();
s.pop_back();
s.pop_back();
return;
}
}
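// Converts one NTFS file record into a btrfs inode: walks its attributes
// (STANDARD_INFORMATION, DATA and alternate data streams, FILE_NAME, REPARSE_POINT,
// SECURITY_DESCRIPTOR, EA), translates WSL/LXSS EAs into POSIX metadata, decompresses
// LZNT1- and WOF-compressed data where necessary, then emits the INODE_ITEM,
// EXTENT_DATA items, directory links, and xattrs.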
static void add_inode(root& r, uint64_t inode, uint64_t ntfs_inode, bool& is_dir, runs_t& runs,
ntfs_file& secure, ntfs& dev, const list<uint64_t>& skiplist, enum btrfs_compression opt_compression,
bool nocsum) {
INODE_ITEM ii;
uint64_t file_size = 0;
list<mapping> mappings, wof_mappings;
vector<tuple<uint64_t, string>> links;
buffer_t standard_info, sd, reparse_point, inline_data;
string symlink;
uint32_t atts;
bool atts_set = false;
map<string, pair<uint32_t, buffer_t>> xattrs;
string filename;
buffer_t wof_compressed_data;
uint32_t cluster_size = dev.boot_sector->BytesPerSector * dev.boot_sector->SectorsPerCluster;
bool processed_data = false, processed_wof_data = false, skipping = false;
uint16_t compression_unit = 0;
uint64_t vdl, wof_vdl;
vector<string> warnings;
map<string, buffer_t> eas;
static const uint32_t sector_size = 0x1000; // FIXME
ntfs_file f(dev, ntfs_inode);
if (f.file_record->BaseFileRecordSegment.SegmentNumber != 0)
return;
is_dir = f.is_directory();
auto add_warning = [&]<typename... Args>(fmt::format_string<Args...> s, Args&&... args) {
if (filename.empty())
filename = f.get_filename();
warnings.emplace_back(filename + ": " + fmt::format(s, forward<Args>(args)...));
};
f.loop_through_atts([&](const ATTRIBUTE_RECORD_HEADER& att, string_view res_data, u16string_view name) -> bool {
switch (att.TypeCode) {
case ntfs_attribute::STANDARD_INFORMATION:
if (att.FormCode == NTFS_ATTRIBUTE_FORM::NONRESIDENT_FORM)
throw formatted_error("Error - STANDARD_INFORMATION is non-resident"); // FIXME - can this happen?
standard_info.resize(res_data.length());
memcpy(standard_info.data(), res_data.data(), res_data.length());
break;
case ntfs_attribute::DATA:
if (name.empty()) { // main file data
if (att.Flags & ATTRIBUTE_FLAG_ENCRYPTED) {
clear_line();
if (filename.empty())
filename = f.get_filename();
fmt::print(stderr, "Skipping encrypted inode {:x} ({})\n", inode - inode_offset, filename);
skipping = true;
return true;
}
if (att.FormCode == NTFS_ATTRIBUTE_FORM::RESIDENT_FORM && !processed_data) {
file_size = vdl = att.Form.Resident.ValueLength;
inline_data.resize(res_data.size());
memcpy(inline_data.data(), res_data.data(), res_data.size());
} else {
if (!processed_data) {
file_size = att.Form.Nonresident.FileSize;
compression_unit = att.Form.Nonresident.CompressionUnit;
vdl = att.Form.Nonresident.ValidDataLength;
if (!(att.Flags & ATTRIBUTE_FLAG_COMPRESSION_MASK))
compression_unit = 0;
}
list<mapping> mappings2;
uint64_t last_vcn;
if (mappings.empty())
last_vcn = 0;
else
last_vcn = mappings.back().vcn + mappings.back().length;
if (last_vcn < att.Form.Nonresident.LowestVcn)
mappings.emplace_back(0, last_vcn, att.Form.Nonresident.LowestVcn - last_vcn);
read_nonresident_mappings(att, mappings2, cluster_size, vdl);
mappings.splice(mappings.end(), mappings2);
}
processed_data = true;
} else { // ADS
static const char xattr_prefix[] = "user.";
auto ads_name = utf16_to_utf8(name);
auto max_xattr_size = (uint32_t)(tree_size - sizeof(tree_header) - sizeof(leaf_node) - offsetof(DIR_ITEM, name[0]) - ads_name.length() - (sizeof(xattr_prefix) - 1));
// FIXME - check xattr_name not reserved
if (att.Flags & ATTRIBUTE_FLAG_ENCRYPTED) {
add_warning("Skipping encrypted ADS :{}", ads_name);
break;
}
if (att.Flags & ATTRIBUTE_FLAG_COMPRESSION_MASK) {
add_warning("Skipping compressed ADS :{}", ads_name); // FIXME
break;
}
auto name2 = xattr_prefix + ads_name;
if (name2.size() > 255) {
add_warning("Skipping ADS :{} as name too long", ads_name);
break;
}
uint32_t hash = calc_crc32c(0xfffffffe, (const uint8_t*)name2.data(), (uint32_t)name2.length());
if (att.FormCode == NTFS_ATTRIBUTE_FORM::RESIDENT_FORM && (ads_name != "WofCompressedData" || !processed_wof_data)) {
if (ads_name == "WofCompressedData") {
wof_compressed_data.resize(res_data.length());
memcpy(wof_compressed_data.data(), res_data.data(), res_data.length());
} else {
if (att.Form.Resident.ValueLength > max_xattr_size) {
add_warning("Skipping overly large ADS :{} ({} > {})", ads_name, att.Form.Resident.ValueLength, max_xattr_size);
break;
}
buffer_t buf(res_data.size());
memcpy(buf.data(), res_data.data(), res_data.size());
xattrs.emplace(name2, make_pair(hash, buf));
}
} else {
if (att.Form.Nonresident.FileSize > max_xattr_size && ads_name != "WofCompressedData") {
add_warning("Skipping overly large ADS :{} ({} > {})", ads_name, att.Form.Nonresident.FileSize, max_xattr_size);
break;
}
if (ads_name == "WofCompressedData") {
if (!processed_wof_data) {
wof_compressed_data.resize(att.Form.Nonresident.FileSize);
wof_vdl = att.Form.Nonresident.ValidDataLength;
}
list<mapping> mappings2;
uint64_t last_vcn;
if (wof_mappings.empty())
last_vcn = 0;
else
last_vcn = wof_mappings.back().vcn + wof_mappings.back().length;
if (last_vcn < att.Form.Nonresident.LowestVcn)
wof_mappings.emplace_back(0, last_vcn, att.Form.Nonresident.LowestVcn - last_vcn);
read_nonresident_mappings(att, mappings2, cluster_size, wof_vdl);
wof_mappings.splice(wof_mappings.end(), mappings2);
processed_wof_data = true;
} else {
list<mapping> ads_mappings;
read_nonresident_mappings(att, ads_mappings, cluster_size, att.Form.Nonresident.ValidDataLength);
buffer_t ads_data((size_t)sector_align(att.Form.Nonresident.FileSize, cluster_size));
memset(ads_data.data(), 0, ads_data.size());
for (const auto& m : ads_mappings) {
dev.seek(m.lcn * cluster_size);
dev.read(ads_data.data() + (m.vcn * cluster_size), (size_t)(m.length * cluster_size));
}
ads_data.resize((size_t)att.Form.Nonresident.FileSize);
xattrs.emplace(name2, make_pair(hash, ads_data));
}
}
}
break;
case ntfs_attribute::FILE_NAME: {
if (att.FormCode == NTFS_ATTRIBUTE_FORM::NONRESIDENT_FORM)
throw formatted_error("Error - FILE_NAME is non-resident"); // FIXME - can this happen?
if (att.Form.Resident.ValueLength < offsetof(FILE_NAME, FileName[0]))
throw formatted_error("FILE_NAME was truncated");
auto fn = reinterpret_cast<const FILE_NAME*>(res_data.data());
if (fn->Namespace != file_name_type::DOS) {
if (att.Form.Resident.ValueLength < offsetof(FILE_NAME, FileName[0]) + (fn->FileNameLength * sizeof(char16_t)))
throw formatted_error("FILE_NAME was truncated");
auto name2 = utf16_to_utf8(u16string_view((char16_t*)fn->FileName, fn->FileNameLength));
if (name2.length() > 255) {
// FIXME - make sure no collision with existing file
name2 = name2.substr(0, 255);
fix_truncated_utf8(name2);
add_warning("Name was too long, truncating to {}.", name2);
}
uint64_t parent = fn->Parent.SegmentNumber;
if (!is_dir || links.empty()) {
bool skip = false;
for (auto n : skiplist) {
if (n == parent) {
skip = true;
break;
}
}
if (!skip) {
for (const auto& l : links) {
if (get<0>(l) == parent && get<1>(l) == name2) {
skip = true;
break;
}
}
}
if (!skip)
links.emplace_back(parent, name2);
}
}
break;
}
case ntfs_attribute::REPARSE_POINT: {
if (att.FormCode == NTFS_ATTRIBUTE_FORM::RESIDENT_FORM) {
reparse_point.resize(res_data.size());
memcpy(reparse_point.data(), res_data.data(), res_data.size());
} else {
list<mapping> rp_mappings;
read_nonresident_mappings(att, rp_mappings, cluster_size, att.Form.Nonresident.ValidDataLength);
reparse_point.resize((size_t)sector_align(att.Form.Nonresident.FileSize, cluster_size));
memset(reparse_point.data(), 0, reparse_point.size());
for (const auto& m : rp_mappings) {
dev.seek(m.lcn * cluster_size);
dev.read(reparse_point.data() + (m.vcn * cluster_size), (size_t)(m.length * cluster_size));
}
reparse_point.resize((size_t)att.Form.Nonresident.FileSize);
}
symlink.clear();
if (is_dir)
break;
const auto& rpb = *reinterpret_cast<const REPARSE_DATA_BUFFER*>(reparse_point.data());
if (reparse_point.size() < offsetof(REPARSE_DATA_BUFFER, Reserved) ||
reparse_point.size() < rpb.ReparseDataLength + offsetof(REPARSE_DATA_BUFFER, GenericReparseBuffer.DataBuffer)) {
add_warning("Reparse point buffer was truncated.");
break;
}
auto len = rpb.ReparseDataLength + offsetof(REPARSE_DATA_BUFFER, GenericReparseBuffer.DataBuffer);
switch (rpb.ReparseTag) {
case IO_REPARSE_TAG_SYMLINK:
if (len < offsetof(REPARSE_DATA_BUFFER, SymbolicLinkReparseBuffer.PathBuffer) ||
(len < offsetof(REPARSE_DATA_BUFFER, SymbolicLinkReparseBuffer.PathBuffer) +
rpb.SymbolicLinkReparseBuffer.PrintNameOffset +
rpb.SymbolicLinkReparseBuffer.PrintNameLength)) {
add_warning("Symlink reparse point buffer was truncated.");
} else if (rpb.SymbolicLinkReparseBuffer.Flags & SYMLINK_FLAG_RELATIVE) {
u16string_view sv(&rpb.SymbolicLinkReparseBuffer.PathBuffer[rpb.SymbolicLinkReparseBuffer.PrintNameOffset / sizeof(char16_t)],
rpb.SymbolicLinkReparseBuffer.PrintNameLength / sizeof(char16_t));
symlink = utf16_to_utf8(sv);
for (auto& c : symlink) {
if (c == '\\')
c = '/';
}
reparse_point.clear();
}
break;
case IO_REPARSE_TAG_LX_SYMLINK:
if (len < offsetof(REPARSE_DATA_BUFFER, LxSymlink.name))
add_warning("LXSS reparse point buffer was truncated.");
else {
symlink = string_view(rpb.LxSymlink.name, len - offsetof(REPARSE_DATA_BUFFER, LxSymlink.name));
reparse_point.clear();
}
break;
}
break;
}
case ntfs_attribute::SECURITY_DESCRIPTOR: {
auto max_sd_size = (uint32_t)(tree_size - sizeof(tree_header) - sizeof(leaf_node) - offsetof(DIR_ITEM, name[0]) - sizeof(EA_NTACL) + 1);
if (att.FormCode == NTFS_ATTRIBUTE_FORM::RESIDENT_FORM) {
if (att.Form.Resident.ValueLength > max_sd_size) {
add_warning("Skipping overly large SD ({} > {})", att.Form.Resident.ValueLength, max_sd_size);
break;
}
sd.resize(res_data.size());
memcpy(sd.data(), res_data.data(), res_data.size());
} else {
if (att.Form.Nonresident.FileSize > max_sd_size) {
add_warning("Skipping overly large SD ({} > {})", att.Form.Nonresident.FileSize, max_sd_size);
break;
}
list<mapping> sd_mappings;
read_nonresident_mappings(att, sd_mappings, cluster_size, att.Form.Nonresident.ValidDataLength);
sd.resize((size_t)sector_align(att.Form.Nonresident.FileSize, cluster_size));
memset(sd.data(), 0, sd.size());
for (const auto& m : sd_mappings) {
dev.seek(m.lcn * cluster_size);
dev.read(sd.data() + (m.vcn * cluster_size), (size_t)(m.length * cluster_size));
}
sd.resize((size_t)att.Form.Nonresident.FileSize);
}
break;
}
case ntfs_attribute::EA: {
buffer_t eabuf;
string_view sv;
if (att.FormCode == NTFS_ATTRIBUTE_FORM::NONRESIDENT_FORM) {
list<mapping> ea_mappings;
read_nonresident_mappings(att, ea_mappings, cluster_size, att.Form.Nonresident.ValidDataLength);
eabuf.resize((size_t)sector_align(att.Form.Nonresident.FileSize, cluster_size));
memset(eabuf.data(), 0, eabuf.size());
for (const auto& m : ea_mappings) {
dev.seek(m.lcn * cluster_size);
dev.read(eabuf.data() + (m.vcn * cluster_size), (size_t)(m.length * cluster_size));
}
sv = string_view((char*)eabuf.data(), (size_t)att.Form.Nonresident.FileSize);
} else
sv = res_data;
do {
auto& ead = *(ea_data*)sv.data();
if (sv.length() < offsetof(ea_data, EaName)) {
add_warning("truncated EA ({} bytes, expected at least {})", sv.length(), offsetof(ea_data, EaName));
break;
}
if (ead.NextEntryOffset > sv.length()) {
add_warning("truncated EA ({} > {})", ead.NextEntryOffset, sv.length());
break;
}
if (offsetof(ea_data, EaName) + ead.EaNameLength + 1 + ead.EaValueLength > ead.NextEntryOffset) {
add_warning("EA overflow ({} + {} + 1 + {} > {})", offsetof(ea_data, EaName), ead.EaNameLength,
ead.EaValueLength, ead.NextEntryOffset);
break;
}
auto ea_name = string_view(ead.EaName, ead.EaNameLength);
buffer_t ea_value(ead.EaValueLength);
memcpy(ea_value.data(), &ead.EaName[ead.EaNameLength + 1], ea_value.size());
eas.emplace(ea_name, move(ea_value));
sv = sv.substr(ead.NextEntryOffset);
} while (!sv.empty());
break;
}
default:
break;
}
return true;
});
if (skipping)
return;
// skip page files
if (links.size() == 1 && get<0>(links.front()) == NTFS_ROOT_DIR_INODE) {
if (string_eq_ci(get<1>(links.front()), "pagefile.sys") || string_eq_ci(get<1>(links.front()), "hiberfil.sys") ||
string_eq_ci(get<1>(links.front()), "swapfile.sys"))
return;
}
if (links.empty())
return; // don't create orphaned inodes
// FIXME - form user.EA xattr from EAs we don't recognize
memset(&ii, 0, sizeof(INODE_ITEM));
optional<uint32_t> mode;
enum btrfs_inode_type item_type = btrfs_inode_type::unknown;
bool has_lxattrb = false;
auto set_mode = [&](uint32_t& m) {
if (is_dir && !__S_ISTYPE(m, __S_IFDIR)) {
add_warning("st_mode did not have S_IFDIR set, setting.");
m &= ~__S_IFMT;
m |= __S_IFDIR;
} else if (!is_dir && __S_ISTYPE(m, __S_IFDIR)) {
add_warning("st_mode had S_IFDIR set, clearing.");
m &= ~__S_IFMT;
m |= __S_IFREG;
}
switch (m & __S_IFMT) {
case __S_IFREG:
item_type = btrfs_inode_type::file;
break;
case __S_IFDIR:
item_type = btrfs_inode_type::directory;
break;
case __S_IFCHR:
item_type = btrfs_inode_type::chardev;
break;
case __S_IFBLK:
item_type = btrfs_inode_type::blockdev;
break;
case __S_IFIFO:
item_type = btrfs_inode_type::fifo;
break;
case __S_IFSOCK:
item_type = btrfs_inode_type::socket;
break;
case __S_IFLNK:
item_type = btrfs_inode_type::symlink;
break;
default:
add_warning("Unrecognized inode type {:o}.", m & __S_IFMT);
}
};
for (const auto& ea : eas) {
const auto& n = ea.first;
const auto& v = ea.second;
if (n == "$LXUID") {
if (v.size() != sizeof(uint32_t)) {
add_warning("$LXUID EA was {} bytes, expected {}", v.size(), sizeof(uint32_t));
continue;
}
ii.st_uid = *(uint32_t*)v.data();
} else if (n == "$LXGID") {
if (v.size() != sizeof(uint32_t)) {
add_warning("$LXGID EA was {} bytes, expected {}", v.size(), sizeof(uint32_t));
continue;
}
ii.st_gid = *(uint32_t*)v.data();
} else if (n == "$LXMOD") {
if (v.size() != sizeof(uint32_t)) {
add_warning("$LXMOD EA was {} bytes, expected {}", v.size(), sizeof(uint32_t));
continue;
}
mode = *(uint32_t*)v.data();
set_mode(mode.value());
} else if (n == "$LXDEV") {
if (v.size() != sizeof(lxdev)) {
add_warning("$LXDEV EA was {} bytes, expected {}", v.size(), sizeof(lxdev));
continue;
}
const auto& d = *(lxdev*)v.data();
if (d.minor >= 0x100000) {
add_warning("minor value {} is too large for Btrfs", d.minor);
continue;
}
ii.st_rdev = (d.major << 20) | (d.minor & 0xfffff);
} else if (n == "LXATTRB") {
if (v.size() != sizeof(lxattrb)) {
add_warning("LXATTRB EA was {} bytes, expected {}", v.size(), sizeof(lxattrb));
continue;
}
const auto& l = *(lxattrb*)v.data();
if (l.format != 0) {
add_warning("LXATTRB format was {}, expected 0", l.format);
continue;
}
if (l.version != 1) {
add_warning("LXATTRB version was {}, expected 1", l.version);
continue;
}
mode = l.mode;
set_mode(mode.value());
ii.st_uid = l.uid;
ii.st_gid = l.gid;
ii.st_rdev = l.rdev;
ii.st_atime.seconds = l.atime;
ii.st_atime.nanoseconds = l.atime_ns;
ii.st_mtime.seconds = l.mtime;
ii.st_mtime.nanoseconds = l.mtime_ns;
ii.st_ctime.seconds = l.ctime;
ii.st_ctime.nanoseconds = l.ctime_ns;
has_lxattrb = true;
} else if (n == "LX.SECURITY.CAPABILITY") {
static const string_view lxea = "lxea";
if (v.size() < lxea.length()) {
add_warning("LX.SECURITY.CAPABILITY EA was {} bytes, expected at least {}", v.size(), lxea.length());
continue;
}
if (string_view((char*)v.data(), lxea.length()) != lxea) {
add_warning("LX.SECURITY.CAPABILITY EA prefix was not \"{}\"", lxea);
continue;
}
buffer_t v2(v.size() - lxea.length());
memcpy(v2.data(), v.data() + lxea.length(), v2.size());
xattrs.emplace(EA_CAP, make_pair(EA_CAP_HASH, v2));
} else {
static const string_view recognized_eas[] = {
"$KERNEL.PURGE.APPXFICACHE",
"$KERNEL.PURGE.ESBCACHE",
"$CI.CATALOGHINT",
"C8A05BC0-3FA8-49E9-8148-61EE14A67687.CSC.DATABASE",
"C8A05BC0-3FA8-49E9-8148-61EE14A67687.CSC.DATABASEEX1",
"C8A05BC0-3FA8-49E9-8148-61EE14A67687.CSC.EPOCHEA",
"APPLICENSING",
"C8A05BC0-3FA8-49E9-8148-61EE14A67687.CSC.COMMON",
"C8A05BC0-3FA8-49E9-8148-61EE14A67687.CSC.COMMONEX",
"C8A05BC0-3FA8-49E9-8148-61EE14A67687.CSC.COMMONEX_1"
"C8A05BC0-3FA8-49E9-8148-61EE14A67687.CSC.USER",
"$KERNEL.PURGE.SMARTLOCKER.VALID",
"$KERNEL.SMARTLOCKER.ORIGINCLAIM",
"$KERNEL.PURGE.APPID.HASHINFO",
"$KERNEL.SMARTLOCKER.HASH",
"$KERNEL.PURGE.CIPCACHE",
"$KERNEL.SMARTLOCKER.UNINSTALLSTRINGS"
};
bool found = false;
for (const auto& r : recognized_eas) {
if (r == n) {
found = true;
break;
}
}
if (!found)
add_warning("Unrecognized EA {}", n);
}
}
if (!wof_mappings.empty()) {
auto len = wof_compressed_data.size();
wof_compressed_data.resize(sector_align(len, cluster_size));
memset(wof_compressed_data.data(), 0, wof_compressed_data.size());
for (const auto& m : wof_mappings) {
dev.seek(m.lcn * cluster_size);
dev.read(wof_compressed_data.data() + (m.vcn * cluster_size), (size_t)(m.length * cluster_size));
}
wof_compressed_data.resize(len);
}
if (compression_unit != 0) {
uint64_t cus = 1ull << compression_unit;
buffer_t compdata((size_t)(cus * cluster_size));
try {
while (!mappings.empty()) {
uint64_t clusters = 0, compsize;
bool compressed = false;
while (clusters < cus) {
if (mappings.empty()) {
compressed = true;
memset(compdata.data() + (clusters * cluster_size), 0, (size_t)((cus - clusters) * cluster_size));
break;
}
auto& m = mappings.front();
auto l = min(m.length, cus - clusters);
if (m.lcn == 0) {
memset(compdata.data() + (clusters * cluster_size), 0, (size_t)(l * cluster_size));
if (l < m.length) {
m.vcn += l;
m.length -= l;
} else
mappings.pop_front();
compressed = true;
} else {
dev.seek(m.lcn * cluster_size);
dev.read(compdata.data() + (clusters * cluster_size), (size_t)(l * cluster_size));
if (l < m.length) {
m.lcn += l;
m.vcn += l;
m.length -= l;
} else
mappings.pop_front();
}
clusters += l;
}
if (!compressed) {
if (filename.empty())
filename = f.get_filename();
inline_data.insert(inline_data.end(), compdata.begin(), compdata.end());
} else {
compsize = compdata.size();
if (file_size - inline_data.size() < compsize)
compsize = file_size - inline_data.size();
auto decomp = lznt1_decompress(string_view((char*)compdata.data(), compdata.size()), (uint32_t)compsize);
inline_data.insert(inline_data.end(), decomp.begin(), decomp.end());
}
if (inline_data.size() >= file_size) {
inline_data.resize((size_t)file_size);
mappings.clear();
break;
}
}
} catch (const exception& e) {
if (filename.empty())
filename = f.get_filename();
throw formatted_error("{}: {}", filename, e.what());
}
}
for (const auto& w : warnings) {
fmt::print(stderr, "{}\n", w);
}
#undef add_warning
const auto& si = *(const STANDARD_INFORMATION*)standard_info.data();
if (standard_info.size() >= offsetof(STANDARD_INFORMATION, MaximumVersions)) {
uint32_t defda = 0;
atts = si.FileAttributes;
if (links.size() == 1 && get<1>(links[0])[0] == '.')
defda |= FILE_ATTRIBUTE_HIDDEN;
if (is_dir) {
defda |= FILE_ATTRIBUTE_DIRECTORY;
atts |= FILE_ATTRIBUTE_DIRECTORY;
} else {
defda |= FILE_ATTRIBUTE_ARCHIVE;
atts &= ~FILE_ATTRIBUTE_DIRECTORY;
}
if (!reparse_point.empty() || !symlink.empty())
atts |= FILE_ATTRIBUTE_REPARSE_POINT;
else
atts &= ~FILE_ATTRIBUTE_REPARSE_POINT;
if (atts != defda)
atts_set = true;
}
if (standard_info.size() >= offsetof(STANDARD_INFORMATION, OwnerId)) {
ii.otime = win_time_to_unix(si.CreationTime);
if (!has_lxattrb) {
ii.st_atime = win_time_to_unix(si.LastAccessTime);
ii.st_mtime = win_time_to_unix(si.LastWriteTime);
ii.st_ctime = win_time_to_unix(si.ChangeTime);
}
}
if (sd.empty() && standard_info.size() >= offsetof(STANDARD_INFORMATION, QuotaCharged)) {
auto sv = dev.find_sd(si.SecurityId, secure);
if (sv.empty()) {
clear_line();
if (filename.empty())
filename = f.get_filename();
fmt::print(stderr, "Could not find SecurityId {} ({})\n", si.SecurityId, filename);
} else {
sd.resize(sv.size());
memcpy(sd.data(), sv.data(), sv.size());
}
}
if (reparse_point.size() > sizeof(uint32_t) && *(uint32_t*)reparse_point.data() == IO_REPARSE_TAG_WOF) {
try {
if (reparse_point.size() < offsetof(reparse_point_header, DataBuffer)) {
throw formatted_error("IO_REPARSE_TAG_WOF reparse point buffer was {} bytes, expected at least {}.",
reparse_point.size(), offsetof(reparse_point_header, DataBuffer));
}
auto rph = (reparse_point_header*)reparse_point.data();
if (reparse_point.size() < offsetof(reparse_point_header, DataBuffer) + rph->ReparseDataLength) {
throw formatted_error("IO_REPARSE_TAG_WOF reparse point buffer was {} bytes, expected {}.",
reparse_point.size(), offsetof(reparse_point_header, DataBuffer) + rph->ReparseDataLength);
}
if (rph->ReparseDataLength < sizeof(wof_external_info)) {
throw formatted_error("rph->ReparseDataLength was {} bytes, expected at least {}.",
rph->ReparseDataLength, sizeof(wof_external_info));
}
auto wofei = (wof_external_info*)rph->DataBuffer;
if (wofei->Version != WOF_CURRENT_VERSION)
throw formatted_error("Unsupported WOF version {}.", wofei->Version);
if (wofei->Provider == WOF_PROVIDER_WIM)
throw formatted_error("Unsupported WOF provider WOF_PROVIDER_WIM.");
else if (wofei->Provider != WOF_PROVIDER_FILE)
throw formatted_error("Unsupported WOF provider {}.", wofei->Provider);
if (rph->ReparseDataLength < sizeof(wof_external_info) + sizeof(file_provider_external_info_v0)) {
throw formatted_error("rph->ReparseDataLength was {} bytes, expected {}.",
rph->ReparseDataLength, sizeof(wof_external_info) + sizeof(file_provider_external_info_v0));
}
const auto& fpei = *(file_provider_external_info_v0*)&wofei[1];
if (fpei.Version != FILE_PROVIDER_CURRENT_VERSION) {
throw formatted_error("rph->FILE_PROVIDER_EXTERNAL_INFO_V0 Version was {}, expected {}.",
fpei.Version, FILE_PROVIDER_CURRENT_VERSION);
}
reparse_point.clear();
mappings.clear();
auto wcdsv = string_view((char*)wof_compressed_data.data(), wof_compressed_data.size());
switch (fpei.Algorithm) {
case FILE_PROVIDER_COMPRESSION_XPRESS4K:
inline_data = do_xpress_decompress(wcdsv, (uint32_t)file_size, 4096);
break;
case FILE_PROVIDER_COMPRESSION_LZX:
inline_data = do_lzx_decompress(wcdsv, (uint32_t)file_size);
break;
case FILE_PROVIDER_COMPRESSION_XPRESS8K:
inline_data = do_xpress_decompress(wcdsv, (uint32_t)file_size, 8192);
break;
case FILE_PROVIDER_COMPRESSION_XPRESS16K:
inline_data = do_xpress_decompress(wcdsv, (uint32_t)file_size, 16384);
break;
default:
throw formatted_error("Unrecognized WOF compression algorithm {}", fpei.Algorithm);
}
} catch (const exception& e) {
if (filename.empty())
filename = f.get_filename();
fmt::print(stderr, "{}: {}\n", filename, e.what());
}
}
ii.generation = 1;
ii.transid = 1;
if (!is_dir && !reparse_point.empty()) {
inline_data = reparse_point;
file_size = reparse_point.size();
vdl = inline_data.size();
} else if (!symlink.empty()) {
mappings.clear();
file_size = symlink.size();
inline_data.resize(symlink.size());
memcpy(inline_data.data(), symlink.data(), symlink.size());
vdl = inline_data.size();
}
if (!is_dir)
ii.st_size = file_size;
ii.st_nlink = (uint32_t)links.size();
if (mode.has_value())
ii.st_mode = mode.value();
else {
if (is_dir)
ii.st_mode = __S_IFDIR | S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH;
else
ii.st_mode = __S_IFREG | S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
if (!symlink.empty())
ii.st_mode |= __S_IFLNK;
}
ii.sequence = 1;
if (nocsum && !is_dir)
ii.flags = BTRFS_INODE_NODATACOW | BTRFS_INODE_NODATASUM;
if (!mappings.empty()) {
buffer_t buf(offsetof(EXTENT_DATA, data[0]) + sizeof(EXTENT_DATA2));
auto& ed = *(EXTENT_DATA*)buf.data();
mapped_inodes++;
auto& ed2 = *(EXTENT_DATA2*)&ed.data;
ed.generation = 1;
ed.compression = btrfs_compression::none;
ed.encryption = 0;
ed.encoding = 0;
ed.type = btrfs_extent_type::regular;
process_mappings(dev, inode, mappings, runs);
if (vdl < file_size) {
uint64_t alloc_size = sector_align(file_size, sector_size);
uint64_t alloc_vdl = sector_align(vdl, sector_size);
if (!mappings.empty() && (mappings.back().vcn + mappings.back().length) < alloc_size / sector_size) {
mappings.emplace_back(0, mappings.back().vcn + mappings.back().length,
(alloc_size / sector_size) - mappings.back().vcn - mappings.back().length);
}
while (alloc_vdl < alloc_size) { // for whole sectors, replace with sparse extents
if (!mappings.empty()) {
auto& m = mappings.back();
if (m.length * sector_size > alloc_size - alloc_vdl) {
uint64_t sub = (alloc_size - alloc_vdl) / sector_size;
if (sub > 0) {
m.length -= sub;
alloc_size -= sub * sector_size;
}
break;
} else {
alloc_size -= m.length * sector_size;
mappings.pop_back();
}
} else {
alloc_size = alloc_vdl;
break;
}
}
if (vdl < alloc_size) { // zero end of final sector if necessary
buffer_t sector(sector_size);
dev.seek((mappings.back().lcn + mappings.back().length - 1) * cluster_size);
dev.read(sector.data(), sector.size());
memset(sector.data() + (vdl % sector_size), 0, sector_size - (vdl % sector_size));
dev.seek((mappings.back().lcn + mappings.back().length - 1) * cluster_size);
dev.write(sector.data(), sector.size());
}
}
for (const auto& m : mappings) {
if (m.lcn != 0) { // not sparse
ed.decoded_size = ed2.size = ed2.num_bytes = m.length * dev.boot_sector->BytesPerSector * dev.boot_sector->SectorsPerCluster;
ii.st_blocks += ed.decoded_size;
ed2.address = (m.lcn * dev.boot_sector->BytesPerSector * dev.boot_sector->SectorsPerCluster) + chunk_virt_offset;
ed2.offset = 0;
add_item(r, inode, btrfs_key_type::EXTENT_DATA, m.vcn * dev.boot_sector->BytesPerSector * dev.boot_sector->SectorsPerCluster, buf);
}
}
} else if (!inline_data.empty()) {
if (inline_data.size() > max_inline) {
buffer_t buf(offsetof(EXTENT_DATA, data[0]) + sizeof(EXTENT_DATA2));
auto compression = opt_compression;
auto& ed = *(EXTENT_DATA*)buf.data();
auto& ed2 = *(EXTENT_DATA2*)&ed.data;
rewritten_inodes++;
ed.generation = 1;
ed.compression = btrfs_compression::none;
ed.encryption = 0;
ed.encoding = 0;
ed.type = btrfs_extent_type::regular;
// round to nearest sector, and zero end
if (inline_data.size() & (sector_size - 1)) {
auto oldlen = inline_data.size();
inline_data.resize((size_t)sector_align(inline_data.size(), sector_size));
memset(inline_data.data() + oldlen, 0, inline_data.size() - oldlen);
}
// FIXME - do by sparse extents, if longer than a sector
if (vdl < inline_data.size())
memset(inline_data.data() + vdl, 0, (size_t)(inline_data.size() - vdl));
uint64_t pos = 0;
string_view data{(char*)inline_data.data(), inline_data.size()};
while (!data.empty()) {
uint64_t len, lcn, cl;
bool inserted = false;
buffer_t compdata;
if (compression == btrfs_compression::none)
len = min((size_t)max_extent_size, data.length());
#if defined(WITH_ZLIB) || defined(WITH_LZO) || defined(WITH_ZSTD)
else if (data.length() <= cluster_size) {
len = min((size_t)max_extent_size, data.length());
ed.compression = btrfs_compression::none;
} else {
optional<buffer_t> c;
len = min((size_t)max_comp_extent_size, data.length());
switch (compression) {
#ifdef WITH_ZLIB
case btrfs_compression::zlib:
c = zlib_compress(data.substr(0, len), cluster_size);
break;
#endif
#ifdef WITH_LZO
case btrfs_compression::lzo:
c = lzo_compress(data.substr(0, len), cluster_size);
break;
#endif
#ifdef WITH_ZSTD
case btrfs_compression::zstd:
c = zstd_compress(data.substr(0, len), cluster_size);
break;
#endif
default:
break;
}
if (c.has_value()) {
compdata = c.value();
ed.compression = compression;
ii.flags |= BTRFS_INODE_COMPRESS;
} else // incompressible
ed.compression = btrfs_compression::none;
// if first part of file incompressible, give up on rest and add nocomp flag
if (pos == 0 && ed.compression == btrfs_compression::none) {
ii.flags |= BTRFS_INODE_NOCOMPRESS;
compression = btrfs_compression::none;
len = min((size_t)max_extent_size, data.length());
}
// FIXME - set xattr for compression type?
}
#endif
ed.decoded_size = ed2.num_bytes = len;
ed2.size = ed.compression == btrfs_compression::none ? len : compdata.size();
ii.st_blocks += ed.decoded_size;
ed2.address = allocate_data(ed2.size, true);
ed2.offset = 0;
dev.seek(ed2.address - chunk_virt_offset);
if (ed.compression == btrfs_compression::none)
dev.write((uint8_t*)data.data(), (size_t)len);
else
dev.write(compdata.data(), compdata.size());
add_item(r, inode, btrfs_key_type::EXTENT_DATA, pos, buf);
lcn = (ed2.address - chunk_virt_offset) / cluster_size;
cl = ed2.size / cluster_size;
auto& rl = runs[(ed2.address - chunk_virt_offset) / data_chunk_size];
for (auto it = rl.begin(); it != rl.end(); it++) {
auto& r = *it;
if (r.offset >= lcn + cl) {
rl.emplace(it, lcn, cl, inode, pos / cluster_size, false, true);
inserted = true;
break;
}
}
if (!inserted)
rl.emplace_back(lcn, cl, inode, pos / cluster_size, false, true);
if (data.length() > len) {
pos += len;
data = data.substr((size_t)len);
} else
break;
}
inline_data.clear();
} else {
buffer_t buf(offsetof(EXTENT_DATA, data[0]) + inline_data.size());
auto& ed = *(EXTENT_DATA*)buf.data();
inline_inodes++;
// FIXME - compress inline extents?
ed.generation = 1;
ed.decoded_size = inline_data.size();
ed.compression = btrfs_compression::none;
ed.encryption = 0;
ed.encoding = 0;
ed.type = btrfs_extent_type::inline_extent;
memcpy(ed.data, inline_data.data(), inline_data.size());
if (vdl < inline_data.size())
memset(ed.data + vdl, 0, (size_t)(inline_data.size() - vdl));
add_item_move(r, inode, btrfs_key_type::EXTENT_DATA, 0, buf);
ii.st_blocks = inline_data.size();
}
}
add_item(r, inode, btrfs_key_type::INODE_ITEM, 0, &ii, sizeof(INODE_ITEM));
if (item_type == btrfs_inode_type::unknown) {
if (is_dir)
item_type = btrfs_inode_type::directory;
else if (!symlink.empty())
item_type = btrfs_inode_type::symlink;
else
item_type = btrfs_inode_type::file;
}
for (const auto& l : links) {
if (get<0>(l) == NTFS_ROOT_DIR_INODE)
link_inode(r, inode, SUBVOL_ROOT_INODE, get<1>(l), item_type);
else
link_inode(r, inode, get<0>(l) + inode_offset, get<1>(l), item_type);
}
if (!sd.empty()) {
// FIXME - omit SD if only one hard link and implied from parent?
xattrs.emplace(EA_NTACL, make_pair(EA_NTACL_HASH, sd));
}
if (atts_set) {
char val[16], *val2;
val2 = &val[sizeof(val) - 1];
do {
uint8_t c = atts % 16;
*val2 = (char)(c <= 9 ? (c + '0') : (c - 0xa + 'a'));
val2--;
atts >>= 4;
} while (atts != 0);
*val2 = 'x';
val2--;
*val2 = '0';
auto sv = string_view(val2, val + sizeof(val) - val2);
buffer_t buf(sv.size());
memcpy(buf.data(), sv.data(), sv.size());
xattrs.emplace(EA_DOSATTRIB, make_pair(EA_DOSATTRIB_HASH, buf));
}
if (!reparse_point.empty() && is_dir)
xattrs.emplace(EA_REPARSE, make_pair(EA_REPARSE_HASH, reparse_point));
for (const auto& xa : xattrs) {
// FIXME - collisions (make hash key of map?)
set_xattr(r, inode, xa.first, get<0>(xa.second), get<1>(xa.second));
}
}
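// Iterates over the MFT bitmap and calls add_inode() for every in-use file record at
// or above first_ntfs_inode; NTFS metadata files below that (other than the root
// directory) are added to the skip list so no links are created for them.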
static void create_inodes(root& r, const buffer_t& mftbmp, ntfs& dev, runs_t& runs, ntfs_file& secure,
enum btrfs_compression compression, bool nocsum) {
list<space> inodes;
list<uint64_t> skiplist;
uint64_t total = 0, num = 0;
r.dir_seqs[SUBVOL_ROOT_INODE] = 3;
parse_bitmap(mftbmp, inodes);
for (const auto& l : inodes) {
total += l.length;
}
while (!inodes.empty()) {
auto& run = inodes.front();
uint64_t ntfs_inode = run.offset;
uint64_t inode = ntfs_inode + inode_offset;
bool dir;
try {
if (ntfs_inode >= first_ntfs_inode)
add_inode(r, inode, ntfs_inode, dir, runs, secure, dev, skiplist, compression, nocsum);
else if (ntfs_inode != NTFS_ROOT_DIR_INODE)
populate_skip_list(dev, ntfs_inode, skiplist);
} catch (...) {
clear_line();
throw;
}
num++;
fmt::print("Processing inode {} / {} ({:1.1f}%)\r", num, total, (float)num * 100.0f / (float)total);
fflush(stdout);
if (run.length == 1)
inodes.pop_front();
else {
run.offset++;
run.length--;
}
}
fmt::print("\n");
}
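// Matches the kernel's hash_extent_data_ref(): the CRC32C of the root id is shifted
// left by 31 bits and XORed with the CRC32C of the object id followed by the offset.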
static uint64_t get_extent_data_ref_hash2(uint64_t root, uint64_t objid, uint64_t offset) {
uint32_t high_crc = 0xffffffff, low_crc = 0xffffffff;
high_crc = calc_crc32c(high_crc, (uint8_t*)&root, sizeof(uint64_t));
low_crc = calc_crc32c(low_crc, (uint8_t*)&objid, sizeof(uint64_t));
low_crc = calc_crc32c(low_crc, (uint8_t*)&offset, sizeof(uint64_t));
return ((uint64_t)high_crc << 31) ^ (uint64_t)low_crc;
}
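// Creates an EXTENT_ITEM for each data run: runs referenced only by the image file
// get a single EXTENT_DATA_REF, runs not present in the image (rewritten data) get a
// single ref to the owning file, and shared runs get two refs, ordered by their
// back-reference hash as btrfs expects.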
static void create_data_extent_items(root& extent_root, const runs_t& runs, uint32_t cluster_size, uint64_t image_subvol_id,
uint64_t image_inode) {
for (const auto& rs : runs) {
for (const auto& r : rs.second) {
uint64_t img_addr;
if (r.inode == dummy_inode)
continue;
if (r.relocated) {
for (const auto& reloc : relocs) {
if (reloc.new_start == r.offset) {
img_addr = reloc.old_start * cluster_size;
break;
}
}
} else
img_addr = r.offset * cluster_size;
if (r.inode == 0) {
data_item di;
di.extent_item.refcount = 1;
di.extent_item.generation = 1;
di.extent_item.flags = EXTENT_ITEM_DATA;
di.type = btrfs_key_type::EXTENT_DATA_REF;
di.edr.root = image_subvol_id;
di.edr.objid = image_inode;
di.edr.count = 1;
di.edr.offset = img_addr;
add_item(extent_root, (r.offset * cluster_size) + chunk_virt_offset, btrfs_key_type::EXTENT_ITEM, r.length * cluster_size,
&di, sizeof(data_item));
} else if (r.not_in_img) {
data_item di;
di.extent_item.refcount = 1;
di.extent_item.generation = 1;
di.extent_item.flags = EXTENT_ITEM_DATA;
di.type = btrfs_key_type::EXTENT_DATA_REF;
di.edr.root = BTRFS_ROOT_FSTREE;
di.edr.objid = r.inode;
di.edr.count = 1;
di.edr.offset = r.file_offset * cluster_size;
add_item(extent_root, (r.offset * cluster_size) + chunk_virt_offset, btrfs_key_type::EXTENT_ITEM, r.length * cluster_size,
&di, sizeof(data_item));
} else {
data_item2 di2;
EXTENT_DATA_REF* e1;
EXTENT_DATA_REF* e2;
di2.extent_item.refcount = 2;
di2.extent_item.generation = 1;
di2.extent_item.flags = EXTENT_ITEM_DATA;
di2.type1 = btrfs_key_type::EXTENT_DATA_REF;
di2.type2 = btrfs_key_type::EXTENT_DATA_REF;
auto hash1 = get_extent_data_ref_hash2(image_subvol_id, image_inode, img_addr);
auto hash2 = get_extent_data_ref_hash2(BTRFS_ROOT_FSTREE, r.inode, r.file_offset * cluster_size);
if (hash2 > hash1) {
e1 = &di2.edr2;
e2 = &di2.edr1;
} else {
e1 = &di2.edr1;
e2 = &di2.edr2;
}
e1->root = image_subvol_id;
e1->objid = image_inode;
e1->count = 1;
e1->offset = img_addr;
e2->root = BTRFS_ROOT_FSTREE;
e2->objid = r.inode;
e2->count = 1;
e2->offset = r.file_offset * cluster_size;
add_item(extent_root, (r.offset * cluster_size) + chunk_virt_offset, btrfs_key_type::EXTENT_ITEM, r.length * cluster_size,
&di2, sizeof(data_item2));
}
}
}
}
#ifdef USE_MMAP
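// Small RAII wrapper around mmap(), used below so checksumming can read a whole data
// chunk without copying it into a buffer first.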
class memory_map {
public:
memory_map(int fd, uint64_t off, size_t length) : length(length) {
ptr = (uint8_t*)mmap(nullptr, length, PROT_READ, MAP_SHARED, fd, off);
if (ptr == MAP_FAILED)
throw formatted_error("mmap failed (errno = {})", errno);
}
~memory_map() {
munmap(ptr, length);
}
uint8_t* ptr;
size_t length;
};
#endif
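// Computes per-sector checksums for all file data and stores them as EXTENT_CSUM
// items; runs are first merged and split so that each item fits within a tree block
// (see __MAX_CSUM_ITEMS in the kernel).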
static void calc_checksums(root& csum_root, runs_t runs, ntfs& dev, enum btrfs_csum_type csum_type) {
uint32_t sector_size = 0x1000; // FIXME
uint32_t cluster_size = dev.boot_sector->BytesPerSector * dev.boot_sector->SectorsPerCluster;
list<space> runs2;
uint64_t total = 0, num = 0;
uint32_t csum_size;
switch (csum_type) {
case btrfs_csum_type::crc32c:
csum_size = sizeof(uint32_t);
break;
case btrfs_csum_type::xxhash:
csum_size = sizeof(uint64_t);
break;
case btrfs_csum_type::sha256:
case btrfs_csum_type::blake2:
csum_size = 32;
break;
}
// See __MAX_CSUM_ITEMS in kernel
auto max_run = (uint32_t)((tree_size - sizeof(tree_header) - (sizeof(leaf_node) * 2)) / csum_size) - 1;
// FIXME - these are clusters, when they should be sectors
// split and merge runs
for (auto& r2 : runs) {
auto& rs = r2.second;
bool first = true;
while (!rs.empty()) {
auto& r = rs.front();
if (r.inode == dummy_inode) {
rs.pop_front();
continue;
}
if (first || runs2.back().offset + runs2.back().length < r.offset || runs2.back().length == max_run) {
// create new run
if (r.length > max_run) {
runs2.emplace_back(r.offset, max_run);
r.offset += max_run;
r.length -= max_run;
} else {
runs2.emplace_back(r.offset, r.length);
rs.pop_front();
}
first = false;
continue;
}
// continue existing run
if (runs2.back().length + r.length <= max_run) {
runs2.back().length += r.length;
rs.pop_front();
continue;
}
r.offset += max_run - runs2.back().length;
r.length -= max_run - runs2.back().length;
runs2.back().length = max_run;
}
}
for (const auto& r : runs2) {
total += r.length;
}
#ifdef USE_MMAP
unique_ptr<memory_map> mm;
uint64_t old_chunk = 0;
#endif
for (const auto& r : runs2) {
buffer_t csums;
if (r.offset * cluster_size >= orig_device_size)
break;
csums.resize((size_t)(r.length * cluster_size * csum_size / sector_size));
#ifdef USE_MMAP
uint64_t chunk = (r.offset * cluster_size) / data_chunk_size;
if (!mm || old_chunk != chunk) {
mm.reset(new memory_map(dev.fd, chunk * data_chunk_size, min(data_chunk_size, orig_device_size - (chunk * data_chunk_size))));
old_chunk = chunk;
}
string_view sv((char*)mm->ptr + ((r.offset * cluster_size) % data_chunk_size), r.length * cluster_size);
#else
buffer_t data((size_t)(r.length * cluster_size));
dev.seek(r.offset * cluster_size);
dev.read(data.data(), data.size());
auto sv = string_view((char*)data.data(), data.size());
#endif
auto msg = [&]() {
num++;
if (num % 1000 == 0 || num == total) {
fmt::print("Calculating checksums {} / {} ({:1.1f}%)\r", num, total, (float)num * 100.0f / (float)total);
fflush(stdout);
}
};
switch (csum_type) {
case btrfs_csum_type::crc32c: {
auto csum = (uint32_t*)&csums[0];
while (sv.length() > 0) {
*csum = ~calc_crc32c(0xffffffff, (const uint8_t*)sv.data(), sector_size);
csum++;
sv = sv.substr(sector_size);
msg();
}
break;
}
case btrfs_csum_type::xxhash: {
auto csum = (uint64_t*)&csums[0];
while (sv.length() > 0) {
*csum = XXH64(sv.data(), sector_size, 0);
csum++;
sv = sv.substr(sector_size);
msg();
}
break;
}
case btrfs_csum_type::sha256: {
auto csum = (uint8_t*)&csums[0];
while (sv.length() > 0) {
calc_sha256(csum, sv.data(), sector_size);
csum += csum_size;
sv = sv.substr(sector_size);
msg();
}
break;
}
case btrfs_csum_type::blake2: {
auto csum = (uint8_t*)&csums[0];
while (sv.length() > 0) {
blake2b(csum, csum_size, sv.data(), sector_size);
csum += csum_size;
sv = sv.substr(sector_size);
msg();
}
break;
}
}
add_item(csum_root, EXTENT_CSUM_ID, btrfs_key_type::EXTENT_CSUM, (r.offset * cluster_size) + chunk_virt_offset, &csums[0], (uint16_t)(r.length * cluster_size * csum_size / sector_size));
}
fmt::print("\n");
}
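// Moves a single cluster out of the way: the data is copied to newly allocated space,
// a relocation is recorded so the image file still refers to the original contents,
// and the new location is inserted into the runs map marked as relocated.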
static void protect_cluster(ntfs& dev, runs_t& runs, uint64_t cluster) {
if (!split_runs(dev, runs, cluster, 1, dummy_inode, 0))
return;
buffer_t data;
uint32_t cluster_size = dev.boot_sector->BytesPerSector * dev.boot_sector->SectorsPerCluster;
uint64_t addr = allocate_data(cluster_size, false) - chunk_virt_offset;
if ((cluster + 1) * cluster_size > orig_device_size)
data.resize((size_t)(orig_device_size - (cluster * cluster_size)));
else
data.resize((size_t)cluster_size);
dev.seek(cluster * cluster_size);
dev.read(data.data(), data.size());
dev.seek(addr);
dev.write(data.data(), data.size());
relocs.emplace_back(cluster, 1, addr / cluster_size);
uint64_t clusters_per_chunk = data_chunk_size / (uint64_t)cluster_size;
uint64_t cluster_addr = (cluster * cluster_size) + chunk_virt_offset;
for (auto& c : chunks) {
if (c.offset <= cluster_addr && c.offset + c.length > cluster_addr) {
c.used -= cluster_size;
break;
}
}
uint64_t chunk = (addr / (uint64_t)cluster_size) / clusters_per_chunk;
if (runs.count(chunk) != 0) {
auto& r = runs.at(chunk);
for (auto it = r.begin(); it != r.end(); it++) {
if (it->offset > addr / cluster_size) {
r.emplace(it, addr / cluster_size, 1, 0, 0, true);
return;
}
}
}
auto& r = runs[chunk];
r.emplace_back(addr / cluster_size, 1, 0, 0, true);
}
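// Relocates any clusters that would be overwritten by the btrfs superblocks, plus the
// first cluster, anything beyond the last data chunk, and (if the device size isn't
// sector-aligned) the final partial sector.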
static void protect_superblocks(ntfs& dev, runs_t& runs) {
uint32_t cluster_size = dev.boot_sector->BytesPerSector * dev.boot_sector->SectorsPerCluster;
unsigned int i = 0;
while (superblock_addrs[i] != 0) {
if (superblock_addrs[i] > device_size - sizeof(superblock))
break;
uint64_t cluster_start = (superblock_addrs[i] - (superblock_addrs[i] % stripe_length)) / cluster_size;
uint64_t cluster_end = sector_align(superblock_addrs[i] - (superblock_addrs[i] % stripe_length) + stripe_length, cluster_size) / cluster_size;
for (uint64_t j = cluster_start; j < cluster_end; j++) {
protect_cluster(dev, runs, j);
}
i++;
}
// also relocate first cluster
protect_cluster(dev, runs, 0);
if (reloc_last_sector)
protect_cluster(dev, runs, device_size / cluster_size);
if (last_chunk_end < device_size) {
uint64_t cluster_start = last_chunk_end / cluster_size;
uint64_t cluster_end = device_size / cluster_size;
if (reloc_last_sector)
cluster_end--;
for (auto i = cluster_start; i <= cluster_end; i++) {
protect_cluster(dev, runs, i);
}
}
}
static void clear_first_cluster(ntfs& dev) {
uint32_t cluster_size = dev.boot_sector->BytesPerSector * dev.boot_sector->SectorsPerCluster;
buffer_t data(cluster_size);
memset(data.data(), 0, data.size());
dev.seek(0);
dev.write(data.data(), data.size());
}
static void calc_used_space(const runs_t& runs, uint32_t cluster_size) {
for (const auto& rl : runs) {
uint64_t offset = (rl.first * data_chunk_size) + chunk_virt_offset;
for (auto& c : chunks) {
if (offset == c.offset) {
for (const auto& r : rl.second) {
if ((r.offset * cluster_size) + chunk_virt_offset - c.offset < c.length)
c.used += r.length * cluster_size;
}
break;
}
}
}
}
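// Populates the root tree: ROOT_ITEMs for the other trees, an inode for the top-level
// tree directory, and a "default" DIR_ITEM pointing at the FS tree so the filesystem
// has a default subvolume.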
static void populate_root_root(root& root_root) {
INODE_ITEM ii;
static const char default_subvol[] = "default";
static const uint32_t default_hash = 0x8dbfc2d2;
for (const auto& r : roots) {
if (r.id != BTRFS_ROOT_ROOT && r.id != BTRFS_ROOT_CHUNK)
add_to_root_root(r, root_root);
}
add_inode_ref(root_root, BTRFS_ROOT_FSTREE, BTRFS_ROOT_TREEDIR, 0, "default");
memset(&ii, 0, sizeof(INODE_ITEM));
ii.generation = 1;
ii.transid = 1;
ii.st_nlink = 1;
ii.st_mode = __S_IFDIR | S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH;
add_item(root_root, BTRFS_ROOT_TREEDIR, btrfs_key_type::INODE_ITEM, 0, &ii, sizeof(INODE_ITEM));
add_inode_ref(root_root, BTRFS_ROOT_TREEDIR, BTRFS_ROOT_TREEDIR, 0, "..");
buffer_t buf(offsetof(DIR_ITEM, name[0]) + sizeof(default_subvol) - 1);
auto& di = *(DIR_ITEM*)buf.data();
di.key.obj_id = BTRFS_ROOT_FSTREE;
di.key.obj_type = btrfs_key_type::ROOT_ITEM;
di.key.offset = 0xffffffffffffffff;
di.transid = 0;
di.m = 0;
di.n = sizeof(default_subvol) - 1;
di.type = btrfs_inode_type::directory;
memcpy(di.name, default_subvol, sizeof(default_subvol) - 1);
add_item_move(root_root, BTRFS_ROOT_TREEDIR, btrfs_key_type::DIR_ITEM, default_hash, buf);
}
static void add_subvol_uuid(root& r) {
add_item(r, *(uint64_t*)&subvol_uuid, btrfs_key_type::SUBVOL_UUID, *(uint64_t*)&subvol_uuid.uuid[sizeof(uint64_t)],
&image_subvol_id, sizeof(image_subvol_id));
}
static void update_dir_sizes(root& r) {
for (auto& it : r.items) {
if (it.first.obj_type == btrfs_key_type::INODE_ITEM && r.dir_size.count(it.first.obj_id) != 0) {
auto& ii = *(INODE_ITEM*)it.second.data();
// FIXME - would it speed things up if we removed the entry from dir_size map here?
ii.st_size = r.dir_size.at(it.first.obj_id);
}
}
}
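// Top-level conversion routine: generates the filesystem UUIDs, lays out data chunks
// from the NTFS bitmap, relocates clusters that clash with superblock positions,
// creates the system trees and the image subvolume, converts every inode, builds the
// ntfs.img image file, computes checksums, then repeatedly rebuilds the
// chunk/extent/device trees until their allocations stabilize before writing
// everything out and zeroing the first cluster so the volume is no longer recognized
// as NTFS.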
static void convert(ntfs& dev, enum btrfs_compression compression, enum btrfs_csum_type csum_type, bool nocsum) {
uint32_t sector_size = 0x1000; // FIXME
uint64_t cluster_size = (uint64_t)dev.boot_sector->BytesPerSector * (uint64_t)dev.boot_sector->SectorsPerCluster;
runs_t runs;
static const uint64_t image_inode = 0x101;
// FIXME - die if cluster size not multiple of 4096
{
default_random_engine generator;
generator.seed((unsigned int)chrono::high_resolution_clock::now().time_since_epoch().count());
fs_uuid = generate_uuid(generator);
chunk_uuid = generate_uuid(generator);
dev_uuid = generate_uuid(generator);
subvol_uuid = generate_uuid(generator);
}
device_size = orig_device_size = dev.boot_sector->TotalSectors * dev.boot_sector->BytesPerSector;
if (device_size % sector_size != 0) {
device_size -= device_size % sector_size;
reloc_last_sector = true;
}
space_list.emplace_back(0, device_size);
ntfs_file bitmap(dev, NTFS_BITMAP_INODE);
auto bmpdata = bitmap.read();
create_data_chunks(dev, bmpdata);
roots.emplace_back(BTRFS_ROOT_ROOT);
root& root_root = roots.back();
roots.emplace_back(BTRFS_ROOT_EXTENT);
root& extent_root = roots.back();
roots.emplace_back(BTRFS_ROOT_CHUNK);
root& chunk_root = roots.back();
add_dev_item(chunk_root);
roots.emplace_back(BTRFS_ROOT_DEVTREE);
root& devtree_root = roots.back();
add_dev_stats(devtree_root);
roots.emplace_back(BTRFS_ROOT_FSTREE);
root& fstree_root = roots.back();
populate_fstree(fstree_root);
roots.emplace_back(BTRFS_ROOT_DATA_RELOC);
populate_fstree(roots.back());
roots.emplace_back(BTRFS_ROOT_CHECKSUM);
root& csum_root = roots.back();
root& image_subvol = add_image_subvol(root_root, fstree_root);
parse_data_bitmap(dev, bmpdata, runs);
// make sure runs don't go beyond end of device
while (!runs.empty() && (runs.rbegin()->second.back().offset * cluster_size) + runs.rbegin()->second.back().length > device_size) {
auto& r = runs.rbegin()->second;
if (r.back().offset * cluster_size >= orig_device_size) {
r.pop_back();
if (r.empty())
runs.erase(prev(runs.end()));
} else {
uint64_t len = orig_device_size - (r.back().offset * cluster_size);
if (len % cluster_size)
r.back().length = (len / cluster_size) + 1;
else
r.back().length = len / cluster_size;
break;
}
}
protect_superblocks(dev, runs);
calc_used_space(runs, dev.boot_sector->BytesPerSector * dev.boot_sector->SectorsPerCluster);
auto mftbmp = dev.mft->read(0, 0, ntfs_attribute::BITMAP);
{
ntfs_file secure(dev, NTFS_SECURE_INODE);
create_inodes(fstree_root, mftbmp, dev, runs, secure, compression, nocsum);
}
fmt::print("Mapped {} inodes directly.\n", mapped_inodes);
fmt::print("Rewrote {} inodes.\n", rewritten_inodes);
fmt::print("Inlined {} inodes.\n", inline_inodes);
create_image(image_subvol, dev, runs, image_inode, nocsum);
roots.emplace_back(BTRFS_ROOT_UUID);
add_subvol_uuid(roots.back());
create_data_extent_items(extent_root, runs, dev.boot_sector->BytesPerSector * dev.boot_sector->SectorsPerCluster,
image_subvol.id, image_inode);
fmt::print("Updating directory sizes\n");
for (auto& r : roots) {
if (!r.dir_size.empty())
update_dir_sizes(r);
}
if (!nocsum)
calc_checksums(csum_root, runs, dev, csum_type);
populate_root_root(root_root);
for (auto& r : roots) {
if (r.id != BTRFS_ROOT_EXTENT && r.id != BTRFS_ROOT_CHUNK && r.id != BTRFS_ROOT_DEVTREE)
r.create_trees(extent_root, csum_type);
}
do {
bool extents_changed = false;
chunks_changed = false;
for (auto& c : chunks) {
if (!c.added) {
add_chunk(chunk_root, devtree_root, extent_root, c);
c.added = true;
}
}
for (auto& r : roots) {
if (r.id == BTRFS_ROOT_EXTENT || r.id == BTRFS_ROOT_CHUNK || r.id == BTRFS_ROOT_DEVTREE) {
r.old_addresses.swap(r.addresses);
r.addresses.clear();
// FIXME - unallocate metadata and changed used value in chunks
r.metadata_size -= r.trees.size() * tree_size;
r.trees.clear();
r.allocations_done = false;
r.create_trees(extent_root, csum_type);
if (r.allocations_done)
extents_changed = true;
}
}
if (!chunks_changed && !extents_changed)
break;
} while (true);
// update tree addresses and levels in-place in root 1
update_root_root(root_root, csum_type);
// update used value in BLOCK_GROUP_ITEMs
update_extent_root(extent_root, csum_type);
// update bytes_used in DEV_ITEM in root 3
update_chunk_root(chunk_root, csum_type);
for (auto& r : roots) {
r.write_trees(dev);
}
write_superblocks(dev, chunk_root, root_root, compression, csum_type);
clear_first_cluster(dev);
}
#if defined(__i386__) || defined(__x86_64__)
static void check_cpu() noexcept {
#ifndef _MSC_VER
unsigned int cpuInfo[4];
__get_cpuid(1, &cpuInfo[0], &cpuInfo[1], &cpuInfo[2], &cpuInfo[3]);
if (cpuInfo[2] & bit_SSE4_2)
calc_crc32c = calc_crc32c_hw;
#else
int cpuInfo[4];
__cpuid(cpuInfo, 1);
if (cpuInfo[2] & (1 << 20))
calc_crc32c = calc_crc32c_hw;
#endif
}
#endif
static enum btrfs_compression parse_compression_type(string_view s) {
if (s == "none")
return btrfs_compression::none;
else if (s == "zlib")
return btrfs_compression::zlib;
else if (s == "lzo")
return btrfs_compression::lzo;
else if (s == "zstd")
return btrfs_compression::zstd;
else
throw formatted_error("Unrecognized compression type {}.", s);
}
static enum btrfs_csum_type parse_csum_type(string_view s) {
if (s == "crc32c")
return btrfs_csum_type::crc32c;
else if (s == "xxhash")
return btrfs_csum_type::xxhash;
else if (s == "sha256")
return btrfs_csum_type::sha256;
else if (s == "blake2")
return btrfs_csum_type::blake2;
else
throw formatted_error("Unrecognized hash type {}.", s);
}
static vector<string> read_args(int argc, char* argv[]) {
vector<string> ret;
for (int i = 0; i < argc; i++) {
ret.emplace_back(argv[i]);
}
return ret;
}
int main(int argc, char* argv[]) {
try {
auto args = read_args(argc, argv);
if (args.size() == 2 && args[1] == "--version") {
fmt::print("ntfs2btrfs " PROJECT_VER "\n");
return 1;
}
if (args.size() < 2 || (args.size() == 2 && (args[1] == "--help" || args[1] == "/?"))) {
fmt::print(R"(Usage: ntfs2btrfs [OPTION]... device
Convert an NTFS filesystem to Btrfs.
-c, --compress=ALGO recompress compressed files; ALGO can be 'zlib',
'lzo', 'zstd', or 'none'.
-h, --hash=ALGO checksum algorithm to use; ALGO can be 'crc32c'
(default), 'xxhash', 'sha256', or 'blake2'
-r, --rollback rollback to the original filesystem
-d, --no-datasum disable data checksums
)");
return 1;
}
string fn;
enum btrfs_compression compression;
enum btrfs_csum_type csum_type;
bool do_rollback = false, nocsum = false;
#ifdef WITH_ZSTD
compression = btrfs_compression::zstd;
#elif defined(WITH_LZO)
compression = btrfs_compression::lzo;
#elif defined(WITH_ZLIB)
compression = btrfs_compression::zlib;
#else
compression = btrfs_compression::none;
#endif
csum_type = btrfs_csum_type::crc32c;
for (size_t i = 1; i < args.size(); i++) {
const auto& arg = args[i];
if (!arg.empty() && arg[0] == '-') {
if (arg == "-c") {
if (i == args.size() - 1)
throw runtime_error("No value given for -c option.");
compression = parse_compression_type(args[i+1]);
i++;
} else if (arg.substr(0, 11) == "--compress=")
compression = parse_compression_type(arg.substr(11));
else if (arg == "-h") {
if (i == args.size() - 1)
throw runtime_error("No value given for -h option.");
csum_type = parse_csum_type(args[i+1]);
i++;
} else if (arg.substr(0, 7) == "--hash=")
csum_type = parse_csum_type(arg.substr(7));
else if (arg == "-r" || arg == "--rollback")
do_rollback = true;
else if (arg == "-d" || arg == "--no-datasum")
nocsum = true;
else
throw formatted_error("Unrecognized option {}.", arg);
} else {
if (!fn.empty())
throw runtime_error("Multiple devices given.");
fn = arg;
}
}
if (fn.empty())
throw runtime_error("No device given.");
#if defined(__i386__) || defined(__x86_64__)
check_cpu();
#endif
if (do_rollback) {
rollback(fn);
return 0;
}
if (nocsum && compression != btrfs_compression::none) {
compression = btrfs_compression::none;
fmt::print("Disabling compression as it requires checksums to be enabled.\n");
} else {
#ifndef WITH_ZLIB
if (compression == btrfs_compression::zlib)
throw runtime_error("Zlib compression not compiled in.");
#endif
#ifndef WITH_LZO
if (compression == btrfs_compression::lzo)
throw runtime_error("LZO compression not compiled in.");
#endif
#ifndef WITH_ZSTD
if (compression == btrfs_compression::zstd)
throw runtime_error("Zstd compression not compiled in.");
#endif
switch (compression) {
case btrfs_compression::zlib:
fmt::print("Using Zlib compression.\n");
break;
case btrfs_compression::lzo:
fmt::print("Using LZO compression.\n");
break;
case btrfs_compression::zstd:
fmt::print("Using Zstd compression.\n");
break;
case btrfs_compression::none:
fmt::print("Not using compression.\n");
break;
}
}
switch (csum_type) {
case btrfs_csum_type::crc32c:
fmt::print("Using CRC32C for checksums.\n");
break;
case btrfs_csum_type::xxhash:
fmt::print("Using xxHash for checksums.\n");
break;
case btrfs_csum_type::sha256:
fmt::print("Using SHA256 for checksums.\n");
break;
case btrfs_csum_type::blake2:
fmt::print("Using Blake2 for checksums.\n");
break;
}
if (nocsum)
fmt::print("Not calculating checksums.\n");
ntfs dev(fn);
convert(dev, compression, csum_type, nocsum);
} catch (const exception& e) {
cerr << e.what() << endl;
return 1;
}
return 0;
}
ntfs2btrfs-20240115/src/ntfs2btrfs.h000066400000000000000000000301701455127722500171170ustar00rootroot00000000000000/* Copyright (c) Mark Harmstone 2020
*
* This file is part of ntfs2btrfs.
*
* Ntfs2btrfs is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public Licence as published by
* the Free Software Foundation, either version 2 of the Licence, or
* (at your option) any later version.
*
* Ntfs2btrfs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public Licence for more details.
*
* You should have received a copy of the GNU General Public Licence
* along with Ntfs2btrfs. If not, see <https://www.gnu.org/licenses/>. */
#pragma once
#include "btrfs.h"
#include "config.h"
#include
#include