diff --git a/external/unarr/.editorconfig b/external/unarr/.editorconfig new file mode 100644 index 00000000..da88865e --- /dev/null +++ b/external/unarr/.editorconfig @@ -0,0 +1,15 @@ +root = true + +[*] +charset = utf-8 +trim_trailing_whitespace = true +end_of_line = lf +insert_final_newline = true +indent_style = space +indent_size = 4 + +[{*.yml,CMakeLists.txt,*.md,*.in}] +indent_size = 2 + +[{*.md}] +trim_trailing_whitespace = false diff --git a/external/unarr/.gitattributes b/external/unarr/.gitattributes new file mode 100644 index 00000000..3d2afe78 --- /dev/null +++ b/external/unarr/.gitattributes @@ -0,0 +1,79 @@ +# Auto detect text files and perform LF normalization +# http://davidlaing.com/2012/09/19/customise-your-gitattributes-to-become-a-git-ninja/ +* text=auto + +# +# The above will handle all files NOT found below +# + +# Documents +*.doc diff=astextplain +*.DOC diff=astextplain +*.docx diff=astextplain +*.DOCX diff=astextplain +*.dot diff=astextplain +*.DOT diff=astextplain +*.pdf diff=astextplain +*.PDF diff=astextplain +*.rtf diff=astextplain +*.RTF diff=astextplain +*.md text +*.adoc text +*.textile text +*.mustache text +*.csv text +*.tab text +*.tsv text +*.sql text + +# Graphics +*.png binary +*.jpg binary +*.jpeg binary +*.gif binary +*.tif binary +*.tiff binary +*.ico binary +# SVG treated as an asset (binary) by default. If you want to treat it as text, +# comment-out the following line and uncomment the line after. 
+*.svg binary +#*.svg text +*.eps binary + + +# Sources +*.c text +*.cc text +*.cxx text +*.cpp text +*.c++ text +*.hpp text +*.h text +*.h++ text +*.hh text + +# Compiled Object files +*.slo binary +*.lo binary +*.o binary +*.obj binary + +# Precompiled Headers +*.gch binary +*.pch binary + +# Compiled Dynamic libraries +*.so binary +*.dylib binary +*.dll binary + +# Compiled Static libraries +*.lai binary +*.la binary +*.a binary +*.lib binary + +# Executables +*.exe binary +*.out binary +*.app binary diff --git a/external/unarr/.gitignore b/external/unarr/.gitignore new file mode 100644 index 00000000..1851feb3 --- /dev/null +++ b/external/unarr/.gitignore @@ -0,0 +1,54 @@ +build* +test/corpus/fuzzed +!test/corpus/integration/*.7z +!test/corpus/integration/*.rar +!test/corpus/integration/*.tar +!test/corpus/integration/*.zip +*.orig + +# Prerequisites +*.d + +# Object files +*.o +*.ko +*.obj +*.elf + +# Linker output +*.ilk +*.map +*.exp + +# Precompiled Headers +*.gch +*.pch + +# Libraries +*.lib +*.a +*.la +*.lo + +# Shared objects (inc. Windows DLLs) +*.dll +*.so +*.so.* +*.dylib + +# Executables +*.exe +*.out +*.app +*.i*86 +*.x86_64 +*.hex + +# Debug files +*.dSYM/ +*.su +*.idb +*.pdb + +# VS Code +.vscode diff --git a/external/unarr/AUTHORS b/external/unarr/AUTHORS new file mode 100644 index 00000000..081d5a01 --- /dev/null +++ b/external/unarr/AUTHORS @@ -0,0 +1,17 @@ +unarr contains code by: + +* The Unarchiver project (https://bitbucket.org/kosovan/theunarchiver/) +* Simon Bünzli (zeniko at gmail.com, http://www.zeniko.ch/#SumatraPDF) +* Felix Kauselmann (licorn at gmail.com) +* Bastien Nocera (hadess at hadess.net, http://www.hadess.net/) +* Wang Xin-yu (王昕宇) (comicfans44 at gmail.com) +* Liu Xiang (liuxiang at loongson.cn, https://www.loongson.cn/) +* Mastercoms (mastercoms at tuta.io) + +Most code is licensed under LGPLv3 (see COPYING). 
Exceptions are in code +included from other projects: + +Files License URL +---------------------------------------------------------------------------------- +common/crc32.c Public Domain https://gnunet.org/svn/gnunet/src/util/crypto_crc.c +lzmasdk/*.* Public Domain http://www.7-zip.org/sdk.html diff --git a/external/unarr/CHANGELOG.md b/external/unarr/CHANGELOG.md new file mode 100644 index 00000000..6d09c3e8 --- /dev/null +++ b/external/unarr/CHANGELOG.md @@ -0,0 +1,73 @@ +# (lib)unarr changelog + +## 1.1.1 (2023-10-23) + +### Fixed +* Fix heap corruption in rar filters (reported by Radosław Madej from Check Point Research) + +### Other +* Update Readme + +## 1.1.0 (2023-09-03) + +### Added +* libFuzzer target for coverage-guided fuzz testing (Wang Xin-yu (王昕宇)) +* Unit testing using CMocka +* Integration tests +* Update lzma SDK to v23.01 + +### Changed +* Build 7z support by default + +### Fixed +* Fix pkg-config when using absolute paths +* Fix bzip2 integration + +## 1.1.0.beta1 - 2022-05-01 + +### Added +* Support building unarr-test using CMake +* Build options for disabling system library usage/detection (bzip2, liblzma, zlib) +* Create CMake config-files for downstream integration +* New ar_entry_get_raw_name function for getting raw zip filenames (useful for faulty zip archives with non-spec filename encodings) +* Support for Loongson CPUs (Liu Xiang) +* Faster crc32 based on Intel slice-by-8 algorithm + +### Changed +* Update LZMA SDK code to version 21.07 +* Restore limited support for 7z archive extraction (using an embedded subset of LZMA SDK) +* Convert source to LF line endings +* Increase UNARR_API_VERSION to 110 +* Use internal crc32 implementation by default + +### Fixed +* Fixed a possible memleak in rar filter code found by clang static analyzer +* Fixed some edge cases that could lead to nullpointer dereferences and/or undefined behavior in rar extraction code +* Fixed out of bounds memmove in zip code +* Fixed memleak when trying to open an
invalid 7z file +* Fix some minor problems with BZip2 and liblzma not added correctly to pkg-config +* Fix MinGW build + +## 1.0.1 - 2017-11-04 +This is a bugfix release. + +### Fixed +* Fixed typo in pkg-config.pc.cmake template + +## 1.0.0 - 2017-09-22 + +### Added +* Cmake based build system for library builds +* Support for pkg-config (libunarr.pc) +* Windows compatible export header for DLL builds +* xz utils / libLZMA can be used as decoder for LZMA1 and XZ (LZMA2) compressed +ZIP archives. +* The internal LZMA1 decoder can be replaced with xz utils / libLZMA if present + +### Changed +* LZMA SDK code was updated to version 17.01 beta +* 7z extraction support is currently broken due to LZMA SDK api changes. +* Unarr sample application (unarr-test) and its makefile (legacy unarr build system) have been moved to the [test](test) folder + +### Fixed +* Various small bugfixes related to compiler warnings diff --git a/external/unarr/CMakeLists.txt b/external/unarr/CMakeLists.txt new file mode 100644 index 00000000..1be57b6f --- /dev/null +++ b/external/unarr/CMakeLists.txt @@ -0,0 +1,287 @@ +cmake_minimum_required(VERSION 3.3 FATAL_ERROR) + +project( + unarr + VERSION 1.1.0 + LANGUAGES C) +set(PROJECT_DESCRIPTION "A decompression library for rar, tar and zip files.") + +include(GNUInstallDirs) +include(CMakePackageConfigHelpers) + +# Set build type to default if unset. +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE + "Release" + CACHE STRING "Choose the type of build." 
FORCE) + set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" + "MinSizeRel" "RelWithDebInfo") +endif() + +# Build options +option(ENABLE_7Z "Build with 7z support" ON) +option(USE_SYSTEM_BZ2 "Build with system bzip2 if possible" ON) +option(USE_SYSTEM_LZMA "Build with system lzma/xz if possible" ON) +option(USE_SYSTEM_ZLIB "Build with system zlib if possible" ON) +option(USE_ZLIB_CRC "Use zlib crc32" OFF) + +option(BUILD_SHARED_LIBS "Build ${PROJECT_NAME} as a shared library" ON) + +option(BUILD_INTEGRATION_TESTS + "Build unarr-test executable and integration tests" OFF) +option(BUILD_FUZZER "Build libFuzzer coverage-guided fuzzer test" OFF) +option(BUILD_UNIT_TESTS "Build unit tests (requires CMocka)" OFF) + +if(BUILD_FUZZER) + if("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang") + set(sanitize_opts "-fsanitize=fuzzer,address,undefined") + else() + message(FATAL_ERROR "Fuzzer build requires a Clang compiler") + endif() +endif() + +# Build target +add_library( + unarr + _7z/_7z.h + _7z/_7z.c + common/allocator.h + common/unarr-imp.h + common/conv.c + common/crc32.c + # common/custalloc.c + common/stream.c + common/unarr.c + lzmasdk/7zTypes.h + lzmasdk/Compiler.h + lzmasdk/CpuArch.h + lzmasdk/Ppmd.h + lzmasdk/Ppmd7.h + lzmasdk/Ppmd8.h + lzmasdk/Precomp.h + lzmasdk/CpuArch.c + lzmasdk/Ppmd7.c + lzmasdk/Ppmd8.c + lzmasdk/Ppmd7Dec.c + lzmasdk/Ppmd7aDec.c + lzmasdk/Ppmd8Dec.c + rar/lzss.h + rar/rar.h + rar/rarvm.h + rar/filter-rar.c + rar/uncompress-rar.c + rar/huffman-rar.c + rar/rar.c + rar/rarvm.c + rar/parse-rar.c + tar/tar.h + tar/parse-tar.c + tar/tar.c + zip/inflate.h + zip/zip.h + zip/inflate.c + zip/parse-zip.c + zip/uncompress-zip.c + zip/zip.c) + +set_target_properties( + unarr + PROPERTIES PUBLIC_HEADER unarr.h + C_VISIBILITY_PRESET hidden + C_STANDARD 99 + C_STANDARD_REQUIRED ON + DEFINE_SYMBOL UNARR_EXPORT_SYMBOLS + VERSION ${PROJECT_VERSION} + SOVERSION ${PROJECT_VERSION_MAJOR}) + +# Add include directories for build and install 
+target_include_directories( + unarr PUBLIC $ + $) + +if(BUILD_SHARED_LIBS) + target_compile_definitions(unarr PUBLIC UNARR_IS_SHARED_LIBRARY) +endif() + +if(USE_SYSTEM_BZ2) + find_package(BZip2) +endif() +if(BZIP2_FOUND) + if(TARGET BZip2::BZip2) + target_link_libraries(unarr PRIVATE BZip2::BZip2) + else() + target_include_directories(unarr PRIVATE ${BZIP2_INCLUDE_DIRS}) + target_link_libraries(unarr PRIVATE ${BZIP2_LIBRARIES}) + endif() + target_compile_definitions(unarr PRIVATE -DHAVE_BZIP2) + # Bzip2 upstream does not supply a .pc file. Add it to Libs.private. + set(PROJECT_LIBS_PRIVATE "-I${BZIP2_INCLUDE_DIRS} -l${BZIP2_LIBRARIES}") + set(UNARR_DEPENDS_BZip2 "find_dependency(BZip2)") +endif() + +if(USE_SYSTEM_LZMA) + find_package(LibLZMA) +endif() +if(LIBLZMA_FOUND) + if(TARGET LibLZMA::LibLZMA) + target_link_libraries(unarr PRIVATE LibLZMA::LibLZMA) + else() + target_include_directories(unarr PRIVATE ${LIBLZMA_INCLUDE_DIRS}) + target_link_libraries(unarr PRIVATE ${LIBLZMA_LIBRARIES}) + endif() + target_compile_definitions(unarr PRIVATE -DHAVE_LIBLZMA) + set(PROJECT_REQUIRES_PRIVATE "${PROJECT_REQUIRES_PRIVATE} liblzma") + set(UNARR_DEPENDS_LibLZMA "find_dependency(LibLZMA)") +else() + target_sources(unarr PRIVATE lzmasdk/LzmaDec.h lzmasdk/LzmaDec.c) +endif() + +if(USE_SYSTEM_ZLIB) + find_package(ZLIB) +endif() +if(ZLIB_FOUND) + if(TARGET ZLIB::ZLIB) + target_link_libraries(unarr PRIVATE ZLIB::ZLIB) + else() + target_include_directories(unarr PRIVATE ${ZLIB_INCLUDE_DIRS}) + target_link_libraries(unarr PRIVATE ${ZLIB_LIBRARIES}) + endif() + target_compile_definitions( + unarr PRIVATE HAVE_ZLIB $<$:USE_ZLIB_CRC>) + # Add zlib to libunarr.pc Requires.private + set(PROJECT_REQUIRES_PRIVATE "${PROJECT_REQUIRES_PRIVATE} zlib") + set(UNARR_DEPENDS_ZLIB "find_dependency(ZLIB)") +endif() + +if(ENABLE_7Z) + target_sources( + unarr + PRIVATE lzmasdk/7z.h + lzmasdk/7zArcIn.c + lzmasdk/7zBuf.h + lzmasdk/7zBuf.c + lzmasdk/7zDec.c + lzmasdk/7zStream.c + lzmasdk/7zWindows.h + 
lzmasdk/Bcj2.h + lzmasdk/Bcj2.c + lzmasdk/Bra.c + lzmasdk/Bra.h + lzmasdk/Bra86.c + lzmasdk/7zCrc.h + lzmasdk/Delta.h + lzmasdk/Delta.c + lzmasdk/Lzma2Dec.h + lzmasdk/Lzma2Dec.c) + if(LIBLZMA_FOUND) # TODO: Replace 7z lzma with system lzma + target_sources(unarr PRIVATE lzmasdk/LzmaDec.h lzmasdk/LzmaDec.c) + endif() + target_compile_definitions(unarr PRIVATE -DHAVE_7Z -DZ7_PPMD_SUPPORT) +endif() + +# Compiler specific settings + +if(UNIX + OR MINGW + OR MSYS) + target_compile_options( + unarr + PRIVATE -Wall + -Wextra + -pedantic + -Wstrict-prototypes + -Wmissing-prototypes + -Werror-implicit-function-declaration + $<$:-fomit-frame-pointer> + $<$: + -Wno-missing-field-initializers>) + if(BUILD_FUZZER) + target_compile_options(unarr PUBLIC "${sanitize_opts}") + target_compile_definitions( + unarr PRIVATE -DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) + endif() + + target_compile_definitions(unarr PRIVATE -D_FILE_OFFSET_BITS=64) + + # Linker flags + + if(BUILD_FUZZER) + set(linker_opts "${sanitize_opts}") + else() + if("${CMAKE_C_COMPILER_ID}" STREQUAL "AppleClang") + set(linker_opts "-Wl,-undefined,error") + else() + set(linker_opts "-Wl,--as-needed -Wl,--no-undefined") + endif() + endif() + + set_target_properties(unarr PROPERTIES LINK_FLAGS "${linker_opts}") +endif() + +if(MSVC) + target_compile_options(unarr PRIVATE /W3 $<$:/Ox>) + target_compile_definitions(unarr PRIVATE _CRT_SECURE_NO_WARNINGS) +endif() + +# Include tests + +if(BUILD_UNIT_TESTS + OR BUILD_INTEGRATION_TESTS + OR BUILD_FUZZER) + enable_testing() + add_subdirectory(test) +endif() + +# Generate paths for pkg-config file +if(IS_ABSOLUTE "${CMAKE_INSTALL_LIBDIR}") + set(PROJECT_INSTALL_LIBDIR ${CMAKE_INSTALL_LIBDIR}) +else() + set(PROJECT_INSTALL_LIBDIR "\${prefix}/${CMAKE_INSTALL_LIBDIR}") +endif() +if(IS_ABSOLUTE "${CMAKE_INSTALL_INCLUDEDIR}") + set(PROJECT_INSTALL_INCLUDEDIR ${CMAKE_INSTALL_INCLUDEDIR}) +else() + set(PROJECT_INSTALL_INCLUDEDIR "\${prefix}/${CMAKE_INSTALL_INCLUDEDIR}") +endif() + +# 
Write pkg-config file +configure_file("pkg-config.pc.cmake" "lib${PROJECT_NAME}.pc" @ONLY) + +# Install library and header +install( + TARGETS unarr + EXPORT unarr-targets + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) + +# Install pkg-config file +install( + FILES ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}/lib${PROJECT_NAME}.pc + DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) + +# Export and install targets +install( + EXPORT unarr-targets + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/unarr + NAMESPACE unarr::) + +# Make project importable from build dir +export( + TARGETS unarr + FILE unarr-targets.cmake + NAMESPACE unarr::) + +# Write a config file for installation +configure_package_config_file( + unarr-config.cmake.in ${CMAKE_CURRENT_BINARY_DIR}/unarr-config.cmake + INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/unarr) + +# Write version file +write_basic_package_version_file(unarr-version.cmake + COMPATIBILITY AnyNewerVersion) + +install(FILES ${CMAKE_CURRENT_BINARY_DIR}/unarr-config.cmake + ${CMAKE_CURRENT_BINARY_DIR}/unarr-version.cmake + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/unarr) diff --git a/external/unarr/COPYING b/external/unarr/COPYING new file mode 100644 index 00000000..b14ca0a5 --- /dev/null +++ b/external/unarr/COPYING @@ -0,0 +1,165 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. 
+ + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". + + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. 
+ + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. 
+ + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. 
+ + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. + + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. 
diff --git a/external/unarr/README.md b/external/unarr/README.md new file mode 100644 index 00000000..66cc9878 --- /dev/null +++ b/external/unarr/README.md @@ -0,0 +1,128 @@ +|Build|Status| +|---|------| +|Linux|[![Build Status](https://dev.azure.com/licorn0647/licorn/_apis/build/status/selmf.unarr?branchName=master&jobName=Linux)](https://dev.azure.com/licorn0647/licorn/_build/latest?definitionId=2&branchName=master)| +|MacOS|[![Build Status](https://dev.azure.com/licorn0647/licorn/_apis/build/status/selmf.unarr?branchName=master&jobName=MacOS)](https://dev.azure.com/licorn0647/licorn/_build/latest?definitionId=2&branchName=master)| +|Windows|[![Build Status](https://dev.azure.com/licorn0647/licorn/_apis/build/status/selmf.unarr?branchName=master&jobName=Windows)](https://dev.azure.com/licorn0647/licorn/_build/latest?definitionId=2&branchName=master)| + +# (lib)unarr + +**(lib)unarr** is a decompression library for RAR, TAR, ZIP and 7z* archives. + +It was forked from **unarr**, which originated as a port of the RAR extraction +features from The Unarchiver project required for extracting images from comic +book archives. [Zeniko](https://github.com/zeniko/) wrote unarr as an +alternative to libarchive which didn't have support for parsing filters or +solid compression at the time. + +While (lib)unarr was started with the intent of providing unarr with a proper +cmake based build system suitable for packaging and cross-platform development, +its focus has now been extended to provide code maintenance and to continue the +development of unarr, which is no longer maintained.
+ +## Getting started + +### Prebuilt packages +[![Packaging status](https://repology.org/badge/vertical-allrepos/unarr.svg)](https://repology.org/metapackage/unarr) + +#### From OBS +[.deb package](https://software.opensuse.org//download.html?project=home%3Aselmf&package=libunarr) +[.rpm package](https://software.opensuse.org//download.html?project=home%3Aselmf%3Ayacreader-rpm&package=libunarr) + +### Building from source + +#### Dependencies + +(lib)unarr can take advantage of the following libraries if they are present: + +* bzip2 +* xz / libLZMA +* zlib + +#### CMake + +```bash +mkdir build +cd build +cmake .. +make +``` + +... as a static library + +```bash +cmake .. -DBUILD_SHARED_LIBS=OFF +``` + +By default, (lib)unarr will try to detect and use system libraries like bzip2, +xz/LibLZMA and zlib. If this is undesirable, you can override this behavior by +specifying: + +```bash +cmake .. -DUSE_SYSTEM_BZ2=OFF -DUSE_SYSTEM_LZMA=OFF -DUSE_SYSTEM_ZLIB=OFF +``` + +Install + +```bash +make install +``` + +#### Testing + +Unarr supports unit tests, integration tests and fuzzing. + + +```bash +cmake .. -DBUILD_UNIT_TESTS=ON -DBUILD_INTEGRATION_TESTS=ON +``` + +To build the unit tests, the *cmocka* unit testing framework is required. + +Building the integration tests also enables the *unarr-test* executable +which can be used to run additional tests on user-provided archive files. + +Building the fuzzer target will provide a coverage-guided fuzzer based +on llvm libfuzzer. It should be treated as a stand-alone target. + +```bash +cmake .. -DBUILD_FUZZER=ON +``` + +All tests can be run using ctest or their respective executables. + +## Usage + +### Examples + +Check [unarr.h](unarr.h) and [unarr-test](test/main.c) to get a general feel +for the api and usage. + +The unarr-test sample application can be used to test archives. + +To build it, use: + +```bash +cmake .. 
-DBUILD_INTEGRATION_TESTS=ON +``` + +## Limitations + +Unarr was written for comic book archives, so it currently doesn't support: + +* password protected archives +* self extracting archives +* split archives + +### 7z support + +7z support for large files with solid compression is currently limited by a +known performance problem in the ANSI-C based LZMA SDK +(see https://github.com/zeniko/unarr/issues/4). + +Fixing this problem will require modification or replacement of the LZMA SDK +code used. + +### Rar support + +RAR5 is currently not supported. There are plans to add this in a future version, +but as of now this is still work in progress. diff --git a/external/unarr/_7z/_7z.c b/external/unarr/_7z/_7z.c new file mode 100644 index 00000000..797ad6e5 --- /dev/null +++ b/external/unarr/_7z/_7z.c @@ -0,0 +1,205 @@ +/* Copyright 2018 the unarr project authors (see AUTHORS file). + License: LGPLv3 */ + +#include "_7z.h" + +#ifdef HAVE_7Z + +static void *gSzAlloc_Alloc(ISzAllocPtr self, size_t size) { (void)self; return malloc(size); } +static void gSzAlloc_Free(ISzAllocPtr self, void *ptr) { (void)self; free(ptr); } +static ISzAlloc gSzAlloc = { gSzAlloc_Alloc, gSzAlloc_Free }; + +static SRes CSeekStream_Read(const ISeekInStream *p, void *data, size_t *size) +{ + struct CSeekStream *stm = (struct CSeekStream *) p; + *size = ar_read(stm->stream, data, *size); + return SZ_OK; +} + +static SRes CSeekStream_Seek(const ISeekInStream *p, Int64 *pos, ESzSeek origin) +{ + struct CSeekStream *stm = (struct CSeekStream *) p; + if (!ar_seek(stm->stream, *pos, (int)origin)) + return SZ_ERROR_FAIL; + *pos = ar_tell(stm->stream); + return SZ_OK; +} + +static void CSeekStream_CreateVTable(struct CSeekStream *in_stream, ar_stream *stream) +{ + in_stream->super.Read = CSeekStream_Read; + in_stream->super.Seek = CSeekStream_Seek; + in_stream->stream = stream; +} + +#ifndef USE_7Z_CRC32 +UInt32 Z7_FASTCALL CrcCalc(const void *data, size_t size) +{ + return ar_crc32(0, data, size); 
+} +#endif + +static void _7z_close(ar_archive *ar) +{ + ar_archive_7z *_7z = (ar_archive_7z *)ar; + free(_7z->entry_name); + SzArEx_Free(&_7z->data, &gSzAlloc); + IAlloc_Free(&gSzAlloc, _7z->uncomp.buffer); + IAlloc_Free(&gSzAlloc, _7z->look_stream.buf); +} + +static const char *_7z_get_name(ar_archive *ar, bool raw); + +static bool _7z_parse_entry(ar_archive *ar, off64_t offset) +{ + ar_archive_7z *_7z = (ar_archive_7z *)ar; + //const CSzFileItem *item = _7z->data.db.PackPositions + offset; + + if (offset < 0 || offset > _7z->data.NumFiles) { + warn("Offsets must be between 0 and %u", _7z->data.NumFiles); + return false; + } + if (offset == _7z->data.NumFiles) { + ar->at_eof = true; + return false; + } + + ar->entry_offset = offset; + ar->entry_offset_next = offset + 1; + ar->entry_size_uncompressed = (size_t)SzArEx_GetFileSize(&_7z->data, offset); + ar->entry_filetime = SzBitWithVals_Check(&_7z->data.MTime, offset) ? + (time64_t)(_7z->data.MTime.Vals[offset].Low | + ((time64_t)_7z->data.MTime.Vals[offset].High << 32)) + : 0; + free(_7z->entry_name); + _7z->entry_name = NULL; + _7z->uncomp.initialized = false; + + if (SzArEx_IsDir(&_7z->data, offset)) { + log("Skipping directory entry \"%s\"", _7z_get_name(ar, false)); + return _7z_parse_entry(ar, offset + 1); + } + + return true; +} + +static char *SzArEx_GetFileNameUtf8(const CSzArEx *p, UInt32 fileIndex) +{ + size_t len = p->FileNameOffsets[fileIndex + 1] - p->FileNameOffsets[fileIndex]; + const Byte *src = p->FileNames + p->FileNameOffsets[fileIndex] * 2; + const Byte *srcEnd = src + len * 2; + size_t size = len * 3; + char *str, *out; + + if (size == (size_t)-1) + return NULL; + str = malloc(size + 1); + if (!str) + return NULL; + + for (out = str; src < srcEnd - 1; src += 2) { + out += ar_conv_rune_to_utf8(src[0] | src[1] << 8, out, str + size - out); + } + *out = '\0'; + + return str; +} + +static const char *_7z_get_name(ar_archive *ar, bool raw) +{ + if (raw) + return NULL; + + ar_archive_7z *_7z = 
(ar_archive_7z *)ar; + if (!_7z->entry_name && ar->entry_offset_next && !ar->at_eof) { + _7z->entry_name = SzArEx_GetFileNameUtf8(&_7z->data, (UInt32)ar->entry_offset); + /* normalize path separators */ + if (_7z->entry_name) { + char *p = _7z->entry_name; + while ((p = strchr(p, '\\')) != NULL) { + *p = '/'; + } + } + } + return _7z->entry_name; +} + +static bool _7z_uncompress(ar_archive *ar, void *buffer, size_t buffer_size) +{ + ar_archive_7z *_7z = (ar_archive_7z *)ar; + struct ar_archive_7z_uncomp *uncomp = &_7z->uncomp; + + if (!uncomp->initialized) { + /* TODO: this uncompresses all data for solid compressions */ + SRes res = SzArEx_Extract(&_7z->data, &_7z->look_stream.vt, (UInt32)ar->entry_offset, &uncomp->folder_index, &uncomp->buffer, &uncomp->buffer_size, &uncomp->offset, &uncomp->bytes_left, &gSzAlloc, &gSzAlloc); + if (res != SZ_OK) { + warn("Failed to extract file at index %" PRIi64 " (failed with error %d)", ar->entry_offset, res); + return false; + } + if (uncomp->bytes_left != ar->entry_size_uncompressed) { + warn("Uncompressed sizes don't match (%" PRIuPTR " != %" PRIuPTR ")", uncomp->bytes_left, ar->entry_size_uncompressed); + return false; + } + uncomp->initialized = true; + } + + if (buffer_size > uncomp->bytes_left) { + warn("Requesting too much data (%" PRIuPTR " < %" PRIuPTR ")", uncomp->bytes_left, buffer_size); + return false; + } + + memcpy(buffer, uncomp->buffer + uncomp->offset + ar->entry_size_uncompressed - uncomp->bytes_left, buffer_size); + uncomp->bytes_left -= buffer_size; + + return true; +} + +ar_archive *ar_open_7z_archive(ar_stream *stream) +{ + ar_archive *ar; + ar_archive_7z *_7z; + SRes res; + + if (!ar_seek(stream, 0, SEEK_SET)) + return NULL; + + ar = ar_open_archive(stream, sizeof(ar_archive_7z), _7z_close, _7z_parse_entry, _7z_get_name, _7z_uncompress, NULL, 0); + if (!ar) + return NULL; + + _7z = (ar_archive_7z *)ar; + CSeekStream_CreateVTable(&_7z->in_stream, stream); + LookToRead2_CreateVTable(&_7z->look_stream, 
False); + _7z->look_stream.realStream = &_7z->in_stream.super; + _7z->look_stream.buf = ISzAlloc_Alloc(&gSzAlloc, 1 << 18); + _7z->look_stream.bufSize = 1 << 18; + LookToRead2_INIT(&_7z->look_stream); + + +#ifdef USE_7Z_CRC32 + CrcGenerateTable(); +#endif + + SzArEx_Init(&_7z->data); + res = SzArEx_Open(&_7z->data, &_7z->look_stream.vt, &gSzAlloc, &gSzAlloc); + if (res != SZ_OK) { + if (res != SZ_ERROR_NO_ARCHIVE) + warn("Invalid 7z archive (failed with error %d)", res); + ISzAlloc_Free(&gSzAlloc, _7z->look_stream.buf); + free(ar); + return NULL; + } + + return ar; +} + +#else + +ar_archive *ar_open_7z_archive(ar_stream *stream) +{ + (void)stream; + warn("7z support requires 7z SDK (define HAVE_7Z)"); + return NULL; +} + +#endif diff --git a/external/unarr/_7z/_7z.h b/external/unarr/_7z/_7z.h new file mode 100644 index 00000000..7f207e17 --- /dev/null +++ b/external/unarr/_7z/_7z.h @@ -0,0 +1,47 @@ +/* Copyright 2018 the unarr project authors (see AUTHORS file). + License: LGPLv3 */ + +#ifndef _7z_7z_h +#define _7z_7z_h + +#include "../common/unarr-imp.h" + +#include "../lzmasdk/7zTypes.h" +#ifdef HAVE_7Z +#include "../lzmasdk/7z.h" +#endif + +typedef struct ar_archive_7z_s ar_archive_7z; + +struct CSeekStream { + ISeekInStream super; + ar_stream *stream; +}; + +struct ar_archive_7z_uncomp { + bool initialized; + + UInt32 folder_index; + Byte *buffer; + size_t buffer_size; + + size_t offset; + size_t bytes_left; +}; + +struct ar_archive_7z_s { + ar_archive super; + struct CSeekStream in_stream; +#ifdef HAVE_7Z + CLookToRead2 look_stream; + CSzArEx data; +#endif + char *entry_name; + struct ar_archive_7z_uncomp uncomp; +}; + +#ifndef USE_7Z_CRC32 +UInt32 Z7_FASTCALL CrcCalc(const void *data, size_t size); +#endif + +#endif diff --git a/external/unarr/azure-pipelines.yml b/external/unarr/azure-pipelines.yml new file mode 100644 index 00000000..6c8c5bd1 --- /dev/null +++ b/external/unarr/azure-pipelines.yml @@ -0,0 +1,75 @@ +# https://aka.ms/yaml + +trigger: +- master 
+- develop + +jobs: + +- job: Linux + pool: + vmImage: 'ubuntu-latest' + steps: + - script: | + sudo apt-get install -y zlib1g-dev libbz2-dev liblzma-dev libcmocka-dev + displayName: 'Install dependencies' + - task: CMake@1 + inputs: + cmakeArgs: '-DBUILD_INTEGRATION_TESTS=ON -DBUILD_UNIT_TESTS=ON ..' + displayName: 'Generate CMake cache' + - task: CMake@1 + inputs: + cmakeArgs: '--build .' + displayName: 'Build' + - script: ctest --output-on-failure --no-compress-output -T test + workingDirectory: build + displayName: Run tests + - task: PublishTestResults@2 + inputs: + testResultsFormat: 'cTest' + testResultsFiles: build/Testing/*/Test.xml + +- job: Windows + pool: + vmImage: 'windows-latest' + steps: + - script: vcpkg install cmocka --triplet x64-windows + displayName: Install dependencies + - task: CMake@1 + inputs: + cmakeArgs: '-DBUILD_INTEGRATION_TESTS=ON -DBUILD_UNIT_TESTS=ON -DCMAKE_TOOLCHAIN_FILE=C:/vcpkg/scripts/buildsystems/vcpkg.cmake ..' + displayName: 'Generate CMake cache' + - task: CMake@1 + inputs: + cmakeArgs: '--build . --config Release' + displayName: 'Build' + - script: ctest --output-on-failure --no-compress-output --extra-verbose -C Release -T test + workingDirectory: build + displayName: Run tests + - task: PublishTestResults@2 + inputs: + testResultsFormat: 'cTest' + testResultsFiles: build/Testing/*/Test.xml + +- job: MacOS + pool: + vmImage: 'macOS-latest' + steps: + - script: | + brew install cmocka + displayName: 'Install dependencies' + - task: CMake@1 + inputs: + cmakeArgs: '-DBUILD_INTEGRATION_TESTS=ON -DBUILD_UNIT_TESTS=ON ..' + displayName: 'Generate CMake cache' + - task: CMake@1 + inputs: + cmakeArgs: '--build .' 
+ displayName: 'Build' + - script: ctest --output-on-failure --no-compress-output -T test + workingDirectory: build + displayName: Run tests + - task: PublishTestResults@2 + inputs: + testResultsFormat: 'cTest' + testResultsFiles: build/Testing/*/Test.xml diff --git a/external/unarr/common/allocator.h b/external/unarr/common/allocator.h new file mode 100644 index 00000000..41199c80 --- /dev/null +++ b/external/unarr/common/allocator.h @@ -0,0 +1,29 @@ +/* Copyright 2015 the unarr project authors (see AUTHORS file). + License: LGPLv3 */ + +#ifndef common_allocator_h +#define common_allocator_h + +#ifdef USE_CUSTOM_ALLOCATOR + +#include + +typedef void *(* custom_malloc_fn)(void *opaque, size_t size); +typedef void (* custom_free_fn)(void *opaque, void *ptr); + +void ar_set_custom_allocator(custom_malloc_fn custom_malloc, custom_free_fn custom_free, void *opaque); + +#define malloc(size) ar_malloc(size) +#define calloc(count, size) ar_calloc(count, size) +#define free(ptr) ar_free(ptr) + +#define realloc(ptr, size) _use_malloc_memcpy_free_instead(ptr, size) +#define strdup(str) _use_malloc_memcpy_instead(str) + +#elif !defined(NDEBUG) && defined(_MSC_VER) + +#include + +#endif + +#endif diff --git a/external/unarr/common/conv.c b/external/unarr/common/conv.c new file mode 100644 index 00000000..4398539b --- /dev/null +++ b/external/unarr/common/conv.c @@ -0,0 +1,96 @@ +/* Copyright 2015 the unarr project authors (see AUTHORS file). 
+ License: LGPLv3 */ + +#include "unarr-imp.h" + +#include + +/* data from http://en.wikipedia.org/wiki/Cp437 */ +static const wchar_t gCp437[256] = { + 0, 0x263A, 0x263B, 0x2665, 0x2666, 0x2663, 0x2660, 0x2022, 0x25D8, 0x25CB, 0x25D9, 0x2642, 0x2640, 0x266A, 0x266C, 0x263C, + 0x25BA, 0x25C4, 0x2195, 0x203C, 0x00B6, 0x00A7, 0x25AC, 0x21A8, 0x2191, 0x2193, 0x2192, 0x2190, 0x221F, 0x2194, 0x25B2, 0x25BC, + ' ', '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.', '/', + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?', + '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', + 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[', '\\', ']', '^', '_', + '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '{', '|', '}', '~', 0x2302, + 0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x00E5, 0x00E7, 0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x00EC, 0x00C4, 0x00C5, + 0x00C9, 0x00E6, 0x00C6, 0x00F4, 0x00F6, 0x00F2, 0x00FB, 0x00F9, 0x00FF, 0x00D6, 0x00DC, 0x00A2, 0x00A3, 0x00A5, 0x20A7, 0x0192, + 0x00E1, 0x00ED, 0x00F3, 0x00FA, 0x00F1, 0x00D1, 0x00AA, 0x00BA, 0x00BF, 0x2310, 0x00AC, 0x00BD, 0x00BC, 0x00A1, 0x00AB, 0x00BB, + 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556, 0x2555, 0x2563, 0x2551, 0x2557, 0x255D, 0x255C, 0x255B, 0x2510, + 0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x255E, 0x255F, 0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x2567, + 0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256B, 0x256A, 0x2518, 0x250C, 0x2588, 0x2584, 0x258C, 0x2590, 0x2580, + 0x03B1, 0x00DF, 0x0393, 0x03C0, 0x03A3, 0x03C3, 0x00B5, 0x03C4, 0x03A6, 0x0398, 0x03A9, 0x03B4, 0x221E, 0x03C6, 0x03B5, 0x2229, + 0x2261, 0x00B1, 0x2265, 0x2264, 0x2320, 0x2321, 0x00F7, 0x2248, 0x00B0, 0x2219, 0x00B7, 0x221A, 0x207F, 0x00B2, 0x25A0, 0x00A0, +}; + +size_t ar_conv_rune_to_utf8(wchar_t rune, char 
*out, size_t size) +{ + if (size < 1) + return 0; + if (rune < 0x0080) { + *out++ = rune & 0x7F; + return 1; + } + if (rune < 0x0800 && size >= 2) { + *out++ = 0xC0 | ((rune >> 6) & 0x1F); + *out++ = 0x80 | (rune & 0x3F); + return 2; + } + if (size >= 3) { + if ((0xD800 <= rune && rune <= 0xDFFF) || rune >= 0x10000) + rune = 0xFFFD; + *out++ = 0xE0 | ((rune >> 12) & 0x0F); + *out++ = 0x80 | ((rune >> 6) & 0x3F); + *out++ = 0x80 | (rune & 0x3F); + return 3; + } + *out++ = '?'; + return 1; +} + +char *ar_conv_dos_to_utf8(const char *astr) +{ + char *str, *out; + const char *in; + size_t size; + + size = 0; + for (in = astr; *in; in++) { + char buf[4]; + size += ar_conv_rune_to_utf8(gCp437[(uint8_t)*in], buf, sizeof(buf)); + } + + if (size == (size_t)-1) + return NULL; + str = malloc(size + 1); + if (!str) + return NULL; + + for (in = astr, out = str; *in; in++) { + out += ar_conv_rune_to_utf8(gCp437[(uint8_t)*in], out, str + size - out); + } + *out = '\0'; + + return str; +} + +time64_t ar_conv_dosdate_to_filetime(uint32_t dosdate) +{ + struct tm tm; + time_t t1, t2; + + tm.tm_sec = (dosdate & 0x1F) * 2; + tm.tm_min = (dosdate >> 5) & 0x3F; + tm.tm_hour = (dosdate >> 11) & 0x1F; + tm.tm_mday = (dosdate >> 16) & 0x1F; + tm.tm_mon = ((dosdate >> 21) & 0x0F) - 1; + tm.tm_year = ((dosdate >> 25) & 0x7F) + 80; + tm.tm_isdst = -1; + + t1 = mktime(&tm); + t2 = mktime(gmtime(&t1)); + + return (time64_t)(2 * t1 - t2 + 11644473600) * 10000000; +} diff --git a/external/unarr/common/crc32.c b/external/unarr/common/crc32.c new file mode 100644 index 00000000..1eba8d0b --- /dev/null +++ b/external/unarr/common/crc32.c @@ -0,0 +1,86 @@ +/* Copyright 2022 the unarr project authors (see AUTHORS file). 
/* Copyright 2022 the unarr project authors (see AUTHORS file).
   License: LGPLv3 */

/* NOTE(review): the bracketed include targets were lost in this dump. The
   fixed-width/bool/size_t headers (normally reached via unarr-imp.h, which
   also declares the ar_crc32 prototype) and <zlib.h> are restored here so
   this unit is self-contained. */
#include <stdint.h>
#include <stdbool.h>
#include <stddef.h>

#if !defined HAVE_ZLIB || !defined USE_ZLIB_CRC

/*
  crc32 calculation based on Intel slice-by-8 algorithm with lookup-table generation code
  adapted from https://gnunet.org/svn/gnunet/src/util/crypto_crc.c (public domain) */

static inline uint32_t uint32le(const uint8_t *data) { return data[0] | data[1] << 8 | data[2] << 16 | (uint32_t)data[3] << 24; }

/* lazily generated lookup tables; generation is deterministic/idempotent,
   but the ready flag is not synchronized -- NOTE(review): assumes the first
   call happens before any concurrent use */
static bool crc_table_ready = false;
static uint32_t crc_table[8][256];

/* Standard CRC-32 (reflected polynomial 0xEDB88320, init/final XOR with
   0xFFFFFFFF, same convention as zlib's crc32). Pass the previous return
   value as the first argument to checksum incrementally, 0 to start. */
uint32_t ar_crc32(uint32_t crc32, const uint8_t *data, size_t data_len)
{
    if (!crc_table_ready) {

        static const uint32_t crc_poly = 0xEDB88320;

        uint32_t h = 1;
        crc_table[0][0] = 0;

        /* build the base table by stepping the LFSR for each bit position */
        for (unsigned int i = 128; i; i >>= 1) {
            h = (h >> 1) ^ ((h & 1) ? crc_poly : 0);
            for (unsigned int j = 0; j < 256; j += 2 * i) {
                crc_table[0][i+j] = crc_table[0][j] ^ h;
            }
        }

        /* derive the 7 shifted tables used by the slice-by-8 main loop */
        for (unsigned int i = 0; i < 256; i++) {
            for (unsigned int j = 1; j < 8; j++) {
                crc_table[j][i] = (crc_table[j-1][i] >> 8) ^ crc_table[0][crc_table[j-1][i] & 0xFF];
            }
        }

        crc_table_ready = true;
    }

    crc32 ^= 0xFFFFFFFF;

    /* process 8 input bytes per iteration via the 8 sliced tables */
    while (data_len >= 8) {

        uint32_t tmp = crc32 ^ uint32le(data);

        crc32 = crc_table[7][ tmp         & 0xFF ] ^
                crc_table[6][(tmp >>  8)  & 0xFF ] ^
                crc_table[5][(tmp >> 16)  & 0xFF ] ^
                crc_table[4][ tmp >> 24          ];

        tmp = uint32le(data + 4);

        crc32 ^= crc_table[3][ tmp        & 0xFF] ^
                 crc_table[2][(tmp >>  8) & 0xFF] ^
                 crc_table[1][(tmp >> 16) & 0xFF] ^
                 crc_table[0][ tmp >> 24        ];

        data += 8;
        data_len -= 8;
    }

    /* byte-at-a-time tail */
    while (data_len-- > 0) {
        crc32 = (crc32 >> 8) ^ crc_table[0][(crc32 ^ *data++) & 0xFF];
    }

    return crc32 ^ 0xFFFFFFFF;
}

#else

#include <zlib.h>

/* Thin wrapper over zlib's crc32, chunking because zlib takes a 32-bit
   length while data_len is a size_t. */
uint32_t ar_crc32(uint32_t crc, const unsigned char *data, size_t data_len)
{
#if SIZE_MAX > UINT32_MAX
    while (data_len > UINT32_MAX) {
        crc = crc32(crc, data, UINT32_MAX);
        data += UINT32_MAX;
        data_len -= UINT32_MAX;
    }
#endif
    return crc32(crc, data, (uint32_t)data_len);
}

#endif
/* Copyright 2015 the unarr project authors (see AUTHORS file).
   License: LGPLv3 */

/* Custom allocator indirection: library allocations funnel through
   ar_malloc/ar_calloc/ar_free so embedders can install their own allocator
   via ar_set_custom_allocator (declared in common/allocator.h).
   NOTE(review): the bracketed include targets were lost in this dump;
   restored as <stdlib.h> (malloc/free), <string.h> (memset),
   <stdint.h> (SIZE_MAX). */

#include <stdlib.h>
#include <string.h>
#include <stdint.h>

/* signatures mirror the declarations in common/allocator.h */
typedef void *(* custom_malloc_fn)(void *opaque, size_t size);
typedef void (* custom_free_fn)(void *opaque, void *ptr);

static void *default_malloc(void *opaque, size_t size) { (void)opaque; return malloc(size); }
static void default_free(void *opaque, void *ptr) { (void)opaque; free(ptr); }

/* process-wide allocator hooks; swapping them while allocations are live
   is the caller's responsibility */
static struct {
    custom_malloc_fn malloc;
    custom_free_fn free;
    void *opaque;
} gAllocator = {
    default_malloc,
    default_free,
    NULL,
};

/* Allocates size bytes through the configured allocator; may return NULL. */
void *ar_malloc(size_t size)
{
    return gAllocator.malloc(gAllocator.opaque, size);
}

/* Allocates a zero-initialized array of count elements of size bytes each.
   Returns NULL if count * size would overflow or allocation fails. */
void *ar_calloc(size_t count, size_t size)
{
    void *ptr = NULL;
    /* BUGFIX: short-circuit count == 0 first -- the original evaluated
       SIZE_MAX / count unconditionally, dividing by zero (UB) for empty
       allocations */
    if (count == 0 || size <= SIZE_MAX / count)
        ptr = ar_malloc(count * size);
    if (ptr)
        memset(ptr, 0, count * size);
    return ptr;
}

/* Releases memory obtained from ar_malloc/ar_calloc; NULL is forwarded to
   the configured free hook (a no-op for the default). */
void ar_free(void *ptr)
{
    gAllocator.free(gAllocator.opaque, ptr);
}

/* Installs a custom allocator; passing NULL for either hook restores the
   corresponding malloc/free default. opaque is handed back to both hooks. */
void ar_set_custom_allocator(custom_malloc_fn custom_malloc, custom_free_fn custom_free, void *opaque)
{
    gAllocator.malloc = custom_malloc ? custom_malloc : default_malloc;
    gAllocator.free = custom_free ? custom_free : default_free;
    gAllocator.opaque = opaque;
}
+ License: LGPLv3 */ + +#include "unarr-imp.h" + +ar_stream *ar_open_stream(void *data, ar_stream_close_fn close, ar_stream_read_fn read, ar_stream_seek_fn seek, ar_stream_tell_fn tell) +{ + ar_stream *stream = malloc(sizeof(ar_stream)); + if (!stream) { + close(data); + return NULL; + } + stream->data = data; + stream->close = close; + stream->read = read; + stream->seek = seek; + stream->tell = tell; + return stream; +} + +void ar_close(ar_stream *stream) +{ + if (stream) + stream->close(stream->data); + free(stream); +} + +size_t ar_read(ar_stream *stream, void *buffer, size_t count) +{ + return stream->read(stream->data, buffer, count); +} + +bool ar_seek(ar_stream *stream, off64_t offset, int origin) +{ + return stream->seek(stream->data, offset, origin); +} + +bool ar_skip(ar_stream *stream, off64_t count) +{ + return stream->seek(stream->data, count, SEEK_CUR); +} + +off64_t ar_tell(ar_stream *stream) +{ + return stream->tell(stream->data); +} + +/***** stream based on FILE *****/ + +static void file_close(void *data) +{ + fclose(data); +} + +static size_t file_read(void *data, void *buffer, size_t count) +{ + return fread(buffer, 1, count, data); +} + +static bool file_seek(void *data, off64_t offset, int origin) +{ +#ifdef _WIN32 + return _fseeki64(data, offset, origin) == 0; +#else +#if _POSIX_C_SOURCE >= 200112L + if (sizeof(off_t) == 8) + return fseeko(data, offset, origin) == 0; +#endif + if (offset > INT32_MAX || offset < INT32_MIN) + return false; + return fseek(data, (long)offset, origin) == 0; +#endif +} + +static off64_t file_tell(void *data) +{ +#ifdef _WIN32 + return _ftelli64(data); +#elif _POSIX_C_SOURCE >= 200112L + return ftello(data); +#else + return ftell(data); +#endif +} + +ar_stream *ar_open_file(const char *path) +{ + FILE *f = path ? 
fopen(path, "rb") : NULL; + if (!f) + return NULL; + return ar_open_stream(f, file_close, file_read, file_seek, file_tell); +} + +#ifdef _WIN32 +ar_stream *ar_open_file_w(const wchar_t *path) +{ + FILE *f = path ? _wfopen(path, L"rb") : NULL; + if (!f) + return NULL; + return ar_open_stream(f, file_close, file_read, file_seek, file_tell); +} +#endif + +/***** stream based on preallocated memory *****/ + +struct MemoryStream { + const uint8_t *data; + size_t length; + size_t offset; +}; + +static void memory_close(void *data) +{ + struct MemoryStream *stm = data; + free(stm); +} + +static size_t memory_read(void *data, void *buffer, size_t count) +{ + struct MemoryStream *stm = data; + if (count > stm->length - stm->offset) + count = stm->length - stm->offset; + memcpy(buffer, stm->data + stm->offset, count); + stm->offset += count; + return count; +} + +static bool memory_seek(void *data, off64_t offset, int origin) +{ + struct MemoryStream *stm = data; + if (origin == SEEK_CUR) + offset += stm->offset; + else if (origin == SEEK_END) + offset += stm->length; + if (offset < 0 || offset > (off64_t)stm->length || (size_t)offset > stm->length) + return false; + stm->offset = (size_t)offset; + return true; +} + +static off64_t memory_tell(void *data) +{ + struct MemoryStream *stm = data; + return stm->offset; +} + +ar_stream *ar_open_memory(const void *data, size_t datalen) +{ + struct MemoryStream *stm = malloc(sizeof(struct MemoryStream)); + if (!stm) + return NULL; + stm->data = data; + stm->length = datalen; + stm->offset = 0; + return ar_open_stream(stm, memory_close, memory_read, memory_seek, memory_tell); +} + +#ifdef _WIN32 +/***** stream based on IStream *****/ + +#define COBJMACROS +#include + +static void stream_close(void *data) +{ + IUnknown_Release((IStream *)data); +} + +static size_t stream_read(void *data, void *buffer, size_t count) +{ + size_t read = 0; + HRESULT res; + ULONG cbRead; +#ifdef _WIN64 + while (count > ULONG_MAX) { + res = 
IStream_Read((IStream *)data, buffer, ULONG_MAX, &cbRead); + if (FAILED(res)) + return read; + read += cbRead; + buffer = (BYTE *)buffer + ULONG_MAX; + count -= ULONG_MAX; + } +#endif + res = IStream_Read((IStream *)data, buffer, (ULONG)count, &cbRead); + if (SUCCEEDED(res)) + read += cbRead; + return read; +} + +static bool stream_seek(void *data, off64_t offset, int origin) +{ + LARGE_INTEGER off; + ULARGE_INTEGER n; + HRESULT res; + off.QuadPart = offset; + res = IStream_Seek((IStream *)data, off, origin, &n); + return SUCCEEDED(res); +} + +static off64_t stream_tell(void *data) +{ + LARGE_INTEGER zero = { 0 }; + ULARGE_INTEGER n = { 0 }; + IStream_Seek((IStream *)data, zero, SEEK_CUR, &n); + return (off64_t)n.QuadPart; +} + +ar_stream *ar_open_istream(IStream *stream) +{ + LARGE_INTEGER zero = { 0 }; + HRESULT res = IStream_Seek(stream, zero, STREAM_SEEK_SET, NULL); + if (FAILED(res)) + return NULL; + IUnknown_AddRef(stream); + return ar_open_stream(stream, stream_close, stream_read, stream_seek, stream_tell); +} +#endif diff --git a/external/unarr/common/unarr-imp.h b/external/unarr/common/unarr-imp.h new file mode 100644 index 00000000..d45d1018 --- /dev/null +++ b/external/unarr/common/unarr-imp.h @@ -0,0 +1,84 @@ +/* Copyright 2015 the unarr project authors (see AUTHORS file). 
+ License: LGPLv3 */ + +/* this is the common private/implementation API of unarr which should only be used by unarr code */ + +#ifndef common_unarr_imp_h +#define common_unarr_imp_h + +#include "../unarr.h" +#include "allocator.h" + +#include +#include +#include +#include +#include + +/***** conv ****/ + +size_t ar_conv_rune_to_utf8(wchar_t rune, char *out, size_t size); +char *ar_conv_dos_to_utf8(const char *astr); +time64_t ar_conv_dosdate_to_filetime(uint32_t dosdate); + +/***** crc32 *****/ + +uint32_t ar_crc32(uint32_t crc32, const unsigned char *data, size_t data_len); + +/***** stream *****/ + +typedef void (* ar_stream_close_fn)(void *data); +typedef size_t (* ar_stream_read_fn)(void *data, void *buffer, size_t count); +typedef bool (* ar_stream_seek_fn)(void *data, off64_t offset, int origin); +typedef off64_t (* ar_stream_tell_fn)(void *data); + +struct ar_stream_s { + ar_stream_close_fn close; + ar_stream_read_fn read; + ar_stream_seek_fn seek; + ar_stream_tell_fn tell; + void *data; +}; + +ar_stream *ar_open_stream(void *data, ar_stream_close_fn close, ar_stream_read_fn read, ar_stream_seek_fn seek, ar_stream_tell_fn tell); + +/***** unarr *****/ +#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION +#define warn(...) ar_log("!", __FILE__, __LINE__, __VA_ARGS__) +#else +#define warn(...) ((void)0) +#endif +#ifndef NDEBUG +#define log(...) ar_log("-", __FILE__, __LINE__, __VA_ARGS__) +#else +#define log(...) 
((void)0) +#endif +void ar_log(const char *prefix, const char *file, int line, const char *msg, ...); + +typedef void (* ar_archive_close_fn)(ar_archive *ar); +typedef bool (* ar_parse_entry_fn)(ar_archive *ar, off64_t offset); +typedef const char *(* ar_entry_get_name_fn)(ar_archive *ar, bool raw); +typedef bool (* ar_entry_uncompress_fn)(ar_archive *ar, void *buffer, size_t count); +typedef size_t (* ar_get_global_comment_fn)(ar_archive *ar, void *buffer, size_t count); + +struct ar_archive_s { + ar_archive_close_fn close; + ar_parse_entry_fn parse_entry; + ar_entry_get_name_fn get_name; + ar_entry_uncompress_fn uncompress; + ar_get_global_comment_fn get_comment; + + ar_stream *stream; + bool at_eof; + off64_t entry_offset; + off64_t entry_offset_first; + off64_t entry_offset_next; + size_t entry_size_uncompressed; + time64_t entry_filetime; +}; + +ar_archive *ar_open_archive(ar_stream *stream, size_t struct_size, ar_archive_close_fn close, ar_parse_entry_fn parse_entry, + ar_entry_get_name_fn get_name, ar_entry_uncompress_fn uncompress, ar_get_global_comment_fn get_comment, + off64_t first_entry_offset); + +#endif diff --git a/external/unarr/common/unarr.c b/external/unarr/common/unarr.c new file mode 100644 index 00000000..f77fa432 --- /dev/null +++ b/external/unarr/common/unarr.c @@ -0,0 +1,114 @@ +/* Copyright 2015 the unarr project authors (see AUTHORS file). 
+ License: LGPLv3 */ + +#include "unarr-imp.h" + +ar_archive *ar_open_archive(ar_stream *stream, size_t struct_size, ar_archive_close_fn close, ar_parse_entry_fn parse_entry, + ar_entry_get_name_fn get_name, ar_entry_uncompress_fn uncompress, ar_get_global_comment_fn get_comment, + off64_t first_entry_offset) +{ + ar_archive *ar = malloc(struct_size); + if (!ar) + return NULL; + memset(ar, 0, struct_size); + ar->close = close; + ar->parse_entry = parse_entry; + ar->get_name = get_name; + ar->uncompress = uncompress; + ar->get_comment = get_comment; + ar->stream = stream; + ar->entry_offset_first = first_entry_offset; + ar->entry_offset_next = first_entry_offset; + return ar; +} + +void ar_close_archive(ar_archive *ar) +{ + if (ar) + ar->close(ar); + free(ar); +} + +bool ar_at_eof(ar_archive *ar) +{ + return ar->at_eof; +} + +bool ar_parse_entry(ar_archive *ar) +{ + return ar->parse_entry(ar, ar->entry_offset_next); +} + +bool ar_parse_entry_at(ar_archive *ar, off64_t offset) +{ + ar->at_eof = false; + return ar->parse_entry(ar, offset ? 
offset : ar->entry_offset_first); +} + +bool ar_parse_entry_for(ar_archive *ar, const char *entry_name) +{ + ar->at_eof = false; + if (!entry_name) + return false; + if (!ar_parse_entry_at(ar, ar->entry_offset_first)) + return false; + do { + const char *name = ar_entry_get_name(ar); + if (name && strcmp(name, entry_name) == 0) + return true; + } while (ar_parse_entry(ar)); + return false; +} + +const char *ar_entry_get_name(ar_archive *ar) +{ + return ar->get_name(ar, false); +} + +const char *ar_entry_get_raw_name(ar_archive *ar) +{ + return ar->get_name(ar, true); +} + +off64_t ar_entry_get_offset(ar_archive *ar) +{ + return ar->entry_offset; +} + +size_t ar_entry_get_size(ar_archive *ar) +{ + return ar->entry_size_uncompressed; +} + +time64_t ar_entry_get_filetime(ar_archive *ar) +{ + return ar->entry_filetime; +} + +bool ar_entry_uncompress(ar_archive *ar, void *buffer, size_t count) +{ + return ar->uncompress(ar, buffer, count); +} + +size_t ar_get_global_comment(ar_archive *ar, void *buffer, size_t count) +{ + if (!ar->get_comment) + return 0; + return ar->get_comment(ar, buffer, count); +} + +void ar_log(const char *prefix, const char *file, int line, const char *msg, ...) 
+{ + va_list args; + va_start(args, msg); + if (prefix) + fprintf(stderr, "%s ", prefix); + if (strrchr(file, '/')) + file = strrchr(file, '/') + 1; + if (strrchr(file, '\\')) + file = strrchr(file, '\\') + 1; + fprintf(stderr, "%s:%d: ", file, line); + vfprintf(stderr, msg, args); + fprintf(stderr, "\n"); + va_end(args); +} diff --git a/external/unarr/lzmasdk/7z.h b/external/unarr/lzmasdk/7z.h new file mode 100644 index 00000000..9e27c015 --- /dev/null +++ b/external/unarr/lzmasdk/7z.h @@ -0,0 +1,204 @@ +/* 7z.h -- 7z interface +2023-04-02 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_7Z_H +#define ZIP7_INC_7Z_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +#define k7zStartHeaderSize 0x20 +#define k7zSignatureSize 6 + +extern const Byte k7zSignature[k7zSignatureSize]; + +typedef struct +{ + const Byte *Data; + size_t Size; +} CSzData; + +/* CSzCoderInfo & CSzFolder support only default methods */ + +typedef struct +{ + size_t PropsOffset; + UInt32 MethodID; + Byte NumStreams; + Byte PropsSize; +} CSzCoderInfo; + +typedef struct +{ + UInt32 InIndex; + UInt32 OutIndex; +} CSzBond; + +#define SZ_NUM_CODERS_IN_FOLDER_MAX 4 +#define SZ_NUM_BONDS_IN_FOLDER_MAX 3 +#define SZ_NUM_PACK_STREAMS_IN_FOLDER_MAX 4 + +typedef struct +{ + UInt32 NumCoders; + UInt32 NumBonds; + UInt32 NumPackStreams; + UInt32 UnpackStream; + UInt32 PackStreams[SZ_NUM_PACK_STREAMS_IN_FOLDER_MAX]; + CSzBond Bonds[SZ_NUM_BONDS_IN_FOLDER_MAX]; + CSzCoderInfo Coders[SZ_NUM_CODERS_IN_FOLDER_MAX]; +} CSzFolder; + + +SRes SzGetNextFolderItem(CSzFolder *f, CSzData *sd); + +typedef struct +{ + UInt32 Low; + UInt32 High; +} CNtfsFileTime; + +typedef struct +{ + Byte *Defs; /* MSB 0 bit numbering */ + UInt32 *Vals; +} CSzBitUi32s; + +typedef struct +{ + Byte *Defs; /* MSB 0 bit numbering */ + // UInt64 *Vals; + CNtfsFileTime *Vals; +} CSzBitUi64s; + +#define SzBitArray_Check(p, i) (((p)[(i) >> 3] & (0x80 >> ((i) & 7))) != 0) + +#define SzBitWithVals_Check(p, i) ((p)->Defs && ((p)->Defs[(i) >> 3] & (0x80 
>> ((i) & 7))) != 0) + +typedef struct +{ + UInt32 NumPackStreams; + UInt32 NumFolders; + + UInt64 *PackPositions; // NumPackStreams + 1 + CSzBitUi32s FolderCRCs; // NumFolders + + size_t *FoCodersOffsets; // NumFolders + 1 + UInt32 *FoStartPackStreamIndex; // NumFolders + 1 + UInt32 *FoToCoderUnpackSizes; // NumFolders + 1 + Byte *FoToMainUnpackSizeIndex; // NumFolders + UInt64 *CoderUnpackSizes; // for all coders in all folders + + Byte *CodersData; + + UInt64 RangeLimit; +} CSzAr; + +UInt64 SzAr_GetFolderUnpackSize(const CSzAr *p, UInt32 folderIndex); + +SRes SzAr_DecodeFolder(const CSzAr *p, UInt32 folderIndex, + ILookInStreamPtr stream, UInt64 startPos, + Byte *outBuffer, size_t outSize, + ISzAllocPtr allocMain); + +typedef struct +{ + CSzAr db; + + UInt64 startPosAfterHeader; + UInt64 dataPos; + + UInt32 NumFiles; + + UInt64 *UnpackPositions; // NumFiles + 1 + // Byte *IsEmptyFiles; + Byte *IsDirs; + CSzBitUi32s CRCs; + + CSzBitUi32s Attribs; + // CSzBitUi32s Parents; + CSzBitUi64s MTime; + CSzBitUi64s CTime; + + UInt32 *FolderToFile; // NumFolders + 1 + UInt32 *FileToFolder; // NumFiles + + size_t *FileNameOffsets; /* in 2-byte steps */ + Byte *FileNames; /* UTF-16-LE */ +} CSzArEx; + +#define SzArEx_IsDir(p, i) (SzBitArray_Check((p)->IsDirs, i)) + +#define SzArEx_GetFileSize(p, i) ((p)->UnpackPositions[(i) + 1] - (p)->UnpackPositions[i]) + +void SzArEx_Init(CSzArEx *p); +void SzArEx_Free(CSzArEx *p, ISzAllocPtr alloc); +UInt64 SzArEx_GetFolderStreamPos(const CSzArEx *p, UInt32 folderIndex, UInt32 indexInFolder); +int SzArEx_GetFolderFullPackSize(const CSzArEx *p, UInt32 folderIndex, UInt64 *resSize); + +/* +if dest == NULL, the return value specifies the required size of the buffer, + in 16-bit characters, including the null-terminating character. +if dest != NULL, the return value specifies the number of 16-bit characters that + are written to the dest, including the null-terminating character. 
*/ + +size_t SzArEx_GetFileNameUtf16(const CSzArEx *p, size_t fileIndex, UInt16 *dest); + +/* +size_t SzArEx_GetFullNameLen(const CSzArEx *p, size_t fileIndex); +UInt16 *SzArEx_GetFullNameUtf16_Back(const CSzArEx *p, size_t fileIndex, UInt16 *dest); +*/ + + + +/* + SzArEx_Extract extracts file from archive + + *outBuffer must be 0 before first call for each new archive. + + Extracting cache: + If you need to decompress more than one file, you can send + these values from previous call: + *blockIndex, + *outBuffer, + *outBufferSize + You can consider "*outBuffer" as cache of solid block. If your archive is solid, + it will increase decompression speed. + + If you use external function, you can declare these 3 cache variables + (blockIndex, outBuffer, outBufferSize) as static in that external function. + + Free *outBuffer and set *outBuffer to 0, if you want to flush cache. +*/ + +SRes SzArEx_Extract( + const CSzArEx *db, + ILookInStreamPtr inStream, + UInt32 fileIndex, /* index of file */ + UInt32 *blockIndex, /* index of solid block */ + Byte **outBuffer, /* pointer to pointer to output buffer (allocated with allocMain) */ + size_t *outBufferSize, /* buffer size for output buffer */ + size_t *offset, /* offset of stream for required file in *outBuffer */ + size_t *outSizeProcessed, /* size of file in *outBuffer */ + ISzAllocPtr allocMain, + ISzAllocPtr allocTemp); + + +/* +SzArEx_Open Errors: +SZ_ERROR_NO_ARCHIVE +SZ_ERROR_ARCHIVE +SZ_ERROR_UNSUPPORTED +SZ_ERROR_MEM +SZ_ERROR_CRC +SZ_ERROR_INPUT_EOF +SZ_ERROR_FAIL +*/ + +SRes SzArEx_Open(CSzArEx *p, ILookInStreamPtr inStream, + ISzAllocPtr allocMain, ISzAllocPtr allocTemp); + +EXTERN_C_END + +#endif diff --git a/external/unarr/lzmasdk/7zArcIn.c b/external/unarr/lzmasdk/7zArcIn.c new file mode 100644 index 00000000..43fa7c21 --- /dev/null +++ b/external/unarr/lzmasdk/7zArcIn.c @@ -0,0 +1,1786 @@ +/* 7zArcIn.c -- 7z Input functions +2023-05-11 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include + 
+#include "7z.h" +#include "7zBuf.h" +#include "7zCrc.h" +#include "CpuArch.h" + +#define MY_ALLOC(T, p, size, alloc) \ + { if ((p = (T *)ISzAlloc_Alloc(alloc, (size) * sizeof(T))) == NULL) return SZ_ERROR_MEM; } + +#define MY_ALLOC_ZE(T, p, size, alloc) \ + { if ((size) == 0) p = NULL; else MY_ALLOC(T, p, size, alloc) } + +#define MY_ALLOC_AND_CPY(to, size, from, alloc) \ + { MY_ALLOC(Byte, to, size, alloc); memcpy(to, from, size); } + +#define MY_ALLOC_ZE_AND_CPY(to, size, from, alloc) \ + { if ((size) == 0) to = NULL; else { MY_ALLOC_AND_CPY(to, size, from, alloc) } } + +#define k7zMajorVersion 0 + +enum EIdEnum +{ + k7zIdEnd, + k7zIdHeader, + k7zIdArchiveProperties, + k7zIdAdditionalStreamsInfo, + k7zIdMainStreamsInfo, + k7zIdFilesInfo, + k7zIdPackInfo, + k7zIdUnpackInfo, + k7zIdSubStreamsInfo, + k7zIdSize, + k7zIdCRC, + k7zIdFolder, + k7zIdCodersUnpackSize, + k7zIdNumUnpackStream, + k7zIdEmptyStream, + k7zIdEmptyFile, + k7zIdAnti, + k7zIdName, + k7zIdCTime, + k7zIdATime, + k7zIdMTime, + k7zIdWinAttrib, + k7zIdComment, + k7zIdEncodedHeader, + k7zIdStartPos, + k7zIdDummy + // k7zNtSecure, + // k7zParent, + // k7zIsReal +}; + +const Byte k7zSignature[k7zSignatureSize] = {'7', 'z', 0xBC, 0xAF, 0x27, 0x1C}; + +#define SzBitUi32s_INIT(p) { (p)->Defs = NULL; (p)->Vals = NULL; } + +static SRes SzBitUi32s_Alloc(CSzBitUi32s *p, size_t num, ISzAllocPtr alloc) +{ + if (num == 0) + { + p->Defs = NULL; + p->Vals = NULL; + } + else + { + MY_ALLOC(Byte, p->Defs, (num + 7) >> 3, alloc) + MY_ALLOC(UInt32, p->Vals, num, alloc) + } + return SZ_OK; +} + +static void SzBitUi32s_Free(CSzBitUi32s *p, ISzAllocPtr alloc) +{ + ISzAlloc_Free(alloc, p->Defs); p->Defs = NULL; + ISzAlloc_Free(alloc, p->Vals); p->Vals = NULL; +} + +#define SzBitUi64s_INIT(p) { (p)->Defs = NULL; (p)->Vals = NULL; } + +static void SzBitUi64s_Free(CSzBitUi64s *p, ISzAllocPtr alloc) +{ + ISzAlloc_Free(alloc, p->Defs); p->Defs = NULL; + ISzAlloc_Free(alloc, p->Vals); p->Vals = NULL; +} + + +static void 
SzAr_Init(CSzAr *p) +{ + p->NumPackStreams = 0; + p->NumFolders = 0; + + p->PackPositions = NULL; + SzBitUi32s_INIT(&p->FolderCRCs) + + p->FoCodersOffsets = NULL; + p->FoStartPackStreamIndex = NULL; + p->FoToCoderUnpackSizes = NULL; + p->FoToMainUnpackSizeIndex = NULL; + p->CoderUnpackSizes = NULL; + + p->CodersData = NULL; + + p->RangeLimit = 0; +} + +static void SzAr_Free(CSzAr *p, ISzAllocPtr alloc) +{ + ISzAlloc_Free(alloc, p->PackPositions); + SzBitUi32s_Free(&p->FolderCRCs, alloc); + + ISzAlloc_Free(alloc, p->FoCodersOffsets); + ISzAlloc_Free(alloc, p->FoStartPackStreamIndex); + ISzAlloc_Free(alloc, p->FoToCoderUnpackSizes); + ISzAlloc_Free(alloc, p->FoToMainUnpackSizeIndex); + ISzAlloc_Free(alloc, p->CoderUnpackSizes); + + ISzAlloc_Free(alloc, p->CodersData); + + SzAr_Init(p); +} + + +void SzArEx_Init(CSzArEx *p) +{ + SzAr_Init(&p->db); + + p->NumFiles = 0; + p->dataPos = 0; + + p->UnpackPositions = NULL; + p->IsDirs = NULL; + + p->FolderToFile = NULL; + p->FileToFolder = NULL; + + p->FileNameOffsets = NULL; + p->FileNames = NULL; + + SzBitUi32s_INIT(&p->CRCs) + SzBitUi32s_INIT(&p->Attribs) + // SzBitUi32s_INIT(&p->Parents) + SzBitUi64s_INIT(&p->MTime) + SzBitUi64s_INIT(&p->CTime) +} + +void SzArEx_Free(CSzArEx *p, ISzAllocPtr alloc) +{ + ISzAlloc_Free(alloc, p->UnpackPositions); + ISzAlloc_Free(alloc, p->IsDirs); + + ISzAlloc_Free(alloc, p->FolderToFile); + ISzAlloc_Free(alloc, p->FileToFolder); + + ISzAlloc_Free(alloc, p->FileNameOffsets); + ISzAlloc_Free(alloc, p->FileNames); + + SzBitUi32s_Free(&p->CRCs, alloc); + SzBitUi32s_Free(&p->Attribs, alloc); + // SzBitUi32s_Free(&p->Parents, alloc); + SzBitUi64s_Free(&p->MTime, alloc); + SzBitUi64s_Free(&p->CTime, alloc); + + SzAr_Free(&p->db, alloc); + SzArEx_Init(p); +} + + +static int TestSignatureCandidate(const Byte *testBytes) +{ + unsigned i; + for (i = 0; i < k7zSignatureSize; i++) + if (testBytes[i] != k7zSignature[i]) + return 0; + return 1; +} + +#define SzData_CLEAR(p) { (p)->Data = NULL; (p)->Size = 
0; } + +#define SZ_READ_BYTE_SD_NOCHECK(_sd_, dest) \ + (_sd_)->Size--; dest = *(_sd_)->Data++; + +#define SZ_READ_BYTE_SD(_sd_, dest) \ + if ((_sd_)->Size == 0) return SZ_ERROR_ARCHIVE; \ + SZ_READ_BYTE_SD_NOCHECK(_sd_, dest) + +#define SZ_READ_BYTE(dest) SZ_READ_BYTE_SD(sd, dest) + +#define SZ_READ_BYTE_2(dest) \ + if (sd.Size == 0) return SZ_ERROR_ARCHIVE; \ + sd.Size--; dest = *sd.Data++; + +#define SKIP_DATA(sd, size) { sd->Size -= (size_t)(size); sd->Data += (size_t)(size); } +#define SKIP_DATA2(sd, size) { sd.Size -= (size_t)(size); sd.Data += (size_t)(size); } + +#define SZ_READ_32(dest) if (sd.Size < 4) return SZ_ERROR_ARCHIVE; \ + dest = GetUi32(sd.Data); SKIP_DATA2(sd, 4); + +static Z7_NO_INLINE SRes ReadNumber(CSzData *sd, UInt64 *value) +{ + Byte firstByte, mask; + unsigned i; + UInt32 v; + + SZ_READ_BYTE(firstByte) + if ((firstByte & 0x80) == 0) + { + *value = firstByte; + return SZ_OK; + } + SZ_READ_BYTE(v) + if ((firstByte & 0x40) == 0) + { + *value = (((UInt32)firstByte & 0x3F) << 8) | v; + return SZ_OK; + } + SZ_READ_BYTE(mask) + *value = v | ((UInt32)mask << 8); + mask = 0x20; + for (i = 2; i < 8; i++) + { + Byte b; + if ((firstByte & mask) == 0) + { + const UInt64 highPart = (unsigned)firstByte & (unsigned)(mask - 1); + *value |= (highPart << (8 * i)); + return SZ_OK; + } + SZ_READ_BYTE(b) + *value |= ((UInt64)b << (8 * i)); + mask >>= 1; + } + return SZ_OK; +} + + +static Z7_NO_INLINE SRes SzReadNumber32(CSzData *sd, UInt32 *value) +{ + Byte firstByte; + UInt64 value64; + if (sd->Size == 0) + return SZ_ERROR_ARCHIVE; + firstByte = *sd->Data; + if ((firstByte & 0x80) == 0) + { + *value = firstByte; + sd->Data++; + sd->Size--; + return SZ_OK; + } + RINOK(ReadNumber(sd, &value64)) + if (value64 >= (UInt32)0x80000000 - 1) + return SZ_ERROR_UNSUPPORTED; + if (value64 >= ((UInt64)(1) << ((sizeof(size_t) - 1) * 8 + 4))) + return SZ_ERROR_UNSUPPORTED; + *value = (UInt32)value64; + return SZ_OK; +} + +#define ReadID(sd, value) ReadNumber(sd, value) + 
+static SRes SkipData(CSzData *sd) +{ + UInt64 size; + RINOK(ReadNumber(sd, &size)) + if (size > sd->Size) + return SZ_ERROR_ARCHIVE; + SKIP_DATA(sd, size) + return SZ_OK; +} + +static SRes WaitId(CSzData *sd, UInt32 id) +{ + for (;;) + { + UInt64 type; + RINOK(ReadID(sd, &type)) + if (type == id) + return SZ_OK; + if (type == k7zIdEnd) + return SZ_ERROR_ARCHIVE; + RINOK(SkipData(sd)) + } +} + +static SRes RememberBitVector(CSzData *sd, UInt32 numItems, const Byte **v) +{ + const UInt32 numBytes = (numItems + 7) >> 3; + if (numBytes > sd->Size) + return SZ_ERROR_ARCHIVE; + *v = sd->Data; + SKIP_DATA(sd, numBytes) + return SZ_OK; +} + +static UInt32 CountDefinedBits(const Byte *bits, UInt32 numItems) +{ + Byte b = 0; + unsigned m = 0; + UInt32 sum = 0; + for (; numItems != 0; numItems--) + { + if (m == 0) + { + b = *bits++; + m = 8; + } + m--; + sum += ((b >> m) & 1); + } + return sum; +} + +static Z7_NO_INLINE SRes ReadBitVector(CSzData *sd, UInt32 numItems, Byte **v, ISzAllocPtr alloc) +{ + Byte allAreDefined; + Byte *v2; + const UInt32 numBytes = (numItems + 7) >> 3; + *v = NULL; + SZ_READ_BYTE(allAreDefined) + if (numBytes == 0) + return SZ_OK; + if (allAreDefined == 0) + { + if (numBytes > sd->Size) + return SZ_ERROR_ARCHIVE; + MY_ALLOC_AND_CPY(*v, numBytes, sd->Data, alloc) + SKIP_DATA(sd, numBytes) + return SZ_OK; + } + MY_ALLOC(Byte, *v, numBytes, alloc) + v2 = *v; + memset(v2, 0xFF, (size_t)numBytes); + { + const unsigned numBits = (unsigned)numItems & 7; + if (numBits != 0) + v2[(size_t)numBytes - 1] = (Byte)((((UInt32)1 << numBits) - 1) << (8 - numBits)); + } + return SZ_OK; +} + +static Z7_NO_INLINE SRes ReadUi32s(CSzData *sd2, UInt32 numItems, CSzBitUi32s *crcs, ISzAllocPtr alloc) +{ + UInt32 i; + CSzData sd; + UInt32 *vals; + const Byte *defs; + MY_ALLOC_ZE(UInt32, crcs->Vals, numItems, alloc) + sd = *sd2; + defs = crcs->Defs; + vals = crcs->Vals; + for (i = 0; i < numItems; i++) + if (SzBitArray_Check(defs, i)) + { + SZ_READ_32(vals[i]) + } + else + 
vals[i] = 0; + *sd2 = sd; + return SZ_OK; +} + +static SRes ReadBitUi32s(CSzData *sd, UInt32 numItems, CSzBitUi32s *crcs, ISzAllocPtr alloc) +{ + SzBitUi32s_Free(crcs, alloc); + RINOK(ReadBitVector(sd, numItems, &crcs->Defs, alloc)) + return ReadUi32s(sd, numItems, crcs, alloc); +} + +static SRes SkipBitUi32s(CSzData *sd, UInt32 numItems) +{ + Byte allAreDefined; + UInt32 numDefined = numItems; + SZ_READ_BYTE(allAreDefined) + if (!allAreDefined) + { + const size_t numBytes = (numItems + 7) >> 3; + if (numBytes > sd->Size) + return SZ_ERROR_ARCHIVE; + numDefined = CountDefinedBits(sd->Data, numItems); + SKIP_DATA(sd, numBytes) + } + if (numDefined > (sd->Size >> 2)) + return SZ_ERROR_ARCHIVE; + SKIP_DATA(sd, (size_t)numDefined * 4) + return SZ_OK; +} + +static SRes ReadPackInfo(CSzAr *p, CSzData *sd, ISzAllocPtr alloc) +{ + RINOK(SzReadNumber32(sd, &p->NumPackStreams)) + + RINOK(WaitId(sd, k7zIdSize)) + MY_ALLOC(UInt64, p->PackPositions, (size_t)p->NumPackStreams + 1, alloc) + { + UInt64 sum = 0; + UInt32 i; + const UInt32 numPackStreams = p->NumPackStreams; + for (i = 0; i < numPackStreams; i++) + { + UInt64 packSize; + p->PackPositions[i] = sum; + RINOK(ReadNumber(sd, &packSize)) + sum += packSize; + if (sum < packSize) + return SZ_ERROR_ARCHIVE; + } + p->PackPositions[i] = sum; + } + + for (;;) + { + UInt64 type; + RINOK(ReadID(sd, &type)) + if (type == k7zIdEnd) + return SZ_OK; + if (type == k7zIdCRC) + { + /* CRC of packed streams is unused now */ + RINOK(SkipBitUi32s(sd, p->NumPackStreams)) + continue; + } + RINOK(SkipData(sd)) + } +} + +/* +static SRes SzReadSwitch(CSzData *sd) +{ + Byte external; + RINOK(SzReadByte(sd, &external)); + return (external == 0) ? 
SZ_OK: SZ_ERROR_UNSUPPORTED; +} +*/ + +#define k_NumCodersStreams_in_Folder_MAX (SZ_NUM_BONDS_IN_FOLDER_MAX + SZ_NUM_PACK_STREAMS_IN_FOLDER_MAX) + +SRes SzGetNextFolderItem(CSzFolder *f, CSzData *sd) +{ + UInt32 numCoders, i; + UInt32 numInStreams = 0; + const Byte *dataStart = sd->Data; + + f->NumCoders = 0; + f->NumBonds = 0; + f->NumPackStreams = 0; + f->UnpackStream = 0; + + RINOK(SzReadNumber32(sd, &numCoders)) + if (numCoders == 0 || numCoders > SZ_NUM_CODERS_IN_FOLDER_MAX) + return SZ_ERROR_UNSUPPORTED; + + for (i = 0; i < numCoders; i++) + { + Byte mainByte; + CSzCoderInfo *coder = f->Coders + i; + unsigned idSize, j; + UInt64 id; + + SZ_READ_BYTE(mainByte) + if ((mainByte & 0xC0) != 0) + return SZ_ERROR_UNSUPPORTED; + + idSize = (unsigned)(mainByte & 0xF); + if (idSize > sizeof(id)) + return SZ_ERROR_UNSUPPORTED; + if (idSize > sd->Size) + return SZ_ERROR_ARCHIVE; + id = 0; + for (j = 0; j < idSize; j++) + { + id = ((id << 8) | *sd->Data); + sd->Data++; + sd->Size--; + } + if (id > (UInt32)0xFFFFFFFF) + return SZ_ERROR_UNSUPPORTED; + coder->MethodID = (UInt32)id; + + coder->NumStreams = 1; + coder->PropsOffset = 0; + coder->PropsSize = 0; + + if ((mainByte & 0x10) != 0) + { + UInt32 numStreams; + + RINOK(SzReadNumber32(sd, &numStreams)) + if (numStreams > k_NumCodersStreams_in_Folder_MAX) + return SZ_ERROR_UNSUPPORTED; + coder->NumStreams = (Byte)numStreams; + + RINOK(SzReadNumber32(sd, &numStreams)) + if (numStreams != 1) + return SZ_ERROR_UNSUPPORTED; + } + + numInStreams += coder->NumStreams; + + if (numInStreams > k_NumCodersStreams_in_Folder_MAX) + return SZ_ERROR_UNSUPPORTED; + + if ((mainByte & 0x20) != 0) + { + UInt32 propsSize = 0; + RINOK(SzReadNumber32(sd, &propsSize)) + if (propsSize > sd->Size) + return SZ_ERROR_ARCHIVE; + if (propsSize >= 0x80) + return SZ_ERROR_UNSUPPORTED; + coder->PropsOffset = (size_t)(sd->Data - dataStart); + coder->PropsSize = (Byte)propsSize; + sd->Data += (size_t)propsSize; + sd->Size -= (size_t)propsSize; + } + } + + 
/* + if (numInStreams == 1 && numCoders == 1) + { + f->NumPackStreams = 1; + f->PackStreams[0] = 0; + } + else + */ + { + Byte streamUsed[k_NumCodersStreams_in_Folder_MAX]; + UInt32 numBonds, numPackStreams; + + numBonds = numCoders - 1; + if (numInStreams < numBonds) + return SZ_ERROR_ARCHIVE; + if (numBonds > SZ_NUM_BONDS_IN_FOLDER_MAX) + return SZ_ERROR_UNSUPPORTED; + f->NumBonds = numBonds; + + numPackStreams = numInStreams - numBonds; + if (numPackStreams > SZ_NUM_PACK_STREAMS_IN_FOLDER_MAX) + return SZ_ERROR_UNSUPPORTED; + f->NumPackStreams = numPackStreams; + + for (i = 0; i < numInStreams; i++) + streamUsed[i] = False; + + if (numBonds != 0) + { + Byte coderUsed[SZ_NUM_CODERS_IN_FOLDER_MAX]; + + for (i = 0; i < numCoders; i++) + coderUsed[i] = False; + + for (i = 0; i < numBonds; i++) + { + CSzBond *bp = f->Bonds + i; + + RINOK(SzReadNumber32(sd, &bp->InIndex)) + if (bp->InIndex >= numInStreams || streamUsed[bp->InIndex]) + return SZ_ERROR_ARCHIVE; + streamUsed[bp->InIndex] = True; + + RINOK(SzReadNumber32(sd, &bp->OutIndex)) + if (bp->OutIndex >= numCoders || coderUsed[bp->OutIndex]) + return SZ_ERROR_ARCHIVE; + coderUsed[bp->OutIndex] = True; + } + + for (i = 0; i < numCoders; i++) + if (!coderUsed[i]) + { + f->UnpackStream = i; + break; + } + + if (i == numCoders) + return SZ_ERROR_ARCHIVE; + } + + if (numPackStreams == 1) + { + for (i = 0; i < numInStreams; i++) + if (!streamUsed[i]) + break; + if (i == numInStreams) + return SZ_ERROR_ARCHIVE; + f->PackStreams[0] = i; + } + else + for (i = 0; i < numPackStreams; i++) + { + UInt32 index; + RINOK(SzReadNumber32(sd, &index)) + if (index >= numInStreams || streamUsed[index]) + return SZ_ERROR_ARCHIVE; + streamUsed[index] = True; + f->PackStreams[i] = index; + } + } + + f->NumCoders = numCoders; + + return SZ_OK; +} + + +static Z7_NO_INLINE SRes SkipNumbers(CSzData *sd2, UInt32 num) +{ + CSzData sd; + sd = *sd2; + for (; num != 0; num--) + { + Byte firstByte, mask; + unsigned i; + SZ_READ_BYTE_2(firstByte) + 
if ((firstByte & 0x80) == 0) + continue; + if ((firstByte & 0x40) == 0) + { + if (sd.Size == 0) + return SZ_ERROR_ARCHIVE; + sd.Size--; + sd.Data++; + continue; + } + mask = 0x20; + for (i = 2; i < 8 && (firstByte & mask) != 0; i++) + mask >>= 1; + if (i > sd.Size) + return SZ_ERROR_ARCHIVE; + SKIP_DATA2(sd, i) + } + *sd2 = sd; + return SZ_OK; +} + + +#define k_Scan_NumCoders_MAX 64 +#define k_Scan_NumCodersStreams_in_Folder_MAX 64 + + +static SRes ReadUnpackInfo(CSzAr *p, + CSzData *sd2, + UInt32 numFoldersMax, + const CBuf *tempBufs, UInt32 numTempBufs, + ISzAllocPtr alloc) +{ + CSzData sd; + + UInt32 fo, numFolders, numCodersOutStreams, packStreamIndex; + const Byte *startBufPtr; + Byte external; + + RINOK(WaitId(sd2, k7zIdFolder)) + + RINOK(SzReadNumber32(sd2, &numFolders)) + if (numFolders > numFoldersMax) + return SZ_ERROR_UNSUPPORTED; + p->NumFolders = numFolders; + + SZ_READ_BYTE_SD(sd2, external) + if (external == 0) + sd = *sd2; + else + { + UInt32 index; + RINOK(SzReadNumber32(sd2, &index)) + if (index >= numTempBufs) + return SZ_ERROR_ARCHIVE; + sd.Data = tempBufs[index].data; + sd.Size = tempBufs[index].size; + } + + MY_ALLOC(size_t, p->FoCodersOffsets, (size_t)numFolders + 1, alloc) + MY_ALLOC(UInt32, p->FoStartPackStreamIndex, (size_t)numFolders + 1, alloc) + MY_ALLOC(UInt32, p->FoToCoderUnpackSizes, (size_t)numFolders + 1, alloc) + MY_ALLOC_ZE(Byte, p->FoToMainUnpackSizeIndex, (size_t)numFolders, alloc) + + startBufPtr = sd.Data; + + packStreamIndex = 0; + numCodersOutStreams = 0; + + for (fo = 0; fo < numFolders; fo++) + { + UInt32 numCoders, ci, numInStreams = 0; + + p->FoCodersOffsets[fo] = (size_t)(sd.Data - startBufPtr); + + RINOK(SzReadNumber32(&sd, &numCoders)) + if (numCoders == 0 || numCoders > k_Scan_NumCoders_MAX) + return SZ_ERROR_UNSUPPORTED; + + for (ci = 0; ci < numCoders; ci++) + { + Byte mainByte; + unsigned idSize; + UInt32 coderInStreams; + + SZ_READ_BYTE_2(mainByte) + if ((mainByte & 0xC0) != 0) + return SZ_ERROR_UNSUPPORTED; + 
idSize = (mainByte & 0xF); + if (idSize > 8) + return SZ_ERROR_UNSUPPORTED; + if (idSize > sd.Size) + return SZ_ERROR_ARCHIVE; + SKIP_DATA2(sd, idSize) + + coderInStreams = 1; + + if ((mainByte & 0x10) != 0) + { + UInt32 coderOutStreams; + RINOK(SzReadNumber32(&sd, &coderInStreams)) + RINOK(SzReadNumber32(&sd, &coderOutStreams)) + if (coderInStreams > k_Scan_NumCodersStreams_in_Folder_MAX || coderOutStreams != 1) + return SZ_ERROR_UNSUPPORTED; + } + + numInStreams += coderInStreams; + + if ((mainByte & 0x20) != 0) + { + UInt32 propsSize; + RINOK(SzReadNumber32(&sd, &propsSize)) + if (propsSize > sd.Size) + return SZ_ERROR_ARCHIVE; + SKIP_DATA2(sd, propsSize) + } + } + + { + UInt32 indexOfMainStream = 0; + UInt32 numPackStreams = 1; + + if (numCoders != 1 || numInStreams != 1) + { + Byte streamUsed[k_Scan_NumCodersStreams_in_Folder_MAX]; + Byte coderUsed[k_Scan_NumCoders_MAX]; + + UInt32 i; + const UInt32 numBonds = numCoders - 1; + if (numInStreams < numBonds) + return SZ_ERROR_ARCHIVE; + + if (numInStreams > k_Scan_NumCodersStreams_in_Folder_MAX) + return SZ_ERROR_UNSUPPORTED; + + for (i = 0; i < numInStreams; i++) + streamUsed[i] = False; + for (i = 0; i < numCoders; i++) + coderUsed[i] = False; + + for (i = 0; i < numBonds; i++) + { + UInt32 index; + + RINOK(SzReadNumber32(&sd, &index)) + if (index >= numInStreams || streamUsed[index]) + return SZ_ERROR_ARCHIVE; + streamUsed[index] = True; + + RINOK(SzReadNumber32(&sd, &index)) + if (index >= numCoders || coderUsed[index]) + return SZ_ERROR_ARCHIVE; + coderUsed[index] = True; + } + + numPackStreams = numInStreams - numBonds; + + if (numPackStreams != 1) + for (i = 0; i < numPackStreams; i++) + { + UInt32 index; + RINOK(SzReadNumber32(&sd, &index)) + if (index >= numInStreams || streamUsed[index]) + return SZ_ERROR_ARCHIVE; + streamUsed[index] = True; + } + + for (i = 0; i < numCoders; i++) + if (!coderUsed[i]) + { + indexOfMainStream = i; + break; + } + + if (i == numCoders) + return SZ_ERROR_ARCHIVE; + } + + 
p->FoStartPackStreamIndex[fo] = packStreamIndex; + p->FoToCoderUnpackSizes[fo] = numCodersOutStreams; + p->FoToMainUnpackSizeIndex[fo] = (Byte)indexOfMainStream; + numCodersOutStreams += numCoders; + if (numCodersOutStreams < numCoders) + return SZ_ERROR_UNSUPPORTED; + if (numPackStreams > p->NumPackStreams - packStreamIndex) + return SZ_ERROR_ARCHIVE; + packStreamIndex += numPackStreams; + } + } + + p->FoToCoderUnpackSizes[fo] = numCodersOutStreams; + + { + const size_t dataSize = (size_t)(sd.Data - startBufPtr); + p->FoStartPackStreamIndex[fo] = packStreamIndex; + p->FoCodersOffsets[fo] = dataSize; + MY_ALLOC_ZE_AND_CPY(p->CodersData, dataSize, startBufPtr, alloc) + } + + if (external != 0) + { + if (sd.Size != 0) + return SZ_ERROR_ARCHIVE; + sd = *sd2; + } + + RINOK(WaitId(&sd, k7zIdCodersUnpackSize)) + + MY_ALLOC_ZE(UInt64, p->CoderUnpackSizes, (size_t)numCodersOutStreams, alloc) + { + UInt32 i; + for (i = 0; i < numCodersOutStreams; i++) + { + RINOK(ReadNumber(&sd, p->CoderUnpackSizes + i)) + } + } + + for (;;) + { + UInt64 type; + RINOK(ReadID(&sd, &type)) + if (type == k7zIdEnd) + { + *sd2 = sd; + return SZ_OK; + } + if (type == k7zIdCRC) + { + RINOK(ReadBitUi32s(&sd, numFolders, &p->FolderCRCs, alloc)) + continue; + } + RINOK(SkipData(&sd)) + } +} + + +UInt64 SzAr_GetFolderUnpackSize(const CSzAr *p, UInt32 folderIndex) +{ + return p->CoderUnpackSizes[p->FoToCoderUnpackSizes[folderIndex] + p->FoToMainUnpackSizeIndex[folderIndex]]; +} + + +typedef struct +{ + UInt32 NumTotalSubStreams; + UInt32 NumSubDigests; + CSzData sdNumSubStreams; + CSzData sdSizes; + CSzData sdCRCs; +} CSubStreamInfo; + + +static SRes ReadSubStreamsInfo(CSzAr *p, CSzData *sd, CSubStreamInfo *ssi) +{ + UInt64 type = 0; + UInt32 numSubDigests = 0; + const UInt32 numFolders = p->NumFolders; + UInt32 numUnpackStreams = numFolders; + UInt32 numUnpackSizesInData = 0; + + for (;;) + { + RINOK(ReadID(sd, &type)) + if (type == k7zIdNumUnpackStream) + { + UInt32 i; + ssi->sdNumSubStreams.Data = 
sd->Data; + numUnpackStreams = 0; + numSubDigests = 0; + for (i = 0; i < numFolders; i++) + { + UInt32 numStreams; + RINOK(SzReadNumber32(sd, &numStreams)) + if (numUnpackStreams > numUnpackStreams + numStreams) + return SZ_ERROR_UNSUPPORTED; + numUnpackStreams += numStreams; + if (numStreams != 0) + numUnpackSizesInData += (numStreams - 1); + if (numStreams != 1 || !SzBitWithVals_Check(&p->FolderCRCs, i)) + numSubDigests += numStreams; + } + ssi->sdNumSubStreams.Size = (size_t)(sd->Data - ssi->sdNumSubStreams.Data); + continue; + } + if (type == k7zIdCRC || type == k7zIdSize || type == k7zIdEnd) + break; + RINOK(SkipData(sd)) + } + + if (!ssi->sdNumSubStreams.Data) + { + numSubDigests = numFolders; + if (p->FolderCRCs.Defs) + numSubDigests = numFolders - CountDefinedBits(p->FolderCRCs.Defs, numFolders); + } + + ssi->NumTotalSubStreams = numUnpackStreams; + ssi->NumSubDigests = numSubDigests; + + if (type == k7zIdSize) + { + ssi->sdSizes.Data = sd->Data; + RINOK(SkipNumbers(sd, numUnpackSizesInData)) + ssi->sdSizes.Size = (size_t)(sd->Data - ssi->sdSizes.Data); + RINOK(ReadID(sd, &type)) + } + + for (;;) + { + if (type == k7zIdEnd) + return SZ_OK; + if (type == k7zIdCRC) + { + ssi->sdCRCs.Data = sd->Data; + RINOK(SkipBitUi32s(sd, numSubDigests)) + ssi->sdCRCs.Size = (size_t)(sd->Data - ssi->sdCRCs.Data); + } + else + { + RINOK(SkipData(sd)) + } + RINOK(ReadID(sd, &type)) + } +} + +static SRes SzReadStreamsInfo(CSzAr *p, + CSzData *sd, + UInt32 numFoldersMax, const CBuf *tempBufs, UInt32 numTempBufs, + UInt64 *dataOffset, + CSubStreamInfo *ssi, + ISzAllocPtr alloc) +{ + UInt64 type; + + SzData_CLEAR(&ssi->sdSizes) + SzData_CLEAR(&ssi->sdCRCs) + SzData_CLEAR(&ssi->sdNumSubStreams) + + *dataOffset = 0; + RINOK(ReadID(sd, &type)) + if (type == k7zIdPackInfo) + { + RINOK(ReadNumber(sd, dataOffset)) + if (*dataOffset > p->RangeLimit) + return SZ_ERROR_ARCHIVE; + RINOK(ReadPackInfo(p, sd, alloc)) + if (p->PackPositions[p->NumPackStreams] > p->RangeLimit - *dataOffset) + 
return SZ_ERROR_ARCHIVE; + RINOK(ReadID(sd, &type)) + } + if (type == k7zIdUnpackInfo) + { + RINOK(ReadUnpackInfo(p, sd, numFoldersMax, tempBufs, numTempBufs, alloc)) + RINOK(ReadID(sd, &type)) + } + if (type == k7zIdSubStreamsInfo) + { + RINOK(ReadSubStreamsInfo(p, sd, ssi)) + RINOK(ReadID(sd, &type)) + } + else + { + ssi->NumTotalSubStreams = p->NumFolders; + // ssi->NumSubDigests = 0; + } + + return (type == k7zIdEnd ? SZ_OK : SZ_ERROR_UNSUPPORTED); +} + +static SRes SzReadAndDecodePackedStreams( + ILookInStreamPtr inStream, + CSzData *sd, + CBuf *tempBufs, + UInt32 numFoldersMax, + UInt64 baseOffset, + CSzAr *p, + ISzAllocPtr allocTemp) +{ + UInt64 dataStartPos; + UInt32 fo; + CSubStreamInfo ssi; + + RINOK(SzReadStreamsInfo(p, sd, numFoldersMax, NULL, 0, &dataStartPos, &ssi, allocTemp)) + + dataStartPos += baseOffset; + if (p->NumFolders == 0) + return SZ_ERROR_ARCHIVE; + + for (fo = 0; fo < p->NumFolders; fo++) + Buf_Init(tempBufs + fo); + + for (fo = 0; fo < p->NumFolders; fo++) + { + CBuf *tempBuf = tempBufs + fo; + const UInt64 unpackSize = SzAr_GetFolderUnpackSize(p, fo); + if ((size_t)unpackSize != unpackSize) + return SZ_ERROR_MEM; + if (!Buf_Create(tempBuf, (size_t)unpackSize, allocTemp)) + return SZ_ERROR_MEM; + } + + for (fo = 0; fo < p->NumFolders; fo++) + { + const CBuf *tempBuf = tempBufs + fo; + RINOK(LookInStream_SeekTo(inStream, dataStartPos)) + RINOK(SzAr_DecodeFolder(p, fo, inStream, dataStartPos, tempBuf->data, tempBuf->size, allocTemp)) + } + + return SZ_OK; +} + +static SRes SzReadFileNames(const Byte *data, size_t size, UInt32 numFiles, size_t *offsets) +{ + size_t pos = 0; + *offsets++ = 0; + if (numFiles == 0) + return (size == 0) ? 
SZ_OK : SZ_ERROR_ARCHIVE; + if (size < 2) + return SZ_ERROR_ARCHIVE; + if (data[size - 2] != 0 || data[size - 1] != 0) + return SZ_ERROR_ARCHIVE; + do + { + const Byte *p; + if (pos == size) + return SZ_ERROR_ARCHIVE; + for (p = data + pos; + #ifdef _WIN32 + *(const UInt16 *)(const void *)p != 0 + #else + p[0] != 0 || p[1] != 0 + #endif + ; p += 2); + pos = (size_t)(p - data) + 2; + *offsets++ = (pos >> 1); + } + while (--numFiles); + return (pos == size) ? SZ_OK : SZ_ERROR_ARCHIVE; +} + +static Z7_NO_INLINE SRes ReadTime(CSzBitUi64s *p, UInt32 num, + CSzData *sd2, + const CBuf *tempBufs, UInt32 numTempBufs, + ISzAllocPtr alloc) +{ + CSzData sd; + UInt32 i; + CNtfsFileTime *vals; + Byte *defs; + Byte external; + + RINOK(ReadBitVector(sd2, num, &p->Defs, alloc)) + + SZ_READ_BYTE_SD(sd2, external) + if (external == 0) + sd = *sd2; + else + { + UInt32 index; + RINOK(SzReadNumber32(sd2, &index)) + if (index >= numTempBufs) + return SZ_ERROR_ARCHIVE; + sd.Data = tempBufs[index].data; + sd.Size = tempBufs[index].size; + } + + MY_ALLOC_ZE(CNtfsFileTime, p->Vals, num, alloc) + vals = p->Vals; + defs = p->Defs; + for (i = 0; i < num; i++) + if (SzBitArray_Check(defs, i)) + { + if (sd.Size < 8) + return SZ_ERROR_ARCHIVE; + vals[i].Low = GetUi32(sd.Data); + vals[i].High = GetUi32(sd.Data + 4); + SKIP_DATA2(sd, 8) + } + else + vals[i].High = vals[i].Low = 0; + + if (external == 0) + *sd2 = sd; + + return SZ_OK; +} + + +#define NUM_ADDITIONAL_STREAMS_MAX 8 + + +static SRes SzReadHeader2( + CSzArEx *p, /* allocMain */ + CSzData *sd, + ILookInStreamPtr inStream, + CBuf *tempBufs, UInt32 *numTempBufs, + ISzAllocPtr allocMain, + ISzAllocPtr allocTemp + ) +{ + CSubStreamInfo ssi; + +{ + UInt64 type; + + SzData_CLEAR(&ssi.sdSizes) + SzData_CLEAR(&ssi.sdCRCs) + SzData_CLEAR(&ssi.sdNumSubStreams) + + ssi.NumSubDigests = 0; + ssi.NumTotalSubStreams = 0; + + RINOK(ReadID(sd, &type)) + + if (type == k7zIdArchiveProperties) + { + for (;;) + { + UInt64 type2; + RINOK(ReadID(sd, &type2)) + 
if (type2 == k7zIdEnd) + break; + RINOK(SkipData(sd)) + } + RINOK(ReadID(sd, &type)) + } + + if (type == k7zIdAdditionalStreamsInfo) + { + CSzAr tempAr; + SRes res; + + SzAr_Init(&tempAr); + tempAr.RangeLimit = p->db.RangeLimit; + + res = SzReadAndDecodePackedStreams(inStream, sd, tempBufs, NUM_ADDITIONAL_STREAMS_MAX, + p->startPosAfterHeader, &tempAr, allocTemp); + *numTempBufs = tempAr.NumFolders; + SzAr_Free(&tempAr, allocTemp); + + if (res != SZ_OK) + return res; + RINOK(ReadID(sd, &type)) + } + + if (type == k7zIdMainStreamsInfo) + { + RINOK(SzReadStreamsInfo(&p->db, sd, (UInt32)1 << 30, tempBufs, *numTempBufs, + &p->dataPos, &ssi, allocMain)) + p->dataPos += p->startPosAfterHeader; + RINOK(ReadID(sd, &type)) + } + + if (type == k7zIdEnd) + { + return SZ_OK; + } + + if (type != k7zIdFilesInfo) + return SZ_ERROR_ARCHIVE; +} + +{ + UInt32 numFiles = 0; + UInt32 numEmptyStreams = 0; + const Byte *emptyStreams = NULL; + const Byte *emptyFiles = NULL; + + RINOK(SzReadNumber32(sd, &numFiles)) + p->NumFiles = numFiles; + + for (;;) + { + UInt64 type; + UInt64 size; + RINOK(ReadID(sd, &type)) + if (type == k7zIdEnd) + break; + RINOK(ReadNumber(sd, &size)) + if (size > sd->Size) + return SZ_ERROR_ARCHIVE; + + if (type >= ((UInt32)1 << 8)) + { + SKIP_DATA(sd, size) + } + else switch ((unsigned)type) + { + case k7zIdName: + { + size_t namesSize; + const Byte *namesData; + Byte external; + + SZ_READ_BYTE(external) + if (external == 0) + { + namesSize = (size_t)size - 1; + namesData = sd->Data; + } + else + { + UInt32 index; + RINOK(SzReadNumber32(sd, &index)) + if (index >= *numTempBufs) + return SZ_ERROR_ARCHIVE; + namesData = (tempBufs)[index].data; + namesSize = (tempBufs)[index].size; + } + + if ((namesSize & 1) != 0) + return SZ_ERROR_ARCHIVE; + MY_ALLOC(size_t, p->FileNameOffsets, numFiles + 1, allocMain) + MY_ALLOC_ZE_AND_CPY(p->FileNames, namesSize, namesData, allocMain) + RINOK(SzReadFileNames(p->FileNames, namesSize, numFiles, p->FileNameOffsets)) + if (external 
== 0) + { + SKIP_DATA(sd, namesSize) + } + break; + } + case k7zIdEmptyStream: + { + RINOK(RememberBitVector(sd, numFiles, &emptyStreams)) + numEmptyStreams = CountDefinedBits(emptyStreams, numFiles); + emptyFiles = NULL; + break; + } + case k7zIdEmptyFile: + { + RINOK(RememberBitVector(sd, numEmptyStreams, &emptyFiles)) + break; + } + case k7zIdWinAttrib: + { + Byte external; + CSzData sdSwitch; + CSzData *sdPtr; + SzBitUi32s_Free(&p->Attribs, allocMain); + RINOK(ReadBitVector(sd, numFiles, &p->Attribs.Defs, allocMain)) + + SZ_READ_BYTE(external) + if (external == 0) + sdPtr = sd; + else + { + UInt32 index; + RINOK(SzReadNumber32(sd, &index)) + if (index >= *numTempBufs) + return SZ_ERROR_ARCHIVE; + sdSwitch.Data = (tempBufs)[index].data; + sdSwitch.Size = (tempBufs)[index].size; + sdPtr = &sdSwitch; + } + RINOK(ReadUi32s(sdPtr, numFiles, &p->Attribs, allocMain)) + break; + } + /* + case k7zParent: + { + SzBitUi32s_Free(&p->Parents, allocMain); + RINOK(ReadBitVector(sd, numFiles, &p->Parents.Defs, allocMain)); + RINOK(SzReadSwitch(sd)); + RINOK(ReadUi32s(sd, numFiles, &p->Parents, allocMain)); + break; + } + */ + case k7zIdMTime: RINOK(ReadTime(&p->MTime, numFiles, sd, tempBufs, *numTempBufs, allocMain)) break; + case k7zIdCTime: RINOK(ReadTime(&p->CTime, numFiles, sd, tempBufs, *numTempBufs, allocMain)) break; + default: + { + SKIP_DATA(sd, size) + } + } + } + + if (numFiles - numEmptyStreams != ssi.NumTotalSubStreams) + return SZ_ERROR_ARCHIVE; + + for (;;) + { + UInt64 type; + RINOK(ReadID(sd, &type)) + if (type == k7zIdEnd) + break; + RINOK(SkipData(sd)) + } + + { + UInt32 i; + UInt32 emptyFileIndex = 0; + UInt32 folderIndex = 0; + UInt32 remSubStreams = 0; + UInt32 numSubStreams = 0; + UInt64 unpackPos = 0; + const Byte *digestsDefs = NULL; + const Byte *digestsVals = NULL; + UInt32 digestIndex = 0; + Byte isDirMask = 0; + Byte crcMask = 0; + Byte mask = 0x80; + + MY_ALLOC(UInt32, p->FolderToFile, p->db.NumFolders + 1, allocMain) + MY_ALLOC_ZE(UInt32, 
p->FileToFolder, p->NumFiles, allocMain) + MY_ALLOC(UInt64, p->UnpackPositions, p->NumFiles + 1, allocMain) + MY_ALLOC_ZE(Byte, p->IsDirs, (p->NumFiles + 7) >> 3, allocMain) + + RINOK(SzBitUi32s_Alloc(&p->CRCs, p->NumFiles, allocMain)) + + if (ssi.sdCRCs.Size != 0) + { + Byte allDigestsDefined = 0; + SZ_READ_BYTE_SD_NOCHECK(&ssi.sdCRCs, allDigestsDefined) + if (allDigestsDefined) + digestsVals = ssi.sdCRCs.Data; + else + { + const size_t numBytes = (ssi.NumSubDigests + 7) >> 3; + digestsDefs = ssi.sdCRCs.Data; + digestsVals = digestsDefs + numBytes; + } + } + + for (i = 0; i < numFiles; i++, mask >>= 1) + { + if (mask == 0) + { + const UInt32 byteIndex = (i - 1) >> 3; + p->IsDirs[byteIndex] = isDirMask; + p->CRCs.Defs[byteIndex] = crcMask; + isDirMask = 0; + crcMask = 0; + mask = 0x80; + } + + p->UnpackPositions[i] = unpackPos; + p->CRCs.Vals[i] = 0; + + if (emptyStreams && SzBitArray_Check(emptyStreams, i)) + { + if (emptyFiles) + { + if (!SzBitArray_Check(emptyFiles, emptyFileIndex)) + isDirMask |= mask; + emptyFileIndex++; + } + else + isDirMask |= mask; + if (remSubStreams == 0) + { + p->FileToFolder[i] = (UInt32)-1; + continue; + } + } + + if (remSubStreams == 0) + { + for (;;) + { + if (folderIndex >= p->db.NumFolders) + return SZ_ERROR_ARCHIVE; + p->FolderToFile[folderIndex] = i; + numSubStreams = 1; + if (ssi.sdNumSubStreams.Data) + { + RINOK(SzReadNumber32(&ssi.sdNumSubStreams, &numSubStreams)) + } + remSubStreams = numSubStreams; + if (numSubStreams != 0) + break; + { + const UInt64 folderUnpackSize = SzAr_GetFolderUnpackSize(&p->db, folderIndex); + unpackPos += folderUnpackSize; + if (unpackPos < folderUnpackSize) + return SZ_ERROR_ARCHIVE; + } + folderIndex++; + } + } + + p->FileToFolder[i] = folderIndex; + + if (emptyStreams && SzBitArray_Check(emptyStreams, i)) + continue; + + if (--remSubStreams == 0) + { + const UInt64 folderUnpackSize = SzAr_GetFolderUnpackSize(&p->db, folderIndex); + const UInt64 startFolderUnpackPos = 
p->UnpackPositions[p->FolderToFile[folderIndex]]; + if (folderUnpackSize < unpackPos - startFolderUnpackPos) + return SZ_ERROR_ARCHIVE; + unpackPos = startFolderUnpackPos + folderUnpackSize; + if (unpackPos < folderUnpackSize) + return SZ_ERROR_ARCHIVE; + + if (numSubStreams == 1 && SzBitWithVals_Check(&p->db.FolderCRCs, folderIndex)) + { + p->CRCs.Vals[i] = p->db.FolderCRCs.Vals[folderIndex]; + crcMask |= mask; + } + folderIndex++; + } + else + { + UInt64 v; + RINOK(ReadNumber(&ssi.sdSizes, &v)) + unpackPos += v; + if (unpackPos < v) + return SZ_ERROR_ARCHIVE; + } + if ((crcMask & mask) == 0 && digestsVals) + { + if (!digestsDefs || SzBitArray_Check(digestsDefs, digestIndex)) + { + p->CRCs.Vals[i] = GetUi32(digestsVals); + digestsVals += 4; + crcMask |= mask; + } + digestIndex++; + } + } + + if (mask != 0x80) + { + const UInt32 byteIndex = (i - 1) >> 3; + p->IsDirs[byteIndex] = isDirMask; + p->CRCs.Defs[byteIndex] = crcMask; + } + + p->UnpackPositions[i] = unpackPos; + + if (remSubStreams != 0) + return SZ_ERROR_ARCHIVE; + + for (;;) + { + p->FolderToFile[folderIndex] = i; + if (folderIndex >= p->db.NumFolders) + break; + if (!ssi.sdNumSubStreams.Data) + return SZ_ERROR_ARCHIVE; + RINOK(SzReadNumber32(&ssi.sdNumSubStreams, &numSubStreams)) + if (numSubStreams != 0) + return SZ_ERROR_ARCHIVE; + /* + { + UInt64 folderUnpackSize = SzAr_GetFolderUnpackSize(&p->db, folderIndex); + unpackPos += folderUnpackSize; + if (unpackPos < folderUnpackSize) + return SZ_ERROR_ARCHIVE; + } + */ + folderIndex++; + } + + if (ssi.sdNumSubStreams.Data && ssi.sdNumSubStreams.Size != 0) + return SZ_ERROR_ARCHIVE; + } +} + return SZ_OK; +} + + +static SRes SzReadHeader( + CSzArEx *p, + CSzData *sd, + ILookInStreamPtr inStream, + ISzAllocPtr allocMain, + ISzAllocPtr allocTemp) +{ + UInt32 i; + UInt32 numTempBufs = 0; + SRes res; + CBuf tempBufs[NUM_ADDITIONAL_STREAMS_MAX]; + + for (i = 0; i < NUM_ADDITIONAL_STREAMS_MAX; i++) + Buf_Init(tempBufs + i); + + res = SzReadHeader2(p, sd, 
inStream, + tempBufs, &numTempBufs, + allocMain, allocTemp); + + for (i = 0; i < NUM_ADDITIONAL_STREAMS_MAX; i++) + Buf_Free(tempBufs + i, allocTemp); + + RINOK(res) + + if (sd->Size != 0) + return SZ_ERROR_FAIL; + + return res; +} + +static SRes SzArEx_Open2( + CSzArEx *p, + ILookInStreamPtr inStream, + ISzAllocPtr allocMain, + ISzAllocPtr allocTemp) +{ + Byte header[k7zStartHeaderSize]; + Int64 startArcPos; + UInt64 nextHeaderOffset, nextHeaderSize; + size_t nextHeaderSizeT; + UInt32 nextHeaderCRC; + CBuf buf; + SRes res; + + startArcPos = 0; + RINOK(ILookInStream_Seek(inStream, &startArcPos, SZ_SEEK_CUR)) + + RINOK(LookInStream_Read2(inStream, header, k7zStartHeaderSize, SZ_ERROR_NO_ARCHIVE)) + + if (!TestSignatureCandidate(header)) + return SZ_ERROR_NO_ARCHIVE; + if (header[6] != k7zMajorVersion) + return SZ_ERROR_UNSUPPORTED; + + nextHeaderOffset = GetUi64(header + 12); + nextHeaderSize = GetUi64(header + 20); + nextHeaderCRC = GetUi32(header + 28); + + p->startPosAfterHeader = (UInt64)startArcPos + k7zStartHeaderSize; + + if (CrcCalc(header + 12, 20) != GetUi32(header + 8)) + return SZ_ERROR_CRC; + + p->db.RangeLimit = nextHeaderOffset; + + nextHeaderSizeT = (size_t)nextHeaderSize; + if (nextHeaderSizeT != nextHeaderSize) + return SZ_ERROR_MEM; + if (nextHeaderSizeT == 0) + return SZ_OK; + if (nextHeaderOffset > nextHeaderOffset + nextHeaderSize || + nextHeaderOffset > nextHeaderOffset + nextHeaderSize + k7zStartHeaderSize) + return SZ_ERROR_NO_ARCHIVE; + + { + Int64 pos = 0; + RINOK(ILookInStream_Seek(inStream, &pos, SZ_SEEK_END)) + if ((UInt64)pos < (UInt64)startArcPos + nextHeaderOffset || + (UInt64)pos < (UInt64)startArcPos + k7zStartHeaderSize + nextHeaderOffset || + (UInt64)pos < (UInt64)startArcPos + k7zStartHeaderSize + nextHeaderOffset + nextHeaderSize) + return SZ_ERROR_INPUT_EOF; + } + + RINOK(LookInStream_SeekTo(inStream, (UInt64)startArcPos + k7zStartHeaderSize + nextHeaderOffset)) + + if (!Buf_Create(&buf, nextHeaderSizeT, allocTemp)) + return 
SZ_ERROR_MEM; + + res = LookInStream_Read(inStream, buf.data, nextHeaderSizeT); + + if (res == SZ_OK) + { + res = SZ_ERROR_ARCHIVE; + if (CrcCalc(buf.data, nextHeaderSizeT) == nextHeaderCRC) + { + CSzData sd; + UInt64 type; + sd.Data = buf.data; + sd.Size = buf.size; + + res = ReadID(&sd, &type); + + if (res == SZ_OK && type == k7zIdEncodedHeader) + { + CSzAr tempAr; + CBuf tempBuf; + Buf_Init(&tempBuf); + + SzAr_Init(&tempAr); + tempAr.RangeLimit = p->db.RangeLimit; + + res = SzReadAndDecodePackedStreams(inStream, &sd, &tempBuf, 1, p->startPosAfterHeader, &tempAr, allocTemp); + SzAr_Free(&tempAr, allocTemp); + + if (res != SZ_OK) + { + Buf_Free(&tempBuf, allocTemp); + } + else + { + Buf_Free(&buf, allocTemp); + buf.data = tempBuf.data; + buf.size = tempBuf.size; + sd.Data = buf.data; + sd.Size = buf.size; + res = ReadID(&sd, &type); + } + } + + if (res == SZ_OK) + { + if (type == k7zIdHeader) + { + /* + CSzData sd2; + unsigned ttt; + for (ttt = 0; ttt < 40000; ttt++) + { + SzArEx_Free(p, allocMain); + sd2 = sd; + res = SzReadHeader(p, &sd2, inStream, allocMain, allocTemp); + if (res != SZ_OK) + break; + } + */ + res = SzReadHeader(p, &sd, inStream, allocMain, allocTemp); + } + else + res = SZ_ERROR_UNSUPPORTED; + } + } + } + + Buf_Free(&buf, allocTemp); + return res; +} + + +SRes SzArEx_Open(CSzArEx *p, ILookInStreamPtr inStream, + ISzAllocPtr allocMain, ISzAllocPtr allocTemp) +{ + const SRes res = SzArEx_Open2(p, inStream, allocMain, allocTemp); + if (res != SZ_OK) + SzArEx_Free(p, allocMain); + return res; +} + + +SRes SzArEx_Extract( + const CSzArEx *p, + ILookInStreamPtr inStream, + UInt32 fileIndex, + UInt32 *blockIndex, + Byte **tempBuf, + size_t *outBufferSize, + size_t *offset, + size_t *outSizeProcessed, + ISzAllocPtr allocMain, + ISzAllocPtr allocTemp) +{ + const UInt32 folderIndex = p->FileToFolder[fileIndex]; + SRes res = SZ_OK; + + *offset = 0; + *outSizeProcessed = 0; + + if (folderIndex == (UInt32)-1) + { + ISzAlloc_Free(allocMain, *tempBuf); + 
*blockIndex = folderIndex; + *tempBuf = NULL; + *outBufferSize = 0; + return SZ_OK; + } + + if (*tempBuf == NULL || *blockIndex != folderIndex) + { + const UInt64 unpackSizeSpec = SzAr_GetFolderUnpackSize(&p->db, folderIndex); + /* + UInt64 unpackSizeSpec = + p->UnpackPositions[p->FolderToFile[(size_t)folderIndex + 1]] - + p->UnpackPositions[p->FolderToFile[folderIndex]]; + */ + const size_t unpackSize = (size_t)unpackSizeSpec; + + if (unpackSize != unpackSizeSpec) + return SZ_ERROR_MEM; + *blockIndex = folderIndex; + ISzAlloc_Free(allocMain, *tempBuf); + *tempBuf = NULL; + + if (res == SZ_OK) + { + *outBufferSize = unpackSize; + if (unpackSize != 0) + { + *tempBuf = (Byte *)ISzAlloc_Alloc(allocMain, unpackSize); + if (*tempBuf == NULL) + res = SZ_ERROR_MEM; + } + + if (res == SZ_OK) + { + res = SzAr_DecodeFolder(&p->db, folderIndex, + inStream, p->dataPos, *tempBuf, unpackSize, allocTemp); + } + } + } + + if (res == SZ_OK) + { + const UInt64 unpackPos = p->UnpackPositions[fileIndex]; + *offset = (size_t)(unpackPos - p->UnpackPositions[p->FolderToFile[folderIndex]]); + *outSizeProcessed = (size_t)(p->UnpackPositions[(size_t)fileIndex + 1] - unpackPos); + if (*offset + *outSizeProcessed > *outBufferSize) + return SZ_ERROR_FAIL; + if (SzBitWithVals_Check(&p->CRCs, fileIndex)) + if (CrcCalc(*tempBuf + *offset, *outSizeProcessed) != p->CRCs.Vals[fileIndex]) + res = SZ_ERROR_CRC; + } + + return res; +} + + +size_t SzArEx_GetFileNameUtf16(const CSzArEx *p, size_t fileIndex, UInt16 *dest) +{ + const size_t offs = p->FileNameOffsets[fileIndex]; + const size_t len = p->FileNameOffsets[fileIndex + 1] - offs; + if (dest != 0) + { + size_t i; + const Byte *src = p->FileNames + offs * 2; + for (i = 0; i < len; i++) + dest[i] = GetUi16(src + i * 2); + } + return len; +} + +/* +size_t SzArEx_GetFullNameLen(const CSzArEx *p, size_t fileIndex) +{ + size_t len; + if (!p->FileNameOffsets) + return 1; + len = 0; + for (;;) + { + UInt32 parent = (UInt32)(Int32)-1; + len += 
p->FileNameOffsets[fileIndex + 1] - p->FileNameOffsets[fileIndex]; + if SzBitWithVals_Check(&p->Parents, fileIndex) + parent = p->Parents.Vals[fileIndex]; + if (parent == (UInt32)(Int32)-1) + return len; + fileIndex = parent; + } +} + +UInt16 *SzArEx_GetFullNameUtf16_Back(const CSzArEx *p, size_t fileIndex, UInt16 *dest) +{ + BoolInt needSlash; + if (!p->FileNameOffsets) + { + *(--dest) = 0; + return dest; + } + needSlash = False; + for (;;) + { + UInt32 parent = (UInt32)(Int32)-1; + size_t curLen = p->FileNameOffsets[fileIndex + 1] - p->FileNameOffsets[fileIndex]; + SzArEx_GetFileNameUtf16(p, fileIndex, dest - curLen); + if (needSlash) + *(dest - 1) = '/'; + needSlash = True; + dest -= curLen; + + if SzBitWithVals_Check(&p->Parents, fileIndex) + parent = p->Parents.Vals[fileIndex]; + if (parent == (UInt32)(Int32)-1) + return dest; + fileIndex = parent; + } +} +*/ diff --git a/external/unarr/lzmasdk/7zBuf.c b/external/unarr/lzmasdk/7zBuf.c new file mode 100644 index 00000000..8865c32a --- /dev/null +++ b/external/unarr/lzmasdk/7zBuf.c @@ -0,0 +1,36 @@ +/* 7zBuf.c -- Byte Buffer +2017-04-03 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include "7zBuf.h" + +void Buf_Init(CBuf *p) +{ + p->data = 0; + p->size = 0; +} + +int Buf_Create(CBuf *p, size_t size, ISzAllocPtr alloc) +{ + p->size = 0; + if (size == 0) + { + p->data = 0; + return 1; + } + p->data = (Byte *)ISzAlloc_Alloc(alloc, size); + if (p->data) + { + p->size = size; + return 1; + } + return 0; +} + +void Buf_Free(CBuf *p, ISzAllocPtr alloc) +{ + ISzAlloc_Free(alloc, p->data); + p->data = 0; + p->size = 0; +} diff --git a/external/unarr/lzmasdk/7zBuf.h b/external/unarr/lzmasdk/7zBuf.h new file mode 100644 index 00000000..c0ba8a7b --- /dev/null +++ b/external/unarr/lzmasdk/7zBuf.h @@ -0,0 +1,35 @@ +/* 7zBuf.h -- Byte Buffer +2023-03-04 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_7Z_BUF_H +#define ZIP7_INC_7Z_BUF_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +typedef struct +{ + Byte 
*data; + size_t size; +} CBuf; + +void Buf_Init(CBuf *p); +int Buf_Create(CBuf *p, size_t size, ISzAllocPtr alloc); +void Buf_Free(CBuf *p, ISzAllocPtr alloc); + +typedef struct +{ + Byte *data; + size_t size; + size_t pos; +} CDynBuf; + +void DynBuf_Construct(CDynBuf *p); +void DynBuf_SeekToBeg(CDynBuf *p); +int DynBuf_Write(CDynBuf *p, const Byte *buf, size_t size, ISzAllocPtr alloc); +void DynBuf_Free(CDynBuf *p, ISzAllocPtr alloc); + +EXTERN_C_END + +#endif diff --git a/external/unarr/lzmasdk/7zCrc.h b/external/unarr/lzmasdk/7zCrc.h new file mode 100644 index 00000000..4afaeae4 --- /dev/null +++ b/external/unarr/lzmasdk/7zCrc.h @@ -0,0 +1,27 @@ +/* 7zCrc.h -- CRC32 calculation +2023-04-02 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_7Z_CRC_H +#define ZIP7_INC_7Z_CRC_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +extern UInt32 g_CrcTable[]; + +/* Call CrcGenerateTable one time before other CRC functions */ +void Z7_FASTCALL CrcGenerateTable(void); + +#define CRC_INIT_VAL 0xFFFFFFFF +#define CRC_GET_DIGEST(crc) ((crc) ^ CRC_INIT_VAL) +#define CRC_UPDATE_BYTE(crc, b) (g_CrcTable[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) + +UInt32 Z7_FASTCALL CrcUpdate(UInt32 crc, const void *data, size_t size); +UInt32 Z7_FASTCALL CrcCalc(const void *data, size_t size); + +typedef UInt32 (Z7_FASTCALL *CRC_FUNC)(UInt32 v, const void *data, size_t size, const UInt32 *table); + +EXTERN_C_END + +#endif diff --git a/external/unarr/lzmasdk/7zDec.c b/external/unarr/lzmasdk/7zDec.c new file mode 100644 index 00000000..96c60359 --- /dev/null +++ b/external/unarr/lzmasdk/7zDec.c @@ -0,0 +1,648 @@ +/* 7zDec.c -- Decoding from 7z folder +2023-04-02 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include + +/* #define Z7_PPMD_SUPPORT */ + +#include "7z.h" +#include "7zCrc.h" + +#include "Bcj2.h" +#include "Bra.h" +#include "CpuArch.h" +#include "Delta.h" +#include "LzmaDec.h" +#include "Lzma2Dec.h" +#ifdef Z7_PPMD_SUPPORT +#include "Ppmd7.h" +#endif + +#define k_Copy 0 
+#ifndef Z7_NO_METHOD_LZMA2 +#define k_LZMA2 0x21 +#endif +#define k_LZMA 0x30101 +#define k_BCJ2 0x303011B + +#if !defined(Z7_NO_METHODS_FILTERS) +#define Z7_USE_BRANCH_FILTER +#endif + +#if !defined(Z7_NO_METHODS_FILTERS) || \ + defined(Z7_USE_NATIVE_BRANCH_FILTER) && defined(MY_CPU_ARM64) +#define Z7_USE_FILTER_ARM64 +#ifndef Z7_USE_BRANCH_FILTER +#define Z7_USE_BRANCH_FILTER +#endif +#define k_ARM64 0xa +#endif + +#if !defined(Z7_NO_METHODS_FILTERS) || \ + defined(Z7_USE_NATIVE_BRANCH_FILTER) && defined(MY_CPU_ARMT) +#define Z7_USE_FILTER_ARMT +#ifndef Z7_USE_BRANCH_FILTER +#define Z7_USE_BRANCH_FILTER +#endif +#define k_ARMT 0x3030701 +#endif + +#ifndef Z7_NO_METHODS_FILTERS +#define k_Delta 3 +#define k_BCJ 0x3030103 +#define k_PPC 0x3030205 +#define k_IA64 0x3030401 +#define k_ARM 0x3030501 +#define k_SPARC 0x3030805 +#endif + +#ifdef Z7_PPMD_SUPPORT + +#define k_PPMD 0x30401 + +typedef struct +{ + IByteIn vt; + const Byte *cur; + const Byte *end; + const Byte *begin; + UInt64 processed; + BoolInt extra; + SRes res; + ILookInStreamPtr inStream; +} CByteInToLook; + +static Byte ReadByte(IByteInPtr pp) +{ + Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CByteInToLook) + if (p->cur != p->end) + return *p->cur++; + if (p->res == SZ_OK) + { + size_t size = (size_t)(p->cur - p->begin); + p->processed += size; + p->res = ILookInStream_Skip(p->inStream, size); + size = (1 << 25); + p->res = ILookInStream_Look(p->inStream, (const void **)&p->begin, &size); + p->cur = p->begin; + p->end = p->begin + size; + if (size != 0) + return *p->cur++; + } + p->extra = True; + return 0; +} + +static SRes SzDecodePpmd(const Byte *props, unsigned propsSize, UInt64 inSize, ILookInStreamPtr inStream, + Byte *outBuffer, SizeT outSize, ISzAllocPtr allocMain) +{ + CPpmd7 ppmd; + CByteInToLook s; + SRes res = SZ_OK; + + s.vt.Read = ReadByte; + s.inStream = inStream; + s.begin = s.end = s.cur = NULL; + s.extra = False; + s.res = SZ_OK; + s.processed = 0; + + if (propsSize != 5) + return 
SZ_ERROR_UNSUPPORTED; + + { + unsigned order = props[0]; + UInt32 memSize = GetUi32(props + 1); + if (order < PPMD7_MIN_ORDER || + order > PPMD7_MAX_ORDER || + memSize < PPMD7_MIN_MEM_SIZE || + memSize > PPMD7_MAX_MEM_SIZE) + return SZ_ERROR_UNSUPPORTED; + Ppmd7_Construct(&ppmd); + if (!Ppmd7_Alloc(&ppmd, memSize, allocMain)) + return SZ_ERROR_MEM; + Ppmd7_Init(&ppmd, order); + } + { + ppmd.rc.dec.Stream = &s.vt; + if (!Ppmd7z_RangeDec_Init(&ppmd.rc.dec)) + res = SZ_ERROR_DATA; + else if (!s.extra) + { + Byte *buf = outBuffer; + const Byte *lim = buf + outSize; + for (; buf != lim; buf++) + { + int sym = Ppmd7z_DecodeSymbol(&ppmd); + if (s.extra || sym < 0) + break; + *buf = (Byte)sym; + } + if (buf != lim) + res = SZ_ERROR_DATA; + else if (!Ppmd7z_RangeDec_IsFinishedOK(&ppmd.rc.dec)) + { + /* if (Ppmd7z_DecodeSymbol(&ppmd) != PPMD7_SYM_END || !Ppmd7z_RangeDec_IsFinishedOK(&ppmd.rc.dec)) */ + res = SZ_ERROR_DATA; + } + } + if (s.extra) + res = (s.res != SZ_OK ? s.res : SZ_ERROR_DATA); + else if (s.processed + (size_t)(s.cur - s.begin) != inSize) + res = SZ_ERROR_DATA; + } + Ppmd7_Free(&ppmd, allocMain); + return res; +} + +#endif + + +static SRes SzDecodeLzma(const Byte *props, unsigned propsSize, UInt64 inSize, ILookInStreamPtr inStream, + Byte *outBuffer, SizeT outSize, ISzAllocPtr allocMain) +{ + CLzmaDec state; + SRes res = SZ_OK; + + LzmaDec_CONSTRUCT(&state) + RINOK(LzmaDec_AllocateProbs(&state, props, propsSize, allocMain)) + state.dic = outBuffer; + state.dicBufSize = outSize; + LzmaDec_Init(&state); + + for (;;) + { + const void *inBuf = NULL; + size_t lookahead = (1 << 18); + if (lookahead > inSize) + lookahead = (size_t)inSize; + res = ILookInStream_Look(inStream, &inBuf, &lookahead); + if (res != SZ_OK) + break; + + { + SizeT inProcessed = (SizeT)lookahead, dicPos = state.dicPos; + ELzmaStatus status; + res = LzmaDec_DecodeToDic(&state, outSize, (const Byte *)inBuf, &inProcessed, LZMA_FINISH_END, &status); + lookahead -= inProcessed; + inSize -= 
inProcessed; + if (res != SZ_OK) + break; + + if (status == LZMA_STATUS_FINISHED_WITH_MARK) + { + if (outSize != state.dicPos || inSize != 0) + res = SZ_ERROR_DATA; + break; + } + + if (outSize == state.dicPos && inSize == 0 && status == LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK) + break; + + if (inProcessed == 0 && dicPos == state.dicPos) + { + res = SZ_ERROR_DATA; + break; + } + + res = ILookInStream_Skip(inStream, inProcessed); + if (res != SZ_OK) + break; + } + } + + LzmaDec_FreeProbs(&state, allocMain); + return res; +} + + +#ifndef Z7_NO_METHOD_LZMA2 + +static SRes SzDecodeLzma2(const Byte *props, unsigned propsSize, UInt64 inSize, ILookInStreamPtr inStream, + Byte *outBuffer, SizeT outSize, ISzAllocPtr allocMain) +{ + CLzma2Dec state; + SRes res = SZ_OK; + + Lzma2Dec_CONSTRUCT(&state) + if (propsSize != 1) + return SZ_ERROR_DATA; + RINOK(Lzma2Dec_AllocateProbs(&state, props[0], allocMain)) + state.decoder.dic = outBuffer; + state.decoder.dicBufSize = outSize; + Lzma2Dec_Init(&state); + + for (;;) + { + const void *inBuf = NULL; + size_t lookahead = (1 << 18); + if (lookahead > inSize) + lookahead = (size_t)inSize; + res = ILookInStream_Look(inStream, &inBuf, &lookahead); + if (res != SZ_OK) + break; + + { + SizeT inProcessed = (SizeT)lookahead, dicPos = state.decoder.dicPos; + ELzmaStatus status; + res = Lzma2Dec_DecodeToDic(&state, outSize, (const Byte *)inBuf, &inProcessed, LZMA_FINISH_END, &status); + lookahead -= inProcessed; + inSize -= inProcessed; + if (res != SZ_OK) + break; + + if (status == LZMA_STATUS_FINISHED_WITH_MARK) + { + if (outSize != state.decoder.dicPos || inSize != 0) + res = SZ_ERROR_DATA; + break; + } + + if (inProcessed == 0 && dicPos == state.decoder.dicPos) + { + res = SZ_ERROR_DATA; + break; + } + + res = ILookInStream_Skip(inStream, inProcessed); + if (res != SZ_OK) + break; + } + } + + Lzma2Dec_FreeProbs(&state, allocMain); + return res; +} + +#endif + + +static SRes SzDecodeCopy(UInt64 inSize, ILookInStreamPtr inStream, Byte 
*outBuffer) +{ + while (inSize > 0) + { + const void *inBuf; + size_t curSize = (1 << 18); + if (curSize > inSize) + curSize = (size_t)inSize; + RINOK(ILookInStream_Look(inStream, &inBuf, &curSize)) + if (curSize == 0) + return SZ_ERROR_INPUT_EOF; + memcpy(outBuffer, inBuf, curSize); + outBuffer += curSize; + inSize -= curSize; + RINOK(ILookInStream_Skip(inStream, curSize)) + } + return SZ_OK; +} + +static BoolInt IS_MAIN_METHOD(UInt32 m) +{ + switch (m) + { + case k_Copy: + case k_LZMA: + #ifndef Z7_NO_METHOD_LZMA2 + case k_LZMA2: + #endif + #ifdef Z7_PPMD_SUPPORT + case k_PPMD: + #endif + return True; + } + return False; +} + +static BoolInt IS_SUPPORTED_CODER(const CSzCoderInfo *c) +{ + return + c->NumStreams == 1 + /* && c->MethodID <= (UInt32)0xFFFFFFFF */ + && IS_MAIN_METHOD((UInt32)c->MethodID); +} + +#define IS_BCJ2(c) ((c)->MethodID == k_BCJ2 && (c)->NumStreams == 4) + +static SRes CheckSupportedFolder(const CSzFolder *f) +{ + if (f->NumCoders < 1 || f->NumCoders > 4) + return SZ_ERROR_UNSUPPORTED; + if (!IS_SUPPORTED_CODER(&f->Coders[0])) + return SZ_ERROR_UNSUPPORTED; + if (f->NumCoders == 1) + { + if (f->NumPackStreams != 1 || f->PackStreams[0] != 0 || f->NumBonds != 0) + return SZ_ERROR_UNSUPPORTED; + return SZ_OK; + } + + + #if defined(Z7_USE_BRANCH_FILTER) + + if (f->NumCoders == 2) + { + const CSzCoderInfo *c = &f->Coders[1]; + if ( + /* c->MethodID > (UInt32)0xFFFFFFFF || */ + c->NumStreams != 1 + || f->NumPackStreams != 1 + || f->PackStreams[0] != 0 + || f->NumBonds != 1 + || f->Bonds[0].InIndex != 1 + || f->Bonds[0].OutIndex != 0) + return SZ_ERROR_UNSUPPORTED; + switch ((UInt32)c->MethodID) + { + #if !defined(Z7_NO_METHODS_FILTERS) + case k_Delta: + case k_BCJ: + case k_PPC: + case k_IA64: + case k_SPARC: + case k_ARM: + #endif + #ifdef Z7_USE_FILTER_ARM64 + case k_ARM64: + #endif + #ifdef Z7_USE_FILTER_ARMT + case k_ARMT: + #endif + break; + default: + return SZ_ERROR_UNSUPPORTED; + } + return SZ_OK; + } + + #endif + + + if (f->NumCoders == 4) 
+ { + if (!IS_SUPPORTED_CODER(&f->Coders[1]) + || !IS_SUPPORTED_CODER(&f->Coders[2]) + || !IS_BCJ2(&f->Coders[3])) + return SZ_ERROR_UNSUPPORTED; + if (f->NumPackStreams != 4 + || f->PackStreams[0] != 2 + || f->PackStreams[1] != 6 + || f->PackStreams[2] != 1 + || f->PackStreams[3] != 0 + || f->NumBonds != 3 + || f->Bonds[0].InIndex != 5 || f->Bonds[0].OutIndex != 0 + || f->Bonds[1].InIndex != 4 || f->Bonds[1].OutIndex != 1 + || f->Bonds[2].InIndex != 3 || f->Bonds[2].OutIndex != 2) + return SZ_ERROR_UNSUPPORTED; + return SZ_OK; + } + + return SZ_ERROR_UNSUPPORTED; +} + + + + + + +static SRes SzFolder_Decode2(const CSzFolder *folder, + const Byte *propsData, + const UInt64 *unpackSizes, + const UInt64 *packPositions, + ILookInStreamPtr inStream, UInt64 startPos, + Byte *outBuffer, SizeT outSize, ISzAllocPtr allocMain, + Byte *tempBuf[]) +{ + UInt32 ci; + SizeT tempSizes[3] = { 0, 0, 0}; + SizeT tempSize3 = 0; + Byte *tempBuf3 = 0; + + RINOK(CheckSupportedFolder(folder)) + + for (ci = 0; ci < folder->NumCoders; ci++) + { + const CSzCoderInfo *coder = &folder->Coders[ci]; + + if (IS_MAIN_METHOD((UInt32)coder->MethodID)) + { + UInt32 si = 0; + UInt64 offset; + UInt64 inSize; + Byte *outBufCur = outBuffer; + SizeT outSizeCur = outSize; + if (folder->NumCoders == 4) + { + const UInt32 indices[] = { 3, 2, 0 }; + const UInt64 unpackSize = unpackSizes[ci]; + si = indices[ci]; + if (ci < 2) + { + Byte *temp; + outSizeCur = (SizeT)unpackSize; + if (outSizeCur != unpackSize) + return SZ_ERROR_MEM; + temp = (Byte *)ISzAlloc_Alloc(allocMain, outSizeCur); + if (!temp && outSizeCur != 0) + return SZ_ERROR_MEM; + outBufCur = tempBuf[1 - ci] = temp; + tempSizes[1 - ci] = outSizeCur; + } + else if (ci == 2) + { + if (unpackSize > outSize) /* check it */ + return SZ_ERROR_PARAM; + tempBuf3 = outBufCur = outBuffer + (outSize - (size_t)unpackSize); + tempSize3 = outSizeCur = (SizeT)unpackSize; + } + else + return SZ_ERROR_UNSUPPORTED; + } + offset = packPositions[si]; + inSize = 
packPositions[(size_t)si + 1] - offset; + RINOK(LookInStream_SeekTo(inStream, startPos + offset)) + + if (coder->MethodID == k_Copy) + { + if (inSize != outSizeCur) /* check it */ + return SZ_ERROR_DATA; + RINOK(SzDecodeCopy(inSize, inStream, outBufCur)) + } + else if (coder->MethodID == k_LZMA) + { + RINOK(SzDecodeLzma(propsData + coder->PropsOffset, coder->PropsSize, inSize, inStream, outBufCur, outSizeCur, allocMain)) + } + #ifndef Z7_NO_METHOD_LZMA2 + else if (coder->MethodID == k_LZMA2) + { + RINOK(SzDecodeLzma2(propsData + coder->PropsOffset, coder->PropsSize, inSize, inStream, outBufCur, outSizeCur, allocMain)) + } + #endif + #ifdef Z7_PPMD_SUPPORT + else if (coder->MethodID == k_PPMD) + { + RINOK(SzDecodePpmd(propsData + coder->PropsOffset, coder->PropsSize, inSize, inStream, outBufCur, outSizeCur, allocMain)) + } + #endif + else + return SZ_ERROR_UNSUPPORTED; + } + else if (coder->MethodID == k_BCJ2) + { + const UInt64 offset = packPositions[1]; + const UInt64 s3Size = packPositions[2] - offset; + + if (ci != 3) + return SZ_ERROR_UNSUPPORTED; + + tempSizes[2] = (SizeT)s3Size; + if (tempSizes[2] != s3Size) + return SZ_ERROR_MEM; + tempBuf[2] = (Byte *)ISzAlloc_Alloc(allocMain, tempSizes[2]); + if (!tempBuf[2] && tempSizes[2] != 0) + return SZ_ERROR_MEM; + + RINOK(LookInStream_SeekTo(inStream, startPos + offset)) + RINOK(SzDecodeCopy(s3Size, inStream, tempBuf[2])) + + if ((tempSizes[0] & 3) != 0 || + (tempSizes[1] & 3) != 0 || + tempSize3 + tempSizes[0] + tempSizes[1] != outSize) + return SZ_ERROR_DATA; + + { + CBcj2Dec p; + + p.bufs[0] = tempBuf3; p.lims[0] = tempBuf3 + tempSize3; + p.bufs[1] = tempBuf[0]; p.lims[1] = tempBuf[0] + tempSizes[0]; + p.bufs[2] = tempBuf[1]; p.lims[2] = tempBuf[1] + tempSizes[1]; + p.bufs[3] = tempBuf[2]; p.lims[3] = tempBuf[2] + tempSizes[2]; + + p.dest = outBuffer; + p.destLim = outBuffer + outSize; + + Bcj2Dec_Init(&p); + RINOK(Bcj2Dec_Decode(&p)) + + { + unsigned i; + for (i = 0; i < 4; i++) + if (p.bufs[i] != p.lims[i]) + 
return SZ_ERROR_DATA; + if (p.dest != p.destLim || !Bcj2Dec_IsMaybeFinished(&p)) + return SZ_ERROR_DATA; + } + } + } + #if defined(Z7_USE_BRANCH_FILTER) + else if (ci == 1) + { + #if !defined(Z7_NO_METHODS_FILTERS) + if (coder->MethodID == k_Delta) + { + if (coder->PropsSize != 1) + return SZ_ERROR_UNSUPPORTED; + { + Byte state[DELTA_STATE_SIZE]; + Delta_Init(state); + Delta_Decode(state, (unsigned)(propsData[coder->PropsOffset]) + 1, outBuffer, outSize); + } + continue; + } + #endif + + #ifdef Z7_USE_FILTER_ARM64 + if (coder->MethodID == k_ARM64) + { + UInt32 pc = 0; + if (coder->PropsSize == 4) + pc = GetUi32(propsData + coder->PropsOffset); + else if (coder->PropsSize != 0) + return SZ_ERROR_UNSUPPORTED; + z7_BranchConv_ARM64_Dec(outBuffer, outSize, pc); + continue; + } + #endif + + #if !defined(Z7_NO_METHODS_FILTERS) || defined(Z7_USE_FILTER_ARMT) + { + if (coder->PropsSize != 0) + return SZ_ERROR_UNSUPPORTED; + #define CASE_BRA_CONV(isa) case k_ ## isa: Z7_BRANCH_CONV_DEC(isa)(outBuffer, outSize, 0); break; // pc = 0; + switch (coder->MethodID) + { + #if !defined(Z7_NO_METHODS_FILTERS) + case k_BCJ: + { + UInt32 state = Z7_BRANCH_CONV_ST_X86_STATE_INIT_VAL; + z7_BranchConvSt_X86_Dec(outBuffer, outSize, 0, &state); // pc = 0 + break; + } + CASE_BRA_CONV(PPC) + CASE_BRA_CONV(IA64) + CASE_BRA_CONV(SPARC) + CASE_BRA_CONV(ARM) + #endif + #if !defined(Z7_NO_METHODS_FILTERS) || defined(Z7_USE_FILTER_ARMT) + CASE_BRA_CONV(ARMT) + #endif + default: + return SZ_ERROR_UNSUPPORTED; + } + continue; + } + #endif + } // (c == 1) + #endif + else + return SZ_ERROR_UNSUPPORTED; + } + + return SZ_OK; +} + + +SRes SzAr_DecodeFolder(const CSzAr *p, UInt32 folderIndex, + ILookInStreamPtr inStream, UInt64 startPos, + Byte *outBuffer, size_t outSize, + ISzAllocPtr allocMain) +{ + SRes res; + CSzFolder folder; + CSzData sd; + + const Byte *data = p->CodersData + p->FoCodersOffsets[folderIndex]; + sd.Data = data; + sd.Size = p->FoCodersOffsets[(size_t)folderIndex + 1] - 
p->FoCodersOffsets[folderIndex]; + + res = SzGetNextFolderItem(&folder, &sd); + + if (res != SZ_OK) + return res; + + if (sd.Size != 0 + || folder.UnpackStream != p->FoToMainUnpackSizeIndex[folderIndex] + || outSize != SzAr_GetFolderUnpackSize(p, folderIndex)) + return SZ_ERROR_FAIL; + { + unsigned i; + Byte *tempBuf[3] = { 0, 0, 0}; + + res = SzFolder_Decode2(&folder, data, + &p->CoderUnpackSizes[p->FoToCoderUnpackSizes[folderIndex]], + p->PackPositions + p->FoStartPackStreamIndex[folderIndex], + inStream, startPos, + outBuffer, (SizeT)outSize, allocMain, tempBuf); + + for (i = 0; i < 3; i++) + ISzAlloc_Free(allocMain, tempBuf[i]); + + if (res == SZ_OK) + if (SzBitWithVals_Check(&p->FolderCRCs, folderIndex)) + if (CrcCalc(outBuffer, outSize) != p->FolderCRCs.Vals[folderIndex]) + res = SZ_ERROR_CRC; + + return res; + } +} diff --git a/external/unarr/lzmasdk/7zStream.c b/external/unarr/lzmasdk/7zStream.c new file mode 100644 index 00000000..74e75b65 --- /dev/null +++ b/external/unarr/lzmasdk/7zStream.c @@ -0,0 +1,199 @@ +/* 7zStream.c -- 7z Stream functions +2023-04-02 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include + +#include "7zTypes.h" + + +SRes SeqInStream_ReadMax(ISeqInStreamPtr stream, void *buf, size_t *processedSize) +{ + size_t size = *processedSize; + *processedSize = 0; + while (size != 0) + { + size_t cur = size; + const SRes res = ISeqInStream_Read(stream, buf, &cur); + *processedSize += cur; + buf = (void *)((Byte *)buf + cur); + size -= cur; + if (res != SZ_OK) + return res; + if (cur == 0) + return SZ_OK; + } + return SZ_OK; +} + +/* +SRes SeqInStream_Read2(ISeqInStreamPtr stream, void *buf, size_t size, SRes errorType) +{ + while (size != 0) + { + size_t processed = size; + RINOK(ISeqInStream_Read(stream, buf, &processed)) + if (processed == 0) + return errorType; + buf = (void *)((Byte *)buf + processed); + size -= processed; + } + return SZ_OK; +} + +SRes SeqInStream_Read(ISeqInStreamPtr stream, void *buf, size_t size) +{ + 
return SeqInStream_Read2(stream, buf, size, SZ_ERROR_INPUT_EOF); +} +*/ + + +SRes SeqInStream_ReadByte(ISeqInStreamPtr stream, Byte *buf) +{ + size_t processed = 1; + RINOK(ISeqInStream_Read(stream, buf, &processed)) + return (processed == 1) ? SZ_OK : SZ_ERROR_INPUT_EOF; +} + + + +SRes LookInStream_SeekTo(ILookInStreamPtr stream, UInt64 offset) +{ + Int64 t = (Int64)offset; + return ILookInStream_Seek(stream, &t, SZ_SEEK_SET); +} + +SRes LookInStream_LookRead(ILookInStreamPtr stream, void *buf, size_t *size) +{ + const void *lookBuf; + if (*size == 0) + return SZ_OK; + RINOK(ILookInStream_Look(stream, &lookBuf, size)) + memcpy(buf, lookBuf, *size); + return ILookInStream_Skip(stream, *size); +} + +SRes LookInStream_Read2(ILookInStreamPtr stream, void *buf, size_t size, SRes errorType) +{ + while (size != 0) + { + size_t processed = size; + RINOK(ILookInStream_Read(stream, buf, &processed)) + if (processed == 0) + return errorType; + buf = (void *)((Byte *)buf + processed); + size -= processed; + } + return SZ_OK; +} + +SRes LookInStream_Read(ILookInStreamPtr stream, void *buf, size_t size) +{ + return LookInStream_Read2(stream, buf, size, SZ_ERROR_INPUT_EOF); +} + + + +#define GET_LookToRead2 Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CLookToRead2) + +static SRes LookToRead2_Look_Lookahead(ILookInStreamPtr pp, const void **buf, size_t *size) +{ + SRes res = SZ_OK; + GET_LookToRead2 + size_t size2 = p->size - p->pos; + if (size2 == 0 && *size != 0) + { + p->pos = 0; + p->size = 0; + size2 = p->bufSize; + res = ISeekInStream_Read(p->realStream, p->buf, &size2); + p->size = size2; + } + if (*size > size2) + *size = size2; + *buf = p->buf + p->pos; + return res; +} + +static SRes LookToRead2_Look_Exact(ILookInStreamPtr pp, const void **buf, size_t *size) +{ + SRes res = SZ_OK; + GET_LookToRead2 + size_t size2 = p->size - p->pos; + if (size2 == 0 && *size != 0) + { + p->pos = 0; + p->size = 0; + if (*size > p->bufSize) + *size = p->bufSize; + res = 
ISeekInStream_Read(p->realStream, p->buf, size); + size2 = p->size = *size; + } + if (*size > size2) + *size = size2; + *buf = p->buf + p->pos; + return res; +} + +static SRes LookToRead2_Skip(ILookInStreamPtr pp, size_t offset) +{ + GET_LookToRead2 + p->pos += offset; + return SZ_OK; +} + +static SRes LookToRead2_Read(ILookInStreamPtr pp, void *buf, size_t *size) +{ + GET_LookToRead2 + size_t rem = p->size - p->pos; + if (rem == 0) + return ISeekInStream_Read(p->realStream, buf, size); + if (rem > *size) + rem = *size; + memcpy(buf, p->buf + p->pos, rem); + p->pos += rem; + *size = rem; + return SZ_OK; +} + +static SRes LookToRead2_Seek(ILookInStreamPtr pp, Int64 *pos, ESzSeek origin) +{ + GET_LookToRead2 + p->pos = p->size = 0; + return ISeekInStream_Seek(p->realStream, pos, origin); +} + +void LookToRead2_CreateVTable(CLookToRead2 *p, int lookahead) +{ + p->vt.Look = lookahead ? + LookToRead2_Look_Lookahead : + LookToRead2_Look_Exact; + p->vt.Skip = LookToRead2_Skip; + p->vt.Read = LookToRead2_Read; + p->vt.Seek = LookToRead2_Seek; +} + + + +static SRes SecToLook_Read(ISeqInStreamPtr pp, void *buf, size_t *size) +{ + Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CSecToLook) + return LookInStream_LookRead(p->realStream, buf, size); +} + +void SecToLook_CreateVTable(CSecToLook *p) +{ + p->vt.Read = SecToLook_Read; +} + +static SRes SecToRead_Read(ISeqInStreamPtr pp, void *buf, size_t *size) +{ + Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CSecToRead) + return ILookInStream_Read(p->realStream, buf, size); +} + +void SecToRead_CreateVTable(CSecToRead *p) +{ + p->vt.Read = SecToRead_Read; +} diff --git a/external/unarr/lzmasdk/7zTypes.h b/external/unarr/lzmasdk/7zTypes.h new file mode 100644 index 00000000..1fcb2473 --- /dev/null +++ b/external/unarr/lzmasdk/7zTypes.h @@ -0,0 +1,597 @@ +/* 7zTypes.h -- Basic types +2023-04-02 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_7Z_TYPES_H +#define ZIP7_7Z_TYPES_H + +#ifdef _WIN32 +/* #include */ +#else +#include +#endif + 
+#include + +#ifndef EXTERN_C_BEGIN +#ifdef __cplusplus +#define EXTERN_C_BEGIN extern "C" { +#define EXTERN_C_END } +#else +#define EXTERN_C_BEGIN +#define EXTERN_C_END +#endif +#endif + +EXTERN_C_BEGIN + +#define SZ_OK 0 + +#define SZ_ERROR_DATA 1 +#define SZ_ERROR_MEM 2 +#define SZ_ERROR_CRC 3 +#define SZ_ERROR_UNSUPPORTED 4 +#define SZ_ERROR_PARAM 5 +#define SZ_ERROR_INPUT_EOF 6 +#define SZ_ERROR_OUTPUT_EOF 7 +#define SZ_ERROR_READ 8 +#define SZ_ERROR_WRITE 9 +#define SZ_ERROR_PROGRESS 10 +#define SZ_ERROR_FAIL 11 +#define SZ_ERROR_THREAD 12 + +#define SZ_ERROR_ARCHIVE 16 +#define SZ_ERROR_NO_ARCHIVE 17 + +typedef int SRes; + + +#ifdef _MSC_VER + #if _MSC_VER > 1200 + #define MY_ALIGN(n) __declspec(align(n)) + #else + #define MY_ALIGN(n) + #endif +#else + /* + // C11/C++11: + #include + #define MY_ALIGN(n) alignas(n) + */ + #define MY_ALIGN(n) __attribute__ ((aligned(n))) +#endif + + +#ifdef _WIN32 + +/* typedef DWORD WRes; */ +typedef unsigned WRes; +#define MY_SRes_HRESULT_FROM_WRes(x) HRESULT_FROM_WIN32(x) + +// #define MY_HRES_ERROR_INTERNAL_ERROR MY_SRes_HRESULT_FROM_WRes(ERROR_INTERNAL_ERROR) + +#else // _WIN32 + +// #define ENV_HAVE_LSTAT +typedef int WRes; + +// (FACILITY_ERRNO = 0x800) is 7zip's FACILITY constant to represent (errno) errors in HRESULT +#define MY_FACILITY_ERRNO 0x800 +#define MY_FACILITY_WIN32 7 +#define MY_FACILITY_WRes MY_FACILITY_ERRNO + +#define MY_HRESULT_FROM_errno_CONST_ERROR(x) ((HRESULT)( \ + ( (HRESULT)(x) & 0x0000FFFF) \ + | (MY_FACILITY_WRes << 16) \ + | (HRESULT)0x80000000 )) + +#define MY_SRes_HRESULT_FROM_WRes(x) \ + ((HRESULT)(x) <= 0 ? 
((HRESULT)(x)) : MY_HRESULT_FROM_errno_CONST_ERROR(x)) + +// we call macro HRESULT_FROM_WIN32 for system errors (WRes) that are (errno) +#define HRESULT_FROM_WIN32(x) MY_SRes_HRESULT_FROM_WRes(x) + +/* +#define ERROR_FILE_NOT_FOUND 2L +#define ERROR_ACCESS_DENIED 5L +#define ERROR_NO_MORE_FILES 18L +#define ERROR_LOCK_VIOLATION 33L +#define ERROR_FILE_EXISTS 80L +#define ERROR_DISK_FULL 112L +#define ERROR_NEGATIVE_SEEK 131L +#define ERROR_ALREADY_EXISTS 183L +#define ERROR_DIRECTORY 267L +#define ERROR_TOO_MANY_POSTS 298L + +#define ERROR_INTERNAL_ERROR 1359L +#define ERROR_INVALID_REPARSE_DATA 4392L +#define ERROR_REPARSE_TAG_INVALID 4393L +#define ERROR_REPARSE_TAG_MISMATCH 4394L +*/ + +// we use errno equivalents for some WIN32 errors: + +#define ERROR_INVALID_PARAMETER EINVAL +#define ERROR_INVALID_FUNCTION EINVAL +#define ERROR_ALREADY_EXISTS EEXIST +#define ERROR_FILE_EXISTS EEXIST +#define ERROR_PATH_NOT_FOUND ENOENT +#define ERROR_FILE_NOT_FOUND ENOENT +#define ERROR_DISK_FULL ENOSPC +// #define ERROR_INVALID_HANDLE EBADF + +// we use FACILITY_WIN32 for errors that has no errno equivalent +// Too many posts were made to a semaphore. 
+#define ERROR_TOO_MANY_POSTS ((HRESULT)0x8007012AL) +#define ERROR_INVALID_REPARSE_DATA ((HRESULT)0x80071128L) +#define ERROR_REPARSE_TAG_INVALID ((HRESULT)0x80071129L) + +// if (MY_FACILITY_WRes != FACILITY_WIN32), +// we use FACILITY_WIN32 for COM errors: +#define E_OUTOFMEMORY ((HRESULT)0x8007000EL) +#define E_INVALIDARG ((HRESULT)0x80070057L) +#define MY_E_ERROR_NEGATIVE_SEEK ((HRESULT)0x80070083L) + +/* +// we can use FACILITY_ERRNO for some COM errors, that have errno equivalents: +#define E_OUTOFMEMORY MY_HRESULT_FROM_errno_CONST_ERROR(ENOMEM) +#define E_INVALIDARG MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL) +#define MY_E_ERROR_NEGATIVE_SEEK MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL) +*/ + +#define TEXT(quote) quote + +#define FILE_ATTRIBUTE_READONLY 0x0001 +#define FILE_ATTRIBUTE_HIDDEN 0x0002 +#define FILE_ATTRIBUTE_SYSTEM 0x0004 +#define FILE_ATTRIBUTE_DIRECTORY 0x0010 +#define FILE_ATTRIBUTE_ARCHIVE 0x0020 +#define FILE_ATTRIBUTE_DEVICE 0x0040 +#define FILE_ATTRIBUTE_NORMAL 0x0080 +#define FILE_ATTRIBUTE_TEMPORARY 0x0100 +#define FILE_ATTRIBUTE_SPARSE_FILE 0x0200 +#define FILE_ATTRIBUTE_REPARSE_POINT 0x0400 +#define FILE_ATTRIBUTE_COMPRESSED 0x0800 +#define FILE_ATTRIBUTE_OFFLINE 0x1000 +#define FILE_ATTRIBUTE_NOT_CONTENT_INDEXED 0x2000 +#define FILE_ATTRIBUTE_ENCRYPTED 0x4000 + +#define FILE_ATTRIBUTE_UNIX_EXTENSION 0x8000 /* trick for Unix */ + +#endif + + +#ifndef RINOK +#define RINOK(x) { const int _result_ = (x); if (_result_ != 0) return _result_; } +#endif + +#ifndef RINOK_WRes +#define RINOK_WRes(x) { const WRes _result_ = (x); if (_result_ != 0) return _result_; } +#endif + +typedef unsigned char Byte; +typedef short Int16; +typedef unsigned short UInt16; + +#ifdef Z7_DECL_Int32_AS_long +typedef long Int32; +typedef unsigned long UInt32; +#else +typedef int Int32; +typedef unsigned int UInt32; +#endif + + +#ifndef _WIN32 + +typedef int INT; +typedef Int32 INT32; +typedef unsigned int UINT; +typedef UInt32 UINT32; +typedef INT32 LONG; // LONG, 
ULONG and DWORD must be 32-bit for _WIN32 compatibility +typedef UINT32 ULONG; + +#undef DWORD +typedef UINT32 DWORD; + +#define VOID void + +#define HRESULT LONG + +typedef void *LPVOID; +// typedef void VOID; +// typedef ULONG_PTR DWORD_PTR, *PDWORD_PTR; +// gcc / clang on Unix : sizeof(long==sizeof(void*) in 32 or 64 bits) +typedef long INT_PTR; +typedef unsigned long UINT_PTR; +typedef long LONG_PTR; +typedef unsigned long DWORD_PTR; + +typedef size_t SIZE_T; + +#endif // _WIN32 + + +#define MY_HRES_ERROR_INTERNAL_ERROR ((HRESULT)0x8007054FL) + + +#ifdef Z7_DECL_Int64_AS_long + +typedef long Int64; +typedef unsigned long UInt64; + +#else + +#if (defined(_MSC_VER) || defined(__BORLANDC__)) && !defined(__clang__) +typedef __int64 Int64; +typedef unsigned __int64 UInt64; +#else +#if defined(__clang__) || defined(__GNUC__) +#include +typedef int64_t Int64; +typedef uint64_t UInt64; +#else +typedef long long int Int64; +typedef unsigned long long int UInt64; +// #define UINT64_CONST(n) n ## ULL +#endif +#endif + +#endif + +#define UINT64_CONST(n) n + + +#ifdef Z7_DECL_SizeT_AS_unsigned_int +typedef unsigned int SizeT; +#else +typedef size_t SizeT; +#endif + +/* +#if (defined(_MSC_VER) && _MSC_VER <= 1200) +typedef size_t MY_uintptr_t; +#else +#include +typedef uintptr_t MY_uintptr_t; +#endif +*/ + +typedef int BoolInt; +/* typedef BoolInt Bool; */ +#define True 1 +#define False 0 + + +#ifdef _WIN32 +#define Z7_STDCALL __stdcall +#else +#define Z7_STDCALL +#endif + +#ifdef _MSC_VER + +#if _MSC_VER >= 1300 +#define Z7_NO_INLINE __declspec(noinline) +#else +#define Z7_NO_INLINE +#endif + +#define Z7_FORCE_INLINE __forceinline + +#define Z7_CDECL __cdecl +#define Z7_FASTCALL __fastcall + +#else // _MSC_VER + +#if (defined(__GNUC__) && (__GNUC__ >= 4)) \ + || (defined(__clang__) && (__clang_major__ >= 4)) \ + || defined(__INTEL_COMPILER) \ + || defined(__xlC__) +#define Z7_NO_INLINE __attribute__((noinline)) +#define Z7_FORCE_INLINE __attribute__((always_inline)) inline 
+#else +#define Z7_NO_INLINE +#define Z7_FORCE_INLINE +#endif + +#define Z7_CDECL + +#if defined(_M_IX86) \ + || defined(__i386__) +// #define Z7_FASTCALL __attribute__((fastcall)) +// #define Z7_FASTCALL __attribute__((cdecl)) +#define Z7_FASTCALL +#elif defined(MY_CPU_AMD64) +// #define Z7_FASTCALL __attribute__((ms_abi)) +#define Z7_FASTCALL +#else +#define Z7_FASTCALL +#endif + +#endif // _MSC_VER + + +/* The following interfaces use first parameter as pointer to structure */ + +// #define Z7_C_IFACE_CONST_QUAL +#define Z7_C_IFACE_CONST_QUAL const + +#define Z7_C_IFACE_DECL(a) \ + struct a ## _; \ + typedef Z7_C_IFACE_CONST_QUAL struct a ## _ * a ## Ptr; \ + typedef struct a ## _ a; \ + struct a ## _ + + +Z7_C_IFACE_DECL (IByteIn) +{ + Byte (*Read)(IByteInPtr p); /* reads one byte, returns 0 in case of EOF or error */ +}; +#define IByteIn_Read(p) (p)->Read(p) + + +Z7_C_IFACE_DECL (IByteOut) +{ + void (*Write)(IByteOutPtr p, Byte b); +}; +#define IByteOut_Write(p, b) (p)->Write(p, b) + + +Z7_C_IFACE_DECL (ISeqInStream) +{ + SRes (*Read)(ISeqInStreamPtr p, void *buf, size_t *size); + /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. + (output(*size) < input(*size)) is allowed */ +}; +#define ISeqInStream_Read(p, buf, size) (p)->Read(p, buf, size) + +/* try to read as much as avail in stream and limited by (*processedSize) */ +SRes SeqInStream_ReadMax(ISeqInStreamPtr stream, void *buf, size_t *processedSize); +/* it can return SZ_ERROR_INPUT_EOF */ +// SRes SeqInStream_Read(ISeqInStreamPtr stream, void *buf, size_t size); +// SRes SeqInStream_Read2(ISeqInStreamPtr stream, void *buf, size_t size, SRes errorType); +SRes SeqInStream_ReadByte(ISeqInStreamPtr stream, Byte *buf); + + +Z7_C_IFACE_DECL (ISeqOutStream) +{ + size_t (*Write)(ISeqOutStreamPtr p, const void *buf, size_t size); + /* Returns: result - the number of actually written bytes. 
+ (result < size) means error */ +}; +#define ISeqOutStream_Write(p, buf, size) (p)->Write(p, buf, size) + +typedef enum +{ + SZ_SEEK_SET = 0, + SZ_SEEK_CUR = 1, + SZ_SEEK_END = 2 +} ESzSeek; + + +Z7_C_IFACE_DECL (ISeekInStream) +{ + SRes (*Read)(ISeekInStreamPtr p, void *buf, size_t *size); /* same as ISeqInStream::Read */ + SRes (*Seek)(ISeekInStreamPtr p, Int64 *pos, ESzSeek origin); +}; +#define ISeekInStream_Read(p, buf, size) (p)->Read(p, buf, size) +#define ISeekInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin) + + +Z7_C_IFACE_DECL (ILookInStream) +{ + SRes (*Look)(ILookInStreamPtr p, const void **buf, size_t *size); + /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. + (output(*size) > input(*size)) is not allowed + (output(*size) < input(*size)) is allowed */ + SRes (*Skip)(ILookInStreamPtr p, size_t offset); + /* offset must be <= output(*size) of Look */ + SRes (*Read)(ILookInStreamPtr p, void *buf, size_t *size); + /* reads directly (without buffer). 
It's same as ISeqInStream::Read */ + SRes (*Seek)(ILookInStreamPtr p, Int64 *pos, ESzSeek origin); +}; + +#define ILookInStream_Look(p, buf, size) (p)->Look(p, buf, size) +#define ILookInStream_Skip(p, offset) (p)->Skip(p, offset) +#define ILookInStream_Read(p, buf, size) (p)->Read(p, buf, size) +#define ILookInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin) + + +SRes LookInStream_LookRead(ILookInStreamPtr stream, void *buf, size_t *size); +SRes LookInStream_SeekTo(ILookInStreamPtr stream, UInt64 offset); + +/* reads via ILookInStream::Read */ +SRes LookInStream_Read2(ILookInStreamPtr stream, void *buf, size_t size, SRes errorType); +SRes LookInStream_Read(ILookInStreamPtr stream, void *buf, size_t size); + + +typedef struct +{ + ILookInStream vt; + ISeekInStreamPtr realStream; + + size_t pos; + size_t size; /* it's data size */ + + /* the following variables must be set outside */ + Byte *buf; + size_t bufSize; +} CLookToRead2; + +void LookToRead2_CreateVTable(CLookToRead2 *p, int lookahead); + +#define LookToRead2_INIT(p) { (p)->pos = (p)->size = 0; } + + +typedef struct +{ + ISeqInStream vt; + ILookInStreamPtr realStream; +} CSecToLook; + +void SecToLook_CreateVTable(CSecToLook *p); + + + +typedef struct +{ + ISeqInStream vt; + ILookInStreamPtr realStream; +} CSecToRead; + +void SecToRead_CreateVTable(CSecToRead *p); + + +Z7_C_IFACE_DECL (ICompressProgress) +{ + SRes (*Progress)(ICompressProgressPtr p, UInt64 inSize, UInt64 outSize); + /* Returns: result. (result != SZ_OK) means break. + Value (UInt64)(Int64)-1 for size means unknown value. 
*/ +}; + +#define ICompressProgress_Progress(p, inSize, outSize) (p)->Progress(p, inSize, outSize) + + + +typedef struct ISzAlloc ISzAlloc; +typedef const ISzAlloc * ISzAllocPtr; + +struct ISzAlloc +{ + void *(*Alloc)(ISzAllocPtr p, size_t size); + void (*Free)(ISzAllocPtr p, void *address); /* address can be 0 */ +}; + +#define ISzAlloc_Alloc(p, size) (p)->Alloc(p, size) +#define ISzAlloc_Free(p, a) (p)->Free(p, a) + +/* deprecated */ +#define IAlloc_Alloc(p, size) ISzAlloc_Alloc(p, size) +#define IAlloc_Free(p, a) ISzAlloc_Free(p, a) + + + + + +#ifndef MY_offsetof + #ifdef offsetof + #define MY_offsetof(type, m) offsetof(type, m) + /* + #define MY_offsetof(type, m) FIELD_OFFSET(type, m) + */ + #else + #define MY_offsetof(type, m) ((size_t)&(((type *)0)->m)) + #endif +#endif + + + +#ifndef Z7_container_of + +/* +#define Z7_container_of(ptr, type, m) container_of(ptr, type, m) +#define Z7_container_of(ptr, type, m) CONTAINING_RECORD(ptr, type, m) +#define Z7_container_of(ptr, type, m) ((type *)((char *)(ptr) - offsetof(type, m))) +#define Z7_container_of(ptr, type, m) (&((type *)0)->m == (ptr), ((type *)(((char *)(ptr)) - MY_offsetof(type, m)))) +*/ + +/* + GCC shows warning: "perhaps the 'offsetof' macro was used incorrectly" + GCC 3.4.4 : classes with constructor + GCC 4.8.1 : classes with non-public variable members" +*/ + +#define Z7_container_of(ptr, type, m) \ + ((type *)(void *)((char *)(void *) \ + (1 ? (ptr) : &((type *)NULL)->m) - MY_offsetof(type, m))) + +#define Z7_container_of_CONST(ptr, type, m) \ + ((const type *)(const void *)((const char *)(const void *) \ + (1 ? (ptr) : &((type *)NULL)->m) - MY_offsetof(type, m))) + +/* +#define Z7_container_of_NON_CONST_FROM_CONST(ptr, type, m) \ + ((type *)(void *)(const void *)((const char *)(const void *) \ + (1 ? 
(ptr) : &((type *)NULL)->m) - MY_offsetof(type, m))) +*/ + +#endif + +#define Z7_CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(void *)(ptr)) + +// #define Z7_CONTAINER_FROM_VTBL(ptr, type, m) Z7_CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) +#define Z7_CONTAINER_FROM_VTBL(ptr, type, m) Z7_container_of(ptr, type, m) +// #define Z7_CONTAINER_FROM_VTBL(ptr, type, m) Z7_container_of_NON_CONST_FROM_CONST(ptr, type, m) + +#define Z7_CONTAINER_FROM_VTBL_CONST(ptr, type, m) Z7_container_of_CONST(ptr, type, m) + +#define Z7_CONTAINER_FROM_VTBL_CLS(ptr, type, m) Z7_CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) +/* +#define Z7_CONTAINER_FROM_VTBL_CLS(ptr, type, m) Z7_CONTAINER_FROM_VTBL(ptr, type, m) +*/ +#if defined (__clang__) || defined(__GNUC__) +#define Z7_DIAGNOSCTIC_IGNORE_BEGIN_CAST_QUAL \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wcast-qual\"") +#define Z7_DIAGNOSCTIC_IGNORE_END_CAST_QUAL \ + _Pragma("GCC diagnostic pop") +#else +#define Z7_DIAGNOSCTIC_IGNORE_BEGIN_CAST_QUAL +#define Z7_DIAGNOSCTIC_IGNORE_END_CAST_QUAL +#endif + +#define Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR(ptr, type, m, p) \ + Z7_DIAGNOSCTIC_IGNORE_BEGIN_CAST_QUAL \ + type *p = Z7_CONTAINER_FROM_VTBL(ptr, type, m); \ + Z7_DIAGNOSCTIC_IGNORE_END_CAST_QUAL + +#define Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(type) \ + Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR(pp, type, vt, p) + + +// #define ZIP7_DECLARE_HANDLE(name) typedef void *name; +#define Z7_DECLARE_HANDLE(name) struct name##_dummy{int unused;}; typedef struct name##_dummy *name; + + +#define Z7_memset_0_ARRAY(a) memset((a), 0, sizeof(a)) + +#ifndef Z7_ARRAY_SIZE +#define Z7_ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0])) +#endif + + +#ifdef _WIN32 + +#define CHAR_PATH_SEPARATOR '\\' +#define WCHAR_PATH_SEPARATOR L'\\' +#define STRING_PATH_SEPARATOR "\\" +#define WSTRING_PATH_SEPARATOR L"\\" + +#else + +#define CHAR_PATH_SEPARATOR '/' +#define WCHAR_PATH_SEPARATOR L'/' +#define STRING_PATH_SEPARATOR "/" +#define 
WSTRING_PATH_SEPARATOR L"/" + +#endif + +#define k_PropVar_TimePrec_0 0 +#define k_PropVar_TimePrec_Unix 1 +#define k_PropVar_TimePrec_DOS 2 +#define k_PropVar_TimePrec_HighPrec 3 +#define k_PropVar_TimePrec_Base 16 +#define k_PropVar_TimePrec_100ns (k_PropVar_TimePrec_Base + 7) +#define k_PropVar_TimePrec_1ns (k_PropVar_TimePrec_Base + 9) + +EXTERN_C_END + +#endif + +/* +#ifndef Z7_ST +#ifdef _7ZIP_ST +#define Z7_ST +#endif +#endif +*/ diff --git a/external/unarr/lzmasdk/7zWindows.h b/external/unarr/lzmasdk/7zWindows.h new file mode 100644 index 00000000..42c6db8b --- /dev/null +++ b/external/unarr/lzmasdk/7zWindows.h @@ -0,0 +1,101 @@ +/* 7zWindows.h -- StdAfx +2023-04-02 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_7Z_WINDOWS_H +#define ZIP7_INC_7Z_WINDOWS_H + +#ifdef _WIN32 + +#if defined(__clang__) +# pragma clang diagnostic push +#endif + +#if defined(_MSC_VER) + +#pragma warning(push) +#pragma warning(disable : 4668) // '_WIN32_WINNT' is not defined as a preprocessor macro, replacing with '0' for '#if/#elif' + +#if _MSC_VER == 1900 +// for old kit10 versions +// #pragma warning(disable : 4255) // winuser.h(13979): warning C4255: 'GetThreadDpiAwarenessContext': +#endif +// win10 Windows Kit: +#endif // _MSC_VER + +#if defined(_MSC_VER) && _MSC_VER <= 1200 && !defined(_WIN64) +// for msvc6 without sdk2003 +#define RPC_NO_WINDOWS_H +#endif + +#if defined(__MINGW32__) || defined(__MINGW64__) +// #if defined(__GNUC__) && !defined(__clang__) +#include +#else +#include +#endif +// #include +// #include + +// but if precompiled with clang-cl then we need +// #include +#if defined(_MSC_VER) +#pragma warning(pop) +#endif + +#if defined(__clang__) +# pragma clang diagnostic pop +#endif + +#if defined(_MSC_VER) && _MSC_VER <= 1200 && !defined(_WIN64) +#ifndef _W64 + +typedef long LONG_PTR, *PLONG_PTR; +typedef unsigned long ULONG_PTR, *PULONG_PTR; +typedef ULONG_PTR DWORD_PTR, *PDWORD_PTR; + +#define Z7_OLD_WIN_SDK +#endif // _W64 +#endif // _MSC_VER == 1200 + 
+#ifdef Z7_OLD_WIN_SDK + +#ifndef INVALID_FILE_ATTRIBUTES +#define INVALID_FILE_ATTRIBUTES ((DWORD)-1) +#endif +#ifndef INVALID_SET_FILE_POINTER +#define INVALID_SET_FILE_POINTER ((DWORD)-1) +#endif +#ifndef FILE_SPECIAL_ACCESS +#define FILE_SPECIAL_ACCESS (FILE_ANY_ACCESS) +#endif + +// ShlObj.h: +// #define BIF_NEWDIALOGSTYLE 0x0040 + +#pragma warning(disable : 4201) +// #pragma warning(disable : 4115) + +#undef VARIANT_TRUE +#define VARIANT_TRUE ((VARIANT_BOOL)-1) +#endif + +#endif // Z7_OLD_WIN_SDK + +#ifdef UNDER_CE +#undef VARIANT_TRUE +#define VARIANT_TRUE ((VARIANT_BOOL)-1) +#endif + + +#if defined(_MSC_VER) +#if _MSC_VER >= 1400 && _MSC_VER <= 1600 + // BaseTsd.h(148) : 'HandleToULong' : unreferenced inline function has been removed + // string.h + // #pragma warning(disable : 4514) +#endif +#endif + + +/* #include "7zTypes.h" */ + +#endif diff --git a/external/unarr/lzmasdk/Bcj2.c b/external/unarr/lzmasdk/Bcj2.c new file mode 100644 index 00000000..7cb57ad6 --- /dev/null +++ b/external/unarr/lzmasdk/Bcj2.c @@ -0,0 +1,290 @@ +/* Bcj2.c -- BCJ2 Decoder (Converter for x86 code) +2023-03-01 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include "Bcj2.h" +#include "CpuArch.h" + +#define kTopValue ((UInt32)1 << 24) +#define kNumBitModelTotalBits 11 +#define kBitModelTotal (1 << kNumBitModelTotalBits) +#define kNumMoveBits 5 + +// UInt32 bcj2_stats[256 + 2][2]; + +void Bcj2Dec_Init(CBcj2Dec *p) +{ + unsigned i; + p->state = BCJ2_STREAM_RC; // BCJ2_DEC_STATE_OK; + p->ip = 0; + p->temp = 0; + p->range = 0; + p->code = 0; + for (i = 0; i < sizeof(p->probs) / sizeof(p->probs[0]); i++) + p->probs[i] = kBitModelTotal >> 1; +} + +SRes Bcj2Dec_Decode(CBcj2Dec *p) +{ + UInt32 v = p->temp; + // const Byte *src; + if (p->range <= 5) + { + UInt32 code = p->code; + p->state = BCJ2_DEC_STATE_ERROR; /* for case if we return SZ_ERROR_DATA; */ + for (; p->range != 5; p->range++) + { + if (p->range == 1 && code != 0) + return SZ_ERROR_DATA; + if 
(p->bufs[BCJ2_STREAM_RC] == p->lims[BCJ2_STREAM_RC]) + { + p->state = BCJ2_STREAM_RC; + return SZ_OK; + } + code = (code << 8) | *(p->bufs[BCJ2_STREAM_RC])++; + p->code = code; + } + if (code == 0xffffffff) + return SZ_ERROR_DATA; + p->range = 0xffffffff; + } + // else + { + unsigned state = p->state; + // we check BCJ2_IS_32BIT_STREAM() here instead of check in the main loop + if (BCJ2_IS_32BIT_STREAM(state)) + { + const Byte *cur = p->bufs[state]; + if (cur == p->lims[state]) + return SZ_OK; + p->bufs[state] = cur + 4; + { + const UInt32 ip = p->ip + 4; + v = GetBe32a(cur) - ip; + p->ip = ip; + } + state = BCJ2_DEC_STATE_ORIG_0; + } + if ((unsigned)(state - BCJ2_DEC_STATE_ORIG_0) < 4) + { + Byte *dest = p->dest; + for (;;) + { + if (dest == p->destLim) + { + p->state = state; + p->temp = v; + return SZ_OK; + } + *dest++ = (Byte)v; + p->dest = dest; + if (++state == BCJ2_DEC_STATE_ORIG_3 + 1) + break; + v >>= 8; + } + } + } + + // src = p->bufs[BCJ2_STREAM_MAIN]; + for (;;) + { + /* + if (BCJ2_IS_32BIT_STREAM(p->state)) + p->state = BCJ2_DEC_STATE_OK; + else + */ + { + if (p->range < kTopValue) + { + if (p->bufs[BCJ2_STREAM_RC] == p->lims[BCJ2_STREAM_RC]) + { + p->state = BCJ2_STREAM_RC; + p->temp = v; + return SZ_OK; + } + p->range <<= 8; + p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++; + } + { + const Byte *src = p->bufs[BCJ2_STREAM_MAIN]; + const Byte *srcLim; + Byte *dest = p->dest; + { + const SizeT rem = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - src); + SizeT num = (SizeT)(p->destLim - dest); + if (num >= rem) + num = rem; + #define NUM_ITERS 4 + #if (NUM_ITERS & (NUM_ITERS - 1)) == 0 + num &= ~((SizeT)NUM_ITERS - 1); // if (NUM_ITERS == (1 << x)) + #else + num -= num % NUM_ITERS; // if (NUM_ITERS != (1 << x)) + #endif + srcLim = src + num; + } + + #define NUM_SHIFT_BITS 24 + #define ONE_ITER(indx) { \ + const unsigned b = src[indx]; \ + *dest++ = (Byte)b; \ + v = (v << NUM_SHIFT_BITS) | b; \ + if (((b + (0x100 - 0xe8)) & 0xfe) == 0) break; \ + if (((v - 
(((UInt32)0x0f << (NUM_SHIFT_BITS)) + 0x80)) & \ + ((((UInt32)1 << (4 + NUM_SHIFT_BITS)) - 0x1) << 4)) == 0) break; \ + /* ++dest */; /* v = b; */ } + + if (src != srcLim) + for (;;) + { + /* The dependency chain of 2-cycle for (v) calculation is not big problem here. + But we can remove dependency chain with v = b in the end of loop. */ + ONE_ITER(0) + #if (NUM_ITERS > 1) + ONE_ITER(1) + #if (NUM_ITERS > 2) + ONE_ITER(2) + #if (NUM_ITERS > 3) + ONE_ITER(3) + #if (NUM_ITERS > 4) + ONE_ITER(4) + #if (NUM_ITERS > 5) + ONE_ITER(5) + #if (NUM_ITERS > 6) + ONE_ITER(6) + #if (NUM_ITERS > 7) + ONE_ITER(7) + #endif + #endif + #endif + #endif + #endif + #endif + #endif + + src += NUM_ITERS; + if (src == srcLim) + break; + } + + if (src == srcLim) + #if (NUM_ITERS > 1) + for (;;) + #endif + { + #if (NUM_ITERS > 1) + if (src == p->lims[BCJ2_STREAM_MAIN] || dest == p->destLim) + #endif + { + const SizeT num = (SizeT)(src - p->bufs[BCJ2_STREAM_MAIN]); + p->bufs[BCJ2_STREAM_MAIN] = src; + p->dest = dest; + p->ip += (UInt32)num; + /* state BCJ2_STREAM_MAIN has more priority than BCJ2_STATE_ORIG */ + p->state = + src == p->lims[BCJ2_STREAM_MAIN] ? + (unsigned)BCJ2_STREAM_MAIN : + (unsigned)BCJ2_DEC_STATE_ORIG; + p->temp = v; + return SZ_OK; + } + #if (NUM_ITERS > 1) + ONE_ITER(0) + src++; + #endif + } + + { + const SizeT num = (SizeT)(dest - p->dest); + p->dest = dest; // p->dest += num; + p->bufs[BCJ2_STREAM_MAIN] += num; // = src; + p->ip += (UInt32)num; + } + { + UInt32 bound, ttt; + CBcj2Prob *prob; // unsigned index; + /* + prob = p->probs + (unsigned)((Byte)v == 0xe8 ? + 2 + (Byte)(v >> 8) : + ((v >> 5) & 1)); // ((Byte)v < 0xe8 ? 
0 : 1)); + */ + { + const unsigned c = ((v + 0x17) >> 6) & 1; + prob = p->probs + (unsigned) + (((0 - c) & (Byte)(v >> NUM_SHIFT_BITS)) + c + ((v >> 5) & 1)); + // (Byte) + // 8x->0 : e9->1 : xxe8->xx+2 + // 8x->0x100 : e9->0x101 : xxe8->xx + // (((0x100 - (e & ~v)) & (0x100 | (v >> 8))) + (e & v)); + // (((0x101 + (~e | v)) & (0x100 | (v >> 8))) + (e & v)); + } + ttt = *prob; + bound = (p->range >> kNumBitModelTotalBits) * ttt; + if (p->code < bound) + { + // bcj2_stats[prob - p->probs][0]++; + p->range = bound; + *prob = (CBcj2Prob)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); + continue; + } + { + // bcj2_stats[prob - p->probs][1]++; + p->range -= bound; + p->code -= bound; + *prob = (CBcj2Prob)(ttt - (ttt >> kNumMoveBits)); + } + } + } + } + { + /* (v == 0xe8 ? 0 : 1) uses setcc instruction with additional zero register usage in x64 MSVC. */ + // const unsigned cj = ((Byte)v == 0xe8) ? BCJ2_STREAM_CALL : BCJ2_STREAM_JUMP; + const unsigned cj = (((v + 0x57) >> 6) & 1) + BCJ2_STREAM_CALL; + const Byte *cur = p->bufs[cj]; + Byte *dest; + SizeT rem; + if (cur == p->lims[cj]) + { + p->state = cj; + break; + } + v = GetBe32a(cur); + p->bufs[cj] = cur + 4; + { + const UInt32 ip = p->ip + 4; + v -= ip; + p->ip = ip; + } + dest = p->dest; + rem = (SizeT)(p->destLim - dest); + if (rem < 4) + { + if ((unsigned)rem > 0) { dest[0] = (Byte)v; v >>= 8; + if ((unsigned)rem > 1) { dest[1] = (Byte)v; v >>= 8; + if ((unsigned)rem > 2) { dest[2] = (Byte)v; v >>= 8; }}} + p->temp = v; + p->dest = dest + rem; + p->state = BCJ2_DEC_STATE_ORIG_0 + (unsigned)rem; + break; + } + SetUi32(dest, v) + v >>= 24; + p->dest = dest + 4; + } + } + + if (p->range < kTopValue && p->bufs[BCJ2_STREAM_RC] != p->lims[BCJ2_STREAM_RC]) + { + p->range <<= 8; + p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++; + } + return SZ_OK; +} + +#undef NUM_ITERS +#undef ONE_ITER +#undef NUM_SHIFT_BITS +#undef kTopValue +#undef kNumBitModelTotalBits +#undef kBitModelTotal +#undef kNumMoveBits diff --git 
a/external/unarr/lzmasdk/Bcj2.h b/external/unarr/lzmasdk/Bcj2.h new file mode 100644 index 00000000..4575545b --- /dev/null +++ b/external/unarr/lzmasdk/Bcj2.h @@ -0,0 +1,332 @@ +/* Bcj2.h -- BCJ2 converter for x86 code (Branch CALL/JUMP variant2) +2023-03-02 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_BCJ2_H +#define ZIP7_INC_BCJ2_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +#define BCJ2_NUM_STREAMS 4 + +enum +{ + BCJ2_STREAM_MAIN, + BCJ2_STREAM_CALL, + BCJ2_STREAM_JUMP, + BCJ2_STREAM_RC +}; + +enum +{ + BCJ2_DEC_STATE_ORIG_0 = BCJ2_NUM_STREAMS, + BCJ2_DEC_STATE_ORIG_1, + BCJ2_DEC_STATE_ORIG_2, + BCJ2_DEC_STATE_ORIG_3, + + BCJ2_DEC_STATE_ORIG, + BCJ2_DEC_STATE_ERROR /* after detected data error */ +}; + +enum +{ + BCJ2_ENC_STATE_ORIG = BCJ2_NUM_STREAMS, + BCJ2_ENC_STATE_FINISHED /* it's state after fully encoded stream */ +}; + + +/* #define BCJ2_IS_32BIT_STREAM(s) ((s) == BCJ2_STREAM_CALL || (s) == BCJ2_STREAM_JUMP) */ +#define BCJ2_IS_32BIT_STREAM(s) ((unsigned)((unsigned)(s) - (unsigned)BCJ2_STREAM_CALL) < 2) + +/* +CBcj2Dec / CBcj2Enc +bufs sizes: + BUF_SIZE(n) = lims[n] - bufs[n] +bufs sizes for BCJ2_STREAM_CALL and BCJ2_STREAM_JUMP must be multiply of 4: + (BUF_SIZE(BCJ2_STREAM_CALL) & 3) == 0 + (BUF_SIZE(BCJ2_STREAM_JUMP) & 3) == 0 +*/ + +// typedef UInt32 CBcj2Prob; +typedef UInt16 CBcj2Prob; + +/* +BCJ2 encoder / decoder internal requirements: + - If last bytes of stream contain marker (e8/e8/0f8x), then + there is also encoded symbol (0 : no conversion) in RC stream. + - One case of overlapped instructions is supported, + if last byte of converted instruction is (0f) and next byte is (8x): + marker [xx xx xx 0f] 8x + then the pair (0f 8x) is treated as marker. 
+*/ + +/* ---------- BCJ2 Decoder ---------- */ + +/* +CBcj2Dec: +(dest) is allowed to overlap with bufs[BCJ2_STREAM_MAIN], with the following conditions: + bufs[BCJ2_STREAM_MAIN] >= dest && + bufs[BCJ2_STREAM_MAIN] - dest >= + BUF_SIZE(BCJ2_STREAM_CALL) + + BUF_SIZE(BCJ2_STREAM_JUMP) + reserve = bufs[BCJ2_STREAM_MAIN] - dest - + ( BUF_SIZE(BCJ2_STREAM_CALL) + + BUF_SIZE(BCJ2_STREAM_JUMP) ) + and additional conditions: + if (it's first call of Bcj2Dec_Decode() after Bcj2Dec_Init()) + { + (reserve != 1) : if (ver < v23.00) + } + else // if there are more than one calls of Bcj2Dec_Decode() after Bcj2Dec_Init()) + { + (reserve >= 6) : if (ver < v23.00) + (reserve >= 4) : if (ver >= v23.00) + We need that (reserve) because after first call of Bcj2Dec_Decode(), + CBcj2Dec::temp can contain up to 4 bytes for writing to (dest). + } + (reserve == 0) is allowed, if we decode full stream via single call of Bcj2Dec_Decode(). + (reserve == 0) also is allowed in case of multi-call, if we use fixed buffers, + and (reserve) is calculated from full (final) sizes of all streams before first call. +*/ + +typedef struct +{ + const Byte *bufs[BCJ2_NUM_STREAMS]; + const Byte *lims[BCJ2_NUM_STREAMS]; + Byte *dest; + const Byte *destLim; + + unsigned state; /* BCJ2_STREAM_MAIN has more priority than BCJ2_STATE_ORIG */ + + UInt32 ip; /* property of starting base for decoding */ + UInt32 temp; /* Byte temp[4]; */ + UInt32 range; + UInt32 code; + CBcj2Prob probs[2 + 256]; +} CBcj2Dec; + + +/* Note: + Bcj2Dec_Init() sets (CBcj2Dec::ip = 0) + if (ip != 0) property is required, the caller must set CBcj2Dec::ip after Bcj2Dec_Init() +*/ +void Bcj2Dec_Init(CBcj2Dec *p); + + +/* Bcj2Dec_Decode(): + returns: + SZ_OK + SZ_ERROR_DATA : if data in 5 starting bytes of BCJ2_STREAM_RC stream are not correct +*/ +SRes Bcj2Dec_Decode(CBcj2Dec *p); + +/* To check that decoding was finished you can compare + sizes of processed streams with sizes known from another sources. 
+ You must do at least one mandatory check from the two following options: + - the check for size of processed output (ORIG) stream. + - the check for size of processed input (MAIN) stream. + additional optional checks: + - the checks for processed sizes of all input streams (MAIN, CALL, JUMP, RC) + - the checks Bcj2Dec_IsMaybeFinished*() + also before actual decoding you can check that the + following condition is met for stream sizes: + ( size(ORIG) == size(MAIN) + size(CALL) + size(JUMP) ) +*/ + +/* (state == BCJ2_STREAM_MAIN) means that decoder is ready for + additional input data in BCJ2_STREAM_MAIN stream. + Note that (state == BCJ2_STREAM_MAIN) is allowed for non-finished decoding. +*/ +#define Bcj2Dec_IsMaybeFinished_state_MAIN(_p_) ((_p_)->state == BCJ2_STREAM_MAIN) + +/* if the stream decoding was finished correctly, then range decoder + part of CBcj2Dec also was finished, and then (CBcj2Dec::code == 0). + Note that (CBcj2Dec::code == 0) is allowed for non-finished decoding. +*/ +#define Bcj2Dec_IsMaybeFinished_code(_p_) ((_p_)->code == 0) + +/* use Bcj2Dec_IsMaybeFinished() only as additional check + after at least one mandatory check from the two following options: + - the check for size of processed output (ORIG) stream. + - the check for size of processed input (MAIN) stream. +*/ +#define Bcj2Dec_IsMaybeFinished(_p_) ( \ + Bcj2Dec_IsMaybeFinished_state_MAIN(_p_) && \ + Bcj2Dec_IsMaybeFinished_code(_p_)) + + + +/* ---------- BCJ2 Encoder ---------- */ + +typedef enum +{ + BCJ2_ENC_FINISH_MODE_CONTINUE, + BCJ2_ENC_FINISH_MODE_END_BLOCK, + BCJ2_ENC_FINISH_MODE_END_STREAM +} EBcj2Enc_FinishMode; + +/* + BCJ2_ENC_FINISH_MODE_CONTINUE: + process non finished encoding. + It notifies the encoder that additional further calls + can provide more input data (src) than provided by current call. 
+ In that case the CBcj2Enc encoder still can move (src) pointer + up to (srcLim), but CBcj2Enc encoder can store some of the last + processed bytes (up to 4 bytes) from src to internal CBcj2Enc::temp[] buffer. + at return: + (CBcj2Enc::src will point to position that includes + processed data and data copied to (temp[]) buffer) + That data from (temp[]) buffer will be used in further calls. + + BCJ2_ENC_FINISH_MODE_END_BLOCK: + finish encoding of current block (ended at srcLim) without RC flushing. + at return: if (CBcj2Enc::state == BCJ2_ENC_STATE_ORIG) && + CBcj2Enc::src == CBcj2Enc::srcLim) + : it shows that block encoding was finished. And the encoder is + ready for new (src) data or for stream finish operation. + finished block means + { + CBcj2Enc has completed block encoding up to (srcLim). + (1 + 4 bytes) or (2 + 4 bytes) CALL/JUMP cortages will + not cross block boundary at (srcLim). + temporary CBcj2Enc buffer for (ORIG) src data is empty. + 3 output uncompressed streams (MAIN, CALL, JUMP) were flushed. + RC stream was not flushed. And RC stream will cross block boundary. + } + Note: some possible implementation of BCJ2 encoder could + write branch marker (e8/e8/0f8x) in one call of Bcj2Enc_Encode(), + and it could calculate symbol for RC in another call of Bcj2Enc_Encode(). + BCJ2 encoder uses ip/fileIp/fileSize/relatLimit values to calculate RC symbol. + And these CBcj2Enc variables can have different values in different Bcj2Enc_Encode() calls. + So caller must finish each block with BCJ2_ENC_FINISH_MODE_END_BLOCK + to ensure that RC symbol is calculated and written in proper block. + + BCJ2_ENC_FINISH_MODE_END_STREAM + finish encoding of stream (ended at srcLim) fully including RC flushing. + at return: if (CBcj2Enc::state == BCJ2_ENC_STATE_FINISHED) + : it shows that stream encoding was finished fully, + and all output streams were flushed fully. + also Bcj2Enc_IsFinished() can be called. 
+*/ + + +/* + 32-bit relative offset in JUMP/CALL commands is + - (mod 4 GiB) for 32-bit x86 code + - signed Int32 for 64-bit x86-64 code + BCJ2 encoder also does internal relative to absolute address conversions. + And there are 2 possible ways to do it: + before v23: we used 32-bit variables and (mod 4 GiB) conversion + since v23: we use 64-bit variables and (signed Int32 offset) conversion. + The absolute address condition for conversion in v23: + ((UInt64)((Int64)ip64 - (Int64)fileIp64 + 5 + (Int32)offset) < (UInt64)fileSize64) + note that if (fileSize64 > 2 GiB). there is difference between + old (mod 4 GiB) way (v22) and new (signed Int32 offset) way (v23). + And new (v23) way is more suitable to encode 64-bit x86-64 code for (fileSize64 > 2 GiB) cases. +*/ + +/* +// for old (v22) way for conversion: +typedef UInt32 CBcj2Enc_ip_unsigned; +typedef Int32 CBcj2Enc_ip_signed; +#define BCJ2_ENC_FileSize_MAX ((UInt32)1 << 31) +*/ +typedef UInt64 CBcj2Enc_ip_unsigned; +typedef Int64 CBcj2Enc_ip_signed; + +/* maximum size of file that can be used for conversion condition */ +#define BCJ2_ENC_FileSize_MAX ((CBcj2Enc_ip_unsigned)0 - 2) + +/* default value of fileSize64_minus1 variable that means + that absolute address limitation will not be used */ +#define BCJ2_ENC_FileSizeField_UNLIMITED ((CBcj2Enc_ip_unsigned)0 - 1) + +/* calculate value that later can be set to CBcj2Enc::fileSize64_minus1 */ +#define BCJ2_ENC_GET_FileSizeField_VAL_FROM_FileSize(fileSize) \ + ((CBcj2Enc_ip_unsigned)(fileSize) - 1) + +/* set CBcj2Enc::fileSize64_minus1 variable from size of file */ +#define Bcj2Enc_SET_FileSize(p, fileSize) \ + (p)->fileSize64_minus1 = BCJ2_ENC_GET_FileSizeField_VAL_FROM_FileSize(fileSize); + + +typedef struct +{ + Byte *bufs[BCJ2_NUM_STREAMS]; + const Byte *lims[BCJ2_NUM_STREAMS]; + const Byte *src; + const Byte *srcLim; + + unsigned state; + EBcj2Enc_FinishMode finishMode; + + Byte context; + Byte flushRem; + Byte isFlushState; + + Byte cache; + UInt32 range; + 
UInt64 low; + UInt64 cacheSize; + + // UInt32 context; // for marker version, it can include marker flag. + + /* (ip64) and (fileIp64) correspond to virtual source stream position + that doesn't include data in temp[] */ + CBcj2Enc_ip_unsigned ip64; /* current (ip) position */ + CBcj2Enc_ip_unsigned fileIp64; /* start (ip) position of current file */ + CBcj2Enc_ip_unsigned fileSize64_minus1; /* size of current file (for conversion limitation) */ + UInt32 relatLimit; /* (relatLimit <= ((UInt32)1 << 31)) : 0 means disable_conversion */ + // UInt32 relatExcludeBits; + + UInt32 tempTarget; + unsigned tempPos; /* the number of bytes that were copied to temp[] buffer + (tempPos <= 4) outside of Bcj2Enc_Encode() */ + // Byte temp[4]; // for marker version + Byte temp[8]; + CBcj2Prob probs[2 + 256]; +} CBcj2Enc; + +void Bcj2Enc_Init(CBcj2Enc *p); + + +/* +Bcj2Enc_Encode(): at exit: + p->State < BCJ2_NUM_STREAMS : we need more buffer space for output stream + (bufs[p->State] == lims[p->State]) + p->State == BCJ2_ENC_STATE_ORIG : we need more data in input src stream + (src == srcLim) + p->State == BCJ2_ENC_STATE_FINISHED : after fully encoded stream +*/ +void Bcj2Enc_Encode(CBcj2Enc *p); + +/* Bcj2Enc encoder can look ahead for up 4 bytes of source stream. + CBcj2Enc::tempPos : is the number of bytes that were copied from input stream to temp[] buffer. + (CBcj2Enc::src) after Bcj2Enc_Encode() is starting position after + fully processed data and after data copied to temp buffer. 
+ So if the caller needs to get real number of fully processed input + bytes (without look ahead data in temp buffer), + the caller must subtruct (CBcj2Enc::tempPos) value from processed size + value that is calculated based on current (CBcj2Enc::src): + cur_processed_pos = Calc_Big_Processed_Pos(enc.src)) - + Bcj2Enc_Get_AvailInputSize_in_Temp(&enc); +*/ +/* get the size of input data that was stored in temp[] buffer: */ +#define Bcj2Enc_Get_AvailInputSize_in_Temp(p) ((p)->tempPos) + +#define Bcj2Enc_IsFinished(p) ((p)->flushRem == 0) + +/* Note : the decoder supports overlapping of marker (0f 80). + But we can eliminate such overlapping cases by setting + the limit for relative offset conversion as + CBcj2Enc::relatLimit <= (0x0f << 24) == (240 MiB) +*/ +/* default value for CBcj2Enc::relatLimit */ +#define BCJ2_ENC_RELAT_LIMIT_DEFAULT ((UInt32)0x0f << 24) +#define BCJ2_ENC_RELAT_LIMIT_MAX ((UInt32)1 << 31) +// #define BCJ2_RELAT_EXCLUDE_NUM_BITS 5 + +EXTERN_C_END + +#endif diff --git a/external/unarr/lzmasdk/Bra.c b/external/unarr/lzmasdk/Bra.c new file mode 100644 index 00000000..22e0e478 --- /dev/null +++ b/external/unarr/lzmasdk/Bra.c @@ -0,0 +1,420 @@ +/* Bra.c -- Branch converters for RISC code +2023-04-02 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include "Bra.h" +#include "CpuArch.h" +#include "RotateDefs.h" + +#if defined(MY_CPU_SIZEOF_POINTER) \ + && ( MY_CPU_SIZEOF_POINTER == 4 \ + || MY_CPU_SIZEOF_POINTER == 8) + #define BR_CONV_USE_OPT_PC_PTR +#endif + +#ifdef BR_CONV_USE_OPT_PC_PTR +#define BR_PC_INIT pc -= (UInt32)(SizeT)p; +#define BR_PC_GET (pc + (UInt32)(SizeT)p) +#else +#define BR_PC_INIT pc += (UInt32)size; +#define BR_PC_GET (pc - (UInt32)(SizeT)(lim - p)) +// #define BR_PC_INIT +// #define BR_PC_GET (pc + (UInt32)(SizeT)(p - data)) +#endif + +#define BR_CONVERT_VAL(v, c) if (encoding) v += c; else v -= c; +// #define BR_CONVERT_VAL(v, c) if (!encoding) c = (UInt32)0 - c; v += c; + +#define Z7_BRANCH_CONV(name) 
z7_BranchConv_ ## name + +#define Z7_BRANCH_FUNC_MAIN(name) \ +static \ +Z7_FORCE_INLINE \ +Z7_ATTRIB_NO_VECTOR \ +Byte *Z7_BRANCH_CONV(name)(Byte *p, SizeT size, UInt32 pc, int encoding) + +#define Z7_BRANCH_FUNC_IMP(name, m, encoding) \ +Z7_NO_INLINE \ +Z7_ATTRIB_NO_VECTOR \ +Byte *m(name)(Byte *data, SizeT size, UInt32 pc) \ + { return Z7_BRANCH_CONV(name)(data, size, pc, encoding); } \ + +#ifdef Z7_EXTRACT_ONLY +#define Z7_BRANCH_FUNCS_IMP(name) \ + Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC, 0) +#else +#define Z7_BRANCH_FUNCS_IMP(name) \ + Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC, 0) \ + Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_ENC, 1) +#endif + +#if defined(__clang__) +#define BR_EXTERNAL_FOR +#define BR_NEXT_ITERATION continue; +#else +#define BR_EXTERNAL_FOR for (;;) +#define BR_NEXT_ITERATION break; +#endif + +#if defined(__clang__) && (__clang_major__ >= 8) \ + || defined(__GNUC__) && (__GNUC__ >= 1000) \ + // GCC is not good for __builtin_expect() here + /* || defined(_MSC_VER) && (_MSC_VER >= 1920) */ + // #define Z7_unlikely [[unlikely]] + // #define Z7_LIKELY(x) (__builtin_expect((x), 1)) + #define Z7_UNLIKELY(x) (__builtin_expect((x), 0)) + // #define Z7_likely [[likely]] +#else + // #define Z7_LIKELY(x) (x) + #define Z7_UNLIKELY(x) (x) + // #define Z7_likely +#endif + + +Z7_BRANCH_FUNC_MAIN(ARM64) +{ + // Byte *p = data; + const Byte *lim; + const UInt32 flag = (UInt32)1 << (24 - 4); + const UInt32 mask = ((UInt32)1 << 24) - (flag << 1); + size &= ~(SizeT)3; + // if (size == 0) return p; + lim = p + size; + BR_PC_INIT + pc -= 4; // because (p) will point to next instruction + + BR_EXTERNAL_FOR + { + // Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE + for (;;) + { + UInt32 v; + if Z7_UNLIKELY(p == lim) + return p; + v = GetUi32a(p); + p += 4; + if Z7_UNLIKELY(((v - 0x94000000) & 0xfc000000) == 0) + { + UInt32 c = BR_PC_GET >> 2; + BR_CONVERT_VAL(v, c) + v &= 0x03ffffff; + v |= 0x94000000; + SetUi32a(p - 4, v) + BR_NEXT_ITERATION + } + // v = 
rotlFixed(v, 8); v += (flag << 8) - 0x90; if Z7_UNLIKELY((v & ((mask << 8) + 0x9f)) == 0) + v -= 0x90000000; if Z7_UNLIKELY((v & 0x9f000000) == 0) + { + UInt32 z, c; + // v = rotrFixed(v, 8); + v += flag; if Z7_UNLIKELY(v & mask) continue; + z = (v & 0xffffffe0) | (v >> 26); + c = (BR_PC_GET >> (12 - 3)) & ~(UInt32)7; + BR_CONVERT_VAL(z, c) + v &= 0x1f; + v |= 0x90000000; + v |= z << 26; + v |= 0x00ffffe0 & ((z & (((flag << 1) - 1))) - flag); + SetUi32a(p - 4, v) + } + } + } +} +Z7_BRANCH_FUNCS_IMP(ARM64) + + +Z7_BRANCH_FUNC_MAIN(ARM) +{ + // Byte *p = data; + const Byte *lim; + size &= ~(SizeT)3; + lim = p + size; + BR_PC_INIT + /* in ARM: branch offset is relative to the +2 instructions from current instruction. + (p) will point to next instruction */ + pc += 8 - 4; + + for (;;) + { + for (;;) + { + if Z7_UNLIKELY(p >= lim) { return p; } p += 4; if Z7_UNLIKELY(p[-1] == 0xeb) break; + if Z7_UNLIKELY(p >= lim) { return p; } p += 4; if Z7_UNLIKELY(p[-1] == 0xeb) break; + } + { + UInt32 v = GetUi32a(p - 4); + UInt32 c = BR_PC_GET >> 2; + BR_CONVERT_VAL(v, c) + v &= 0x00ffffff; + v |= 0xeb000000; + SetUi32a(p - 4, v) + } + } +} +Z7_BRANCH_FUNCS_IMP(ARM) + + +Z7_BRANCH_FUNC_MAIN(PPC) +{ + // Byte *p = data; + const Byte *lim; + size &= ~(SizeT)3; + lim = p + size; + BR_PC_INIT + pc -= 4; // because (p) will point to next instruction + + for (;;) + { + UInt32 v; + for (;;) + { + if Z7_UNLIKELY(p == lim) + return p; + // v = GetBe32a(p); + v = *(UInt32 *)(void *)p; + p += 4; + // if ((v & 0xfc000003) == 0x48000001) break; + // if ((p[-4] & 0xFC) == 0x48 && (p[-1] & 3) == 1) break; + if Z7_UNLIKELY( + ((v - Z7_CONV_BE_TO_NATIVE_CONST32(0x48000001)) + & Z7_CONV_BE_TO_NATIVE_CONST32(0xfc000003)) == 0) break; + } + { + v = Z7_CONV_NATIVE_TO_BE_32(v); + { + UInt32 c = BR_PC_GET; + BR_CONVERT_VAL(v, c) + } + v &= 0x03ffffff; + v |= 0x48000000; + SetBe32a(p - 4, v) + } + } +} +Z7_BRANCH_FUNCS_IMP(PPC) + + +#ifdef Z7_CPU_FAST_ROTATE_SUPPORTED +#define BR_SPARC_USE_ROTATE +#endif 
+ +Z7_BRANCH_FUNC_MAIN(SPARC) +{ + // Byte *p = data; + const Byte *lim; + const UInt32 flag = (UInt32)1 << 22; + size &= ~(SizeT)3; + lim = p + size; + BR_PC_INIT + pc -= 4; // because (p) will point to next instruction + for (;;) + { + UInt32 v; + for (;;) + { + if Z7_UNLIKELY(p == lim) + return p; + /* // the code without GetBe32a(): + { const UInt32 v = GetUi16a(p) & 0xc0ff; p += 4; if (v == 0x40 || v == 0xc07f) break; } + */ + v = GetBe32a(p); + p += 4; + #ifdef BR_SPARC_USE_ROTATE + v = rotlFixed(v, 2); + v += (flag << 2) - 1; + if Z7_UNLIKELY((v & (3 - (flag << 3))) == 0) + #else + v += (UInt32)5 << 29; + v ^= (UInt32)7 << 29; + v += flag; + if Z7_UNLIKELY((v & (0 - (flag << 1))) == 0) + #endif + break; + } + { + // UInt32 v = GetBe32a(p - 4); + #ifndef BR_SPARC_USE_ROTATE + v <<= 2; + #endif + { + UInt32 c = BR_PC_GET; + BR_CONVERT_VAL(v, c) + } + v &= (flag << 3) - 1; + #ifdef BR_SPARC_USE_ROTATE + v -= (flag << 2) - 1; + v = rotrFixed(v, 2); + #else + v -= (flag << 2); + v >>= 2; + v |= (UInt32)1 << 30; + #endif + SetBe32a(p - 4, v) + } + } +} +Z7_BRANCH_FUNCS_IMP(SPARC) + + +Z7_BRANCH_FUNC_MAIN(ARMT) +{ + // Byte *p = data; + Byte *lim; + size &= ~(SizeT)1; + // if (size == 0) return p; + if (size <= 2) return p; + size -= 2; + lim = p + size; + BR_PC_INIT + /* in ARM: branch offset is relative to the +2 instructions from current instruction. 
+ (p) will point to the +2 instructions from current instruction */ + // pc += 4 - 4; + // if (encoding) pc -= 0xf800 << 1; else pc += 0xf800 << 1; + // #define ARMT_TAIL_PROC { goto armt_tail; } + #define ARMT_TAIL_PROC { return p; } + + do + { + /* in MSVC 32-bit x86 compilers: + UInt32 version : it loads value from memory with movzx + Byte version : it loads value to 8-bit register (AL/CL) + movzx version is slightly faster in some cpus + */ + unsigned b1; + // Byte / unsigned + b1 = p[1]; + // optimized version to reduce one (p >= lim) check: + // unsigned a1 = p[1]; b1 = p[3]; p += 2; if Z7_LIKELY((b1 & (a1 ^ 8)) < 0xf8) + for (;;) + { + unsigned b3; // Byte / UInt32 + /* (Byte)(b3) normalization can use low byte computations in MSVC. + It gives smaller code, and no loss of speed in some compilers/cpus. + But new MSVC 32-bit x86 compilers use more slow load + from memory to low byte register in that case. + So we try to use full 32-bit computations for faster code. + */ + // if (p >= lim) { ARMT_TAIL_PROC } b3 = b1 + 8; b1 = p[3]; p += 2; if ((b3 & b1) >= 0xf8) break; + if Z7_UNLIKELY(p >= lim) { ARMT_TAIL_PROC } b3 = p[3]; p += 2; if Z7_UNLIKELY((b3 & (b1 ^ 8)) >= 0xf8) break; + if Z7_UNLIKELY(p >= lim) { ARMT_TAIL_PROC } b1 = p[3]; p += 2; if Z7_UNLIKELY((b1 & (b3 ^ 8)) >= 0xf8) break; + } + { + /* we can adjust pc for (0xf800) to rid of (& 0x7FF) operation. 
+ But gcc/clang for arm64 can use bfi instruction for full code here */ + UInt32 v = + ((UInt32)GetUi16a(p - 2) << 11) | + ((UInt32)GetUi16a(p) & 0x7FF); + /* + UInt32 v = + ((UInt32)p[1 - 2] << 19) + + (((UInt32)p[1] & 0x7) << 8) + + (((UInt32)p[-2] << 11)) + + (p[0]); + */ + p += 2; + { + UInt32 c = BR_PC_GET >> 1; + BR_CONVERT_VAL(v, c) + } + SetUi16a(p - 4, (UInt16)(((v >> 11) & 0x7ff) | 0xf000)) + SetUi16a(p - 2, (UInt16)(v | 0xf800)) + /* + p[-4] = (Byte)(v >> 11); + p[-3] = (Byte)(0xf0 | ((v >> 19) & 0x7)); + p[-2] = (Byte)v; + p[-1] = (Byte)(0xf8 | (v >> 8)); + */ + } + } + while (p < lim); + return p; + // armt_tail: + // if ((Byte)((lim[1] & 0xf8)) != 0xf0) { lim += 2; } return lim; + // return (Byte *)(lim + ((Byte)((lim[1] ^ 0xf0) & 0xf8) == 0 ? 0 : 2)); + // return (Byte *)(lim + (((lim[1] ^ ~0xfu) & ~7u) == 0 ? 0 : 2)); + // return (Byte *)(lim + 2 - (((((unsigned)lim[1] ^ 8) + 8) >> 7) & 2)); +} +Z7_BRANCH_FUNCS_IMP(ARMT) + + +// #define BR_IA64_NO_INLINE + +Z7_BRANCH_FUNC_MAIN(IA64) +{ + // Byte *p = data; + const Byte *lim; + size &= ~(SizeT)15; + lim = p + size; + pc -= 1 << 4; + pc >>= 4 - 1; + // pc -= 1 << 1; + + for (;;) + { + unsigned m; + for (;;) + { + if Z7_UNLIKELY(p == lim) + return p; + m = (unsigned)((UInt32)0x334b0000 >> (*p & 0x1e)); + p += 16; + pc += 1 << 1; + if (m &= 3) + break; + } + { + p += (ptrdiff_t)m * 5 - 20; // negative value is expected here. 
+ do + { + const UInt32 t = + #if defined(MY_CPU_X86_OR_AMD64) + // we use 32-bit load here to reduce code size on x86: + GetUi32(p); + #else + GetUi16(p); + #endif + UInt32 z = GetUi32(p + 1) >> m; + p += 5; + if (((t >> m) & (0x70 << 1)) == 0 + && ((z - (0x5000000 << 1)) & (0xf000000 << 1)) == 0) + { + UInt32 v = (UInt32)((0x8fffff << 1) | 1) & z; + z ^= v; + #ifdef BR_IA64_NO_INLINE + v |= (v & ((UInt32)1 << (23 + 1))) >> 3; + { + UInt32 c = pc; + BR_CONVERT_VAL(v, c) + } + v &= (0x1fffff << 1) | 1; + #else + { + if (encoding) + { + // pc &= ~(0xc00000 << 1); // we just need to clear at least 2 bits + pc &= (0x1fffff << 1) | 1; + v += pc; + } + else + { + // pc |= 0xc00000 << 1; // we need to set at least 2 bits + pc |= ~(UInt32)((0x1fffff << 1) | 1); + v -= pc; + } + } + v &= ~(UInt32)(0x600000 << 1); + #endif + v += (0x700000 << 1); + v &= (0x8fffff << 1) | 1; + z |= v; + z <<= m; + SetUi32(p + 1 - 5, z) + } + m++; + } + while (m &= 3); // while (m < 4); + } + } +} +Z7_BRANCH_FUNCS_IMP(IA64) diff --git a/external/unarr/lzmasdk/Bra.h b/external/unarr/lzmasdk/Bra.h new file mode 100644 index 00000000..a4ee568e --- /dev/null +++ b/external/unarr/lzmasdk/Bra.h @@ -0,0 +1,99 @@ +/* Bra.h -- Branch converters for executables +2023-04-02 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_BRA_H +#define ZIP7_INC_BRA_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +#define Z7_BRANCH_CONV_DEC(name) z7_BranchConv_ ## name ## _Dec +#define Z7_BRANCH_CONV_ENC(name) z7_BranchConv_ ## name ## _Enc +#define Z7_BRANCH_CONV_ST_DEC(name) z7_BranchConvSt_ ## name ## _Dec +#define Z7_BRANCH_CONV_ST_ENC(name) z7_BranchConvSt_ ## name ## _Enc + +#define Z7_BRANCH_CONV_DECL(name) Byte * name(Byte *data, SizeT size, UInt32 pc) +#define Z7_BRANCH_CONV_ST_DECL(name) Byte * name(Byte *data, SizeT size, UInt32 pc, UInt32 *state) + +typedef Z7_BRANCH_CONV_DECL( (*z7_Func_BranchConv)); +typedef Z7_BRANCH_CONV_ST_DECL((*z7_Func_BranchConvSt)); + +#define Z7_BRANCH_CONV_ST_X86_STATE_INIT_VAL 
0
+Z7_BRANCH_CONV_ST_DECL(Z7_BRANCH_CONV_ST_DEC(X86));
+Z7_BRANCH_CONV_ST_DECL(Z7_BRANCH_CONV_ST_ENC(X86));
+
+#define Z7_BRANCH_FUNCS_DECL(name) \
+Z7_BRANCH_CONV_DECL(Z7_BRANCH_CONV_DEC(name)); \
+Z7_BRANCH_CONV_DECL(Z7_BRANCH_CONV_ENC(name));
+
+Z7_BRANCH_FUNCS_DECL(ARM64)
+Z7_BRANCH_FUNCS_DECL(ARM)
+Z7_BRANCH_FUNCS_DECL(ARMT)
+Z7_BRANCH_FUNCS_DECL(PPC)
+Z7_BRANCH_FUNCS_DECL(SPARC)
+Z7_BRANCH_FUNCS_DECL(IA64)
+
+/*
+These functions convert data that contain CPU instructions.
+Each such function converts relative addresses to absolute addresses in some
+branch instructions: CALL (in all converters) and JUMP (X86 converter only).
+Such conversion allows to increase compression ratio, if we compress that data.
+
+There are 2 types of converters:
+  Byte * Conv_RISC (Byte *data, SizeT size, UInt32 pc);
+  Byte * ConvSt_X86(Byte *data, SizeT size, UInt32 pc, UInt32 *state);
+Each Converter supports 2 versions: one for encoding
+and one for decoding (_Enc/_Dec postfixes in function name).
+
+In params:
+  data : data buffer
+  size : size of data
+  pc   : current virtual Program Counter (Instruction Pointer) value
+In/Out param:
+  state : pointer to state variable (for X86 converter only)
+
+Return:
+  The pointer to position in (data) buffer after last byte that was processed.
+  If the caller calls converter again, it must call it starting with that position.
+  But the caller is allowed to move data in buffer, so pointer to
+  current processed position also will be changed for next call.
+  Also the caller must increase internal (pc) value for next call.
+
+Each converter has some characteristics: Endian, Alignment, LookAhead.
+  Type   Endian  Alignment  LookAhead
+
+  X86    little      1         4
+  ARMT   little      2         2
+  ARM    little      4         0
+  ARM64  little      4         0
+  PPC     big        4         0
+  SPARC   big        4         0
+  IA64   little     16         0
+
+  (data) must be aligned for (Alignment).
+ processed size can be calculated as: + SizeT processed = Conv(data, size, pc) - data; + if (processed == 0) + it means that converter needs more data for processing. + If (size < Alignment + LookAhead) + then (processed == 0) is allowed. + +Example code for conversion in loop: + UInt32 pc = 0; + size = 0; + for (;;) + { + size += Load_more_input_data(data + size); + SizeT processed = Conv(data, size, pc) - data; + if (processed == 0 && no_more_input_data_after_size) + break; // we stop convert loop + data += processed; + size -= processed; + pc += processed; + } +*/ + +EXTERN_C_END + +#endif diff --git a/external/unarr/lzmasdk/Bra86.c b/external/unarr/lzmasdk/Bra86.c new file mode 100644 index 00000000..d81f392a --- /dev/null +++ b/external/unarr/lzmasdk/Bra86.c @@ -0,0 +1,187 @@ +/* Bra86.c -- Branch converter for X86 code (BCJ) +2023-04-02 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include "Bra.h" +#include "CpuArch.h" + + +#if defined(MY_CPU_SIZEOF_POINTER) \ + && ( MY_CPU_SIZEOF_POINTER == 4 \ + || MY_CPU_SIZEOF_POINTER == 8) + #define BR_CONV_USE_OPT_PC_PTR +#endif + +#ifdef BR_CONV_USE_OPT_PC_PTR +#define BR_PC_INIT pc -= (UInt32)(SizeT)p; // (MY_uintptr_t) +#define BR_PC_GET (pc + (UInt32)(SizeT)p) +#else +#define BR_PC_INIT pc += (UInt32)size; +#define BR_PC_GET (pc - (UInt32)(SizeT)(lim - p)) +// #define BR_PC_INIT +// #define BR_PC_GET (pc + (UInt32)(SizeT)(p - data)) +#endif + +#define BR_CONVERT_VAL(v, c) if (encoding) v += c; else v -= c; +// #define BR_CONVERT_VAL(v, c) if (!encoding) c = (UInt32)0 - c; v += c; + +#define Z7_BRANCH_CONV_ST(name) z7_BranchConvSt_ ## name + +#define BR86_NEED_CONV_FOR_MS_BYTE(b) ((((b) + 1) & 0xfe) == 0) + +#ifdef MY_CPU_LE_UNALIGN + #define BR86_PREPARE_BCJ_SCAN const UInt32 v = GetUi32(p) ^ 0xe8e8e8e8; + #define BR86_IS_BCJ_BYTE(n) ((v & ((UInt32)0xfe << (n) * 8)) == 0) +#else + #define BR86_PREPARE_BCJ_SCAN + // bad for MSVC X86 (partial write to byte reg): + #define BR86_IS_BCJ_BYTE(n) ((p[n - 4] 
& 0xfe) == 0xe8) + // bad for old MSVC (partial write to byte reg): + // #define BR86_IS_BCJ_BYTE(n) (((*p ^ 0xe8) & 0xfe) == 0) +#endif + +static +Z7_FORCE_INLINE +Z7_ATTRIB_NO_VECTOR +Byte *Z7_BRANCH_CONV_ST(X86)(Byte *p, SizeT size, UInt32 pc, UInt32 *state, int encoding) +{ + if (size < 5) + return p; + { + // Byte *p = data; + const Byte *lim = p + size - 4; + unsigned mask = (unsigned)*state; // & 7; +#ifdef BR_CONV_USE_OPT_PC_PTR + /* if BR_CONV_USE_OPT_PC_PTR is defined: we need to adjust (pc) for (+4), + because call/jump offset is relative to the next instruction. + if BR_CONV_USE_OPT_PC_PTR is not defined : we don't need to adjust (pc) for (+4), + because BR_PC_GET uses (pc - (lim - p)), and lim was adjusted for (-4) before. + */ + pc += 4; +#endif + BR_PC_INIT + goto start; + + for (;; mask |= 4) + { + // cont: mask |= 4; + start: + if (p >= lim) + goto fin; + { + BR86_PREPARE_BCJ_SCAN + p += 4; + if (BR86_IS_BCJ_BYTE(0)) { goto m0; } mask >>= 1; + if (BR86_IS_BCJ_BYTE(1)) { goto m1; } mask >>= 1; + if (BR86_IS_BCJ_BYTE(2)) { goto m2; } mask = 0; + if (BR86_IS_BCJ_BYTE(3)) { goto a3; } + } + goto main_loop; + + m0: p--; + m1: p--; + m2: p--; + if (mask == 0) + goto a3; + if (p > lim) + goto fin_p; + + // if (((0x17u >> mask) & 1) == 0) + if (mask > 4 || mask == 3) + { + mask >>= 1; + continue; // goto cont; + } + mask >>= 1; + if (BR86_NEED_CONV_FOR_MS_BYTE(p[mask])) + continue; // goto cont; + // if (!BR86_NEED_CONV_FOR_MS_BYTE(p[3])) continue; // goto cont; + { + UInt32 v = GetUi32(p); + UInt32 c; + v += (1 << 24); if (v & 0xfe000000) continue; // goto cont; + c = BR_PC_GET; + BR_CONVERT_VAL(v, c) + { + mask <<= 3; + if (BR86_NEED_CONV_FOR_MS_BYTE(v >> mask)) + { + v ^= (((UInt32)0x100 << mask) - 1); + #ifdef MY_CPU_X86 + // for X86 : we can recalculate (c) to reduce register pressure + c = BR_PC_GET; + #endif + BR_CONVERT_VAL(v, c) + } + mask = 0; + } + // v = (v & ((1 << 24) - 1)) - (v & (1 << 24)); + v &= (1 << 25) - 1; v -= (1 << 24); + SetUi32(p, 
v) + p += 4; + goto main_loop; + } + + main_loop: + if (p >= lim) + goto fin; + for (;;) + { + BR86_PREPARE_BCJ_SCAN + p += 4; + if (BR86_IS_BCJ_BYTE(0)) { goto a0; } + if (BR86_IS_BCJ_BYTE(1)) { goto a1; } + if (BR86_IS_BCJ_BYTE(2)) { goto a2; } + if (BR86_IS_BCJ_BYTE(3)) { goto a3; } + if (p >= lim) + goto fin; + } + + a0: p--; + a1: p--; + a2: p--; + a3: + if (p > lim) + goto fin_p; + // if (!BR86_NEED_CONV_FOR_MS_BYTE(p[3])) continue; // goto cont; + { + UInt32 v = GetUi32(p); + UInt32 c; + v += (1 << 24); if (v & 0xfe000000) continue; // goto cont; + c = BR_PC_GET; + BR_CONVERT_VAL(v, c) + // v = (v & ((1 << 24) - 1)) - (v & (1 << 24)); + v &= (1 << 25) - 1; v -= (1 << 24); + SetUi32(p, v) + p += 4; + goto main_loop; + } + } + +fin_p: + p--; +fin: + // the following processing for tail is optional and can be commented + /* + lim += 4; + for (; p < lim; p++, mask >>= 1) + if ((*p & 0xfe) == 0xe8) + break; + */ + *state = (UInt32)mask; + return p; + } +} + + +#define Z7_BRANCH_CONV_ST_FUNC_IMP(name, m, encoding) \ +Z7_NO_INLINE \ +Z7_ATTRIB_NO_VECTOR \ +Byte *m(name)(Byte *data, SizeT size, UInt32 pc, UInt32 *state) \ + { return Z7_BRANCH_CONV_ST(name)(data, size, pc, state, encoding); } + +Z7_BRANCH_CONV_ST_FUNC_IMP(X86, Z7_BRANCH_CONV_ST_DEC, 0) +#ifndef Z7_EXTRACT_ONLY +Z7_BRANCH_CONV_ST_FUNC_IMP(X86, Z7_BRANCH_CONV_ST_ENC, 1) +#endif diff --git a/external/unarr/lzmasdk/Compiler.h b/external/unarr/lzmasdk/Compiler.h new file mode 100644 index 00000000..185a52de --- /dev/null +++ b/external/unarr/lzmasdk/Compiler.h @@ -0,0 +1,159 @@ +/* Compiler.h : Compiler specific defines and pragmas +2023-04-02 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_COMPILER_H +#define ZIP7_INC_COMPILER_H + +#if defined(__clang__) +# define Z7_CLANG_VERSION (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__) +#endif +#if defined(__clang__) && defined(__apple_build_version__) +# define Z7_APPLE_CLANG_VERSION Z7_CLANG_VERSION +#elif defined(__clang__) +# 
define Z7_LLVM_CLANG_VERSION Z7_CLANG_VERSION +#elif defined(__GNUC__) +# define Z7_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) +#endif + +#ifdef _MSC_VER +#if !defined(__clang__) && !defined(__GNUC__) +#define Z7_MSC_VER_ORIGINAL _MSC_VER +#endif +#endif + +#if defined(__MINGW32__) || defined(__MINGW64__) +#define Z7_MINGW +#endif + +// #pragma GCC diagnostic ignored "-Wunknown-pragmas" + +#ifdef __clang__ +// padding size of '' with 4 bytes to alignment boundary +#pragma GCC diagnostic ignored "-Wpadded" +#endif + + +#ifdef _MSC_VER + + #ifdef UNDER_CE + #define RPC_NO_WINDOWS_H + /* #pragma warning(disable : 4115) // '_RPC_ASYNC_STATE' : named type definition in parentheses */ + #pragma warning(disable : 4201) // nonstandard extension used : nameless struct/union + #pragma warning(disable : 4214) // nonstandard extension used : bit field types other than int + #endif + +#if defined(_MSC_VER) && _MSC_VER >= 1800 +#pragma warning(disable : 4464) // relative include path contains '..' 
+#endif + +// == 1200 : -O1 : for __forceinline +// >= 1900 : -O1 : for printf +#pragma warning(disable : 4710) // function not inlined + +#if _MSC_VER < 1900 +// winnt.h: 'Int64ShllMod32' +#pragma warning(disable : 4514) // unreferenced inline function has been removed +#endif + +#if _MSC_VER < 1300 +// #pragma warning(disable : 4702) // unreachable code +// Bra.c : -O1: +#pragma warning(disable : 4714) // function marked as __forceinline not inlined +#endif + +/* +#if _MSC_VER > 1400 && _MSC_VER <= 1900 +// strcat: This function or variable may be unsafe +// sysinfoapi.h: kit10: GetVersion was declared deprecated +#pragma warning(disable : 4996) +#endif +*/ + +#if _MSC_VER > 1200 +// -Wall warnings + +#pragma warning(disable : 4711) // function selected for automatic inline expansion +#pragma warning(disable : 4820) // '2' bytes padding added after data member + +#if _MSC_VER >= 1400 && _MSC_VER < 1920 +// 1400: string.h: _DBG_MEMCPY_INLINE_ +// 1600 - 191x : smmintrin.h __cplusplus' +// is not defined as a preprocessor macro, replacing with '0' for '#if/#elif' +#pragma warning(disable : 4668) + +// 1400 - 1600 : WinDef.h : 'FARPROC' : +// 1900 - 191x : immintrin.h: _readfsbase_u32 +// no function prototype given : converting '()' to '(void)' +#pragma warning(disable : 4255) +#endif + +#if _MSC_VER >= 1914 +// Compiler will insert Spectre mitigation for memory load if /Qspectre switch specified +#pragma warning(disable : 5045) +#endif + +#endif // _MSC_VER > 1200 +#endif // _MSC_VER + + +#if defined(__clang__) && (__clang_major__ >= 4) + #define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE \ + _Pragma("clang loop unroll(disable)") \ + _Pragma("clang loop vectorize(disable)") + #define Z7_ATTRIB_NO_VECTORIZE +#elif defined(__GNUC__) && (__GNUC__ >= 5) + #define Z7_ATTRIB_NO_VECTORIZE __attribute__((optimize("no-tree-vectorize"))) + // __attribute__((optimize("no-unroll-loops"))); + #define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE +#elif defined(_MSC_VER) && 
(_MSC_VER >= 1920) + #define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE \ + _Pragma("loop( no_vector )") + #define Z7_ATTRIB_NO_VECTORIZE +#else + #define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE + #define Z7_ATTRIB_NO_VECTORIZE +#endif + +#if defined(MY_CPU_X86_OR_AMD64) && ( \ + defined(__clang__) && (__clang_major__ >= 4) \ + || defined(__GNUC__) && (__GNUC__ >= 5)) + #define Z7_ATTRIB_NO_SSE __attribute__((__target__("no-sse"))) +#else + #define Z7_ATTRIB_NO_SSE +#endif + +#define Z7_ATTRIB_NO_VECTOR \ + Z7_ATTRIB_NO_VECTORIZE \ + Z7_ATTRIB_NO_SSE + + +#if defined(__clang__) && (__clang_major__ >= 8) \ + || defined(__GNUC__) && (__GNUC__ >= 1000) \ + /* || defined(_MSC_VER) && (_MSC_VER >= 1920) */ + // GCC is not good for __builtin_expect() + #define Z7_LIKELY(x) (__builtin_expect((x), 1)) + #define Z7_UNLIKELY(x) (__builtin_expect((x), 0)) + // #define Z7_unlikely [[unlikely]] + // #define Z7_likely [[likely]] +#else + #define Z7_LIKELY(x) (x) + #define Z7_UNLIKELY(x) (x) + // #define Z7_likely +#endif + + +#if (defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 36000)) +#define Z7_DIAGNOSCTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wreserved-macro-identifier\"") +#define Z7_DIAGNOSCTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER \ + _Pragma("GCC diagnostic pop") +#else +#define Z7_DIAGNOSCTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER +#define Z7_DIAGNOSCTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER +#endif + +#define UNUSED_VAR(x) (void)x; +/* #define UNUSED_VAR(x) x=x; */ + +#endif diff --git a/external/unarr/lzmasdk/CpuArch.c b/external/unarr/lzmasdk/CpuArch.c new file mode 100644 index 00000000..33f8a3ab --- /dev/null +++ b/external/unarr/lzmasdk/CpuArch.c @@ -0,0 +1,823 @@ +/* CpuArch.c -- CPU specific code +2023-05-18 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +// #include + +#include "CpuArch.h" + +#ifdef MY_CPU_X86_OR_AMD64 + +#undef NEED_CHECK_FOR_CPUID +#if 
!defined(MY_CPU_AMD64) +#define NEED_CHECK_FOR_CPUID +#endif + +/* + cpuid instruction supports (subFunction) parameter in ECX, + that is used only with some specific (function) parameter values. + But we always use only (subFunction==0). +*/ +/* + __cpuid(): MSVC and GCC/CLANG use same function/macro name + but parameters are different. + We use MSVC __cpuid() parameters style for our z7_x86_cpuid() function. +*/ + +#if defined(__GNUC__) /* && (__GNUC__ >= 10) */ \ + || defined(__clang__) /* && (__clang_major__ >= 10) */ + +/* there was some CLANG/GCC compilers that have issues with + rbx(ebx) handling in asm blocks in -fPIC mode (__PIC__ is defined). + compiler's contains the macro __cpuid() that is similar to our code. + The history of __cpuid() changes in CLANG/GCC: + GCC: + 2007: it preserved ebx for (__PIC__ && __i386__) + 2013: it preserved rbx and ebx for __PIC__ + 2014: it doesn't preserves rbx and ebx anymore + we suppose that (__GNUC__ >= 5) fixed that __PIC__ ebx/rbx problem. + CLANG: + 2014+: it preserves rbx, but only for 64-bit code. No __PIC__ check. + Why CLANG cares about 64-bit mode only, and doesn't care about ebx (in 32-bit)? + Do we need __PIC__ test for CLANG or we must care about rbx even if + __PIC__ is not defined? +*/ + +#define ASM_LN "\n" + +#if defined(MY_CPU_AMD64) && defined(__PIC__) \ + && ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__)) + +#define x86_cpuid_MACRO(p, func) { \ + __asm__ __volatile__ ( \ + ASM_LN "mov %%rbx, %q1" \ + ASM_LN "cpuid" \ + ASM_LN "xchg %%rbx, %q1" \ + : "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); } + + /* "=&r" selects free register. It can select even rbx, if that register is free. + "=&D" for (RDI) also works, but the code can be larger with "=&D" + "2"(0) means (subFunction = 0), + 2 is (zero-based) index in the output constraint list "=c" (ECX). 
*/ + +#elif defined(MY_CPU_X86) && defined(__PIC__) \ + && ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__)) + +#define x86_cpuid_MACRO(p, func) { \ + __asm__ __volatile__ ( \ + ASM_LN "mov %%ebx, %k1" \ + ASM_LN "cpuid" \ + ASM_LN "xchg %%ebx, %k1" \ + : "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); } + +#else + +#define x86_cpuid_MACRO(p, func) { \ + __asm__ __volatile__ ( \ + ASM_LN "cpuid" \ + : "=a" ((p)[0]), "=b" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); } + +#endif + + +void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func) +{ + x86_cpuid_MACRO(p, func) +} + + +Z7_NO_INLINE +UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void) +{ + #if defined(NEED_CHECK_FOR_CPUID) + #define EFALGS_CPUID_BIT 21 + UInt32 a; + __asm__ __volatile__ ( + ASM_LN "pushf" + ASM_LN "pushf" + ASM_LN "pop %0" + // ASM_LN "movl %0, %1" + // ASM_LN "xorl $0x200000, %0" + ASM_LN "btc %1, %0" + ASM_LN "push %0" + ASM_LN "popf" + ASM_LN "pushf" + ASM_LN "pop %0" + ASM_LN "xorl (%%esp), %0" + + ASM_LN "popf" + ASM_LN + : "=&r" (a) // "=a" + : "i" (EFALGS_CPUID_BIT) + ); + if ((a & (1 << EFALGS_CPUID_BIT)) == 0) + return 0; + #endif + { + UInt32 p[4]; + x86_cpuid_MACRO(p, 0) + return p[0]; + } +} + +#undef ASM_LN + +#elif !defined(_MSC_VER) + +/* +// for gcc/clang and other: we can try to use __cpuid macro: +#include +void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func) +{ + __cpuid(func, p[0], p[1], p[2], p[3]); +} +UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void) +{ + return (UInt32)__get_cpuid_max(0, NULL); +} +*/ +// for unsupported cpuid: +void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func) +{ + UNUSED_VAR(func) + p[0] = p[1] = p[2] = p[3] = 0; +} +UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void) +{ + return 0; +} + +#else // _MSC_VER + +#if !defined(MY_CPU_AMD64) + +UInt32 __declspec(naked) Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void) +{ + #if defined(NEED_CHECK_FOR_CPUID) + #define EFALGS_CPUID_BIT 21 + __asm pushfd 
+ __asm pushfd + /* + __asm pop eax + // __asm mov edx, eax + __asm btc eax, EFALGS_CPUID_BIT + __asm push eax + */ + __asm btc dword ptr [esp], EFALGS_CPUID_BIT + __asm popfd + __asm pushfd + __asm pop eax + // __asm xor eax, edx + __asm xor eax, [esp] + // __asm push edx + __asm popfd + __asm and eax, (1 shl EFALGS_CPUID_BIT) + __asm jz end_func + #endif + __asm push ebx + __asm xor eax, eax // func + __asm xor ecx, ecx // subFunction (optional) for (func == 0) + __asm cpuid + __asm pop ebx + #if defined(NEED_CHECK_FOR_CPUID) + end_func: + #endif + __asm ret 0 +} + +void __declspec(naked) Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func) +{ + UNUSED_VAR(p) + UNUSED_VAR(func) + __asm push ebx + __asm push edi + __asm mov edi, ecx // p + __asm mov eax, edx // func + __asm xor ecx, ecx // subfunction (optional) for (func == 0) + __asm cpuid + __asm mov [edi ], eax + __asm mov [edi + 4], ebx + __asm mov [edi + 8], ecx + __asm mov [edi + 12], edx + __asm pop edi + __asm pop ebx + __asm ret 0 +} + +#else // MY_CPU_AMD64 + + #if _MSC_VER >= 1600 + #include + #define MY_cpuidex __cpuidex + #else +/* + __cpuid (func == (0 or 7)) requires subfunction number in ECX. + MSDN: The __cpuid intrinsic clears the ECX register before calling the cpuid instruction. + __cpuid() in new MSVC clears ECX. + __cpuid() in old MSVC (14.00) x64 doesn't clear ECX + We still can use __cpuid for low (func) values that don't require ECX, + but __cpuid() in old MSVC will be incorrect for some func values: (func == 7). + So here we use the hack for old MSVC to send (subFunction) in ECX register to cpuid instruction, + where ECX value is first parameter for FASTCALL / NO_INLINE func, + So the caller of MY_cpuidex_HACK() sets ECX as subFunction, and + old MSVC for __cpuid() doesn't change ECX and cpuid instruction gets (subFunction) value. + +DON'T remove Z7_NO_INLINE and Z7_FASTCALL for MY_cpuidex_HACK(): !!! 
+*/ +static +Z7_NO_INLINE void Z7_FASTCALL MY_cpuidex_HACK(UInt32 subFunction, UInt32 func, int *CPUInfo) +{ + UNUSED_VAR(subFunction) + __cpuid(CPUInfo, func); +} + #define MY_cpuidex(info, func, func2) MY_cpuidex_HACK(func2, func, info) + #pragma message("======== MY_cpuidex_HACK WAS USED ========") + #endif // _MSC_VER >= 1600 + +#if !defined(MY_CPU_AMD64) +/* inlining for __cpuid() in MSVC x86 (32-bit) produces big ineffective code, + so we disable inlining here */ +Z7_NO_INLINE +#endif +void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func) +{ + MY_cpuidex((int *)p, (int)func, 0); +} + +Z7_NO_INLINE +UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void) +{ + int a[4]; + MY_cpuidex(a, 0, 0); + return a[0]; +} + +#endif // MY_CPU_AMD64 +#endif // _MSC_VER + +#if defined(NEED_CHECK_FOR_CPUID) +#define CHECK_CPUID_IS_SUPPORTED { if (z7_x86_cpuid_GetMaxFunc() == 0) return 0; } +#else +#define CHECK_CPUID_IS_SUPPORTED +#endif +#undef NEED_CHECK_FOR_CPUID + + +static +BoolInt x86cpuid_Func_1(UInt32 *p) +{ + CHECK_CPUID_IS_SUPPORTED + z7_x86_cpuid(p, 1); + return True; +} + +/* +static const UInt32 kVendors[][1] = +{ + { 0x756E6547 }, // , 0x49656E69, 0x6C65746E }, + { 0x68747541 }, // , 0x69746E65, 0x444D4163 }, + { 0x746E6543 } // , 0x48727561, 0x736C7561 } +}; +*/ + +/* +typedef struct +{ + UInt32 maxFunc; + UInt32 vendor[3]; + UInt32 ver; + UInt32 b; + UInt32 c; + UInt32 d; +} Cx86cpuid; + +enum +{ + CPU_FIRM_INTEL, + CPU_FIRM_AMD, + CPU_FIRM_VIA +}; +int x86cpuid_GetFirm(const Cx86cpuid *p); +#define x86cpuid_ver_GetFamily(ver) (((ver >> 16) & 0xff0) | ((ver >> 8) & 0xf)) +#define x86cpuid_ver_GetModel(ver) (((ver >> 12) & 0xf0) | ((ver >> 4) & 0xf)) +#define x86cpuid_ver_GetStepping(ver) (ver & 0xf) + +int x86cpuid_GetFirm(const Cx86cpuid *p) +{ + unsigned i; + for (i = 0; i < sizeof(kVendors) / sizeof(kVendors[0]); i++) + { + const UInt32 *v = kVendors[i]; + if (v[0] == p->vendor[0] + // && v[1] == p->vendor[1] + // && v[2] == p->vendor[2] + ) + return (int)i; + } 
+ return -1; +} + +BoolInt CPU_Is_InOrder() +{ + Cx86cpuid p; + UInt32 family, model; + if (!x86cpuid_CheckAndRead(&p)) + return True; + + family = x86cpuid_ver_GetFamily(p.ver); + model = x86cpuid_ver_GetModel(p.ver); + + switch (x86cpuid_GetFirm(&p)) + { + case CPU_FIRM_INTEL: return (family < 6 || (family == 6 && ( + // In-Order Atom CPU + model == 0x1C // 45 nm, N4xx, D4xx, N5xx, D5xx, 230, 330 + || model == 0x26 // 45 nm, Z6xx + || model == 0x27 // 32 nm, Z2460 + || model == 0x35 // 32 nm, Z2760 + || model == 0x36 // 32 nm, N2xxx, D2xxx + ))); + case CPU_FIRM_AMD: return (family < 5 || (family == 5 && (model < 6 || model == 0xA))); + case CPU_FIRM_VIA: return (family < 6 || (family == 6 && model < 0xF)); + } + return False; // v23 : unknown processors are not In-Order +} +*/ + +#ifdef _WIN32 +#include "7zWindows.h" +#endif + +#if !defined(MY_CPU_AMD64) && defined(_WIN32) + +/* for legacy SSE ia32: there is no user-space cpu instruction to check + that OS supports SSE register storing/restoring on context switches. + So we need some OS-specific function to check that it's safe to use SSE registers. 
+*/ + +Z7_FORCE_INLINE +static BoolInt CPU_Sys_Is_SSE_Supported(void) +{ +#ifdef _MSC_VER + #pragma warning(push) + #pragma warning(disable : 4996) // `GetVersion': was declared deprecated +#endif + /* low byte is major version of Windows + We suppose that any Windows version since + Windows2000 (major == 5) supports SSE registers */ + return (Byte)GetVersion() >= 5; +#if defined(_MSC_VER) + #pragma warning(pop) +#endif +} +#define CHECK_SYS_SSE_SUPPORT if (!CPU_Sys_Is_SSE_Supported()) return False; +#else +#define CHECK_SYS_SSE_SUPPORT +#endif + + +#if !defined(MY_CPU_AMD64) + +BoolInt CPU_IsSupported_CMOV(void) +{ + UInt32 a[4]; + if (!x86cpuid_Func_1(&a[0])) + return 0; + return (a[3] >> 15) & 1; +} + +BoolInt CPU_IsSupported_SSE(void) +{ + UInt32 a[4]; + CHECK_SYS_SSE_SUPPORT + if (!x86cpuid_Func_1(&a[0])) + return 0; + return (a[3] >> 25) & 1; +} + +BoolInt CPU_IsSupported_SSE2(void) +{ + UInt32 a[4]; + CHECK_SYS_SSE_SUPPORT + if (!x86cpuid_Func_1(&a[0])) + return 0; + return (a[3] >> 26) & 1; +} + +#endif + + +static UInt32 x86cpuid_Func_1_ECX(void) +{ + UInt32 a[4]; + CHECK_SYS_SSE_SUPPORT + if (!x86cpuid_Func_1(&a[0])) + return 0; + return a[2]; +} + +BoolInt CPU_IsSupported_AES(void) +{ + return (x86cpuid_Func_1_ECX() >> 25) & 1; +} + +BoolInt CPU_IsSupported_SSSE3(void) +{ + return (x86cpuid_Func_1_ECX() >> 9) & 1; +} + +BoolInt CPU_IsSupported_SSE41(void) +{ + return (x86cpuid_Func_1_ECX() >> 19) & 1; +} + +BoolInt CPU_IsSupported_SHA(void) +{ + CHECK_SYS_SSE_SUPPORT + + if (z7_x86_cpuid_GetMaxFunc() < 7) + return False; + { + UInt32 d[4]; + z7_x86_cpuid(d, 7); + return (d[1] >> 29) & 1; + } +} + +/* +MSVC: _xgetbv() intrinsic is available since VS2010SP1. + MSVC also defines (_XCR_XFEATURE_ENABLED_MASK) macro in + that we can use or check. + For any 32-bit x86 we can use asm code in MSVC, + but MSVC asm code is huge after compilation. + So _xgetbv() is better + +ICC: _xgetbv() intrinsic is available (in what version of ICC?) 
+  ICC defines (__GNUC__) and it supports gnu assembler
+  also ICC supports MASM style code with -use-msasm switch.
+  but ICC doesn't support __attribute__((__target__))
+
+GCC/CLANG 9:
+  _xgetbv() is macro that works via __builtin_ia32_xgetbv()
+  and we need __attribute__((__target__("xsave")).
+  But with __target__("xsave") the function will be not
+  inlined to function that has no __target__("xsave") attribute.
+  If we want _xgetbv() call inlining, then we should use asm version
+  instead of calling _xgetbv().
+  Note: intrinsic is broken before GCC 8.2:
+    https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85684
+*/
+
+#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1100) \
+    || defined(_MSC_VER) && (_MSC_VER >= 1600) && (_MSC_FULL_VER >= 160040219) \
+    || defined(__GNUC__) && (__GNUC__ >= 9) \
+    || defined(__clang__) && (__clang_major__ >= 9)
+// we define ATTRIB_XGETBV, if we want to use predefined _xgetbv() from compiler
+#if defined(__INTEL_COMPILER)
+#define ATTRIB_XGETBV
+#elif defined(__GNUC__) || defined(__clang__)
+// we don't define ATTRIB_XGETBV here, because asm version is better for inlining.
+// #define ATTRIB_XGETBV __attribute__((__target__("xsave"))) +#else +#define ATTRIB_XGETBV +#endif +#endif + +#if defined(ATTRIB_XGETBV) +#include +#endif + + +// XFEATURE_ENABLED_MASK/XCR0 +#define MY_XCR_XFEATURE_ENABLED_MASK 0 + +#if defined(ATTRIB_XGETBV) +ATTRIB_XGETBV +#endif +static UInt64 x86_xgetbv_0(UInt32 num) +{ +#if defined(ATTRIB_XGETBV) + { + return + #if (defined(_MSC_VER)) + _xgetbv(num); + #else + __builtin_ia32_xgetbv( + #if !defined(__clang__) + (int) + #endif + num); + #endif + } + +#elif defined(__GNUC__) || defined(__clang__) || defined(__SUNPRO_CC) + + UInt32 a, d; + #if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)) + __asm__ + ( + "xgetbv" + : "=a"(a), "=d"(d) : "c"(num) : "cc" + ); + #else // is old gcc + __asm__ + ( + ".byte 0x0f, 0x01, 0xd0" "\n\t" + : "=a"(a), "=d"(d) : "c"(num) : "cc" + ); + #endif + return ((UInt64)d << 32) | a; + // return a; + +#elif defined(_MSC_VER) && !defined(MY_CPU_AMD64) + + UInt32 a, d; + __asm { + push eax + push edx + push ecx + mov ecx, num; + // xor ecx, ecx // = MY_XCR_XFEATURE_ENABLED_MASK + _emit 0x0f + _emit 0x01 + _emit 0xd0 + mov a, eax + mov d, edx + pop ecx + pop edx + pop eax + } + return ((UInt64)d << 32) | a; + // return a; + +#else // it's unknown compiler + // #error "Need xgetbv function" + UNUSED_VAR(num) + // for MSVC-X64 we could call external function from external file. + /* Actually we had checked OSXSAVE/AVX in cpuid before. + So it's expected that OS supports at least AVX and below. */ + // if (num != MY_XCR_XFEATURE_ENABLED_MASK) return 0; // if not XCR0 + return + // (1 << 0) | // x87 + (1 << 1) // SSE + | (1 << 2); // AVX + +#endif +} + +#ifdef _WIN32 +/* + Windows versions do not know about new ISA extensions that + can be introduced. 
But we still can use new extensions, + even if Windows doesn't report about supporting them, + But we can use new extensions, only if Windows knows about new ISA extension + that changes the number or size of registers: SSE, AVX/XSAVE, AVX512 + So it's enough to check + MY_PF_AVX_INSTRUCTIONS_AVAILABLE + instead of + MY_PF_AVX2_INSTRUCTIONS_AVAILABLE +*/ +#define MY_PF_XSAVE_ENABLED 17 +// #define MY_PF_SSSE3_INSTRUCTIONS_AVAILABLE 36 +// #define MY_PF_SSE4_1_INSTRUCTIONS_AVAILABLE 37 +// #define MY_PF_SSE4_2_INSTRUCTIONS_AVAILABLE 38 +// #define MY_PF_AVX_INSTRUCTIONS_AVAILABLE 39 +// #define MY_PF_AVX2_INSTRUCTIONS_AVAILABLE 40 +// #define MY_PF_AVX512F_INSTRUCTIONS_AVAILABLE 41 +#endif + +BoolInt CPU_IsSupported_AVX(void) +{ + #ifdef _WIN32 + if (!IsProcessorFeaturePresent(MY_PF_XSAVE_ENABLED)) + return False; + /* PF_AVX_INSTRUCTIONS_AVAILABLE probably is supported starting from + some latest Win10 revisions. But we need AVX in older Windows also. + So we don't use the following check: */ + /* + if (!IsProcessorFeaturePresent(MY_PF_AVX_INSTRUCTIONS_AVAILABLE)) + return False; + */ + #endif + + /* + OS must use new special XSAVE/XRSTOR instructions to save + AVX registers when it required for context switching. + At OS statring: + OS sets CR4.OSXSAVE flag to signal the processor that OS supports the XSAVE extensions. + Also OS sets bitmask in XCR0 register that defines what + registers will be processed by XSAVE instruction: + XCR0.SSE[bit 0] - x87 registers and state + XCR0.SSE[bit 1] - SSE registers and state + XCR0.AVX[bit 2] - AVX registers and state + CR4.OSXSAVE is reflected to CPUID.1:ECX.OSXSAVE[bit 27]. + So we can read that bit in user-space. + XCR0 is available for reading in user-space by new XGETBV instruction. + */ + { + const UInt32 c = x86cpuid_Func_1_ECX(); + if (0 == (1 + & (c >> 28) // AVX instructions are supported by hardware + & (c >> 27))) // OSXSAVE bit: XSAVE and related instructions are enabled by OS. 
+ return False; + } + + /* also we can check + CPUID.1:ECX.XSAVE [bit 26] : that shows that + XSAVE, XRESTOR, XSETBV, XGETBV instructions are supported by hardware. + But that check is redundant, because if OSXSAVE bit is set, then XSAVE is also set */ + + /* If OS have enabled XSAVE extension instructions (OSXSAVE == 1), + in most cases we expect that OS also will support storing/restoring + for AVX and SSE states at least. + But to be ensure for that we call user-space instruction + XGETBV(0) to get XCR0 value that contains bitmask that defines + what exact states(registers) OS have enabled for storing/restoring. + */ + + { + const UInt32 bm = (UInt32)x86_xgetbv_0(MY_XCR_XFEATURE_ENABLED_MASK); + // printf("\n=== XGetBV=%d\n", bm); + return 1 + & (bm >> 1) // SSE state is supported (set by OS) for storing/restoring + & (bm >> 2); // AVX state is supported (set by OS) for storing/restoring + } + // since Win7SP1: we can use GetEnabledXStateFeatures(); +} + + +BoolInt CPU_IsSupported_AVX2(void) +{ + if (!CPU_IsSupported_AVX()) + return False; + if (z7_x86_cpuid_GetMaxFunc() < 7) + return False; + { + UInt32 d[4]; + z7_x86_cpuid(d, 7); + // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]); + return 1 + & (d[1] >> 5); // avx2 + } +} + +BoolInt CPU_IsSupported_VAES_AVX2(void) +{ + if (!CPU_IsSupported_AVX()) + return False; + if (z7_x86_cpuid_GetMaxFunc() < 7) + return False; + { + UInt32 d[4]; + z7_x86_cpuid(d, 7); + // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]); + return 1 + & (d[1] >> 5) // avx2 + // & (d[1] >> 31) // avx512vl + & (d[2] >> 9); // vaes // VEX-256/EVEX + } +} + +BoolInt CPU_IsSupported_PageGB(void) +{ + CHECK_CPUID_IS_SUPPORTED + { + UInt32 d[4]; + z7_x86_cpuid(d, 0x80000000); + if (d[0] < 0x80000001) + return False; + z7_x86_cpuid(d, 0x80000001); + return (d[3] >> 26) & 1; + } +} + + +#elif defined(MY_CPU_ARM_OR_ARM64) + +#ifdef _WIN32 + +#include "7zWindows.h" + +BoolInt CPU_IsSupported_CRC32(void) { return 
IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) ? 1 : 0; } +BoolInt CPU_IsSupported_CRYPTO(void) { return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) ? 1 : 0; } +BoolInt CPU_IsSupported_NEON(void) { return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) ? 1 : 0; } + +#else + +#if defined(__APPLE__) + +/* +#include +#include +static void Print_sysctlbyname(const char *name) +{ + size_t bufSize = 256; + char buf[256]; + int res = sysctlbyname(name, &buf, &bufSize, NULL, 0); + { + int i; + printf("\nres = %d : %s : '%s' : bufSize = %d, numeric", res, name, buf, (unsigned)bufSize); + for (i = 0; i < 20; i++) + printf(" %2x", (unsigned)(Byte)buf[i]); + + } +} +*/ +/* + Print_sysctlbyname("hw.pagesize"); + Print_sysctlbyname("machdep.cpu.brand_string"); +*/ + +static BoolInt z7_sysctlbyname_Get_BoolInt(const char *name) +{ + UInt32 val = 0; + if (z7_sysctlbyname_Get_UInt32(name, &val) == 0 && val == 1) + return 1; + return 0; +} + +BoolInt CPU_IsSupported_CRC32(void) +{ + return z7_sysctlbyname_Get_BoolInt("hw.optional.armv8_crc32"); +} + +BoolInt CPU_IsSupported_NEON(void) +{ + return z7_sysctlbyname_Get_BoolInt("hw.optional.neon"); +} + +#ifdef MY_CPU_ARM64 +#define APPLE_CRYPTO_SUPPORT_VAL 1 +#else +#define APPLE_CRYPTO_SUPPORT_VAL 0 +#endif + +BoolInt CPU_IsSupported_SHA1(void) { return APPLE_CRYPTO_SUPPORT_VAL; } +BoolInt CPU_IsSupported_SHA2(void) { return APPLE_CRYPTO_SUPPORT_VAL; } +BoolInt CPU_IsSupported_AES (void) { return APPLE_CRYPTO_SUPPORT_VAL; } + + +#else // __APPLE__ + +#include + +#define USE_HWCAP + +#ifdef USE_HWCAP + +#include + + #define MY_HWCAP_CHECK_FUNC_2(name1, name2) \ + BoolInt CPU_IsSupported_ ## name1() { return (getauxval(AT_HWCAP) & (HWCAP_ ## name2)) ? 
1 : 0; } + +#ifdef MY_CPU_ARM64 + #define MY_HWCAP_CHECK_FUNC(name) \ + MY_HWCAP_CHECK_FUNC_2(name, name) + MY_HWCAP_CHECK_FUNC_2(NEON, ASIMD) +// MY_HWCAP_CHECK_FUNC (ASIMD) +#elif defined(MY_CPU_ARM) + #define MY_HWCAP_CHECK_FUNC(name) \ + BoolInt CPU_IsSupported_ ## name() { return (getauxval(AT_HWCAP2) & (HWCAP2_ ## name)) ? 1 : 0; } + MY_HWCAP_CHECK_FUNC_2(NEON, NEON) +#endif + +#else // USE_HWCAP + + #define MY_HWCAP_CHECK_FUNC(name) \ + BoolInt CPU_IsSupported_ ## name() { return 0; } + MY_HWCAP_CHECK_FUNC(NEON) + +#endif // USE_HWCAP + +MY_HWCAP_CHECK_FUNC (CRC32) +MY_HWCAP_CHECK_FUNC (SHA1) +MY_HWCAP_CHECK_FUNC (SHA2) +MY_HWCAP_CHECK_FUNC (AES) + +#endif // __APPLE__ +#endif // _WIN32 + +#endif // MY_CPU_ARM_OR_ARM64 + + + +#ifdef __APPLE__ + +#include + +int z7_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize) +{ + return sysctlbyname(name, buf, bufSize, NULL, 0); +} + +int z7_sysctlbyname_Get_UInt32(const char *name, UInt32 *val) +{ + size_t bufSize = sizeof(*val); + const int res = z7_sysctlbyname_Get(name, val, &bufSize); + if (res == 0 && bufSize != sizeof(*val)) + return EFAULT; + return res; +} + +#endif diff --git a/external/unarr/lzmasdk/CpuArch.h b/external/unarr/lzmasdk/CpuArch.h new file mode 100644 index 00000000..8e5d8a54 --- /dev/null +++ b/external/unarr/lzmasdk/CpuArch.h @@ -0,0 +1,523 @@ +/* CpuArch.h -- CPU specific code +2023-04-02 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_CPU_ARCH_H +#define ZIP7_INC_CPU_ARCH_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +/* +MY_CPU_LE means that CPU is LITTLE ENDIAN. +MY_CPU_BE means that CPU is BIG ENDIAN. +If MY_CPU_LE and MY_CPU_BE are not defined, we don't know about ENDIANNESS of platform. + +MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned memory accesses. + +MY_CPU_64BIT means that processor can work with 64-bit registers. 
+ MY_CPU_64BIT can be used to select fast code branch + MY_CPU_64BIT doesn't mean that (sizeof(void *) == 8) +*/ + +#if defined(_M_X64) \ + || defined(_M_AMD64) \ + || defined(__x86_64__) \ + || defined(__AMD64__) \ + || defined(__amd64__) + #define MY_CPU_AMD64 + #ifdef __ILP32__ + #define MY_CPU_NAME "x32" + #define MY_CPU_SIZEOF_POINTER 4 + #else + #define MY_CPU_NAME "x64" + #define MY_CPU_SIZEOF_POINTER 8 + #endif + #define MY_CPU_64BIT +#endif + + +#if defined(_M_IX86) \ + || defined(__i386__) + #define MY_CPU_X86 + #define MY_CPU_NAME "x86" + /* #define MY_CPU_32BIT */ + #define MY_CPU_SIZEOF_POINTER 4 +#endif + + +#if defined(_M_ARM64) \ + || defined(__AARCH64EL__) \ + || defined(__AARCH64EB__) \ + || defined(__aarch64__) + #define MY_CPU_ARM64 + #ifdef __ILP32__ + #define MY_CPU_NAME "arm64-32" + #define MY_CPU_SIZEOF_POINTER 4 + #else + #define MY_CPU_NAME "arm64" + #define MY_CPU_SIZEOF_POINTER 8 + #endif + #define MY_CPU_64BIT +#endif + + +#if defined(_M_ARM) \ + || defined(_M_ARM_NT) \ + || defined(_M_ARMT) \ + || defined(__arm__) \ + || defined(__thumb__) \ + || defined(__ARMEL__) \ + || defined(__ARMEB__) \ + || defined(__THUMBEL__) \ + || defined(__THUMBEB__) + #define MY_CPU_ARM + + #if defined(__thumb__) || defined(__THUMBEL__) || defined(_M_ARMT) + #define MY_CPU_ARMT + #define MY_CPU_NAME "armt" + #else + #define MY_CPU_ARM32 + #define MY_CPU_NAME "arm" + #endif + /* #define MY_CPU_32BIT */ + #define MY_CPU_SIZEOF_POINTER 4 +#endif + + +#if defined(_M_IA64) \ + || defined(__ia64__) + #define MY_CPU_IA64 + #define MY_CPU_NAME "ia64" + #define MY_CPU_64BIT +#endif + + +#if defined(__mips64) \ + || defined(__mips64__) \ + || (defined(__mips) && (__mips == 64 || __mips == 4 || __mips == 3)) + #define MY_CPU_NAME "mips64" + #define MY_CPU_64BIT +#elif defined(__mips__) + #define MY_CPU_NAME "mips" + /* #define MY_CPU_32BIT */ +#endif + + +#if defined(__ppc64__) \ + || defined(__powerpc64__) \ + || defined(__ppc__) \ + || defined(__powerpc__) \ + || 
defined(__PPC__) \ + || defined(_POWER) + +#define MY_CPU_PPC_OR_PPC64 + +#if defined(__ppc64__) \ + || defined(__powerpc64__) \ + || defined(_LP64) \ + || defined(__64BIT__) + #ifdef __ILP32__ + #define MY_CPU_NAME "ppc64-32" + #define MY_CPU_SIZEOF_POINTER 4 + #else + #define MY_CPU_NAME "ppc64" + #define MY_CPU_SIZEOF_POINTER 8 + #endif + #define MY_CPU_64BIT +#else + #define MY_CPU_NAME "ppc" + #define MY_CPU_SIZEOF_POINTER 4 + /* #define MY_CPU_32BIT */ +#endif +#endif + + +#if defined(__riscv) \ + || defined(__riscv__) + #if __riscv_xlen == 32 + #define MY_CPU_NAME "riscv32" + #elif __riscv_xlen == 64 + #define MY_CPU_NAME "riscv64" + #else + #define MY_CPU_NAME "riscv" + #endif +#endif + + +#if defined(MY_CPU_X86) || defined(MY_CPU_AMD64) +#define MY_CPU_X86_OR_AMD64 +#endif + +#if defined(MY_CPU_ARM) || defined(MY_CPU_ARM64) +#define MY_CPU_ARM_OR_ARM64 +#endif + + +#ifdef _WIN32 + + #ifdef MY_CPU_ARM + #define MY_CPU_ARM_LE + #endif + + #ifdef MY_CPU_ARM64 + #define MY_CPU_ARM64_LE + #endif + + #ifdef _M_IA64 + #define MY_CPU_IA64_LE + #endif + +#endif + + +#if defined(MY_CPU_X86_OR_AMD64) \ + || defined(MY_CPU_ARM_LE) \ + || defined(MY_CPU_ARM64_LE) \ + || defined(MY_CPU_IA64_LE) \ + || defined(__LITTLE_ENDIAN__) \ + || defined(__ARMEL__) \ + || defined(__THUMBEL__) \ + || defined(__AARCH64EL__) \ + || defined(__MIPSEL__) \ + || defined(__MIPSEL) \ + || defined(_MIPSEL) \ + || defined(__BFIN__) \ + || (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)) + #define MY_CPU_LE +#endif + +#if defined(__BIG_ENDIAN__) \ + || defined(__ARMEB__) \ + || defined(__THUMBEB__) \ + || defined(__AARCH64EB__) \ + || defined(__MIPSEB__) \ + || defined(__MIPSEB) \ + || defined(_MIPSEB) \ + || defined(__m68k__) \ + || defined(__s390__) \ + || defined(__s390x__) \ + || defined(__zarch__) \ + || (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)) + #define MY_CPU_BE +#endif + + +#if defined(MY_CPU_LE) && defined(MY_CPU_BE) + #error 
Stop_Compiling_Bad_Endian +#endif + +#if !defined(MY_CPU_LE) && !defined(MY_CPU_BE) + #error Stop_Compiling_CPU_ENDIAN_must_be_detected_at_compile_time +#endif + +#if defined(MY_CPU_32BIT) && defined(MY_CPU_64BIT) + #error Stop_Compiling_Bad_32_64_BIT +#endif + +#ifdef __SIZEOF_POINTER__ + #ifdef MY_CPU_SIZEOF_POINTER + #if MY_CPU_SIZEOF_POINTER != __SIZEOF_POINTER__ + #error Stop_Compiling_Bad_MY_CPU_PTR_SIZE + #endif + #else + #define MY_CPU_SIZEOF_POINTER __SIZEOF_POINTER__ + #endif +#endif + +#if defined(MY_CPU_SIZEOF_POINTER) && (MY_CPU_SIZEOF_POINTER == 4) +#if defined (_LP64) + #error Stop_Compiling_Bad_MY_CPU_PTR_SIZE +#endif +#endif + +#ifdef _MSC_VER + #if _MSC_VER >= 1300 + #define MY_CPU_pragma_pack_push_1 __pragma(pack(push, 1)) + #define MY_CPU_pragma_pop __pragma(pack(pop)) + #else + #define MY_CPU_pragma_pack_push_1 + #define MY_CPU_pragma_pop + #endif +#else + #ifdef __xlC__ + #define MY_CPU_pragma_pack_push_1 _Pragma("pack(1)") + #define MY_CPU_pragma_pop _Pragma("pack()") + #else + #define MY_CPU_pragma_pack_push_1 _Pragma("pack(push, 1)") + #define MY_CPU_pragma_pop _Pragma("pack(pop)") + #endif +#endif + + +#ifndef MY_CPU_NAME + #ifdef MY_CPU_LE + #define MY_CPU_NAME "LE" + #elif defined(MY_CPU_BE) + #define MY_CPU_NAME "BE" + #else + /* + #define MY_CPU_NAME "" + */ + #endif +#endif + + + + + +#ifdef __has_builtin + #define Z7_has_builtin(x) __has_builtin(x) +#else + #define Z7_has_builtin(x) 0 +#endif + + +#define Z7_BSWAP32_CONST(v) \ + ( (((UInt32)(v) << 24) ) \ + | (((UInt32)(v) << 8) & (UInt32)0xff0000) \ + | (((UInt32)(v) >> 8) & (UInt32)0xff00 ) \ + | (((UInt32)(v) >> 24) )) + + +#if defined(_MSC_VER) && (_MSC_VER >= 1300) + +#include + +/* Note: these macros will use bswap instruction (486), that is unsupported in 386 cpu */ + +#pragma intrinsic(_byteswap_ushort) +#pragma intrinsic(_byteswap_ulong) +#pragma intrinsic(_byteswap_uint64) + +#define Z7_BSWAP16(v) _byteswap_ushort(v) +#define Z7_BSWAP32(v) _byteswap_ulong (v) +#define 
Z7_BSWAP64(v) _byteswap_uint64(v) +#define Z7_CPU_FAST_BSWAP_SUPPORTED + +#elif (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \ + || (defined(__clang__) && Z7_has_builtin(__builtin_bswap16)) + +#define Z7_BSWAP16(v) __builtin_bswap16(v) +#define Z7_BSWAP32(v) __builtin_bswap32(v) +#define Z7_BSWAP64(v) __builtin_bswap64(v) +#define Z7_CPU_FAST_BSWAP_SUPPORTED + +#else + +#define Z7_BSWAP16(v) ((UInt16) \ + ( ((UInt32)(v) << 8) \ + | ((UInt32)(v) >> 8) \ + )) + +#define Z7_BSWAP32(v) Z7_BSWAP32_CONST(v) + +#define Z7_BSWAP64(v) \ + ( ( ( (UInt64)(v) ) << 8 * 7 ) \ + | ( ( (UInt64)(v) & ((UInt32)0xff << 8 * 1) ) << 8 * 5 ) \ + | ( ( (UInt64)(v) & ((UInt32)0xff << 8 * 2) ) << 8 * 3 ) \ + | ( ( (UInt64)(v) & ((UInt32)0xff << 8 * 3) ) << 8 * 1 ) \ + | ( ( (UInt64)(v) >> 8 * 1 ) & ((UInt32)0xff << 8 * 3) ) \ + | ( ( (UInt64)(v) >> 8 * 3 ) & ((UInt32)0xff << 8 * 2) ) \ + | ( ( (UInt64)(v) >> 8 * 5 ) & ((UInt32)0xff << 8 * 1) ) \ + | ( ( (UInt64)(v) >> 8 * 7 ) ) \ + ) + +#endif + + + +#ifdef MY_CPU_LE + #if defined(MY_CPU_X86_OR_AMD64) \ + || defined(MY_CPU_ARM64) + #define MY_CPU_LE_UNALIGN + #define MY_CPU_LE_UNALIGN_64 + #elif defined(__ARM_FEATURE_UNALIGNED) + /* gcc9 for 32-bit arm can use LDRD instruction that requires 32-bit alignment. + So we can't use unaligned 64-bit operations. 
*/ + #define MY_CPU_LE_UNALIGN + #endif +#endif + + +#ifdef MY_CPU_LE_UNALIGN + +#define GetUi16(p) (*(const UInt16 *)(const void *)(p)) +#define GetUi32(p) (*(const UInt32 *)(const void *)(p)) +#ifdef MY_CPU_LE_UNALIGN_64 +#define GetUi64(p) (*(const UInt64 *)(const void *)(p)) +#define SetUi64(p, v) { *(UInt64 *)(void *)(p) = (v); } +#endif + +#define SetUi16(p, v) { *(UInt16 *)(void *)(p) = (v); } +#define SetUi32(p, v) { *(UInt32 *)(void *)(p) = (v); } + +#else + +#define GetUi16(p) ( (UInt16) ( \ + ((const Byte *)(p))[0] | \ + ((UInt16)((const Byte *)(p))[1] << 8) )) + +#define GetUi32(p) ( \ + ((const Byte *)(p))[0] | \ + ((UInt32)((const Byte *)(p))[1] << 8) | \ + ((UInt32)((const Byte *)(p))[2] << 16) | \ + ((UInt32)((const Byte *)(p))[3] << 24)) + +#define SetUi16(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \ + _ppp_[0] = (Byte)_vvv_; \ + _ppp_[1] = (Byte)(_vvv_ >> 8); } + +#define SetUi32(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \ + _ppp_[0] = (Byte)_vvv_; \ + _ppp_[1] = (Byte)(_vvv_ >> 8); \ + _ppp_[2] = (Byte)(_vvv_ >> 16); \ + _ppp_[3] = (Byte)(_vvv_ >> 24); } + +#endif + + +#ifndef GetUi64 +#define GetUi64(p) (GetUi32(p) | ((UInt64)GetUi32(((const Byte *)(p)) + 4) << 32)) +#endif + +#ifndef SetUi64 +#define SetUi64(p, v) { Byte *_ppp2_ = (Byte *)(p); UInt64 _vvv2_ = (v); \ + SetUi32(_ppp2_ , (UInt32)_vvv2_) \ + SetUi32(_ppp2_ + 4, (UInt32)(_vvv2_ >> 32)) } +#endif + + +#if defined(MY_CPU_LE_UNALIGN) && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) + +#define GetBe32(p) Z7_BSWAP32 (*(const UInt32 *)(const void *)(p)) +#define SetBe32(p, v) { (*(UInt32 *)(void *)(p)) = Z7_BSWAP32(v); } + +#if defined(MY_CPU_LE_UNALIGN_64) +#define GetBe64(p) Z7_BSWAP64 (*(const UInt64 *)(const void *)(p)) +#endif + +#else + +#define GetBe32(p) ( \ + ((UInt32)((const Byte *)(p))[0] << 24) | \ + ((UInt32)((const Byte *)(p))[1] << 16) | \ + ((UInt32)((const Byte *)(p))[2] << 8) | \ + ((const Byte *)(p))[3] ) + +#define SetBe32(p, v) { Byte *_ppp_ = (Byte 
*)(p); UInt32 _vvv_ = (v); \ + _ppp_[0] = (Byte)(_vvv_ >> 24); \ + _ppp_[1] = (Byte)(_vvv_ >> 16); \ + _ppp_[2] = (Byte)(_vvv_ >> 8); \ + _ppp_[3] = (Byte)_vvv_; } + +#endif + +#ifndef GetBe64 +#define GetBe64(p) (((UInt64)GetBe32(p) << 32) | GetBe32(((const Byte *)(p)) + 4)) +#endif + +#ifndef GetBe16 +#define GetBe16(p) ( (UInt16) ( \ + ((UInt16)((const Byte *)(p))[0] << 8) | \ + ((const Byte *)(p))[1] )) +#endif + + +#if defined(MY_CPU_BE) +#define Z7_CONV_BE_TO_NATIVE_CONST32(v) (v) +#define Z7_CONV_LE_TO_NATIVE_CONST32(v) Z7_BSWAP32_CONST(v) +#define Z7_CONV_NATIVE_TO_BE_32(v) (v) +#elif defined(MY_CPU_LE) +#define Z7_CONV_BE_TO_NATIVE_CONST32(v) Z7_BSWAP32_CONST(v) +#define Z7_CONV_LE_TO_NATIVE_CONST32(v) (v) +#define Z7_CONV_NATIVE_TO_BE_32(v) Z7_BSWAP32(v) +#else +#error Stop_Compiling_Unknown_Endian_CONV +#endif + + +#if defined(MY_CPU_BE) + +#define GetBe32a(p) (*(const UInt32 *)(const void *)(p)) +#define GetBe16a(p) (*(const UInt16 *)(const void *)(p)) +#define SetBe32a(p, v) { *(UInt32 *)(void *)(p) = (v); } +#define SetBe16a(p, v) { *(UInt16 *)(void *)(p) = (v); } + +#define GetUi32a(p) GetUi32(p) +#define GetUi16a(p) GetUi16(p) +#define SetUi32a(p, v) SetUi32(p, v) +#define SetUi16a(p, v) SetUi16(p, v) + +#elif defined(MY_CPU_LE) + +#define GetUi32a(p) (*(const UInt32 *)(const void *)(p)) +#define GetUi16a(p) (*(const UInt16 *)(const void *)(p)) +#define SetUi32a(p, v) { *(UInt32 *)(void *)(p) = (v); } +#define SetUi16a(p, v) { *(UInt16 *)(void *)(p) = (v); } + +#define GetBe32a(p) GetBe32(p) +#define GetBe16a(p) GetBe16(p) +#define SetBe32a(p, v) SetBe32(p, v) +#define SetBe16a(p, v) SetBe16(p, v) + +#else +#error Stop_Compiling_Unknown_Endian_CPU_a +#endif + + +#if defined(MY_CPU_X86_OR_AMD64) \ + || defined(MY_CPU_ARM_OR_ARM64) \ + || defined(MY_CPU_PPC_OR_PPC64) + #define Z7_CPU_FAST_ROTATE_SUPPORTED +#endif + + +#ifdef MY_CPU_X86_OR_AMD64 + +void Z7_FASTCALL z7_x86_cpuid(UInt32 a[4], UInt32 function); +UInt32 Z7_FASTCALL 
z7_x86_cpuid_GetMaxFunc(void); +#if defined(MY_CPU_AMD64) +#define Z7_IF_X86_CPUID_SUPPORTED +#else +#define Z7_IF_X86_CPUID_SUPPORTED if (z7_x86_cpuid_GetMaxFunc()) +#endif + +BoolInt CPU_IsSupported_AES(void); +BoolInt CPU_IsSupported_AVX(void); +BoolInt CPU_IsSupported_AVX2(void); +BoolInt CPU_IsSupported_VAES_AVX2(void); +BoolInt CPU_IsSupported_CMOV(void); +BoolInt CPU_IsSupported_SSE(void); +BoolInt CPU_IsSupported_SSE2(void); +BoolInt CPU_IsSupported_SSSE3(void); +BoolInt CPU_IsSupported_SSE41(void); +BoolInt CPU_IsSupported_SHA(void); +BoolInt CPU_IsSupported_PageGB(void); + +#elif defined(MY_CPU_ARM_OR_ARM64) + +BoolInt CPU_IsSupported_CRC32(void); +BoolInt CPU_IsSupported_NEON(void); + +#if defined(_WIN32) +BoolInt CPU_IsSupported_CRYPTO(void); +#define CPU_IsSupported_SHA1 CPU_IsSupported_CRYPTO +#define CPU_IsSupported_SHA2 CPU_IsSupported_CRYPTO +#define CPU_IsSupported_AES CPU_IsSupported_CRYPTO +#else +BoolInt CPU_IsSupported_SHA1(void); +BoolInt CPU_IsSupported_SHA2(void); +BoolInt CPU_IsSupported_AES(void); +#endif + +#endif + +#if defined(__APPLE__) +int z7_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize); +int z7_sysctlbyname_Get_UInt32(const char *name, UInt32 *val); +#endif + +EXTERN_C_END + +#endif diff --git a/external/unarr/lzmasdk/Delta.c b/external/unarr/lzmasdk/Delta.c new file mode 100644 index 00000000..c4a4499f --- /dev/null +++ b/external/unarr/lzmasdk/Delta.c @@ -0,0 +1,169 @@ +/* Delta.c -- Delta converter +2021-02-09 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include "Delta.h" + +void Delta_Init(Byte *state) +{ + unsigned i; + for (i = 0; i < DELTA_STATE_SIZE; i++) + state[i] = 0; +} + + +void Delta_Encode(Byte *state, unsigned delta, Byte *data, SizeT size) +{ + Byte temp[DELTA_STATE_SIZE]; + + if (size == 0) + return; + + { + unsigned i = 0; + do + temp[i] = state[i]; + while (++i != delta); + } + + if (size <= delta) + { + unsigned i = 0, k; + do + { + Byte b = *data; + *data++ = (Byte)(b - 
temp[i]); + temp[i] = b; + } + while (++i != size); + + k = 0; + + do + { + if (i == delta) + i = 0; + state[k] = temp[i++]; + } + while (++k != delta); + + return; + } + + { + Byte *p = data + size - delta; + { + unsigned i = 0; + do + state[i] = *p++; + while (++i != delta); + } + { + const Byte *lim = data + delta; + ptrdiff_t dif = -(ptrdiff_t)delta; + + if (((ptrdiff_t)size + dif) & 1) + { + --p; *p = (Byte)(*p - p[dif]); + } + + while (p != lim) + { + --p; *p = (Byte)(*p - p[dif]); + --p; *p = (Byte)(*p - p[dif]); + } + + dif = -dif; + + do + { + --p; *p = (Byte)(*p - temp[--dif]); + } + while (dif != 0); + } + } +} + + +void Delta_Decode(Byte *state, unsigned delta, Byte *data, SizeT size) +{ + unsigned i; + const Byte *lim; + + if (size == 0) + return; + + i = 0; + lim = data + size; + + if (size <= delta) + { + do + *data = (Byte)(*data + state[i++]); + while (++data != lim); + + for (; delta != i; state++, delta--) + *state = state[i]; + data -= i; + } + else + { + /* + #define B(n) b ## n + #define I(n) Byte B(n) = state[n]; + #define U(n) { B(n) = (Byte)((B(n)) + *data++); data[-1] = (B(n)); } + #define F(n) if (data != lim) { U(n) } + + if (delta == 1) + { + I(0) + if ((lim - data) & 1) { U(0) } + while (data != lim) { U(0) U(0) } + data -= 1; + } + else if (delta == 2) + { + I(0) I(1) + lim -= 1; while (data < lim) { U(0) U(1) } + lim += 1; F(0) + data -= 2; + } + else if (delta == 3) + { + I(0) I(1) I(2) + lim -= 2; while (data < lim) { U(0) U(1) U(2) } + lim += 2; F(0) F(1) + data -= 3; + } + else if (delta == 4) + { + I(0) I(1) I(2) I(3) + lim -= 3; while (data < lim) { U(0) U(1) U(2) U(3) } + lim += 3; F(0) F(1) F(2) + data -= 4; + } + else + */ + { + do + { + *data = (Byte)(*data + state[i++]); + data++; + } + while (i != delta); + + { + ptrdiff_t dif = -(ptrdiff_t)delta; + do + *data = (Byte)(*data + data[dif]); + while (++data != lim); + data += dif; + } + } + } + + do + *state++ = *data; + while (++data != lim); +} diff --git 
a/external/unarr/lzmasdk/Delta.h b/external/unarr/lzmasdk/Delta.h new file mode 100644 index 00000000..70609541 --- /dev/null +++ b/external/unarr/lzmasdk/Delta.h @@ -0,0 +1,19 @@ +/* Delta.h -- Delta converter +2023-03-03 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_DELTA_H +#define ZIP7_INC_DELTA_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +#define DELTA_STATE_SIZE 256 + +void Delta_Init(Byte *state); +void Delta_Encode(Byte *state, unsigned delta, Byte *data, SizeT size); +void Delta_Decode(Byte *state, unsigned delta, Byte *data, SizeT size); + +EXTERN_C_END + +#endif diff --git a/external/unarr/lzmasdk/Lzma2Dec.c b/external/unarr/lzmasdk/Lzma2Dec.c new file mode 100644 index 00000000..388cbc71 --- /dev/null +++ b/external/unarr/lzmasdk/Lzma2Dec.c @@ -0,0 +1,491 @@ +/* Lzma2Dec.c -- LZMA2 Decoder +2023-03-03 : Igor Pavlov : Public domain */ + +/* #define SHOW_DEBUG_INFO */ + +#include "Precomp.h" + +#ifdef SHOW_DEBUG_INFO +#include +#endif + +#include + +#include "Lzma2Dec.h" + +/* +00000000 - End of data +00000001 U U - Uncompressed, reset dic, need reset state and set new prop +00000010 U U - Uncompressed, no reset +100uuuuu U U P P - LZMA, no reset +101uuuuu U U P P - LZMA, reset state +110uuuuu U U P P S - LZMA, reset state + set new prop +111uuuuu U U P P S - LZMA, reset state + set new prop, reset dic + + u, U - Unpack Size + P - Pack Size + S - Props +*/ + +#define LZMA2_CONTROL_COPY_RESET_DIC 1 + +#define LZMA2_IS_UNCOMPRESSED_STATE(p) (((p)->control & (1 << 7)) == 0) + +#define LZMA2_LCLP_MAX 4 +#define LZMA2_DIC_SIZE_FROM_PROP(p) (((UInt32)2 | ((p) & 1)) << ((p) / 2 + 11)) + +#ifdef SHOW_DEBUG_INFO +#define PRF(x) x +#else +#define PRF(x) +#endif + +typedef enum +{ + LZMA2_STATE_CONTROL, + LZMA2_STATE_UNPACK0, + LZMA2_STATE_UNPACK1, + LZMA2_STATE_PACK0, + LZMA2_STATE_PACK1, + LZMA2_STATE_PROP, + LZMA2_STATE_DATA, + LZMA2_STATE_DATA_CONT, + LZMA2_STATE_FINISHED, + LZMA2_STATE_ERROR +} ELzma2State; + +static SRes Lzma2Dec_GetOldProps(Byte 
prop, Byte *props) +{ + UInt32 dicSize; + if (prop > 40) + return SZ_ERROR_UNSUPPORTED; + dicSize = (prop == 40) ? 0xFFFFFFFF : LZMA2_DIC_SIZE_FROM_PROP(prop); + props[0] = (Byte)LZMA2_LCLP_MAX; + props[1] = (Byte)(dicSize); + props[2] = (Byte)(dicSize >> 8); + props[3] = (Byte)(dicSize >> 16); + props[4] = (Byte)(dicSize >> 24); + return SZ_OK; +} + +SRes Lzma2Dec_AllocateProbs(CLzma2Dec *p, Byte prop, ISzAllocPtr alloc) +{ + Byte props[LZMA_PROPS_SIZE]; + RINOK(Lzma2Dec_GetOldProps(prop, props)) + return LzmaDec_AllocateProbs(&p->decoder, props, LZMA_PROPS_SIZE, alloc); +} + +SRes Lzma2Dec_Allocate(CLzma2Dec *p, Byte prop, ISzAllocPtr alloc) +{ + Byte props[LZMA_PROPS_SIZE]; + RINOK(Lzma2Dec_GetOldProps(prop, props)) + return LzmaDec_Allocate(&p->decoder, props, LZMA_PROPS_SIZE, alloc); +} + +void Lzma2Dec_Init(CLzma2Dec *p) +{ + p->state = LZMA2_STATE_CONTROL; + p->needInitLevel = 0xE0; + p->isExtraMode = False; + p->unpackSize = 0; + + // p->decoder.dicPos = 0; // we can use it instead of full init + LzmaDec_Init(&p->decoder); +} + +// ELzma2State +static unsigned Lzma2Dec_UpdateState(CLzma2Dec *p, Byte b) +{ + switch (p->state) + { + case LZMA2_STATE_CONTROL: + p->isExtraMode = False; + p->control = b; + PRF(printf("\n %8X", (unsigned)p->decoder.dicPos)); + PRF(printf(" %02X", (unsigned)b)); + if (b == 0) + return LZMA2_STATE_FINISHED; + if (LZMA2_IS_UNCOMPRESSED_STATE(p)) + { + if (b == LZMA2_CONTROL_COPY_RESET_DIC) + p->needInitLevel = 0xC0; + else if (b > 2 || p->needInitLevel == 0xE0) + return LZMA2_STATE_ERROR; + } + else + { + if (b < p->needInitLevel) + return LZMA2_STATE_ERROR; + p->needInitLevel = 0; + p->unpackSize = (UInt32)(b & 0x1F) << 16; + } + return LZMA2_STATE_UNPACK0; + + case LZMA2_STATE_UNPACK0: + p->unpackSize |= (UInt32)b << 8; + return LZMA2_STATE_UNPACK1; + + case LZMA2_STATE_UNPACK1: + p->unpackSize |= (UInt32)b; + p->unpackSize++; + PRF(printf(" %7u", (unsigned)p->unpackSize)); + return LZMA2_IS_UNCOMPRESSED_STATE(p) ? 
LZMA2_STATE_DATA : LZMA2_STATE_PACK0; + + case LZMA2_STATE_PACK0: + p->packSize = (UInt32)b << 8; + return LZMA2_STATE_PACK1; + + case LZMA2_STATE_PACK1: + p->packSize |= (UInt32)b; + p->packSize++; + // if (p->packSize < 5) return LZMA2_STATE_ERROR; + PRF(printf(" %5u", (unsigned)p->packSize)); + return (p->control & 0x40) ? LZMA2_STATE_PROP : LZMA2_STATE_DATA; + + case LZMA2_STATE_PROP: + { + unsigned lc, lp; + if (b >= (9 * 5 * 5)) + return LZMA2_STATE_ERROR; + lc = b % 9; + b /= 9; + p->decoder.prop.pb = (Byte)(b / 5); + lp = b % 5; + if (lc + lp > LZMA2_LCLP_MAX) + return LZMA2_STATE_ERROR; + p->decoder.prop.lc = (Byte)lc; + p->decoder.prop.lp = (Byte)lp; + return LZMA2_STATE_DATA; + } + } + return LZMA2_STATE_ERROR; +} + +static void LzmaDec_UpdateWithUncompressed(CLzmaDec *p, const Byte *src, SizeT size) +{ + memcpy(p->dic + p->dicPos, src, size); + p->dicPos += size; + if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= size) + p->checkDicSize = p->prop.dicSize; + p->processedPos += (UInt32)size; +} + +void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState); + + +SRes Lzma2Dec_DecodeToDic(CLzma2Dec *p, SizeT dicLimit, + const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status) +{ + SizeT inSize = *srcLen; + *srcLen = 0; + *status = LZMA_STATUS_NOT_SPECIFIED; + + while (p->state != LZMA2_STATE_ERROR) + { + SizeT dicPos; + + if (p->state == LZMA2_STATE_FINISHED) + { + *status = LZMA_STATUS_FINISHED_WITH_MARK; + return SZ_OK; + } + + dicPos = p->decoder.dicPos; + + if (dicPos == dicLimit && finishMode == LZMA_FINISH_ANY) + { + *status = LZMA_STATUS_NOT_FINISHED; + return SZ_OK; + } + + if (p->state != LZMA2_STATE_DATA && p->state != LZMA2_STATE_DATA_CONT) + { + if (*srcLen == inSize) + { + *status = LZMA_STATUS_NEEDS_MORE_INPUT; + return SZ_OK; + } + (*srcLen)++; + p->state = Lzma2Dec_UpdateState(p, *src++); + if (dicPos == dicLimit && p->state != LZMA2_STATE_FINISHED) + break; + continue; + } + + { + 
SizeT inCur = inSize - *srcLen; + SizeT outCur = dicLimit - dicPos; + ELzmaFinishMode curFinishMode = LZMA_FINISH_ANY; + + if (outCur >= p->unpackSize) + { + outCur = (SizeT)p->unpackSize; + curFinishMode = LZMA_FINISH_END; + } + + if (LZMA2_IS_UNCOMPRESSED_STATE(p)) + { + if (inCur == 0) + { + *status = LZMA_STATUS_NEEDS_MORE_INPUT; + return SZ_OK; + } + + if (p->state == LZMA2_STATE_DATA) + { + BoolInt initDic = (p->control == LZMA2_CONTROL_COPY_RESET_DIC); + LzmaDec_InitDicAndState(&p->decoder, initDic, False); + } + + if (inCur > outCur) + inCur = outCur; + if (inCur == 0) + break; + + LzmaDec_UpdateWithUncompressed(&p->decoder, src, inCur); + + src += inCur; + *srcLen += inCur; + p->unpackSize -= (UInt32)inCur; + p->state = (p->unpackSize == 0) ? LZMA2_STATE_CONTROL : LZMA2_STATE_DATA_CONT; + } + else + { + SRes res; + + if (p->state == LZMA2_STATE_DATA) + { + BoolInt initDic = (p->control >= 0xE0); + BoolInt initState = (p->control >= 0xA0); + LzmaDec_InitDicAndState(&p->decoder, initDic, initState); + p->state = LZMA2_STATE_DATA_CONT; + } + + if (inCur > p->packSize) + inCur = (SizeT)p->packSize; + + res = LzmaDec_DecodeToDic(&p->decoder, dicPos + outCur, src, &inCur, curFinishMode, status); + + src += inCur; + *srcLen += inCur; + p->packSize -= (UInt32)inCur; + outCur = p->decoder.dicPos - dicPos; + p->unpackSize -= (UInt32)outCur; + + if (res != 0) + break; + + if (*status == LZMA_STATUS_NEEDS_MORE_INPUT) + { + if (p->packSize == 0) + break; + return SZ_OK; + } + + if (inCur == 0 && outCur == 0) + { + if (*status != LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK + || p->unpackSize != 0 + || p->packSize != 0) + break; + p->state = LZMA2_STATE_CONTROL; + } + + *status = LZMA_STATUS_NOT_SPECIFIED; + } + } + } + + *status = LZMA_STATUS_NOT_SPECIFIED; + p->state = LZMA2_STATE_ERROR; + return SZ_ERROR_DATA; +} + + + + +ELzma2ParseStatus Lzma2Dec_Parse(CLzma2Dec *p, + SizeT outSize, + const Byte *src, SizeT *srcLen, + int checkFinishBlock) +{ + SizeT inSize = *srcLen; + 
*srcLen = 0; + + while (p->state != LZMA2_STATE_ERROR) + { + if (p->state == LZMA2_STATE_FINISHED) + return (ELzma2ParseStatus)LZMA_STATUS_FINISHED_WITH_MARK; + + if (outSize == 0 && !checkFinishBlock) + return (ELzma2ParseStatus)LZMA_STATUS_NOT_FINISHED; + + if (p->state != LZMA2_STATE_DATA && p->state != LZMA2_STATE_DATA_CONT) + { + if (*srcLen == inSize) + return (ELzma2ParseStatus)LZMA_STATUS_NEEDS_MORE_INPUT; + (*srcLen)++; + + p->state = Lzma2Dec_UpdateState(p, *src++); + + if (p->state == LZMA2_STATE_UNPACK0) + { + // if (p->decoder.dicPos != 0) + if (p->control == LZMA2_CONTROL_COPY_RESET_DIC || p->control >= 0xE0) + return LZMA2_PARSE_STATUS_NEW_BLOCK; + // if (outSize == 0) return LZMA_STATUS_NOT_FINISHED; + } + + // The following code can be commented. + // It's not big problem, if we read additional input bytes. + // It will be stopped later in LZMA2_STATE_DATA / LZMA2_STATE_DATA_CONT state. + + if (outSize == 0 && p->state != LZMA2_STATE_FINISHED) + { + // checkFinishBlock is true. So we expect that block must be finished, + // We can return LZMA_STATUS_NOT_SPECIFIED or LZMA_STATUS_NOT_FINISHED here + // break; + return (ELzma2ParseStatus)LZMA_STATUS_NOT_FINISHED; + } + + if (p->state == LZMA2_STATE_DATA) + return LZMA2_PARSE_STATUS_NEW_CHUNK; + + continue; + } + + if (outSize == 0) + return (ELzma2ParseStatus)LZMA_STATUS_NOT_FINISHED; + + { + SizeT inCur = inSize - *srcLen; + + if (LZMA2_IS_UNCOMPRESSED_STATE(p)) + { + if (inCur == 0) + return (ELzma2ParseStatus)LZMA_STATUS_NEEDS_MORE_INPUT; + if (inCur > p->unpackSize) + inCur = p->unpackSize; + if (inCur > outSize) + inCur = outSize; + p->decoder.dicPos += inCur; + src += inCur; + *srcLen += inCur; + outSize -= inCur; + p->unpackSize -= (UInt32)inCur; + p->state = (p->unpackSize == 0) ? 
LZMA2_STATE_CONTROL : LZMA2_STATE_DATA_CONT; + } + else + { + p->isExtraMode = True; + + if (inCur == 0) + { + if (p->packSize != 0) + return (ELzma2ParseStatus)LZMA_STATUS_NEEDS_MORE_INPUT; + } + else if (p->state == LZMA2_STATE_DATA) + { + p->state = LZMA2_STATE_DATA_CONT; + if (*src != 0) + { + // first byte of lzma chunk must be Zero + *srcLen += 1; + p->packSize--; + break; + } + } + + if (inCur > p->packSize) + inCur = (SizeT)p->packSize; + + src += inCur; + *srcLen += inCur; + p->packSize -= (UInt32)inCur; + + if (p->packSize == 0) + { + SizeT rem = outSize; + if (rem > p->unpackSize) + rem = p->unpackSize; + p->decoder.dicPos += rem; + p->unpackSize -= (UInt32)rem; + outSize -= rem; + if (p->unpackSize == 0) + p->state = LZMA2_STATE_CONTROL; + } + } + } + } + + p->state = LZMA2_STATE_ERROR; + return (ELzma2ParseStatus)LZMA_STATUS_NOT_SPECIFIED; +} + + + + +SRes Lzma2Dec_DecodeToBuf(CLzma2Dec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status) +{ + SizeT outSize = *destLen, inSize = *srcLen; + *srcLen = *destLen = 0; + + for (;;) + { + SizeT inCur = inSize, outCur, dicPos; + ELzmaFinishMode curFinishMode; + SRes res; + + if (p->decoder.dicPos == p->decoder.dicBufSize) + p->decoder.dicPos = 0; + dicPos = p->decoder.dicPos; + curFinishMode = LZMA_FINISH_ANY; + outCur = p->decoder.dicBufSize - dicPos; + + if (outCur >= outSize) + { + outCur = outSize; + curFinishMode = finishMode; + } + + res = Lzma2Dec_DecodeToDic(p, dicPos + outCur, src, &inCur, curFinishMode, status); + + src += inCur; + inSize -= inCur; + *srcLen += inCur; + outCur = p->decoder.dicPos - dicPos; + memcpy(dest, p->decoder.dic + dicPos, outCur); + dest += outCur; + outSize -= outCur; + *destLen += outCur; + if (res != 0) + return res; + if (outCur == 0 || outSize == 0) + return SZ_OK; + } +} + + +SRes Lzma2Decode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, + Byte prop, ELzmaFinishMode finishMode, ELzmaStatus *status, 
ISzAllocPtr alloc) +{ + CLzma2Dec p; + SRes res; + SizeT outSize = *destLen, inSize = *srcLen; + *destLen = *srcLen = 0; + *status = LZMA_STATUS_NOT_SPECIFIED; + Lzma2Dec_CONSTRUCT(&p) + RINOK(Lzma2Dec_AllocateProbs(&p, prop, alloc)) + p.decoder.dic = dest; + p.decoder.dicBufSize = outSize; + Lzma2Dec_Init(&p); + *srcLen = inSize; + res = Lzma2Dec_DecodeToDic(&p, outSize, src, srcLen, finishMode, status); + *destLen = p.decoder.dicPos; + if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT) + res = SZ_ERROR_INPUT_EOF; + Lzma2Dec_FreeProbs(&p, alloc); + return res; +} + +#undef PRF diff --git a/external/unarr/lzmasdk/Lzma2Dec.h b/external/unarr/lzmasdk/Lzma2Dec.h new file mode 100644 index 00000000..1f5233a7 --- /dev/null +++ b/external/unarr/lzmasdk/Lzma2Dec.h @@ -0,0 +1,121 @@ +/* Lzma2Dec.h -- LZMA2 Decoder +2023-03-03 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_LZMA2_DEC_H +#define ZIP7_INC_LZMA2_DEC_H + +#include "LzmaDec.h" + +EXTERN_C_BEGIN + +/* ---------- State Interface ---------- */ + +typedef struct +{ + unsigned state; + Byte control; + Byte needInitLevel; + Byte isExtraMode; + Byte _pad_; + UInt32 packSize; + UInt32 unpackSize; + CLzmaDec decoder; +} CLzma2Dec; + +#define Lzma2Dec_CONSTRUCT(p) LzmaDec_CONSTRUCT(&(p)->decoder) +#define Lzma2Dec_Construct(p) Lzma2Dec_CONSTRUCT(p) +#define Lzma2Dec_FreeProbs(p, alloc) LzmaDec_FreeProbs(&(p)->decoder, alloc) +#define Lzma2Dec_Free(p, alloc) LzmaDec_Free(&(p)->decoder, alloc) + +SRes Lzma2Dec_AllocateProbs(CLzma2Dec *p, Byte prop, ISzAllocPtr alloc); +SRes Lzma2Dec_Allocate(CLzma2Dec *p, Byte prop, ISzAllocPtr alloc); +void Lzma2Dec_Init(CLzma2Dec *p); + +/* +finishMode: + It has meaning only if the decoding reaches output limit (*destLen or dicLimit). 
+ LZMA_FINISH_ANY - use smallest number of input bytes + LZMA_FINISH_END - read EndOfStream marker after decoding + +Returns: + SZ_OK + status: + LZMA_STATUS_FINISHED_WITH_MARK + LZMA_STATUS_NOT_FINISHED + LZMA_STATUS_NEEDS_MORE_INPUT + SZ_ERROR_DATA - Data error +*/ + +SRes Lzma2Dec_DecodeToDic(CLzma2Dec *p, SizeT dicLimit, + const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status); + +SRes Lzma2Dec_DecodeToBuf(CLzma2Dec *p, Byte *dest, SizeT *destLen, + const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status); + + +/* ---------- LZMA2 block and chunk parsing ---------- */ + +/* +Lzma2Dec_Parse() parses compressed data stream up to next independent block or next chunk data. +It can return LZMA_STATUS_* code or LZMA2_PARSE_STATUS_* code: + - LZMA2_PARSE_STATUS_NEW_BLOCK - there is new block, and 1 additional byte (control byte of next block header) was read from input. + - LZMA2_PARSE_STATUS_NEW_CHUNK - there is new chunk, and only lzma2 header of new chunk was read. + CLzma2Dec::unpackSize contains unpack size of that chunk +*/ + +typedef enum +{ +/* + LZMA_STATUS_NOT_SPECIFIED // data error + LZMA_STATUS_FINISHED_WITH_MARK + LZMA_STATUS_NOT_FINISHED // + LZMA_STATUS_NEEDS_MORE_INPUT + LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK // unused +*/ + LZMA2_PARSE_STATUS_NEW_BLOCK = LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK + 1, + LZMA2_PARSE_STATUS_NEW_CHUNK +} ELzma2ParseStatus; + +ELzma2ParseStatus Lzma2Dec_Parse(CLzma2Dec *p, + SizeT outSize, // output size + const Byte *src, SizeT *srcLen, + int checkFinishBlock // set (checkFinishBlock = 1), if it must read full input data, if decoder.dicPos reaches blockMax position. + ); + +/* +LZMA2 parser doesn't decode LZMA chunks, so we must read + full input LZMA chunk to decode some part of LZMA chunk. + +Lzma2Dec_GetUnpackExtra() returns the value that shows + max possible number of output bytes that can be output by decoder + at current input positon. 
+*/ + +#define Lzma2Dec_GetUnpackExtra(p) ((p)->isExtraMode ? (p)->unpackSize : 0) + + +/* ---------- One Call Interface ---------- */ + +/* +finishMode: + It has meaning only if the decoding reaches output limit (*destLen). + LZMA_FINISH_ANY - use smallest number of input bytes + LZMA_FINISH_END - read EndOfStream marker after decoding + +Returns: + SZ_OK + status: + LZMA_STATUS_FINISHED_WITH_MARK + LZMA_STATUS_NOT_FINISHED + SZ_ERROR_DATA - Data error + SZ_ERROR_MEM - Memory allocation error + SZ_ERROR_UNSUPPORTED - Unsupported properties + SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src). +*/ + +SRes Lzma2Decode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, + Byte prop, ELzmaFinishMode finishMode, ELzmaStatus *status, ISzAllocPtr alloc); + +EXTERN_C_END + +#endif diff --git a/external/unarr/lzmasdk/LzmaDec.c b/external/unarr/lzmasdk/LzmaDec.c new file mode 100644 index 00000000..69bb8bba --- /dev/null +++ b/external/unarr/lzmasdk/LzmaDec.c @@ -0,0 +1,1363 @@ +/* LzmaDec.c -- LZMA Decoder +2023-04-07 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include + +/* #include "CpuArch.h" */ +#include "LzmaDec.h" + +// #define kNumTopBits 24 +#define kTopValue ((UInt32)1 << 24) + +#define kNumBitModelTotalBits 11 +#define kBitModelTotal (1 << kNumBitModelTotalBits) + +#define RC_INIT_SIZE 5 + +#ifndef Z7_LZMA_DEC_OPT + +#define kNumMoveBits 5 +#define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); } + +#define IF_BIT_0(p) ttt = *(p); NORMALIZE; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound) +#define UPDATE_0(p) range = bound; *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); +#define UPDATE_1(p) range -= bound; code -= bound; *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits)); +#define GET_BIT2(p, i, A0, A1) IF_BIT_0(p) \ + { UPDATE_0(p) i = (i + i); A0; } else \ + { UPDATE_1(p) i = (i + i) + 1; A1; } + +#define TREE_GET_BIT(probs, i) { GET_BIT2(probs + i, 
i, ;, ;); } + +#define REV_BIT(p, i, A0, A1) IF_BIT_0(p + i) \ + { UPDATE_0(p + i) A0; } else \ + { UPDATE_1(p + i) A1; } +#define REV_BIT_VAR( p, i, m) REV_BIT(p, i, i += m; m += m, m += m; i += m; ) +#define REV_BIT_CONST(p, i, m) REV_BIT(p, i, i += m; , i += m * 2; ) +#define REV_BIT_LAST( p, i, m) REV_BIT(p, i, i -= m , ; ) + +#define TREE_DECODE(probs, limit, i) \ + { i = 1; do { TREE_GET_BIT(probs, i); } while (i < limit); i -= limit; } + +/* #define Z7_LZMA_SIZE_OPT */ + +#ifdef Z7_LZMA_SIZE_OPT +#define TREE_6_DECODE(probs, i) TREE_DECODE(probs, (1 << 6), i) +#else +#define TREE_6_DECODE(probs, i) \ + { i = 1; \ + TREE_GET_BIT(probs, i) \ + TREE_GET_BIT(probs, i) \ + TREE_GET_BIT(probs, i) \ + TREE_GET_BIT(probs, i) \ + TREE_GET_BIT(probs, i) \ + TREE_GET_BIT(probs, i) \ + i -= 0x40; } +#endif + +#define NORMAL_LITER_DEC TREE_GET_BIT(prob, symbol) +#define MATCHED_LITER_DEC \ + matchByte += matchByte; \ + bit = offs; \ + offs &= matchByte; \ + probLit = prob + (offs + bit + symbol); \ + GET_BIT2(probLit, symbol, offs ^= bit; , ;) + +#endif // Z7_LZMA_DEC_OPT + + +#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_INPUT_EOF; range <<= 8; code = (code << 8) | (*buf++); } + +#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound) +#define UPDATE_0_CHECK range = bound; +#define UPDATE_1_CHECK range -= bound; code -= bound; +#define GET_BIT2_CHECK(p, i, A0, A1) IF_BIT_0_CHECK(p) \ + { UPDATE_0_CHECK i = (i + i); A0; } else \ + { UPDATE_1_CHECK i = (i + i) + 1; A1; } +#define GET_BIT_CHECK(p, i) GET_BIT2_CHECK(p, i, ; , ;) +#define TREE_DECODE_CHECK(probs, limit, i) \ + { i = 1; do { GET_BIT_CHECK(probs + i, i) } while (i < limit); i -= limit; } + + +#define REV_BIT_CHECK(p, i, m) IF_BIT_0_CHECK(p + i) \ + { UPDATE_0_CHECK i += m; m += m; } else \ + { UPDATE_1_CHECK m += m; i += m; } + + +#define kNumPosBitsMax 4 +#define kNumPosStatesMax (1 << kNumPosBitsMax) 
+ +#define kLenNumLowBits 3 +#define kLenNumLowSymbols (1 << kLenNumLowBits) +#define kLenNumHighBits 8 +#define kLenNumHighSymbols (1 << kLenNumHighBits) + +#define LenLow 0 +#define LenHigh (LenLow + 2 * (kNumPosStatesMax << kLenNumLowBits)) +#define kNumLenProbs (LenHigh + kLenNumHighSymbols) + +#define LenChoice LenLow +#define LenChoice2 (LenLow + (1 << kLenNumLowBits)) + +#define kNumStates 12 +#define kNumStates2 16 +#define kNumLitStates 7 + +#define kStartPosModelIndex 4 +#define kEndPosModelIndex 14 +#define kNumFullDistances (1 << (kEndPosModelIndex >> 1)) + +#define kNumPosSlotBits 6 +#define kNumLenToPosStates 4 + +#define kNumAlignBits 4 +#define kAlignTableSize (1 << kNumAlignBits) + +#define kMatchMinLen 2 +#define kMatchSpecLenStart (kMatchMinLen + kLenNumLowSymbols * 2 + kLenNumHighSymbols) + +#define kMatchSpecLen_Error_Data (1 << 9) +#define kMatchSpecLen_Error_Fail (kMatchSpecLen_Error_Data - 1) + +/* External ASM code needs same CLzmaProb array layout. So don't change it. 
*/ + +/* (probs_1664) is faster and better for code size at some platforms */ +/* +#ifdef MY_CPU_X86_OR_AMD64 +*/ +#define kStartOffset 1664 +#define GET_PROBS p->probs_1664 +/* +#define GET_PROBS p->probs + kStartOffset +#else +#define kStartOffset 0 +#define GET_PROBS p->probs +#endif +*/ + +#define SpecPos (-kStartOffset) +#define IsRep0Long (SpecPos + kNumFullDistances) +#define RepLenCoder (IsRep0Long + (kNumStates2 << kNumPosBitsMax)) +#define LenCoder (RepLenCoder + kNumLenProbs) +#define IsMatch (LenCoder + kNumLenProbs) +#define Align (IsMatch + (kNumStates2 << kNumPosBitsMax)) +#define IsRep (Align + kAlignTableSize) +#define IsRepG0 (IsRep + kNumStates) +#define IsRepG1 (IsRepG0 + kNumStates) +#define IsRepG2 (IsRepG1 + kNumStates) +#define PosSlot (IsRepG2 + kNumStates) +#define Literal (PosSlot + (kNumLenToPosStates << kNumPosSlotBits)) +#define NUM_BASE_PROBS (Literal + kStartOffset) + +#if Align != 0 && kStartOffset != 0 + #error Stop_Compiling_Bad_LZMA_kAlign +#endif + +#if NUM_BASE_PROBS != 1984 + #error Stop_Compiling_Bad_LZMA_PROBS +#endif + + +#define LZMA_LIT_SIZE 0x300 + +#define LzmaProps_GetNumProbs(p) (NUM_BASE_PROBS + ((UInt32)LZMA_LIT_SIZE << ((p)->lc + (p)->lp))) + + +#define CALC_POS_STATE(processedPos, pbMask) (((processedPos) & (pbMask)) << 4) +#define COMBINED_PS_STATE (posState + state) +#define GET_LEN_STATE (posState) + +#define LZMA_DIC_MIN (1 << 12) + +/* +p->remainLen : shows status of LZMA decoder: + < kMatchSpecLenStart : the number of bytes to be copied with (p->rep0) offset + = kMatchSpecLenStart : the LZMA stream was finished with end mark + = kMatchSpecLenStart + 1 : need init range coder + = kMatchSpecLenStart + 2 : need init range coder and state + = kMatchSpecLen_Error_Fail : Internal Code Failure + = kMatchSpecLen_Error_Data + [0 ... 273] : LZMA Data Error +*/ + +/* ---------- LZMA_DECODE_REAL ---------- */ +/* +LzmaDec_DecodeReal_3() can be implemented in external ASM file. 
+3 - is the code compatibility version of that function for check at link time. +*/ + +#define LZMA_DECODE_REAL LzmaDec_DecodeReal_3 + +/* +LZMA_DECODE_REAL() +In: + RangeCoder is normalized + if (p->dicPos == limit) + { + LzmaDec_TryDummy() was called before to exclude LITERAL and MATCH-REP cases. + So first symbol can be only MATCH-NON-REP. And if that MATCH-NON-REP symbol + is not END_OF_PAYALOAD_MARKER, then the function doesn't write any byte to dictionary, + the function returns SZ_OK, and the caller can use (p->remainLen) and (p->reps[0]) later. + } + +Processing: + The first LZMA symbol will be decoded in any case. + All main checks for limits are at the end of main loop, + It decodes additional LZMA-symbols while (p->buf < bufLimit && dicPos < limit), + RangeCoder is still without last normalization when (p->buf < bufLimit) is being checked. + But if (p->buf < bufLimit), the caller provided at least (LZMA_REQUIRED_INPUT_MAX + 1) bytes for + next iteration before limit (bufLimit + LZMA_REQUIRED_INPUT_MAX), + that is enough for worst case LZMA symbol with one additional RangeCoder normalization for one bit. + So that function never reads bufLimit [LZMA_REQUIRED_INPUT_MAX] byte. 
+ +Out: + RangeCoder is normalized + Result: + SZ_OK - OK + p->remainLen: + < kMatchSpecLenStart : the number of bytes to be copied with (p->reps[0]) offset + = kMatchSpecLenStart : the LZMA stream was finished with end mark + + SZ_ERROR_DATA - error, when the MATCH-Symbol refers out of dictionary + p->remainLen : undefined + p->reps[*] : undefined +*/ + + +#ifdef Z7_LZMA_DEC_OPT + +int Z7_FASTCALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit); + +#else + +static +int Z7_FASTCALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit) +{ + CLzmaProb *probs = GET_PROBS; + unsigned state = (unsigned)p->state; + UInt32 rep0 = p->reps[0], rep1 = p->reps[1], rep2 = p->reps[2], rep3 = p->reps[3]; + unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1; + unsigned lc = p->prop.lc; + unsigned lpMask = ((unsigned)0x100 << p->prop.lp) - ((unsigned)0x100 >> lc); + + Byte *dic = p->dic; + SizeT dicBufSize = p->dicBufSize; + SizeT dicPos = p->dicPos; + + UInt32 processedPos = p->processedPos; + UInt32 checkDicSize = p->checkDicSize; + unsigned len = 0; + + const Byte *buf = p->buf; + UInt32 range = p->range; + UInt32 code = p->code; + + do + { + CLzmaProb *prob; + UInt32 bound; + unsigned ttt; + unsigned posState = CALC_POS_STATE(processedPos, pbMask); + + prob = probs + IsMatch + COMBINED_PS_STATE; + IF_BIT_0(prob) + { + unsigned symbol; + UPDATE_0(prob) + prob = probs + Literal; + if (processedPos != 0 || checkDicSize != 0) + prob += (UInt32)3 * ((((processedPos << 8) + dic[(dicPos == 0 ? dicBufSize : dicPos) - 1]) & lpMask) << lc); + processedPos++; + + if (state < kNumLitStates) + { + state -= (state < 4) ? state : 3; + symbol = 1; + #ifdef Z7_LZMA_SIZE_OPT + do { NORMAL_LITER_DEC } while (symbol < 0x100); + #else + NORMAL_LITER_DEC + NORMAL_LITER_DEC + NORMAL_LITER_DEC + NORMAL_LITER_DEC + NORMAL_LITER_DEC + NORMAL_LITER_DEC + NORMAL_LITER_DEC + NORMAL_LITER_DEC + #endif + } + else + { + unsigned matchByte = dic[dicPos - rep0 + (dicPos < rep0 ? 
dicBufSize : 0)]; + unsigned offs = 0x100; + state -= (state < 10) ? 3 : 6; + symbol = 1; + #ifdef Z7_LZMA_SIZE_OPT + do + { + unsigned bit; + CLzmaProb *probLit; + MATCHED_LITER_DEC + } + while (symbol < 0x100); + #else + { + unsigned bit; + CLzmaProb *probLit; + MATCHED_LITER_DEC + MATCHED_LITER_DEC + MATCHED_LITER_DEC + MATCHED_LITER_DEC + MATCHED_LITER_DEC + MATCHED_LITER_DEC + MATCHED_LITER_DEC + MATCHED_LITER_DEC + } + #endif + } + + dic[dicPos++] = (Byte)symbol; + continue; + } + + { + UPDATE_1(prob) + prob = probs + IsRep + state; + IF_BIT_0(prob) + { + UPDATE_0(prob) + state += kNumStates; + prob = probs + LenCoder; + } + else + { + UPDATE_1(prob) + prob = probs + IsRepG0 + state; + IF_BIT_0(prob) + { + UPDATE_0(prob) + prob = probs + IsRep0Long + COMBINED_PS_STATE; + IF_BIT_0(prob) + { + UPDATE_0(prob) + + // that case was checked before with kBadRepCode + // if (checkDicSize == 0 && processedPos == 0) { len = kMatchSpecLen_Error_Data + 1; break; } + // The caller doesn't allow (dicPos == limit) case here + // so we don't need the following check: + // if (dicPos == limit) { state = state < kNumLitStates ? 9 : 11; len = 1; break; } + + dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)]; + dicPos++; + processedPos++; + state = state < kNumLitStates ? 9 : 11; + continue; + } + UPDATE_1(prob) + } + else + { + UInt32 distance; + UPDATE_1(prob) + prob = probs + IsRepG1 + state; + IF_BIT_0(prob) + { + UPDATE_0(prob) + distance = rep1; + } + else + { + UPDATE_1(prob) + prob = probs + IsRepG2 + state; + IF_BIT_0(prob) + { + UPDATE_0(prob) + distance = rep2; + } + else + { + UPDATE_1(prob) + distance = rep3; + rep3 = rep2; + } + rep2 = rep1; + } + rep1 = rep0; + rep0 = distance; + } + state = state < kNumLitStates ? 
8 : 11; + prob = probs + RepLenCoder; + } + + #ifdef Z7_LZMA_SIZE_OPT + { + unsigned lim, offset; + CLzmaProb *probLen = prob + LenChoice; + IF_BIT_0(probLen) + { + UPDATE_0(probLen) + probLen = prob + LenLow + GET_LEN_STATE; + offset = 0; + lim = (1 << kLenNumLowBits); + } + else + { + UPDATE_1(probLen) + probLen = prob + LenChoice2; + IF_BIT_0(probLen) + { + UPDATE_0(probLen) + probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits); + offset = kLenNumLowSymbols; + lim = (1 << kLenNumLowBits); + } + else + { + UPDATE_1(probLen) + probLen = prob + LenHigh; + offset = kLenNumLowSymbols * 2; + lim = (1 << kLenNumHighBits); + } + } + TREE_DECODE(probLen, lim, len) + len += offset; + } + #else + { + CLzmaProb *probLen = prob + LenChoice; + IF_BIT_0(probLen) + { + UPDATE_0(probLen) + probLen = prob + LenLow + GET_LEN_STATE; + len = 1; + TREE_GET_BIT(probLen, len) + TREE_GET_BIT(probLen, len) + TREE_GET_BIT(probLen, len) + len -= 8; + } + else + { + UPDATE_1(probLen) + probLen = prob + LenChoice2; + IF_BIT_0(probLen) + { + UPDATE_0(probLen) + probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits); + len = 1; + TREE_GET_BIT(probLen, len) + TREE_GET_BIT(probLen, len) + TREE_GET_BIT(probLen, len) + } + else + { + UPDATE_1(probLen) + probLen = prob + LenHigh; + TREE_DECODE(probLen, (1 << kLenNumHighBits), len) + len += kLenNumLowSymbols * 2; + } + } + } + #endif + + if (state >= kNumStates) + { + UInt32 distance; + prob = probs + PosSlot + + ((len < kNumLenToPosStates ? 
len : kNumLenToPosStates - 1) << kNumPosSlotBits); + TREE_6_DECODE(prob, distance) + if (distance >= kStartPosModelIndex) + { + unsigned posSlot = (unsigned)distance; + unsigned numDirectBits = (unsigned)(((distance >> 1) - 1)); + distance = (2 | (distance & 1)); + if (posSlot < kEndPosModelIndex) + { + distance <<= numDirectBits; + prob = probs + SpecPos; + { + UInt32 m = 1; + distance++; + do + { + REV_BIT_VAR(prob, distance, m) + } + while (--numDirectBits); + distance -= m; + } + } + else + { + numDirectBits -= kNumAlignBits; + do + { + NORMALIZE + range >>= 1; + + { + UInt32 t; + code -= range; + t = (0 - ((UInt32)code >> 31)); /* (UInt32)((Int32)code >> 31) */ + distance = (distance << 1) + (t + 1); + code += range & t; + } + /* + distance <<= 1; + if (code >= range) + { + code -= range; + distance |= 1; + } + */ + } + while (--numDirectBits); + prob = probs + Align; + distance <<= kNumAlignBits; + { + unsigned i = 1; + REV_BIT_CONST(prob, i, 1) + REV_BIT_CONST(prob, i, 2) + REV_BIT_CONST(prob, i, 4) + REV_BIT_LAST (prob, i, 8) + distance |= i; + } + if (distance == (UInt32)0xFFFFFFFF) + { + len = kMatchSpecLenStart; + state -= kNumStates; + break; + } + } + } + + rep3 = rep2; + rep2 = rep1; + rep1 = rep0; + rep0 = distance + 1; + state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3; + if (distance >= (checkDicSize == 0 ? processedPos: checkDicSize)) + { + len += kMatchSpecLen_Error_Data + kMatchMinLen; + // len = kMatchSpecLen_Error_Data; + // len += kMatchMinLen; + break; + } + } + + len += kMatchMinLen; + + { + SizeT rem; + unsigned curLen; + SizeT pos; + + if ((rem = limit - dicPos) == 0) + { + /* + We stop decoding and return SZ_OK, and we can resume decoding later. + Any error conditions can be tested later in caller code. + For more strict mode we can stop decoding with error + // len += kMatchSpecLen_Error_Data; + */ + break; + } + + curLen = ((rem < len) ? (unsigned)rem : len); + pos = dicPos - rep0 + (dicPos < rep0 ? 
dicBufSize : 0); + + processedPos += (UInt32)curLen; + + len -= curLen; + if (curLen <= dicBufSize - pos) + { + Byte *dest = dic + dicPos; + ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos; + const Byte *lim = dest + curLen; + dicPos += (SizeT)curLen; + do + *(dest) = (Byte)*(dest + src); + while (++dest != lim); + } + else + { + do + { + dic[dicPos++] = dic[pos]; + if (++pos == dicBufSize) + pos = 0; + } + while (--curLen != 0); + } + } + } + } + while (dicPos < limit && buf < bufLimit); + + NORMALIZE + + p->buf = buf; + p->range = range; + p->code = code; + p->remainLen = (UInt32)len; // & (kMatchSpecLen_Error_Data - 1); // we can write real length for error matches too. + p->dicPos = dicPos; + p->processedPos = processedPos; + p->reps[0] = rep0; + p->reps[1] = rep1; + p->reps[2] = rep2; + p->reps[3] = rep3; + p->state = (UInt32)state; + if (len >= kMatchSpecLen_Error_Data) + return SZ_ERROR_DATA; + return SZ_OK; +} +#endif + + + +static void Z7_FASTCALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit) +{ + unsigned len = (unsigned)p->remainLen; + if (len == 0 /* || len >= kMatchSpecLenStart */) + return; + { + SizeT dicPos = p->dicPos; + Byte *dic; + SizeT dicBufSize; + SizeT rep0; /* we use SizeT to avoid the BUG of VC14 for AMD64 */ + { + SizeT rem = limit - dicPos; + if (rem < len) + { + len = (unsigned)(rem); + if (len == 0) + return; + } + } + + if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= len) + p->checkDicSize = p->prop.dicSize; + + p->processedPos += (UInt32)len; + p->remainLen -= (UInt32)len; + dic = p->dic; + rep0 = p->reps[0]; + dicBufSize = p->dicBufSize; + do + { + dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? 
dicBufSize : 0)]; + dicPos++; + } + while (--len); + p->dicPos = dicPos; + } +} + + +/* +At staring of new stream we have one of the following symbols: + - Literal - is allowed + - Non-Rep-Match - is allowed only if it's end marker symbol + - Rep-Match - is not allowed +We use early check of (RangeCoder:Code) over kBadRepCode to simplify main decoding code +*/ + +#define kRange0 0xFFFFFFFF +#define kBound0 ((kRange0 >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1)) +#define kBadRepCode (kBound0 + (((kRange0 - kBound0) >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1))) +#if kBadRepCode != (0xC0000000 - 0x400) + #error Stop_Compiling_Bad_LZMA_Check +#endif + + +/* +LzmaDec_DecodeReal2(): + It calls LZMA_DECODE_REAL() and it adjusts limit according (p->checkDicSize). + +We correct (p->checkDicSize) after LZMA_DECODE_REAL() and in LzmaDec_WriteRem(), +and we support the following state of (p->checkDicSize): + if (total_processed < p->prop.dicSize) then + { + (total_processed == p->processedPos) + (p->checkDicSize == 0) + } + else + (p->checkDicSize == p->prop.dicSize) +*/ + +static int Z7_FASTCALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit) +{ + if (p->checkDicSize == 0) + { + UInt32 rem = p->prop.dicSize - p->processedPos; + if (limit - p->dicPos > rem) + limit = p->dicPos + rem; + } + { + int res = LZMA_DECODE_REAL(p, limit, bufLimit); + if (p->checkDicSize == 0 && p->processedPos >= p->prop.dicSize) + p->checkDicSize = p->prop.dicSize; + return res; + } +} + + + +typedef enum +{ + DUMMY_INPUT_EOF, /* need more input data */ + DUMMY_LIT, + DUMMY_MATCH, + DUMMY_REP +} ELzmaDummy; + + +#define IS_DUMMY_END_MARKER_POSSIBLE(dummyRes) ((dummyRes) == DUMMY_MATCH) + +static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byte **bufOut) +{ + UInt32 range = p->range; + UInt32 code = p->code; + const Byte *bufLimit = *bufOut; + const CLzmaProb *probs = GET_PROBS; + unsigned state = (unsigned)p->state; + ELzmaDummy 
res; + + for (;;) + { + const CLzmaProb *prob; + UInt32 bound; + unsigned ttt; + unsigned posState = CALC_POS_STATE(p->processedPos, ((unsigned)1 << p->prop.pb) - 1); + + prob = probs + IsMatch + COMBINED_PS_STATE; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK + + prob = probs + Literal; + if (p->checkDicSize != 0 || p->processedPos != 0) + prob += ((UInt32)LZMA_LIT_SIZE * + ((((p->processedPos) & (((unsigned)1 << (p->prop.lp)) - 1)) << p->prop.lc) + + ((unsigned)p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc)))); + + if (state < kNumLitStates) + { + unsigned symbol = 1; + do { GET_BIT_CHECK(prob + symbol, symbol) } while (symbol < 0x100); + } + else + { + unsigned matchByte = p->dic[p->dicPos - p->reps[0] + + (p->dicPos < p->reps[0] ? p->dicBufSize : 0)]; + unsigned offs = 0x100; + unsigned symbol = 1; + do + { + unsigned bit; + const CLzmaProb *probLit; + matchByte += matchByte; + bit = offs; + offs &= matchByte; + probLit = prob + (offs + bit + symbol); + GET_BIT2_CHECK(probLit, symbol, offs ^= bit; , ; ) + } + while (symbol < 0x100); + } + res = DUMMY_LIT; + } + else + { + unsigned len; + UPDATE_1_CHECK + + prob = probs + IsRep + state; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK + state = 0; + prob = probs + LenCoder; + res = DUMMY_MATCH; + } + else + { + UPDATE_1_CHECK + res = DUMMY_REP; + prob = probs + IsRepG0 + state; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK + prob = probs + IsRep0Long + COMBINED_PS_STATE; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK + break; + } + else + { + UPDATE_1_CHECK + } + } + else + { + UPDATE_1_CHECK + prob = probs + IsRepG1 + state; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK + } + else + { + UPDATE_1_CHECK + prob = probs + IsRepG2 + state; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK + } + else + { + UPDATE_1_CHECK + } + } + } + state = kNumStates; + prob = probs + RepLenCoder; + } + { + unsigned limit, offset; + const CLzmaProb *probLen = prob + LenChoice; + IF_BIT_0_CHECK(probLen) + { + 
UPDATE_0_CHECK + probLen = prob + LenLow + GET_LEN_STATE; + offset = 0; + limit = 1 << kLenNumLowBits; + } + else + { + UPDATE_1_CHECK + probLen = prob + LenChoice2; + IF_BIT_0_CHECK(probLen) + { + UPDATE_0_CHECK + probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits); + offset = kLenNumLowSymbols; + limit = 1 << kLenNumLowBits; + } + else + { + UPDATE_1_CHECK + probLen = prob + LenHigh; + offset = kLenNumLowSymbols * 2; + limit = 1 << kLenNumHighBits; + } + } + TREE_DECODE_CHECK(probLen, limit, len) + len += offset; + } + + if (state < 4) + { + unsigned posSlot; + prob = probs + PosSlot + + ((len < kNumLenToPosStates - 1 ? len : kNumLenToPosStates - 1) << + kNumPosSlotBits); + TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot) + if (posSlot >= kStartPosModelIndex) + { + unsigned numDirectBits = ((posSlot >> 1) - 1); + + if (posSlot < kEndPosModelIndex) + { + prob = probs + SpecPos + ((2 | (posSlot & 1)) << numDirectBits); + } + else + { + numDirectBits -= kNumAlignBits; + do + { + NORMALIZE_CHECK + range >>= 1; + code -= range & (((code - range) >> 31) - 1); + /* if (code >= range) code -= range; */ + } + while (--numDirectBits); + prob = probs + Align; + numDirectBits = kNumAlignBits; + } + { + unsigned i = 1; + unsigned m = 1; + do + { + REV_BIT_CHECK(prob, i, m) + } + while (--numDirectBits); + } + } + } + } + break; + } + NORMALIZE_CHECK + + *bufOut = buf; + return res; +} + +void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState); +void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState) +{ + p->remainLen = kMatchSpecLenStart + 1; + p->tempBufSize = 0; + + if (initDic) + { + p->processedPos = 0; + p->checkDicSize = 0; + p->remainLen = kMatchSpecLenStart + 2; + } + if (initState) + p->remainLen = kMatchSpecLenStart + 2; +} + +void LzmaDec_Init(CLzmaDec *p) +{ + p->dicPos = 0; + LzmaDec_InitDicAndState(p, True, True); +} + + +/* +LZMA supports optional end_marker. 
+So the decoder can lookahead for one additional LZMA-Symbol to check end_marker. +That additional LZMA-Symbol can require up to LZMA_REQUIRED_INPUT_MAX bytes in input stream. +When the decoder reaches dicLimit, it looks (finishMode) parameter: + if (finishMode == LZMA_FINISH_ANY), the decoder doesn't lookahead + if (finishMode != LZMA_FINISH_ANY), the decoder lookahead, if end_marker is possible for current position + +When the decoder lookahead, and the lookahead symbol is not end_marker, we have two ways: + 1) Strict mode (default) : the decoder returns SZ_ERROR_DATA. + 2) The relaxed mode (alternative mode) : we could return SZ_OK, and the caller + must check (status) value. The caller can show the error, + if the end of stream is expected, and the (status) is noit + LZMA_STATUS_FINISHED_WITH_MARK or LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK. +*/ + + +#define RETURN_NOT_FINISHED_FOR_FINISH \ + *status = LZMA_STATUS_NOT_FINISHED; \ + return SZ_ERROR_DATA; // for strict mode + // return SZ_OK; // for relaxed mode + + +SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *srcLen, + ELzmaFinishMode finishMode, ELzmaStatus *status) +{ + SizeT inSize = *srcLen; + (*srcLen) = 0; + *status = LZMA_STATUS_NOT_SPECIFIED; + + if (p->remainLen > kMatchSpecLenStart) + { + if (p->remainLen > kMatchSpecLenStart + 2) + return p->remainLen == kMatchSpecLen_Error_Fail ? 
SZ_ERROR_FAIL : SZ_ERROR_DATA; + + for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--) + p->tempBuf[p->tempBufSize++] = *src++; + if (p->tempBufSize != 0 && p->tempBuf[0] != 0) + return SZ_ERROR_DATA; + if (p->tempBufSize < RC_INIT_SIZE) + { + *status = LZMA_STATUS_NEEDS_MORE_INPUT; + return SZ_OK; + } + p->code = + ((UInt32)p->tempBuf[1] << 24) + | ((UInt32)p->tempBuf[2] << 16) + | ((UInt32)p->tempBuf[3] << 8) + | ((UInt32)p->tempBuf[4]); + + if (p->checkDicSize == 0 + && p->processedPos == 0 + && p->code >= kBadRepCode) + return SZ_ERROR_DATA; + + p->range = 0xFFFFFFFF; + p->tempBufSize = 0; + + if (p->remainLen > kMatchSpecLenStart + 1) + { + SizeT numProbs = LzmaProps_GetNumProbs(&p->prop); + SizeT i; + CLzmaProb *probs = p->probs; + for (i = 0; i < numProbs; i++) + probs[i] = kBitModelTotal >> 1; + p->reps[0] = p->reps[1] = p->reps[2] = p->reps[3] = 1; + p->state = 0; + } + + p->remainLen = 0; + } + + for (;;) + { + if (p->remainLen == kMatchSpecLenStart) + { + if (p->code != 0) + return SZ_ERROR_DATA; + *status = LZMA_STATUS_FINISHED_WITH_MARK; + return SZ_OK; + } + + LzmaDec_WriteRem(p, dicLimit); + + { + // (p->remainLen == 0 || p->dicPos == dicLimit) + + int checkEndMarkNow = 0; + + if (p->dicPos >= dicLimit) + { + if (p->remainLen == 0 && p->code == 0) + { + *status = LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK; + return SZ_OK; + } + if (finishMode == LZMA_FINISH_ANY) + { + *status = LZMA_STATUS_NOT_FINISHED; + return SZ_OK; + } + if (p->remainLen != 0) + { + RETURN_NOT_FINISHED_FOR_FINISH + } + checkEndMarkNow = 1; + } + + // (p->remainLen == 0) + + if (p->tempBufSize == 0) + { + const Byte *bufLimit; + int dummyProcessed = -1; + + if (inSize < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) + { + const Byte *bufOut = src + inSize; + + ELzmaDummy dummyRes = LzmaDec_TryDummy(p, src, &bufOut); + + if (dummyRes == DUMMY_INPUT_EOF) + { + size_t i; + if (inSize >= LZMA_REQUIRED_INPUT_MAX) + break; + (*srcLen) += inSize; + p->tempBufSize = 
(unsigned)inSize; + for (i = 0; i < inSize; i++) + p->tempBuf[i] = src[i]; + *status = LZMA_STATUS_NEEDS_MORE_INPUT; + return SZ_OK; + } + + dummyProcessed = (int)(bufOut - src); + if ((unsigned)dummyProcessed > LZMA_REQUIRED_INPUT_MAX) + break; + + if (checkEndMarkNow && !IS_DUMMY_END_MARKER_POSSIBLE(dummyRes)) + { + unsigned i; + (*srcLen) += (unsigned)dummyProcessed; + p->tempBufSize = (unsigned)dummyProcessed; + for (i = 0; i < (unsigned)dummyProcessed; i++) + p->tempBuf[i] = src[i]; + // p->remainLen = kMatchSpecLen_Error_Data; + RETURN_NOT_FINISHED_FOR_FINISH + } + + bufLimit = src; + // we will decode only one iteration + } + else + bufLimit = src + inSize - LZMA_REQUIRED_INPUT_MAX; + + p->buf = src; + + { + int res = LzmaDec_DecodeReal2(p, dicLimit, bufLimit); + + SizeT processed = (SizeT)(p->buf - src); + + if (dummyProcessed < 0) + { + if (processed > inSize) + break; + } + else if ((unsigned)dummyProcessed != processed) + break; + + src += processed; + inSize -= processed; + (*srcLen) += processed; + + if (res != SZ_OK) + { + p->remainLen = kMatchSpecLen_Error_Data; + return SZ_ERROR_DATA; + } + } + continue; + } + + { + // we have some data in (p->tempBuf) + // in strict mode: tempBufSize is not enough for one Symbol decoding. + // in relaxed mode: tempBufSize not larger than required for one Symbol decoding. 
+ + unsigned rem = p->tempBufSize; + unsigned ahead = 0; + int dummyProcessed = -1; + + while (rem < LZMA_REQUIRED_INPUT_MAX && ahead < inSize) + p->tempBuf[rem++] = src[ahead++]; + + // ahead - the size of new data copied from (src) to (p->tempBuf) + // rem - the size of temp buffer including new data from (src) + + if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) + { + const Byte *bufOut = p->tempBuf + rem; + + ELzmaDummy dummyRes = LzmaDec_TryDummy(p, p->tempBuf, &bufOut); + + if (dummyRes == DUMMY_INPUT_EOF) + { + if (rem >= LZMA_REQUIRED_INPUT_MAX) + break; + p->tempBufSize = rem; + (*srcLen) += (SizeT)ahead; + *status = LZMA_STATUS_NEEDS_MORE_INPUT; + return SZ_OK; + } + + dummyProcessed = (int)(bufOut - p->tempBuf); + + if ((unsigned)dummyProcessed < p->tempBufSize) + break; + + if (checkEndMarkNow && !IS_DUMMY_END_MARKER_POSSIBLE(dummyRes)) + { + (*srcLen) += (unsigned)dummyProcessed - p->tempBufSize; + p->tempBufSize = (unsigned)dummyProcessed; + // p->remainLen = kMatchSpecLen_Error_Data; + RETURN_NOT_FINISHED_FOR_FINISH + } + } + + p->buf = p->tempBuf; + + { + // we decode one symbol from (p->tempBuf) here, so the (bufLimit) is equal to (p->buf) + int res = LzmaDec_DecodeReal2(p, dicLimit, p->buf); + + SizeT processed = (SizeT)(p->buf - p->tempBuf); + rem = p->tempBufSize; + + if (dummyProcessed < 0) + { + if (processed > LZMA_REQUIRED_INPUT_MAX) + break; + if (processed < rem) + break; + } + else if ((unsigned)dummyProcessed != processed) + break; + + processed -= rem; + + src += processed; + inSize -= processed; + (*srcLen) += processed; + p->tempBufSize = 0; + + if (res != SZ_OK) + { + p->remainLen = kMatchSpecLen_Error_Data; + return SZ_ERROR_DATA; + } + } + } + } + } + + /* Some unexpected error: internal error of code, memory corruption or hardware failure */ + p->remainLen = kMatchSpecLen_Error_Fail; + return SZ_ERROR_FAIL; +} + + + +SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, 
ELzmaFinishMode finishMode, ELzmaStatus *status) +{ + SizeT outSize = *destLen; + SizeT inSize = *srcLen; + *srcLen = *destLen = 0; + for (;;) + { + SizeT inSizeCur = inSize, outSizeCur, dicPos; + ELzmaFinishMode curFinishMode; + SRes res; + if (p->dicPos == p->dicBufSize) + p->dicPos = 0; + dicPos = p->dicPos; + if (outSize > p->dicBufSize - dicPos) + { + outSizeCur = p->dicBufSize; + curFinishMode = LZMA_FINISH_ANY; + } + else + { + outSizeCur = dicPos + outSize; + curFinishMode = finishMode; + } + + res = LzmaDec_DecodeToDic(p, outSizeCur, src, &inSizeCur, curFinishMode, status); + src += inSizeCur; + inSize -= inSizeCur; + *srcLen += inSizeCur; + outSizeCur = p->dicPos - dicPos; + memcpy(dest, p->dic + dicPos, outSizeCur); + dest += outSizeCur; + outSize -= outSizeCur; + *destLen += outSizeCur; + if (res != 0) + return res; + if (outSizeCur == 0 || outSize == 0) + return SZ_OK; + } +} + +void LzmaDec_FreeProbs(CLzmaDec *p, ISzAllocPtr alloc) +{ + ISzAlloc_Free(alloc, p->probs); + p->probs = NULL; +} + +static void LzmaDec_FreeDict(CLzmaDec *p, ISzAllocPtr alloc) +{ + ISzAlloc_Free(alloc, p->dic); + p->dic = NULL; +} + +void LzmaDec_Free(CLzmaDec *p, ISzAllocPtr alloc) +{ + LzmaDec_FreeProbs(p, alloc); + LzmaDec_FreeDict(p, alloc); +} + +SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size) +{ + UInt32 dicSize; + Byte d; + + if (size < LZMA_PROPS_SIZE) + return SZ_ERROR_UNSUPPORTED; + else + dicSize = data[1] | ((UInt32)data[2] << 8) | ((UInt32)data[3] << 16) | ((UInt32)data[4] << 24); + + if (dicSize < LZMA_DIC_MIN) + dicSize = LZMA_DIC_MIN; + p->dicSize = dicSize; + + d = data[0]; + if (d >= (9 * 5 * 5)) + return SZ_ERROR_UNSUPPORTED; + + p->lc = (Byte)(d % 9); + d /= 9; + p->pb = (Byte)(d / 5); + p->lp = (Byte)(d % 5); + + return SZ_OK; +} + +static SRes LzmaDec_AllocateProbs2(CLzmaDec *p, const CLzmaProps *propNew, ISzAllocPtr alloc) +{ + UInt32 numProbs = LzmaProps_GetNumProbs(propNew); + if (!p->probs || numProbs != p->numProbs) + { + 
LzmaDec_FreeProbs(p, alloc); + p->probs = (CLzmaProb *)ISzAlloc_Alloc(alloc, numProbs * sizeof(CLzmaProb)); + if (!p->probs) + return SZ_ERROR_MEM; + p->probs_1664 = p->probs + 1664; + p->numProbs = numProbs; + } + return SZ_OK; +} + +SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc) +{ + CLzmaProps propNew; + RINOK(LzmaProps_Decode(&propNew, props, propsSize)) + RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)) + p->prop = propNew; + return SZ_OK; +} + +SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc) +{ + CLzmaProps propNew; + SizeT dicBufSize; + RINOK(LzmaProps_Decode(&propNew, props, propsSize)) + RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)) + + { + UInt32 dictSize = propNew.dicSize; + SizeT mask = ((UInt32)1 << 12) - 1; + if (dictSize >= ((UInt32)1 << 30)) mask = ((UInt32)1 << 22) - 1; + else if (dictSize >= ((UInt32)1 << 22)) mask = ((UInt32)1 << 20) - 1; + dicBufSize = ((SizeT)dictSize + mask) & ~mask; + if (dicBufSize < dictSize) + dicBufSize = dictSize; + } + + if (!p->dic || dicBufSize != p->dicBufSize) + { + LzmaDec_FreeDict(p, alloc); + p->dic = (Byte *)ISzAlloc_Alloc(alloc, dicBufSize); + if (!p->dic) + { + LzmaDec_FreeProbs(p, alloc); + return SZ_ERROR_MEM; + } + } + p->dicBufSize = dicBufSize; + p->prop = propNew; + return SZ_OK; +} + +SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, + const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode, + ELzmaStatus *status, ISzAllocPtr alloc) +{ + CLzmaDec p; + SRes res; + SizeT outSize = *destLen, inSize = *srcLen; + *destLen = *srcLen = 0; + *status = LZMA_STATUS_NOT_SPECIFIED; + if (inSize < RC_INIT_SIZE) + return SZ_ERROR_INPUT_EOF; + LzmaDec_CONSTRUCT(&p) + RINOK(LzmaDec_AllocateProbs(&p, propData, propSize, alloc)) + p.dic = dest; + p.dicBufSize = outSize; + LzmaDec_Init(&p); + *srcLen = inSize; + res = LzmaDec_DecodeToDic(&p, outSize, src, srcLen, finishMode, status); + 
*destLen = p.dicPos; + if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT) + res = SZ_ERROR_INPUT_EOF; + LzmaDec_FreeProbs(&p, alloc); + return res; +} diff --git a/external/unarr/lzmasdk/LzmaDec.h b/external/unarr/lzmasdk/LzmaDec.h new file mode 100644 index 00000000..b0ce28fa --- /dev/null +++ b/external/unarr/lzmasdk/LzmaDec.h @@ -0,0 +1,237 @@ +/* LzmaDec.h -- LZMA Decoder +2023-04-02 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_LZMA_DEC_H +#define ZIP7_INC_LZMA_DEC_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +/* #define Z7_LZMA_PROB32 */ +/* Z7_LZMA_PROB32 can increase the speed on some CPUs, + but memory usage for CLzmaDec::probs will be doubled in that case */ + +typedef +#ifdef Z7_LZMA_PROB32 + UInt32 +#else + UInt16 +#endif + CLzmaProb; + + +/* ---------- LZMA Properties ---------- */ + +#define LZMA_PROPS_SIZE 5 + +typedef struct +{ + Byte lc; + Byte lp; + Byte pb; + Byte _pad_; + UInt32 dicSize; +} CLzmaProps; + +/* LzmaProps_Decode - decodes properties +Returns: + SZ_OK + SZ_ERROR_UNSUPPORTED - Unsupported properties +*/ + +SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size); + + +/* ---------- LZMA Decoder state ---------- */ + +/* LZMA_REQUIRED_INPUT_MAX = number of required input bytes for worst case. + Num bits = log2((2^11 / 31) ^ 22) + 26 < 134 + 26 = 160; */ + +#define LZMA_REQUIRED_INPUT_MAX 20 + +typedef struct +{ + /* Don't change this structure. ASM code can use it. 
*/ + CLzmaProps prop; + CLzmaProb *probs; + CLzmaProb *probs_1664; + Byte *dic; + SizeT dicBufSize; + SizeT dicPos; + const Byte *buf; + UInt32 range; + UInt32 code; + UInt32 processedPos; + UInt32 checkDicSize; + UInt32 reps[4]; + UInt32 state; + UInt32 remainLen; + + UInt32 numProbs; + unsigned tempBufSize; + Byte tempBuf[LZMA_REQUIRED_INPUT_MAX]; +} CLzmaDec; + +#define LzmaDec_CONSTRUCT(p) { (p)->dic = NULL; (p)->probs = NULL; } +#define LzmaDec_Construct(p) LzmaDec_CONSTRUCT(p) + +void LzmaDec_Init(CLzmaDec *p); + +/* There are two types of LZMA streams: + - Stream with end mark. That end mark adds about 6 bytes to compressed size. + - Stream without end mark. You must know exact uncompressed size to decompress such stream. */ + +typedef enum +{ + LZMA_FINISH_ANY, /* finish at any point */ + LZMA_FINISH_END /* block must be finished at the end */ +} ELzmaFinishMode; + +/* ELzmaFinishMode has meaning only if the decoding reaches output limit !!! + + You must use LZMA_FINISH_END, when you know that current output buffer + covers last bytes of block. In other cases you must use LZMA_FINISH_ANY. + + If LZMA decoder sees end marker before reaching output limit, it returns SZ_OK, + and output value of destLen will be less than output buffer size limit. + You can check status result also. + + You can use multiple checks to test data integrity after full decompression: + 1) Check Result and "status" variable. + 2) Check that output(destLen) = uncompressedSize, if you know real uncompressedSize. + 3) Check that output(srcLen) = compressedSize, if you know real compressedSize. + You must use correct finish mode in that case. */ + +typedef enum +{ + LZMA_STATUS_NOT_SPECIFIED, /* use main error code instead */ + LZMA_STATUS_FINISHED_WITH_MARK, /* stream was finished with end mark. 
*/ + LZMA_STATUS_NOT_FINISHED, /* stream was not finished */ + LZMA_STATUS_NEEDS_MORE_INPUT, /* you must provide more input bytes */ + LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK /* there is probability that stream was finished without end mark */ +} ELzmaStatus; + +/* ELzmaStatus is used only as output value for function call */ + + +/* ---------- Interfaces ---------- */ + +/* There are 3 levels of interfaces: + 1) Dictionary Interface + 2) Buffer Interface + 3) One Call Interface + You can select any of these interfaces, but don't mix functions from different + groups for same object. */ + + +/* There are two variants to allocate state for Dictionary Interface: + 1) LzmaDec_Allocate / LzmaDec_Free + 2) LzmaDec_AllocateProbs / LzmaDec_FreeProbs + You can use variant 2, if you set dictionary buffer manually. + For Buffer Interface you must always use variant 1. + +LzmaDec_Allocate* can return: + SZ_OK + SZ_ERROR_MEM - Memory allocation error + SZ_ERROR_UNSUPPORTED - Unsupported properties +*/ + +SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc); +void LzmaDec_FreeProbs(CLzmaDec *p, ISzAllocPtr alloc); + +SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc); +void LzmaDec_Free(CLzmaDec *p, ISzAllocPtr alloc); + +/* ---------- Dictionary Interface ---------- */ + +/* You can use it, if you want to eliminate the overhead for data copying from + dictionary to some other external buffer. + You must work with CLzmaDec variables directly in this interface. + + STEPS: + LzmaDec_Construct() + LzmaDec_Allocate() + for (each new stream) + { + LzmaDec_Init() + while (it needs more decompression) + { + LzmaDec_DecodeToDic() + use data from CLzmaDec::dic and update CLzmaDec::dicPos + } + } + LzmaDec_Free() +*/ + +/* LzmaDec_DecodeToDic + + The decoding to internal dictionary buffer (CLzmaDec::dic). + You must manually update CLzmaDec::dicPos, if it reaches CLzmaDec::dicBufSize !!! 
+ +finishMode: + It has meaning only if the decoding reaches output limit (dicLimit). + LZMA_FINISH_ANY - Decode just dicLimit bytes. + LZMA_FINISH_END - Stream must be finished after dicLimit. + +Returns: + SZ_OK + status: + LZMA_STATUS_FINISHED_WITH_MARK + LZMA_STATUS_NOT_FINISHED + LZMA_STATUS_NEEDS_MORE_INPUT + LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK + SZ_ERROR_DATA - Data error + SZ_ERROR_FAIL - Some unexpected error: internal error of code, memory corruption or hardware failure +*/ + +SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, + const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status); + + +/* ---------- Buffer Interface ---------- */ + +/* It's zlib-like interface. + See LzmaDec_DecodeToDic description for information about STEPS and return results, + but you must use LzmaDec_DecodeToBuf instead of LzmaDec_DecodeToDic and you don't need + to work with CLzmaDec variables manually. + +finishMode: + It has meaning only if the decoding reaches output limit (*destLen). + LZMA_FINISH_ANY - Decode just destLen bytes. + LZMA_FINISH_END - Stream must be finished after (*destLen). +*/ + +SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, + const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status); + + +/* ---------- One Call Interface ---------- */ + +/* LzmaDecode + +finishMode: + It has meaning only if the decoding reaches output limit (*destLen). + LZMA_FINISH_ANY - Decode just destLen bytes. + LZMA_FINISH_END - Stream must be finished after (*destLen). + +Returns: + SZ_OK + status: + LZMA_STATUS_FINISHED_WITH_MARK + LZMA_STATUS_NOT_FINISHED + LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK + SZ_ERROR_DATA - Data error + SZ_ERROR_MEM - Memory allocation error + SZ_ERROR_UNSUPPORTED - Unsupported properties + SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src). 
+ SZ_ERROR_FAIL - Some unexpected error: internal error of code, memory corruption or hardware failure +*/ + +SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, + const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode, + ELzmaStatus *status, ISzAllocPtr alloc); + +EXTERN_C_END + +#endif diff --git a/external/unarr/lzmasdk/Ppmd.h b/external/unarr/lzmasdk/Ppmd.h new file mode 100644 index 00000000..66b26266 --- /dev/null +++ b/external/unarr/lzmasdk/Ppmd.h @@ -0,0 +1,169 @@ +/* Ppmd.h -- PPMD codec common code +2023-03-05 : Igor Pavlov : Public domain +This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */ + +#ifndef ZIP7_INC_PPMD_H +#define ZIP7_INC_PPMD_H + +#include "CpuArch.h" + +EXTERN_C_BEGIN + +#if defined(MY_CPU_SIZEOF_POINTER) && (MY_CPU_SIZEOF_POINTER == 4) +/* + PPMD code always uses 32-bit internal fields in PPMD structures to store internal references in main block. + if (PPMD_32BIT is defined), the PPMD code stores internal pointers to 32-bit reference fields. + if (PPMD_32BIT is NOT defined), the PPMD code stores internal UInt32 offsets to reference fields. + if (pointer size is 64-bit), then (PPMD_32BIT) mode is not allowed, + if (pointer size is 32-bit), then (PPMD_32BIT) mode is optional, + and it's allowed to disable PPMD_32BIT mode even if pointer is 32-bit. + PPMD code works slightly faster in (PPMD_32BIT) mode. 
+*/ + #define PPMD_32BIT +#endif + +#define PPMD_INT_BITS 7 +#define PPMD_PERIOD_BITS 7 +#define PPMD_BIN_SCALE (1 << (PPMD_INT_BITS + PPMD_PERIOD_BITS)) + +#define PPMD_GET_MEAN_SPEC(summ, shift, round) (((summ) + (1 << ((shift) - (round)))) >> (shift)) +#define PPMD_GET_MEAN(summ) PPMD_GET_MEAN_SPEC((summ), PPMD_PERIOD_BITS, 2) +#define PPMD_UPDATE_PROB_0(prob) ((prob) + (1 << PPMD_INT_BITS) - PPMD_GET_MEAN(prob)) +#define PPMD_UPDATE_PROB_1(prob) ((prob) - PPMD_GET_MEAN(prob)) + +#define PPMD_N1 4 +#define PPMD_N2 4 +#define PPMD_N3 4 +#define PPMD_N4 ((128 + 3 - 1 * PPMD_N1 - 2 * PPMD_N2 - 3 * PPMD_N3) / 4) +#define PPMD_NUM_INDEXES (PPMD_N1 + PPMD_N2 + PPMD_N3 + PPMD_N4) + +MY_CPU_pragma_pack_push_1 +/* Most compilers works OK here even without #pragma pack(push, 1), but some GCC compilers need it. */ + +/* SEE-contexts for PPM-contexts with masked symbols */ +typedef struct +{ + UInt16 Summ; /* Freq */ + Byte Shift; /* Speed of Freq change; low Shift is for fast change */ + Byte Count; /* Count to next change of Shift */ +} CPpmd_See; + +#define Ppmd_See_UPDATE(p) \ + { if ((p)->Shift < PPMD_PERIOD_BITS && --(p)->Count == 0) \ + { (p)->Summ = (UInt16)((p)->Summ << 1); \ + (p)->Count = (Byte)(3 << (p)->Shift++); }} + + +typedef struct +{ + Byte Symbol; + Byte Freq; + UInt16 Successor_0; + UInt16 Successor_1; +} CPpmd_State; + +typedef struct CPpmd_State2_ +{ + Byte Symbol; + Byte Freq; +} CPpmd_State2; + +typedef struct CPpmd_State4_ +{ + UInt16 Successor_0; + UInt16 Successor_1; +} CPpmd_State4; + +MY_CPU_pragma_pop + +/* + PPMD code can write full CPpmd_State structure data to CPpmd*_Context + at (byte offset = 2) instead of some fields of original CPpmd*_Context structure. + + If we use pointers to different types, but that point to shared + memory space, we can have aliasing problem (strict aliasing). 
+ + XLC compiler in -O2 mode can change the order of memory write instructions + in relation to read instructions, if we have use pointers to different types. + + To solve that aliasing problem we use combined CPpmd*_Context structure + with unions that contain the fields from both structures: + the original CPpmd*_Context and CPpmd_State. + So we can access the fields from both structures via one pointer, + and the compiler doesn't change the order of write instructions + in relation to read instructions. + + If we don't use memory write instructions to shared memory in + some local code, and we use only reading instructions (read only), + then probably it's safe to use pointers to different types for reading. +*/ + + + +#ifdef PPMD_32BIT + + #define Ppmd_Ref_Type(type) type * + #define Ppmd_GetRef(p, ptr) (ptr) + #define Ppmd_GetPtr(p, ptr) (ptr) + #define Ppmd_GetPtr_Type(p, ptr, note_type) (ptr) + +#else + + #define Ppmd_Ref_Type(type) UInt32 + #define Ppmd_GetRef(p, ptr) ((UInt32)((Byte *)(ptr) - (p)->Base)) + #define Ppmd_GetPtr(p, offs) ((void *)((p)->Base + (offs))) + #define Ppmd_GetPtr_Type(p, offs, type) ((type *)Ppmd_GetPtr(p, offs)) + +#endif // PPMD_32BIT + + +typedef Ppmd_Ref_Type(CPpmd_State) CPpmd_State_Ref; +typedef Ppmd_Ref_Type(void) CPpmd_Void_Ref; +typedef Ppmd_Ref_Type(Byte) CPpmd_Byte_Ref; + + +/* +#ifdef MY_CPU_LE_UNALIGN +// the unaligned 32-bit access latency can be too large, if the data is not in L1 cache. +#define Ppmd_GET_SUCCESSOR(p) ((CPpmd_Void_Ref)*(const UInt32 *)(const void *)&(p)->Successor_0) +#define Ppmd_SET_SUCCESSOR(p, v) *(UInt32 *)(void *)(void *)&(p)->Successor_0 = (UInt32)(v) + +#else +*/ + +/* + We can write 16-bit halves to 32-bit (Successor) field in any selected order. + But the native order is more consistent way. + So we use the native order, if LE/BE order can be detected here at compile time. 
+*/ + +#ifdef MY_CPU_BE + + #define Ppmd_GET_SUCCESSOR(p) \ + ( (CPpmd_Void_Ref) (((UInt32)(p)->Successor_0 << 16) | (p)->Successor_1) ) + + #define Ppmd_SET_SUCCESSOR(p, v) { \ + (p)->Successor_0 = (UInt16)(((UInt32)(v) >> 16) /* & 0xFFFF */); \ + (p)->Successor_1 = (UInt16)((UInt32)(v) /* & 0xFFFF */); } + +#else + + #define Ppmd_GET_SUCCESSOR(p) \ + ( (CPpmd_Void_Ref) ((p)->Successor_0 | ((UInt32)(p)->Successor_1 << 16)) ) + + #define Ppmd_SET_SUCCESSOR(p, v) { \ + (p)->Successor_0 = (UInt16)((UInt32)(v) /* & 0xFFFF */); \ + (p)->Successor_1 = (UInt16)(((UInt32)(v) >> 16) /* & 0xFFFF */); } + +#endif + +// #endif + + +#define PPMD_SetAllBitsIn256Bytes(p) \ + { size_t z; for (z = 0; z < 256 / sizeof(p[0]); z += 8) { \ + p[z+7] = p[z+6] = p[z+5] = p[z+4] = p[z+3] = p[z+2] = p[z+1] = p[z+0] = ~(size_t)0; }} + +EXTERN_C_END + +#endif diff --git a/external/unarr/lzmasdk/Ppmd7.c b/external/unarr/lzmasdk/Ppmd7.c new file mode 100644 index 00000000..6e1307e2 --- /dev/null +++ b/external/unarr/lzmasdk/Ppmd7.c @@ -0,0 +1,1122 @@ +/* Ppmd7.c -- PPMdH codec +2023-04-02 : Igor Pavlov : Public domain +This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */ + +#include "Precomp.h" + +#include + +#include "Ppmd7.h" + +/* define PPMD7_ORDER_0_SUPPPORT to suport order-0 mode, unsupported by orignal PPMd var.H. 
code */ +// #define PPMD7_ORDER_0_SUPPPORT + +MY_ALIGN(16) +static const Byte PPMD7_kExpEscape[16] = { 25, 14, 9, 7, 5, 5, 4, 4, 4, 3, 3, 3, 2, 2, 2, 2 }; +MY_ALIGN(16) +static const UInt16 PPMD7_kInitBinEsc[] = { 0x3CDD, 0x1F3F, 0x59BF, 0x48F3, 0x64A1, 0x5ABC, 0x6632, 0x6051}; + +#define MAX_FREQ 124 +#define UNIT_SIZE 12 + +#define U2B(nu) ((UInt32)(nu) * UNIT_SIZE) +#define U2I(nu) (p->Units2Indx[(size_t)(nu) - 1]) +#define I2U(indx) ((unsigned)p->Indx2Units[indx]) +#define I2U_UInt16(indx) ((UInt16)p->Indx2Units[indx]) + +#define REF(ptr) Ppmd_GetRef(p, ptr) + +#define STATS_REF(ptr) ((CPpmd_State_Ref)REF(ptr)) + +#define CTX(ref) ((CPpmd7_Context *)Ppmd7_GetContext(p, ref)) +#define STATS(ctx) Ppmd7_GetStats(p, ctx) +#define ONE_STATE(ctx) Ppmd7Context_OneState(ctx) +#define SUFFIX(ctx) CTX((ctx)->Suffix) + +typedef CPpmd7_Context * PPMD7_CTX_PTR; + +struct CPpmd7_Node_; + +typedef Ppmd_Ref_Type(struct CPpmd7_Node_) CPpmd7_Node_Ref; + +typedef struct CPpmd7_Node_ +{ + UInt16 Stamp; /* must be at offset 0 as CPpmd7_Context::NumStats. Stamp=0 means free */ + UInt16 NU; + CPpmd7_Node_Ref Next; /* must be at offset >= 4 */ + CPpmd7_Node_Ref Prev; +} CPpmd7_Node; + +#define NODE(r) Ppmd_GetPtr_Type(p, r, CPpmd7_Node) + +void Ppmd7_Construct(CPpmd7 *p) +{ + unsigned i, k, m; + + p->Base = NULL; + + for (i = 0, k = 0; i < PPMD_NUM_INDEXES; i++) + { + unsigned step = (i >= 12 ? 
4 : (i >> 2) + 1); + do { p->Units2Indx[k++] = (Byte)i; } while (--step); + p->Indx2Units[i] = (Byte)k; + } + + p->NS2BSIndx[0] = (0 << 1); + p->NS2BSIndx[1] = (1 << 1); + memset(p->NS2BSIndx + 2, (2 << 1), 9); + memset(p->NS2BSIndx + 11, (3 << 1), 256 - 11); + + for (i = 0; i < 3; i++) + p->NS2Indx[i] = (Byte)i; + + for (m = i, k = 1; i < 256; i++) + { + p->NS2Indx[i] = (Byte)m; + if (--k == 0) + k = (++m) - 2; + } + + memcpy(p->ExpEscape, PPMD7_kExpEscape, 16); +} + + +void Ppmd7_Free(CPpmd7 *p, ISzAllocPtr alloc) +{ + ISzAlloc_Free(alloc, p->Base); + p->Size = 0; + p->Base = NULL; +} + + +BoolInt Ppmd7_Alloc(CPpmd7 *p, UInt32 size, ISzAllocPtr alloc) +{ + if (!p->Base || p->Size != size) + { + Ppmd7_Free(p, alloc); + p->AlignOffset = (4 - size) & 3; + if ((p->Base = (Byte *)ISzAlloc_Alloc(alloc, p->AlignOffset + size)) == NULL) + return False; + p->Size = size; + } + return True; +} + + + +// ---------- Internal Memory Allocator ---------- + +/* We can use CPpmd7_Node in list of free units (as in Ppmd8) + But we still need one additional list walk pass in Ppmd7_GlueFreeBlocks(). 
+ So we use simple CPpmd_Void_Ref instead of CPpmd7_Node in Ppmd7_InsertNode() / Ppmd7_RemoveNode() +*/ + +#define EMPTY_NODE 0 + + +static void Ppmd7_InsertNode(CPpmd7 *p, void *node, unsigned indx) +{ + *((CPpmd_Void_Ref *)node) = p->FreeList[indx]; + // ((CPpmd7_Node *)node)->Next = (CPpmd7_Node_Ref)p->FreeList[indx]; + + p->FreeList[indx] = REF(node); + +} + + +static void *Ppmd7_RemoveNode(CPpmd7 *p, unsigned indx) +{ + CPpmd_Void_Ref *node = (CPpmd_Void_Ref *)Ppmd7_GetPtr(p, p->FreeList[indx]); + p->FreeList[indx] = *node; + // CPpmd7_Node *node = NODE((CPpmd7_Node_Ref)p->FreeList[indx]); + // p->FreeList[indx] = node->Next; + return node; +} + + +static void Ppmd7_SplitBlock(CPpmd7 *p, void *ptr, unsigned oldIndx, unsigned newIndx) +{ + unsigned i, nu = I2U(oldIndx) - I2U(newIndx); + ptr = (Byte *)ptr + U2B(I2U(newIndx)); + if (I2U(i = U2I(nu)) != nu) + { + unsigned k = I2U(--i); + Ppmd7_InsertNode(p, ((Byte *)ptr) + U2B(k), nu - k - 1); + } + Ppmd7_InsertNode(p, ptr, i); +} + + +/* we use CPpmd7_Node_Union union to solve XLC -O2 strict pointer aliasing problem */ + +typedef union +{ + CPpmd7_Node Node; + CPpmd7_Node_Ref NextRef; +} CPpmd7_Node_Union; + +/* Original PPmdH (Ppmd7) code uses doubly linked list in Ppmd7_GlueFreeBlocks() + we use single linked list similar to Ppmd8 code */ + + +static void Ppmd7_GlueFreeBlocks(CPpmd7 *p) +{ + /* + we use first UInt16 field of 12-bytes UNITs as record type stamp + CPpmd_State { Byte Symbol; Byte Freq; : Freq != 0 + CPpmd7_Context { UInt16 NumStats; : NumStats != 0 + CPpmd7_Node { UInt16 Stamp : Stamp == 0 for free record + : Stamp == 1 for head record and guard + Last 12-bytes UNIT in array is always contains 12-bytes order-0 CPpmd7_Context record. + */ + CPpmd7_Node_Ref head, n = 0; + + p->GlueCount = 255; + + + /* we set guard NODE at LoUnit */ + if (p->LoUnit != p->HiUnit) + ((CPpmd7_Node *)(void *)p->LoUnit)->Stamp = 1; + + { + /* Create list of free blocks. 
+ We still need one additional list walk pass before Glue. */ + unsigned i; + for (i = 0; i < PPMD_NUM_INDEXES; i++) + { + const UInt16 nu = I2U_UInt16(i); + CPpmd7_Node_Ref next = (CPpmd7_Node_Ref)p->FreeList[i]; + p->FreeList[i] = 0; + while (next != 0) + { + /* Don't change the order of the following commands: */ + CPpmd7_Node_Union *un = (CPpmd7_Node_Union *)NODE(next); + const CPpmd7_Node_Ref tmp = next; + next = un->NextRef; + un->Node.Stamp = EMPTY_NODE; + un->Node.NU = nu; + un->Node.Next = n; + n = tmp; + } + } + } + + head = n; + /* Glue and Fill must walk the list in same direction */ + { + /* Glue free blocks */ + CPpmd7_Node_Ref *prev = &head; + while (n) + { + CPpmd7_Node *node = NODE(n); + UInt32 nu = node->NU; + n = node->Next; + if (nu == 0) + { + *prev = n; + continue; + } + prev = &node->Next; + for (;;) + { + CPpmd7_Node *node2 = node + nu; + nu += node2->NU; + if (node2->Stamp != EMPTY_NODE || nu >= 0x10000) + break; + node->NU = (UInt16)nu; + node2->NU = 0; + } + } + } + + /* Fill lists of free blocks */ + for (n = head; n != 0;) + { + CPpmd7_Node *node = NODE(n); + UInt32 nu = node->NU; + unsigned i; + n = node->Next; + if (nu == 0) + continue; + for (; nu > 128; nu -= 128, node += 128) + Ppmd7_InsertNode(p, node, PPMD_NUM_INDEXES - 1); + if (I2U(i = U2I(nu)) != nu) + { + unsigned k = I2U(--i); + Ppmd7_InsertNode(p, node + k, (unsigned)nu - k - 1); + } + Ppmd7_InsertNode(p, node, i); + } +} + + +Z7_NO_INLINE +static void *Ppmd7_AllocUnitsRare(CPpmd7 *p, unsigned indx) +{ + unsigned i; + + if (p->GlueCount == 0) + { + Ppmd7_GlueFreeBlocks(p); + if (p->FreeList[indx] != 0) + return Ppmd7_RemoveNode(p, indx); + } + + i = indx; + + do + { + if (++i == PPMD_NUM_INDEXES) + { + UInt32 numBytes = U2B(I2U(indx)); + Byte *us = p->UnitsStart; + p->GlueCount--; + return ((UInt32)(us - p->Text) > numBytes) ? 
(p->UnitsStart = us - numBytes) : NULL; + } + } + while (p->FreeList[i] == 0); + + { + void *block = Ppmd7_RemoveNode(p, i); + Ppmd7_SplitBlock(p, block, i, indx); + return block; + } +} + + +static void *Ppmd7_AllocUnits(CPpmd7 *p, unsigned indx) +{ + if (p->FreeList[indx] != 0) + return Ppmd7_RemoveNode(p, indx); + { + UInt32 numBytes = U2B(I2U(indx)); + Byte *lo = p->LoUnit; + if ((UInt32)(p->HiUnit - lo) >= numBytes) + { + p->LoUnit = lo + numBytes; + return lo; + } + } + return Ppmd7_AllocUnitsRare(p, indx); +} + + +#define MEM_12_CPY(dest, src, num) \ + { UInt32 *d = (UInt32 *)dest; const UInt32 *z = (const UInt32 *)src; UInt32 n = num; \ + do { d[0] = z[0]; d[1] = z[1]; d[2] = z[2]; z += 3; d += 3; } while (--n); } + + +/* +static void *ShrinkUnits(CPpmd7 *p, void *oldPtr, unsigned oldNU, unsigned newNU) +{ + unsigned i0 = U2I(oldNU); + unsigned i1 = U2I(newNU); + if (i0 == i1) + return oldPtr; + if (p->FreeList[i1] != 0) + { + void *ptr = Ppmd7_RemoveNode(p, i1); + MEM_12_CPY(ptr, oldPtr, newNU) + Ppmd7_InsertNode(p, oldPtr, i0); + return ptr; + } + Ppmd7_SplitBlock(p, oldPtr, i0, i1); + return oldPtr; +} +*/ + + +#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p) +static void SetSuccessor(CPpmd_State *p, CPpmd_Void_Ref v) +{ + Ppmd_SET_SUCCESSOR(p, v) +} + + + +Z7_NO_INLINE +static +void Ppmd7_RestartModel(CPpmd7 *p) +{ + unsigned i, k; + + memset(p->FreeList, 0, sizeof(p->FreeList)); + + p->Text = p->Base + p->AlignOffset; + p->HiUnit = p->Text + p->Size; + p->LoUnit = p->UnitsStart = p->HiUnit - p->Size / 8 / UNIT_SIZE * 7 * UNIT_SIZE; + p->GlueCount = 0; + + p->OrderFall = p->MaxOrder; + p->RunLength = p->InitRL = -(Int32)((p->MaxOrder < 12) ? 
p->MaxOrder : 12) - 1; + p->PrevSuccess = 0; + + { + CPpmd7_Context *mc = (PPMD7_CTX_PTR)(void *)(p->HiUnit -= UNIT_SIZE); /* AllocContext(p); */ + CPpmd_State *s = (CPpmd_State *)p->LoUnit; /* Ppmd7_AllocUnits(p, PPMD_NUM_INDEXES - 1); */ + + p->LoUnit += U2B(256 / 2); + p->MaxContext = p->MinContext = mc; + p->FoundState = s; + + mc->NumStats = 256; + mc->Union2.SummFreq = 256 + 1; + mc->Union4.Stats = REF(s); + mc->Suffix = 0; + + for (i = 0; i < 256; i++, s++) + { + s->Symbol = (Byte)i; + s->Freq = 1; + SetSuccessor(s, 0); + } + + #ifdef PPMD7_ORDER_0_SUPPPORT + if (p->MaxOrder == 0) + { + CPpmd_Void_Ref r = REF(mc); + s = p->FoundState; + for (i = 0; i < 256; i++, s++) + SetSuccessor(s, r); + return; + } + #endif + } + + for (i = 0; i < 128; i++) + + + + for (k = 0; k < 8; k++) + { + unsigned m; + UInt16 *dest = p->BinSumm[i] + k; + const UInt16 val = (UInt16)(PPMD_BIN_SCALE - PPMD7_kInitBinEsc[k] / (i + 2)); + for (m = 0; m < 64; m += 8) + dest[m] = val; + } + + + for (i = 0; i < 25; i++) + { + + CPpmd_See *s = p->See[i]; + + + + unsigned summ = ((5 * i + 10) << (PPMD_PERIOD_BITS - 4)); + for (k = 0; k < 16; k++, s++) + { + s->Summ = (UInt16)summ; + s->Shift = (PPMD_PERIOD_BITS - 4); + s->Count = 4; + } + } + + p->DummySee.Summ = 0; /* unused */ + p->DummySee.Shift = PPMD_PERIOD_BITS; + p->DummySee.Count = 64; /* unused */ +} + + +void Ppmd7_Init(CPpmd7 *p, unsigned maxOrder) +{ + p->MaxOrder = maxOrder; + + Ppmd7_RestartModel(p); +} + + + +/* + Ppmd7_CreateSuccessors() + It's called when (FoundState->Successor) is RAW-Successor, + that is the link to position in Raw text. + So we create Context records and write the links to + FoundState->Successor and to identical RAW-Successors in suffix + contexts of MinContex. 
+ + The function returns: + if (OrderFall == 0) then MinContext is already at MAX order, + { return pointer to new or existing context of same MAX order } + else + { return pointer to new real context that will be (Order+1) in comparison with MinContext + + also it can return pointer to real context of same order, +*/ + +Z7_NO_INLINE +static PPMD7_CTX_PTR Ppmd7_CreateSuccessors(CPpmd7 *p) +{ + PPMD7_CTX_PTR c = p->MinContext; + CPpmd_Byte_Ref upBranch = (CPpmd_Byte_Ref)SUCCESSOR(p->FoundState); + Byte newSym, newFreq; + unsigned numPs = 0; + CPpmd_State *ps[PPMD7_MAX_ORDER]; + + if (p->OrderFall != 0) + ps[numPs++] = p->FoundState; + + while (c->Suffix) + { + CPpmd_Void_Ref successor; + CPpmd_State *s; + c = SUFFIX(c); + + + if (c->NumStats != 1) + { + Byte sym = p->FoundState->Symbol; + for (s = STATS(c); s->Symbol != sym; s++); + + } + else + { + s = ONE_STATE(c); + + } + successor = SUCCESSOR(s); + if (successor != upBranch) + { + // (c) is real record Context here, + c = CTX(successor); + if (numPs == 0) + { + // (c) is real record MAX Order Context here, + // So we don't need to create any new contexts. + return c; + } + break; + } + ps[numPs++] = s; + } + + // All created contexts will have single-symbol with new RAW-Successor + // All new RAW-Successors will point to next position in RAW text + // after FoundState->Successor + + newSym = *(const Byte *)Ppmd7_GetPtr(p, upBranch); + upBranch++; + + + if (c->NumStats == 1) + newFreq = ONE_STATE(c)->Freq; + else + { + UInt32 cf, s0; + CPpmd_State *s; + for (s = STATS(c); s->Symbol != newSym; s++); + cf = (UInt32)s->Freq - 1; + s0 = (UInt32)c->Union2.SummFreq - c->NumStats - cf; + /* + cf - is frequency of symbol that will be Successor in new context records. + s0 - is commulative frequency sum of another symbols from parent context. 
+ max(newFreq)= (s->Freq + 1), when (s0 == 1) + we have requirement (Ppmd7Context_OneState()->Freq <= 128) in BinSumm[] + so (s->Freq < 128) - is requirement for multi-symbol contexts + */ + newFreq = (Byte)(1 + ((2 * cf <= s0) ? (5 * cf > s0) : (2 * cf + s0 - 1) / (2 * s0) + 1)); + } + + // Create new single-symbol contexts from low order to high order in loop + + do + { + PPMD7_CTX_PTR c1; + /* = AllocContext(p); */ + if (p->HiUnit != p->LoUnit) + c1 = (PPMD7_CTX_PTR)(void *)(p->HiUnit -= UNIT_SIZE); + else if (p->FreeList[0] != 0) + c1 = (PPMD7_CTX_PTR)Ppmd7_RemoveNode(p, 0); + else + { + c1 = (PPMD7_CTX_PTR)Ppmd7_AllocUnitsRare(p, 0); + if (!c1) + return NULL; + } + + c1->NumStats = 1; + ONE_STATE(c1)->Symbol = newSym; + ONE_STATE(c1)->Freq = newFreq; + SetSuccessor(ONE_STATE(c1), upBranch); + c1->Suffix = REF(c); + SetSuccessor(ps[--numPs], REF(c1)); + c = c1; + } + while (numPs != 0); + + return c; +} + + + +#define SWAP_STATES(s) \ + { CPpmd_State tmp = s[0]; s[0] = s[-1]; s[-1] = tmp; } + + +void Ppmd7_UpdateModel(CPpmd7 *p); +Z7_NO_INLINE +void Ppmd7_UpdateModel(CPpmd7 *p) +{ + CPpmd_Void_Ref maxSuccessor, minSuccessor; + PPMD7_CTX_PTR c, mc; + unsigned s0, ns; + + + + if (p->FoundState->Freq < MAX_FREQ / 4 && p->MinContext->Suffix != 0) + { + /* Update Freqs in Suffix Context */ + + c = SUFFIX(p->MinContext); + + if (c->NumStats == 1) + { + CPpmd_State *s = ONE_STATE(c); + if (s->Freq < 32) + s->Freq++; + } + else + { + CPpmd_State *s = STATS(c); + Byte sym = p->FoundState->Symbol; + + if (s->Symbol != sym) + { + do + { + // s++; if (s->Symbol == sym) break; + s++; + } + while (s->Symbol != sym); + + if (s[0].Freq >= s[-1].Freq) + { + SWAP_STATES(s) + s--; + } + } + + if (s->Freq < MAX_FREQ - 9) + { + s->Freq = (Byte)(s->Freq + 2); + c->Union2.SummFreq = (UInt16)(c->Union2.SummFreq + 2); + } + } + } + + + if (p->OrderFall == 0) + { + /* MAX ORDER context */ + /* (FoundState->Successor) is RAW-Successor. 
*/ + p->MaxContext = p->MinContext = Ppmd7_CreateSuccessors(p); + if (!p->MinContext) + { + Ppmd7_RestartModel(p); + return; + } + SetSuccessor(p->FoundState, REF(p->MinContext)); + return; + } + + + /* NON-MAX ORDER context */ + + { + Byte *text = p->Text; + *text++ = p->FoundState->Symbol; + p->Text = text; + if (text >= p->UnitsStart) + { + Ppmd7_RestartModel(p); + return; + } + maxSuccessor = REF(text); + } + + minSuccessor = SUCCESSOR(p->FoundState); + + if (minSuccessor) + { + // there is Successor for FoundState in MinContext. + // So the next context will be one order higher than MinContext. + + if (minSuccessor <= maxSuccessor) + { + // minSuccessor is RAW-Successor. So we will create real contexts records: + PPMD7_CTX_PTR cs = Ppmd7_CreateSuccessors(p); + if (!cs) + { + Ppmd7_RestartModel(p); + return; + } + minSuccessor = REF(cs); + } + + // minSuccessor now is real Context pointer that points to existing (Order+1) context + + if (--p->OrderFall == 0) + { + /* + if we move to MaxOrder context, then minSuccessor will be common Succesor for both: + MinContext that is (MaxOrder - 1) + MaxContext that is (MaxOrder) + so we don't need new RAW-Successor, and we can use real minSuccessor + as succssors for both MinContext and MaxContext. + */ + maxSuccessor = minSuccessor; + + /* + if (MaxContext != MinContext) + { + there was order fall from MaxOrder and we don't need current symbol + to transfer some RAW-Succesors to real contexts. + So we roll back pointer in raw data for one position. + } + */ + p->Text -= (p->MaxContext != p->MinContext); + } + } + else + { + /* + FoundState has NULL-Successor here. + And only root 0-order context can contain NULL-Successors. + We change Successor in FoundState to RAW-Successor, + And next context will be same 0-order root Context. 
+ */ + SetSuccessor(p->FoundState, maxSuccessor); + minSuccessor = REF(p->MinContext); + } + + mc = p->MinContext; + c = p->MaxContext; + + p->MaxContext = p->MinContext = CTX(minSuccessor); + + if (c == mc) + return; + + // s0 : is pure Escape Freq + s0 = mc->Union2.SummFreq - (ns = mc->NumStats) - ((unsigned)p->FoundState->Freq - 1); + + do + { + unsigned ns1; + UInt32 sum; + + if ((ns1 = c->NumStats) != 1) + { + if ((ns1 & 1) == 0) + { + /* Expand for one UNIT */ + unsigned oldNU = ns1 >> 1; + unsigned i = U2I(oldNU); + if (i != U2I((size_t)oldNU + 1)) + { + void *ptr = Ppmd7_AllocUnits(p, i + 1); + void *oldPtr; + if (!ptr) + { + Ppmd7_RestartModel(p); + return; + } + oldPtr = STATS(c); + MEM_12_CPY(ptr, oldPtr, oldNU) + Ppmd7_InsertNode(p, oldPtr, i); + c->Union4.Stats = STATS_REF(ptr); + } + } + sum = c->Union2.SummFreq; + /* max increase of Escape_Freq is 3 here. + total increase of Union2.SummFreq for all symbols is less than 256 here */ + sum += (UInt32)(2 * ns1 < ns) + 2 * ((unsigned)(4 * ns1 <= ns) & (sum <= 8 * ns1)); + /* original PPMdH uses 16-bit variable for (sum) here. + But (sum < 0x9000). So we don't truncate (sum) to 16-bit */ + // sum = (UInt16)sum; + } + else + { + // instead of One-symbol context we create 2-symbol context + CPpmd_State *s = (CPpmd_State*)Ppmd7_AllocUnits(p, 0); + if (!s) + { + Ppmd7_RestartModel(p); + return; + } + { + unsigned freq = c->Union2.State2.Freq; + // s = *ONE_STATE(c); + s->Symbol = c->Union2.State2.Symbol; + s->Successor_0 = c->Union4.State4.Successor_0; + s->Successor_1 = c->Union4.State4.Successor_1; + // SetSuccessor(s, c->Union4.Stats); // call it only for debug purposes to check the order of + // (Successor_0 and Successor_1) in LE/BE. + c->Union4.Stats = REF(s); + if (freq < MAX_FREQ / 4 - 1) + freq <<= 1; + else + freq = MAX_FREQ - 4; + // (max(s->freq) == 120), when we convert from 1-symbol into 2-symbol context + s->Freq = (Byte)freq; + // max(InitEsc = PPMD7_kExpEscape[*]) is 25. 
So the max(escapeFreq) is 26 here + sum = freq + p->InitEsc + (ns > 3); + } + } + + { + CPpmd_State *s = STATS(c) + ns1; + UInt32 cf = 2 * (sum + 6) * (UInt32)p->FoundState->Freq; + UInt32 sf = (UInt32)s0 + sum; + s->Symbol = p->FoundState->Symbol; + c->NumStats = (UInt16)(ns1 + 1); + SetSuccessor(s, maxSuccessor); + + if (cf < 6 * sf) + { + cf = (UInt32)1 + (cf > sf) + (cf >= 4 * sf); + sum += 3; + /* It can add (0, 1, 2) to Escape_Freq */ + } + else + { + cf = (UInt32)4 + (cf >= 9 * sf) + (cf >= 12 * sf) + (cf >= 15 * sf); + sum += cf; + } + + c->Union2.SummFreq = (UInt16)sum; + s->Freq = (Byte)cf; + } + c = SUFFIX(c); + } + while (c != mc); +} + + + +Z7_NO_INLINE +static void Ppmd7_Rescale(CPpmd7 *p) +{ + unsigned i, adder, sumFreq, escFreq; + CPpmd_State *stats = STATS(p->MinContext); + CPpmd_State *s = p->FoundState; + + /* Sort the list by Freq */ + if (s != stats) + { + CPpmd_State tmp = *s; + do + s[0] = s[-1]; + while (--s != stats); + *s = tmp; + } + + sumFreq = s->Freq; + escFreq = p->MinContext->Union2.SummFreq - sumFreq; + + /* + if (p->OrderFall == 0), adder = 0 : it's allowed to remove symbol from MAX Order context + if (p->OrderFall != 0), adder = 1 : it's NOT allowed to remove symbol from NON-MAX Order context + */ + + adder = (p->OrderFall != 0); + + #ifdef PPMD7_ORDER_0_SUPPPORT + adder |= (p->MaxOrder == 0); // we don't remove symbols from order-0 context + #endif + + sumFreq = (sumFreq + 4 + adder) >> 1; + i = (unsigned)p->MinContext->NumStats - 1; + s->Freq = (Byte)sumFreq; + + do + { + unsigned freq = (++s)->Freq; + escFreq -= freq; + freq = (freq + adder) >> 1; + sumFreq += freq; + s->Freq = (Byte)freq; + if (freq > s[-1].Freq) + { + CPpmd_State tmp = *s; + CPpmd_State *s1 = s; + do + { + s1[0] = s1[-1]; + } + while (--s1 != stats && freq > s1[-1].Freq); + *s1 = tmp; + } + } + while (--i); + + if (s->Freq == 0) + { + /* Remove all items with Freq == 0 */ + CPpmd7_Context *mc; + unsigned numStats, numStatsNew, n0, n1; + + i = 0; do { i++; } 
while ((--s)->Freq == 0); + + /* We increase (escFreq) for the number of removed symbols. + So we will have (0.5) increase for Escape_Freq in avarage per + removed symbol after Escape_Freq halving */ + escFreq += i; + mc = p->MinContext; + numStats = mc->NumStats; + numStatsNew = numStats - i; + mc->NumStats = (UInt16)(numStatsNew); + n0 = (numStats + 1) >> 1; + + if (numStatsNew == 1) + { + /* Create Single-Symbol context */ + unsigned freq = stats->Freq; + + do + { + escFreq >>= 1; + freq = (freq + 1) >> 1; + } + while (escFreq > 1); + + s = ONE_STATE(mc); + *s = *stats; + s->Freq = (Byte)freq; // (freq <= 260 / 4) + p->FoundState = s; + Ppmd7_InsertNode(p, stats, U2I(n0)); + return; + } + + n1 = (numStatsNew + 1) >> 1; + if (n0 != n1) + { + // p->MinContext->Union4.Stats = STATS_REF(ShrinkUnits(p, stats, n0, n1)); + unsigned i0 = U2I(n0); + unsigned i1 = U2I(n1); + if (i0 != i1) + { + if (p->FreeList[i1] != 0) + { + void *ptr = Ppmd7_RemoveNode(p, i1); + p->MinContext->Union4.Stats = STATS_REF(ptr); + MEM_12_CPY(ptr, (const void *)stats, n1) + Ppmd7_InsertNode(p, stats, i0); + } + else + Ppmd7_SplitBlock(p, stats, i0, i1); + } + } + } + { + CPpmd7_Context *mc = p->MinContext; + mc->Union2.SummFreq = (UInt16)(sumFreq + escFreq - (escFreq >> 1)); + // Escape_Freq halving here + p->FoundState = STATS(mc); + } +} + + +CPpmd_See *Ppmd7_MakeEscFreq(CPpmd7 *p, unsigned numMasked, UInt32 *escFreq) +{ + CPpmd_See *see; + const CPpmd7_Context *mc = p->MinContext; + unsigned numStats = mc->NumStats; + if (numStats != 256) + { + unsigned nonMasked = numStats - numMasked; + see = p->See[(unsigned)p->NS2Indx[(size_t)nonMasked - 1]] + + (nonMasked < (unsigned)SUFFIX(mc)->NumStats - numStats) + + 2 * (unsigned)(mc->Union2.SummFreq < 11 * numStats) + + 4 * (unsigned)(numMasked > nonMasked) + + p->HiBitsFlag; + { + // if (see->Summ) field is larger than 16-bit, we need only low 16 bits of Summ + unsigned summ = (UInt16)see->Summ; // & 0xFFFF + unsigned r = (summ >> see->Shift); + 
see->Summ = (UInt16)(summ - r); + *escFreq = r + (r == 0); + } + } + else + { + see = &p->DummySee; + *escFreq = 1; + } + return see; +} + + +static void Ppmd7_NextContext(CPpmd7 *p) +{ + PPMD7_CTX_PTR c = CTX(SUCCESSOR(p->FoundState)); + if (p->OrderFall == 0 && (const Byte *)c > p->Text) + p->MaxContext = p->MinContext = c; + else + Ppmd7_UpdateModel(p); +} + + +void Ppmd7_Update1(CPpmd7 *p) +{ + CPpmd_State *s = p->FoundState; + unsigned freq = s->Freq; + freq += 4; + p->MinContext->Union2.SummFreq = (UInt16)(p->MinContext->Union2.SummFreq + 4); + s->Freq = (Byte)freq; + if (freq > s[-1].Freq) + { + SWAP_STATES(s) + p->FoundState = --s; + if (freq > MAX_FREQ) + Ppmd7_Rescale(p); + } + Ppmd7_NextContext(p); +} + + +void Ppmd7_Update1_0(CPpmd7 *p) +{ + CPpmd_State *s = p->FoundState; + CPpmd7_Context *mc = p->MinContext; + unsigned freq = s->Freq; + unsigned summFreq = mc->Union2.SummFreq; + p->PrevSuccess = (2 * freq > summFreq); + p->RunLength += (int)p->PrevSuccess; + mc->Union2.SummFreq = (UInt16)(summFreq + 4); + freq += 4; + s->Freq = (Byte)freq; + if (freq > MAX_FREQ) + Ppmd7_Rescale(p); + Ppmd7_NextContext(p); +} + + +/* +void Ppmd7_UpdateBin(CPpmd7 *p) +{ + unsigned freq = p->FoundState->Freq; + p->FoundState->Freq = (Byte)(freq + (freq < 128)); + p->PrevSuccess = 1; + p->RunLength++; + Ppmd7_NextContext(p); +} +*/ + +void Ppmd7_Update2(CPpmd7 *p) +{ + CPpmd_State *s = p->FoundState; + unsigned freq = s->Freq; + freq += 4; + p->RunLength = p->InitRL; + p->MinContext->Union2.SummFreq = (UInt16)(p->MinContext->Union2.SummFreq + 4); + s->Freq = (Byte)freq; + if (freq > MAX_FREQ) + Ppmd7_Rescale(p); + Ppmd7_UpdateModel(p); +} + + + +/* +PPMd Memory Map: +{ + [ 0 ] contains subset of original raw text, that is required to create context + records, Some symbols are not written, when max order context was reached + [ Text ] free area + [ UnitsStart ] CPpmd_State vectors and CPpmd7_Context records + [ LoUnit ] free area for CPpmd_State and CPpmd7_Context items +[ 
HiUnit ] CPpmd7_Context records + [ Size ] end of array +} + +These addresses don't cross at any time. +And the following condtions is true for addresses: + (0 <= Text < UnitsStart <= LoUnit <= HiUnit <= Size) + +Raw text is BYTE--aligned. +the data in block [ UnitsStart ... Size ] contains 12-bytes aligned UNITs. + +Last UNIT of array at offset (Size - 12) is root order-0 CPpmd7_Context record. +The code can free UNITs memory blocks that were allocated to store CPpmd_State vectors. +The code doesn't free UNITs allocated for CPpmd7_Context records. + +The code calls Ppmd7_RestartModel(), when there is no free memory for allocation. +And Ppmd7_RestartModel() changes the state to orignal start state, with full free block. + + +The code allocates UNITs with the following order: + +Allocation of 1 UNIT for Context record + - from free space (HiUnit) down to (LoUnit) + - from FreeList[0] + - Ppmd7_AllocUnitsRare() + +Ppmd7_AllocUnits() for CPpmd_State vectors: + - from FreeList[i] + - from free space (LoUnit) up to (HiUnit) + - Ppmd7_AllocUnitsRare() + +Ppmd7_AllocUnitsRare() + - if (GlueCount == 0) + { Glue lists, GlueCount = 255, allocate from FreeList[i]] } + - loop for all higher sized FreeList[...] lists + - from (UnitsStart - Text), GlueCount-- + - ERROR + + +Each Record with Context contains the CPpmd_State vector, where each +CPpmd_State contains the link to Successor. +There are 3 types of Successor: + 1) NULL-Successor - NULL pointer. NULL-Successor links can be stored + only in 0-order Root Context Record. + We use 0 value as NULL-Successor + 2) RAW-Successor - the link to position in raw text, + that "RAW-Successor" is being created after first + occurrence of new symbol for some existing context record. + (RAW-Successor > 0). + 3) RECORD-Successor - the link to CPpmd7_Context record of (Order+1), + that record is being created when we go via RAW-Successor again. 
+ +For any successors at any time: the following condtions are true for Successor links: +(NULL-Successor < RAW-Successor < UnitsStart <= RECORD-Successor) + + +---------- Symbol Frequency, SummFreq and Range in Range_Coder ---------- + +CPpmd7_Context::SummFreq = Sum(Stats[].Freq) + Escape_Freq + +The PPMd code tries to fulfill the condition: + (SummFreq <= (256 * 128 = RC::kBot)) + +We have (Sum(Stats[].Freq) <= 256 * 124), because of (MAX_FREQ = 124) +So (4 = 128 - 124) is average reserve for Escape_Freq for each symbol. +If (CPpmd_State::Freq) is not aligned for 4, the reserve can be 5, 6 or 7. +SummFreq and Escape_Freq can be changed in Ppmd7_Rescale() and *Update*() functions. +Ppmd7_Rescale() can remove symbols only from max-order contexts. So Escape_Freq can increase after multiple calls of Ppmd7_Rescale() for +max-order context. + +When the PPMd code still break (Total <= RC::Range) condition in range coder, +we have two ways to resolve that problem: + 1) we can report error, if we want to keep compatibility with original PPMd code that has no fix for such cases. + 2) we can reduce (Total) value to (RC::Range) by reducing (Escape_Freq) part of (Total) value. 
+*/ + +#undef MAX_FREQ +#undef UNIT_SIZE +#undef U2B +#undef U2I +#undef I2U +#undef I2U_UInt16 +#undef REF +#undef STATS_REF +#undef CTX +#undef STATS +#undef ONE_STATE +#undef SUFFIX +#undef NODE +#undef EMPTY_NODE +#undef MEM_12_CPY +#undef SUCCESSOR +#undef SWAP_STATES diff --git a/external/unarr/lzmasdk/Ppmd7.h b/external/unarr/lzmasdk/Ppmd7.h new file mode 100644 index 00000000..d9eb326d --- /dev/null +++ b/external/unarr/lzmasdk/Ppmd7.h @@ -0,0 +1,181 @@ +/* Ppmd7.h -- Ppmd7 (PPMdH) compression codec +2023-04-02 : Igor Pavlov : Public domain +This code is based on: + PPMd var.H (2001): Dmitry Shkarin : Public domain */ + + +#ifndef ZIP7_INC_PPMD7_H +#define ZIP7_INC_PPMD7_H + +#include "Ppmd.h" + +EXTERN_C_BEGIN + +#define PPMD7_MIN_ORDER 2 +#define PPMD7_MAX_ORDER 64 + +#define PPMD7_MIN_MEM_SIZE (1 << 11) +#define PPMD7_MAX_MEM_SIZE (0xFFFFFFFF - 12 * 3) + +struct CPpmd7_Context_; + +typedef Ppmd_Ref_Type(struct CPpmd7_Context_) CPpmd7_Context_Ref; + +// MY_CPU_pragma_pack_push_1 + +typedef struct CPpmd7_Context_ +{ + UInt16 NumStats; + + + union + { + UInt16 SummFreq; + CPpmd_State2 State2; + } Union2; + + union + { + CPpmd_State_Ref Stats; + CPpmd_State4 State4; + } Union4; + + CPpmd7_Context_Ref Suffix; +} CPpmd7_Context; + +// MY_CPU_pragma_pop + +#define Ppmd7Context_OneState(p) ((CPpmd_State *)&(p)->Union2) + + + + +typedef struct +{ + UInt32 Range; + UInt32 Code; + UInt32 Low; + IByteInPtr Stream; +} CPpmd7_RangeDec; + + +typedef struct +{ + UInt32 Range; + Byte Cache; + // Byte _dummy_[3]; + UInt64 Low; + UInt64 CacheSize; + IByteOutPtr Stream; +} CPpmd7z_RangeEnc; + + +typedef struct +{ + CPpmd7_Context *MinContext, *MaxContext; + CPpmd_State *FoundState; + unsigned OrderFall, InitEsc, PrevSuccess, MaxOrder, HiBitsFlag; + Int32 RunLength, InitRL; /* must be 32-bit at least */ + + UInt32 Size; + UInt32 GlueCount; + UInt32 AlignOffset; + Byte *Base, *LoUnit, *HiUnit, *Text, *UnitsStart; + + + + + union + { + CPpmd7_RangeDec dec; + CPpmd7z_RangeEnc 
enc; + } rc; + + Byte Indx2Units[PPMD_NUM_INDEXES + 2]; // +2 for alignment + Byte Units2Indx[128]; + CPpmd_Void_Ref FreeList[PPMD_NUM_INDEXES]; + + Byte NS2BSIndx[256], NS2Indx[256]; + Byte ExpEscape[16]; + CPpmd_See DummySee, See[25][16]; + UInt16 BinSumm[128][64]; + // int LastSymbol; +} CPpmd7; + + +void Ppmd7_Construct(CPpmd7 *p); +BoolInt Ppmd7_Alloc(CPpmd7 *p, UInt32 size, ISzAllocPtr alloc); +void Ppmd7_Free(CPpmd7 *p, ISzAllocPtr alloc); +void Ppmd7_Init(CPpmd7 *p, unsigned maxOrder); +#define Ppmd7_WasAllocated(p) ((p)->Base != NULL) + + +/* ---------- Internal Functions ---------- */ + +#define Ppmd7_GetPtr(p, ptr) Ppmd_GetPtr(p, ptr) +#define Ppmd7_GetContext(p, ptr) Ppmd_GetPtr_Type(p, ptr, CPpmd7_Context) +#define Ppmd7_GetStats(p, ctx) Ppmd_GetPtr_Type(p, (ctx)->Union4.Stats, CPpmd_State) + +void Ppmd7_Update1(CPpmd7 *p); +void Ppmd7_Update1_0(CPpmd7 *p); +void Ppmd7_Update2(CPpmd7 *p); + +#define PPMD7_HiBitsFlag_3(sym) ((((unsigned)sym + 0xC0) >> (8 - 3)) & (1 << 3)) +#define PPMD7_HiBitsFlag_4(sym) ((((unsigned)sym + 0xC0) >> (8 - 4)) & (1 << 4)) +// #define PPMD7_HiBitsFlag_3(sym) ((sym) < 0x40 ? 0 : (1 << 3)) +// #define PPMD7_HiBitsFlag_4(sym) ((sym) < 0x40 ? 
0 : (1 << 4)) + +#define Ppmd7_GetBinSumm(p) \ + &p->BinSumm[(size_t)(unsigned)Ppmd7Context_OneState(p->MinContext)->Freq - 1] \ + [ p->PrevSuccess + ((p->RunLength >> 26) & 0x20) \ + + p->NS2BSIndx[(size_t)Ppmd7_GetContext(p, p->MinContext->Suffix)->NumStats - 1] \ + + PPMD7_HiBitsFlag_4(Ppmd7Context_OneState(p->MinContext)->Symbol) \ + + (p->HiBitsFlag = PPMD7_HiBitsFlag_3(p->FoundState->Symbol)) ] + +CPpmd_See *Ppmd7_MakeEscFreq(CPpmd7 *p, unsigned numMasked, UInt32 *scale); + + +/* +We support two versions of Ppmd7 (PPMdH) methods that use same CPpmd7 structure: + 1) Ppmd7a_*: original PPMdH + 2) Ppmd7z_*: modified PPMdH with 7z Range Coder +Ppmd7_*: the structures and functions that are common for both versions of PPMd7 (PPMdH) +*/ + +/* ---------- Decode ---------- */ + +#define PPMD7_SYM_END (-1) +#define PPMD7_SYM_ERROR (-2) + +/* +You must set (CPpmd7::rc.dec.Stream) before Ppmd7*_RangeDec_Init() + +Ppmd7*_DecodeSymbol() +out: + >= 0 : decoded byte + -1 : PPMD7_SYM_END : End of payload marker + -2 : PPMD7_SYM_ERROR : Data error +*/ + +/* Ppmd7a_* : original PPMdH */ +BoolInt Ppmd7a_RangeDec_Init(CPpmd7_RangeDec *p); +#define Ppmd7a_RangeDec_IsFinishedOK(p) ((p)->Code == 0) +int Ppmd7a_DecodeSymbol(CPpmd7 *p); + +/* Ppmd7z_* : modified PPMdH with 7z Range Coder */ +BoolInt Ppmd7z_RangeDec_Init(CPpmd7_RangeDec *p); +#define Ppmd7z_RangeDec_IsFinishedOK(p) ((p)->Code == 0) +int Ppmd7z_DecodeSymbol(CPpmd7 *p); +// Byte *Ppmd7z_DecodeSymbols(CPpmd7 *p, Byte *buf, const Byte *lim); + + +/* ---------- Encode ---------- */ + +void Ppmd7z_Init_RangeEnc(CPpmd7 *p); +void Ppmd7z_Flush_RangeEnc(CPpmd7 *p); +// void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol); +void Ppmd7z_EncodeSymbols(CPpmd7 *p, const Byte *buf, const Byte *lim); + +EXTERN_C_END + +#endif diff --git a/external/unarr/lzmasdk/Ppmd7Dec.c b/external/unarr/lzmasdk/Ppmd7Dec.c new file mode 100644 index 00000000..83238282 --- /dev/null +++ b/external/unarr/lzmasdk/Ppmd7Dec.c @@ -0,0 +1,312 @@ +/* Ppmd7Dec.c 
-- Ppmd7z (PPMdH with 7z Range Coder) Decoder +2023-04-02 : Igor Pavlov : Public domain +This code is based on: + PPMd var.H (2001): Dmitry Shkarin : Public domain */ + + +#include "Precomp.h" + +#include "Ppmd7.h" + +#define kTopValue ((UInt32)1 << 24) + + +#define READ_BYTE(p) IByteIn_Read((p)->Stream) + +BoolInt Ppmd7z_RangeDec_Init(CPpmd7_RangeDec *p) +{ + unsigned i; + p->Code = 0; + p->Range = 0xFFFFFFFF; + if (READ_BYTE(p) != 0) + return False; + for (i = 0; i < 4; i++) + p->Code = (p->Code << 8) | READ_BYTE(p); + return (p->Code < 0xFFFFFFFF); +} + +#define RC_NORM_BASE(p) if ((p)->Range < kTopValue) \ + { (p)->Code = ((p)->Code << 8) | READ_BYTE(p); (p)->Range <<= 8; + +#define RC_NORM_1(p) RC_NORM_BASE(p) } +#define RC_NORM(p) RC_NORM_BASE(p) RC_NORM_BASE(p) }} + +// we must use only one type of Normalization from two: LOCAL or REMOTE +#define RC_NORM_LOCAL(p) // RC_NORM(p) +#define RC_NORM_REMOTE(p) RC_NORM(p) + +#define R (&p->rc.dec) + +Z7_FORCE_INLINE +// Z7_NO_INLINE +static void Ppmd7z_RD_Decode(CPpmd7 *p, UInt32 start, UInt32 size) +{ + + + R->Code -= start * R->Range; + R->Range *= size; + RC_NORM_LOCAL(R) +} + +#define RC_Decode(start, size) Ppmd7z_RD_Decode(p, start, size); +#define RC_DecodeFinal(start, size) RC_Decode(start, size) RC_NORM_REMOTE(R) +#define RC_GetThreshold(total) (R->Code / (R->Range /= (total))) + + +#define CTX(ref) ((CPpmd7_Context *)Ppmd7_GetContext(p, ref)) +// typedef CPpmd7_Context * CTX_PTR; +#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p) +void Ppmd7_UpdateModel(CPpmd7 *p); + +#define MASK(sym) ((unsigned char *)charMask)[sym] +// Z7_FORCE_INLINE +// static +int Ppmd7z_DecodeSymbol(CPpmd7 *p) +{ + size_t charMask[256 / sizeof(size_t)]; + + if (p->MinContext->NumStats != 1) + { + CPpmd_State *s = Ppmd7_GetStats(p, p->MinContext); + unsigned i; + UInt32 count, hiCnt; + const UInt32 summFreq = p->MinContext->Union2.SummFreq; + + + + + count = RC_GetThreshold(summFreq); + hiCnt = count; + + if ((Int32)(count -= s->Freq) < 0) + 
{ + Byte sym; + RC_DecodeFinal(0, s->Freq) + p->FoundState = s; + sym = s->Symbol; + Ppmd7_Update1_0(p); + return sym; + } + + p->PrevSuccess = 0; + i = (unsigned)p->MinContext->NumStats - 1; + + do + { + if ((Int32)(count -= (++s)->Freq) < 0) + { + Byte sym; + RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq) + p->FoundState = s; + sym = s->Symbol; + Ppmd7_Update1(p); + return sym; + } + } + while (--i); + + if (hiCnt >= summFreq) + return PPMD7_SYM_ERROR; + + hiCnt -= count; + RC_Decode(hiCnt, summFreq - hiCnt) + + p->HiBitsFlag = PPMD7_HiBitsFlag_3(p->FoundState->Symbol); + PPMD_SetAllBitsIn256Bytes(charMask) + // i = p->MinContext->NumStats - 1; + // do { MASK((--s)->Symbol) = 0; } while (--i); + { + CPpmd_State *s2 = Ppmd7_GetStats(p, p->MinContext); + MASK(s->Symbol) = 0; + do + { + unsigned sym0 = s2[0].Symbol; + unsigned sym1 = s2[1].Symbol; + s2 += 2; + MASK(sym0) = 0; + MASK(sym1) = 0; + } + while (s2 < s); + } + } + else + { + CPpmd_State *s = Ppmd7Context_OneState(p->MinContext); + UInt16 *prob = Ppmd7_GetBinSumm(p); + UInt32 pr = *prob; + UInt32 size0 = (R->Range >> 14) * pr; + pr = PPMD_UPDATE_PROB_1(pr); + + if (R->Code < size0) + { + Byte sym; + *prob = (UInt16)(pr + (1 << PPMD_INT_BITS)); + + // RangeDec_DecodeBit0(size0); + R->Range = size0; + RC_NORM_1(R) + /* we can use single byte normalization here because of + (min(BinSumm[][]) = 95) > (1 << (14 - 8)) */ + + // sym = (p->FoundState = Ppmd7Context_OneState(p->MinContext))->Symbol; + // Ppmd7_UpdateBin(p); + { + unsigned freq = s->Freq; + CPpmd7_Context *c = CTX(SUCCESSOR(s)); + sym = s->Symbol; + p->FoundState = s; + p->PrevSuccess = 1; + p->RunLength++; + s->Freq = (Byte)(freq + (freq < 128)); + // NextContext(p); + if (p->OrderFall == 0 && (const Byte *)c > p->Text) + p->MaxContext = p->MinContext = c; + else + Ppmd7_UpdateModel(p); + } + return sym; + } + + *prob = (UInt16)pr; + p->InitEsc = p->ExpEscape[pr >> 10]; + + // RangeDec_DecodeBit1(size0); + + R->Code -= size0; + R->Range -= 
size0; + RC_NORM_LOCAL(R) + + PPMD_SetAllBitsIn256Bytes(charMask) + MASK(Ppmd7Context_OneState(p->MinContext)->Symbol) = 0; + p->PrevSuccess = 0; + } + + for (;;) + { + CPpmd_State *s, *s2; + UInt32 freqSum, count, hiCnt; + + CPpmd_See *see; + CPpmd7_Context *mc; + unsigned numMasked; + RC_NORM_REMOTE(R) + mc = p->MinContext; + numMasked = mc->NumStats; + + do + { + p->OrderFall++; + if (!mc->Suffix) + return PPMD7_SYM_END; + mc = Ppmd7_GetContext(p, mc->Suffix); + } + while (mc->NumStats == numMasked); + + s = Ppmd7_GetStats(p, mc); + + { + unsigned num = mc->NumStats; + unsigned num2 = num / 2; + + num &= 1; + hiCnt = (s->Freq & (unsigned)(MASK(s->Symbol))) & (0 - (UInt32)num); + s += num; + p->MinContext = mc; + + do + { + unsigned sym0 = s[0].Symbol; + unsigned sym1 = s[1].Symbol; + s += 2; + hiCnt += (s[-2].Freq & (unsigned)(MASK(sym0))); + hiCnt += (s[-1].Freq & (unsigned)(MASK(sym1))); + } + while (--num2); + } + + see = Ppmd7_MakeEscFreq(p, numMasked, &freqSum); + freqSum += hiCnt; + + + + + count = RC_GetThreshold(freqSum); + + if (count < hiCnt) + { + Byte sym; + + s = Ppmd7_GetStats(p, p->MinContext); + hiCnt = count; + // count -= s->Freq & (unsigned)(MASK(s->Symbol)); + // if ((Int32)count >= 0) + { + for (;;) + { + count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break; + // count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break; + } + } + s--; + RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq) + + // new (see->Summ) value can overflow over 16-bits in some rare cases + Ppmd_See_UPDATE(see) + p->FoundState = s; + sym = s->Symbol; + Ppmd7_Update2(p); + return sym; + } + + if (count >= freqSum) + return PPMD7_SYM_ERROR; + + RC_Decode(hiCnt, freqSum - hiCnt) + + // We increase (see->Summ) for sum of Freqs of all non_Masked symbols. 
+ // new (see->Summ) value can overflow over 16-bits in some rare cases + see->Summ = (UInt16)(see->Summ + freqSum); + + s = Ppmd7_GetStats(p, p->MinContext); + s2 = s + p->MinContext->NumStats; + do + { + MASK(s->Symbol) = 0; + s++; + } + while (s != s2); + } +} + +/* +Byte *Ppmd7z_DecodeSymbols(CPpmd7 *p, Byte *buf, const Byte *lim) +{ + int sym = 0; + if (buf != lim) + do + { + sym = Ppmd7z_DecodeSymbol(p); + if (sym < 0) + break; + *buf = (Byte)sym; + } + while (++buf < lim); + p->LastSymbol = sym; + return buf; +} +*/ + +#undef kTopValue +#undef READ_BYTE +#undef RC_NORM_BASE +#undef RC_NORM_1 +#undef RC_NORM +#undef RC_NORM_LOCAL +#undef RC_NORM_REMOTE +#undef R +#undef RC_Decode +#undef RC_DecodeFinal +#undef RC_GetThreshold +#undef CTX +#undef SUCCESSOR +#undef MASK diff --git a/external/unarr/lzmasdk/Ppmd7aDec.c b/external/unarr/lzmasdk/Ppmd7aDec.c new file mode 100755 index 00000000..55e164e1 --- /dev/null +++ b/external/unarr/lzmasdk/Ppmd7aDec.c @@ -0,0 +1,295 @@ +/* Ppmd7aDec.c -- PPMd7a (PPMdH) Decoder +2023-04-02 : Igor Pavlov : Public domain +This code is based on: + PPMd var.H (2001): Dmitry Shkarin : Public domain + Carryless rangecoder (1999): Dmitry Subbotin : Public domain */ + +#include "Precomp.h" + +#include "Ppmd7.h" + +#define kTop ((UInt32)1 << 24) +#define kBot ((UInt32)1 << 15) + +#define READ_BYTE(p) IByteIn_Read((p)->Stream) + +BoolInt Ppmd7a_RangeDec_Init(CPpmd7_RangeDec *p) +{ + unsigned i; + p->Code = 0; + p->Range = 0xFFFFFFFF; + p->Low = 0; + + for (i = 0; i < 4; i++) + p->Code = (p->Code << 8) | READ_BYTE(p); + return (p->Code < 0xFFFFFFFF); +} + +#define RC_NORM(p) \ + while ((p->Low ^ (p->Low + p->Range)) < kTop \ + || (p->Range < kBot && ((p->Range = (0 - p->Low) & (kBot - 1)), 1))) { \ + p->Code = (p->Code << 8) | READ_BYTE(p); \ + p->Range <<= 8; p->Low <<= 8; } + +// we must use only one type of Normalization from two: LOCAL or REMOTE +#define RC_NORM_LOCAL(p) // RC_NORM(p) +#define RC_NORM_REMOTE(p) RC_NORM(p) + +#define R 
(&p->rc.dec) + +Z7_FORCE_INLINE +// Z7_NO_INLINE +static void Ppmd7a_RD_Decode(CPpmd7 *p, UInt32 start, UInt32 size) +{ + start *= R->Range; + R->Low += start; + R->Code -= start; + R->Range *= size; + RC_NORM_LOCAL(R) +} + +#define RC_Decode(start, size) Ppmd7a_RD_Decode(p, start, size); +#define RC_DecodeFinal(start, size) RC_Decode(start, size) RC_NORM_REMOTE(R) +#define RC_GetThreshold(total) (R->Code / (R->Range /= (total))) + + +#define CTX(ref) ((CPpmd7_Context *)Ppmd7_GetContext(p, ref)) +typedef CPpmd7_Context * CTX_PTR; +#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p) +void Ppmd7_UpdateModel(CPpmd7 *p); + +#define MASK(sym) ((unsigned char *)charMask)[sym] + + +int Ppmd7a_DecodeSymbol(CPpmd7 *p) +{ + size_t charMask[256 / sizeof(size_t)]; + + if (p->MinContext->NumStats != 1) + { + CPpmd_State *s = Ppmd7_GetStats(p, p->MinContext); + unsigned i; + UInt32 count, hiCnt; + const UInt32 summFreq = p->MinContext->Union2.SummFreq; + + if (summFreq > R->Range) + return PPMD7_SYM_ERROR; + + count = RC_GetThreshold(summFreq); + hiCnt = count; + + if ((Int32)(count -= s->Freq) < 0) + { + Byte sym; + RC_DecodeFinal(0, s->Freq) + p->FoundState = s; + sym = s->Symbol; + Ppmd7_Update1_0(p); + return sym; + } + + p->PrevSuccess = 0; + i = (unsigned)p->MinContext->NumStats - 1; + + do + { + if ((Int32)(count -= (++s)->Freq) < 0) + { + Byte sym; + RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq) + p->FoundState = s; + sym = s->Symbol; + Ppmd7_Update1(p); + return sym; + } + } + while (--i); + + if (hiCnt >= summFreq) + return PPMD7_SYM_ERROR; + + hiCnt -= count; + RC_Decode(hiCnt, summFreq - hiCnt) + + p->HiBitsFlag = PPMD7_HiBitsFlag_3(p->FoundState->Symbol); + PPMD_SetAllBitsIn256Bytes(charMask) + // i = p->MinContext->NumStats - 1; + // do { MASK((--s)->Symbol) = 0; } while (--i); + { + CPpmd_State *s2 = Ppmd7_GetStats(p, p->MinContext); + MASK(s->Symbol) = 0; + do + { + unsigned sym0 = s2[0].Symbol; + unsigned sym1 = s2[1].Symbol; + s2 += 2; + MASK(sym0) = 0; + 
MASK(sym1) = 0; + } + while (s2 < s); + } + } + else + { + CPpmd_State *s = Ppmd7Context_OneState(p->MinContext); + UInt16 *prob = Ppmd7_GetBinSumm(p); + UInt32 pr = *prob; + UInt32 size0 = (R->Range >> 14) * pr; + pr = PPMD_UPDATE_PROB_1(pr); + + if (R->Code < size0) + { + Byte sym; + *prob = (UInt16)(pr + (1 << PPMD_INT_BITS)); + + // RangeDec_DecodeBit0(size0); + R->Range = size0; + RC_NORM(R) + + + + // sym = (p->FoundState = Ppmd7Context_OneState(p->MinContext))->Symbol; + // Ppmd7_UpdateBin(p); + { + unsigned freq = s->Freq; + CTX_PTR c = CTX(SUCCESSOR(s)); + sym = s->Symbol; + p->FoundState = s; + p->PrevSuccess = 1; + p->RunLength++; + s->Freq = (Byte)(freq + (freq < 128)); + // NextContext(p); + if (p->OrderFall == 0 && (const Byte *)c > p->Text) + p->MaxContext = p->MinContext = c; + else + Ppmd7_UpdateModel(p); + } + return sym; + } + + *prob = (UInt16)pr; + p->InitEsc = p->ExpEscape[pr >> 10]; + + // RangeDec_DecodeBit1(size0); + R->Low += size0; + R->Code -= size0; + R->Range = (R->Range & ~((UInt32)PPMD_BIN_SCALE - 1)) - size0; + RC_NORM_LOCAL(R) + + PPMD_SetAllBitsIn256Bytes(charMask) + MASK(Ppmd7Context_OneState(p->MinContext)->Symbol) = 0; + p->PrevSuccess = 0; + } + + for (;;) + { + CPpmd_State *s, *s2; + UInt32 freqSum, count, hiCnt; + + CPpmd_See *see; + CPpmd7_Context *mc; + unsigned numMasked; + RC_NORM_REMOTE(R) + mc = p->MinContext; + numMasked = mc->NumStats; + + do + { + p->OrderFall++; + if (!mc->Suffix) + return PPMD7_SYM_END; + mc = Ppmd7_GetContext(p, mc->Suffix); + } + while (mc->NumStats == numMasked); + + s = Ppmd7_GetStats(p, mc); + + { + unsigned num = mc->NumStats; + unsigned num2 = num / 2; + + num &= 1; + hiCnt = (s->Freq & (unsigned)(MASK(s->Symbol))) & (0 - (UInt32)num); + s += num; + p->MinContext = mc; + + do + { + unsigned sym0 = s[0].Symbol; + unsigned sym1 = s[1].Symbol; + s += 2; + hiCnt += (s[-2].Freq & (unsigned)(MASK(sym0))); + hiCnt += (s[-1].Freq & (unsigned)(MASK(sym1))); + } + while (--num2); + } + + see = 
Ppmd7_MakeEscFreq(p, numMasked, &freqSum); + freqSum += hiCnt; + + if (freqSum > R->Range) + return PPMD7_SYM_ERROR; + + count = RC_GetThreshold(freqSum); + + if (count < hiCnt) + { + Byte sym; + + s = Ppmd7_GetStats(p, p->MinContext); + hiCnt = count; + // count -= s->Freq & (unsigned)(MASK(s->Symbol)); + // if ((Int32)count >= 0) + { + for (;;) + { + count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break; + // count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break; + } + } + s--; + RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq) + + // new (see->Summ) value can overflow over 16-bits in some rare cases + Ppmd_See_UPDATE(see) + p->FoundState = s; + sym = s->Symbol; + Ppmd7_Update2(p); + return sym; + } + + if (count >= freqSum) + return PPMD7_SYM_ERROR; + + RC_Decode(hiCnt, freqSum - hiCnt) + + // We increase (see->Summ) for sum of Freqs of all non_Masked symbols. + // new (see->Summ) value can overflow over 16-bits in some rare cases + see->Summ = (UInt16)(see->Summ + freqSum); + + s = Ppmd7_GetStats(p, p->MinContext); + s2 = s + p->MinContext->NumStats; + do + { + MASK(s->Symbol) = 0; + s++; + } + while (s != s2); + } +} + +#undef kTop +#undef kBot +#undef READ_BYTE +#undef RC_NORM_BASE +#undef RC_NORM_1 +#undef RC_NORM +#undef RC_NORM_LOCAL +#undef RC_NORM_REMOTE +#undef R +#undef RC_Decode +#undef RC_DecodeFinal +#undef RC_GetThreshold +#undef CTX +#undef SUCCESSOR +#undef MASK diff --git a/external/unarr/lzmasdk/Ppmd8.c b/external/unarr/lzmasdk/Ppmd8.c new file mode 100755 index 00000000..28abf279 --- /dev/null +++ b/external/unarr/lzmasdk/Ppmd8.c @@ -0,0 +1,1565 @@ +/* Ppmd8.c -- PPMdI codec +2023-04-02 : Igor Pavlov : Public domain +This code is based on PPMd var.I (2002): Dmitry Shkarin : Public domain */ + +#include "Precomp.h" + +#include + +#include "Ppmd8.h" + + + + +MY_ALIGN(16) +static const Byte PPMD8_kExpEscape[16] = { 25, 14, 9, 7, 5, 5, 4, 4, 4, 3, 3, 3, 2, 2, 2, 2 }; +MY_ALIGN(16) +static 
const UInt16 PPMD8_kInitBinEsc[] = { 0x3CDD, 0x1F3F, 0x59BF, 0x48F3, 0x64A1, 0x5ABC, 0x6632, 0x6051}; + +#define MAX_FREQ 124 +#define UNIT_SIZE 12 + +#define U2B(nu) ((UInt32)(nu) * UNIT_SIZE) +#define U2I(nu) (p->Units2Indx[(size_t)(nu) - 1]) +#define I2U(indx) ((unsigned)p->Indx2Units[indx]) + + +#define REF(ptr) Ppmd_GetRef(p, ptr) + +#define STATS_REF(ptr) ((CPpmd_State_Ref)REF(ptr)) + +#define CTX(ref) ((CPpmd8_Context *)Ppmd8_GetContext(p, ref)) +#define STATS(ctx) Ppmd8_GetStats(p, ctx) +#define ONE_STATE(ctx) Ppmd8Context_OneState(ctx) +#define SUFFIX(ctx) CTX((ctx)->Suffix) + +typedef CPpmd8_Context * PPMD8_CTX_PTR; + +struct CPpmd8_Node_; + +typedef Ppmd_Ref_Type(struct CPpmd8_Node_) CPpmd8_Node_Ref; + +typedef struct CPpmd8_Node_ +{ + UInt32 Stamp; + + CPpmd8_Node_Ref Next; + UInt32 NU; +} CPpmd8_Node; + +#define NODE(r) Ppmd_GetPtr_Type(p, r, CPpmd8_Node) + +void Ppmd8_Construct(CPpmd8 *p) +{ + unsigned i, k, m; + + p->Base = NULL; + + for (i = 0, k = 0; i < PPMD_NUM_INDEXES; i++) + { + unsigned step = (i >= 12 ? 
4 : (i >> 2) + 1); + do { p->Units2Indx[k++] = (Byte)i; } while (--step); + p->Indx2Units[i] = (Byte)k; + } + + p->NS2BSIndx[0] = (0 << 1); + p->NS2BSIndx[1] = (1 << 1); + memset(p->NS2BSIndx + 2, (2 << 1), 9); + memset(p->NS2BSIndx + 11, (3 << 1), 256 - 11); + + for (i = 0; i < 5; i++) + p->NS2Indx[i] = (Byte)i; + + for (m = i, k = 1; i < 260; i++) + { + p->NS2Indx[i] = (Byte)m; + if (--k == 0) + k = (++m) - 4; + } + + memcpy(p->ExpEscape, PPMD8_kExpEscape, 16); +} + + +void Ppmd8_Free(CPpmd8 *p, ISzAllocPtr alloc) +{ + ISzAlloc_Free(alloc, p->Base); + p->Size = 0; + p->Base = NULL; +} + + +BoolInt Ppmd8_Alloc(CPpmd8 *p, UInt32 size, ISzAllocPtr alloc) +{ + if (!p->Base || p->Size != size) + { + Ppmd8_Free(p, alloc); + p->AlignOffset = (4 - size) & 3; + if ((p->Base = (Byte *)ISzAlloc_Alloc(alloc, p->AlignOffset + size)) == NULL) + return False; + p->Size = size; + } + return True; +} + + + +// ---------- Internal Memory Allocator ---------- + + + + + + +#define EMPTY_NODE 0xFFFFFFFF + + +static void Ppmd8_InsertNode(CPpmd8 *p, void *node, unsigned indx) +{ + ((CPpmd8_Node *)node)->Stamp = EMPTY_NODE; + ((CPpmd8_Node *)node)->Next = (CPpmd8_Node_Ref)p->FreeList[indx]; + ((CPpmd8_Node *)node)->NU = I2U(indx); + p->FreeList[indx] = REF(node); + p->Stamps[indx]++; +} + + +static void *Ppmd8_RemoveNode(CPpmd8 *p, unsigned indx) +{ + CPpmd8_Node *node = NODE((CPpmd8_Node_Ref)p->FreeList[indx]); + p->FreeList[indx] = node->Next; + p->Stamps[indx]--; + + return node; +} + + +static void Ppmd8_SplitBlock(CPpmd8 *p, void *ptr, unsigned oldIndx, unsigned newIndx) +{ + unsigned i, nu = I2U(oldIndx) - I2U(newIndx); + ptr = (Byte *)ptr + U2B(I2U(newIndx)); + if (I2U(i = U2I(nu)) != nu) + { + unsigned k = I2U(--i); + Ppmd8_InsertNode(p, ((Byte *)ptr) + U2B(k), nu - k - 1); + } + Ppmd8_InsertNode(p, ptr, i); +} + + + + + + + + + + + + + + +static void Ppmd8_GlueFreeBlocks(CPpmd8 *p) +{ + /* + we use first UInt32 field of 12-bytes UNITs as record type stamp + CPpmd_State { Byte 
Symbol; Byte Freq; : Freq != 0xFF + CPpmd8_Context { Byte NumStats; Byte Flags; UInt16 SummFreq; : Flags != 0xFF ??? + CPpmd8_Node { UInt32 Stamp : Stamp == 0xFFFFFFFF for free record + : Stamp == 0 for guard + Last 12-bytes UNIT in array is always contains 12-bytes order-0 CPpmd8_Context record + */ + CPpmd8_Node_Ref n; + + p->GlueCount = 1 << 13; + memset(p->Stamps, 0, sizeof(p->Stamps)); + + /* we set guard NODE at LoUnit */ + if (p->LoUnit != p->HiUnit) + ((CPpmd8_Node *)(void *)p->LoUnit)->Stamp = 0; + + { + /* Glue free blocks */ + CPpmd8_Node_Ref *prev = &n; + unsigned i; + for (i = 0; i < PPMD_NUM_INDEXES; i++) + { + + CPpmd8_Node_Ref next = (CPpmd8_Node_Ref)p->FreeList[i]; + p->FreeList[i] = 0; + while (next != 0) + { + CPpmd8_Node *node = NODE(next); + UInt32 nu = node->NU; + *prev = next; + next = node->Next; + if (nu != 0) + { + CPpmd8_Node *node2; + prev = &(node->Next); + while ((node2 = node + nu)->Stamp == EMPTY_NODE) + { + nu += node2->NU; + node2->NU = 0; + node->NU = nu; + } + } + } + } + + *prev = 0; + } + + + + + + + + + + + + + + + + + + + + + /* Fill lists of free blocks */ + while (n != 0) + { + CPpmd8_Node *node = NODE(n); + UInt32 nu = node->NU; + unsigned i; + n = node->Next; + if (nu == 0) + continue; + for (; nu > 128; nu -= 128, node += 128) + Ppmd8_InsertNode(p, node, PPMD_NUM_INDEXES - 1); + if (I2U(i = U2I(nu)) != nu) + { + unsigned k = I2U(--i); + Ppmd8_InsertNode(p, node + k, (unsigned)nu - k - 1); + } + Ppmd8_InsertNode(p, node, i); + } +} + + +Z7_NO_INLINE +static void *Ppmd8_AllocUnitsRare(CPpmd8 *p, unsigned indx) +{ + unsigned i; + + if (p->GlueCount == 0) + { + Ppmd8_GlueFreeBlocks(p); + if (p->FreeList[indx] != 0) + return Ppmd8_RemoveNode(p, indx); + } + + i = indx; + + do + { + if (++i == PPMD_NUM_INDEXES) + { + UInt32 numBytes = U2B(I2U(indx)); + Byte *us = p->UnitsStart; + p->GlueCount--; + return ((UInt32)(us - p->Text) > numBytes) ? 
(p->UnitsStart = us - numBytes) : (NULL); + } + } + while (p->FreeList[i] == 0); + + { + void *block = Ppmd8_RemoveNode(p, i); + Ppmd8_SplitBlock(p, block, i, indx); + return block; + } +} + + +static void *Ppmd8_AllocUnits(CPpmd8 *p, unsigned indx) +{ + if (p->FreeList[indx] != 0) + return Ppmd8_RemoveNode(p, indx); + { + UInt32 numBytes = U2B(I2U(indx)); + Byte *lo = p->LoUnit; + if ((UInt32)(p->HiUnit - lo) >= numBytes) + { + p->LoUnit = lo + numBytes; + return lo; + } + } + return Ppmd8_AllocUnitsRare(p, indx); +} + + +#define MEM_12_CPY(dest, src, num) \ + { UInt32 *d = (UInt32 *)dest; const UInt32 *z = (const UInt32 *)src; UInt32 n = num; \ + do { d[0] = z[0]; d[1] = z[1]; d[2] = z[2]; z += 3; d += 3; } while (--n); } + + + +static void *ShrinkUnits(CPpmd8 *p, void *oldPtr, unsigned oldNU, unsigned newNU) +{ + unsigned i0 = U2I(oldNU); + unsigned i1 = U2I(newNU); + if (i0 == i1) + return oldPtr; + if (p->FreeList[i1] != 0) + { + void *ptr = Ppmd8_RemoveNode(p, i1); + MEM_12_CPY(ptr, oldPtr, newNU) + Ppmd8_InsertNode(p, oldPtr, i0); + return ptr; + } + Ppmd8_SplitBlock(p, oldPtr, i0, i1); + return oldPtr; +} + + +static void FreeUnits(CPpmd8 *p, void *ptr, unsigned nu) +{ + Ppmd8_InsertNode(p, ptr, U2I(nu)); +} + + +static void SpecialFreeUnit(CPpmd8 *p, void *ptr) +{ + if ((Byte *)ptr != p->UnitsStart) + Ppmd8_InsertNode(p, ptr, 0); + else + { + #ifdef PPMD8_FREEZE_SUPPORT + *(UInt32 *)ptr = EMPTY_NODE; /* it's used for (Flags == 0xFF) check in RemoveBinContexts() */ + #endif + p->UnitsStart += UNIT_SIZE; + } +} + + +/* +static void *MoveUnitsUp(CPpmd8 *p, void *oldPtr, unsigned nu) +{ + unsigned indx = U2I(nu); + void *ptr; + if ((Byte *)oldPtr > p->UnitsStart + (1 << 14) || REF(oldPtr) > p->FreeList[indx]) + return oldPtr; + ptr = Ppmd8_RemoveNode(p, indx); + MEM_12_CPY(ptr, oldPtr, nu) + if ((Byte *)oldPtr != p->UnitsStart) + Ppmd8_InsertNode(p, oldPtr, indx); + else + p->UnitsStart += U2B(I2U(indx)); + return ptr; +} +*/ + +static void 
ExpandTextArea(CPpmd8 *p) +{ + UInt32 count[PPMD_NUM_INDEXES]; + unsigned i; + + memset(count, 0, sizeof(count)); + if (p->LoUnit != p->HiUnit) + ((CPpmd8_Node *)(void *)p->LoUnit)->Stamp = 0; + + { + CPpmd8_Node *node = (CPpmd8_Node *)(void *)p->UnitsStart; + while (node->Stamp == EMPTY_NODE) + { + UInt32 nu = node->NU; + node->Stamp = 0; + count[U2I(nu)]++; + node += nu; + } + p->UnitsStart = (Byte *)node; + } + + for (i = 0; i < PPMD_NUM_INDEXES; i++) + { + UInt32 cnt = count[i]; + if (cnt == 0) + continue; + { + CPpmd8_Node_Ref *prev = (CPpmd8_Node_Ref *)&p->FreeList[i]; + CPpmd8_Node_Ref n = *prev; + p->Stamps[i] -= cnt; + for (;;) + { + CPpmd8_Node *node = NODE(n); + n = node->Next; + if (node->Stamp != 0) + { + prev = &node->Next; + continue; + } + *prev = n; + if (--cnt == 0) + break; + } + } + } +} + + +#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p) +static void Ppmd8State_SetSuccessor(CPpmd_State *p, CPpmd_Void_Ref v) +{ + Ppmd_SET_SUCCESSOR(p, v) +} + +#define RESET_TEXT(offs) { p->Text = p->Base + p->AlignOffset + (offs); } + +Z7_NO_INLINE +static +void Ppmd8_RestartModel(CPpmd8 *p) +{ + unsigned i, k, m; + + memset(p->FreeList, 0, sizeof(p->FreeList)); + memset(p->Stamps, 0, sizeof(p->Stamps)); + RESET_TEXT(0) + p->HiUnit = p->Text + p->Size; + p->LoUnit = p->UnitsStart = p->HiUnit - p->Size / 8 / UNIT_SIZE * 7 * UNIT_SIZE; + p->GlueCount = 0; + + p->OrderFall = p->MaxOrder; + p->RunLength = p->InitRL = -(Int32)((p->MaxOrder < 12) ? 
p->MaxOrder : 12) - 1; + p->PrevSuccess = 0; + + { + CPpmd8_Context *mc = (PPMD8_CTX_PTR)(void *)(p->HiUnit -= UNIT_SIZE); /* AllocContext(p); */ + CPpmd_State *s = (CPpmd_State *)p->LoUnit; /* Ppmd8_AllocUnits(p, PPMD_NUM_INDEXES - 1); */ + + p->LoUnit += U2B(256 / 2); + p->MaxContext = p->MinContext = mc; + p->FoundState = s; + mc->Flags = 0; + mc->NumStats = 256 - 1; + mc->Union2.SummFreq = 256 + 1; + mc->Union4.Stats = REF(s); + mc->Suffix = 0; + + for (i = 0; i < 256; i++, s++) + { + s->Symbol = (Byte)i; + s->Freq = 1; + Ppmd8State_SetSuccessor(s, 0); + } + } + + + + + + + + + + + + + for (i = m = 0; m < 25; m++) + { + while (p->NS2Indx[i] == m) + i++; + for (k = 0; k < 8; k++) + { + unsigned r; + UInt16 *dest = p->BinSumm[m] + k; + const UInt16 val = (UInt16)(PPMD_BIN_SCALE - PPMD8_kInitBinEsc[k] / (i + 1)); + for (r = 0; r < 64; r += 8) + dest[r] = val; + } + } + + for (i = m = 0; m < 24; m++) + { + unsigned summ; + CPpmd_See *s; + while (p->NS2Indx[(size_t)i + 3] == m + 3) + i++; + s = p->See[m]; + summ = ((2 * i + 5) << (PPMD_PERIOD_BITS - 4)); + for (k = 0; k < 32; k++, s++) + { + s->Summ = (UInt16)summ; + s->Shift = (PPMD_PERIOD_BITS - 4); + s->Count = 7; + } + } + + p->DummySee.Summ = 0; /* unused */ + p->DummySee.Shift = PPMD_PERIOD_BITS; + p->DummySee.Count = 64; /* unused */ +} + + +void Ppmd8_Init(CPpmd8 *p, unsigned maxOrder, unsigned restoreMethod) +{ + p->MaxOrder = maxOrder; + p->RestoreMethod = restoreMethod; + Ppmd8_RestartModel(p); +} + + +#define FLAG_RESCALED (1 << 2) +// #define FLAG_SYM_HIGH (1 << 3) +#define FLAG_PREV_HIGH (1 << 4) + +#define HiBits_Prepare(sym) ((unsigned)(sym) + 0xC0) + +#define HiBits_Convert_3(flags) (((flags) >> (8 - 3)) & (1 << 3)) +#define HiBits_Convert_4(flags) (((flags) >> (8 - 4)) & (1 << 4)) + +#define PPMD8_HiBitsFlag_3(sym) HiBits_Convert_3(HiBits_Prepare(sym)) +#define PPMD8_HiBitsFlag_4(sym) HiBits_Convert_4(HiBits_Prepare(sym)) + +// #define PPMD8_HiBitsFlag_3(sym) (0x08 * ((sym) >= 0x40)) +// #define 
PPMD8_HiBitsFlag_4(sym) (0x10 * ((sym) >= 0x40)) + +/* +Refresh() is called when we remove some symbols (successors) in context. +It increases Escape_Freq for sum of all removed symbols. +*/ + +static void Refresh(CPpmd8 *p, PPMD8_CTX_PTR ctx, unsigned oldNU, unsigned scale) +{ + unsigned i = ctx->NumStats, escFreq, sumFreq, flags; + CPpmd_State *s = (CPpmd_State *)ShrinkUnits(p, STATS(ctx), oldNU, (i + 2) >> 1); + ctx->Union4.Stats = REF(s); + + // #ifdef PPMD8_FREEZE_SUPPORT + /* + (ctx->Union2.SummFreq >= ((UInt32)1 << 15)) can be in FREEZE mode for some files. + It's not good for range coder. So new versions of support fix: + - original PPMdI code rev.1 + + original PPMdI code rev.2 + - 7-Zip default ((PPMD8_FREEZE_SUPPORT is not defined) + + 7-Zip (p->RestoreMethod >= PPMD8_RESTORE_METHOD_FREEZE) + if we use that fixed line, we can lose compatibility with some files created before fix + if we don't use that fixed line, the program can work incorrectly in FREEZE mode in rare case. + */ + // if (p->RestoreMethod >= PPMD8_RESTORE_METHOD_FREEZE) + { + scale |= (ctx->Union2.SummFreq >= ((UInt32)1 << 15)); + } + // #endif + + + + flags = HiBits_Prepare(s->Symbol); + { + unsigned freq = s->Freq; + escFreq = ctx->Union2.SummFreq - freq; + freq = (freq + scale) >> scale; + sumFreq = freq; + s->Freq = (Byte)freq; + } + + do + { + unsigned freq = (++s)->Freq; + escFreq -= freq; + freq = (freq + scale) >> scale; + sumFreq += freq; + s->Freq = (Byte)freq; + flags |= HiBits_Prepare(s->Symbol); + } + while (--i); + + ctx->Union2.SummFreq = (UInt16)(sumFreq + ((escFreq + scale) >> scale)); + ctx->Flags = (Byte)((ctx->Flags & (FLAG_PREV_HIGH + FLAG_RESCALED * scale)) + HiBits_Convert_3(flags)); +} + + +static void SWAP_STATES(CPpmd_State *t1, CPpmd_State *t2) +{ + CPpmd_State tmp = *t1; + *t1 = *t2; + *t2 = tmp; +} + + +/* +CutOff() reduces contexts: + It conversts Successors at MaxOrder to another Contexts to NULL-Successors + It removes RAW-Successors and NULL-Successors 
that are not Order-0 + and it removes contexts when it has no Successors. + if the (Union4.Stats) is close to (UnitsStart), it moves it up. +*/ + +static CPpmd_Void_Ref CutOff(CPpmd8 *p, PPMD8_CTX_PTR ctx, unsigned order) +{ + int ns = ctx->NumStats; + unsigned nu; + CPpmd_State *stats; + + if (ns == 0) + { + CPpmd_State *s = ONE_STATE(ctx); + CPpmd_Void_Ref successor = SUCCESSOR(s); + if ((Byte *)Ppmd8_GetPtr(p, successor) >= p->UnitsStart) + { + if (order < p->MaxOrder) + successor = CutOff(p, CTX(successor), order + 1); + else + successor = 0; + Ppmd8State_SetSuccessor(s, successor); + if (successor || order <= 9) /* O_BOUND */ + return REF(ctx); + } + SpecialFreeUnit(p, ctx); + return 0; + } + + nu = ((unsigned)ns + 2) >> 1; + // ctx->Union4.Stats = STATS_REF(MoveUnitsUp(p, STATS(ctx), nu)); + { + unsigned indx = U2I(nu); + stats = STATS(ctx); + + if ((UInt32)((Byte *)stats - p->UnitsStart) <= (1 << 14) + && (CPpmd_Void_Ref)ctx->Union4.Stats <= p->FreeList[indx]) + { + void *ptr = Ppmd8_RemoveNode(p, indx); + ctx->Union4.Stats = STATS_REF(ptr); + MEM_12_CPY(ptr, (const void *)stats, nu) + if ((Byte *)stats != p->UnitsStart) + Ppmd8_InsertNode(p, stats, indx); + else + p->UnitsStart += U2B(I2U(indx)); + stats = ptr; + } + } + + { + CPpmd_State *s = stats + (unsigned)ns; + do + { + CPpmd_Void_Ref successor = SUCCESSOR(s); + if ((Byte *)Ppmd8_GetPtr(p, successor) < p->UnitsStart) + { + CPpmd_State *s2 = stats + (unsigned)(ns--); + if (order) + { + if (s != s2) + *s = *s2; + } + else + { + SWAP_STATES(s, s2); + Ppmd8State_SetSuccessor(s2, 0); + } + } + else + { + if (order < p->MaxOrder) + Ppmd8State_SetSuccessor(s, CutOff(p, CTX(successor), order + 1)); + else + Ppmd8State_SetSuccessor(s, 0); + } + } + while (--s >= stats); + } + + if (ns != ctx->NumStats && order) + { + if (ns < 0) + { + FreeUnits(p, stats, nu); + SpecialFreeUnit(p, ctx); + return 0; + } + ctx->NumStats = (Byte)ns; + if (ns == 0) + { + const Byte sym = stats->Symbol; + ctx->Flags = 
(Byte)((ctx->Flags & FLAG_PREV_HIGH) + PPMD8_HiBitsFlag_3(sym)); + // *ONE_STATE(ctx) = *stats; + ctx->Union2.State2.Symbol = sym; + ctx->Union2.State2.Freq = (Byte)(((unsigned)stats->Freq + 11) >> 3); + ctx->Union4.State4.Successor_0 = stats->Successor_0; + ctx->Union4.State4.Successor_1 = stats->Successor_1; + FreeUnits(p, stats, nu); + } + else + { + Refresh(p, ctx, nu, ctx->Union2.SummFreq > 16 * (unsigned)ns); + } + } + + return REF(ctx); +} + + + +#ifdef PPMD8_FREEZE_SUPPORT + +/* +RemoveBinContexts() + It conversts Successors at MaxOrder to another Contexts to NULL-Successors + It changes RAW-Successors to NULL-Successors + removes Bin Context without Successor, if suffix of that context is also binary. +*/ + +static CPpmd_Void_Ref RemoveBinContexts(CPpmd8 *p, PPMD8_CTX_PTR ctx, unsigned order) +{ + if (!ctx->NumStats) + { + CPpmd_State *s = ONE_STATE(ctx); + CPpmd_Void_Ref successor = SUCCESSOR(s); + if ((Byte *)Ppmd8_GetPtr(p, successor) >= p->UnitsStart && order < p->MaxOrder) + successor = RemoveBinContexts(p, CTX(successor), order + 1); + else + successor = 0; + Ppmd8State_SetSuccessor(s, successor); + /* Suffix context can be removed already, since different (high-order) + Successors may refer to same context. 
So we check Flags == 0xFF (Stamp == EMPTY_NODE) */ + if (!successor && (!SUFFIX(ctx)->NumStats || SUFFIX(ctx)->Flags == 0xFF)) + { + FreeUnits(p, ctx, 1); + return 0; + } + } + else + { + CPpmd_State *s = STATS(ctx) + ctx->NumStats; + do + { + CPpmd_Void_Ref successor = SUCCESSOR(s); + if ((Byte *)Ppmd8_GetPtr(p, successor) >= p->UnitsStart && order < p->MaxOrder) + Ppmd8State_SetSuccessor(s, RemoveBinContexts(p, CTX(successor), order + 1)); + else + Ppmd8State_SetSuccessor(s, 0); + } + while (--s >= STATS(ctx)); + } + + return REF(ctx); +} + +#endif + + + +static UInt32 GetUsedMemory(const CPpmd8 *p) +{ + UInt32 v = 0; + unsigned i; + for (i = 0; i < PPMD_NUM_INDEXES; i++) + v += p->Stamps[i] * I2U(i); + return p->Size - (UInt32)(p->HiUnit - p->LoUnit) - (UInt32)(p->UnitsStart - p->Text) - U2B(v); +} + +#ifdef PPMD8_FREEZE_SUPPORT + #define RESTORE_MODEL(c1, fSuccessor) RestoreModel(p, c1, fSuccessor) +#else + #define RESTORE_MODEL(c1, fSuccessor) RestoreModel(p, c1) +#endif + + +static void RestoreModel(CPpmd8 *p, PPMD8_CTX_PTR ctxError + #ifdef PPMD8_FREEZE_SUPPORT + , PPMD8_CTX_PTR fSuccessor + #endif + ) +{ + PPMD8_CTX_PTR c; + CPpmd_State *s; + RESET_TEXT(0) + + // we go here in cases of error of allocation for context (c1) + // Order(MinContext) < Order(ctxError) <= Order(MaxContext) + + // We remove last symbol from each of contexts [p->MaxContext ... ctxError) contexts + // So we rollback all created (symbols) before error. 
+ for (c = p->MaxContext; c != ctxError; c = SUFFIX(c)) + if (--(c->NumStats) == 0) + { + s = STATS(c); + c->Flags = (Byte)((c->Flags & FLAG_PREV_HIGH) + PPMD8_HiBitsFlag_3(s->Symbol)); + // *ONE_STATE(c) = *s; + c->Union2.State2.Symbol = s->Symbol; + c->Union2.State2.Freq = (Byte)(((unsigned)s->Freq + 11) >> 3); + c->Union4.State4.Successor_0 = s->Successor_0; + c->Union4.State4.Successor_1 = s->Successor_1; + + SpecialFreeUnit(p, s); + } + else + { + /* Refresh() can increase Escape_Freq on value of Freq of last symbol, that was added before error. + so the largest possible increase for Escape_Freq is (8) from value before ModelUpoadet() */ + Refresh(p, c, ((unsigned)c->NumStats + 3) >> 1, 0); + } + + // increase Escape Freq for context [ctxError ... p->MinContext) + for (; c != p->MinContext; c = SUFFIX(c)) + if (c->NumStats == 0) + { + // ONE_STATE(c) + c->Union2.State2.Freq = (Byte)(((unsigned)c->Union2.State2.Freq + 1) >> 1); + } + else if ((c->Union2.SummFreq = (UInt16)(c->Union2.SummFreq + 4)) > 128 + 4 * c->NumStats) + Refresh(p, c, ((unsigned)c->NumStats + 2) >> 1, 1); + + #ifdef PPMD8_FREEZE_SUPPORT + if (p->RestoreMethod > PPMD8_RESTORE_METHOD_FREEZE) + { + p->MaxContext = fSuccessor; + p->GlueCount += !(p->Stamps[1] & 1); // why? 
+ } + else if (p->RestoreMethod == PPMD8_RESTORE_METHOD_FREEZE) + { + while (p->MaxContext->Suffix) + p->MaxContext = SUFFIX(p->MaxContext); + RemoveBinContexts(p, p->MaxContext, 0); + // we change the current mode to (PPMD8_RESTORE_METHOD_FREEZE + 1) + p->RestoreMethod = PPMD8_RESTORE_METHOD_FREEZE + 1; + p->GlueCount = 0; + p->OrderFall = p->MaxOrder; + } + else + #endif + if (p->RestoreMethod == PPMD8_RESTORE_METHOD_RESTART || GetUsedMemory(p) < (p->Size >> 1)) + Ppmd8_RestartModel(p); + else + { + while (p->MaxContext->Suffix) + p->MaxContext = SUFFIX(p->MaxContext); + do + { + CutOff(p, p->MaxContext, 0); + ExpandTextArea(p); + } + while (GetUsedMemory(p) > 3 * (p->Size >> 2)); + p->GlueCount = 0; + p->OrderFall = p->MaxOrder; + } + p->MinContext = p->MaxContext; +} + + + +Z7_NO_INLINE +static PPMD8_CTX_PTR Ppmd8_CreateSuccessors(CPpmd8 *p, BoolInt skip, CPpmd_State *s1, PPMD8_CTX_PTR c) +{ + + CPpmd_Byte_Ref upBranch = (CPpmd_Byte_Ref)SUCCESSOR(p->FoundState); + Byte newSym, newFreq, flags; + unsigned numPs = 0; + CPpmd_State *ps[PPMD8_MAX_ORDER + 1]; /* fixed over Shkarin's code. Maybe it could work without + 1 too. 
*/ + + if (!skip) + ps[numPs++] = p->FoundState; + + while (c->Suffix) + { + CPpmd_Void_Ref successor; + CPpmd_State *s; + c = SUFFIX(c); + + if (s1) { s = s1; s1 = NULL; } + else if (c->NumStats != 0) + { + Byte sym = p->FoundState->Symbol; + for (s = STATS(c); s->Symbol != sym; s++); + if (s->Freq < MAX_FREQ - 9) { s->Freq++; c->Union2.SummFreq++; } + } + else + { + s = ONE_STATE(c); + s->Freq = (Byte)(s->Freq + (!SUFFIX(c)->NumStats & (s->Freq < 24))); + } + successor = SUCCESSOR(s); + if (successor != upBranch) + { + + c = CTX(successor); + if (numPs == 0) + { + + + return c; + } + break; + } + ps[numPs++] = s; + } + + + + + + newSym = *(const Byte *)Ppmd8_GetPtr(p, upBranch); + upBranch++; + flags = (Byte)(PPMD8_HiBitsFlag_4(p->FoundState->Symbol) + PPMD8_HiBitsFlag_3(newSym)); + + if (c->NumStats == 0) + newFreq = c->Union2.State2.Freq; + else + { + UInt32 cf, s0; + CPpmd_State *s; + for (s = STATS(c); s->Symbol != newSym; s++); + cf = (UInt32)s->Freq - 1; + s0 = (UInt32)c->Union2.SummFreq - c->NumStats - cf; + /* + + + max(newFreq)= (s->Freq - 1), when (s0 == 1) + + + */ + newFreq = (Byte)(1 + ((2 * cf <= s0) ? 
(5 * cf > s0) : ((cf + 2 * s0 - 3) / s0))); + } + + + + do + { + PPMD8_CTX_PTR c1; + /* = AllocContext(p); */ + if (p->HiUnit != p->LoUnit) + c1 = (PPMD8_CTX_PTR)(void *)(p->HiUnit -= UNIT_SIZE); + else if (p->FreeList[0] != 0) + c1 = (PPMD8_CTX_PTR)Ppmd8_RemoveNode(p, 0); + else + { + c1 = (PPMD8_CTX_PTR)Ppmd8_AllocUnitsRare(p, 0); + if (!c1) + return NULL; + } + c1->Flags = flags; + c1->NumStats = 0; + c1->Union2.State2.Symbol = newSym; + c1->Union2.State2.Freq = newFreq; + Ppmd8State_SetSuccessor(ONE_STATE(c1), upBranch); + c1->Suffix = REF(c); + Ppmd8State_SetSuccessor(ps[--numPs], REF(c1)); + c = c1; + } + while (numPs != 0); + + return c; +} + + +static PPMD8_CTX_PTR ReduceOrder(CPpmd8 *p, CPpmd_State *s1, PPMD8_CTX_PTR c) +{ + CPpmd_State *s = NULL; + PPMD8_CTX_PTR c1 = c; + CPpmd_Void_Ref upBranch = REF(p->Text); + + #ifdef PPMD8_FREEZE_SUPPORT + /* The BUG in Shkarin's code was fixed: ps could overflow in CUT_OFF mode. */ + CPpmd_State *ps[PPMD8_MAX_ORDER + 1]; + unsigned numPs = 0; + ps[numPs++] = p->FoundState; + #endif + + Ppmd8State_SetSuccessor(p->FoundState, upBranch); + p->OrderFall++; + + for (;;) + { + if (s1) + { + c = SUFFIX(c); + s = s1; + s1 = NULL; + } + else + { + if (!c->Suffix) + { + #ifdef PPMD8_FREEZE_SUPPORT + if (p->RestoreMethod > PPMD8_RESTORE_METHOD_FREEZE) + { + do { Ppmd8State_SetSuccessor(ps[--numPs], REF(c)); } while (numPs); + RESET_TEXT(1) + p->OrderFall = 1; + } + #endif + return c; + } + c = SUFFIX(c); + if (c->NumStats) + { + if ((s = STATS(c))->Symbol != p->FoundState->Symbol) + do { s++; } while (s->Symbol != p->FoundState->Symbol); + if (s->Freq < MAX_FREQ - 9) + { + s->Freq = (Byte)(s->Freq + 2); + c->Union2.SummFreq = (UInt16)(c->Union2.SummFreq + 2); + } + } + else + { + s = ONE_STATE(c); + s->Freq = (Byte)(s->Freq + (s->Freq < 32)); + } + } + if (SUCCESSOR(s)) + break; + #ifdef PPMD8_FREEZE_SUPPORT + ps[numPs++] = s; + #endif + Ppmd8State_SetSuccessor(s, upBranch); + p->OrderFall++; + } + + #ifdef 
PPMD8_FREEZE_SUPPORT + if (p->RestoreMethod > PPMD8_RESTORE_METHOD_FREEZE) + { + c = CTX(SUCCESSOR(s)); + do { Ppmd8State_SetSuccessor(ps[--numPs], REF(c)); } while (numPs); + RESET_TEXT(1) + p->OrderFall = 1; + return c; + } + else + #endif + if (SUCCESSOR(s) <= upBranch) + { + PPMD8_CTX_PTR successor; + CPpmd_State *s2 = p->FoundState; + p->FoundState = s; + + successor = Ppmd8_CreateSuccessors(p, False, NULL, c); + if (!successor) + Ppmd8State_SetSuccessor(s, 0); + else + Ppmd8State_SetSuccessor(s, REF(successor)); + p->FoundState = s2; + } + + { + CPpmd_Void_Ref successor = SUCCESSOR(s); + if (p->OrderFall == 1 && c1 == p->MaxContext) + { + Ppmd8State_SetSuccessor(p->FoundState, successor); + p->Text--; + } + if (successor == 0) + return NULL; + return CTX(successor); + } +} + + + +void Ppmd8_UpdateModel(CPpmd8 *p); +Z7_NO_INLINE +void Ppmd8_UpdateModel(CPpmd8 *p) +{ + CPpmd_Void_Ref maxSuccessor, minSuccessor = SUCCESSOR(p->FoundState); + PPMD8_CTX_PTR c; + unsigned s0, ns, fFreq = p->FoundState->Freq; + Byte flag, fSymbol = p->FoundState->Symbol; + { + CPpmd_State *s = NULL; + if (p->FoundState->Freq < MAX_FREQ / 4 && p->MinContext->Suffix != 0) + { + /* Update Freqs in Suffix Context */ + + c = SUFFIX(p->MinContext); + + if (c->NumStats == 0) + { + s = ONE_STATE(c); + if (s->Freq < 32) + s->Freq++; + } + else + { + Byte sym = p->FoundState->Symbol; + s = STATS(c); + + if (s->Symbol != sym) + { + do + { + + s++; + } + while (s->Symbol != sym); + + if (s[0].Freq >= s[-1].Freq) + { + SWAP_STATES(&s[0], &s[-1]); + s--; + } + } + + if (s->Freq < MAX_FREQ - 9) + { + s->Freq = (Byte)(s->Freq + 2); + c->Union2.SummFreq = (UInt16)(c->Union2.SummFreq + 2); + } + } + } + + c = p->MaxContext; + if (p->OrderFall == 0 && minSuccessor) + { + PPMD8_CTX_PTR cs = Ppmd8_CreateSuccessors(p, True, s, p->MinContext); + if (!cs) + { + Ppmd8State_SetSuccessor(p->FoundState, 0); + RESTORE_MODEL(c, CTX(minSuccessor)); + return; + } + Ppmd8State_SetSuccessor(p->FoundState, REF(cs)); + 
p->MinContext = p->MaxContext = cs; + return; + } + + + + + { + Byte *text = p->Text; + *text++ = p->FoundState->Symbol; + p->Text = text; + if (text >= p->UnitsStart) + { + RESTORE_MODEL(c, CTX(minSuccessor)); /* check it */ + return; + } + maxSuccessor = REF(text); + } + + if (!minSuccessor) + { + PPMD8_CTX_PTR cs = ReduceOrder(p, s, p->MinContext); + if (!cs) + { + RESTORE_MODEL(c, NULL); + return; + } + minSuccessor = REF(cs); + } + else if ((Byte *)Ppmd8_GetPtr(p, minSuccessor) < p->UnitsStart) + { + PPMD8_CTX_PTR cs = Ppmd8_CreateSuccessors(p, False, s, p->MinContext); + if (!cs) + { + RESTORE_MODEL(c, NULL); + return; + } + minSuccessor = REF(cs); + } + + if (--p->OrderFall == 0) + { + maxSuccessor = minSuccessor; + p->Text -= (p->MaxContext != p->MinContext); + } + #ifdef PPMD8_FREEZE_SUPPORT + else if (p->RestoreMethod > PPMD8_RESTORE_METHOD_FREEZE) + { + maxSuccessor = minSuccessor; + RESET_TEXT(0) + p->OrderFall = 0; + } + #endif + } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + flag = (Byte)(PPMD8_HiBitsFlag_3(fSymbol)); + s0 = p->MinContext->Union2.SummFreq - (ns = p->MinContext->NumStats) - fFreq; + + for (; c != p->MinContext; c = SUFFIX(c)) + { + unsigned ns1; + UInt32 sum; + + if ((ns1 = c->NumStats) != 0) + { + if ((ns1 & 1) != 0) + { + /* Expand for one UNIT */ + unsigned oldNU = (ns1 + 1) >> 1; + unsigned i = U2I(oldNU); + if (i != U2I((size_t)oldNU + 1)) + { + void *ptr = Ppmd8_AllocUnits(p, i + 1); + void *oldPtr; + if (!ptr) + { + RESTORE_MODEL(c, CTX(minSuccessor)); + return; + } + oldPtr = STATS(c); + MEM_12_CPY(ptr, oldPtr, oldNU) + Ppmd8_InsertNode(p, oldPtr, i); + c->Union4.Stats = STATS_REF(ptr); + } + } + sum = c->Union2.SummFreq; + /* max increase of Escape_Freq is 1 here. + an average increase is 1/3 per symbol */ + sum += (3 * ns1 + 1 < ns); + /* original PPMdH uses 16-bit variable for (sum) here. + But (sum < ???). 
Do we need to truncate (sum) to 16-bit */ + // sum = (UInt16)sum; + } + else + { + + CPpmd_State *s = (CPpmd_State*)Ppmd8_AllocUnits(p, 0); + if (!s) + { + RESTORE_MODEL(c, CTX(minSuccessor)); + return; + } + { + unsigned freq = c->Union2.State2.Freq; + // s = *ONE_STATE(c); + s->Symbol = c->Union2.State2.Symbol; + s->Successor_0 = c->Union4.State4.Successor_0; + s->Successor_1 = c->Union4.State4.Successor_1; + // Ppmd8State_SetSuccessor(s, c->Union4.Stats); // call it only for debug purposes to check the order of + // (Successor_0 and Successor_1) in LE/BE. + c->Union4.Stats = REF(s); + if (freq < MAX_FREQ / 4 - 1) + freq <<= 1; + else + freq = MAX_FREQ - 4; + + s->Freq = (Byte)freq; + + sum = freq + p->InitEsc + (ns > 2); // Ppmd8 (> 2) + } + } + + { + CPpmd_State *s = STATS(c) + ns1 + 1; + UInt32 cf = 2 * (sum + 6) * (UInt32)fFreq; + UInt32 sf = (UInt32)s0 + sum; + s->Symbol = fSymbol; + c->NumStats = (Byte)(ns1 + 1); + Ppmd8State_SetSuccessor(s, maxSuccessor); + c->Flags |= flag; + if (cf < 6 * sf) + { + cf = (unsigned)1 + (cf > sf) + (cf >= 4 * sf); + sum += 4; + /* It can add (1, 2, 3) to Escape_Freq */ + } + else + { + cf = (unsigned)4 + (cf > 9 * sf) + (cf > 12 * sf) + (cf > 15 * sf); + sum += cf; + } + + c->Union2.SummFreq = (UInt16)sum; + s->Freq = (Byte)cf; + } + + } + p->MaxContext = p->MinContext = CTX(minSuccessor); +} + + + +Z7_NO_INLINE +static void Ppmd8_Rescale(CPpmd8 *p) +{ + unsigned i, adder, sumFreq, escFreq; + CPpmd_State *stats = STATS(p->MinContext); + CPpmd_State *s = p->FoundState; + + /* Sort the list by Freq */ + if (s != stats) + { + CPpmd_State tmp = *s; + do + s[0] = s[-1]; + while (--s != stats); + *s = tmp; + } + + sumFreq = s->Freq; + escFreq = p->MinContext->Union2.SummFreq - sumFreq; + + + + + + + adder = (p->OrderFall != 0); + + #ifdef PPMD8_FREEZE_SUPPORT + adder |= (p->RestoreMethod > PPMD8_RESTORE_METHOD_FREEZE); + #endif + + sumFreq = (sumFreq + 4 + adder) >> 1; + i = p->MinContext->NumStats; + s->Freq = (Byte)sumFreq; + + 
do + { + unsigned freq = (++s)->Freq; + escFreq -= freq; + freq = (freq + adder) >> 1; + sumFreq += freq; + s->Freq = (Byte)freq; + if (freq > s[-1].Freq) + { + CPpmd_State tmp = *s; + CPpmd_State *s1 = s; + do + { + s1[0] = s1[-1]; + } + while (--s1 != stats && freq > s1[-1].Freq); + *s1 = tmp; + } + } + while (--i); + + if (s->Freq == 0) + { + /* Remove all items with Freq == 0 */ + CPpmd8_Context *mc; + unsigned numStats, numStatsNew, n0, n1; + + i = 0; do { i++; } while ((--s)->Freq == 0); + + + + + escFreq += i; + mc = p->MinContext; + numStats = mc->NumStats; + numStatsNew = numStats - i; + mc->NumStats = (Byte)(numStatsNew); + n0 = (numStats + 2) >> 1; + + if (numStatsNew == 0) + { + + unsigned freq = (2 * (unsigned)stats->Freq + escFreq - 1) / escFreq; + if (freq > MAX_FREQ / 3) + freq = MAX_FREQ / 3; + mc->Flags = (Byte)((mc->Flags & FLAG_PREV_HIGH) + PPMD8_HiBitsFlag_3(stats->Symbol)); + + + + + + s = ONE_STATE(mc); + *s = *stats; + s->Freq = (Byte)freq; + p->FoundState = s; + Ppmd8_InsertNode(p, stats, U2I(n0)); + return; + } + + n1 = (numStatsNew + 2) >> 1; + if (n0 != n1) + mc->Union4.Stats = STATS_REF(ShrinkUnits(p, stats, n0, n1)); + { + // here we are for max order only. So Ppmd8_MakeEscFreq() doesn't use mc->Flags + // but we still need current (Flags & FLAG_PREV_HIGH), if we will convert context to 1-symbol context later. 
+ /* + unsigned flags = HiBits_Prepare((s = STATS(mc))->Symbol); + i = mc->NumStats; + do { flags |= HiBits_Prepare((++s)->Symbol); } while (--i); + mc->Flags = (Byte)((mc->Flags & ~FLAG_SYM_HIGH) + HiBits_Convert_3(flags)); + */ + } + } + + + + + + + { + CPpmd8_Context *mc = p->MinContext; + mc->Union2.SummFreq = (UInt16)(sumFreq + escFreq - (escFreq >> 1)); + mc->Flags |= FLAG_RESCALED; + p->FoundState = STATS(mc); + } +} + + +CPpmd_See *Ppmd8_MakeEscFreq(CPpmd8 *p, unsigned numMasked1, UInt32 *escFreq) +{ + CPpmd_See *see; + const CPpmd8_Context *mc = p->MinContext; + unsigned numStats = mc->NumStats; + if (numStats != 0xFF) + { + // (3 <= numStats + 2 <= 256) (3 <= NS2Indx[3] and NS2Indx[256] === 26) + see = p->See[(size_t)(unsigned)p->NS2Indx[(size_t)numStats + 2] - 3] + + (mc->Union2.SummFreq > 11 * (numStats + 1)) + + 2 * (unsigned)(2 * numStats < ((unsigned)SUFFIX(mc)->NumStats + numMasked1)) + + mc->Flags; + + { + // if (see->Summ) field is larger than 16-bit, we need only low 16 bits of Summ + unsigned summ = (UInt16)see->Summ; // & 0xFFFF + unsigned r = (summ >> see->Shift); + see->Summ = (UInt16)(summ - r); + *escFreq = r + (r == 0); + } + } + else + { + see = &p->DummySee; + *escFreq = 1; + } + return see; +} + + +static void Ppmd8_NextContext(CPpmd8 *p) +{ + PPMD8_CTX_PTR c = CTX(SUCCESSOR(p->FoundState)); + if (p->OrderFall == 0 && (const Byte *)c >= p->UnitsStart) + p->MaxContext = p->MinContext = c; + else + Ppmd8_UpdateModel(p); +} + + +void Ppmd8_Update1(CPpmd8 *p) +{ + CPpmd_State *s = p->FoundState; + unsigned freq = s->Freq; + freq += 4; + p->MinContext->Union2.SummFreq = (UInt16)(p->MinContext->Union2.SummFreq + 4); + s->Freq = (Byte)freq; + if (freq > s[-1].Freq) + { + SWAP_STATES(s, &s[-1]); + p->FoundState = --s; + if (freq > MAX_FREQ) + Ppmd8_Rescale(p); + } + Ppmd8_NextContext(p); +} + + +void Ppmd8_Update1_0(CPpmd8 *p) +{ + CPpmd_State *s = p->FoundState; + CPpmd8_Context *mc = p->MinContext; + unsigned freq = s->Freq; + unsigned 
summFreq = mc->Union2.SummFreq; + p->PrevSuccess = (2 * freq >= summFreq); // Ppmd8 (>=) + p->RunLength += (int)p->PrevSuccess; + mc->Union2.SummFreq = (UInt16)(summFreq + 4); + freq += 4; + s->Freq = (Byte)freq; + if (freq > MAX_FREQ) + Ppmd8_Rescale(p); + Ppmd8_NextContext(p); +} + + +/* +void Ppmd8_UpdateBin(CPpmd8 *p) +{ + unsigned freq = p->FoundState->Freq; + p->FoundState->Freq = (Byte)(freq + (freq < 196)); // Ppmd8 (196) + p->PrevSuccess = 1; + p->RunLength++; + Ppmd8_NextContext(p); +} +*/ + +void Ppmd8_Update2(CPpmd8 *p) +{ + CPpmd_State *s = p->FoundState; + unsigned freq = s->Freq; + freq += 4; + p->RunLength = p->InitRL; + p->MinContext->Union2.SummFreq = (UInt16)(p->MinContext->Union2.SummFreq + 4); + s->Freq = (Byte)freq; + if (freq > MAX_FREQ) + Ppmd8_Rescale(p); + Ppmd8_UpdateModel(p); +} + +/* H->I changes: + NS2Indx + GlueCount, and Glue method + BinSum + See / EscFreq + Ppmd8_CreateSuccessors updates more suffix contexts + Ppmd8_UpdateModel consts. + PrevSuccess Update + +Flags: + (1 << 2) - the Context was Rescaled + (1 << 3) - there is symbol in Stats with (sym >= 0x40) in + (1 << 4) - main symbol of context is (sym >= 0x40) +*/ + +#undef RESET_TEXT +#undef FLAG_RESCALED +#undef FLAG_PREV_HIGH +#undef HiBits_Prepare +#undef HiBits_Convert_3 +#undef HiBits_Convert_4 +#undef PPMD8_HiBitsFlag_3 +#undef PPMD8_HiBitsFlag_4 +#undef RESTORE_MODEL + +#undef MAX_FREQ +#undef UNIT_SIZE +#undef U2B +#undef U2I +#undef I2U + +#undef REF +#undef STATS_REF +#undef CTX +#undef STATS +#undef ONE_STATE +#undef SUFFIX +#undef NODE +#undef EMPTY_NODE +#undef MEM_12_CPY +#undef SUCCESSOR +#undef SWAP_STATES diff --git a/external/unarr/lzmasdk/Ppmd8.h b/external/unarr/lzmasdk/Ppmd8.h new file mode 100644 index 00000000..fe93fe7c --- /dev/null +++ b/external/unarr/lzmasdk/Ppmd8.h @@ -0,0 +1,181 @@ +/* Ppmd8.h -- Ppmd8 (PPMdI) compression codec +2021-04-13 : Igor Pavlov : Public domain +This code is based on: + PPMd var.I (2002): Dmitry Shkarin : Public domain + 
Carryless rangecoder (1999): Dmitry Subbotin : Public domain */ + +#ifndef __PPMD8_H +#define __PPMD8_H + +#include "Ppmd.h" + +EXTERN_C_BEGIN + +#define PPMD8_MIN_ORDER 2 +#define PPMD8_MAX_ORDER 16 + + + + +struct CPpmd8_Context_; + +typedef Ppmd_Ref_Type(struct CPpmd8_Context_) CPpmd8_Context_Ref; + +// MY_CPU_pragma_pack_push_1 + +typedef struct CPpmd8_Context_ +{ + Byte NumStats; + Byte Flags; + + union + { + UInt16 SummFreq; + CPpmd_State2 State2; + } Union2; + + union + { + CPpmd_State_Ref Stats; + CPpmd_State4 State4; + } Union4; + + CPpmd8_Context_Ref Suffix; +} CPpmd8_Context; + +// MY_CPU_pragma_pop + +#define Ppmd8Context_OneState(p) ((CPpmd_State *)&(p)->Union2) + +/* PPMdI code rev.2 contains the fix over PPMdI code rev.1. + But the code PPMdI.2 is not compatible with PPMdI.1 for some files compressed + in FREEZE mode. So we disable FREEZE mode support. */ + +// #define PPMD8_FREEZE_SUPPORT + +enum +{ + PPMD8_RESTORE_METHOD_RESTART, + PPMD8_RESTORE_METHOD_CUT_OFF + #ifdef PPMD8_FREEZE_SUPPORT + , PPMD8_RESTORE_METHOD_FREEZE + #endif + , PPMD8_RESTORE_METHOD_UNSUPPPORTED +}; + + + + + + + + +typedef struct +{ + CPpmd8_Context *MinContext, *MaxContext; + CPpmd_State *FoundState; + unsigned OrderFall, InitEsc, PrevSuccess, MaxOrder, RestoreMethod; + Int32 RunLength, InitRL; /* must be 32-bit at least */ + + UInt32 Size; + UInt32 GlueCount; + UInt32 AlignOffset; + Byte *Base, *LoUnit, *HiUnit, *Text, *UnitsStart; + + UInt32 Range; + UInt32 Code; + UInt32 Low; + union + { + IByteIn *In; + IByteOut *Out; + } Stream; + + Byte Indx2Units[PPMD_NUM_INDEXES + 2]; // +2 for alignment + Byte Units2Indx[128]; + CPpmd_Void_Ref FreeList[PPMD_NUM_INDEXES]; + UInt32 Stamps[PPMD_NUM_INDEXES]; + Byte NS2BSIndx[256], NS2Indx[260]; + Byte ExpEscape[16]; + CPpmd_See DummySee, See[24][32]; + UInt16 BinSumm[25][64]; + +} CPpmd8; + + +void Ppmd8_Construct(CPpmd8 *p); +BoolInt Ppmd8_Alloc(CPpmd8 *p, UInt32 size, ISzAllocPtr alloc); +void Ppmd8_Free(CPpmd8 *p, ISzAllocPtr 
alloc); +void Ppmd8_Init(CPpmd8 *p, unsigned maxOrder, unsigned restoreMethod); +#define Ppmd8_WasAllocated(p) ((p)->Base != NULL) + + +/* ---------- Internal Functions ---------- */ + +#define Ppmd8_GetPtr(p, ptr) Ppmd_GetPtr(p, ptr) +#define Ppmd8_GetContext(p, ptr) Ppmd_GetPtr_Type(p, ptr, CPpmd8_Context) +#define Ppmd8_GetStats(p, ctx) Ppmd_GetPtr_Type(p, (ctx)->Union4.Stats, CPpmd_State) + +void Ppmd8_Update1(CPpmd8 *p); +void Ppmd8_Update1_0(CPpmd8 *p); +void Ppmd8_Update2(CPpmd8 *p); + + + + + + +#define Ppmd8_GetBinSumm(p) \ + &p->BinSumm[p->NS2Indx[(size_t)Ppmd8Context_OneState(p->MinContext)->Freq - 1]] \ + [ p->PrevSuccess + ((p->RunLength >> 26) & 0x20) \ + + p->NS2BSIndx[Ppmd8_GetContext(p, p->MinContext->Suffix)->NumStats] + \ + + p->MinContext->Flags ] + + +CPpmd_See *Ppmd8_MakeEscFreq(CPpmd8 *p, unsigned numMasked, UInt32 *scale); + + +/* 20.01: the original PPMdI encoder and decoder probably could work incorrectly in some rare cases, + where the original PPMdI code can give "Divide by Zero" operation. + We use the following fix to allow correct working of encoder and decoder in any cases. 
+ We correct (Escape_Freq) and (_sum_), if (_sum_) is larger than p->Range) */ +#define PPMD8_CORRECT_SUM_RANGE(p, _sum_) if (_sum_ > p->Range /* /1 */) _sum_ = p->Range; + + +/* ---------- Decode ---------- */ + +#define PPMD8_SYM_END (-1) +#define PPMD8_SYM_ERROR (-2) + +/* +You must set (CPpmd8::Stream.In) before Ppmd8_RangeDec_Init() + +Ppmd8_DecodeSymbol() +out: + >= 0 : decoded byte + -1 : PPMD8_SYM_END : End of payload marker + -2 : PPMD8_SYM_ERROR : Data error +*/ + + +BoolInt Ppmd8_Init_RangeDec(CPpmd8 *p); +#define Ppmd8_RangeDec_IsFinishedOK(p) ((p)->Code == 0) +int Ppmd8_DecodeSymbol(CPpmd8 *p); + + + + + + + + +/* ---------- Encode ---------- */ + +#define Ppmd8_Init_RangeEnc(p) { (p)->Low = 0; (p)->Range = 0xFFFFFFFF; } +void Ppmd8_Flush_RangeEnc(CPpmd8 *p); +void Ppmd8_EncodeSymbol(CPpmd8 *p, int symbol); + + +EXTERN_C_END + +#endif diff --git a/external/unarr/lzmasdk/Ppmd8Dec.c b/external/unarr/lzmasdk/Ppmd8Dec.c new file mode 100755 index 00000000..72d3626e --- /dev/null +++ b/external/unarr/lzmasdk/Ppmd8Dec.c @@ -0,0 +1,295 @@ +/* Ppmd8Dec.c -- Ppmd8 (PPMdI) Decoder +2023-04-02 : Igor Pavlov : Public domain +This code is based on: + PPMd var.I (2002): Dmitry Shkarin : Public domain + Carryless rangecoder (1999): Dmitry Subbotin : Public domain */ + +#include "Precomp.h" + +#include "Ppmd8.h" + +#define kTop ((UInt32)1 << 24) +#define kBot ((UInt32)1 << 15) + +#define READ_BYTE(p) IByteIn_Read((p)->Stream.In) + +BoolInt Ppmd8_Init_RangeDec(CPpmd8 *p) +{ + unsigned i; + p->Code = 0; + p->Range = 0xFFFFFFFF; + p->Low = 0; + + for (i = 0; i < 4; i++) + p->Code = (p->Code << 8) | READ_BYTE(p); + return (p->Code < 0xFFFFFFFF); +} + +#define RC_NORM(p) \ + while ((p->Low ^ (p->Low + p->Range)) < kTop \ + || (p->Range < kBot && ((p->Range = (0 - p->Low) & (kBot - 1)), 1))) { \ + p->Code = (p->Code << 8) | READ_BYTE(p); \ + p->Range <<= 8; p->Low <<= 8; } + +// we must use only one type of Normalization from two: LOCAL or REMOTE +#define RC_NORM_LOCAL(p) 
// RC_NORM(p) +#define RC_NORM_REMOTE(p) RC_NORM(p) + +#define R p + +Z7_FORCE_INLINE +// Z7_NO_INLINE +static void Ppmd8_RD_Decode(CPpmd8 *p, UInt32 start, UInt32 size) +{ + start *= R->Range; + R->Low += start; + R->Code -= start; + R->Range *= size; + RC_NORM_LOCAL(R) +} + +#define RC_Decode(start, size) Ppmd8_RD_Decode(p, start, size); +#define RC_DecodeFinal(start, size) RC_Decode(start, size) RC_NORM_REMOTE(R) +#define RC_GetThreshold(total) (R->Code / (R->Range /= (total))) + + +#define CTX(ref) ((CPpmd8_Context *)Ppmd8_GetContext(p, ref)) +// typedef CPpmd8_Context * CTX_PTR; +#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p) +void Ppmd8_UpdateModel(CPpmd8 *p); + +#define MASK(sym) ((unsigned char *)charMask)[sym] + + +int Ppmd8_DecodeSymbol(CPpmd8 *p) +{ + size_t charMask[256 / sizeof(size_t)]; + + if (p->MinContext->NumStats != 0) + { + CPpmd_State *s = Ppmd8_GetStats(p, p->MinContext); + unsigned i; + UInt32 count, hiCnt; + UInt32 summFreq = p->MinContext->Union2.SummFreq; + + PPMD8_CORRECT_SUM_RANGE(p, summFreq) + + + count = RC_GetThreshold(summFreq); + hiCnt = count; + + if ((Int32)(count -= s->Freq) < 0) + { + Byte sym; + RC_DecodeFinal(0, s->Freq) + p->FoundState = s; + sym = s->Symbol; + Ppmd8_Update1_0(p); + return sym; + } + + p->PrevSuccess = 0; + i = p->MinContext->NumStats; + + do + { + if ((Int32)(count -= (++s)->Freq) < 0) + { + Byte sym; + RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq) + p->FoundState = s; + sym = s->Symbol; + Ppmd8_Update1(p); + return sym; + } + } + while (--i); + + if (hiCnt >= summFreq) + return PPMD8_SYM_ERROR; + + hiCnt -= count; + RC_Decode(hiCnt, summFreq - hiCnt) + + + PPMD_SetAllBitsIn256Bytes(charMask) + // i = p->MinContext->NumStats - 1; + // do { MASK((--s)->Symbol) = 0; } while (--i); + { + CPpmd_State *s2 = Ppmd8_GetStats(p, p->MinContext); + MASK(s->Symbol) = 0; + do + { + unsigned sym0 = s2[0].Symbol; + unsigned sym1 = s2[1].Symbol; + s2 += 2; + MASK(sym0) = 0; + MASK(sym1) = 0; + } + while (s2 < s); + } + } 
+ else + { + CPpmd_State *s = Ppmd8Context_OneState(p->MinContext); + UInt16 *prob = Ppmd8_GetBinSumm(p); + UInt32 pr = *prob; + UInt32 size0 = (R->Range >> 14) * pr; + pr = PPMD_UPDATE_PROB_1(pr); + + if (R->Code < size0) + { + Byte sym; + *prob = (UInt16)(pr + (1 << PPMD_INT_BITS)); + + // RangeDec_DecodeBit0(size0); + R->Range = size0; + RC_NORM(R) + + + + // sym = (p->FoundState = Ppmd8Context_OneState(p->MinContext))->Symbol; + // Ppmd8_UpdateBin(p); + { + unsigned freq = s->Freq; + CPpmd8_Context *c = CTX(SUCCESSOR(s)); + sym = s->Symbol; + p->FoundState = s; + p->PrevSuccess = 1; + p->RunLength++; + s->Freq = (Byte)(freq + (freq < 196)); + // NextContext(p); + if (p->OrderFall == 0 && (const Byte *)c >= p->UnitsStart) + p->MaxContext = p->MinContext = c; + else + Ppmd8_UpdateModel(p); + } + return sym; + } + + *prob = (UInt16)pr; + p->InitEsc = p->ExpEscape[pr >> 10]; + + // RangeDec_DecodeBit1(rc2, size0); + R->Low += size0; + R->Code -= size0; + R->Range = (R->Range & ~((UInt32)PPMD_BIN_SCALE - 1)) - size0; + RC_NORM_LOCAL(R) + + PPMD_SetAllBitsIn256Bytes(charMask) + MASK(Ppmd8Context_OneState(p->MinContext)->Symbol) = 0; + p->PrevSuccess = 0; + } + + for (;;) + { + CPpmd_State *s, *s2; + UInt32 freqSum, count, hiCnt; + UInt32 freqSum2; + CPpmd_See *see; + CPpmd8_Context *mc; + unsigned numMasked; + RC_NORM_REMOTE(R) + mc = p->MinContext; + numMasked = mc->NumStats; + + do + { + p->OrderFall++; + if (!mc->Suffix) + return PPMD8_SYM_END; + mc = Ppmd8_GetContext(p, mc->Suffix); + } + while (mc->NumStats == numMasked); + + s = Ppmd8_GetStats(p, mc); + + { + unsigned num = (unsigned)mc->NumStats + 1; + unsigned num2 = num / 2; + + num &= 1; + hiCnt = (s->Freq & (unsigned)(MASK(s->Symbol))) & (0 - (UInt32)num); + s += num; + p->MinContext = mc; + + do + { + unsigned sym0 = s[0].Symbol; + unsigned sym1 = s[1].Symbol; + s += 2; + hiCnt += (s[-2].Freq & (unsigned)(MASK(sym0))); + hiCnt += (s[-1].Freq & (unsigned)(MASK(sym1))); + } + while (--num2); + } + + see = 
Ppmd8_MakeEscFreq(p, numMasked, &freqSum); + freqSum += hiCnt; + freqSum2 = freqSum; + PPMD8_CORRECT_SUM_RANGE(R, freqSum2) + + + count = RC_GetThreshold(freqSum2); + + if (count < hiCnt) + { + Byte sym; + // Ppmd_See_UPDATE(see) // new (see->Summ) value can overflow over 16-bits in some rare cases + s = Ppmd8_GetStats(p, p->MinContext); + hiCnt = count; + + + { + for (;;) + { + count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break; + // count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break; + } + } + s--; + RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq) + + // new (see->Summ) value can overflow over 16-bits in some rare cases + Ppmd_See_UPDATE(see) + p->FoundState = s; + sym = s->Symbol; + Ppmd8_Update2(p); + return sym; + } + + if (count >= freqSum2) + return PPMD8_SYM_ERROR; + + RC_Decode(hiCnt, freqSum2 - hiCnt) + + // We increase (see->Summ) for sum of Freqs of all non_Masked symbols. + // new (see->Summ) value can overflow over 16-bits in some rare cases + see->Summ = (UInt16)(see->Summ + freqSum); + + s = Ppmd8_GetStats(p, p->MinContext); + s2 = s + p->MinContext->NumStats + 1; + do + { + MASK(s->Symbol) = 0; + s++; + } + while (s != s2); + } +} + +#undef kTop +#undef kBot +#undef READ_BYTE +#undef RC_NORM_BASE +#undef RC_NORM_1 +#undef RC_NORM +#undef RC_NORM_LOCAL +#undef RC_NORM_REMOTE +#undef R +#undef RC_Decode +#undef RC_DecodeFinal +#undef RC_GetThreshold +#undef CTX +#undef SUCCESSOR +#undef MASK diff --git a/external/unarr/lzmasdk/Precomp.h b/external/unarr/lzmasdk/Precomp.h new file mode 100644 index 00000000..69afb2ff --- /dev/null +++ b/external/unarr/lzmasdk/Precomp.h @@ -0,0 +1,10 @@ +/* Precomp.h -- StdAfx +2023-04-02 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_PRECOMP_H +#define ZIP7_INC_PRECOMP_H + +#include "Compiler.h" +/* #include "7zTypes.h" */ + +#endif diff --git a/external/unarr/lzmasdk/RotateDefs.h b/external/unarr/lzmasdk/RotateDefs.h new file mode 100755 index 
00000000..c16b4f8e --- /dev/null +++ b/external/unarr/lzmasdk/RotateDefs.h @@ -0,0 +1,50 @@ +/* RotateDefs.h -- Rotate functions +2023-06-18 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_ROTATE_DEFS_H +#define ZIP7_INC_ROTATE_DEFS_H + +#ifdef _MSC_VER + +#include + +/* don't use _rotl with old MINGW. It can insert slow call to function. */ + +/* #if (_MSC_VER >= 1200) */ +#pragma intrinsic(_rotl) +#pragma intrinsic(_rotr) +/* #endif */ + +#define rotlFixed(x, n) _rotl((x), (n)) +#define rotrFixed(x, n) _rotr((x), (n)) + +#if (_MSC_VER >= 1300) +#define Z7_ROTL64(x, n) _rotl64((x), (n)) +#define Z7_ROTR64(x, n) _rotr64((x), (n)) +#else +#define Z7_ROTL64(x, n) (((x) << (n)) | ((x) >> (64 - (n)))) +#define Z7_ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n)))) +#endif + +#else + +/* new compilers can translate these macros to fast commands. */ + +#if defined(__clang__) && (__clang_major__ >= 4) \ + || defined(__GNUC__) && (__GNUC__ >= 5) +/* GCC 4.9.0 and clang 3.5 can recognize more correct version: */ +#define rotlFixed(x, n) (((x) << (n)) | ((x) >> (-(n) & 31))) +#define rotrFixed(x, n) (((x) >> (n)) | ((x) << (-(n) & 31))) +#define Z7_ROTL64(x, n) (((x) << (n)) | ((x) >> (-(n) & 63))) +#define Z7_ROTR64(x, n) (((x) >> (n)) | ((x) << (-(n) & 63))) +#else +/* for old GCC / clang: */ +#define rotlFixed(x, n) (((x) << (n)) | ((x) >> (32 - (n)))) +#define rotrFixed(x, n) (((x) >> (n)) | ((x) << (32 - (n)))) +#define Z7_ROTL64(x, n) (((x) << (n)) | ((x) >> (64 - (n)))) +#define Z7_ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n)))) +#endif + +#endif + +#endif diff --git a/external/unarr/pkg-config.pc.cmake b/external/unarr/pkg-config.pc.cmake new file mode 100644 index 00000000..9055aef2 --- /dev/null +++ b/external/unarr/pkg-config.pc.cmake @@ -0,0 +1,11 @@ +prefix=@CMAKE_INSTALL_PREFIX@ +includedir=@PROJECT_INSTALL_INCLUDEDIR@ +libdir=@PROJECT_INSTALL_LIBDIR@ + +Name: @PROJECT_NAME@ +Description: @PROJECT_DESCRIPTION@ +Version: @PROJECT_VERSION@ +Cflags: 
-I${includedir} +Requires.private: @PROJECT_REQUIRES_PRIVATE@ +Libs: -L${libdir} -l@PROJECT_NAME@ +Libs.private: @PROJECT_LIBS_PRIVATE@ diff --git a/external/unarr/rar/filter-rar.c b/external/unarr/rar/filter-rar.c new file mode 100644 index 00000000..eaab8a6f --- /dev/null +++ b/external/unarr/rar/filter-rar.c @@ -0,0 +1,711 @@ +/* Copyright 2018 the unarr project authors (see AUTHORS file). + License: LGPLv3 */ + +#include "rar.h" +#include "rarvm.h" + +/* adapted from https://code.google.com/p/theunarchiver/source/browse/XADMaster/XADRARVirtualMachine.m */ +/* adapted from https://code.google.com/p/theunarchiver/source/browse/XADMaster/XADRAR30Filter.m */ + +struct MemBitReader { + const uint8_t *bytes; + size_t length; + size_t offset; + uint64_t bits; + int available; + bool at_eof; +}; + +struct RARProgramCode { + RARProgram *prog; + uint8_t *staticdata; + uint32_t staticdatalen; + uint8_t *globalbackup; + uint32_t globalbackuplen; + uint64_t fingerprint; + uint32_t usagecount; + uint32_t oldfilterlength; + struct RARProgramCode *next; +}; + +struct RARFilter { + struct RARProgramCode *prog; + uint32_t initialregisters[8]; + uint8_t *globaldata; + uint32_t globaldatalen; + size_t blockstartpos; + uint32_t blocklength; + uint32_t filteredblockaddress; + uint32_t filteredblocklength; + struct RARFilter *next; +}; + +static bool br_fill(struct MemBitReader *br, int bits) +{ + while (br->available < bits && br->offset < br->length) { + br->bits = (br->bits << 8) | br->bytes[br->offset++]; + br->available += 8; + } + if (bits > br->available) { + br->at_eof = true; + return false; + } + return true; +} + +static inline uint32_t br_bits(struct MemBitReader *br, int bits) +{ + if (bits > br->available && (br->at_eof || !br_fill(br, bits))) + return 0; + return (uint32_t)((br->bits >> (br->available -= bits)) & (((uint64_t)1 << bits) - 1)); +} + +static inline bool br_available(struct MemBitReader *br, int bits) +{ + return !br->at_eof && (bits <= br->available || 
br_fill(br, bits)); +} + +static uint32_t br_next_rarvm_number(struct MemBitReader *br) +{ + uint32_t val; + switch (br_bits(br, 2)) { + case 0: + return br_bits(br, 4); + case 1: + val = br_bits(br, 8); + if (val >= 16) + return val; + return 0xFFFFFF00 | (val << 4) | br_bits(br, 4); + case 2: + return br_bits(br, 16); + default: + return br_bits(br, 32); + } +} + +static void bw_write32le(uint8_t *dst, uint32_t value) +{ + dst[0] = value & 0xFF; + dst[1] = (value >> 8) & 0xFF; + dst[2] = (value >> 16) & 0xFF; + dst[3] = (value >> 24) & 0xFF; +} + +static void rar_delete_program(struct RARProgramCode *prog) +{ + while (prog) { + struct RARProgramCode *next = prog->next; + RARDeleteProgram(prog->prog); + free(prog->staticdata); + free(prog->globalbackup); + free(prog); + prog = next; + } +} + +static bool rar_parse_operand(struct MemBitReader *br, uint8_t instruction, bool bytemode, uint32_t instrcount, uint8_t *addressmode, uint32_t *value) +{ + if (br_bits(br, 1)) { + *addressmode = RARRegisterAddressingMode((uint8_t)br_bits(br, 3)); + *value = 0; + } + else if (br_bits(br, 1)) { + if (br_bits(br, 1)) { + if (br_bits(br, 1)) + *addressmode = RARAbsoluteAddressingMode; + else + *addressmode = RARIndexedAbsoluteAddressingMode((uint8_t)br_bits(br, 3)); + *value = br_next_rarvm_number(br); + } + else { + *addressmode = RARRegisterIndirectAddressingMode((uint8_t)br_bits(br, 3)); + *value = 0; + } + } + else { + *addressmode = RARImmediateAddressingMode; + if (!bytemode) + *value = br_next_rarvm_number(br); + else + *value = br_bits(br, 8); + if (instrcount != (uint32_t)-1 && RARInstructionIsRelativeJump(instruction)) { + if (*value >= 256) /* absolute address */ + *value -= 256; + else { /* relative address */ + if (*value >= 136) + *value -= 264; + else if (*value >= 16) + *value -= 8; + else if (*value >= 8) + *value -= 16; + *value += instrcount; + } + } + } + return !br->at_eof; +} + +static struct RARProgramCode *rar_compile_program(const uint8_t *bytes, size_t 
length) +{ + struct MemBitReader br = { 0 }; + struct RARProgramCode *prog; + uint32_t instrcount = 0; + uint8_t xor; + size_t i; + + xor = 0; + for (i = 1; i < length; i++) + xor ^= bytes[i]; + if (!length || xor != bytes[0]) + return NULL; + + br.bytes = bytes; + br.length = length; + br.offset = 1; + + prog = calloc(1, sizeof(*prog)); + if (!prog) + return NULL; + prog->prog = RARCreateProgram(); + if (!prog->prog) { + rar_delete_program(prog); + return NULL; + } + prog->fingerprint = ar_crc32(0, bytes, length) | ((uint64_t)length << 32); + + if (br_bits(&br, 1)) { + prog->staticdatalen = br_next_rarvm_number(&br) + 1; + prog->staticdata = malloc(prog->staticdatalen); + if (!prog->staticdata) { + rar_delete_program(prog); + return NULL; + } + for (i = 0; i < prog->staticdatalen; i++) + prog->staticdata[i] = (uint8_t)br_bits(&br, 8); + } + + while (br_available(&br, 8)) { + bool ok = true; + uint8_t instruction = (uint8_t)br_bits(&br, 4); + bool bytemode = false; + int numargs = 0; + uint8_t addrmode1 = 0, addrmode2 = 0; + uint32_t value1 = 0, value2 = 0; + + if ((instruction & 0x08)) + instruction = ((instruction << 2) | (uint8_t)br_bits(&br, 2)) - 24; + if (RARInstructionHasByteMode(instruction)) + bytemode = br_bits(&br, 1) != 0; + ok = RARProgramAddInstr(prog->prog, instruction, bytemode); + numargs = NumberOfRARInstructionOperands(instruction); + if (ok && numargs >= 1) + ok = rar_parse_operand(&br, instruction, bytemode, instrcount, &addrmode1, &value1); + if (ok && numargs == 2) + ok = rar_parse_operand(&br, instruction, bytemode, (uint32_t)-1, &addrmode2, &value2); + if (ok) + ok = RARSetLastInstrOperands(prog->prog, addrmode1, value1, addrmode2, value2); + if (!ok) { + warn("Invalid RAR program instruction"); + rar_delete_program(prog); + return NULL; + } + instrcount++; + } + + if (!RARIsProgramTerminated(prog->prog)) { + if (!RARProgramAddInstr(prog->prog, RARRetInstruction, false)) { + rar_delete_program(prog); + return NULL; + } + } + + return prog; 
+} + +static bool rar_execute_filter_prog(struct RARFilter *filter, RARVirtualMachine *vm) +{ + uint32_t newgloballength; + uint32_t globallength = filter->globaldatalen; + if (globallength > RARProgramSystemGlobalSize) + globallength = RARProgramSystemGlobalSize; + memcpy(&vm->memory[RARProgramSystemGlobalAddress], filter->globaldata, globallength); + if (filter->prog->staticdata) { + uint32_t staticlength = filter->prog->staticdatalen; + if (staticlength > RARProgramUserGlobalSize - globallength) + staticlength = RARProgramUserGlobalSize - globallength; + memcpy(&vm->memory[RARProgramUserGlobalAddress], filter->prog->staticdata, staticlength); + } + RARSetVirtualMachineRegisters(vm, filter->initialregisters); + + if (!RARExecuteProgram(vm, filter->prog->prog)) { + warn("Error while executing program in RAR VM"); + return false; + } + + newgloballength = RARVirtualMachineRead32(vm, RARProgramSystemGlobalAddress + 0x30); + if (newgloballength > RARProgramUserGlobalSize) + newgloballength = RARProgramUserGlobalSize; + if (newgloballength > 0) { + uint32_t newglobaldatalength = RARProgramSystemGlobalSize + newgloballength; + if (newglobaldatalength > filter->globaldatalen) { + uint8_t *newglobaldata = malloc(newglobaldatalength); + if (!newglobaldata) + return false; + free(filter->globaldata); + filter->globaldata = newglobaldata; + } + filter->globaldatalen = newglobaldatalength; + memcpy(filter->globaldata, &vm->memory[RARProgramSystemGlobalAddress], filter->globaldatalen); + } + else + filter->globaldatalen = 0; + + return true; +} + +static struct RARFilter *rar_create_filter(struct RARProgramCode *prog, const uint8_t *globaldata, uint32_t globaldatalen, uint32_t registers[8], size_t startpos, uint32_t length) +{ + struct RARFilter *filter; + + filter = calloc(1, sizeof(*filter)); + if (!filter) + return NULL; + filter->prog = prog; + filter->globaldatalen = globaldatalen > RARProgramSystemGlobalSize ? 
globaldatalen : RARProgramSystemGlobalSize; + filter->globaldata = calloc(1, filter->globaldatalen); + if (!filter->globaldata) { + free(filter); + return NULL; + } + if (globaldata) + memcpy(filter->globaldata, globaldata, globaldatalen); + if (registers) + memcpy(filter->initialregisters, registers, sizeof(filter->initialregisters)); + filter->blockstartpos = startpos; + filter->blocklength = length; + + return filter; +} + +static void rar_delete_filter(struct RARFilter *filter) +{ + while (filter) { + struct RARFilter *next = filter->next; + free(filter->globaldata); + free(filter); + filter = next; + } +} + +static bool rar_execute_filter_delta(struct RARFilter *filter, RARVirtualMachine *vm) +{ + uint32_t length = filter->initialregisters[4]; + uint32_t numchannels = filter->initialregisters[0]; + uint8_t *src, *dst; + uint32_t i, idx; + + if (length > RARProgramWorkSize / 2) + return false; + + src = &vm->memory[0]; + dst = &vm->memory[length]; + for (i = 0; i < numchannels; i++) { + uint8_t lastbyte = 0; + for (idx = i; idx < length; idx += numchannels) + lastbyte = dst[idx] = lastbyte - *src++; + } + + filter->filteredblockaddress = length; + filter->filteredblocklength = length; + + return true; +} + +static bool rar_execute_filter_e8(struct RARFilter *filter, RARVirtualMachine *vm, size_t pos, bool e9also) +{ + uint32_t length = filter->initialregisters[4]; + uint32_t filesize = 0x1000000; + + if (length > RARProgramWorkSize || length < 5) + return false; + + for (uint32_t i = 0; i <= length - 5; i++) { + if (vm->memory[i] == 0xE8 || (e9also && vm->memory[i] == 0xE9)) { + uint32_t currpos = (uint32_t)pos + i + 1; + int32_t address = (int32_t)RARVirtualMachineRead32(vm, i + 1); + if (address < 0 && currpos >= (uint32_t)-address) + RARVirtualMachineWrite32(vm, i + 1, address + filesize); + else if (address >= 0 && (uint32_t)address < filesize) + RARVirtualMachineWrite32(vm, i + 1, address - currpos); + i += 4; + } + } + + filter->filteredblockaddress = 0; 
+ filter->filteredblocklength = length; + + return true; +} + +static bool rar_execute_filter_rgb(struct RARFilter *filter, RARVirtualMachine *vm) +{ + uint32_t stride = filter->initialregisters[0]; + uint32_t byteoffset = filter->initialregisters[1]; + uint32_t blocklength = filter->initialregisters[4]; + uint8_t *src, *dst; + + if (blocklength < 2 || blocklength > (RARProgramWorkSize / 2) || stride > blocklength) + return false; + + src = &vm->memory[0]; + dst = &vm->memory[blocklength]; + for (int i = 0; i < 3; i++) { + uint8_t byte = 0; + uint8_t *prev = dst + i - stride; + for (uint32_t j = i; j < blocklength; j += 3) { + if (prev >= dst) { + uint32_t delta1 = abs(prev[3] - prev[0]); + uint32_t delta2 = abs(byte - prev[0]); + uint32_t delta3 = abs(prev[3] - prev[0] + byte - prev[0]); + if (delta1 > delta2 || delta1 > delta3) + byte = delta2 <= delta3 ? prev[3] : prev[0]; + } + byte -= *src++; + dst[j] = byte; + prev += 3; + } + } + for (uint32_t i = byteoffset; i < blocklength - 2; i += 3) { + dst[i] += dst[i + 1]; + dst[i + 2] += dst[i + 1]; + } + + filter->filteredblockaddress = blocklength; + filter->filteredblocklength = blocklength; + + return true; +} + +static bool rar_execute_filter_audio(struct RARFilter *filter, RARVirtualMachine *vm) +{ + uint32_t length = filter->initialregisters[4]; + uint32_t numchannels = filter->initialregisters[0]; + uint8_t *src, *dst; + uint32_t i, j; + + if (length > RARProgramWorkSize / 2) + return false; + + src = &vm->memory[0]; + dst = &vm->memory[length]; + for (i = 0; i < numchannels; i++) { + struct AudioState state; + memset(&state, 0, sizeof(state)); + for (j = i; j < length; j += numchannels) { + int8_t delta = (int8_t)*src++; + uint8_t predbyte, byte; + int prederror; + state.delta[2] = state.delta[1]; + state.delta[1] = state.lastdelta - state.delta[0]; + state.delta[0] = state.lastdelta; + predbyte = ((8 * state.lastbyte + state.weight[0] * state.delta[0] + state.weight[1] * state.delta[1] + state.weight[2] * 
state.delta[2]) >> 3) & 0xFF; + byte = (predbyte - delta) & 0xFF; + prederror = delta << 3; + state.error[0] += abs(prederror); + state.error[1] += abs(prederror - state.delta[0]); state.error[2] += abs(prederror + state.delta[0]); + state.error[3] += abs(prederror - state.delta[1]); state.error[4] += abs(prederror + state.delta[1]); + state.error[5] += abs(prederror - state.delta[2]); state.error[6] += abs(prederror + state.delta[2]); + state.lastdelta = (int8_t)(byte - state.lastbyte); + dst[j] = state.lastbyte = byte; + if (!(state.count++ & 0x1F)) { + uint8_t k, idx = 0; + for (k = 1; k < 7; k++) { + if (state.error[k] < state.error[idx]) + idx = k; + } + memset(state.error, 0, sizeof(state.error)); + switch (idx) { + case 1: if (state.weight[0] >= -16) state.weight[0]--; break; + case 2: if (state.weight[0] < 16) state.weight[0]++; break; + case 3: if (state.weight[1] >= -16) state.weight[1]--; break; + case 4: if (state.weight[1] < 16) state.weight[1]++; break; + case 5: if (state.weight[2] >= -16) state.weight[2]--; break; + case 6: if (state.weight[2] < 16) state.weight[2]++; break; + } + } + } + } + + filter->filteredblockaddress = length; + filter->filteredblocklength = length; + + return true; +} + +static bool rar_execute_filter(struct RARFilter *filter, RARVirtualMachine *vm, size_t pos) +{ + if (filter->prog->fingerprint == 0x1D0E06077D) + return rar_execute_filter_delta(filter, vm); + if (filter->prog->fingerprint == 0x35AD576887) + return rar_execute_filter_e8(filter, vm, pos, false); + if (filter->prog->fingerprint == 0x393CD7E57E) + return rar_execute_filter_e8(filter, vm, pos, true); + if (filter->prog->fingerprint == 0x951C2C5DC8) + return rar_execute_filter_rgb(filter, vm); + if (filter->prog->fingerprint == 0xD8BC85E701) + return rar_execute_filter_audio(filter, vm); + log("Unknown parsing filter 0x%x%08x", (uint32_t)(filter->prog->fingerprint >> 32), (uint32_t)filter->prog->fingerprint); + + /* XADRAR30Filter.m @executeOnVirtualMachine claims 
that this is required */ + if (filter->prog->globalbackuplen > RARProgramSystemGlobalSize) { + uint8_t *newglobaldata = malloc(filter->prog->globalbackuplen); + if (newglobaldata) { + free(filter->globaldata); + filter->globaldata = newglobaldata; + filter->globaldatalen = filter->prog->globalbackuplen; + memcpy(filter->globaldata, filter->prog->globalbackup, filter->prog->globalbackuplen); + } + } + + filter->initialregisters[6] = (uint32_t)pos; + bw_write32le(&filter->globaldata[0x24], (uint32_t)pos); + bw_write32le(&filter->globaldata[0x28], (uint32_t)((uint64_t)pos >> 32)); + + if (!rar_execute_filter_prog(filter, vm)) + return false; + + filter->filteredblockaddress = RARVirtualMachineRead32(vm, RARProgramSystemGlobalAddress + 0x20) & RARProgramMemoryMask; + filter->filteredblocklength = RARVirtualMachineRead32(vm, RARProgramSystemGlobalAddress + 0x1C) & RARProgramMemoryMask; + if (filter->filteredblockaddress + filter->filteredblocklength >= RARProgramMemorySize) { + filter->filteredblockaddress = filter->filteredblocklength = 0; + return false; + } + + if (filter->globaldatalen > RARProgramSystemGlobalSize) { + uint8_t *newglobalbackup = malloc(filter->globaldatalen); + if (newglobalbackup) { + free(filter->prog->globalbackup); + filter->prog->globalbackup = newglobalbackup; + filter->prog->globalbackuplen = filter->globaldatalen; + memcpy(filter->prog->globalbackup, filter->globaldata, filter->globaldatalen); + } + } + else + filter->prog->globalbackuplen = 0; + + return true; +} + +bool rar_parse_filter(ar_archive_rar *rar, const uint8_t *bytes, uint16_t length, uint8_t flags) +{ + struct ar_archive_rar_uncomp_v3 *uncomp = &rar->uncomp.state.v3; + struct ar_archive_rar_filters *filters = &uncomp->filters; + + struct MemBitReader br = { 0 }; + struct RARProgramCode *prog; + struct RARFilter *filter, **nextfilter; + + uint32_t numprogs, num, blocklength, globaldatalen; + uint8_t *globaldata; + size_t blockstartpos; + uint32_t registers[8] = { 0 }; + uint32_t 
i; + + br.bytes = bytes; + br.length = length; + + numprogs = 0; + for (prog = filters->progs; prog; prog = prog->next) + numprogs++; + + if ((flags & 0x80)) { + num = br_next_rarvm_number(&br); + if (num == 0) { + rar_delete_filter(filters->stack); + filters->stack = NULL; + rar_delete_program(filters->progs); + filters->progs = NULL; + } + else + num--; + if (num > numprogs) { + warn("Invalid program number"); + return false; + } + filters->lastfilternum = num; + } + else + num = filters->lastfilternum; + + prog = filters->progs; + for (i = 0; i < num; i++) { + if (prog) { + prog = prog->next; + } + else { + warn("Invalid filter programm"); + return false; + } + } + if (prog) + prog->usagecount++; + + blockstartpos = br_next_rarvm_number(&br) + (size_t)lzss_position(&rar->uncomp.lzss); + if ((flags & 0x40)) + blockstartpos += 258; + if ((flags & 0x20)) + blocklength = br_next_rarvm_number(&br); + else + blocklength = prog ? prog->oldfilterlength : 0; + + registers[3] = RARProgramSystemGlobalAddress; + registers[4] = blocklength; + registers[5] = prog ? 
prog->usagecount : 0; + registers[7] = RARProgramMemorySize; + + if ((flags & 0x10)) { + uint8_t mask = (uint8_t)br_bits(&br, 7); + for (i = 0; i < 7; i++) { + if ((mask & (1 << i))) + registers[i] = br_next_rarvm_number(&br); + } + } + + if (!prog) { + uint32_t len = br_next_rarvm_number(&br); + uint8_t *bytecode; + struct RARProgramCode **next; + + if (len == 0 || len > 0x10000) { + warn("Invalid RARVM bytecode length"); + return false; + } + bytecode = malloc(len); + if (!bytecode) + return false; + for (i = 0; i < len; i++) + bytecode[i] = (uint8_t)br_bits(&br, 8); + prog = rar_compile_program(bytecode, len); + if (!prog) { + free(bytecode); + return false; + } + free(bytecode); + next = &filters->progs; + while (*next) + next = &(*next)->next; + *next = prog; + } + prog->oldfilterlength = blocklength; + + globaldata = NULL; + globaldatalen = 0; + if ((flags & 0x08)) { + globaldatalen = br_next_rarvm_number(&br); + if (globaldatalen > RARProgramUserGlobalSize) { + warn("Invalid RARVM data length"); + return false; + } + globaldata = malloc(globaldatalen + RARProgramSystemGlobalSize); + if (!globaldata) + return false; + for (i = 0; i < globaldatalen; i++) + globaldata[i + RARProgramSystemGlobalSize] = (uint8_t)br_bits(&br, 8); + } + + if (br.at_eof) { + free(globaldata); + return false; + } + + filter = rar_create_filter(prog, globaldata, globaldatalen, registers, blockstartpos, blocklength); + free(globaldata); + if (!filter) + return false; + + for (i = 0; i < 7; i++) + bw_write32le(&filter->globaldata[i * 4], registers[i]); + bw_write32le(&filter->globaldata[0x1C], blocklength); + bw_write32le(&filter->globaldata[0x20], 0); + bw_write32le(&filter->globaldata[0x2C], prog->usagecount); + + nextfilter = &filters->stack; + while (*nextfilter) + nextfilter = &(*nextfilter)->next; + *nextfilter = filter; + + if (!filters->stack->next) + filters->filterstart = blockstartpos; + + return true; +} + +bool rar_run_filters(ar_archive_rar *rar) +{ + struct 
ar_archive_rar_filters *filters = &rar->uncomp.state.v3.filters; + struct RARFilter *filter = filters->stack; + size_t start = filters->filterstart; + size_t end = start + filter->blocklength; + uint32_t lastfilteraddress; + uint32_t lastfilterlength; + + filters->filterstart = SIZE_MAX; + + if ((size_t)rar_expand(rar, end) != end) { + warn("Failed to expand the expected amout of bytes"); + return false; + } + + if (!filters->vm) { + filters->vm = calloc(1, sizeof(*filters->vm)); + if (!filters->vm) + return false; + } + + lzss_copy_bytes_from_window(&rar->uncomp.lzss, filters->vm->memory, start, filter->blocklength); + if (!rar_execute_filter(filter, filters->vm, rar->progress.bytes_done)) { + warn("Failed to execute parsing filter"); + return false; + } + + lastfilteraddress = filter->filteredblockaddress; + lastfilterlength = filter->filteredblocklength; + filters->stack = filter->next; + filter->next = NULL; + rar_delete_filter(filter); + + while ((filter = filters->stack) != NULL && filter->blockstartpos == filters->filterstart && filter->blocklength == lastfilterlength) { + memmove(&filters->vm->memory[0], &filters->vm->memory[lastfilteraddress], lastfilterlength); + if (!rar_execute_filter(filter, filters->vm, rar->progress.bytes_done)) { + warn("Failed to execute parsing filter"); + return false; + } + + lastfilteraddress = filter->filteredblockaddress; + lastfilterlength = filter->filteredblocklength; + filters->stack = filter->next; + filter->next = NULL; + rar_delete_filter(filter); + } + + if (filters->stack) { + if (filters->stack->blockstartpos < end) { + warn("Bad filter order"); + return false; + } + filters->filterstart = filters->stack->blockstartpos; + } + + filters->lastend = end; + filters->bytes = &filters->vm->memory[lastfilteraddress]; + filters->bytes_ready = lastfilterlength; + + return true; +} + +void rar_clear_filters(struct ar_archive_rar_filters *filters) +{ + rar_delete_filter(filters->stack); + rar_delete_program(filters->progs); + 
free(filters->vm); +} diff --git a/external/unarr/rar/huffman-rar.c b/external/unarr/rar/huffman-rar.c new file mode 100644 index 00000000..8fa6f593 --- /dev/null +++ b/external/unarr/rar/huffman-rar.c @@ -0,0 +1,149 @@ +/* Copyright 2018 the unarr project authors (see AUTHORS file). + License: LGPLv3 */ + +/* adapted from https://code.google.com/p/theunarchiver/source/browse/XADMaster/XADPrefixCode.m */ + +#include "rar.h" + +bool rar_new_node(struct huffman_code *code) +{ + if (!code->tree) { + code->minlength = INT_MAX; + code->maxlength = INT_MIN; + } + if (code->numentries + 1 >= code->capacity) { + /* in my small file sample, 1024 is the value needed most often */ + int new_capacity = code->capacity ? code->capacity * 2 : 1024; + void *new_tree = calloc(new_capacity, sizeof(*code->tree)); + if (!new_tree) { + warn("OOM during decompression"); + return false; + } + if (code->tree) { + memcpy(new_tree, code->tree, code->capacity * sizeof(*code->tree)); + free(code->tree); + } + code->tree = new_tree; + code->capacity = new_capacity; + } + /* if we have no code->tree at this point something went wrong */ + if (!code->tree) { + warn("Invalid huffman code tree, aborting"); + return false; + } + code->tree[code->numentries].branches[0] = -1; + code->tree[code->numentries].branches[1] = -2; + code->numentries++; + return true; +} + +bool rar_add_value(struct huffman_code *code, int value, int codebits, int length) +{ + int lastnode, bitpos, bit; + + free(code->table); + code->table = NULL; + + if (length > code->maxlength) + code->maxlength = length; + if (length < code->minlength) + code->minlength = length; + + lastnode = 0; + for (bitpos = length - 1; bitpos >= 0; bitpos--) { + bit = (codebits >> bitpos) & 1; + if (rar_is_leaf_node(code, lastnode)) { + warn("Invalid data in bitstream"); /* prefix found */ + return false; + } + if (code->tree[lastnode].branches[bit] < 0) { + if (!rar_new_node(code)) + return false; + code->tree[lastnode].branches[bit] = 
code->numentries - 1; + } + lastnode = code->tree[lastnode].branches[bit]; + } + + if (code->tree[lastnode].branches[0] != -1 || code->tree[lastnode].branches[1] != -2) { + warn("Invalid data in bitstream"); /* prefix found */ + return false; + } + code->tree[lastnode].branches[0] = code->tree[lastnode].branches[1] = value; + return true; +} + +bool rar_create_code(struct huffman_code *code, uint8_t *lengths, int numsymbols) +{ + int symbolsleft = numsymbols; + int codebits = 0; + int i, j; + + if (!rar_new_node(code)) + return false; + + for (i = 1; i <= 0x0F; i++) { + for (j = 0; j < numsymbols; j++) { + if (lengths[j] != i) + continue; + if (!rar_add_value(code, j, codebits, i)) + return false; + if (--symbolsleft <= 0) + return true; + codebits++; + } + codebits <<= 1; + } + return true; +} + +static bool rar_make_table_rec(struct huffman_code *code, int node, int offset, int depth, int maxdepth) +{ + int currtablesize = 1 << (maxdepth - depth); + + if (node < 0 || code->numentries <= node) { + warn("Invalid data in bitstream"); /* invalid location to Huffman tree specified */ + return false; + } + + if (rar_is_leaf_node(code, node)) { + int i; + for (i = 0; i < currtablesize; i++) { + code->table[offset + i].length = depth; + code->table[offset + i].value = code->tree[node].branches[0]; + } + } + else if (depth == maxdepth) { + code->table[offset].length = maxdepth + 1; + code->table[offset].value = node; + } + else { + if (!rar_make_table_rec(code, code->tree[node].branches[0], offset, depth + 1, maxdepth)) + return false; + if (!rar_make_table_rec(code, code->tree[node].branches[1], offset + currtablesize / 2, depth + 1, maxdepth)) + return false; + } + return true; +} + +bool rar_make_table(struct huffman_code *code) +{ + if (code->minlength <= code->maxlength && code->maxlength <= 10) + code->tablesize = code->maxlength; + else + code->tablesize = 10; + + code->table = calloc(1ULL << code->tablesize, sizeof(*code->table)); + if (!code->table) { + warn("OOM 
during decompression"); + return false; + } + + return rar_make_table_rec(code, 0, 0, 0, code->tablesize); +} + +void rar_free_code(struct huffman_code *code) +{ + free(code->tree); + free(code->table); + memset(code, 0, sizeof(*code)); +} diff --git a/external/unarr/rar/lzss.h b/external/unarr/rar/lzss.h new file mode 100644 index 00000000..580fe4c5 --- /dev/null +++ b/external/unarr/rar/lzss.h @@ -0,0 +1,88 @@ +/* Copyright 2015 the unarr project authors (see AUTHORS file). + License: LGPLv3 */ + +/* adapted from https://code.google.com/p/theunarchiver/source/browse/XADMaster/LZSS.h */ + +#ifndef rar_lzss_h +#define rar_lzss_h + +#include +#include +#include +#include + +#if defined(_MSC_VER) && !defined(inline) +#define inline __inline +#endif + +typedef struct { + uint8_t *window; + int mask; + int64_t position; +} LZSS; + +static inline int64_t lzss_position(LZSS *self) { return self->position; } + +static inline int lzss_mask(LZSS *self) { return self->mask; } + +static inline int lzss_size(LZSS *self) { return self->mask + 1; } + +static inline uint8_t *lzss_window_pointer(LZSS *self) { return self->window; } + +static inline int lzss_offset_for_position(LZSS *self, int64_t pos) { return (int)(pos & self->mask); } + +static inline uint8_t *lzss_window_pointer_for_position(LZSS *self, int64_t pos) { return &self->window[lzss_offset_for_position(self, pos)]; } + +static inline int lzss_current_window_offset(LZSS *self) { return lzss_offset_for_position(self, self->position); } + +static inline uint8_t *lzss_current_window_pointer(LZSS *self) { return lzss_window_pointer_for_position(self, self->position); } + +static inline int64_t lzss_next_window_edge_after_position(LZSS *self, int64_t pos) { return (pos + lzss_size(self)) & ~(int64_t)lzss_mask(self); } + +static inline int64_t lzss_next_window_edge(LZSS *self) { return lzss_next_window_edge_after_position(self, self->position); } + +static inline uint8_t lzss_get_byte_from_window(LZSS *self, int64_t pos) { 
return *lzss_window_pointer_for_position(self, pos); } + +static inline void lzss_emit_literal(LZSS *self, uint8_t literal) { + /* self->window[(self->position & self->mask)] = literal; */ + *lzss_current_window_pointer(self) = literal; + self->position++; +} + +static inline void lzss_emit_match(LZSS *self, int offset, int length) { + int windowoffs = lzss_current_window_offset(self); + int i; + for (i = 0; i < length; i++) { + self->window[(windowoffs + i) & lzss_mask(self)] = self->window[(windowoffs + i - offset) & lzss_mask(self)]; + } + self->position += length; +} + +static inline void lzss_copy_bytes_from_window(LZSS *self, uint8_t *buffer, int64_t startpos, int length) { + int windowoffs = lzss_offset_for_position(self, startpos); + int firstpart = lzss_size(self) - windowoffs; + if (length <= firstpart) { + /* Request fits inside window */ + memcpy(buffer, &self->window[windowoffs], length); + } + else { + /* Request wraps around window */ + memcpy(buffer, &self->window[windowoffs], firstpart); + memcpy(buffer + firstpart, &self->window[0], length - firstpart); + } +} + +static inline bool lzss_initialize(LZSS *self, int windowsize) { + self->window = malloc(windowsize); + if (!self->window) + return false; + + self->mask = windowsize - 1; /* Assume windows are power-of-two sized! */ + memset(self->window, 0, lzss_size(self)); + self->position = 0; + return true; +} + +static inline void lzss_cleanup(LZSS *self) { free(self->window); } + +#endif diff --git a/external/unarr/rar/parse-rar.c b/external/unarr/rar/parse-rar.c new file mode 100644 index 00000000..79ab6f5c --- /dev/null +++ b/external/unarr/rar/parse-rar.c @@ -0,0 +1,239 @@ +/* Copyright 2015 the unarr project authors (see AUTHORS file). 
+ License: LGPLv3 */ + +/* adapted from https://code.google.com/p/theunarchiver/source/browse/XADMaster/XADRARParser.m */ + +#include "rar.h" + +static inline uint8_t uint8le(unsigned char *data) { return data[0]; } +static inline uint16_t uint16le(unsigned char *data) { return data[0] | data[1] << 8; } +static inline uint32_t uint32le(unsigned char *data) { return data[0] | data[1] << 8 | data[2] << 16 | (uint32_t)data[3] << 24; } + +bool rar_parse_header(ar_archive *ar, struct rar_header *header) +{ + unsigned char header_data[7]; + size_t read = ar_read(ar->stream, header_data, sizeof(header_data)); + if (read == 0) { + ar->at_eof = true; + return false; + } + if (read < sizeof(header_data)) + return false; + + header->crc = uint16le(header_data + 0); + header->type = uint8le(header_data + 2); + header->flags = uint16le(header_data + 3); + header->size = uint16le(header_data + 5); + + header->datasize = 0; + if ((header->flags & LHD_LONG_BLOCK) || header->type == 0x74) { + unsigned char size_data[4]; + if (!(header->flags & LHD_LONG_BLOCK)) + log("File header without LHD_LONG_BLOCK set"); + read += ar_read(ar->stream, size_data, sizeof(size_data)); + if (read < sizeof(header_data) + sizeof(size_data)) + return false; + header->datasize = uint32le(size_data); + } + + if (header->size < read) { + warn("Invalid header size %d", header->size); + return false; + } + + return true; +} + +bool rar_check_header_crc(ar_archive *ar) +{ + unsigned char buffer[256]; + uint16_t crc16, size; + uint32_t crc32; + + if (!ar_seek(ar->stream, ar->entry_offset, SEEK_SET)) + return false; + if (ar_read(ar->stream, buffer, 7) != 7) + return false; + + crc16 = uint16le(buffer + 0); + size = uint16le(buffer + 5); + if (size < 7) + return false; + size -= 7; + + crc32 = ar_crc32(0, buffer + 2, 5); + while (size > 0) { + if (ar_read(ar->stream, buffer, smin(size, sizeof(buffer))) != smin(size, sizeof(buffer))) + return false; + crc32 = ar_crc32(crc32, buffer, smin(size, sizeof(buffer))); 
+ size -= (uint16_t)smin(size, sizeof(buffer)); + } + return (crc32 & 0xFFFF) == crc16; +} + +bool rar_parse_header_entry(ar_archive_rar *rar, struct rar_header *header, struct rar_entry *entry) +{ + unsigned char data[21]; + if (ar_read(rar->super.stream, data, sizeof(data)) != sizeof(data)) + return false; + + entry->size = uint32le(data + 0); + entry->os = uint8le(data + 4); + entry->crc = uint32le(data + 5); + entry->dosdate = uint32le(data + 9); + entry->version = uint8le(data + 13); + entry->method = uint8le(data + 14); + entry->namelen = uint16le(data + 15); + entry->attrs = uint32le(data + 17); + if ((header->flags & LHD_LARGE)) { + unsigned char more_data[8]; + if (ar_read(rar->super.stream, more_data, sizeof(more_data)) != sizeof(more_data)) + return false; + header->datasize += (uint64_t)uint32le(more_data + 0); + entry->size += (uint64_t)uint32le(more_data + 4); + } + if (!ar_skip(rar->super.stream, entry->namelen)) + return false; + if ((header->flags & LHD_SALT)) { + log("Skipping LHD_SALT"); + ar_skip(rar->super.stream, 8); + } + + rar->entry.version = entry->version; + rar->entry.method = entry->method; + rar->entry.crc = entry->crc; + rar->entry.header_size = header->size; + rar->entry.solid = entry->version < 20 ? 
(rar->archive_flags & MHD_SOLID) : (header->flags & LHD_SOLID); + free(rar->entry.name); + rar->entry.name = NULL; + + return true; +} + +/* this seems to be what RAR considers "Unicode" */ +static char *rar_conv_unicode_to_utf8(const char *data, uint16_t len) +{ +#define Check(cond) if (!(cond)) { free(str); return NULL; } else ((void)0) + + uint8_t highbyte, flagbyte, flagbits, size, length, i; + const uint8_t *in = (const uint8_t *)data + strlen(data) + 1; + const uint8_t *end_in = (const uint8_t *)data + len; + char *str = calloc(len + 1, 3); + char *out = str; + char *end_out = str + len * 3; + + if (!str) + return NULL; + if (end_in - in <= 1) { + memcpy(str, data, len); + return str; + } + + highbyte = *in++; + flagbyte = 0; + flagbits = 0; + size = 0; + + while (in < end_in && out < end_out) { + if (flagbits == 0) { + flagbyte = *in++; + flagbits = 8; + } + flagbits -= 2; + switch ((flagbyte >> flagbits) & 3) { + case 0: + Check(in + 1 <= end_in); + out += ar_conv_rune_to_utf8(*in++, out, end_out - out); + size++; + break; + case 1: + Check(in + 1 <= end_in); + out += ar_conv_rune_to_utf8(((uint16_t)highbyte << 8) | *in++, out, end_out - out); + size++; + break; + case 2: + Check(in + 2 <= end_in); + out += ar_conv_rune_to_utf8(((uint16_t)*(in + 1) << 8) | *in, out, end_out - out); + in += 2; + size++; + break; + case 3: + Check(in + 1 <= end_in); + length = *in++; + if ((length & 0x80)) { + uint8_t correction = *in++; + for (i = 0; i < (length & 0x7F) + 2; i++) { + Check(size < len); + out += ar_conv_rune_to_utf8(((uint16_t)highbyte << 8) | (data[size] + (correction & 0xFF)), out, end_out - out); + size++; + } + } + else { + for (i = 0; i < (length & 0x7F) + 2; i++) { + Check(size < len); + out += ar_conv_rune_to_utf8(data[size], out, end_out - out); + size++; + } + } + break; + } + } + + return str; + +#undef Check +} + +const char *rar_get_name(ar_archive *ar, bool raw) +{ + if (raw) + return NULL; + + ar_archive_rar *rar = (ar_archive_rar *)ar; + if 
(!rar->entry.name) { + unsigned char data[21]; + uint16_t namelen; + char *name; + + struct rar_header header; + if (!ar_seek(ar->stream, ar->entry_offset, SEEK_SET)) + return NULL; + if (!rar_parse_header(ar, &header)) + return NULL; + if (ar_read(ar->stream, data, sizeof(data)) != sizeof(data)) + return NULL; + if ((header.flags & LHD_LARGE) && !ar_skip(ar->stream, 8)) + return NULL; + + namelen = uint16le(data + 15); + name = malloc(namelen + 1); + if (!name || ar_read(ar->stream, name, namelen) != namelen) { + free(name); + return NULL; + } + name[namelen] = '\0'; + + if (!(header.flags & LHD_UNICODE)) { + rar->entry.name = ar_conv_dos_to_utf8(name); + free(name); + } + else if (namelen == strlen(name)) { + rar->entry.name = name; + } + else { + rar->entry.name = rar_conv_unicode_to_utf8(name, namelen); + free(name); + } + /* normalize path separators */ + if (rar->entry.name) { + char *p = rar->entry.name; + while ((p = strchr(p, '\\')) != NULL) { + *p = '/'; + } + } + + if (!ar_seek(ar->stream, ar->entry_offset + rar->entry.header_size, SEEK_SET)) + warn("Couldn't seek back to the end of the entry header"); + } + return rar->entry.name; +} diff --git a/external/unarr/rar/rar.c b/external/unarr/rar/rar.c new file mode 100644 index 00000000..4dfc45ed --- /dev/null +++ b/external/unarr/rar/rar.c @@ -0,0 +1,223 @@ +/* Copyright 2015 the unarr project authors (see AUTHORS file). 
+ License: LGPLv3 */ + +#include "rar.h" + +static void rar_close(ar_archive *ar) +{ + ar_archive_rar *rar = (ar_archive_rar *)ar; + free(rar->entry.name); + rar_clear_uncompress(&rar->uncomp); +} + +static bool rar_parse_entry(ar_archive *ar, off64_t offset) +{ + ar_archive_rar *rar = (ar_archive_rar *)ar; + struct rar_header header; + struct rar_entry entry; + bool out_of_order = offset != ar->entry_offset_next; + + if (!ar_seek(ar->stream, offset, SEEK_SET)) { + warn("Couldn't seek to offset %" PRIi64, offset); + return false; + } + + for (;;) { + ar->entry_offset = ar_tell(ar->stream); + ar->entry_size_uncompressed = 0; + + if (!rar_parse_header(ar, &header)) + return false; + + ar->entry_offset_next = ar->entry_offset + header.size + header.datasize; + if (ar->entry_offset_next < ar->entry_offset + header.size) { + warn("Integer overflow due to overly large data size"); + return false; + } + + switch (header.type) { + case TYPE_MAIN_HEADER: + if ((header.flags & MHD_PASSWORD)) { + warn("Encrypted archives aren't supported"); + return false; + } + ar_skip(ar->stream, 6 /* reserved data */); + if ((header.flags & MHD_ENCRYPTVER)) { + log("MHD_ENCRYPTVER is set"); + ar_skip(ar->stream, 1); + } + if ((header.flags & MHD_COMMENT)) + log("MHD_COMMENT is set"); + if (ar_tell(ar->stream) - ar->entry_offset > header.size) { + warn("Invalid RAR header size: %d", header.size); + return false; + } + rar->archive_flags = header.flags; + break; + + case TYPE_FILE_ENTRY: + if (!rar_parse_header_entry(rar, &header, &entry)) + return false; + if ((header.flags & LHD_PASSWORD)) + warn("Encrypted entries will fail to uncompress"); + if ((header.flags & LHD_DIRECTORY) == LHD_DIRECTORY) { + if (header.datasize == 0) { + log("Skipping directory entry \"%s\"", rar_get_name(ar, false)); + break; + } + warn("Can't skip directory entries containing data"); + } + if ((header.flags & (LHD_SPLIT_BEFORE | LHD_SPLIT_AFTER))) + warn("Splitting files isn't really supported"); + 
ar->entry_size_uncompressed = (size_t)entry.size; + ar->entry_filetime = ar_conv_dosdate_to_filetime(entry.dosdate); + if (!rar->entry.solid || rar->entry.method == METHOD_STORE || out_of_order) { + rar_clear_uncompress(&rar->uncomp); + memset(&rar->solid, 0, sizeof(rar->solid)); + } + else { + br_clear_leftover_bits(&rar->uncomp); + } + + rar->solid.restart = rar->entry.solid && (out_of_order || !rar->solid.part_done); + rar->solid.part_done = !ar->entry_size_uncompressed; + rar->progress.data_left = (size_t)header.datasize; + rar->progress.bytes_done = 0; + rar->progress.crc = 0; + + /* TODO: CRC checks don't always hold (claim in XADRARParser.m @readBlockHeader) */ + if (!rar_check_header_crc(ar)) + warn("Invalid header checksum @%" PRIi64, ar->entry_offset); + if (ar_tell(ar->stream) != ar->entry_offset + rar->entry.header_size) { + warn("Couldn't seek to offset %" PRIi64, ar->entry_offset + rar->entry.header_size); + return false; + } + return true; + + case TYPE_NEWSUB: + log("Skipping newsub header @%" PRIi64, ar->entry_offset); + break; + + case TYPE_END_OF_ARCHIVE: + ar->at_eof = true; + return false; + + default: + log("Unknown RAR header type %02x", header.type); + break; + } + + /* TODO: CRC checks don't always hold (claim in XADRARParser.m @readBlockHeader) */ + if (!rar_check_header_crc(ar)) + warn("Invalid header checksum @%" PRIi64, ar->entry_offset); + if (!ar_seek(ar->stream, ar->entry_offset_next, SEEK_SET)) { + warn("Couldn't seek to offset %" PRIi64, ar->entry_offset_next); + return false; + } + } +} + +static bool rar_copy_stored(ar_archive_rar *rar, void *buffer, size_t count) +{ + if (count > rar->progress.data_left) { + warn("Unexpected EOS in stored data"); + return false; + } + if (ar_read(rar->super.stream, buffer, count) != count) { + warn("Unexpected EOF in stored data"); + return false; + } + rar->progress.data_left -= count; + rar->progress.bytes_done += count; + return true; +} + +static bool rar_restart_solid(ar_archive *ar) +{ + 
ar_archive_rar *rar = (ar_archive_rar *)ar; + off64_t current_offset = ar->entry_offset; + log("Restarting decompression for solid entry"); + if (!ar_parse_entry_at(ar, ar->entry_offset_first)) { + ar_parse_entry_at(ar, current_offset); + return false; + } + while (ar->entry_offset < current_offset) { + size_t size = ar->entry_size_uncompressed; + rar->solid.restart = false; + while (size > 0) { + unsigned char buffer[1024]; + size_t count = smin(size, sizeof(buffer)); + if (!ar_entry_uncompress(ar, buffer, count)) { + ar_parse_entry_at(ar, current_offset); + return false; + } + size -= count; + } + if (!ar_parse_entry(ar)) { + ar_parse_entry_at(ar, current_offset); + return false; + } + } + rar->solid.restart = false; + return true; +} + +static bool rar_uncompress(ar_archive *ar, void *buffer, size_t count) +{ + ar_archive_rar *rar = (ar_archive_rar *)ar; + if (count > ar->entry_size_uncompressed - rar->progress.bytes_done) { + warn("Requesting too much data (%" PRIuPTR " < %" PRIuPTR ")", ar->entry_size_uncompressed - rar->progress.bytes_done, count); + return false; + } + if (rar->entry.method == METHOD_STORE) { + if (!rar_copy_stored(rar, buffer, count)) + return false; + } + else if (rar->entry.method == METHOD_FASTEST || rar->entry.method == METHOD_FAST || + rar->entry.method == METHOD_NORMAL || rar->entry.method == METHOD_GOOD || + rar->entry.method == METHOD_BEST) { + if (rar->solid.restart && !rar_restart_solid(ar)) { + warn("Failed to produce the required solid decompression state"); + return false; + } + if (!rar_uncompress_part(rar, buffer, count)) + return false; + } + else { + warn("Unknown compression method %#02x", rar->entry.method); + return false; + } + + rar->progress.crc = ar_crc32(rar->progress.crc, buffer, count); + if (rar->progress.bytes_done < ar->entry_size_uncompressed) + return true; + if (rar->progress.data_left) + log("Compressed block has more data than required"); + rar->solid.part_done = true; + rar->solid.size_total += 
rar->progress.bytes_done; + if (rar->progress.crc != rar->entry.crc) { + warn("Checksum of extracted data doesn't match"); + return false; + } + return true; +} + +ar_archive *ar_open_rar_archive(ar_stream *stream) +{ + char signature[FILE_SIGNATURE_SIZE]; + if (!ar_seek(stream, 0, SEEK_SET)) + return NULL; + if (ar_read(stream, signature, sizeof(signature)) != sizeof(signature)) + return NULL; + if (memcmp(signature, "Rar!\x1A\x07\x00", sizeof(signature)) != 0) { + if (memcmp(signature, "Rar!\x1A\x07\x01", sizeof(signature)) == 0) + warn("RAR 5 format isn't supported"); + else if (memcmp(signature, "RE~^", 4) == 0) + warn("Ancient RAR format isn't supported"); + else if (memcmp(signature, "MZ", 2) == 0 || memcmp(signature, "\x7F\x45LF", 4) == 0) + warn("SFX archives aren't supported"); + return NULL; + } + + return ar_open_archive(stream, sizeof(ar_archive_rar), rar_close, rar_parse_entry, rar_get_name, rar_uncompress, NULL, FILE_SIGNATURE_SIZE); +} diff --git a/external/unarr/rar/rar.h b/external/unarr/rar/rar.h new file mode 100644 index 00000000..a0a420a3 --- /dev/null +++ b/external/unarr/rar/rar.h @@ -0,0 +1,243 @@ +/* Copyright 2015 the unarr project authors (see AUTHORS file). + License: LGPLv3 */ + +#ifndef rar_rar_h +#define rar_rar_h + +#include "../common/unarr-imp.h" + +#include "lzss.h" +#include "../lzmasdk/Ppmd7.h" +#include + +static inline size_t smin(size_t a, size_t b) { return a < b ? 
a : b; } + +typedef struct ar_archive_rar_s ar_archive_rar; + +/***** parse-rar *****/ + +#define FILE_SIGNATURE_SIZE 7 + +enum block_types { + TYPE_FILE_SIGNATURE = 0x72, TYPE_MAIN_HEADER = 0x73, TYPE_FILE_ENTRY = 0x74, + TYPE_NEWSUB = 0x7A, TYPE_END_OF_ARCHIVE = 0x7B, +}; + +enum archive_flags { + MHD_VOLUME = 1 << 0, MHD_COMMENT = 1 << 1, MHD_LOCK = 1 << 2, + MHD_SOLID = 1 << 3, MHD_PACK_COMMENT = 1 << 4, MHD_AV = 1 << 5, + MHD_PROTECT = 1 << 6, MHD_PASSWORD = 1 << 7, MHD_FIRSTVOLUME = 1 << 8, + MHD_ENCRYPTVER = 1 << 9, + MHD_LONG_BLOCK = 1 << 15, +}; + +enum entry_flags { + LHD_SPLIT_BEFORE = 1 << 0, LHD_SPLIT_AFTER = 1 << 1, LHD_PASSWORD = 1 << 2, + LHD_COMMENT = 1 << 3, LHD_SOLID = 1 << 4, + LHD_DIRECTORY = (1 << 5) | (1 << 6) | (1 << 7), + LHD_LARGE = 1 << 8, LHD_UNICODE = 1 << 9, LHD_SALT = 1 << 10, + LHD_VERSION = 1 << 11, LHD_EXTTIME = 1 << 12, LHD_EXTFLAGS = 1 << 13, + LHD_LONG_BLOCK = 1 << 15, +}; + +enum compression_method { + METHOD_STORE = 0x30, + METHOD_FASTEST = 0x31, METHOD_FAST = 0x32, METHOD_NORMAL = 0x33, + METHOD_GOOD = 0x34, METHOD_BEST = 0x35, +}; + +struct rar_header { + uint16_t crc; + uint8_t type; + uint16_t flags; + uint16_t size; + uint64_t datasize; +}; + +struct rar_entry { + uint64_t size; + uint8_t os; + uint32_t crc; + uint32_t dosdate; + uint8_t version; + uint8_t method; + uint16_t namelen; + uint32_t attrs; +}; + +struct ar_archive_rar_entry { + uint8_t version; + uint8_t method; + uint32_t crc; + uint16_t header_size; + bool solid; + char *name; +}; + +bool rar_parse_header(ar_archive *ar, struct rar_header *header); +bool rar_check_header_crc(ar_archive *ar); +bool rar_parse_header_entry(ar_archive_rar *rar, struct rar_header *header, struct rar_entry *entry); +const char *rar_get_name(ar_archive *ar, bool raw); + +/***** filter-rar *****/ + +struct RARVirtualMachine; +struct RARProgramCode; +struct RARFilter; + +struct ar_archive_rar_filters { + struct RARVirtualMachine *vm; + struct RARProgramCode *progs; + struct RARFilter 
*stack; + size_t filterstart; + uint32_t lastfilternum; + size_t lastend; + uint8_t *bytes; + size_t bytes_ready; +}; + +bool rar_parse_filter(ar_archive_rar *rar, const uint8_t *bytes, uint16_t length, uint8_t flags); +bool rar_run_filters(ar_archive_rar *rar); +void rar_clear_filters(struct ar_archive_rar_filters *filters); + +/***** huffman-rar *****/ + +struct huffman_code { + struct { + int branches[2]; + } *tree; + int numentries; + int capacity; + int minlength; + int maxlength; + struct { + int length; + int value; + } *table; + int tablesize; +}; + +bool rar_new_node(struct huffman_code *code); +bool rar_add_value(struct huffman_code *code, int value, int codebits, int length); +bool rar_create_code(struct huffman_code *code, uint8_t *lengths, int numsymbols); +bool rar_make_table(struct huffman_code *code); +void rar_free_code(struct huffman_code *code); + +static inline bool rar_is_leaf_node(struct huffman_code *code, int node) { return code->tree[node].branches[0] == code->tree[node].branches[1]; } + +/***** uncompress-rar *****/ + +#define LZSS_WINDOW_SIZE 0x400000 +#define LZSS_OVERFLOW_SIZE 288 + +#define MAINCODE_SIZE 299 +#define OFFSETCODE_SIZE 60 +#define LOWOFFSETCODE_SIZE 17 +#define LENGTHCODE_SIZE 28 +#define HUFFMAN_TABLE_SIZE MAINCODE_SIZE + OFFSETCODE_SIZE + LOWOFFSETCODE_SIZE + LENGTHCODE_SIZE + +struct ByteReader { + IByteIn super; + ar_archive_rar *rar; +}; + +struct ar_archive_rar_uncomp_v3 { + struct huffman_code maincode; + struct huffman_code offsetcode; + struct huffman_code lowoffsetcode; + struct huffman_code lengthcode; + uint8_t lengthtable[HUFFMAN_TABLE_SIZE]; + uint32_t lastlength; + uint32_t lastoffset; + uint32_t oldoffset[4]; + uint32_t lastlowoffset; + uint32_t numlowoffsetrepeats; + + bool is_ppmd_block; + int ppmd_escape; + CPpmd7 ppmd7_context; + struct ByteReader bytein; + + struct ar_archive_rar_filters filters; +}; + +#define MAINCODE_SIZE_20 298 +#define OFFSETCODE_SIZE_20 48 +#define LENGTHCODE_SIZE_20 28 +#define 
HUFFMAN_TABLE_SIZE_20 4 * 257 + +struct AudioState { + int8_t weight[5]; + int16_t delta[4]; + int8_t lastdelta; + int error[11]; + int count; + uint8_t lastbyte; +}; + +struct ar_archive_rar_uncomp_v2 { + struct huffman_code maincode; + struct huffman_code offsetcode; + struct huffman_code lengthcode; + struct huffman_code audiocode[4]; + uint8_t lengthtable[HUFFMAN_TABLE_SIZE_20]; + uint32_t lastoffset; + uint32_t lastlength; + uint32_t oldoffset[4]; + uint32_t oldoffsetindex; + + bool audioblock; + uint8_t channel; + uint8_t numchannels; + struct AudioState audiostate[4]; + int8_t channeldelta; +}; + +struct ar_archive_rar_uncomp { + uint8_t version; + + LZSS lzss; + size_t bytes_ready; + bool start_new_table; + + union { + struct ar_archive_rar_uncomp_v3 v3; + struct ar_archive_rar_uncomp_v2 v2; + } state; + + struct StreamBitReader { + uint64_t bits; + int available; + bool at_eof; + } br; +}; + +bool rar_uncompress_part(ar_archive_rar *rar, void *buffer, size_t buffer_size); +int64_t rar_expand(ar_archive_rar *rar, int64_t end); +void rar_clear_uncompress(struct ar_archive_rar_uncomp *uncomp); +static inline void br_clear_leftover_bits(struct ar_archive_rar_uncomp *uncomp) { uncomp->br.available &= ~0x07; } + +/***** rar *****/ + +struct ar_archive_rar_progress { + size_t data_left; + size_t bytes_done; + uint32_t crc; +}; + +struct ar_archive_rar_solid { + size_t size_total; + bool part_done; + bool restart; +}; + +struct ar_archive_rar_s { + ar_archive super; + uint16_t archive_flags; + struct ar_archive_rar_entry entry; + struct ar_archive_rar_uncomp uncomp; + struct ar_archive_rar_progress progress; + struct ar_archive_rar_solid solid; +}; + +#endif diff --git a/external/unarr/rar/rarvm.c b/external/unarr/rar/rarvm.c new file mode 100644 index 00000000..47e32ab2 --- /dev/null +++ b/external/unarr/rar/rarvm.c @@ -0,0 +1,619 @@ +/* Copyright 2015 the unarr project authors (see AUTHORS file). 
+   License: LGPLv3 */
+
+/* adapted from https://code.google.com/p/theunarchiver/source/browse/XADMaster/RARVirtualMachine.c */
+
+#include "rarvm.h"
+#include "../common/allocator.h"
+
+/* NOTE(review): the angle-bracket header names had been stripped from this
+   hunk; restored from what this file uses (calloc/free, memset/memcpy) --
+   confirm against upstream. */
+#include <stdlib.h>
+#include <string.h>
+
+typedef struct RAROpcode_s RAROpcode;
+
+/* One decoded RARVM instruction: opcode, byte-mode bits (bit 0 = first
+   operand, bit 1 = second operand) and the two operands with their
+   addressing modes. */
+struct RAROpcode_s {
+    uint8_t instruction;
+    uint8_t bytemode;
+    uint8_t addressingmode1;
+    uint8_t addressingmode2;
+    uint32_t value1;
+    uint32_t value2;
+};
+
+struct RARProgram_s {
+    RAROpcode *opcodes;
+    uint32_t length;
+    uint32_t capacity;
+};
+
+/* Program building */
+
+RARProgram *RARCreateProgram(void)
+{
+    return calloc(1, sizeof(RARProgram));
+}
+
+void RARDeleteProgram(RARProgram *prog)
+{
+    if (prog)
+        free(prog->opcodes);
+    free(prog);
+}
+
+/* Appends an instruction (operands are set separately via
+   RARSetLastInstrOperands); grows the opcode array geometrically. */
+bool RARProgramAddInstr(RARProgram *prog, uint8_t instruction, bool bytemode)
+{
+    if (instruction >= RARNumberOfInstructions)
+        return false;
+    if (bytemode && !RARInstructionHasByteMode(instruction))
+        return false;
+    if (prog->length + 1 >= prog->capacity) {
+        /* in my small file sample, 16 is the value needed most often */
+        uint32_t newCapacity = prog->capacity ? prog->capacity * 4 : 32;
+        RAROpcode *newCodes = calloc(newCapacity, sizeof(*prog->opcodes));
+        if (!newCodes) {
+            return false;
+        }
+        if (prog->opcodes) {
+            memcpy(newCodes, prog->opcodes, prog->capacity * sizeof(*prog->opcodes));
+            free(prog->opcodes);
+        }
+        prog->opcodes = newCodes;
+        prog->capacity = newCapacity;
+    }
+    memset(&prog->opcodes[prog->length], 0, sizeof(prog->opcodes[prog->length]));
+    prog->opcodes[prog->length].instruction = instruction;
+    if (instruction == RARMovzxInstruction || instruction == RARMovsxInstruction)
+        prog->opcodes[prog->length].bytemode = 2; /* second argument only */
+    else if (bytemode)
+        prog->opcodes[prog->length].bytemode = (1 | 2);
+    else
+        prog->opcodes[prog->length].bytemode = 0;
+    prog->length++;
+    return true;
+}
+
+/* Attaches operands to the most recently added instruction; fails if the
+   program is empty, the operands were already set, or an immediate is used
+   where the instruction writes its operand. */
+bool RARSetLastInstrOperands(RARProgram *prog, uint8_t addressingmode1, uint32_t value1, uint8_t addressingmode2, uint32_t value2)
+{
+    RAROpcode *opcode;
+    int numoperands;
+
+    if (addressingmode1 >= RARNumberOfAddressingModes || addressingmode2 >= RARNumberOfAddressingModes)
+        return false;
+    /* BUGFIX: reject an empty program *before* forming a pointer to
+       opcodes[length - 1], which is out of bounds when length == 0 */
+    if (!prog->length)
+        return false;
+    opcode = &prog->opcodes[prog->length - 1];
+    if (opcode->addressingmode1 || opcode->value1 || opcode->addressingmode2 || opcode->value2)
+        return false;
+
+    numoperands = NumberOfRARInstructionOperands(opcode->instruction);
+    if (numoperands == 0)
+        return true;
+
+    if (addressingmode1 == RARImmediateAddressingMode && RARInstructionWritesFirstOperand(opcode->instruction))
+        return false;
+    opcode->addressingmode1 = addressingmode1;
+    opcode->value1 = value1;
+
+    if (numoperands == 2) {
+        if (addressingmode2 == RARImmediateAddressingMode && RARInstructionWritesSecondOperand(opcode->instruction))
+            return false;
+        opcode->addressingmode2 = addressingmode2;
+        opcode->value2 = value2;
+    }
+
+    return true;
+}
+
+/* A program is runnable only if it ends in an unconditional jump/return. */
+bool RARIsProgramTerminated(RARProgram *prog)
+{
+    return prog->length > 0 && RARInstructionIsUnconditionalJump(prog->opcodes[prog->length - 1].instruction);
+}
+
+/* Execution */
+
+#define EXTMACRO_BEGIN
do { +#ifdef _MSC_VER +#define EXTMACRO_END } __pragma(warning(push)) __pragma(warning(disable:4127)) while (0) __pragma(warning(pop)) +#else +#define EXTMACRO_END } while (0) +#endif + +#define CarryFlag 1 +#define ZeroFlag 2 +#define SignFlag 0x80000000 + +#define SignExtend(a) ((uint32_t)((int8_t)(a))) + +static uint32_t _RARGetOperand(RARVirtualMachine *vm, uint8_t addressingmode, uint32_t value, bool bytemode); +static void _RARSetOperand(RARVirtualMachine *vm, uint8_t addressingmode, uint32_t value, bool bytemode, uint32_t data); + +#define GetOperand1() _RARGetOperand(vm, opcode->addressingmode1, opcode->value1, opcode->bytemode & 1) +#define GetOperand2() _RARGetOperand(vm, opcode->addressingmode2, opcode->value2, opcode->bytemode & 2) +#define SetOperand1(data) _RARSetOperand(vm, opcode->addressingmode1, opcode->value1, opcode->bytemode & 1, data) +#define SetOperand2(data) _RARSetOperand(vm, opcode->addressingmode2, opcode->value2, opcode->bytemode & 2, data) + +#define SetFlagsWithCarry(res, carry) EXTMACRO_BEGIN uint32_t result = (res); flags = (result == 0 ? ZeroFlag : (result & SignFlag)) | ((carry) ? CarryFlag : 0); EXTMACRO_END +#define SetByteFlagsWithCarry(res, carry) EXTMACRO_BEGIN uint8_t result = (res); flags = (result == 0 ? ZeroFlag : (SignExtend(result) & SignFlag)) | ((carry) ? 
CarryFlag : 0); EXTMACRO_END +#define SetFlags(res) SetFlagsWithCarry(res, 0) + +#define SetOperand1AndFlagsWithCarry(res, carry) EXTMACRO_BEGIN uint32_t r = (res); SetFlagsWithCarry(r, carry); SetOperand1(r); EXTMACRO_END +#define SetOperand1AndByteFlagsWithCarry(res, carry) EXTMACRO_BEGIN uint8_t r = (res); SetByteFlagsWithCarry(r, carry); SetOperand1(r); EXTMACRO_END +#define SetOperand1AndFlags(res) EXTMACRO_BEGIN uint32_t r = (res); SetFlags(r); SetOperand1(r); EXTMACRO_END + +#define NextInstruction() { opcode++; continue; } +#define Jump(offs) { uint32_t o = (offs); if (o >= prog->length) return false; opcode = &prog->opcodes[o]; continue; } + +bool RARExecuteProgram(RARVirtualMachine *vm, RARProgram *prog) +{ + RAROpcode *opcode = prog->opcodes; + uint32_t flags = 0; + uint32_t op1, op2, carry, i; + uint32_t counter = 0; + + if (!RARIsProgramTerminated(prog)) + return false; + + while ((uint32_t)(opcode - prog->opcodes) < prog->length && counter++ < RARRuntimeMaxInstructions) { + switch (opcode->instruction) { + case RARMovInstruction: + SetOperand1(GetOperand2()); + NextInstruction(); + + case RARCmpInstruction: + op1 = GetOperand1(); + SetFlagsWithCarry(op1 - GetOperand2(), result > op1); + NextInstruction(); + + case RARAddInstruction: + op1 = GetOperand1(); + if (opcode->bytemode) + SetOperand1AndByteFlagsWithCarry((op1 + GetOperand2()) & 0xFF, result < op1); + else + SetOperand1AndFlagsWithCarry(op1 + GetOperand2(), result < op1); + NextInstruction(); + + case RARSubInstruction: + op1 = GetOperand1(); +#if 0 /* apparently not correctly implemented in the RAR VM */ + if (opcode->bytemode) + SetOperand1AndByteFlagsWithCarry((op1 - GetOperand2()) & 0xFF, result > op1); + else +#endif + SetOperand1AndFlagsWithCarry(op1 - GetOperand2(), result > op1); + NextInstruction(); + + case RARJzInstruction: + if ((flags & ZeroFlag)) + Jump(GetOperand1()); + NextInstruction(); + + case RARJnzInstruction: + if (!(flags & ZeroFlag)) + Jump(GetOperand1()); + 
NextInstruction(); + + case RARIncInstruction: + if (opcode->bytemode) + SetOperand1AndFlags((GetOperand1() + 1) & 0xFF); + else + SetOperand1AndFlags(GetOperand1() + 1); + NextInstruction(); + + case RARDecInstruction: + if (opcode->bytemode) + SetOperand1AndFlags((GetOperand1() - 1) & 0xFF); + else + SetOperand1AndFlags(GetOperand1() - 1); + NextInstruction(); + + case RARJmpInstruction: + Jump(GetOperand1()); + + case RARXorInstruction: + SetOperand1AndFlags(GetOperand1() ^ GetOperand2()); + NextInstruction(); + + case RARAndInstruction: + SetOperand1AndFlags(GetOperand1() & GetOperand2()); + NextInstruction(); + + case RAROrInstruction: + SetOperand1AndFlags(GetOperand1() | GetOperand2()); + NextInstruction(); + + case RARTestInstruction: + SetFlags(GetOperand1() & GetOperand2()); + NextInstruction(); + + case RARJsInstruction: + if ((flags & SignFlag)) + Jump(GetOperand1()); + NextInstruction(); + + case RARJnsInstruction: + if (!(flags & SignFlag)) + Jump(GetOperand1()); + NextInstruction(); + + case RARJbInstruction: + if ((flags & CarryFlag)) + Jump(GetOperand1()); + NextInstruction(); + + case RARJbeInstruction: + if ((flags & (CarryFlag | ZeroFlag))) + Jump(GetOperand1()); + NextInstruction(); + + case RARJaInstruction: + if (!(flags & (CarryFlag | ZeroFlag))) + Jump(GetOperand1()); + NextInstruction(); + + case RARJaeInstruction: + if (!(flags & CarryFlag)) + Jump(GetOperand1()); + NextInstruction(); + + case RARPushInstruction: + vm->registers[7] -= 4; + RARVirtualMachineWrite32(vm, vm->registers[7], GetOperand1()); + NextInstruction(); + + case RARPopInstruction: + SetOperand1(RARVirtualMachineRead32(vm, vm->registers[7])); + vm->registers[7] += 4; + NextInstruction(); + + case RARCallInstruction: + vm->registers[7] -= 4; + RARVirtualMachineWrite32(vm, vm->registers[7], (uint32_t)(opcode - prog->opcodes + 1)); + Jump(GetOperand1()); + + case RARRetInstruction: + if (vm->registers[7] >= RARProgramMemorySize) + return true; + i = 
RARVirtualMachineRead32(vm, vm->registers[7]); + vm->registers[7] += 4; + Jump(i); + + case RARNotInstruction: + SetOperand1(~GetOperand1()); + NextInstruction(); + + case RARShlInstruction: + op1 = GetOperand1(); + op2 = GetOperand2(); + SetOperand1AndFlagsWithCarry(op1 << op2, ((op1 << (op2 - 1)) & 0x80000000) != 0); + NextInstruction(); + + case RARShrInstruction: + op1 = GetOperand1(); + op2 = GetOperand2(); + SetOperand1AndFlagsWithCarry(op1 >> op2, ((op1 >> (op2 - 1)) & 1) != 0); + NextInstruction(); + + case RARSarInstruction: + op1 = GetOperand1(); + op2 = GetOperand2(); + SetOperand1AndFlagsWithCarry(((int32_t)op1) >> op2, ((op1 >> (op2 - 1)) & 1) != 0); + NextInstruction(); + + case RARNegInstruction: + SetOperand1AndFlagsWithCarry(-(int32_t)GetOperand1(), result != 0); + NextInstruction(); + + case RARPushaInstruction: + vm->registers[7] -= 32; + for (i = 0; i < 8; i++) + RARVirtualMachineWrite32(vm, vm->registers[7] + (7 - i) * 4, vm->registers[i]); + NextInstruction(); + + case RARPopaInstruction: + for (i = 0; i < 8; i++) + vm->registers[i] = RARVirtualMachineRead32(vm, vm->registers[7] + (7 - i) * 4); + vm->registers[7] += 32; + NextInstruction(); + + case RARPushfInstruction: + vm->registers[7] -= 4; + RARVirtualMachineWrite32(vm, vm->registers[7], flags); + NextInstruction(); + + case RARPopfInstruction: + flags = RARVirtualMachineRead32(vm, vm->registers[7]); + vm->registers[7] += 4; + NextInstruction(); + + case RARMovzxInstruction: + SetOperand1(GetOperand2()); + NextInstruction(); + + case RARMovsxInstruction: + SetOperand1(SignExtend(GetOperand2())); + NextInstruction(); + + case RARXchgInstruction: + op1 = GetOperand1(); + op2 = GetOperand2(); + SetOperand1(op2); + SetOperand2(op1); + NextInstruction(); + + case RARMulInstruction: + SetOperand1(GetOperand1() * GetOperand2()); + NextInstruction(); + + case RARDivInstruction: + op2 = GetOperand2(); + if (op2 != 0) + SetOperand1(GetOperand1() / op2); + NextInstruction(); + + case 
RARAdcInstruction: + op1 = GetOperand1(); + carry = (flags & CarryFlag); + if (opcode->bytemode) + SetOperand1AndFlagsWithCarry((op1 + GetOperand2() + carry) & 0xFF, result < op1 || (result == op1 && carry)); /* does not correctly set sign bit */ + else + SetOperand1AndFlagsWithCarry(op1 + GetOperand2() + carry, result < op1 || (result == op1 && carry)); + NextInstruction(); + + case RARSbbInstruction: + op1 = GetOperand1(); + carry = (flags & CarryFlag); + if (opcode->bytemode) + SetOperand1AndFlagsWithCarry((op1 - GetOperand2() - carry) & 0xFF, result > op1 || (result == op1 && carry)); /* does not correctly set sign bit */ + else + SetOperand1AndFlagsWithCarry(op1 - GetOperand2() - carry, result > op1 || (result == op1 && carry)); + NextInstruction(); + + case RARPrintInstruction: + /* TODO: ??? */ + NextInstruction(); + } + } + + return false; +} + +/* Memory and register access */ + +static uint32_t _RARRead32(const uint8_t *b) +{ + return ((uint32_t)b[3] << 24) | ((uint32_t)b[2] << 16) | ((uint32_t)b[1] << 8) | (uint32_t)b[0]; +} + +static void _RARWrite32(uint8_t *b, uint32_t n) +{ + b[3] = (n >> 24) & 0xFF; + b[2] = (n >> 16) & 0xFF; + b[1] = (n >> 8) & 0xFF; + b[0] = n & 0xFF; +} + +void RARSetVirtualMachineRegisters(RARVirtualMachine *vm, uint32_t registers[8]) +{ + if (registers) + memcpy(vm->registers, registers, sizeof(vm->registers)); + else + memset(vm->registers, 0, sizeof(vm->registers)); +} + +uint32_t RARVirtualMachineRead32(RARVirtualMachine *vm, uint32_t address) +{ + return _RARRead32(&vm->memory[address & RARProgramMemoryMask]); +} + +void RARVirtualMachineWrite32(RARVirtualMachine *vm, uint32_t address, uint32_t val) +{ + _RARWrite32(&vm->memory[address & RARProgramMemoryMask], val); +} + +uint8_t RARVirtualMachineRead8(RARVirtualMachine *vm, uint32_t address) +{ + return vm->memory[address & RARProgramMemoryMask]; +} + +void RARVirtualMachineWrite8(RARVirtualMachine *vm, uint32_t address, uint8_t val) +{ + vm->memory[address & 
RARProgramMemoryMask] = val; +} + +static uint32_t _RARGetOperand(RARVirtualMachine *vm, uint8_t addressingmode, uint32_t value, bool bytemode) +{ + if (/*RARRegisterAddressingMode(0) <= addressingmode && */addressingmode <= RARRegisterAddressingMode(7)) { + uint32_t result = vm->registers[addressingmode % 8]; + if (bytemode) + result = result & 0xFF; + return result; + } + if (RARRegisterIndirectAddressingMode(0) <= addressingmode && addressingmode <= RARRegisterIndirectAddressingMode(7)) { + if (bytemode) + return RARVirtualMachineRead8(vm, vm->registers[addressingmode % 8]); + return RARVirtualMachineRead32(vm, vm->registers[addressingmode % 8]); + } + if (RARIndexedAbsoluteAddressingMode(0) <= addressingmode && addressingmode <= RARIndexedAbsoluteAddressingMode(7)) { + if (bytemode) + return RARVirtualMachineRead8(vm, value + vm->registers[addressingmode % 8]); + return RARVirtualMachineRead32(vm, value + vm->registers[addressingmode % 8]); + } + if (addressingmode == RARAbsoluteAddressingMode) { + if (bytemode) + return RARVirtualMachineRead8(vm, value); + return RARVirtualMachineRead32(vm, value); + } + /* if (addressingmode == RARImmediateAddressingMode) */ + return value; +} + +static void _RARSetOperand(RARVirtualMachine *vm, uint8_t addressingmode, uint32_t value, bool bytemode, uint32_t data) +{ + if (/*RARRegisterAddressingMode(0) <= addressingmode &&*/ addressingmode <= RARRegisterAddressingMode(7)) { + if (bytemode) + data = data & 0xFF; + vm->registers[addressingmode % 8] = data; + } + else if (RARRegisterIndirectAddressingMode(0) <= addressingmode && addressingmode <= RARRegisterIndirectAddressingMode(7)) { + if (bytemode) + RARVirtualMachineWrite8(vm, vm->registers[addressingmode % 8], (uint8_t)data); + else + RARVirtualMachineWrite32(vm, vm->registers[addressingmode % 8], data); + } + else if (RARIndexedAbsoluteAddressingMode(0) <= addressingmode && addressingmode <= RARIndexedAbsoluteAddressingMode(7)) { + if (bytemode) + 
RARVirtualMachineWrite8(vm, value + vm->registers[addressingmode % 8], (uint8_t)data); + else + RARVirtualMachineWrite32(vm, value + vm->registers[addressingmode % 8], data); + } + else if (addressingmode == RARAbsoluteAddressingMode) { + if (bytemode) + RARVirtualMachineWrite8(vm, value, (uint8_t)data); + else + RARVirtualMachineWrite32(vm, value, data); + } +} + +/* Instruction properties */ + +#define RAR0OperandsFlag 0 +#define RAR1OperandFlag 1 +#define RAR2OperandsFlag 2 +#define RAROperandsFlag 3 +#define RARHasByteModeFlag 4 +#define RARIsUnconditionalJumpFlag 8 +#define RARIsRelativeJumpFlag 16 +#define RARWritesFirstOperandFlag 32 +#define RARWritesSecondOperandFlag 64 +#define RARReadsStatusFlag 128 +#define RARWritesStatusFlag 256 + +static const int InstructionFlags[RARNumberOfInstructions] = { + /*RARMovInstruction*/ RAR2OperandsFlag | RARHasByteModeFlag | RARWritesFirstOperandFlag, + /*RARCmpInstruction*/ RAR2OperandsFlag | RARHasByteModeFlag | RARWritesStatusFlag, + /*RARAddInstruction*/ RAR2OperandsFlag | RARHasByteModeFlag | RARWritesFirstOperandFlag | RARWritesStatusFlag, + /*RARSubInstruction*/ RAR2OperandsFlag | RARHasByteModeFlag | RARWritesFirstOperandFlag | RARWritesStatusFlag, + /*RARJzInstruction*/ RAR1OperandFlag | RARIsUnconditionalJumpFlag | RARIsRelativeJumpFlag | RARReadsStatusFlag, + /*RARJnzInstruction*/ RAR1OperandFlag | RARIsRelativeJumpFlag | RARReadsStatusFlag, + /*RARIncInstruction*/ RAR1OperandFlag | RARHasByteModeFlag | RARWritesFirstOperandFlag | RARWritesStatusFlag, + /*RARDecInstruction*/ RAR1OperandFlag | RARHasByteModeFlag | RARWritesFirstOperandFlag | RARWritesStatusFlag, + /*RARJmpInstruction*/ RAR1OperandFlag | RARIsRelativeJumpFlag, + /*RARXorInstruction*/ RAR2OperandsFlag | RARHasByteModeFlag | RARWritesFirstOperandFlag | RARWritesStatusFlag, + /*RARAndInstruction*/ RAR2OperandsFlag | RARHasByteModeFlag | RARWritesFirstOperandFlag | RARWritesStatusFlag, + /*RAROrInstruction*/ RAR2OperandsFlag | RARHasByteModeFlag | 
RARWritesFirstOperandFlag | RARWritesStatusFlag, + /*RARTestInstruction*/ RAR2OperandsFlag | RARHasByteModeFlag | RARWritesStatusFlag, + /*RARJsInstruction*/ RAR1OperandFlag | RARIsRelativeJumpFlag | RARReadsStatusFlag, + /*RARJnsInstruction*/ RAR1OperandFlag | RARIsRelativeJumpFlag | RARReadsStatusFlag, + /*RARJbInstruction*/ RAR1OperandFlag | RARIsRelativeJumpFlag | RARReadsStatusFlag, + /*RARJbeInstruction*/ RAR1OperandFlag | RARIsRelativeJumpFlag | RARReadsStatusFlag, + /*RARJaInstruction*/ RAR1OperandFlag | RARIsRelativeJumpFlag | RARReadsStatusFlag, + /*RARJaeInstruction*/ RAR1OperandFlag | RARIsRelativeJumpFlag | RARReadsStatusFlag, + /*RARPushInstruction*/ RAR1OperandFlag, + /*RARPopInstruction*/ RAR1OperandFlag, + /*RARCallInstruction*/ RAR1OperandFlag | RARIsRelativeJumpFlag, + /*RARRetInstruction*/ RAR0OperandsFlag | RARIsUnconditionalJumpFlag, + /*RARNotInstruction*/ RAR1OperandFlag | RARHasByteModeFlag | RARWritesFirstOperandFlag, + /*RARShlInstruction*/ RAR2OperandsFlag | RARHasByteModeFlag | RARWritesFirstOperandFlag | RARWritesStatusFlag, + /*RARShrInstruction*/ RAR2OperandsFlag | RARHasByteModeFlag | RARWritesFirstOperandFlag | RARWritesStatusFlag, + /*RARSarInstruction*/ RAR2OperandsFlag | RARHasByteModeFlag | RARWritesFirstOperandFlag | RARWritesStatusFlag, + /*RARNegInstruction*/ RAR1OperandFlag | RARHasByteModeFlag | RARWritesFirstOperandFlag | RARWritesStatusFlag, + /*RARPushaInstruction*/ RAR0OperandsFlag, + /*RARPopaInstruction*/ RAR0OperandsFlag, + /*RARPushfInstruction*/ RAR0OperandsFlag | RARReadsStatusFlag, + /*RARPopfInstruction*/ RAR0OperandsFlag | RARWritesStatusFlag, + /*RARMovzxInstruction*/ RAR2OperandsFlag | RARWritesFirstOperandFlag, + /*RARMovsxInstruction*/ RAR2OperandsFlag | RARWritesFirstOperandFlag, + /*RARXchgInstruction*/ RAR2OperandsFlag | RARWritesFirstOperandFlag | RARWritesSecondOperandFlag | RARHasByteModeFlag, + /*RARMulInstruction*/ RAR2OperandsFlag | RARHasByteModeFlag | RARWritesFirstOperandFlag, + 
/*RARDivInstruction*/ RAR2OperandsFlag | RARHasByteModeFlag | RARWritesFirstOperandFlag, + /*RARAdcInstruction*/ RAR2OperandsFlag | RARHasByteModeFlag | RARWritesFirstOperandFlag | RARReadsStatusFlag | RARWritesStatusFlag, + /*RARSbbInstruction*/ RAR2OperandsFlag | RARHasByteModeFlag | RARWritesFirstOperandFlag | RARReadsStatusFlag | RARWritesStatusFlag, + /*RARPrintInstruction*/ RAR0OperandsFlag +}; + +int NumberOfRARInstructionOperands(uint8_t instruction) +{ + if (instruction >= RARNumberOfInstructions) + return 0; + return InstructionFlags[instruction] & RAROperandsFlag; +} + +bool RARInstructionHasByteMode(uint8_t instruction) +{ + if (instruction >= RARNumberOfInstructions) + return false; + return (InstructionFlags[instruction] & RARHasByteModeFlag)!=0; +} + +bool RARInstructionIsUnconditionalJump(uint8_t instruction) +{ + if (instruction >= RARNumberOfInstructions) + return false; + return (InstructionFlags[instruction] & RARIsUnconditionalJumpFlag) != 0; +} + +bool RARInstructionIsRelativeJump(uint8_t instruction) +{ + if (instruction >= RARNumberOfInstructions) + return false; + return (InstructionFlags[instruction] & RARIsRelativeJumpFlag) != 0; +} + +bool RARInstructionWritesFirstOperand(uint8_t instruction) +{ + if (instruction >= RARNumberOfInstructions) + return false; + return (InstructionFlags[instruction] & RARWritesFirstOperandFlag) != 0; +} + +bool RARInstructionWritesSecondOperand(uint8_t instruction) +{ + if (instruction >= RARNumberOfInstructions) + return false; + return (InstructionFlags[instruction] & RARWritesSecondOperandFlag) != 0; +} + +/* Program debugging */ + +#ifndef NDEBUG +#include + +static void RARPrintOperand(uint8_t addressingmode, uint32_t value) +{ + if (/*RARRegisterAddressingMode(0) <= addressingmode && */addressingmode <= RARRegisterAddressingMode(7)) + printf("r%d", addressingmode % 8); + else if (RARRegisterIndirectAddressingMode(0) <= addressingmode && addressingmode <= RARRegisterIndirectAddressingMode(7)) + 
printf("@(r%d)", addressingmode % 8); + else if (RARIndexedAbsoluteAddressingMode(0) <= addressingmode && addressingmode <= RARIndexedAbsoluteAddressingMode(7)) + printf("@(r%d+$%02x)", addressingmode % 8, value); + else if (addressingmode == RARAbsoluteAddressingMode) + printf("@($%02x)", value); + else if (addressingmode == RARImmediateAddressingMode) + printf("$%02x", value); +} + +void RARPrintProgram(RARProgram *prog) +{ + static const char *instructionNames[RARNumberOfInstructions] = { + "Mov", "Cmp", "Add", "Sub", "Jz", "Jnz", "Inc", "Dec", "Jmp", "Xor", + "And", "Or", "Test", "Js", "Jns", "Jb", "Jbe", "Ja", "Jae", "Push", + "Pop", "Call", "Ret", "Not", "Shl", "Shr", "Sar", "Neg", "Pusha", "Popa", + "Pushf", "Popf", "Movzx", "Movsx", "Xchg", "Mul", "Div", "Adc", "Sbb", "Print", + }; + + uint32_t i; + for (i = 0; i < prog->length; i++) { + RAROpcode *opcode = &prog->opcodes[i]; + int numoperands = NumberOfRARInstructionOperands(opcode->instruction); + printf(" %02x: %s", i, instructionNames[opcode->instruction]); + if (opcode->bytemode) + printf("B"); + if (numoperands >= 1) { + printf(" "); + RARPrintOperand(opcode->addressingmode1, opcode->value1); + } + if (numoperands == 2) { + printf(", "); + RARPrintOperand(opcode->addressingmode2, opcode->value2); + } + printf("\n"); + } +} +#endif diff --git a/external/unarr/rar/rarvm.h b/external/unarr/rar/rarvm.h new file mode 100644 index 00000000..51567a9a --- /dev/null +++ b/external/unarr/rar/rarvm.h @@ -0,0 +1,117 @@ +/* Copyright 2015 the unarr project authors (see AUTHORS file). 
+ License: LGPLv3 */ + +/* adapted from https://code.google.com/p/theunarchiver/source/browse/XADMaster/RARVirtualMachine.h */ + +#ifndef rar_vm_h +#define rar_vm_h + +#include +#include + +#define RARProgramMemorySize 0x40000 +#define RARProgramMemoryMask (RARProgramMemorySize - 1) +#define RARProgramWorkSize 0x3c000 +#define RARProgramGlobalSize 0x2000 +#define RARProgramSystemGlobalAddress RARProgramWorkSize +#define RARProgramSystemGlobalSize 64 +#define RARProgramUserGlobalAddress (RARProgramSystemGlobalAddress + RARProgramSystemGlobalSize) +#define RARProgramUserGlobalSize (RARProgramGlobalSize - RARProgramSystemGlobalSize) +#define RARRuntimeMaxInstructions 250000000 + +#define RARRegisterAddressingMode(n) (0 + (n)) +#define RARRegisterIndirectAddressingMode(n) (8 + (n)) +#define RARIndexedAbsoluteAddressingMode(n) (16 + (n)) +#define RARAbsoluteAddressingMode 24 +#define RARImmediateAddressingMode 25 +#define RARNumberOfAddressingModes 26 + +typedef struct RARVirtualMachine RARVirtualMachine; + +struct RARVirtualMachine { + uint32_t registers[8]; + uint8_t memory[RARProgramMemorySize + sizeof(uint32_t) /* overflow sentinel */]; +}; + +typedef struct RARProgram_s RARProgram; + +/* Program building */ + +enum { + RARMovInstruction = 0, + RARCmpInstruction = 1, + RARAddInstruction = 2, + RARSubInstruction = 3, + RARJzInstruction = 4, + RARJnzInstruction = 5, + RARIncInstruction = 6, + RARDecInstruction = 7, + RARJmpInstruction = 8, + RARXorInstruction = 9, + RARAndInstruction = 10, + RAROrInstruction = 11, + RARTestInstruction = 12, + RARJsInstruction = 13, + RARJnsInstruction = 14, + RARJbInstruction = 15, + RARJbeInstruction = 16, + RARJaInstruction = 17, + RARJaeInstruction = 18, + RARPushInstruction = 19, + RARPopInstruction = 20, + RARCallInstruction = 21, + RARRetInstruction = 22, + RARNotInstruction = 23, + RARShlInstruction = 24, + RARShrInstruction = 25, + RARSarInstruction = 26, + RARNegInstruction = 27, + RARPushaInstruction = 28, + 
RARPopaInstruction = 29, + RARPushfInstruction = 30, + RARPopfInstruction = 31, + RARMovzxInstruction = 32, + RARMovsxInstruction = 33, + RARXchgInstruction = 34, + RARMulInstruction = 35, + RARDivInstruction = 36, + RARAdcInstruction = 37, + RARSbbInstruction = 38, + RARPrintInstruction = 39, + RARNumberOfInstructions = 40, +}; + +RARProgram *RARCreateProgram(void); +void RARDeleteProgram(RARProgram *prog); +bool RARProgramAddInstr(RARProgram *prog, uint8_t instruction, bool bytemode); +bool RARSetLastInstrOperands(RARProgram *prog, uint8_t addressingmode1, uint32_t value1, uint8_t addressingmode2, uint32_t value2); +bool RARIsProgramTerminated(RARProgram *prog); + +/* Execution */ + +bool RARExecuteProgram(RARVirtualMachine *vm, RARProgram *prog); + +/* Memory and register access (convenience) */ + +void RARSetVirtualMachineRegisters(RARVirtualMachine *vm, uint32_t registers[8]); +uint32_t RARVirtualMachineRead32(RARVirtualMachine *vm, uint32_t address); +void RARVirtualMachineWrite32(RARVirtualMachine *vm, uint32_t address, uint32_t val); +uint8_t RARVirtualMachineRead8(RARVirtualMachine *vm, uint32_t address); +void RARVirtualMachineWrite8(RARVirtualMachine *vm, uint32_t address, uint8_t val); + +/* Instruction properties */ + +int NumberOfRARInstructionOperands(uint8_t instruction); +bool RARInstructionHasByteMode(uint8_t instruction); +bool RARInstructionIsUnconditionalJump(uint8_t instruction); +bool RARInstructionIsRelativeJump(uint8_t instruction); +bool RARInstructionWritesFirstOperand(uint8_t instruction); +bool RARInstructionWritesSecondOperand(uint8_t instruction); + +/* Program debugging */ + +#ifndef NDEBUG +void RARPrintProgram(RARProgram *prog); +#endif + +#endif diff --git a/external/unarr/rar/uncompress-rar.c b/external/unarr/rar/uncompress-rar.c new file mode 100644 index 00000000..70c259cf --- /dev/null +++ b/external/unarr/rar/uncompress-rar.c @@ -0,0 +1,984 @@ +/* Copyright 2015 the unarr project authors (see AUTHORS file). 
+ License: LGPLv3 */ + +/* adapted from https://code.google.com/p/theunarchiver/source/browse/XADMaster/XADRAR30Handle.m */ +/* adapted from https://code.google.com/p/theunarchiver/source/browse/XADMaster/XADRAR20Handle.m */ + +#include "rar.h" + +static void *gSzAlloc_Alloc(ISzAllocPtr self, size_t size) { (void)self; return malloc(size); } +static void gSzAlloc_Free(ISzAllocPtr self, void *ptr) { (void)self; free(ptr); } +static ISzAlloc gSzAlloc = { gSzAlloc_Alloc, gSzAlloc_Free }; + +static bool br_fill(ar_archive_rar *rar, int bits) +{ + uint8_t bytes[8]; + int count, i; + /* read as many bits as possible */ + count = (64 - rar->uncomp.br.available) / 8; + if (rar->progress.data_left < (size_t)count) + count = (int)rar->progress.data_left; + + if (bits > rar->uncomp.br.available + 8 * count || ar_read(rar->super.stream, bytes, count) != (size_t)count) { + if (!rar->uncomp.br.at_eof) { + warn("Unexpected EOF during decompression (truncated file?)"); + rar->uncomp.br.at_eof = true; + } + return false; + } + rar->progress.data_left -= count; + for (i = 0; i < count; i++) { + rar->uncomp.br.bits = (rar->uncomp.br.bits << 8) | bytes[i]; + } + rar->uncomp.br.available += 8 * count; + return true; +} + +static inline bool br_check(ar_archive_rar *rar, int bits) +{ + return bits <= rar->uncomp.br.available || br_fill(rar, bits); +} + +static inline uint64_t br_bits(ar_archive_rar *rar, int bits) +{ + return (rar->uncomp.br.bits >> (rar->uncomp.br.available -= bits)) & (((uint64_t)1 << bits) - 1); +} + +static Byte ByteIn_Read(const IByteIn *p) +{ + struct ByteReader *self = (struct ByteReader *) p; + return br_check(self->rar, 8) ? 
(Byte)br_bits(self->rar, 8) : 0xFF; +} + +static void ByteIn_CreateVTable(struct ByteReader *br, ar_archive_rar *rar) +{ + br->super.Read = ByteIn_Read; + br->rar = rar; +} + +static bool rar_init_uncompress(struct ar_archive_rar_uncomp *uncomp, uint8_t version) +{ + /* per XADRARParser.m @handleForSolidStreamWithObject these versions are identical */ + if (version == 29 || version == 36) + version = 3; + else if (version == 20 || version == 26) + version = 2; + else { + warn("Unsupported compression version: %d", version); + return false; + } + if (uncomp->version) { + if (uncomp->version != version) { + warn("Compression version mismatch: %d != %d", version, uncomp->version); + return false; + } + return true; + } + memset(uncomp, 0, sizeof(*uncomp)); + uncomp->start_new_table = true; + if (!lzss_initialize(&uncomp->lzss, LZSS_WINDOW_SIZE)) { + warn("OOM during decompression"); + return false; + } + if (version == 3) { + uncomp->state.v3.ppmd_escape = 2; + uncomp->state.v3.filters.filterstart = SIZE_MAX; + } + uncomp->version = version; + return true; +} + +static void rar_free_codes(struct ar_archive_rar_uncomp *uncomp); + +void rar_clear_uncompress(struct ar_archive_rar_uncomp *uncomp) +{ + if (!uncomp->version) + return; + rar_free_codes(uncomp); + lzss_cleanup(&uncomp->lzss); + if (uncomp->version == 3) { + Ppmd7_Free(&uncomp->state.v3.ppmd7_context, &gSzAlloc); + rar_clear_filters(&uncomp->state.v3.filters); + } + uncomp->version = 0; +} + +static int rar_read_next_symbol(ar_archive_rar *rar, struct huffman_code *code) +{ + int node = 0; + + if (!code->table && !rar_make_table(code)) + return -1; + + /* performance optimization */ + if (code->tablesize <= rar->uncomp.br.available) { + uint16_t bits = (uint16_t)br_bits(rar, code->tablesize); + int length = code->table[bits].length; + int value = code->table[bits].value; + + if (length < 0) { + warn("Invalid data in bitstream"); /* invalid prefix code in bitstream */ + return -1; + } + if (length <= 
code->tablesize) {
+            /* Skip only length bits */
+            rar->uncomp.br.available += code->tablesize - length;
+            return value;
+        }
+
+        node = value;
+    }
+
+    while (!rar_is_leaf_node(code, node)) {
+        uint8_t bit;
+        if (!br_check(rar, 1))
+            return -1;
+        bit = (uint8_t)br_bits(rar, 1);
+        if (code->tree[node].branches[bit] < 0) {
+            warn("Invalid data in bitstream"); /* invalid prefix code in bitstream */
+            return -1;
+        }
+        node = code->tree[node].branches[bit];
+    }
+
+    return code->tree[node].branches[0];
+}
+
+/***** RAR version 2 decompression *****/
+
+static void rar_free_codes_v2(struct ar_archive_rar_uncomp_v2 *uncomp_v2)
+{
+    int i;
+    rar_free_code(&uncomp_v2->maincode);
+    rar_free_code(&uncomp_v2->offsetcode);
+    rar_free_code(&uncomp_v2->lengthcode);
+    for (i = 0; i < 4; i++)
+        rar_free_code(&uncomp_v2->audiocode[i]);
+}
+
+static bool rar_parse_codes_v2(ar_archive_rar *rar)
+{
+    struct ar_archive_rar_uncomp_v2 *uncomp_v2 = &rar->uncomp.state.v2;
+    struct huffman_code precode;
+    uint8_t prelengths[19];
+    uint16_t i, count;
+    int j, val, n;
+    bool ok = false;
+
+    rar_free_codes_v2(uncomp_v2);
+
+    if (!br_check(rar, 2))
+        return false;
+    uncomp_v2->audioblock = br_bits(rar, 1) != 0;
+    if (!br_bits(rar, 1))
+        memset(uncomp_v2->lengthtable, 0, sizeof(uncomp_v2->lengthtable));
+
+    if (uncomp_v2->audioblock) {
+        if (!br_check(rar, 2))
+            return false;
+        uncomp_v2->numchannels = (uint8_t)br_bits(rar, 2) + 1;
+        count = uncomp_v2->numchannels * 257;
+        if (uncomp_v2->channel >= uncomp_v2->numchannels)
+            uncomp_v2->channel = 0;
+    }
+    else
+        count = MAINCODE_SIZE_20 + OFFSETCODE_SIZE_20 + LENGTHCODE_SIZE_20;
+
+    for (i = 0; i < 19; i++) {
+        if (!br_check(rar, 4))
+            return false;
+        prelengths[i] = (uint8_t)br_bits(rar, 4);
+    }
+
+    memset(&precode, 0, sizeof(precode));
+    if (!rar_create_code(&precode, prelengths, 19))
+        goto PrecodeError;
+    for (i = 0; i < count; ) {
+        val = rar_read_next_symbol(rar, &precode);
+        if (val < 0)
+            goto PrecodeError;
+        if (val < 16) {
+            
uncomp_v2->lengthtable[i] = (uncomp_v2->lengthtable[i] + val) & 0x0F; + i++; + } + else if (val == 16) { + if (i == 0) { + warn("Invalid data in bitstream"); + goto PrecodeError; + } + if (!br_check(rar, 2)) + goto PrecodeError; + n = (uint8_t)br_bits(rar, 2) + 3; + for (j = 0; j < n && i < count; i++, j++) { + uncomp_v2->lengthtable[i] = uncomp_v2->lengthtable[i - 1]; + } + } + else { + if (val == 17) { + if (!br_check(rar, 3)) + goto PrecodeError; + n = (uint8_t)br_bits(rar, 3) + 3; + } + else { + if (!br_check(rar, 7)) + goto PrecodeError; + n = (uint8_t)br_bits(rar, 7) + 11; + } + for (j = 0; j < n && i < count; i++, j++) { + uncomp_v2->lengthtable[i] = 0; + } + } + } + ok = true; +PrecodeError: + rar_free_code(&precode); + if (!ok) + return false; + + if (uncomp_v2->audioblock) { + for (i = 0; i < uncomp_v2->numchannels; i++) { + if (!rar_create_code(&uncomp_v2->audiocode[i], uncomp_v2->lengthtable + i * 257, 257)) + return false; + } + } + else { + if (!rar_create_code(&uncomp_v2->maincode, uncomp_v2->lengthtable, MAINCODE_SIZE_20)) + return false; + if (!rar_create_code(&uncomp_v2->offsetcode, uncomp_v2->lengthtable + MAINCODE_SIZE_20, OFFSETCODE_SIZE_20)) + return false; + if (!rar_create_code(&uncomp_v2->lengthcode, uncomp_v2->lengthtable + MAINCODE_SIZE_20 + OFFSETCODE_SIZE_20, LENGTHCODE_SIZE_20)) + return false; + } + + rar->uncomp.start_new_table = false; + return true; +} + +static uint8_t rar_decode_audio(struct AudioState *state, int8_t *channeldelta, int8_t delta) +{ + uint8_t predbyte, byte; + int prederror; + + state->delta[3] = state->delta[2]; + state->delta[2] = state->delta[1]; + state->delta[1] = state->lastdelta - state->delta[0]; + state->delta[0] = state->lastdelta; + + predbyte = ((8 * state->lastbyte + state->weight[0] * state->delta[0] + state->weight[1] * state->delta[1] + state->weight[2] * state->delta[2] + state->weight[3] * state->delta[3] + state->weight[4] * *channeldelta) >> 3) & 0xFF; + byte = (predbyte - delta) & 0xFF; + + 
prederror = delta << 3; + state->error[0] += abs(prederror); + state->error[1] += abs(prederror - state->delta[0]); state->error[2] += abs(prederror + state->delta[0]); + state->error[3] += abs(prederror - state->delta[1]); state->error[4] += abs(prederror + state->delta[1]); + state->error[5] += abs(prederror - state->delta[2]); state->error[6] += abs(prederror + state->delta[2]); + state->error[7] += abs(prederror - state->delta[3]); state->error[8] += abs(prederror + state->delta[3]); + state->error[9] += abs(prederror - *channeldelta); state->error[10] += abs(prederror + *channeldelta); + + *channeldelta = state->lastdelta = (int8_t)(byte - state->lastbyte); + state->lastbyte = byte; + + if (!(++state->count & 0x1F)) { + uint8_t i, idx = 0; + for (i = 1; i < 11; i++) { + if (state->error[i] < state->error[idx]) + idx = i; + } + memset(state->error, 0, sizeof(state->error)); + + switch (idx) { + case 1: if (state->weight[0] >= -16) state->weight[0]--; break; + case 2: if (state->weight[0] < 16) state->weight[0]++; break; + case 3: if (state->weight[1] >= -16) state->weight[1]--; break; + case 4: if (state->weight[1] < 16) state->weight[1]++; break; + case 5: if (state->weight[2] >= -16) state->weight[2]--; break; + case 6: if (state->weight[2] < 16) state->weight[2]++; break; + case 7: if (state->weight[3] >= -16) state->weight[3]--; break; + case 8: if (state->weight[3] < 16) state->weight[3]++; break; + case 9: if (state->weight[4] >= -16) state->weight[4]--; break; + case 10: if (state->weight[4] < 16) state->weight[4]++; break; + } + } + + return byte; +} + +static int64_t rar_expand_v2(ar_archive_rar *rar, int64_t end) +{ + static const uint8_t lengthbases[] = + { 0, 1, 2, 3, 4, 5, 6, + 7, 8, 10, 12, 14, 16, 20, + 24, 28, 32, 40, 48, 56, 64, + 80, 96, 112, 128, 160, 192, 224 }; + static const uint8_t lengthbits[] = + { 0, 0, 0, 0, 0, 0, 0, + 0, 1, 1, 1, 1, 2, 2, + 2, 2, 3, 3, 3, 3, 4, + 4, 4, 4, 5, 5, 5, 5 }; + static const int32_t offsetbases[] = + { 0, 1, 
2, 3, 4, 6, + 8, 12, 16, 24, 32, 48, + 64, 96, 128, 192, 256, 384, + 512, 768, 1024, 1536, 2048, 3072, + 4096, 6144, 8192, 12288, 16384, 24576, + 32768, 49152, 65536, 98304, 131072, 196608, + 262144, 327680, 393216, 458752, 524288, 589824, + 655360, 720896, 786432, 851968, 917504, 983040 }; + static const uint8_t offsetbits[] = + { 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, + 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, + 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16 }; + static const uint8_t shortbases[] = + { 0, 4, 8, 16, 32, 64, 128, 192 }; + static const uint8_t shortbits[] = + { 2, 2, 3, 4, 5, 6, 6, 6 }; + + struct ar_archive_rar_uncomp_v2 *uncomp_v2 = &rar->uncomp.state.v2; + LZSS *lzss = &rar->uncomp.lzss; + int symbol, offs, len; + + if ((uint64_t)end > rar->super.entry_size_uncompressed + rar->solid.size_total) + end = rar->super.entry_size_uncompressed + rar->solid.size_total; + + for (;;) { + if (lzss_position(lzss) >= end) + return end; + + if (uncomp_v2->audioblock) { + uint8_t byte; + symbol = rar_read_next_symbol(rar, &uncomp_v2->audiocode[uncomp_v2->channel]); + if (symbol < 0) + return -1; + if (symbol == 256) { + rar->uncomp.start_new_table = true; + return lzss_position(lzss); + } + byte = rar_decode_audio(&uncomp_v2->audiostate[uncomp_v2->channel], &uncomp_v2->channeldelta, (int8_t)(uint8_t)symbol); + uncomp_v2->channel++; + if (uncomp_v2->channel == uncomp_v2->numchannels) + uncomp_v2->channel = 0; + lzss_emit_literal(lzss, byte); + continue; + } + + symbol = rar_read_next_symbol(rar, &uncomp_v2->maincode); + if (symbol < 0) + return -1; + if (symbol < 256) { + lzss_emit_literal(lzss, (uint8_t)symbol); + continue; + } + if (symbol == 256) { + offs = uncomp_v2->lastoffset; + len = uncomp_v2->lastlength; + } + else if (symbol <= 260) { + int idx = symbol - 256; + int lensymbol = rar_read_next_symbol(rar, &uncomp_v2->lengthcode); + offs = uncomp_v2->oldoffset[(uncomp_v2->oldoffsetindex - idx) & 0x03]; + if 
(lensymbol < 0 || lensymbol > (int)(sizeof(lengthbases) / sizeof(lengthbases[0])) || lensymbol > (int)(sizeof(lengthbits) / sizeof(lengthbits[0]))) { + warn("Invalid data in bitstream"); + return -1; + } + len = lengthbases[lensymbol] + 2; + if (lengthbits[lensymbol] > 0) { + if (!br_check(rar, lengthbits[lensymbol])) + return -1; + len += (uint8_t)br_bits(rar, lengthbits[lensymbol]); + } + if (offs >= 0x40000) + len++; + if (offs >= 0x2000) + len++; + if (offs >= 0x101) + len++; + } + else if (symbol <= 268) { + int idx = symbol - 261; + offs = shortbases[idx] + 1; + if (shortbits[idx] > 0) { + if (!br_check(rar, shortbits[idx])) + return -1; + offs += (uint8_t)br_bits(rar, shortbits[idx]); + } + len = 2; + } + else if (symbol == 269) { + rar->uncomp.start_new_table = true; + return lzss_position(lzss); + } + else { + int idx = symbol - 270; + int offssymbol; + if (idx > (int)(sizeof(lengthbases) / sizeof(lengthbases[0])) || idx > (int)(sizeof(lengthbits) / sizeof(lengthbits[0]))) { + warn("Invalid data in bitstream"); + return -1; + } + len = lengthbases[idx] + 3; + if (lengthbits[idx] > 0) { + if (!br_check(rar, lengthbits[idx])) + return -1; + len += (uint8_t)br_bits(rar, lengthbits[idx]); + } + offssymbol = rar_read_next_symbol(rar, &uncomp_v2->offsetcode); + if (offssymbol < 0 || offssymbol > (int)(sizeof(offsetbases) / sizeof(offsetbases[0])) || offssymbol > (int)(sizeof(offsetbits) / sizeof(offsetbits[0]))) { + warn("Invalid data in bitstream"); + return -1; + } + offs = offsetbases[offssymbol] + 1; + if (offsetbits[offssymbol] > 0) { + if (!br_check(rar, offsetbits[offssymbol])) + return -1; + offs += (int)br_bits(rar, offsetbits[offssymbol]); + } + if (offs >= 0x40000) + len++; + if (offs >= 0x2000) + len++; + } + + uncomp_v2->lastoffset = uncomp_v2->oldoffset[uncomp_v2->oldoffsetindex++ & 0x03] = offs; + uncomp_v2->lastlength = len; + + lzss_emit_match(lzss, offs, len); + } +} + +/***** RAR version 3 decompression *****/ + +static void 
rar_free_codes(struct ar_archive_rar_uncomp *uncomp) +{ + struct ar_archive_rar_uncomp_v3 *uncomp_v3 = &uncomp->state.v3; + + if (uncomp->version == 2) { + rar_free_codes_v2(&uncomp->state.v2); + return; + } + + rar_free_code(&uncomp_v3->maincode); + rar_free_code(&uncomp_v3->offsetcode); + rar_free_code(&uncomp_v3->lowoffsetcode); + rar_free_code(&uncomp_v3->lengthcode); +} + +static bool rar_parse_codes(ar_archive_rar *rar) +{ + struct ar_archive_rar_uncomp_v3 *uncomp_v3 = &rar->uncomp.state.v3; + + if (rar->uncomp.version == 2) + return rar_parse_codes_v2(rar); + + rar_free_codes(&rar->uncomp); + + br_clear_leftover_bits(&rar->uncomp); + + if (!br_check(rar, 1)) + return false; + uncomp_v3->is_ppmd_block = br_bits(rar, 1) != 0; + if (uncomp_v3->is_ppmd_block) { + uint8_t ppmd_flags; + uint32_t max_alloc = 0; + + if (!br_check(rar, 7)) + return false; + ppmd_flags = (uint8_t)br_bits(rar, 7); + if ((ppmd_flags & 0x20)) { + if (!br_check(rar, 8)) + return false; + max_alloc = ((uint8_t)br_bits(rar, 8) + 1) << 20; + } + if ((ppmd_flags & 0x40)) { + if (!br_check(rar, 8)) + return false; + uncomp_v3->ppmd_escape = (uint8_t)br_bits(rar, 8); + } + if ((ppmd_flags & 0x20)) { + uint32_t maxorder = (ppmd_flags & 0x1F) + 1; + if (maxorder == 1) + return false; + if (maxorder > 16) + maxorder = 16 + (maxorder - 16) * 3; + + Ppmd7_Free(&uncomp_v3->ppmd7_context, &gSzAlloc); + Ppmd7_Construct(&uncomp_v3->ppmd7_context); + if (!Ppmd7_Alloc(&uncomp_v3->ppmd7_context, max_alloc, &gSzAlloc)) { + warn("OOM during decompression"); + return false; + } + ByteIn_CreateVTable(&uncomp_v3->bytein, rar); + // We need to set the stream before calling RangeDec_Init + uncomp_v3->ppmd7_context.rc.dec.Stream = &uncomp_v3->bytein.super; + Ppmd7a_RangeDec_Init(&uncomp_v3->ppmd7_context.rc.dec); + Ppmd7_Init(&uncomp_v3->ppmd7_context, maxorder); + } + else { + if (!Ppmd7_WasAllocated(&uncomp_v3->ppmd7_context)) { + warn("Invalid data in bitstream"); /* invalid PPMd sequence */ + return false; + } 
+ Ppmd7a_RangeDec_Init(&uncomp_v3->ppmd7_context.rc.dec); + } + } + else { + struct huffman_code precode; + uint8_t bitlengths[20]; + uint8_t zerocount; + int i, j, val, n; + bool ok = false; + + if (!br_check(rar, 1)) + return false; + if (!br_bits(rar, 1)) + memset(uncomp_v3->lengthtable, 0, sizeof(uncomp_v3->lengthtable)); + memset(&bitlengths, 0, sizeof(bitlengths)); + for (i = 0; i < (int)sizeof(bitlengths); i++) { + if (!br_check(rar, 4)) + return false; + bitlengths[i] = (uint8_t)br_bits(rar, 4); + if (bitlengths[i] == 0x0F) { + if (!br_check(rar, 4)) + return false; + zerocount = (uint8_t)br_bits(rar, 4); + if (zerocount) { + for (j = 0; j < zerocount + 2 && i < (int)sizeof(bitlengths); j++) { + bitlengths[i++] = 0; + } + i--; + } + } + } + + memset(&precode, 0, sizeof(precode)); + if (!rar_create_code(&precode, bitlengths, sizeof(bitlengths))) + goto PrecodeError; + for (i = 0; i < HUFFMAN_TABLE_SIZE; ) { + val = rar_read_next_symbol(rar, &precode); + if (val < 0) + goto PrecodeError; + if (val < 16) { + uncomp_v3->lengthtable[i] = (uncomp_v3->lengthtable[i] + val) & 0x0F; + i++; + } + else if (val < 18) { + if (i == 0) { + warn("Invalid data in bitstream"); + goto PrecodeError; + } + if (val == 16) { + if (!br_check(rar, 3)) + goto PrecodeError; + n = (uint8_t)br_bits(rar, 3) + 3; + } + else { + if (!br_check(rar, 7)) + goto PrecodeError; + n = (uint8_t)br_bits(rar, 7) + 11; + } + for (j = 0; j < n && i < HUFFMAN_TABLE_SIZE; i++, j++) { + uncomp_v3->lengthtable[i] = uncomp_v3->lengthtable[i - 1]; + } + } + else { + if (val == 18) { + if (!br_check(rar, 3)) + goto PrecodeError; + n = (uint8_t)br_bits(rar, 3) + 3; + } + else { + if (!br_check(rar, 7)) + goto PrecodeError; + n = (uint8_t)br_bits(rar, 7) + 11; + } + for (j = 0; j < n && i < HUFFMAN_TABLE_SIZE; i++, j++) { + uncomp_v3->lengthtable[i] = 0; + } + } + } + ok = true; +PrecodeError: + rar_free_code(&precode); + if (!ok) + return false; + + if (!rar_create_code(&uncomp_v3->maincode, 
uncomp_v3->lengthtable, MAINCODE_SIZE)) + return false; + if (!rar_create_code(&uncomp_v3->offsetcode, uncomp_v3->lengthtable + MAINCODE_SIZE, OFFSETCODE_SIZE)) + return false; + if (!rar_create_code(&uncomp_v3->lowoffsetcode, uncomp_v3->lengthtable + MAINCODE_SIZE + OFFSETCODE_SIZE, LOWOFFSETCODE_SIZE)) + return false; + if (!rar_create_code(&uncomp_v3->lengthcode, uncomp_v3->lengthtable + MAINCODE_SIZE + OFFSETCODE_SIZE + LOWOFFSETCODE_SIZE, LENGTHCODE_SIZE)) + return false; + } + + rar->uncomp.start_new_table = false; + return true; +} + +static bool rar_read_filter(ar_archive_rar *rar, bool (* decode_byte)(ar_archive_rar *rar, uint8_t *byte), int64_t *end) +{ + uint8_t flags, val, *code; + uint16_t length, i; + + if (!decode_byte(rar, &flags)) + return false; + length = (flags & 0x07) + 1; + if (length == 7) { + if (!decode_byte(rar, &val)) + return false; + length = val + 7; + } + else if (length == 8) { + if (!decode_byte(rar, &val)) + return false; + length = val << 8; + if (!decode_byte(rar, &val)) + return false; + length |= val; + } + + code = malloc(length); + if (!code) { + warn("OOM during decompression"); + return false; + } + for (i = 0; i < length; i++) { + if (!decode_byte(rar, &code[i])) { + free(code); + return false; + } + } + if (!rar_parse_filter(rar, code, length, flags)) { + free(code); + return false; + } + free(code); + + if (rar->uncomp.state.v3.filters.filterstart < (size_t)*end) + *end = rar->uncomp.state.v3.filters.filterstart; + + return true; +} + +static inline bool rar_decode_ppmd7_symbol(struct ar_archive_rar_uncomp_v3 *uncomp_v3, Byte *symbol) +{ + int value = Ppmd7a_DecodeSymbol(&uncomp_v3->ppmd7_context); + if (value < 0) { + warn("Invalid data in bitstream"); /* invalid PPMd symbol */ + return false; + } + *symbol = (Byte)value; + return true; +} + +static bool rar_decode_byte(ar_archive_rar *rar, uint8_t *byte) +{ + if (!br_check(rar, 8)) + return false; + *byte = (uint8_t)br_bits(rar, 8); + return true; +} + +static bool 
rar_decode_ppmd7_byte(ar_archive_rar *rar, uint8_t *byte) +{ + return rar_decode_ppmd7_symbol(&rar->uncomp.state.v3, byte); +} + +static bool rar_handle_ppmd_sequence(ar_archive_rar *rar, int64_t *end) +{ + struct ar_archive_rar_uncomp_v3 *uncomp_v3 = &rar->uncomp.state.v3; + LZSS *lzss = &rar->uncomp.lzss; + Byte sym, code, length; + int lzss_offset; + + if (!rar_decode_ppmd7_symbol(uncomp_v3, &sym)) + return false; + if (sym != uncomp_v3->ppmd_escape) { + lzss_emit_literal(lzss, sym); + return true; + } + + if (!rar_decode_ppmd7_symbol(uncomp_v3, &code)) + return false; + switch (code) { + case 0: + return rar_parse_codes(rar); + + case 2: + rar->uncomp.start_new_table = true; + return true; + + case 3: + return rar_read_filter(rar, rar_decode_ppmd7_byte, end); + + case 4: + if (!rar_decode_ppmd7_symbol(uncomp_v3, &code)) + return false; + lzss_offset = code << 16; + if (!rar_decode_ppmd7_symbol(uncomp_v3, &code)) + return false; + lzss_offset |= code << 8; + if (!rar_decode_ppmd7_symbol(uncomp_v3, &code)) + return false; + lzss_offset |= code; + if (!rar_decode_ppmd7_symbol(uncomp_v3, &length)) + return false; + lzss_emit_match(lzss, lzss_offset + 2, length + 32); + return true; + + case 5: + if (!rar_decode_ppmd7_symbol(uncomp_v3, &length)) + return false; + lzss_emit_match(lzss, 1, length + 4); + return true; + + default: + lzss_emit_literal(lzss, sym); + return true; + } +} + +int64_t rar_expand(ar_archive_rar *rar, int64_t end) +{ + static const uint8_t lengthbases[] = + { 0, 1, 2, 3, 4, 5, 6, + 7, 8, 10, 12, 14, 16, 20, + 24, 28, 32, 40, 48, 56, 64, + 80, 96, 112, 128, 160, 192, 224 }; + static const uint8_t lengthbits[] = + { 0, 0, 0, 0, 0, 0, 0, + 0, 1, 1, 1, 1, 2, 2, + 2, 2, 3, 3, 3, 3, 4, + 4, 4, 4, 5, 5, 5, 5 }; + static const int32_t offsetbases[] = + { 0, 1, 2, 3, 4, 6, + 8, 12, 16, 24, 32, 48, + 64, 96, 128, 192, 256, 384, + 512, 768, 1024, 1536, 2048, 3072, + 4096, 6144, 8192, 12288, 16384, 24576, + 32768, 49152, 65536, 98304, 131072, 196608, + 
262144, 327680, 393216, 458752, 524288, 589824, + 655360, 720896, 786432, 851968, 917504, 983040, + 1048576, 1310720, 1572864, 1835008, 2097152, 2359296, + 2621440, 2883584, 3145728, 3407872, 3670016, 3932160 }; + static const uint8_t offsetbits[] = + { 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, + 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, + 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18 }; + static const uint8_t shortbases[] = + { 0, 4, 8, 16, 32, 64, 128, 192 }; + static const uint8_t shortbits[] = + { 2, 2, 3, 4, 5, 6, 6, 6 }; + + struct ar_archive_rar_uncomp_v3 *uncomp_v3 = &rar->uncomp.state.v3; + LZSS *lzss = &rar->uncomp.lzss; + int symbol, offs, len, i; + + if (rar->uncomp.version == 2) + return rar_expand_v2(rar, end); + + for (;;) { + if (lzss_position(lzss) >= end) + return end; + + if (uncomp_v3->is_ppmd_block) { + if (!rar_handle_ppmd_sequence(rar, &end)) + return -1; + if (rar->uncomp.start_new_table) + return lzss_position(lzss); + continue; + } + + symbol = rar_read_next_symbol(rar, &uncomp_v3->maincode); + if (symbol < 0) + return -1; + if (symbol < 256) { + lzss_emit_literal(lzss, (uint8_t)symbol); + continue; + } + if (symbol == 256) { + if (!br_check(rar, 1)) + return -1; + if (!br_bits(rar, 1)) { + if (!br_check(rar, 1)) + return -1; + rar->uncomp.start_new_table = br_bits(rar, 1) != 0; + return lzss_position(lzss); + } + if (!rar_parse_codes(rar)) + return -1; + continue; + } + if (symbol == 257) { + if (!rar_read_filter(rar, rar_decode_byte, &end)) + return -1; + continue; + } + if (symbol == 258) { + if (uncomp_v3->lastlength == 0) + continue; + offs = uncomp_v3->lastoffset; + len = uncomp_v3->lastlength; + } + else if (symbol <= 262) { + int idx = symbol - 259; + int lensymbol = rar_read_next_symbol(rar, &uncomp_v3->lengthcode); + offs = uncomp_v3->oldoffset[idx]; + if (lensymbol < 0 || lensymbol > (int)(sizeof(lengthbases) / sizeof(lengthbases[0])) || 
lensymbol > (int)(sizeof(lengthbits) / sizeof(lengthbits[0]))) { + warn("Invalid data in bitstream"); + return -1; + } + len = lengthbases[lensymbol] + 2; + if (lengthbits[lensymbol] > 0) { + if (!br_check(rar, lengthbits[lensymbol])) + return -1; + len += (uint8_t)br_bits(rar, lengthbits[lensymbol]); + } + for (i = idx; i > 0; i--) + uncomp_v3->oldoffset[i] = uncomp_v3->oldoffset[i - 1]; + uncomp_v3->oldoffset[0] = offs; + } + else if (symbol <= 270) { + int idx = symbol - 263; + offs = shortbases[idx] + 1; + if (shortbits[idx] > 0) { + if (!br_check(rar, shortbits[idx])) + return -1; + offs += (uint8_t)br_bits(rar, shortbits[idx]); + } + len = 2; + for (i = 3; i > 0; i--) + uncomp_v3->oldoffset[i] = uncomp_v3->oldoffset[i - 1]; + uncomp_v3->oldoffset[0] = offs; + } + else { + int idx = symbol - 271; + int offssymbol; + if (idx > (int)(sizeof(lengthbases) / sizeof(lengthbases[0])) || idx > (int)(sizeof(lengthbits) / sizeof(lengthbits[0]))) { + warn("Invalid data in bitstream"); + return -1; + } + len = lengthbases[idx] + 3; + if (lengthbits[idx] > 0) { + if (!br_check(rar, lengthbits[idx])) + return -1; + len += (uint8_t)br_bits(rar, lengthbits[idx]); + } + offssymbol = rar_read_next_symbol(rar, &uncomp_v3->offsetcode); + if (offssymbol < 0 || offssymbol > (int)(sizeof(offsetbases) / sizeof(offsetbases[0])) || offssymbol > (int)(sizeof(offsetbits) / sizeof(offsetbits[0]))) { + warn("Invalid data in bitstream"); + return -1; + } + offs = offsetbases[offssymbol] + 1; + if (offsetbits[offssymbol] > 0) { + if (offssymbol > 9) { + if (offsetbits[offssymbol] > 4) { + if (!br_check(rar, offsetbits[offssymbol] - 4)) + return -1; + offs += (int)br_bits(rar, offsetbits[offssymbol] - 4) << 4; + } + if (uncomp_v3->numlowoffsetrepeats > 0) { + uncomp_v3->numlowoffsetrepeats--; + offs += uncomp_v3->lastlowoffset; + } + else { + int lowoffsetsymbol = rar_read_next_symbol(rar, &uncomp_v3->lowoffsetcode); + if (lowoffsetsymbol < 0) + return -1; + if (lowoffsetsymbol == 16) { + 
uncomp_v3->numlowoffsetrepeats = 15; + offs += uncomp_v3->lastlowoffset; + } + else { + offs += lowoffsetsymbol; + uncomp_v3->lastlowoffset = lowoffsetsymbol; + } + } + } + else { + if (!br_check(rar, offsetbits[offssymbol])) + return -1; + offs += (int)br_bits(rar, offsetbits[offssymbol]); + } + } + + if (offs >= 0x40000) + len++; + if (offs >= 0x2000) + len++; + + for (i = 3; i > 0; i--) + uncomp_v3->oldoffset[i] = uncomp_v3->oldoffset[i - 1]; + uncomp_v3->oldoffset[0] = offs; + } + + uncomp_v3->lastoffset = offs; + uncomp_v3->lastlength = len; + + lzss_emit_match(lzss, offs, len); + } +} + +bool rar_uncompress_part(ar_archive_rar *rar, void *buffer, size_t buffer_size) +{ + struct ar_archive_rar_uncomp *uncomp = &rar->uncomp; + struct ar_archive_rar_uncomp_v3 *uncomp_v3 = NULL; + size_t end; + + if (!rar_init_uncompress(uncomp, rar->entry.version)) + return false; + if (uncomp->version == 3) + uncomp_v3 = &uncomp->state.v3; + + for (;;) { + if (uncomp_v3 && uncomp_v3->filters.bytes_ready > 0) { + size_t count = smin(uncomp_v3->filters.bytes_ready, buffer_size); + memcpy(buffer, uncomp_v3->filters.bytes, count); + uncomp_v3->filters.bytes_ready -= count; + uncomp_v3->filters.bytes += count; + rar->progress.bytes_done += count; + buffer_size -= count; + buffer = (uint8_t *)buffer + count; + if (rar->progress.bytes_done == rar->super.entry_size_uncompressed) + goto FinishBlock; + } + else if (uncomp->bytes_ready > 0) { + int count = (int)smin(uncomp->bytes_ready, buffer_size); + lzss_copy_bytes_from_window(&uncomp->lzss, buffer, rar->progress.bytes_done + rar->solid.size_total, count); + uncomp->bytes_ready -= count; + rar->progress.bytes_done += count; + buffer_size -= count; + buffer = (uint8_t *)buffer + count; + } + if (buffer_size == 0) + return true; + + if (uncomp->br.at_eof) + return false; + + if (uncomp_v3 && uncomp_v3->filters.lastend == uncomp_v3->filters.filterstart) { + if (!rar_run_filters(rar)) + return false; + continue; + } + +FinishBlock: + if 
(uncomp->start_new_table && !rar_parse_codes(rar)) + return false; + + end = rar->progress.bytes_done + rar->solid.size_total + LZSS_WINDOW_SIZE - LZSS_OVERFLOW_SIZE; + if (uncomp_v3 && uncomp_v3->filters.filterstart < end) + end = uncomp_v3->filters.filterstart; + end = (size_t)rar_expand(rar, end); + if (end == (size_t)-1 || end < rar->progress.bytes_done + rar->solid.size_total) + return false; + uncomp->bytes_ready = end - rar->progress.bytes_done - rar->solid.size_total; + if (uncomp_v3) + uncomp_v3->filters.lastend = end; + + if (uncomp_v3 && uncomp_v3->is_ppmd_block && uncomp->start_new_table) + goto FinishBlock; + } +} diff --git a/external/unarr/tar/parse-tar.c b/external/unarr/tar/parse-tar.c new file mode 100644 index 00000000..ceb74329 --- /dev/null +++ b/external/unarr/tar/parse-tar.c @@ -0,0 +1,280 @@ +/* Copyright 2015 the unarr project authors (see AUTHORS file). + License: LGPLv3 */ + +#include "tar.h" + +static bool tar_is_number(const char *data, size_t size) +{ + size_t i; + + for (i = 0; i < size; i++) { + if ((data[i] < '0' || '7' < data[i]) && data[i] != ' ' && data[i] != '\0') + return false; + } + + return true; +} + +static uint64_t tar_parse_number(const char *data, size_t size) +{ + uint64_t value = 0; + size_t i; + + for (i = 0; i < size; i++) { + if (data[i] == ' ' || data[i] == '\0') + continue; + if (data[i] < '0' || '7' < data[i]) + break; + value = value * 8 + (data[i] - '0'); + } + + return value; +} + +static bool tar_is_zeroed_block(const char *data) +{ + size_t i; + for (i = 0; i < TAR_BLOCK_SIZE; i++) { + if (data[i] != 0) + return false; + } + return true; +} + +static bool ar_is_valid_utf8(const char *string) +{ + const unsigned char *s; + for (s = (const unsigned char *)string; *s; s++) { + int skip = *s < 0x80 ? 0 : + *s < 0xC0 ? -1 : + *s < 0xE0 ? 1 : + *s < 0xF0 ? 2 : + *s < 0xF5 ? 
3 : -1; + if (skip < 0) + return false; + while (skip-- > 0) { + if ((*++s & 0xC0) != 0x80) + return false; + } + } + return true; +} + +bool tar_parse_header(ar_archive_tar *tar) +{ + char data[TAR_BLOCK_SIZE]; + uint32_t checksum; + int32_t checksum2; + size_t i; + + if (ar_read(tar->super.stream, data, sizeof(data)) != sizeof(data)) + return false; + + if (tar_is_zeroed_block(data)) { + free(tar->entry.name); + memset(&tar->entry, 0, sizeof(tar->entry)); + return true; + } + + if (!tar_is_number(data + 124, 12) || !tar_is_number(data + 136, 12) || !tar_is_number(data + 148, 8)) + return false; + + tar->entry.filesize = (size_t)tar_parse_number(data + 124, 12); + tar->entry.mtime = (tar_parse_number(data + 136, 12) + 11644473600) * 10000000; + tar->entry.checksum = (uint32_t)tar_parse_number(data + 148, 8); + tar->entry.filetype = data[156]; + free(tar->entry.name); + tar->entry.name = NULL; + + if (tar->entry.filetype == TYPE_FILE_OLD) { + i = 100; + while (--i > 0 && data[i] == '\0'); + if (data[i] == '/') + tar->entry.filetype = TYPE_DIRECTORY; + } + tar->entry.is_ustar = memcmp(data + 257, "ustar\x00""00", 8) == 0 && memcmp(data + 508, "tar\0", 4) != 0; + + if (tar->entry.filesize > (size_t)-1 - tar->super.entry_offset - 2 * TAR_BLOCK_SIZE) + return false; + + checksum = 0; + checksum2 = 0; + memset(data + 148, ' ', 8); + for (i = 0; i < sizeof(data); i++) { + checksum += (unsigned char)data[i]; + checksum2 += (signed char)data[i]; + } + + if (checksum != (uint32_t)checksum2 && tar->entry.checksum == (uint32_t)checksum2) { + log("Checksum was calculated using signed data"); + tar->entry.checksum = checksum; + } + return tar->entry.checksum == checksum; +} + +bool tar_handle_pax_extended(ar_archive *ar) +{ + ar_archive_tar *tar = (ar_archive_tar *)ar; + off64_t offset = ar->entry_offset; + size_t size = tar->entry.filesize; + char *data, *line; + + data = malloc(size); + if (!data) { + log("Ignoring PAX extended header on OOM"); + return ar_parse_entry(ar); + 
} + if (!ar_entry_uncompress(ar, data, size) || !ar_parse_entry(ar)) { + free(data); + return false; + } + if (tar->last_seen_dir > offset) { + free(data); + return true; + } + + line = data; + while (line < data + size) { + char *key, *value, *ptr; + size_t length, max_size = line - data + size; + + ptr = memchr(line, '=', max_size); + if (!ptr || *line < '1' || '9' < *line) { + warn("Invalid PAX extended header record @%" PRIi64, offset); + break; + } + value = ptr + 1; + *ptr = '\0'; + length = (size_t)strtoul(line, &ptr, 10); + if (max_size < length || length <= (size_t)(value - line) || line[length - 1] != '\n' || *ptr != ' ') { + warn("Invalid PAX extended header record @%" PRIi64, offset); + break; + } + key = ptr + 1; + line += length; + line[-1] = '\0'; + + if (strcmp(key, "path") == 0) { + ptr = malloc(strlen(value) + 1); + if (ptr) { + strcpy(ptr, value); + free(tar->entry.name); + tar->entry.name = ptr; + } + } + else if (strcmp(key, "mtime") == 0) + tar->entry.mtime = (time64_t)((strtod(value, &ptr) + 11644473600) * 10000000); + else if (strcmp(key, "size") == 0) + tar->entry.filesize = (size_t)strtoul(value, &ptr, 10); + else + log("Skipping value for %s", key); + } + free(data); + + tar_get_name(ar, false); + ar->entry_offset = offset; + ar->entry_size_uncompressed = tar->entry.filesize; + ar->entry_filetime = tar->entry.mtime; + + return true; +} + +bool tar_handle_gnu_longname(ar_archive *ar) +{ + ar_archive_tar *tar = (ar_archive_tar *)ar; + off64_t offset = ar->entry_offset; + size_t size = tar->entry.filesize; + char *longname; + + longname = malloc(size + 1); + if (!longname || size == (size_t)-1) { + log("Falling back to the short filename on OOM"); + free(longname); + return ar_parse_entry(ar); + } + if (!ar_entry_uncompress(ar, longname, size) || !ar_parse_entry(ar)) { + free(longname); + return false; + } + if (tar->last_seen_dir > offset) { + free(longname); + return true; + } + if (tar->entry.name) { + log("Skipping GNU long filename in 
favor of PAX name"); + free(longname); + return true; + } + longname[size] = '\0'; + ar->entry_offset = offset; + /* name could be in any encoding, assume UTF-8 or whatever (DOS) */ + if (ar_is_valid_utf8(longname)) { + tar->entry.name = longname; + } + else { + tar->entry.name = ar_conv_dos_to_utf8(longname); + free(longname); + } + + return true; +} + +const char *tar_get_name(ar_archive *ar, bool raw) +{ + if (raw) + return NULL; + + ar_archive_tar *tar = (ar_archive_tar *)ar; + if (!tar->entry.name) { + char *name; + + if (!ar_seek(ar->stream, ar->entry_offset, SEEK_SET)) + return NULL; + + name = malloc(100 + 1); + if (!name || ar_read(ar->stream, name, 100) != 100) { + free(name); + ar_seek(ar->stream, ar->entry_offset + TAR_BLOCK_SIZE, SEEK_SET); + return NULL; + } + name[100] = '\0'; + + if (tar->entry.is_ustar) { + char *prefixed = malloc(256 + 1); + if (!prefixed || !ar_skip(ar->stream, 245) || ar_read(ar->stream, prefixed, 167) != 167) { + free(name); + free(prefixed); + ar_seek(ar->stream, ar->entry_offset + TAR_BLOCK_SIZE, SEEK_SET); + return NULL; + } + if (prefixed[0] != '\0') { + prefixed[156] = '\0'; + strcat(prefixed, "/"); + strcat(prefixed, name); + free(name); + name = prefixed; + prefixed = NULL; + } + free(prefixed); + } + else + ar_skip(ar->stream, TAR_BLOCK_SIZE - 100); + + /* name could be in any encoding, assume UTF-8 or whatever (DOS) */ + if (ar_is_valid_utf8(name)) { + tar->entry.name = name; + } + else { + tar->entry.name = ar_conv_dos_to_utf8(name); + free(name); + } + /* normalize path separators */ + if (tar->entry.name) { + char *p = tar->entry.name; + while ((p = strchr(p, '\\')) != NULL) { + *p = '/'; + } + } + } + return tar->entry.name; +} diff --git a/external/unarr/tar/tar.c b/external/unarr/tar/tar.c new file mode 100644 index 00000000..53bd15e8 --- /dev/null +++ b/external/unarr/tar/tar.c @@ -0,0 +1,93 @@ +/* Copyright 2015 the unarr project authors (see AUTHORS file). 
+ License: LGPLv3 */ + +#include "tar.h" + +static void tar_close(ar_archive *ar) +{ + ar_archive_tar *tar = (ar_archive_tar *)ar; + free(tar->entry.name); +} + +static bool tar_parse_entry(ar_archive *ar, off64_t offset) +{ + ar_archive_tar *tar = (ar_archive_tar *)ar; + + if (!ar_seek(ar->stream, offset, SEEK_SET)) { + warn("Couldn't seek to offset %" PRIi64, offset); + return false; + } + if (!tar_parse_header(tar)) { + warn("Invalid tar header data @%" PRIi64, offset); + return false; + } + if (!tar->entry.checksum) { + ar->at_eof = true; + return false; + } + + ar->entry_offset = offset; + ar->entry_offset_next = offset + TAR_BLOCK_SIZE + (tar->entry.filesize + TAR_BLOCK_SIZE - 1) / TAR_BLOCK_SIZE * TAR_BLOCK_SIZE; + ar->entry_size_uncompressed = tar->entry.filesize; + ar->entry_filetime = tar->entry.mtime; + tar->bytes_done = 0; + + if (tar->last_seen_dir > offset) + tar->last_seen_dir = 0; + + switch (tar->entry.filetype) { + case TYPE_FILE: + case TYPE_FILE_OLD: + return true; + case TYPE_DIRECTORY: + log("Skipping directory entry \"%s\"", tar_get_name(ar, false)); + tar->last_seen_dir = ar->entry_offset; + return tar_parse_entry(ar, ar->entry_offset_next); + case TYPE_PAX_GLOBAL: + log("Skipping PAX global extended header record"); + return tar_parse_entry(ar, ar->entry_offset_next); + case TYPE_PAX_EXTENDED: + return tar_handle_pax_extended(ar); + case TYPE_GNU_LONGNAME: + return tar_handle_gnu_longname(ar); + default: + warn("Unknown entry type '%c'", tar->entry.filetype); + return true; + } +} + +static bool tar_uncompress(ar_archive *ar, void *buffer, size_t count) +{ + ar_archive_tar *tar = (ar_archive_tar *)ar; + if (count > ar->entry_size_uncompressed - tar->bytes_done) { + warn("Requesting too much data (%" PRIuPTR " < %" PRIuPTR ")", ar->entry_size_uncompressed - tar->bytes_done, count); + return false; + } + if (ar_read(ar->stream, buffer, count) != count) { + warn("Unexpected EOF in stored data"); + return false; + } + tar->bytes_done += count; 
+ return true; +} + +ar_archive *ar_open_tar_archive(ar_stream *stream) +{ + ar_archive *ar; + ar_archive_tar *tar; + + if (!ar_seek(stream, 0, SEEK_SET)) + return NULL; + + ar = ar_open_archive(stream, sizeof(ar_archive_tar), tar_close, tar_parse_entry, tar_get_name, tar_uncompress, NULL, 0); + if (!ar) + return NULL; + + tar = (ar_archive_tar *)ar; + if (!tar_parse_header(tar) || !tar->entry.checksum) { + free(ar); + return NULL; + } + + return ar; +} diff --git a/external/unarr/tar/tar.h b/external/unarr/tar/tar.h new file mode 100644 index 00000000..db6d65eb --- /dev/null +++ b/external/unarr/tar/tar.h @@ -0,0 +1,46 @@ +/* Copyright 2015 the unarr project authors (see AUTHORS file). + License: LGPLv3 */ + +#ifndef tar_tar_h +#define tar_tar_h + +#include "../common/unarr-imp.h" + +typedef struct ar_archive_tar_s ar_archive_tar; + +/***** parse-tar *****/ + +#define TAR_BLOCK_SIZE 512 + +enum tar_filetype { + TYPE_FILE = '0', TYPE_FILE_OLD = '\0', + TYPE_HARD_LINK = '1', TYPE_SOFT_LINK = '2', + TYPE_DIRECTORY = '5', + TYPE_GNU_LONGNAME = 'L', + TYPE_PAX_GLOBAL = 'g', TYPE_PAX_EXTENDED = 'x', +}; + +struct tar_entry { + char *name; + size_t filesize; + time64_t mtime; + uint32_t checksum; + char filetype; + bool is_ustar; +}; + +bool tar_parse_header(ar_archive_tar *tar); +bool tar_handle_pax_extended(ar_archive *ar); +bool tar_handle_gnu_longname(ar_archive *ar); +const char *tar_get_name(ar_archive *ar, bool raw); + +/***** tar *****/ + +struct ar_archive_tar_s { + ar_archive super; + struct tar_entry entry; + size_t bytes_done; + off64_t last_seen_dir; +}; + +#endif diff --git a/external/unarr/test/CMakeLists.txt b/external/unarr/test/CMakeLists.txt new file mode 100644 index 00000000..cd024add --- /dev/null +++ b/external/unarr/test/CMakeLists.txt @@ -0,0 +1,81 @@ +# project (unarr-test C) + +include(ProcessorCount) +ProcessorCount(N) + +if(BUILD_FUZZER) + add_executable(fuzzer fuzzer.c) + set_target_properties(fuzzer PROPERTIES LINK_FLAGS "${sanitize_opts}") 
+ target_link_libraries(fuzzer unarr) + + file(MAKE_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/corpus) + file(MAKE_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/corpus/fuzzed) + + add_test(NAME fuzzer_test + COMMAND fuzzer ${CMAKE_CURRENT_SOURCE_DIR}/corpus/fuzzed + ${CMAKE_CURRENT_SOURCE_DIR}/corpus -jobs=${N}) +endif() + +if(BUILD_INTEGRATION_TESTS) + add_executable(unarr-test main.c) + target_link_libraries(unarr-test unarr) + + function(run_test file) + add_test(NAME ${file} COMMAND unarr-test + ${CMAKE_CURRENT_SOURCE_DIR}/corpus/${file}) + set_tests_properties( + ${file} + PROPERTIES ENVIRONMENT + "PATH=${CMAKE_BINARY_DIR}/${CMAKE_BUILD_TYPE};$ENV{PATH}") + endfunction(run_test file) + + # Rar + run_test(integration/lipsum_rar4_store.rar) + run_test(integration/lipsum_rar4_default.rar) + run_test(integration/lipsum_rar4_max.rar) + # Non-standard compression methods + run_test(integration/lipsum_rar4_ppmd.rar) # Text + run_test(integration/lipsum_rar4_delta.rar) + run_test(integration/silent_rar4_audio.rar) + # TODO bmp - RGB color (bmp?) 
+ + # Tar + run_test(integration/lipsum.tar) + + # Zip + run_test(integration/lipsum_zip_copy.zip) + run_test(integration/lipsum_zip_default.zip) + run_test(integration/lipsum_zip_max.zip) + # Non-standard compression methods + if(BZIP2_FOUND) + run_test(integration/lipsum_zip_bzip2.zip) + endif() + run_test(integration/lipsum_zip_deflate64.zip) + run_test(integration/lipsum_zip_lzma1.zip) + run_test(integration/lipsum_zip_ppmd.zip) + if(LIBLZMA_FOUND) + run_test(integration/lipsum_zip_xz.zip) + endif() + + # 7z + if(ENABLE_7Z) + run_test(integration/lipsum_7z_store.7z) + run_test(integration/lipsum_7z_default.7z) + run_test(integration/lipsum_7z_max.7z) + # Non-standard compression methods + run_test(integration/lipsum_7z_lzma1.7z) + run_test(integration/lipsum_7z_ppmd.7z) + endif() +endif() + +if(BUILD_UNIT_TESTS) + find_package(cmocka CONFIG REQUIRED) + add_executable(crc32-test crc32-test.c) + if(TARGET cmocka::cmocka) + target_link_libraries(crc32-test PRIVATE cmocka::cmocka) + else() + target_include_directories(crc32-test PRIVATE ${CMOCKA_INCLUDE_DIR}) + target_link_libraries(crc32-test PRIVATE ${CMOCKA_LIBRARY}) + endif() + add_test(crc32 crc32-test) +endif() diff --git a/external/unarr/test/Makefile b/external/unarr/test/Makefile new file mode 100644 index 00000000..bffba29f --- /dev/null +++ b/external/unarr/test/Makefile @@ -0,0 +1,74 @@ +# GNU Makefile + +build ?= debug + +OUT := build/$(build) + +default: all + +# --- Configuration --- + +CFLAGS += -Wall -D_FILE_OFFSET_BITS=64 +LIBS += -lm + +ifeq "$(build)" "debug" +CFLAGS += -pipe -g -DDEBUG +else ifeq "$(build)" "profile" +CFLAGS += -pipe -O3 -DNDEBUG -pg +LDFLAGS += -pg +else ifeq "$(build)" "release" +CFLAGS += -pipe -O3 -DNDEBUG -fomit-frame-pointer +else ifeq "$(build)" "coverage" +CFLAGS += -pipe -g -DDEBUG -pg -fprofile-arcs -ftest-coverage +LIBS += -lgcov +else +$(error unknown build setting: '$(build)') +endif + +# --- Commands --- + +ifneq "$(verbose)" "yes" +QUIET_AR = @ echo ' ' ' ' AR $@ 
; +QUIET_CC = @ echo ' ' ' ' CC $@ ; +QUIET_LINK = @ echo ' ' ' ' LINK $@ ; +endif + +CC_CMD = $(QUIET_CC) mkdir -p $(@D) ; $(CC) $(CFLAGS) -o $@ -c $< +AR_CMD = $(QUIET_AR) $(AR) cr $@ $^ +LINK_CMD = $(QUIET_LINK) $(CC) $(LDFLAGS) -o $@ $^ $(LIBS) + +# --- Third party libraries --- + +# TODO: build zlib, bzip2 and 7z when available + +# --- unarr files --- + +UNARR_OUT := $(OUT)/unarr + +UNARR_DIRS := .. ../common ../lzmasdk ../rar ../tar ../zip ../_7z +UNARR_SRC := $(wildcard $(UNARR_DIRS:=/*.c)) +UNARR_OBJ := $(addprefix $(UNARR_OUT)/, $(addsuffix .o, $(basename $(UNARR_SRC)))) + +$(UNARR_OUT)/%.o : %.c + $(CC_CMD) + +UNARR_LIB := $(OUT)/libunarr.a + +$(UNARR_LIB): $(UNARR_OBJ) + $(AR_CMD) + +UNARR_TEST := $(OUT)/unarr-test + +$(UNARR_TEST) : $(UNARR_OUT)/main.o $(UNARR_LIB) + $(LINK_CMD) + +# TODO: add header dependencies + +# --- Clean and Default --- + +all: $(UNARR_TEST) + +clean: + rm -rf build + +.PHONY: all clean diff --git a/external/unarr/test/corpus/integration/lipsum.tar b/external/unarr/test/corpus/integration/lipsum.tar new file mode 100644 index 00000000..246c7e59 Binary files /dev/null and b/external/unarr/test/corpus/integration/lipsum.tar differ diff --git a/external/unarr/test/corpus/integration/lipsum_7z_default.7z b/external/unarr/test/corpus/integration/lipsum_7z_default.7z new file mode 100644 index 00000000..1bac4aaf Binary files /dev/null and b/external/unarr/test/corpus/integration/lipsum_7z_default.7z differ diff --git a/external/unarr/test/corpus/integration/lipsum_7z_lzma1.7z b/external/unarr/test/corpus/integration/lipsum_7z_lzma1.7z new file mode 100644 index 00000000..4f5ba1cf Binary files /dev/null and b/external/unarr/test/corpus/integration/lipsum_7z_lzma1.7z differ diff --git a/external/unarr/test/corpus/integration/lipsum_7z_max.7z b/external/unarr/test/corpus/integration/lipsum_7z_max.7z new file mode 100644 index 00000000..0d090359 Binary files /dev/null and b/external/unarr/test/corpus/integration/lipsum_7z_max.7z differ 
diff --git a/external/unarr/test/corpus/integration/lipsum_7z_ppmd.7z b/external/unarr/test/corpus/integration/lipsum_7z_ppmd.7z new file mode 100644 index 00000000..1dbe4347 Binary files /dev/null and b/external/unarr/test/corpus/integration/lipsum_7z_ppmd.7z differ diff --git a/external/unarr/test/corpus/integration/lipsum_7z_store.7z b/external/unarr/test/corpus/integration/lipsum_7z_store.7z new file mode 100644 index 00000000..c63bf6dd Binary files /dev/null and b/external/unarr/test/corpus/integration/lipsum_7z_store.7z differ diff --git a/external/unarr/test/corpus/integration/lipsum_rar4_default.rar b/external/unarr/test/corpus/integration/lipsum_rar4_default.rar new file mode 100644 index 00000000..47082423 Binary files /dev/null and b/external/unarr/test/corpus/integration/lipsum_rar4_default.rar differ diff --git a/external/unarr/test/corpus/integration/lipsum_rar4_delta.rar b/external/unarr/test/corpus/integration/lipsum_rar4_delta.rar new file mode 100644 index 00000000..47082423 Binary files /dev/null and b/external/unarr/test/corpus/integration/lipsum_rar4_delta.rar differ diff --git a/external/unarr/test/corpus/integration/lipsum_rar4_max.rar b/external/unarr/test/corpus/integration/lipsum_rar4_max.rar new file mode 100644 index 00000000..230737ab Binary files /dev/null and b/external/unarr/test/corpus/integration/lipsum_rar4_max.rar differ diff --git a/external/unarr/test/corpus/integration/lipsum_rar4_ppmd.rar b/external/unarr/test/corpus/integration/lipsum_rar4_ppmd.rar new file mode 100644 index 00000000..7f8a7fb3 Binary files /dev/null and b/external/unarr/test/corpus/integration/lipsum_rar4_ppmd.rar differ diff --git a/external/unarr/test/corpus/integration/lipsum_rar4_store.rar b/external/unarr/test/corpus/integration/lipsum_rar4_store.rar new file mode 100644 index 00000000..d74ec399 Binary files /dev/null and b/external/unarr/test/corpus/integration/lipsum_rar4_store.rar differ diff --git 
a/external/unarr/test/corpus/integration/lipsum_zip_bzip2.zip b/external/unarr/test/corpus/integration/lipsum_zip_bzip2.zip new file mode 100644 index 00000000..e049c036 Binary files /dev/null and b/external/unarr/test/corpus/integration/lipsum_zip_bzip2.zip differ diff --git a/external/unarr/test/corpus/integration/lipsum_zip_copy.zip b/external/unarr/test/corpus/integration/lipsum_zip_copy.zip new file mode 100644 index 00000000..547df1b7 Binary files /dev/null and b/external/unarr/test/corpus/integration/lipsum_zip_copy.zip differ diff --git a/external/unarr/test/corpus/integration/lipsum_zip_default.zip b/external/unarr/test/corpus/integration/lipsum_zip_default.zip new file mode 100644 index 00000000..114f6f5d Binary files /dev/null and b/external/unarr/test/corpus/integration/lipsum_zip_default.zip differ diff --git a/external/unarr/test/corpus/integration/lipsum_zip_deflate64.zip b/external/unarr/test/corpus/integration/lipsum_zip_deflate64.zip new file mode 100644 index 00000000..f4e3e23d Binary files /dev/null and b/external/unarr/test/corpus/integration/lipsum_zip_deflate64.zip differ diff --git a/external/unarr/test/corpus/integration/lipsum_zip_lzma1.zip b/external/unarr/test/corpus/integration/lipsum_zip_lzma1.zip new file mode 100644 index 00000000..d575ea1f Binary files /dev/null and b/external/unarr/test/corpus/integration/lipsum_zip_lzma1.zip differ diff --git a/external/unarr/test/corpus/integration/lipsum_zip_max.zip b/external/unarr/test/corpus/integration/lipsum_zip_max.zip new file mode 100644 index 00000000..ee62603a Binary files /dev/null and b/external/unarr/test/corpus/integration/lipsum_zip_max.zip differ diff --git a/external/unarr/test/corpus/integration/lipsum_zip_ppmd.zip b/external/unarr/test/corpus/integration/lipsum_zip_ppmd.zip new file mode 100644 index 00000000..4fb4e675 Binary files /dev/null and b/external/unarr/test/corpus/integration/lipsum_zip_ppmd.zip differ diff --git 
a/external/unarr/test/corpus/integration/lipsum_zip_xz.zip b/external/unarr/test/corpus/integration/lipsum_zip_xz.zip new file mode 100644 index 00000000..67080df7 Binary files /dev/null and b/external/unarr/test/corpus/integration/lipsum_zip_xz.zip differ diff --git a/external/unarr/test/corpus/integration/silent_rar4_audio.rar b/external/unarr/test/corpus/integration/silent_rar4_audio.rar new file mode 100644 index 00000000..753cd128 Binary files /dev/null and b/external/unarr/test/corpus/integration/silent_rar4_audio.rar differ diff --git a/external/unarr/test/corpus/src/lorem_ipsum.txt b/external/unarr/test/corpus/src/lorem_ipsum.txt new file mode 100644 index 00000000..ecb92123 --- /dev/null +++ b/external/unarr/test/corpus/src/lorem_ipsum.txt @@ -0,0 +1,15 @@ +Lorem ipsum dolor sit amet, consectetur adipisici elit, sed eiusmod tempor incidunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquid ex ea commodi consequat. Quis aute iure reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint obcaecat cupiditat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. + +-- + +Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat. + +Ut wisi enim ad minim veniam, quis nostrud exerci tation ullamcorper suscipit lobortis nisl ut aliquip ex ea commodo consequat. 
Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. + +Nam liber tempor cum soluta nobis eleifend option congue nihil imperdiet doming id quod mazim placerat facer possim assum. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat. Ut wisi enim ad minim veniam, quis nostrud exerci tation ullamcorper suscipit lobortis nisl ut aliquip ex ea commodo consequat. + +Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis. + +At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, At accusam aliquyam diam diam dolore dolores duo eirmod eos erat, et nonumy sed tempor et et invidunt justo labore Stet clita ea et gubergren, kasd magna no rebum. sanctus sea sed takimata ut vero voluptua. est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat. + +Consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. 
Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. \ No newline at end of file diff --git a/external/unarr/test/corpus/src/silent_quarter-second.wav b/external/unarr/test/corpus/src/silent_quarter-second.wav new file mode 100644 index 00000000..64b2c52f Binary files /dev/null and b/external/unarr/test/corpus/src/silent_quarter-second.wav differ diff --git a/external/unarr/test/crc32-test.c b/external/unarr/test/crc32-test.c new file mode 100644 index 00000000..e24df71c --- /dev/null +++ b/external/unarr/test/crc32-test.c @@ -0,0 +1,48 @@ +#include +#include +#include +#include + +#include "../common/crc32.c" + +static void crc32_9(void **state) { + assert_int_equal(0xCBF43926, ar_crc32(0, "123456789", 9)); +} + +static void crc32_8(void **state) { + assert_int_equal(0x9AE0DAAF, ar_crc32(0, "12345678", 8)); +} + +static void crc32_4(void **state) { + assert_int_equal(0x9BE3E0A3, ar_crc32(0, "1234", 4)); +} + +static void crc32_2(void **state) { + assert_int_equal(0x4F5344CD, ar_crc32(0, "12", 2)); +} + +static void crc32_1(void **state) { + assert_int_equal(0x83DCEFB7, ar_crc32(0, "1", 1)); +} + +static void crc32_1_0(void **state) { + assert_int_equal(0xD202EF8D, ar_crc32(0, "\0", 1)); +} + +static void crc32_1_FF(void **state) { + assert_int_equal(0xFF000000, ar_crc32(0, "\xFF", 1)); +} + +int main(void) { + const struct CMUnitTest tests[] = { + cmocka_unit_test(crc32_9), + cmocka_unit_test(crc32_8), + cmocka_unit_test(crc32_4), + 
cmocka_unit_test(crc32_2), + cmocka_unit_test(crc32_1), + cmocka_unit_test(crc32_1_0), + cmocka_unit_test(crc32_1_FF) + }; + + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/external/unarr/test/fuzzer.c b/external/unarr/test/fuzzer.c new file mode 100644 index 00000000..1f9b3d31 --- /dev/null +++ b/external/unarr/test/fuzzer.c @@ -0,0 +1,65 @@ +#include "unarr.h" +#include + +ar_archive *ar_open_any_archive(ar_stream *stream) { + ar_archive *ar = ar_open_zip_archive(stream, false); + if (!ar) + ar = ar_open_zip_archive(stream, true); + if (!ar) + ar = ar_open_rar_archive(stream); + if (!ar) + ar = ar_open_7z_archive(stream); + if (!ar) + ar = ar_open_tar_archive(stream); + return ar; +} + +void read_test(ar_stream *stream) { + + ar_archive *ar = ar_open_any_archive(stream); + + if (!ar) { + return; + } + + while (true) { + + if (ar_at_eof(ar)) { + break; + } + + bool ok = ar_parse_entry(ar); + + if (!ok) { + break; + } + + size_t size = ar_entry_get_size(ar); + while (size > 0) { + unsigned char buffer[1024]; + size_t count = size < sizeof(buffer) ? size : sizeof(buffer); + if (!ar_entry_uncompress(ar, buffer, count)) + break; + size -= count; + } + } + + if (!ar_at_eof(ar)) { + printf("not reach eof\n"); + } + + ar_close_archive(ar); +} + +int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { + + ar_stream *stream; + + stream = ar_open_memory(Data, Size); + + read_test(stream); + + ar_close(stream); + + return 0; // Non-zero return values are reserved for future use. +} diff --git a/external/unarr/test/main.c b/external/unarr/test/main.c new file mode 100644 index 00000000..b394deab --- /dev/null +++ b/external/unarr/test/main.c @@ -0,0 +1,81 @@ +/* Copyright 2015 the unarr project authors (see AUTHORS file). 
+ License: LGPLv3 */ + +/* demonstration of most of the public unarr API: + parses and decompresses an archive into memory (integrity test) */ + +#include "unarr.h" + +#include +#include +#include +#if !defined(NDEBUG) && defined(_MSC_VER) +#include +#include +#endif + +ar_archive *ar_open_any_archive(ar_stream *stream, const char *fileext) +{ + ar_archive *ar = ar_open_rar_archive(stream); + if (!ar) + ar = ar_open_zip_archive(stream, fileext && (strcmp(fileext, ".xps") == 0 || strcmp(fileext, ".epub") == 0)); + if (!ar) + ar = ar_open_7z_archive(stream); + if (!ar) + ar = ar_open_tar_archive(stream); + return ar; +} + +#define FailIf(cond, msg, ...) if (cond) { fprintf(stderr, msg "\n", __VA_ARGS__); goto CleanUp; } error_step++ + +int main(int argc, char *argv[]) +{ + ar_stream *stream = NULL; + ar_archive *ar = NULL; + int entry_count = 1; + int entry_skips = 0; + int error_step = 1; + +#if !defined(NDEBUG) && defined(_MSC_VER) + if (!IsDebuggerPresent()) { + _CrtSetReportMode(_CRT_WARN, _CRTDBG_MODE_FILE); + _CrtSetReportFile(_CRT_WARN, _CRTDBG_FILE_STDERR); + } + _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF); +#endif + + FailIf(argc != 2, "Syntax: %s ", argv[0]); + + stream = ar_open_file(argv[1]); + FailIf(!stream, "Error: File \"%s\" not found!", argv[1]); + + printf("Parsing \"%s\":\n", argv[1]); + ar = ar_open_any_archive(stream, strrchr(argv[1], '.')); + FailIf(!ar, "Error: No valid %s archive!", "RAR, ZIP, 7Z or TAR"); + + while (ar_parse_entry(ar)) { + size_t size = ar_entry_get_size(ar); + printf("%02d. %s (@%" PRIi64 ")\n", entry_count++, ar_entry_get_name(ar), ar_entry_get_offset(ar)); + const char *raw_filename = ar_entry_get_raw_name(ar); + if (raw_filename) + printf("Raw filename %s \n", raw_filename); + while (size > 0) { + unsigned char buffer[1024]; + size_t count = size < sizeof(buffer) ? 
size : sizeof(buffer); + if (!ar_entry_uncompress(ar, buffer, count)) + break; + size -= count; + } + if (size > 0) { + fprintf(stderr, "Warning: Failed to uncompress... skipping\n"); + entry_skips++; + } + } + FailIf(!ar_at_eof(ar), "Error: Failed to parse entry %d!", entry_count); + error_step = entry_skips > 0 ? 1000 + entry_skips : 0; + +CleanUp: + ar_close_archive(ar); + ar_close(stream); + return error_step; +} diff --git a/external/unarr/unarr-config.cmake.in b/external/unarr/unarr-config.cmake.in new file mode 100644 index 00000000..1c95f9b8 --- /dev/null +++ b/external/unarr/unarr-config.cmake.in @@ -0,0 +1,11 @@ +@PACKAGE_INIT@ + +include(CMakeFindDependencyMacro) + +@UNARR_DEPENDS_BZip2@ +@UNARR_DEPENDS_LibLZMA@ +@UNARR_DEPENDS_ZLIB@ + +if (NOT TARGET unarr::unarr) + include("${CMAKE_CURRENT_LIST_DIR}/unarr-targets.cmake") +endif() diff --git a/external/unarr/unarr.h b/external/unarr/unarr.h new file mode 100644 index 00000000..b9bdbc73 --- /dev/null +++ b/external/unarr/unarr.h @@ -0,0 +1,133 @@ +/* Copyright 2020 the unarr project authors (see AUTHORS file). 
+ License: LGPLv3 */ + +#ifndef unarr_h +#define unarr_h + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include + + +/* macros for shared library usage */ + +#if defined (UNARR_IS_SHARED_LIBRARY) +#if defined (_WIN32) + +#if defined (UNARR_EXPORT_SYMBOLS) +#define UNARR_EXPORT __declspec(dllexport) +#else +#define UNARR_EXPORT __declspec(dllimport) +#endif // UNARR_EXPORT_SYMBOLS + +#else // _WIN32 + +#if defined (UNARR_EXPORT_SYMBOLS) +#define UNARR_EXPORT __attribute__((visibility("default"))) +#else +#define UNARR_EXPORT +#endif // UNARR_EXPORT_SYMBOLS +#endif // _WIN32 + +#else // defined UNARR_IS_SHARED_LIBRARY +#define UNARR_EXPORT + +#endif // UNARR_IS_SHARED_LIBRARY + + +typedef int64_t off64_t; +typedef int64_t time64_t; + +#define UNARR_API_VERSION 110 + +/***** common/stream *****/ + +typedef struct ar_stream_s ar_stream; + +/* opens a read-only stream for the given file path; returns NULL on error */ +UNARR_EXPORT ar_stream *ar_open_file(const char *path); +#ifdef _WIN32 +UNARR_EXPORT ar_stream *ar_open_file_w(const wchar_t *path); +#endif +/* opens a read-only stream for the given chunk of memory; the pointer must be valid until ar_close is called */ +UNARR_EXPORT ar_stream *ar_open_memory(const void *data, size_t datalen); +#ifdef _WIN32 +typedef struct IStream IStream; +/* opens a read-only stream based on the given IStream */ +UNARR_EXPORT ar_stream *ar_open_istream(IStream *stream); +#endif + +/* closes the stream and releases underlying resources */ +UNARR_EXPORT void ar_close(ar_stream *stream); +/* tries to read 'count' bytes into buffer, advancing the read offset pointer; returns the actual number of bytes read */ +UNARR_EXPORT size_t ar_read(ar_stream *stream, void *buffer, size_t count); +/* moves the read offset pointer (same as fseek); returns false on failure */ +UNARR_EXPORT bool ar_seek(ar_stream *stream, off64_t offset, int origin); +/* shortcut for ar_seek(stream, count, SEEK_CUR); returns false on failure */ 
+UNARR_EXPORT bool ar_skip(ar_stream *stream, off64_t count); +/* returns the current read offset (or 0 on error) */ +UNARR_EXPORT off64_t ar_tell(ar_stream *stream); + +/***** common/unarr *****/ + +typedef struct ar_archive_s ar_archive; + +/* frees all data stored for the given archive; does not close the underlying stream */ +UNARR_EXPORT void ar_close_archive(ar_archive *ar); +/* reads the next archive entry; returns false on error or at the end of the file (use ar_at_eof to distinguish the two cases) */ +UNARR_EXPORT bool ar_parse_entry(ar_archive *ar); +/* reads the archive entry at the given offset as returned by ar_entry_get_offset (offset 0 always restarts at the first entry); should always succeed */ +UNARR_EXPORT bool ar_parse_entry_at(ar_archive *ar, off64_t offset); +/* reads the (first) archive entry associated with the given name; returns false if the entry couldn't be found */ +UNARR_EXPORT bool ar_parse_entry_for(ar_archive *ar, const char *entry_name); +/* returns whether the last ar_parse_entry call has reached the file's expected end */ +UNARR_EXPORT bool ar_at_eof(ar_archive *ar); +/* returns the name of the current entry as UTF-8 string; this pointer is only valid until the next call to ar_parse_entry; returns NULL on failure */ +UNARR_EXPORT const char *ar_entry_get_name(ar_archive *ar); +/* fallback function for non-spec archives with bad name encodings, i.e. 
some zip files */ +/* returns the name of the current entry as raw string, this pointer is only valid until the next call to ar_parse_entry; NULL on failure */ +/* this is currently only needed for zip archives; calling it on other archives will result in NULL */ +UNARR_EXPORT const char *ar_entry_get_raw_name(ar_archive *ar); +/* returns the stream offset of the current entry for use with ar_parse_entry_at */ +UNARR_EXPORT off64_t ar_entry_get_offset(ar_archive *ar); +/* returns the total size of uncompressed data of the current entry; read exactly that many bytes using ar_entry_uncompress */ +UNARR_EXPORT size_t ar_entry_get_size(ar_archive *ar); +/* returns the stored modification date of the current entry in 100ns since 1601/01/01 */ +UNARR_EXPORT time64_t ar_entry_get_filetime(ar_archive *ar); +/* WARNING: don't manually seek in the stream between ar_parse_entry and the last corresponding ar_entry_uncompress call! */ +/* uncompresses the next 'count' bytes of the current entry into buffer; returns false on error */ +UNARR_EXPORT bool ar_entry_uncompress(ar_archive *ar, void *buffer, size_t count); + +/* copies at most 'count' bytes of the archive's global comment (if any) into buffer; returns the actual amout of bytes copied (or, if 'buffer' is NULL, the required buffer size) */ +UNARR_EXPORT size_t ar_get_global_comment(ar_archive *ar, void *buffer, size_t count); + +/***** rar/rar *****/ + +/* checks whether 'stream' could contain RAR data and prepares for archive listing/extraction; returns NULL on failure */ +UNARR_EXPORT ar_archive *ar_open_rar_archive(ar_stream *stream); + +/***** tar/tar *****/ + +/* checks whether 'stream' could contain TAR data and prepares for archive listing/extraction; returns NULL on failure */ +UNARR_EXPORT ar_archive *ar_open_tar_archive(ar_stream *stream); + +/***** zip/zip *****/ + +/* checks whether 'stream' could contain ZIP data and prepares for archive listing/extraction; returns NULL on failure */ +/* set deflatedonly for 
extracting XPS, EPUB, etc. documents where non-Deflate compression methods are not supported by specification */ +UNARR_EXPORT ar_archive *ar_open_zip_archive(ar_stream *stream, bool deflatedonly); + +/***** _7z/_7z *****/ + +/* checks whether 'stream' could contain 7Z data and prepares for archive listing/extraction; returns NULL on failure */ +UNARR_EXPORT ar_archive *ar_open_7z_archive(ar_stream *stream); + +#ifdef __cplusplus +} +#endif + +#endif //unarr_h diff --git a/external/unarr/zip/inflate.c b/external/unarr/zip/inflate.c new file mode 100644 index 00000000..00a25564 --- /dev/null +++ b/external/unarr/zip/inflate.c @@ -0,0 +1,486 @@ +/* Copyright 2015 the unarr project authors (see AUTHORS file). + License: LGPLv3 */ + +#include "inflate.h" +#include "../common/allocator.h" + +#include +#include +#include + +#ifdef _MSC_VER +#define UNARR_FORCE_INLINE __forceinline +#else +#define UNARR_FORCE_INLINE inline __attribute__((always_inline)) +#endif + +#define MAX_BITS 16 +#define TREE_FAST_BITS 10 +#define MAX_TREE_NODES 288 + +enum inflate_step { + STEP_NEXT_BLOCK = 0, + STEP_COPY_INIT, STEP_COPY, + STEP_INFLATE_STATIC_INIT, STEP_INFLATE_DYNAMIC_INIT, STEP_INFLATE_DYNAMIC_INIT_PRETREE, STEP_INFLATE_DYNAMIC_INIT_TREES, + STEP_INFLATE_CODE, STEP_INFLATE, STEP_INFLATE_DISTANCE_CODE, STEP_INFLATE_DISTANCE, STEP_INFLATE_REPEAT, +}; +enum { RESULT_EOS = -1, RESULT_NOT_DONE = 0, RESULT_ERROR = 1 }; + +#if defined(_MSC_VER) || defined(__GNUC__) +#define RESULT_ERROR (RESULT_ERROR + __COUNTER__) +#endif + +struct tree { + struct { + unsigned value : 11; + unsigned is_value : 1; + unsigned length : 4; + } nodes[(1 << TREE_FAST_BITS) + MAX_TREE_NODES * 2]; + int next_node; +}; + +struct inflate_state_s { + enum inflate_step step; + struct { + int value; + int length; + int dist; + int tree_idx; + } state; + struct { + int hlit; + int hdist; + int hclen; + int idx; + int clens[288 + 32]; + } prepare; + bool inflate64; + bool is_final_block; + struct tree tree_lengths; + 
struct tree tree_dists; + struct { + const uint8_t *data_in; + size_t *avail_in; + uint64_t bits; + int available; + } in; + struct { + uint8_t *data_out; + size_t *avail_out; + uint8_t window[1 << 16]; + size_t offset; + } out; +}; + +static const struct { + int bits; + int length; +} table_lengths[30] = { + { 0, 3 }, { 0, 4 }, { 0, 5 }, { 0, 6 }, { 0, 7 }, { 0, 8 }, { 0, 9 }, { 0, 10 }, + { 1, 11 }, { 1, 13 }, { 1, 15 }, { 1, 17 }, { 2, 19 }, { 2, 23 }, { 2, 27 }, { 2, 31 }, + { 3, 35 }, { 3, 43 }, { 3, 51 }, { 3, 59 }, { 4, 67 }, { 4, 83 }, { 4, 99 }, { 4, 115 }, + { 5, 131 }, { 5, 163 }, { 5, 195 }, { 5, 227 }, + { 0, 258 }, /* Deflate64 (replaces { 0, 258 }) */ { 16, 3 } +}; + +static const struct { + int bits; + int dist; +} table_dists[32] = { + { 0, 1 }, { 0, 2 }, { 0, 3 }, { 0, 4 }, { 1, 5 }, { 1, 7 }, + { 2, 9 }, { 2, 13 }, { 3, 17 }, { 3, 25 }, { 4, 33 }, { 4, 49 }, + { 5, 65 }, { 5, 97 }, { 6, 129 }, { 6, 193 }, { 7, 257 }, { 7, 385 }, + { 8, 513 }, { 8, 769 }, { 9, 1025 }, { 9, 1537 }, { 10, 2049 }, { 10, 3073 }, + { 11, 4097 }, { 11, 6145 }, { 12, 8193 }, { 12, 12289 }, { 13, 16385 }, { 13, 24577 }, + /* Deflate64 */ { 14, 32769 }, { 14, 49153 } +}; + +static const int table_code_length_idxs[19] = { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 }; + +static UNARR_FORCE_INLINE bool br_ensure(inflate_state *state, int bits) +{ + while (state->in.available < bits) { + if (*state->in.avail_in == 0) + return false; + state->in.bits |= ((uint64_t)*state->in.data_in++ << state->in.available); + (*state->in.avail_in)--; + state->in.available += 8; + } + return true; +} + +static UNARR_FORCE_INLINE uint64_t br_bits(inflate_state *state, int bits) +{ + uint64_t res = state->in.bits & (((uint64_t)1 << bits) - 1); + state->in.available -= bits; + state->in.bits >>= bits; + return res; +} + +static UNARR_FORCE_INLINE void output(inflate_state *state, uint8_t value) +{ + *state->out.data_out++ = value; + (*state->out.avail_out)--; + 
state->out.window[state->out.offset++ & (sizeof(state->out.window) - 1)] = value; +} + +static bool tree_add_value(struct tree *tree, int key, int bits, int value) +{ + int rkey = 0, i; + for (i = 0; i < bits; i++) + rkey = (rkey << 1) | ((key >> i) & 1); + + if (bits <= TREE_FAST_BITS) { + if (tree->nodes[rkey].length) + return false; + tree->nodes[rkey].length = bits; + tree->nodes[rkey].value = value; + tree->nodes[rkey].is_value = true; + for (i = 1; i < (1 << (TREE_FAST_BITS - bits)); i++) { + if (tree->nodes[rkey | (i << bits)].length) + return false; + tree->nodes[rkey | (i << bits)] = tree->nodes[rkey]; + } + return true; + } + + rkey &= (1 << TREE_FAST_BITS) - 1; + if (tree->nodes[rkey].is_value) + return false; + tree->nodes[rkey].length = TREE_FAST_BITS + 1; + if (!tree->nodes[rkey].value) + tree->nodes[rkey].value = (1 << TREE_FAST_BITS) + tree->next_node++ * 2; + i = tree->nodes[rkey].value; + bits -= TREE_FAST_BITS; + + while (bits > 1) { + i |= (key >> (bits - 1)) & 1; + if (tree->nodes[i].is_value) + return false; + if (!tree->nodes[i].value) { + if (tree->next_node == MAX_TREE_NODES) + return false; + tree->nodes[i].value = (1 << TREE_FAST_BITS) + tree->next_node++ * 2; + } + i = tree->nodes[i].value; + bits--; + } + i |= key & 1; + if (tree->nodes[i].value || tree->nodes[i].is_value) + return false; + tree->nodes[i].value = value; + tree->nodes[i].is_value = true; + + return true; +} + +static UNARR_FORCE_INLINE int tree_get_value(inflate_state *state, const struct tree *tree, bool not_fast) +{ + if (state->state.tree_idx == 0) { + int key = state->in.bits & ((1 << TREE_FAST_BITS) - 1); + while (not_fast && state->in.available < TREE_FAST_BITS && state->in.available < (int)tree->nodes[key].length) { + if (!br_ensure(state, tree->nodes[key].length)) + return RESULT_NOT_DONE; + key = state->in.bits & ((1 << TREE_FAST_BITS) - 1); + } + if (tree->nodes[key].is_value) { + state->state.value = tree->nodes[key].value; + (void)br_bits(state, 
tree->nodes[key].length); + return RESULT_EOS; + } + if (tree->nodes[key].length == 0) + return RESULT_ERROR; + (void)br_bits(state, TREE_FAST_BITS); + state->state.tree_idx = tree->nodes[key].value; + } + while (state->state.value == -1) { + int idx; + if (not_fast && !br_ensure(state, 1)) + return RESULT_NOT_DONE; + idx = state->state.tree_idx | (int)br_bits(state, 1); + if (tree->nodes[idx].is_value) + state->state.value = tree->nodes[idx].value; + else if (tree->nodes[idx].value) + state->state.tree_idx = tree->nodes[idx].value; + else + return RESULT_ERROR; + } + state->state.tree_idx = 0; + return RESULT_EOS; +} + +static void setup_static_trees(inflate_state *state) +{ + int i; + + memset(&state->tree_lengths, 0, sizeof(state->tree_lengths)); + for (i = 0; i < 144; i++) + tree_add_value(&state->tree_lengths, i + 48, 8, i); + for (i = 144; i < 256; i++) + tree_add_value(&state->tree_lengths, i + 256, 9, i); + for (i = 256; i < 280; i++) + tree_add_value(&state->tree_lengths, i - 256, 7, i); + for (i = 280; i < 288; i++) + tree_add_value(&state->tree_lengths, i - 88, 8, i); + + memset(&state->tree_dists, 0, sizeof(state->tree_dists)); + for (i = 0; i < 32; i++) + tree_add_value(&state->tree_dists, i, 5, i); +} + +static bool setup_dynamic_tree(struct tree *tree, int *clens, int count) +{ + int code, i; + int bl_count[MAX_BITS]; + int next_code[MAX_BITS]; + + memset(bl_count, 0, sizeof(bl_count)); + for (i = 0; i < count; i++) + bl_count[clens[i]]++; + bl_count[0] = 0; + + code = 0; + for (i = 1; i < MAX_BITS; i++) { + code = (code + bl_count[i - 1]) << 1; + next_code[i] = code; + } + + memset(tree, 0, sizeof(*tree)); + for (i = 0; i < count; i++) { + if (clens[i] != 0) { + if (!tree_add_value(tree, next_code[clens[i]], clens[i], i)) + return false; + next_code[clens[i]]++; + } + } + + return true; +} + +inflate_state *inflate_create(bool inflate64) +{ + inflate_state *state = calloc(1, sizeof(inflate_state)); + if (state) + state->inflate64 = inflate64; + 
return state; +} + +void inflate_free(inflate_state *state) +{ + free(state); +} + +int inflate_process(inflate_state *state, const void *data_in, size_t *avail_in, void *data_out, size_t *avail_out) +{ + bool not_fast = true; + int res; + + if (!state || !data_in || !avail_in || !data_out || !avail_out) + return RESULT_ERROR; + + state->in.data_in = data_in; + state->in.avail_in = avail_in; + state->out.data_out = data_out; + state->out.avail_out = avail_out; + + for (;;) { + switch (state->step) { + case STEP_NEXT_BLOCK: + if (state->is_final_block) + return RESULT_EOS; + + if (!br_ensure(state, 3)) + return RESULT_NOT_DONE; + state->is_final_block = br_bits(state, 1) != 0; + switch (br_bits(state, 2)) { + case 0: + state->step = STEP_COPY_INIT; + break; + case 1: + state->step = STEP_INFLATE_STATIC_INIT; + break; + case 2: + state->step = STEP_INFLATE_DYNAMIC_INIT; + break; + default: + return RESULT_ERROR; + } + break; + + case STEP_COPY_INIT: + if (!br_ensure(state, 32)) + return RESULT_NOT_DONE; + (void)br_bits(state, state->in.available & 0x7); + state->state.length = (uint16_t)br_bits(state, 16); + if (state->state.length != 0xFFFF - (uint16_t)br_bits(state, 16)) + return RESULT_ERROR; + state->step = STEP_COPY; + /* fall through */ + + case STEP_COPY: + while (state->state.length > 0) { + if (!br_ensure(state, 8) || *avail_out == 0) + return RESULT_NOT_DONE; + output(state, (uint8_t)br_bits(state, 8)); + state->state.length--; + } + state->step = STEP_NEXT_BLOCK; + break; + + case STEP_INFLATE_STATIC_INIT: + setup_static_trees(state); + /* fall through */ + + STEP_INFLATE_START: + not_fast = !br_ensure(state, state->inflate64 ? 
49 : 48); + state->state.value = -1; + /* fall through */ + + case STEP_INFLATE_CODE: + res = tree_get_value(state, &state->tree_lengths, not_fast); + if (res != RESULT_EOS) { + state->step = STEP_INFLATE_CODE; + return res; + } + /* fall through */ + + case STEP_INFLATE: + if (state->state.value < 256) { + if (*avail_out == 0) { + state->step = STEP_INFLATE; + return RESULT_NOT_DONE; + } + output(state, (uint8_t)state->state.value); + goto STEP_INFLATE_START; + } + if (state->state.value == 256) { + state->step = STEP_NEXT_BLOCK; + break; + } + if (state->state.value > 285) + return RESULT_ERROR; + if (state->inflate64 && state->state.value == 285) { + not_fast = !br_ensure(state, 45); + state->state.value = 286; + } + if (not_fast && !br_ensure(state, table_lengths[state->state.value - 257].bits)) { + state->step = STEP_INFLATE; + return RESULT_NOT_DONE; + } + state->state.length = table_lengths[state->state.value - 257].length + (int)br_bits(state, table_lengths[state->state.value - 257].bits); + state->state.value = -1; + /* fall through */ + + case STEP_INFLATE_DISTANCE_CODE: + res = tree_get_value(state, &state->tree_dists, not_fast); + if (res != RESULT_EOS) { + state->step = STEP_INFLATE_DISTANCE_CODE; + return res; + } + /* fall through */ + + case STEP_INFLATE_DISTANCE: + if (not_fast && !br_ensure(state, table_dists[state->state.value].bits)) { + state->step = STEP_INFLATE_DISTANCE; + return RESULT_NOT_DONE; + } + state->state.dist = table_dists[state->state.value].dist + (int)br_bits(state, table_dists[state->state.value].bits); + if ((size_t)state->state.dist > state->out.offset || (state->state.value > 30 && !state->inflate64)) + return RESULT_ERROR; + state->step = STEP_INFLATE_REPEAT; + /* fall through */ + + case STEP_INFLATE_REPEAT: + while (state->state.length > 0) { + if (*avail_out == 0) + return RESULT_NOT_DONE; + output(state, state->out.window[(state->out.offset - state->state.dist) & (sizeof(state->out.window) - 1)]); + 
state->state.length--; + } + goto STEP_INFLATE_START; + + case STEP_INFLATE_DYNAMIC_INIT: + if (!br_ensure(state, 14)) + return RESULT_NOT_DONE; + state->prepare.hlit = (int)br_bits(state, 5) + 257; + state->prepare.hdist = (int)br_bits(state, 5) + 1; + state->prepare.hclen = (int)br_bits(state, 4) + 4; + memset(state->prepare.clens, 0, sizeof(state->prepare.clens)); + state->prepare.idx = 0; + state->step = STEP_INFLATE_DYNAMIC_INIT_PRETREE; + /* fall through */ + + case STEP_INFLATE_DYNAMIC_INIT_PRETREE: + while (state->prepare.idx < state->prepare.hclen) { + if (!br_ensure(state, 3)) + return RESULT_NOT_DONE; + state->prepare.clens[table_code_length_idxs[state->prepare.idx]] = (int)br_bits(state, 3); + state->prepare.idx++; + } + if (!setup_dynamic_tree(&state->tree_lengths, state->prepare.clens, 19)) + return RESULT_ERROR; + memset(state->prepare.clens, 0, sizeof(state->prepare.clens)); + state->prepare.idx = 0; + state->state.value = -1; + state->step = STEP_INFLATE_DYNAMIC_INIT_TREES; + /* fall through */ + + case STEP_INFLATE_DYNAMIC_INIT_TREES: + while (state->prepare.idx < state->prepare.hlit + state->prepare.hdist) { + int value = 0, repeat = 0; + if (state->state.value == -1) { + res = tree_get_value(state, &state->tree_lengths, true); + if (res != RESULT_EOS) + return res; + } + if (state->state.value < 16) { + state->prepare.clens[state->prepare.idx++] = state->state.value; + } + else if (state->state.value == 16) { + if (state->prepare.idx == 0) + return RESULT_ERROR; + if (!br_ensure(state, 2)) + return RESULT_NOT_DONE; + value = state->prepare.clens[state->prepare.idx - 1]; + repeat = (int)br_bits(state, 2) + 3; + } + else if (state->state.value == 17) { + if (!br_ensure(state, 3)) + return RESULT_NOT_DONE; + value = 0; + repeat = (int)br_bits(state, 3) + 3; + } + else { + if (!br_ensure(state, 7)) + return RESULT_NOT_DONE; + value = 0; + repeat = (int)br_bits(state, 7) + 11; + } + if (repeat) { + if (state->prepare.idx + repeat > 
state->prepare.hlit + state->prepare.hdist) + return RESULT_ERROR; + while (repeat-- > 0) + state->prepare.clens[state->prepare.idx++] = value; + } + state->state.value = -1; + } + if (!setup_dynamic_tree(&state->tree_lengths, state->prepare.clens, state->prepare.hlit)) + return RESULT_ERROR; + if (!setup_dynamic_tree(&state->tree_dists, state->prepare.clens + state->prepare.hlit, state->prepare.hdist)) + return RESULT_ERROR; + goto STEP_INFLATE_START; + } + } +} + +int inflate_flush(inflate_state *state, unsigned char data_in[8]) +{ + int count = 0; + int keep = state->in.available & 0x7; + while (count < state->in.available / 8) { + data_in[count] = (state->in.bits >> (count * 8 + keep)) & 0xFF; + count++; + } + state->in.available = keep; + return count; +} diff --git a/external/unarr/zip/inflate.h b/external/unarr/zip/inflate.h new file mode 100644 index 00000000..52178ca1 --- /dev/null +++ b/external/unarr/zip/inflate.h @@ -0,0 +1,19 @@ +/* Copyright 2015 the unarr project authors (see AUTHORS file). + License: LGPLv3 */ + +#ifndef zip_inflate_h +#define zip_inflate_h + +#include +#include + +typedef struct inflate_state_s inflate_state; + +inflate_state *inflate_create(bool inflate64); +/* updates avail_in and avail_out and returns -1 on EOF or any other non-zero value on error */ +int inflate_process(inflate_state *state, const void *data_in, size_t *avail_in, void *data_out, size_t *avail_out); +/* restores up to 8 bytes of data cached by inflate_process */ +int inflate_flush(inflate_state *state, unsigned char data_in[8]); +void inflate_free(inflate_state *state); + +#endif diff --git a/external/unarr/zip/parse-zip.c b/external/unarr/zip/parse-zip.c new file mode 100644 index 00000000..8fc70c3f --- /dev/null +++ b/external/unarr/zip/parse-zip.c @@ -0,0 +1,327 @@ +/* Copyright 2015 the unarr project authors (see AUTHORS file). 
+ License: LGPLv3 */ + +#include "zip.h" + +#if defined(_MSC_VER) && !defined(inline) +#define inline __inline +#endif + +static inline uint16_t uint16le(unsigned char *data) { return data[0] | data[1] << 8; } +static inline uint32_t uint32le(unsigned char *data) { return data[0] | data[1] << 8 | data[2] << 16 | (uint32_t) data[3] << 24; } +static inline uint64_t uint64le(unsigned char *data) { return (uint64_t)uint32le(data) | (uint64_t)uint32le(data + 4) << 32; } + +bool zip_seek_to_compressed_data(ar_archive_zip *zip) +{ + struct zip_entry entry; + + if (!ar_seek(zip->super.stream, zip->entry.offset, SEEK_SET)) + return false; + if (!zip_parse_local_file_entry(zip, &entry)) + return false; + if (zip->entry.method != entry.method) { + warn("Compression methods don't match: %d != %d", zip->entry.method, entry.method); + if (!zip->entry.method) + zip->entry.method = entry.method; + } + if (zip->entry.dosdate != entry.dosdate) { + warn("Timestamps don't match"); + if (!zip->entry.dosdate) { + zip->entry.dosdate = entry.dosdate; + zip->super.entry_filetime = ar_conv_dosdate_to_filetime(zip->entry.dosdate); + } + } + + return ar_seek(zip->super.stream, zip->entry.offset + ZIP_LOCAL_ENTRY_FIXED_SIZE + entry.namelen + entry.extralen, SEEK_SET); +} + +static bool zip_parse_extra_fields(ar_archive_zip *zip, struct zip_entry *entry) +{ + uint8_t *extra; + + if (!entry->extralen) + return true; + + /* read ZIP64 values where needed */ + if (!ar_skip(zip->super.stream, entry->namelen)) + return false; + extra = malloc(entry->extralen); + if (!extra || ar_read(zip->super.stream, extra, entry->extralen) != entry->extralen) { + free(extra); + return false; + } + for (uint32_t idx = 0; idx + 4 < entry->extralen; idx += 4 + uint16le(&extra[idx + 2])) { + if (uint16le(&extra[idx]) == 0x0001) { + uint16_t size = uint16le(&extra[idx + 2]); + if (size + idx + 1 > entry->extralen) { + free(extra); + return false; + } + + uint16_t offset = 0; + if (entry->uncompressed == UINT32_MAX && 
offset + 8 <= size) { + entry->uncompressed = uint64le(&extra[idx + 4 + offset]); + offset += 8; + } + if (entry->datasize == UINT32_MAX && offset + 8 <= size) { + entry->datasize = uint64le(&extra[idx + 4 + offset]); + offset += 8; + } + if (entry->header_offset == UINT32_MAX && offset + 8 <= size) { + entry->header_offset = (off64_t)uint64le(&extra[idx + 4 + offset]); + offset += 8; + } + if (entry->disk == UINT16_MAX && offset + 4 <= size) { + entry->disk = uint32le(&extra[idx + 4 + offset]); + offset += 4; + } + break; + } + } + free(extra); + + return true; +} + +bool zip_parse_local_file_entry(ar_archive_zip *zip, struct zip_entry *entry) +{ + uint8_t data[ZIP_LOCAL_ENTRY_FIXED_SIZE]; + + if (ar_read(zip->super.stream, data, sizeof(data)) != sizeof(data)) + return false; + + memset(entry, 0, sizeof(*entry)); + entry->signature = uint32le(data + 0); + entry->version = uint16le(data + 4); + entry->flags = uint16le(data + 6); + entry->method = uint16le(data + 8); + entry->dosdate = uint32le(data + 10); + entry->crc = uint32le(data + 14); + entry->datasize = uint32le(data + 18); + entry->uncompressed = uint32le(data + 22); + entry->namelen = uint16le(data + 26); + entry->extralen = uint16le(data + 28); + + if (entry->signature != SIG_LOCAL_FILE_HEADER) + return false; + + return zip_parse_extra_fields(zip, entry); +} + +off64_t zip_find_next_local_file_entry(ar_stream *stream, off64_t offset) +{ + uint8_t data[512]; + int count, i; + + if (!ar_seek(stream, offset, SEEK_SET)) + return -1; + count = (int)ar_read(stream, data, sizeof(data)); + + while (count >= ZIP_LOCAL_ENTRY_FIXED_SIZE) { + for (i = 0; i < count - 4; i++) { + if (uint32le(data + i) == SIG_LOCAL_FILE_HEADER) + return offset + i; + } + memmove(data, data + count - 4, 4); + offset += count - 4; + count = (int)ar_read(stream, data + 4, sizeof(data) - 4) + 4; + } + + return -1; +} + +bool zip_parse_directory_entry(ar_archive_zip *zip, struct zip_entry *entry) +{ + uint8_t 
data[ZIP_DIR_ENTRY_FIXED_SIZE]; + + if (ar_read(zip->super.stream, data, sizeof(data)) != sizeof(data)) + return false; + + entry->signature = uint32le(data + 0); + entry->version = uint16le(data + 4); + entry->min_version = uint16le(data + 6); + entry->flags = uint16le(data + 8); + entry->method = uint16le(data + 10); + entry->dosdate = uint32le(data + 12); + entry->crc = uint32le(data + 16); + entry->datasize = uint32le(data + 20); + entry->uncompressed = uint32le(data + 24); + entry->namelen = uint16le(data + 28); + entry->extralen = uint16le(data + 30); + entry->commentlen = uint16le(data + 32); + entry->disk = uint16le(data + 34); + entry->attr_internal = uint16le(data + 36); + entry->attr_external = uint32le(data + 38); + entry->header_offset = uint32le(data + 42); + + if (entry->signature != SIG_CENTRAL_DIRECTORY) + return false; + + return zip_parse_extra_fields(zip, entry); +} + +off64_t zip_find_end_of_last_directory_entry(ar_stream *stream, struct zip_eocd64 *eocd) +{ + uint8_t data[ZIP_DIR_ENTRY_FIXED_SIZE]; + uint64_t i; + + if (!ar_seek(stream, eocd->dir_offset, SEEK_SET)) + return -1; + for (i = 0; i < eocd->numentries; i++) { + if (ar_read(stream, data, sizeof(data)) != sizeof(data)) + return -1; + if (uint32le(data + 0) != SIG_CENTRAL_DIRECTORY) + return -1; + if (!ar_skip(stream, uint16le(data + 28) + uint16le(data + 30) + uint16le(data + 32))) + return -1; + } + + return ar_tell(stream); +} + +bool zip_parse_end_of_central_directory(ar_stream *stream, struct zip_eocd64 *eocd) +{ + uint8_t data[56]; + if (ar_read(stream, data, ZIP_END_OF_CENTRAL_DIR_SIZE) != ZIP_END_OF_CENTRAL_DIR_SIZE) + return false; + + eocd->signature = uint32le(data + 0); + eocd->diskno = uint16le(data + 4); + eocd->diskno_dir = uint16le(data + 6); + eocd->numentries_disk = uint16le(data + 8); + eocd->numentries = uint16le(data + 10); + eocd->dir_size = uint32le(data + 12); + eocd->dir_offset = uint32le(data + 16); + eocd->commentlen = uint16le(data + 20); + + if 
(eocd->signature != SIG_END_OF_CENTRAL_DIRECTORY) + return false; + + /* try to locate the ZIP64 end of central directory */ + if (!ar_skip(stream, -42)) + return eocd->dir_size < 20; + if (ar_read(stream, data, 20) != 20) + return false; + if (uint32le(data + 0) != SIG_END_OF_CENTRAL_DIRECTORY_64_LOCATOR) + return true; + if ((eocd->diskno != UINT16_MAX && uint32le(data + 4) != eocd->diskno) || uint32le(data + 16) != 1) { + warn("Archive spanning isn't supported"); + return false; + } + if (!ar_seek(stream, (off64_t)uint64le(data + 8), SEEK_SET)) + return false; + if (ar_read(stream, data, 56) != 56) + return false; + + /* use data from ZIP64 end of central directory (when necessary) */ + eocd->signature = uint32le(data + 0); + eocd->version = uint16le(data + 12); + eocd->min_version = uint16le(data + 14); + if (eocd->diskno == UINT16_MAX) + eocd->diskno = uint32le(data + 16); + if (eocd->diskno_dir == UINT16_MAX) + eocd->diskno_dir = uint32le(data + 20); + if (eocd->numentries_disk == UINT16_MAX) + eocd->numentries_disk = uint64le(data + 24); + if (eocd->numentries == UINT16_MAX) + eocd->numentries = uint64le(data + 32); + if (eocd->dir_size == UINT32_MAX) + eocd->dir_size = uint64le(data + 40); + if (eocd->dir_offset == UINT32_MAX) + eocd->dir_offset = (off64_t)uint64le(data + 48); + + if (eocd->signature != SIG_END_OF_CENTRAL_DIRECTORY_64) + return false; + if (eocd->diskno != eocd->diskno_dir || eocd->numentries != eocd->numentries_disk) { + warn("Archive spanning isn't supported"); + return false; + } + if (uint64le(data + 4) > 44) + log("ZIP64 extensible data sector present @" PRIi64, ar_tell(stream)); + + return true; +} + +off64_t zip_find_end_of_central_directory(ar_stream *stream) +{ + uint8_t data[512]; + off64_t filesize; + int fromend = 0; + int count, i; + + if (!ar_seek(stream, 0, SEEK_END)) + return -1; + filesize = ar_tell(stream); + + while (fromend < UINT16_MAX + ZIP_END_OF_CENTRAL_DIR_SIZE && fromend < filesize) { + count = (filesize - fromend 
< (int)sizeof(data) ? (int)(filesize - fromend) : (int)sizeof(data)); + fromend += count; + if (count < ZIP_END_OF_CENTRAL_DIR_SIZE) + return -1; + if (!ar_seek(stream, -fromend, SEEK_END)) + return -1; + if (ar_read(stream, data, count) != (size_t)count) + return -1; + for (i = count - ZIP_END_OF_CENTRAL_DIR_SIZE; i >= 0; i--) { + if (uint32le(data + i) == SIG_END_OF_CENTRAL_DIRECTORY) + return filesize - fromend + i; + } + fromend -= ZIP_END_OF_CENTRAL_DIR_SIZE - 1; + } + + return -1; +} + +const char *zip_get_name(ar_archive *ar, bool raw) +{ + ar_archive_zip *zip = (ar_archive_zip *)ar; + if (!zip->entry.name) { + struct zip_entry entry; + char *name; + + if (zip->dir.end_offset >= 0) { + if (!ar_seek(ar->stream, ar->entry_offset, SEEK_SET)) + return NULL; + if (!zip_parse_directory_entry(zip, &entry)) + return NULL; + if (!ar_seek(ar->stream, ar->entry_offset + ZIP_DIR_ENTRY_FIXED_SIZE, SEEK_SET)) + return NULL; + } + else { + if (!ar_seek(ar->stream, zip->entry.offset, SEEK_SET)) + return NULL; + if (!zip_parse_local_file_entry(zip, &entry)) + return NULL; + if (!ar_seek(ar->stream, ar->entry_offset + ZIP_LOCAL_ENTRY_FIXED_SIZE, SEEK_SET)) + return NULL; + } + + name = malloc(entry.namelen + 1); + if (!name || ar_read(ar->stream, name, entry.namelen) != entry.namelen) { + free(name); + return NULL; + } + name[entry.namelen] = '\0'; + + zip->entry.raw_name = malloc(entry.namelen + 1); + if (zip->entry.raw_name) { + memcpy(zip->entry.raw_name, name, entry.namelen + 1); + } + + if ((entry.flags & (1 << 11))) { + zip->entry.name = name; + } + else { + zip->entry.name = ar_conv_dos_to_utf8(name); + free(name); + } + /* normalize path separators */ + if (zip->entry.name) { + char *p = zip->entry.name; + while ((p = strchr(p, '\\')) != NULL) { + *p = '/'; + } + } + } + return raw ? 
zip->entry.raw_name : zip->entry.name; +} diff --git a/external/unarr/zip/uncompress-zip.c b/external/unarr/zip/uncompress-zip.c new file mode 100644 index 00000000..41d7d8c1 --- /dev/null +++ b/external/unarr/zip/uncompress-zip.c @@ -0,0 +1,540 @@ +/* Copyright 2015 the unarr project authors (see AUTHORS file). + License: LGPLv3 */ + +#include "zip.h" + +#define ERR_UNCOMP UINT32_MAX + +static bool zip_fill_input_buffer(ar_archive_zip *zip) +{ + struct ar_archive_zip_uncomp *uncomp = &zip->uncomp; + size_t count; + + if (uncomp->input.offset) { + memmove(&uncomp->input.data[0], &uncomp->input.data[uncomp->input.offset], uncomp->input.bytes_left); + uncomp->input.offset = 0; + } + count = sizeof(uncomp->input.data) - uncomp->input.bytes_left; + if (count > zip->progress.data_left) + count = zip->progress.data_left; + if (ar_read(zip->super.stream, &uncomp->input.data[uncomp->input.bytes_left], count) != count) { + warn("Unexpected EOF during decompression (invalid data size?)"); + return false; + } + zip->progress.data_left -= count; + uncomp->input.bytes_left += (uint16_t)count; + uncomp->input.at_eof = !zip->progress.data_left; + + return true; +} + +/***** Deflate compression *****/ + +#ifdef HAVE_ZLIB +static void *gZlib_Alloc(void *opaque, uInt count, uInt size) { (void)opaque; return calloc(count, size); } +static void gZlib_Free(void *opaque, void *ptr) { (void)opaque; free(ptr); } + +static bool zip_init_uncompress_deflate(struct ar_archive_zip_uncomp *uncomp) +{ + int err; + + uncomp->state.zstream.zalloc = gZlib_Alloc; + uncomp->state.zstream.zfree = gZlib_Free; + uncomp->state.zstream.opaque = NULL; + + err = inflateInit2(&uncomp->state.zstream, -15); + return err == Z_OK; +} + +static uint32_t zip_uncompress_data_deflate(struct ar_archive_zip_uncomp *uncomp, void *buffer, uint32_t buffer_size, bool is_last_chunk) +{ + int err; + + uncomp->state.zstream.next_in = &uncomp->input.data[uncomp->input.offset]; + uncomp->state.zstream.avail_in = 
uncomp->input.bytes_left; + uncomp->state.zstream.next_out = buffer; + uncomp->state.zstream.avail_out = buffer_size; + + err = inflate(&uncomp->state.zstream, Z_SYNC_FLUSH); + + uncomp->input.offset += uncomp->input.bytes_left - (uint16_t)uncomp->state.zstream.avail_in; + uncomp->input.bytes_left = (uint16_t)uncomp->state.zstream.avail_in; + + if (err != Z_OK && err != Z_STREAM_END) { + warn("Unexpected ZLIB error %d", err); + return ERR_UNCOMP; + } + if (err == Z_STREAM_END && (!is_last_chunk || uncomp->state.zstream.avail_out)) { + warn("Premature EOS in Deflate stream"); + return ERR_UNCOMP; + } + + return buffer_size - uncomp->state.zstream.avail_out; +} + +static void zip_clear_uncompress_deflate(struct ar_archive_zip_uncomp *uncomp) +{ + inflateEnd(&uncomp->state.zstream); +} +#endif + +/***** Deflate(64) compression *****/ + +static bool zip_init_uncompress_deflate64(struct ar_archive_zip_uncomp *uncomp, bool deflate64) +{ + uncomp->state.inflate = inflate_create(deflate64); + + return uncomp->state.inflate != NULL; +} + +static uint32_t zip_uncompress_data_deflate64(struct ar_archive_zip_uncomp *uncomp, void *buffer, uint32_t buffer_size, bool is_last_chunk) +{ + size_t avail_in = uncomp->input.bytes_left; + size_t avail_out = buffer_size; + + int result = inflate_process(uncomp->state.inflate, &uncomp->input.data[uncomp->input.offset], &avail_in, buffer, &avail_out); + + uncomp->input.offset += uncomp->input.bytes_left - (uint16_t)avail_in; + uncomp->input.bytes_left = (uint16_t)avail_in; + + if (result && result != EOF) { + warn("Unexpected Inflate error %d", result); + return ERR_UNCOMP; + } + if (result == EOF && (!is_last_chunk || avail_out)) { + warn("Premature EOS in Deflate stream"); + return ERR_UNCOMP; + } + + return buffer_size - (uint32_t)avail_out; +} + +static void zip_clear_uncompress_deflate64(struct ar_archive_zip_uncomp *uncomp) +{ + inflate_free(uncomp->state.inflate); +} + +/***** BZIP2 compression *****/ + +#ifdef HAVE_BZIP2 +static 
void *gBzip2_Alloc(void *opaque, int count, int size) { (void)opaque; return calloc(count, size); } +static void gBzip2_Free(void *opaque, void *ptr) { (void)opaque; free(ptr); } + +static bool zip_init_uncompress_bzip2(struct ar_archive_zip_uncomp *uncomp) +{ + int err; + + uncomp->state.bstream.bzalloc = gBzip2_Alloc; + uncomp->state.bstream.bzfree = gBzip2_Free; + uncomp->state.bstream.opaque = NULL; + + err = BZ2_bzDecompressInit(&uncomp->state.bstream, 0, 0); + return err == BZ_OK; +} + +static uint32_t zip_uncompress_data_bzip2(struct ar_archive_zip_uncomp *uncomp, void *buffer, uint32_t buffer_size, bool is_last_chunk) +{ + int err; + + uncomp->state.bstream.next_in = (char *)&uncomp->input.data[uncomp->input.offset]; + uncomp->state.bstream.avail_in = uncomp->input.bytes_left; + uncomp->state.bstream.next_out = (char *)buffer; + uncomp->state.bstream.avail_out = buffer_size; + + err = BZ2_bzDecompress(&uncomp->state.bstream); + + uncomp->input.offset += uncomp->input.bytes_left - (uint16_t)uncomp->state.bstream.avail_in; + uncomp->input.bytes_left = (uint16_t)uncomp->state.bstream.avail_in; + + if (err != BZ_OK && err != BZ_STREAM_END) { + warn("Unexpected BZIP2 error %d", err); + return ERR_UNCOMP; + } + if (err == BZ_STREAM_END && (!is_last_chunk || uncomp->state.bstream.avail_out)) { + warn("Premature EOS in BZIP2 stream"); + return ERR_UNCOMP; + } + + return buffer_size - uncomp->state.bstream.avail_out; +} + +static void zip_clear_uncompress_bzip2(struct ar_archive_zip_uncomp *uncomp) +{ + BZ2_bzDecompressEnd(&uncomp->state.bstream); +} +#endif + +/***** LZMA compression *****/ + +#ifdef HAVE_LIBLZMA + +static void *gLzma_Alloc(void *opaque, size_t nmemb, size_t size) + { (void)opaque; (void) nmemb; return malloc(size); } +static void gLzma_Free(void *opaque, void *ptr) + { (void)opaque; free(ptr); } + +static bool zip_init_uncompress_lzma(struct ar_archive_zip_uncomp *uncomp) +{ + lzma_stream strm = LZMA_STREAM_INIT; + uncomp->state.lzmastream = strm; 
+ #if LZMA_VERSION_MAJOR > 5 || (LZMA_VERSION_MAJOR == 5 && LZMA_VERSION_MINOR >= 2) + static const lzma_allocator allocator = { gLzma_Alloc, gLzma_Free, NULL }; + #else + static lzma_allocator allocator = { gLzma_Alloc, gLzma_Free, NULL }; + #endif + uncomp->state.lzmastream.allocator = &allocator; + return true; +} + +static uint32_t zip_uncompress_data_lzma1(struct ar_archive_zip_uncomp *uncomp, void *buffer, uint32_t buffer_size, bool is_last_chunk) +{ + int err; + + if (uncomp->state.lzmastream.internal == NULL) { + uint8_t propsize; + propsize = uncomp->input.data[uncomp->input.offset + 2]; + + lzma_filter filters[2] = {{.id=LZMA_FILTER_LZMA1, .options=NULL}, + {.id=LZMA_VLI_UNKNOWN, .options=NULL}}; + + err = lzma_properties_decode( + &filters[0], NULL, + &uncomp->input.data[uncomp->input.offset + 4], propsize); + + if (err != LZMA_OK) { + warn("Properties error %d", err); + return ERR_UNCOMP; + } + + err = lzma_raw_decoder(&uncomp->state.lzmastream, filters); + free(filters[0].options); + if (err != LZMA_OK) { + warn("Decoder init error %d", err); + return ERR_UNCOMP; + } + uncomp->input.offset += 4 + propsize; + uncomp->input.bytes_left -= 4 + propsize; + } + + uncomp->state.lzmastream.next_in = &uncomp->input.data[uncomp->input.offset]; + uncomp->state.lzmastream.avail_in = uncomp->input.bytes_left; + uncomp->state.lzmastream.next_out = buffer; + uncomp->state.lzmastream.avail_out = buffer_size; + + err = lzma_code(&uncomp->state.lzmastream, LZMA_RUN); + + uncomp->input.offset += (uint16_t)uncomp->input.bytes_left - (uint16_t)uncomp->state.lzmastream.avail_in; + uncomp->input.bytes_left = (uint16_t)uncomp->state.lzmastream.avail_in; + + if (err != LZMA_OK && err != LZMA_STREAM_END) { + warn("Unexpected LZMA error %d", err); + warn("%d", buffer_size - uncomp->state.lzmastream.avail_out); + return ERR_UNCOMP; + } + if (err == LZMA_STREAM_END && (!is_last_chunk || uncomp->state.lzmastream.avail_out)) { + warn("Premature EOS in LZMA stream"); + return 
ERR_UNCOMP; + } + return buffer_size - uncomp->state.lzmastream.avail_out; +} + +static uint32_t zip_uncompress_data_xz(struct ar_archive_zip_uncomp *uncomp, void *buffer, uint32_t buffer_size, bool is_last_chunk) +{ + int err; + + if (uncomp->state.lzmastream.internal == NULL) { + /* restrict decoder memory usage to 100 MB */ + err = lzma_stream_decoder(&uncomp->state.lzmastream, 100 << 20, 0); + if (err != LZMA_OK) { + warn("Unexpected LZMA Decoder init error %d", err); + return ERR_UNCOMP; + } + } + + uncomp->state.lzmastream.next_in = &uncomp->input.data[uncomp->input.offset]; + uncomp->state.lzmastream.avail_in = uncomp->input.bytes_left; + uncomp->state.lzmastream.next_out = buffer; + uncomp->state.lzmastream.avail_out = buffer_size; + + err = lzma_code(&uncomp->state.lzmastream, LZMA_RUN); + + uncomp->input.offset += (uint16_t)uncomp->input.bytes_left - (uint16_t)uncomp->state.lzmastream.avail_in; + uncomp->input.bytes_left = (uint16_t)uncomp->state.lzmastream.avail_in; + + if (err != LZMA_OK && err != LZMA_STREAM_END) { + warn("Unexpected XZ error %d", err); + warn("%d", buffer_size - uncomp->state.lzmastream.avail_out); + return ERR_UNCOMP; + } + if (err == LZMA_STREAM_END && (!is_last_chunk || uncomp->state.lzmastream.avail_out)) { + warn("Premature EOS in XZ stream"); + return ERR_UNCOMP; + } + return buffer_size - uncomp->state.lzmastream.avail_out; +} + + +static void zip_clear_uncompress_lzma(struct ar_archive_zip_uncomp *uncomp) +{ + lzma_end(&uncomp->state.lzmastream); +} + +#else //HAVE_LIBLZMA + +static void *gLzma_Alloc(ISzAllocPtr self, size_t size) { (void)self; return malloc(size); } +static void gLzma_Free(ISzAllocPtr self, void *ptr) { (void)self; free(ptr); } + +static bool zip_init_uncompress_lzma(struct ar_archive_zip_uncomp *uncomp, uint16_t flags) +{ + uncomp->state.lzma.alloc.Alloc = gLzma_Alloc; + uncomp->state.lzma.alloc.Free = gLzma_Free; + uncomp->state.lzma.finish = (flags & (1 << 1)) ? 
LZMA_FINISH_END : LZMA_FINISH_ANY; + LzmaDec_Construct(&uncomp->state.lzma.dec); + return true; +} + +static uint32_t zip_uncompress_data_lzma(struct ar_archive_zip_uncomp *uncomp, void *buffer, uint32_t buffer_size, bool is_last_chunk) +{ + SizeT srclen, dstlen; + ELzmaStatus status; + ELzmaFinishMode finish; + SRes res; + + if (!uncomp->state.lzma.dec.dic) { + uint8_t propsize; + if (uncomp->input.bytes_left < 9) { + warn("Insufficient data in compressed stream"); + return ERR_UNCOMP; + } + propsize = uncomp->input.data[uncomp->input.offset + 2]; + if (uncomp->input.data[uncomp->input.offset + 3] != 0 || uncomp->input.bytes_left < 4 + propsize) { + warn("Insufficient data in compressed stream"); + return ERR_UNCOMP; + } + res = LzmaDec_Allocate(&uncomp->state.lzma.dec, &uncomp->input.data[uncomp->input.offset + 4], propsize, &uncomp->state.lzma.alloc); + uncomp->input.offset += 4 + propsize; + uncomp->input.bytes_left -= 4 + propsize; + if (res != SZ_OK) + return ERR_UNCOMP; + LzmaDec_Init(&uncomp->state.lzma.dec); + } + + srclen = uncomp->input.bytes_left; + dstlen = buffer_size; + finish = uncomp->input.at_eof && is_last_chunk ? 
uncomp->state.lzma.finish : LZMA_FINISH_ANY; + res = LzmaDec_DecodeToBuf(&uncomp->state.lzma.dec, buffer, &dstlen, &uncomp->input.data[uncomp->input.offset], &srclen, finish, &status); + + uncomp->input.offset += (uint16_t)srclen; + uncomp->input.bytes_left -= (uint16_t)srclen; + + if (res != SZ_OK || (srclen == 0 && dstlen == 0)) { + warn("Unexpected LZMA error %d", res); + return ERR_UNCOMP; + } + if (status == LZMA_STATUS_FINISHED_WITH_MARK && (!is_last_chunk || dstlen != buffer_size)) { + warn("Premature EOS in LZMA stream"); + return ERR_UNCOMP; + } + + return (uint32_t)dstlen; +} + +static void zip_clear_uncompress_lzma(struct ar_archive_zip_uncomp *uncomp) +{ + LzmaDec_Free(&uncomp->state.lzma.dec, &uncomp->state.lzma.alloc); +} + +#endif //HAVE_LIBLZMA + +/***** PPMd compression *****/ + +static void *gPpmd_Alloc(ISzAllocPtr self, size_t size) { (void)self; return malloc(size); } +static void gPpmd_Free(ISzAllocPtr self, void *ptr) { (void)self; free(ptr); } + +static Byte gPpmd_ByteIn_Read(const IByteIn *p) +{ + struct ByteReader *self = (struct ByteReader *) p; + if (!self->input->bytes_left && (!self->zip->progress.data_left || !zip_fill_input_buffer(self->zip))) + return 0xFF; + self->input->bytes_left--; + return self->input->data[self->input->offset++]; +} + +static bool zip_init_uncompress_ppmd(ar_archive_zip *zip) +{ + struct ar_archive_zip_uncomp *uncomp = &zip->uncomp; + uncomp->state.ppmd8.alloc.Alloc = gPpmd_Alloc; + uncomp->state.ppmd8.alloc.Free = gPpmd_Free; + uncomp->state.ppmd8.bytein.super.Read = gPpmd_ByteIn_Read; + uncomp->state.ppmd8.bytein.input = &uncomp->input; + uncomp->state.ppmd8.bytein.zip = zip; + uncomp->state.ppmd8.ctx.Stream.In = &uncomp->state.ppmd8.bytein.super; + Ppmd8_Construct(&uncomp->state.ppmd8.ctx); + return true; +} + +static uint32_t zip_uncompress_data_ppmd(struct ar_archive_zip_uncomp *uncomp, void *buffer, uint32_t buffer_size, bool is_last_chunk) +{ + uint32_t bytes_done = 0; + + if 
(!uncomp->state.ppmd8.ctx.Base) { + uint8_t order, size, method; + if (uncomp->input.bytes_left < 2) { + warn("Insufficient data in compressed stream"); + return ERR_UNCOMP; + } + order = (uncomp->input.data[uncomp->input.offset] & 0x0F) + 1; + size = ((uncomp->input.data[uncomp->input.offset] >> 4) | ((uncomp->input.data[uncomp->input.offset + 1] << 4) & 0xFF)); + method = uncomp->input.data[uncomp->input.offset + 1] >> 4; + uncomp->input.bytes_left -= 2; + uncomp->input.offset += 2; + if (order < 2 || method > 2) { + warn("Invalid PPMd data stream"); + return ERR_UNCOMP; + } +#ifndef PPMD8_FREEZE_SUPPORT + if (order == 2) { + warn("PPMd freeze method isn't supported"); + return ERR_UNCOMP; + } +#endif + if (!Ppmd8_Alloc(&uncomp->state.ppmd8.ctx, (size + 1) << 20, &uncomp->state.ppmd8.alloc)) + return ERR_UNCOMP; + if (!Ppmd8_Init_RangeDec(&uncomp->state.ppmd8.ctx)) + return ERR_UNCOMP; + Ppmd8_Init(&uncomp->state.ppmd8.ctx, order, method); + } + + while (bytes_done < buffer_size) { + int symbol = Ppmd8_DecodeSymbol(&uncomp->state.ppmd8.ctx); + if (symbol < 0) { + warn("Invalid PPMd data stream"); + return ERR_UNCOMP; + } + ((uint8_t *)buffer)[bytes_done++] = (uint8_t)symbol; + } + + if (is_last_chunk) { + int symbol = Ppmd8_DecodeSymbol(&uncomp->state.ppmd8.ctx); + if (symbol != -1 || !Ppmd8_RangeDec_IsFinishedOK(&uncomp->state.ppmd8.ctx)) { + warn("Invalid PPMd data stream"); + return ERR_UNCOMP; + } + } + + return bytes_done; +} + +static void zip_clear_uncompress_ppmd(struct ar_archive_zip_uncomp *uncomp) +{ + Ppmd8_Free(&uncomp->state.ppmd8.ctx, &uncomp->state.ppmd8.alloc); +} + +/***** common decompression handling *****/ + +static bool zip_init_uncompress(ar_archive_zip *zip) +{ + struct ar_archive_zip_uncomp *uncomp = &zip->uncomp; + if (uncomp->initialized) + return true; + memset(uncomp, 0, sizeof(*uncomp)); + if (zip->entry.method == METHOD_DEFLATE) { +#ifdef HAVE_ZLIB + if (zip_init_uncompress_deflate(uncomp)) { + uncomp->uncompress_data = 
zip_uncompress_data_deflate; + uncomp->clear_state = zip_clear_uncompress_deflate; + } +#else + if (zip_init_uncompress_deflate64(uncomp, false)) { + uncomp->uncompress_data = zip_uncompress_data_deflate64; + uncomp->clear_state = zip_clear_uncompress_deflate64; + } +#endif + } + else if (zip->entry.method == METHOD_DEFLATE64) { + if (zip_init_uncompress_deflate64(uncomp, true)) { + uncomp->uncompress_data = zip_uncompress_data_deflate64; + uncomp->clear_state = zip_clear_uncompress_deflate64; + } + } + else if (zip->entry.method == METHOD_BZIP2) { +#ifdef HAVE_BZIP2 + if (zip_init_uncompress_bzip2(uncomp)) { + uncomp->uncompress_data = zip_uncompress_data_bzip2; + uncomp->clear_state = zip_clear_uncompress_bzip2; + } +#else + warn("BZIP2 support requires BZIP2 (define HAVE_BZIP2)"); +#endif + } +#ifdef HAVE_LIBLZMA + else if (zip->entry.method == METHOD_LZMA) { + if (zip_init_uncompress_lzma(uncomp)) { + uncomp->uncompress_data = zip_uncompress_data_lzma1; + uncomp->clear_state = zip_clear_uncompress_lzma; + } + } + else if (zip->entry.method == METHOD_XZ) { + if (zip_init_uncompress_lzma(uncomp)) { + uncomp->uncompress_data = zip_uncompress_data_xz; + uncomp->clear_state = zip_clear_uncompress_lzma; + } + } +#else + else if (zip->entry.method == METHOD_LZMA) { + if (zip_init_uncompress_lzma(uncomp, zip->entry.flags)) { + uncomp->uncompress_data = zip_uncompress_data_lzma; + uncomp->clear_state = zip_clear_uncompress_lzma; + } + } +#endif // HAVE_LIBLZMA + else if (zip->entry.method == METHOD_PPMD) { + if (zip_init_uncompress_ppmd(zip)) { + uncomp->uncompress_data = zip_uncompress_data_ppmd; + uncomp->clear_state = zip_clear_uncompress_ppmd; + } + } + else + warn("Unsupported compression method %d", zip->entry.method); + uncomp->initialized = uncomp->uncompress_data != NULL && uncomp->clear_state != NULL; + return uncomp->initialized; +} + +void zip_clear_uncompress(struct ar_archive_zip_uncomp *uncomp) +{ + if (!uncomp->initialized) + return; + 
uncomp->clear_state(uncomp); + uncomp->initialized = false; +} + +bool zip_uncompress_part(ar_archive_zip *zip, void *buffer, size_t buffer_size) +{ + struct ar_archive_zip_uncomp *uncomp = &zip->uncomp; + uint32_t count; + + if (!zip_init_uncompress(zip)) + return false; + + for (;;) { + if (buffer_size == 0) + return true; + + if (uncomp->input.bytes_left < sizeof(uncomp->input.data) / 2 && zip->progress.data_left) { + if (!zip_fill_input_buffer(zip)) + return false; + } + + count = buffer_size >= UINT32_MAX ? UINT32_MAX - 1 : (uint32_t)buffer_size; + count = uncomp->uncompress_data(uncomp, buffer, count, zip->progress.bytes_done + count == zip->super.entry_size_uncompressed); + if (count == ERR_UNCOMP) + return false; + if (count == 0 && !zip->progress.data_left) { + warn("Insufficient data in compressed stream"); + return false; + } + zip->progress.bytes_done += count; + buffer = (uint8_t *)buffer + count; + buffer_size -= count; + } +} diff --git a/external/unarr/zip/zip.c b/external/unarr/zip/zip.c new file mode 100644 index 00000000..a4dd6055 --- /dev/null +++ b/external/unarr/zip/zip.c @@ -0,0 +1,219 @@ +/* Copyright 2015 the unarr project authors (see AUTHORS file). 
   License: LGPLv3 */

#include "zip.h"

/* Releases per-entry allocations and any decompressor state held by the
 * archive; installed as the ar_archive close callback. */
static void zip_close(ar_archive *ar)
{
    ar_archive_zip *zip = (ar_archive_zip *)ar;
    free(zip->entry.name);
    free(zip->entry.raw_name);
    zip_clear_uncompress(&zip->uncomp);
}

/* Fallback parser used when the central directory is unreadable: scans for
 * the next local file header at/after offset and loads it as the current
 * entry. Sets ar->at_eof when no further header is found after at least one
 * successful entry. Returns false on scan/seek/parse failure or EOF. */
static bool zip_parse_local_entry(ar_archive *ar, off64_t offset)
{
    ar_archive_zip *zip = (ar_archive_zip *)ar;
    struct zip_entry entry;

    offset = zip_find_next_local_file_entry(ar->stream, offset);
    if (offset < 0) {
        if (ar->entry_offset_next)
            ar->at_eof = true;
        else
            warn("Work around failed, no entries found in this file");
        return false;
    }
    if (!ar_seek(ar->stream, offset, SEEK_SET)) {
        warn("Couldn't seek to offset %" PRIi64, offset);
        return false;
    }
    if (!zip_parse_local_file_entry(zip, &entry))
        return false;

    ar->entry_offset = offset;
    ar->entry_offset_next = offset + ZIP_LOCAL_ENTRY_FIXED_SIZE + entry.namelen + entry.extralen + (off64_t)entry.datasize;
    /* overflow guard: a bogus datasize could wrap entry_offset_next backwards */
    if (ar->entry_offset_next <= ar->entry_offset) {
        warn("Compressed size is too large (%" PRIu64 ")", entry.datasize);
        return false;
    }
    ar->entry_size_uncompressed = (size_t)entry.uncompressed;
    ar->entry_filetime = ar_conv_dosdate_to_filetime(entry.dosdate);

    zip->entry.offset = offset;
    zip->entry.method = entry.method;
    zip->entry.flags = entry.flags;
    zip->entry.crc = entry.crc;
    free(zip->entry.name);
    zip->entry.name = NULL;
    free(zip->entry.raw_name);
    zip->entry.raw_name = NULL;
    zip->entry.dosdate = entry.dosdate;

    zip->progress.crc = 0;
    zip->progress.bytes_done = 0;
    zip->progress.data_left = (size_t)entry.datasize;
    zip_clear_uncompress(&zip->uncomp);

    /* directories are stored as zero-size entries whose name ends in '/';
       skip straight to the following entry (tail recursion) */
    if (entry.datasize == 0 && ar_entry_get_name(ar) &&
        zip->entry.name != NULL && *zip->entry.name &&
        zip->entry.name[strlen(zip->entry.name) - 1] == '/') {
        log("Skipping directory entry \"%s\"", zip->entry.name);
        return zip_parse_local_entry(ar, ar->entry_offset_next);
    }
    /* flag bit 3 defers sizes to a trailing data descriptor; mark the entry
       with a non-zero size so callers don't treat it as empty */
    if (entry.datasize == 0 && entry.uncompressed == 0 && (entry.flags & (1 << 3))) {
        warn("Deferring sizes to data descriptor isn't supported");
        ar->entry_size_uncompressed = 1;
    }

    return true;
}

/* Primary entry parser: reads the central directory record at offset and
 * loads it as the current entry. Sets ar->at_eof once offset passes the end
 * of the directory. Returns false on EOF or seek/parse failure. */
static bool zip_parse_entry(ar_archive *ar, off64_t offset)
{
    ar_archive_zip *zip = (ar_archive_zip *)ar;
    struct zip_entry entry;

    if (offset >= zip->dir.end_offset) {
        ar->at_eof = true;
        return false;
    }
    if (!ar_seek(ar->stream, offset, SEEK_SET)) {
        warn("Couldn't seek to offset %" PRIi64, offset);
        return false;
    }
    if (!zip_parse_directory_entry(zip, &entry)) {
        warn("Couldn't read directory entry @%" PRIi64, offset);
        return false;
    }

    ar->entry_offset = offset;
    ar->entry_offset_next = offset + ZIP_DIR_ENTRY_FIXED_SIZE + entry.namelen + entry.extralen + entry.commentlen;
    ar->entry_size_uncompressed = (size_t)entry.uncompressed;
    ar->entry_filetime = ar_conv_dosdate_to_filetime(entry.dosdate);

    /* the central directory record points back at the local file header */
    zip->entry.offset = entry.header_offset;
    zip->entry.method = entry.method;
    zip->entry.flags = entry.flags;
    zip->entry.crc = entry.crc;
    free(zip->entry.name);
    zip->entry.name = NULL;
    free(zip->entry.raw_name);
    zip->entry.raw_name = NULL;
    zip->entry.dosdate = entry.dosdate;

    zip->progress.crc = 0;
    zip->progress.bytes_done = 0;
    zip->progress.data_left = (size_t)entry.datasize;
    zip_clear_uncompress(&zip->uncomp);

    /* skip directory entries: zero data, made by MS-DOS (0) or UNIX (3)
       hosts, with a directory bit set in the external attributes
       (NOTE(review): 0x40000010 mixes the UNIX S_IFDIR high bits and the
       DOS directory bit — confirm against the ZIP APPNOTE) */
    if (entry.datasize == 0 && ((entry.version >> 8) == 0 || (entry.version >> 8) == 3) && (entry.attr_external & 0x40000010)) {
        log("Skipping directory entry \"%s\"", zip_get_name(ar, false));
        return zip_parse_entry(ar, ar->entry_offset_next);
    }

    return true;
}

/* Copies count bytes of an uncompressed (METHOD_STORE) entry straight from
 * the stream into buffer, updating the progress counters. */
static bool zip_copy_stored(ar_archive_zip *zip, void *buffer, size_t count)
{
    if (count > zip->progress.data_left) {
        warn("Unexpected EOS in stored data");
        return false;
    }
    if (ar_read(zip->super.stream, buffer, count) != count) {
        warn("Unexpected EOF in stored data");
        return false;
    }
    zip->progress.data_left -= count;
    zip->progress.bytes_done += count;
    return true;
}

/* ar_archive uncompress callback: produces count bytes of the current entry,
 * dispatching between stored copy and the generic decompressors, and checks
 * the CRC-32 once the entry has been fully extracted. */
static bool zip_uncompress(ar_archive *ar, void *buffer, size_t count)
{
    ar_archive_zip *zip = (ar_archive_zip *)ar;
    if (zip->progress.bytes_done == 0) {
        /* flag bits 0/6 signal (strong) encryption, which is unsupported */
        if ((zip->entry.flags & ((1 << 0) | (1 << 6)))) {
            warn("Encrypted archives aren't supported");
            return false;
        }
        if (!zip_seek_to_compressed_data(zip)) {
            warn("Couldn't find data for file");
            return false;
        }
    }
    if (count > ar->entry_size_uncompressed - zip->progress.bytes_done) {
        warn("Requesting too much data (%" PRIuPTR " < %" PRIuPTR ")", ar->entry_size_uncompressed - zip->progress.bytes_done, count);
        return false;
    }
    if (zip->entry.method == METHOD_STORE) {
        if (!zip_copy_stored(zip, buffer, count))
            return false;
    }
    else if (zip->deflatedonly && zip->entry.method != METHOD_DEFLATE) {
        warn("Only store and deflate compression methods are allowed");
        return false;
    }
    else {
        if (!zip_uncompress_part(zip, buffer, count))
            return false;
    }

    zip->progress.crc = ar_crc32(zip->progress.crc, buffer, count);
    if (zip->progress.bytes_done < ar->entry_size_uncompressed)
        return true;
    /* entry complete: note leftover compressed data, then verify checksum */
    if (zip->uncomp.initialized ? !zip->uncomp.input.at_eof || zip->uncomp.input.bytes_left : zip->progress.data_left)
        log("Compressed block has more data than required");
    if (zip->progress.crc != zip->entry.crc) {
        warn("Checksum of extracted data doesn't match");
        return false;
    }
    return true;
}

/* Returns the archive comment length when buffer is NULL, otherwise copies
 * up to count bytes of the comment into buffer and returns the bytes read. */
static size_t zip_get_global_comment(ar_archive *ar, void *buffer, size_t count)
{
    ar_archive_zip *zip = (ar_archive_zip *)ar;
    if (!zip->comment_size)
        return 0;
    if (!buffer)
        return zip->comment_size;
    if (!ar_seek(ar->stream, zip->comment_offset, SEEK_SET))
        return 0;
    if (count > zip->comment_size)
        count = zip->comment_size;
    return ar_read(ar->stream, buffer, count);
}

/* Opens stream as a ZIP archive. Locates and parses the end-of-central-
 * directory record; if the central directory itself is unreadable, falls
 * back to brute-force scanning for local file headers. deflatedonly
 * restricts extraction to store/deflate entries. Returns NULL on failure. */
ar_archive *ar_open_zip_archive(ar_stream *stream, bool deflatedonly)
{
    ar_archive *ar;
    ar_archive_zip *zip;
    struct zip_eocd64 eocd = { 0 };

    off64_t offset = zip_find_end_of_central_directory(stream);
    if (offset < 0)
        return NULL;
    if (!ar_seek(stream, offset, SEEK_SET))
        return NULL;
    if (!zip_parse_end_of_central_directory(stream, &eocd))
        return NULL;

    ar = ar_open_archive(stream, sizeof(ar_archive_zip), zip_close, zip_parse_entry, zip_get_name, zip_uncompress, zip_get_global_comment, eocd.dir_offset);
    if (!ar)
        return NULL;

    zip = (ar_archive_zip *)ar;
    zip->dir.end_offset = zip_find_end_of_last_directory_entry(stream, &eocd);
    if (zip->dir.end_offset < 0) {
        warn("Couldn't read central directory @%" PRIi64 ", trying to work around...", eocd.dir_offset);
        /* degrade gracefully: parse local file headers from offset 0 */
        ar->parse_entry = zip_parse_local_entry;
        ar->entry_offset_first = ar->entry_offset_next = 0;
    }
    zip->deflatedonly = deflatedonly;
    zip->comment_offset = offset + ZIP_END_OF_CENTRAL_DIR_SIZE;
    zip->comment_size = eocd.commentlen;

    return ar;
}

diff --git a/external/unarr/zip/zip.h b/external/unarr/zip/zip.h
new file mode 100644
index 00000000..b2ba34c6
--- /dev/null
+++ b/external/unarr/zip/zip.h
@@ -0,0 +1,173 @@
/* Copyright 2015 the unarr project authors (see AUTHORS file).
+ License: LGPLv3 */ + +#ifndef zip_zip_h +#define zip_zip_h + +#include "../common/unarr-imp.h" + +#ifdef HAVE_ZLIB +#include +#endif +#include "inflate.h" +#ifdef HAVE_BZIP2 +#include +#endif +#ifdef HAVE_LIBLZMA +#include +#else +#include "../lzmasdk/LzmaDec.h" +#endif +#include "../lzmasdk/Ppmd8.h" + +typedef struct ar_archive_zip_s ar_archive_zip; + +/***** parse-zip *****/ + +enum zip_signatures { + SIG_LOCAL_FILE_HEADER = 0x04034B50, + SIG_CENTRAL_DIRECTORY = 0x02014B50, + SIG_END_OF_CENTRAL_DIRECTORY_64 = 0x06064B50, + SIG_END_OF_CENTRAL_DIRECTORY_64_LOCATOR = 0x07064B50, + SIG_END_OF_CENTRAL_DIRECTORY = 0x06054B50, +}; + +enum compression_method { + METHOD_STORE = 0, METHOD_DEFLATE = 8, + METHOD_DEFLATE64 = 9, METHOD_BZIP2 = 12, METHOD_LZMA = 14, + METHOD_XZ = 95, METHOD_PPMD = 98, +}; + +#define ZIP_LOCAL_ENTRY_FIXED_SIZE 30 +#define ZIP_DIR_ENTRY_FIXED_SIZE 46 +#define ZIP_END_OF_CENTRAL_DIR_SIZE 22 + +struct zip_entry { + uint32_t signature; + uint16_t version; + uint16_t min_version; + uint16_t flags; + uint16_t method; + uint32_t dosdate; + uint32_t crc; + uint64_t datasize; + uint64_t uncompressed; + uint16_t namelen; + uint16_t extralen; + uint16_t commentlen; + uint32_t disk; + uint16_t attr_internal; + uint32_t attr_external; + off64_t header_offset; +}; + +struct zip_eocd64 { + uint32_t signature; + uint16_t version; + uint16_t min_version; + uint32_t diskno; + uint32_t diskno_dir; + uint64_t numentries_disk; + uint64_t numentries; + uint64_t dir_size; + off64_t dir_offset; + uint16_t commentlen; +}; + +struct ar_archive_zip_entry { + off64_t offset; + uint16_t method; + uint16_t flags; + uint32_t crc; + char *name; + char *raw_name; + uint32_t dosdate; +}; + +bool zip_seek_to_compressed_data(ar_archive_zip *zip); +bool zip_parse_local_file_entry(ar_archive_zip *zip, struct zip_entry *entry); +off64_t zip_find_next_local_file_entry(ar_stream *stream, off64_t offset); +bool zip_parse_directory_entry(ar_archive_zip *zip, struct zip_entry *entry); 
+off64_t zip_find_end_of_last_directory_entry(ar_stream *stream, struct zip_eocd64 *eocd); +bool zip_parse_end_of_central_directory(ar_stream *stream, struct zip_eocd64 *eocd); +off64_t zip_find_end_of_central_directory(ar_stream *stream); +const char *zip_get_name(ar_archive *ar, bool raw); + +/***** uncompress-zip *****/ + +struct ar_archive_zip_uncomp; + +typedef uint32_t (* zip_uncomp_uncompress_data_fn)(struct ar_archive_zip_uncomp *uncomp, void *buffer, uint32_t buffer_size, bool is_last_chunk); +typedef void (* zip_uncomp_clear_state_fn)(struct ar_archive_zip_uncomp *uncomp); + +struct InputBuffer { + uint8_t data[4096]; + uint16_t offset; + uint16_t bytes_left; + bool at_eof; +}; + +struct ByteReader { + IByteIn super; + struct InputBuffer *input; + ar_archive_zip *zip; +}; + +struct ar_archive_zip_uncomp { + bool initialized; + zip_uncomp_uncompress_data_fn uncompress_data; + zip_uncomp_clear_state_fn clear_state; + union { +#ifdef HAVE_ZLIB + z_stream zstream; +#endif + inflate_state *inflate; +#ifdef HAVE_BZIP2 + bz_stream bstream; +#endif +#ifdef HAVE_LIBLZMA + lzma_stream lzmastream; +#else + struct { + CLzmaDec dec; + ELzmaFinishMode finish; + ISzAlloc alloc; + } lzma; +#endif //HAVE_LIBLZMA + struct { + CPpmd8 ctx; + struct ByteReader bytein; + ISzAlloc alloc; + } ppmd8; + } state; + struct InputBuffer input; +}; + +bool zip_uncompress_part(ar_archive_zip *zip, void *buffer, size_t buffer_size); +void zip_clear_uncompress(struct ar_archive_zip_uncomp *uncomp); + +/***** zip *****/ + +struct ar_archive_zip_dir { + /* off64_t offset; // use ar_archive::entry_offset_first */ + off64_t end_offset; +}; + +struct ar_archive_zip_progress { + size_t data_left; + size_t bytes_done; + uint32_t crc; +}; + +struct ar_archive_zip_s { + ar_archive super; + struct ar_archive_zip_dir dir; + struct ar_archive_zip_entry entry; + struct ar_archive_zip_uncomp uncomp; + struct ar_archive_zip_progress progress; + bool deflatedonly; + off64_t comment_offset; + uint16_t 
comment_size; +}; + +#endif