mirror of
https://github.com/nodejs/node.git
synced 2024-11-21 10:59:27 +00:00
deps,src: use SIMD for normal base64 encoding
PR-URL: https://github.com/nodejs/node/pull/39775 Reviewed-By: Luigi Pinca <luigipinca@gmail.com>
This commit is contained in:
parent
71ca6d7d6a
commit
f561f31f1c
32
LICENSE
32
LICENSE
@ -1839,3 +1839,35 @@ The externally maintained libraries used by Node.js are:
|
||||
OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
"""
|
||||
|
||||
- base64, located at deps/base64/base64/, is licensed as follows:
|
||||
"""
|
||||
Copyright (c) 2005-2007, Nick Galbreath
|
||||
Copyright (c) 2013-2019, Alfred Klomp
|
||||
Copyright (c) 2015-2017, Wojciech Mula
|
||||
Copyright (c) 2016-2017, Matthieu Darbois
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
"""
|
||||
|
14
deps/base64/README.md
vendored
Normal file
14
deps/base64/README.md
vendored
Normal file
@ -0,0 +1,14 @@
|
||||
# base64
|
||||
|
||||
This project boosts base64 encoding/decoding performance by utilizing SIMD
|
||||
operations where possible.
|
||||
|
||||
The source is pulled from: https://github.com/aklomp/base64
|
||||
|
||||
Active development occurs in the default branch (currently named `master`).
|
||||
|
||||
## Updating
|
||||
|
||||
```sh
|
||||
$ git clone https://github.com/aklomp/base64
|
||||
```
|
191
deps/base64/base64.gyp
vendored
Normal file
191
deps/base64/base64.gyp
vendored
Normal file
@ -0,0 +1,191 @@
|
||||
{
|
||||
'variables': {
|
||||
'arm_fpu%': '',
|
||||
'target_arch%': '',
|
||||
},
|
||||
'targets': [
|
||||
{
|
||||
'target_name': 'base64',
|
||||
'type': 'static_library',
|
||||
'include_dirs': [ 'base64/include', 'base64/lib' ],
|
||||
'direct_dependent_settings': {
|
||||
'include_dirs': [ 'base64/include' ],
|
||||
'defines': [ 'BASE64_STATIC_DEFINE' ],
|
||||
},
|
||||
'defines': [ 'BASE64_STATIC_DEFINE' ],
|
||||
'sources': [
|
||||
'base64/include/libbase64.h',
|
||||
'base64/lib/arch/generic/codec.c',
|
||||
'base64/lib/tables/tables.c',
|
||||
'base64/lib/codec_choose.c',
|
||||
'base64/lib/codecs.h',
|
||||
'base64/lib/lib.c',
|
||||
],
|
||||
|
||||
'conditions': [
|
||||
[ 'arm_fpu=="neon" and target_arch=="arm"', {
|
||||
'defines': [ 'HAVE_NEON32=1' ],
|
||||
'dependencies': [ 'base64_neon32' ],
|
||||
}, {
|
||||
'sources': [ 'base64/lib/arch/neon32/codec.c' ],
|
||||
}],
|
||||
|
||||
# arm64 requires NEON, so it's safe to always use it
|
||||
[ 'target_arch=="arm64"', {
|
||||
'defines': [ 'HAVE_NEON64=1' ],
|
||||
'dependencies': [ 'base64_neon64' ],
|
||||
}, {
|
||||
'sources': [ 'base64/lib/arch/neon64/codec.c' ],
|
||||
}],
|
||||
|
||||
# Runtime detection will happen for x86 CPUs
|
||||
[ 'target_arch in "ia32 x64 x32"', {
|
||||
'defines': [
|
||||
'HAVE_SSSE3=1',
|
||||
'HAVE_SSE41=1',
|
||||
'HAVE_SSE42=1',
|
||||
'HAVE_AVX=1',
|
||||
'HAVE_AVX2=1',
|
||||
],
|
||||
'dependencies': [
|
||||
'base64_ssse3',
|
||||
'base64_sse41',
|
||||
'base64_sse42',
|
||||
'base64_avx',
|
||||
'base64_avx2',
|
||||
],
|
||||
}, {
|
||||
'sources': [
|
||||
'base64/lib/arch/ssse3/codec.c',
|
||||
'base64/lib/arch/sse41/codec.c',
|
||||
'base64/lib/arch/sse42/codec.c',
|
||||
'base64/lib/arch/avx/codec.c',
|
||||
'base64/lib/arch/avx2/codec.c',
|
||||
],
|
||||
}],
|
||||
],
|
||||
},
|
||||
|
||||
{
|
||||
'target_name': 'base64_ssse3',
|
||||
'type': 'static_library',
|
||||
'include_dirs': [ 'base64/include', 'base64/lib' ],
|
||||
'sources': [ 'base64/lib/arch/ssse3/codec.c' ],
|
||||
'defines': [ 'BASE64_STATIC_DEFINE', 'HAVE_SSSE3=1' ],
|
||||
'conditions': [
|
||||
[ 'OS!="win"', {
|
||||
'cflags': [ '-mssse3' ],
|
||||
'xcode_settings': {
|
||||
'OTHER_CFLAGS': [ '-mssse3' ]
|
||||
},
|
||||
}],
|
||||
],
|
||||
},
|
||||
|
||||
{
|
||||
'target_name': 'base64_sse41',
|
||||
'type': 'static_library',
|
||||
'include_dirs': [ 'base64/include', 'base64/lib' ],
|
||||
'sources': [ 'base64/lib/arch/sse41/codec.c' ],
|
||||
'defines': [ 'BASE64_STATIC_DEFINE', 'HAVE_SSE41=1' ],
|
||||
'conditions': [
|
||||
[ 'OS!="win"', {
|
||||
'cflags': [ '-msse4.1' ],
|
||||
'xcode_settings': {
|
||||
'OTHER_CFLAGS': [ '-msse4.1' ]
|
||||
},
|
||||
}],
|
||||
],
|
||||
},
|
||||
|
||||
{
|
||||
'target_name': 'base64_sse42',
|
||||
'type': 'static_library',
|
||||
'include_dirs': [ 'base64/include', 'base64/lib' ],
|
||||
'sources': [ 'base64/lib/arch/sse42/codec.c' ],
|
||||
'defines': [ 'BASE64_STATIC_DEFINE', 'HAVE_SSE42=1' ],
|
||||
'conditions': [
|
||||
[ 'OS!="win"', {
|
||||
'cflags': [ '-msse4.2' ],
|
||||
'xcode_settings': {
|
||||
'OTHER_CFLAGS': [ '-msse4.2' ]
|
||||
},
|
||||
}],
|
||||
],
|
||||
},
|
||||
|
||||
{
|
||||
'target_name': 'base64_avx',
|
||||
'type': 'static_library',
|
||||
'include_dirs': [ 'base64/include', 'base64/lib' ],
|
||||
'sources': [ 'base64/lib/arch/avx/codec.c' ],
|
||||
'defines': [ 'BASE64_STATIC_DEFINE', 'HAVE_AVX=1' ],
|
||||
'conditions': [
|
||||
[ 'OS!="win"', {
|
||||
'cflags': [ '-mavx' ],
|
||||
'xcode_settings': {
|
||||
'OTHER_CFLAGS': [ '-mavx' ]
|
||||
},
|
||||
}, {
|
||||
'msvs_settings': {
|
||||
'VCCLCompilerTool': {
|
||||
'AdditionalOptions': [
|
||||
'/arch:AVX'
|
||||
],
|
||||
},
|
||||
},
|
||||
}],
|
||||
],
|
||||
},
|
||||
|
||||
{
|
||||
'target_name': 'base64_avx2',
|
||||
'type': 'static_library',
|
||||
'include_dirs': [ 'base64/include', 'base64/lib' ],
|
||||
'sources': [ 'base64/lib/arch/avx2/codec.c' ],
|
||||
'defines': [ 'BASE64_STATIC_DEFINE', 'HAVE_AVX2=1' ],
|
||||
'conditions': [
|
||||
[ 'OS!="win"', {
|
||||
'cflags': [ '-mavx2' ],
|
||||
'xcode_settings': {
|
||||
'OTHER_CFLAGS': [ '-mavx2' ]
|
||||
},
|
||||
}, {
|
||||
'msvs_settings': {
|
||||
'VCCLCompilerTool': {
|
||||
'AdditionalOptions': [
|
||||
'/arch:AVX2'
|
||||
],
|
||||
},
|
||||
},
|
||||
}],
|
||||
],
|
||||
},
|
||||
|
||||
{
|
||||
'target_name': 'base64_neon32',
|
||||
'type': 'static_library',
|
||||
'include_dirs': [ 'base64/include', 'base64/lib' ],
|
||||
'sources': [ 'base64/lib/arch/neon32/codec.c' ],
|
||||
'defines': [ 'BASE64_STATIC_DEFINE', 'HAVE_NEON32=1' ],
|
||||
'conditions': [
|
||||
[ 'OS!="win"', {
|
||||
'cflags': [ '-mfpu=neon' ],
|
||||
'xcode_settings': {
|
||||
'OTHER_CFLAGS': [ '-mfpu=neon' ]
|
||||
},
|
||||
}],
|
||||
],
|
||||
},
|
||||
|
||||
{
|
||||
'target_name': 'base64_neon64',
|
||||
'type': 'static_library',
|
||||
'include_dirs': [ 'base64/include', 'base64/lib' ],
|
||||
'sources': [ 'base64/lib/arch/neon64/codec.c' ],
|
||||
'defines': [ 'BASE64_STATIC_DEFINE', 'HAVE_NEON64=1' ],
|
||||
# NEON is required in arm64, so no -mfpu flag is needed
|
||||
}
|
||||
|
||||
]
|
||||
}
|
22
deps/base64/base64/.editorconfig
vendored
Normal file
22
deps/base64/base64/.editorconfig
vendored
Normal file
@ -0,0 +1,22 @@
|
||||
# https://EditorConfig.org
|
||||
root = true
|
||||
|
||||
[*]
|
||||
charset = utf-8
|
||||
insert_final_newline = true
|
||||
trim_trailing_whitespace = true
|
||||
|
||||
indent_style = tab
|
||||
tab_width = 8
|
||||
indent_size = 8
|
||||
|
||||
[CMakeLists.txt]
|
||||
tab_width = 4
|
||||
indent_style = space
|
||||
[*.cmake]
|
||||
tab_width = 4
|
||||
indent_style = space
|
||||
|
||||
[*.py]
|
||||
tab_width = 4
|
||||
indent_style = space
|
133
deps/base64/base64/.github/workflows/test.yml
vendored
Normal file
133
deps/base64/base64/.github/workflows/test.yml
vendored
Normal file
@ -0,0 +1,133 @@
|
||||
name: Test
|
||||
|
||||
on: [push, pull_request]
|
||||
|
||||
jobs:
|
||||
makefile-test:
|
||||
name: makefile-${{ matrix.runner }}-amd64-${{ matrix.compiler }} ${{ ((matrix.openmp == 1) && '+openmp') || '' }}
|
||||
runs-on: ${{ matrix.runner }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
runner: ["ubuntu-18.04"]
|
||||
compiler: ["gcc", "clang"]
|
||||
openmp: ["0", "1"]
|
||||
include:
|
||||
- runner: "macos-11"
|
||||
compiler: "clang"
|
||||
openmp: "0"
|
||||
env:
|
||||
OPENMP: ${{ matrix.openmp }}
|
||||
OMP_NUM_THREADS: ${{ ((matrix.openmp == 1) && '2') || '0' }}
|
||||
CC: ${{ matrix.compiler }}
|
||||
OBJCOPY: ${{ (startsWith(matrix.runner, 'macos') && 'echo') || 'objcopy' }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
- name: Run tests
|
||||
run: ./test/ci/test.sh
|
||||
|
||||
cmake-test:
|
||||
name: cmake-${{ matrix.runner }}
|
||||
runs-on: ${{ matrix.runner }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
runner: ["ubuntu-18.04", "macos-11", "windows-2019"]
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
- name: CMake Configure
|
||||
run: >
|
||||
cmake
|
||||
-B out
|
||||
-Werror=dev
|
||||
-DBASE64_BUILD_TESTS=ON
|
||||
${{ runner.os != 'Windows' && '-DCMAKE_BUILD_TYPE=Release' || '' }}
|
||||
${{ runner.os == 'macOS' && '-DBASE64_WITH_AVX2=OFF' || '' }}
|
||||
- name: CMake Build
|
||||
run: cmake --build out --config Release --verbose
|
||||
- name: CTest
|
||||
run: ctest --no-tests=error --test-dir out -VV --build-config Release
|
||||
|
||||
alpine-makefile-test:
|
||||
name: makefile-alpine-amd64-gcc
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
image: alpine:3.12
|
||||
env:
|
||||
CC: gcc
|
||||
steps:
|
||||
- name: Install deps
|
||||
run: apk add --update bash build-base git
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
- name: Run tests
|
||||
run: ./test/ci/test.sh
|
||||
|
||||
alpine-cmake-test:
|
||||
name: cmake-alpine-amd64-gcc
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
image: alpine:3.12
|
||||
steps:
|
||||
- name: Install deps
|
||||
run: apk add --update bash build-base cmake git
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
- name: CMake Configure
|
||||
run: cmake -B out -Werror=dev -DBASE64_BUILD_TESTS=ON -DCMAKE_BUILD_TYPE=Release
|
||||
- name: CMake Build
|
||||
run: cmake --build out --config Release --verbose
|
||||
- name: CTest
|
||||
run: ctest --no-tests=error -VV --build-config Release
|
||||
working-directory: ./out
|
||||
|
||||
alpine-alt-arch-makefile-test:
|
||||
name: makefile-alpine-${{matrix.arch}}-${{matrix.cc}}
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
arch: [armv7, aarch64, s390x, ppc64le]
|
||||
cc: [gcc, clang]
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
- uses: uraimo/run-on-arch-action@v2
|
||||
with:
|
||||
arch: ${{matrix.arch}}
|
||||
distro: alpine_latest
|
||||
env: |
|
||||
CC: ${{matrix.cc}}
|
||||
install: apk add --update bash build-base cmake git ${{matrix.cc}}
|
||||
run: ./test/ci/test.sh
|
||||
|
||||
alpine-alt-arch-cmake-test:
|
||||
name: cmake-alpine-${{matrix.arch}}-${{matrix.cc}}
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
arch: [armv7, aarch64, s390x, ppc64le]
|
||||
cc: [gcc, clang]
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
- uses: uraimo/run-on-arch-action@v2
|
||||
with:
|
||||
arch: ${{matrix.arch}}
|
||||
distro: alpine_latest
|
||||
env: |
|
||||
CC: ${{matrix.cc}}
|
||||
install: apk add --update bash build-base cmake git ${{matrix.cc}}
|
||||
run: |
|
||||
echo "::group::CMake Configure"
|
||||
cmake -B out -Werror=dev -DBASE64_BUILD_TESTS=ON -DCMAKE_BUILD_TYPE=Release
|
||||
echo "::endgroup::CMake Configure"
|
||||
echo "::group::CMake Build"
|
||||
cmake --build out --config Release --verbose
|
||||
echo "::endgroup::CMake Build"
|
||||
echo "::group::CTest"
|
||||
ctest --no-tests=error --test-dir out -VV --build-config Release
|
||||
echo "::endgroup::CTest"
|
12
deps/base64/base64/.gitignore
vendored
Normal file
12
deps/base64/base64/.gitignore
vendored
Normal file
@ -0,0 +1,12 @@
|
||||
*.o
|
||||
bin/base64
|
||||
lib/config.h
|
||||
test/benchmark
|
||||
test/test_base64
|
||||
|
||||
# visual studio symbol db, etc.
|
||||
.vs/
|
||||
# build directory used by CMakePresets
|
||||
out/
|
||||
# private cmake presets
|
||||
CMakeUserPresets.json
|
286
deps/base64/base64/CMakeLists.txt
vendored
Normal file
286
deps/base64/base64/CMakeLists.txt
vendored
Normal file
@ -0,0 +1,286 @@
|
||||
# Written in 2016-2017, 2021 by Henrik Steffen Gaßmann henrik@gassmann.onl
|
||||
#
|
||||
# To the extent possible under law, the author(s) have dedicated all
|
||||
# copyright and related and neighboring rights to this software to the
|
||||
# public domain worldwide. This software is distributed without any warranty.
|
||||
#
|
||||
# You should have received a copy of the CC0 Public Domain Dedication
|
||||
# along with this software. If not, see
|
||||
#
|
||||
# http://creativecommons.org/publicdomain/zero/1.0/
|
||||
#
|
||||
########################################################################
|
||||
cmake_minimum_required(VERSION 3.10.2)
|
||||
|
||||
# new dependent option syntax. We are already compliant
|
||||
if (POLICY CMP0127)
|
||||
cmake_policy(SET CMP0127 NEW)
|
||||
endif()
|
||||
|
||||
project(base64 LANGUAGES C VERSION 0.4.0)
|
||||
|
||||
include(GNUInstallDirs)
|
||||
include(CMakeDependentOption)
|
||||
include(CheckIncludeFile)
|
||||
include(FeatureSummary)
|
||||
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules")
|
||||
|
||||
#######################################################################
|
||||
# platform detection
|
||||
include(TargetArch)
|
||||
detect_target_architecture(_TARGET_ARCH)
|
||||
|
||||
check_include_file(getopt.h HAVE_GETOPT_H)
|
||||
cmake_dependent_option(BASE64_BUILD_CLI "Build the cli for encoding and decoding" ON "HAVE_GETOPT_H" OFF)
|
||||
add_feature_info(CLI BASE64_BUILD_CLI "enables the CLI executable for encoding and decoding")
|
||||
|
||||
###################################################################
|
||||
# optional/conditional dependencies
|
||||
find_package(OpenMP)
|
||||
set_package_properties(OpenMP PROPERTIES
|
||||
TYPE OPTIONAL
|
||||
PURPOSE "Allows to utilize OpenMP"
|
||||
)
|
||||
|
||||
|
||||
########################################################################
|
||||
# Compilation options
|
||||
option(BASE64_WERROR "Treat warnings as error" ON)
|
||||
option(BASE64_BUILD_TESTS "add test projects" OFF)
|
||||
cmake_dependent_option(BASE64_WITH_OpenMP "use OpenMP" OFF "OpenMP_FOUND" OFF)
|
||||
add_feature_info("OpenMP codec" BASE64_WITH_OpenMP "spreads codec work accross multiple threads")
|
||||
cmake_dependent_option(BASE64_REGENERATE_TABLES "regenerate the codec tables" OFF "NOT CMAKE_CROSSCOMPILING" OFF)
|
||||
|
||||
set(_IS_X86 "_TARGET_ARCH_x86 OR _TARGET_ARCH_x64")
|
||||
cmake_dependent_option(BASE64_WITH_SSSE3 "add SSSE 3 codepath" ON ${_IS_X86} OFF)
|
||||
add_feature_info(SSSE3 BASE64_WITH_SSSE3 "add SSSE 3 codepath")
|
||||
cmake_dependent_option(BASE64_WITH_SSE41 "add SSE 4.1 codepath" ON ${_IS_X86} OFF)
|
||||
add_feature_info(SSE4.1 BASE64_WITH_SSE41 "add SSE 4.1 codepath")
|
||||
cmake_dependent_option(BASE64_WITH_SSE42 "add SSE 4.2 codepath" ON ${_IS_X86} OFF)
|
||||
add_feature_info(SSE4.2 BASE64_WITH_SSE42 "add SSE 4.2 codepath")
|
||||
cmake_dependent_option(BASE64_WITH_AVX "add AVX codepath" ON ${_IS_X86} OFF)
|
||||
add_feature_info(AVX BASE64_WITH_AVX "add AVX codepath")
|
||||
cmake_dependent_option(BASE64_WITH_AVX2 "add AVX 2 codepath" ON ${_IS_X86} OFF)
|
||||
add_feature_info(AVX2 BASE64_WITH_AVX2 "add AVX2 codepath")
|
||||
|
||||
cmake_dependent_option(BASE64_WITH_NEON32 "add NEON32 codepath" OFF _TARGET_ARCH_arm OFF)
|
||||
add_feature_info(NEON32 BASE64_WITH_NEON32 "add NEON32 codepath")
|
||||
|
||||
cmake_dependent_option(BASE64_WITH_NEON64 "add NEON64 codepath" ON _TARGET_ARCH_arm64 OFF)
|
||||
add_feature_info(NEON64 BASE64_WITH_NEON64 "add NEON64 codepath")
|
||||
|
||||
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/bin")
|
||||
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/bin")
|
||||
|
||||
########################################################################
|
||||
# Regenerate headers
|
||||
|
||||
if (BASE64_REGENERATE_TABLES)
|
||||
# Generate tables in build folder and copy to source tree.
|
||||
# Don't add the tables in the source tree to the outputs, to avoid `make clean` removing them.
|
||||
add_executable(table_generator
|
||||
lib/tables/table_generator.c
|
||||
)
|
||||
|
||||
add_custom_command(OUTPUT table_dec_32bit.h "${CMAKE_CURRENT_SOURCE_DIR}/lib/tables/table_dec_32bit.h"
|
||||
COMMAND table_generator > table_dec_32bit.h
|
||||
COMMAND "${CMAKE_COMMAND}" -E copy table_dec_32bit.h "${CMAKE_CURRENT_SOURCE_DIR}/lib/tables/table_dec_32bit.h"
|
||||
DEPENDS table_generator
|
||||
)
|
||||
set(Python_ADDITIONAL_VERSIONS 3)
|
||||
find_package(PythonInterp REQUIRED)
|
||||
add_custom_command(OUTPUT table_enc_12bit.h "${CMAKE_CURRENT_SOURCE_DIR}/lib/tables/table_enc_12bit.h"
|
||||
COMMAND "${PYTHON_EXECUTABLE}" "${CMAKE_CURRENT_SOURCE_DIR}/lib/tables/table_enc_12bit.py" > table_enc_12bit.h
|
||||
COMMAND "${CMAKE_COMMAND}" -E copy table_enc_12bit.h "${CMAKE_CURRENT_SOURCE_DIR}/lib/tables/table_enc_12bit.h"
|
||||
DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/lib/tables/table_enc_12bit.py"
|
||||
)
|
||||
endif()
|
||||
|
||||
|
||||
########################################################################
|
||||
# library project
|
||||
add_library(base64
|
||||
# library files
|
||||
lib/lib.c
|
||||
lib/codec_choose.c
|
||||
include/libbase64.h
|
||||
|
||||
lib/tables/tables.c
|
||||
# Add generated headers explicitly to target, to insert them in the dependency tree
|
||||
lib/tables/table_dec_32bit.h
|
||||
lib/tables/table_enc_12bit.h
|
||||
|
||||
# codec implementations
|
||||
lib/arch/generic/codec.c
|
||||
|
||||
lib/arch/ssse3/codec.c
|
||||
lib/arch/sse41/codec.c
|
||||
lib/arch/sse42/codec.c
|
||||
lib/arch/avx/codec.c
|
||||
lib/arch/avx2/codec.c
|
||||
|
||||
lib/arch/neon32/codec.c
|
||||
lib/arch/neon64/codec.c
|
||||
)
|
||||
|
||||
target_include_directories(base64
|
||||
PUBLIC
|
||||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
|
||||
PRIVATE
|
||||
"${CMAKE_CURRENT_BINARY_DIR}"
|
||||
)
|
||||
|
||||
####################################################################
|
||||
# platform/compiler specific configuration
|
||||
set_target_properties(base64 PROPERTIES
|
||||
C_STANDARD 99
|
||||
C_STANDARD_REQUIRED YES
|
||||
C_EXTENSIONS OFF
|
||||
DEFINE_SYMBOL BASE64_EXPORTS
|
||||
VERSION ${PROJECT_VERSION}
|
||||
SOVERSION ${PROJECT_VERSION_MAJOR}
|
||||
)
|
||||
|
||||
#generate_export_header(base64)
|
||||
# the following definitions and those in libbase64.h have been
|
||||
# kept forward compatible in case we ever switch to generate_export_header
|
||||
if (BUILD_SHARED_LIBS)
|
||||
set_target_properties(base64 PROPERTIES
|
||||
C_VISIBILITY_PRESET hidden
|
||||
)
|
||||
else()
|
||||
target_compile_definitions(base64
|
||||
PUBLIC
|
||||
BASE64_STATIC_DEFINE
|
||||
)
|
||||
endif()
|
||||
|
||||
target_compile_options(base64 PRIVATE
|
||||
$<$<C_COMPILER_ID:MSVC>:
|
||||
/W4
|
||||
/we4013 # Error warning C4013: 'function' undefined; assuming extern returning int
|
||||
/we4700 # Error warning C4700: uninitialized local variable
|
||||
/we4715 # not all control paths return a value
|
||||
/we4003 # not enough actual parameters for macro
|
||||
/wd4456 # disable warning C4456: declaration of 'xxx' hides previous local declaration
|
||||
>
|
||||
$<$<NOT:$<C_COMPILER_ID:MSVC>>:
|
||||
-Wall
|
||||
-Wextra
|
||||
-Wpedantic
|
||||
>
|
||||
$<$<BOOL:${BASE64_WERROR}>:$<IF:$<C_COMPILER_ID:MSVC>,/WX,-Werror>>
|
||||
)
|
||||
|
||||
target_compile_definitions(base64 PRIVATE
|
||||
$<$<C_COMPILER_ID:MSVC>:
|
||||
# remove unnecessary warnings about unchecked iterators
|
||||
_SCL_SECURE_NO_WARNINGS
|
||||
>
|
||||
)
|
||||
|
||||
########################################################################
|
||||
# SIMD settings
|
||||
include(TargetSIMDInstructionSet)
|
||||
define_SIMD_compile_flags()
|
||||
|
||||
if (_TARGET_ARCH STREQUAL "x86" OR _TARGET_ARCH STREQUAL "x64")
|
||||
# configure_codec(<TYPE> [<msvc-define>])
#
# If the BASE64_WITH_<TYPE> option is enabled, attach the per-codec compiler
# flags stored in COMPILE_FLAGS_<TYPE> to lib/arch/<type>/codec.c, where
# <type> is <TYPE> lowercased.
# NOTE(review): COMPILE_FLAGS_<TYPE> is presumably populated by
# define_SIMD_compile_flags() -- confirm in cmake/Modules.
#
# The optional second argument is a preprocessor symbol that is added to the
# same source file's COMPILE_DEFINITIONS, for MSVC builds only.
macro(configure_codec _TYPE)
    if (BASE64_WITH_${_TYPE})
        string(TOLOWER "${_TYPE}" _DIR)
        set_source_files_properties("lib/arch/${_DIR}/codec.c" PROPERTIES
            COMPILE_FLAGS "${COMPILE_FLAGS_${_TYPE}}"
        )

        # Only MSVC gets the extra define; it is skipped when no second
        # argument was passed (AVX, AVX2).
        if (${ARGC} GREATER 1 AND MSVC)
            set_source_files_properties("lib/arch/${_DIR}/codec.c" PROPERTIES
                COMPILE_DEFINITIONS ${ARGV1}
            )
        endif()
    endif()
endmacro()

# The second argument mirrors the feature-test macro that GCC/Clang predefine
# for the instruction set: __SSSE3__, __SSE4_1__, __SSE4_2__.
# The SSE4.x names were previously misspelled as __SSSE4_1__/__SSSE4_2__,
# which no compiler or header recognizes.
configure_codec(SSSE3 __SSSE3__)
configure_codec(SSE41 __SSE4_1__)
configure_codec(SSE42 __SSE4_2__)
configure_codec(AVX)
configure_codec(AVX2)
|
||||
|
||||
elseif (_TARGET_ARCH STREQUAL "arm")
|
||||
set(BASE64_NEON32_CFLAGS "${COMPILE_FLAGS_NEON32}" CACHE STRING "the NEON32 compile flags (for 'lib/arch/neon32/codec.c')")
|
||||
mark_as_advanced(BASE64_NEON32_CFLAGS)
|
||||
|
||||
if (BASE64_WITH_NEON32)
|
||||
set_source_files_properties("lib/arch/neon32/codec.c" PROPERTIES
|
||||
COMPILE_FLAGS "${BASE64_NEON32_CFLAGS} "
|
||||
)
|
||||
endif()
|
||||
|
||||
#elseif (_TARGET_ARCH STREQUAL "arm64" AND BASE64_WITH_NEON64)
|
||||
|
||||
endif()
|
||||
|
||||
configure_file("${CMAKE_CURRENT_LIST_DIR}/cmake/config.h.in" "${CMAKE_CURRENT_BINARY_DIR}/config.h" @ONLY)
|
||||
|
||||
########################################################################
|
||||
# OpenMP Settings
|
||||
if (BASE64_WITH_OpenMP)
|
||||
target_link_libraries(base64 PRIVATE OpenMP::OpenMP_C)
|
||||
endif()
|
||||
|
||||
########################################################################
|
||||
if (BASE64_BUILD_TESTS)
|
||||
enable_testing()
|
||||
add_subdirectory(test)
|
||||
endif()
|
||||
|
||||
########################################################################
|
||||
# base64
|
||||
if (BASE64_BUILD_CLI)
|
||||
add_executable(base64-bin
|
||||
bin/base64.c
|
||||
)
|
||||
target_link_libraries(base64-bin PRIVATE base64)
|
||||
set_target_properties(base64-bin PROPERTIES
|
||||
OUTPUT_NAME base64
|
||||
)
|
||||
endif()
|
||||
|
||||
########################################################################
|
||||
# cmake install
|
||||
# Install the public headers. An explicit DESTINATION is used instead of
# `TYPE INCLUDE` because the TYPE keyword requires CMake >= 3.14, while this
# project declares cmake_minimum_required(VERSION 3.10.2).
# CMAKE_INSTALL_INCLUDEDIR comes from the GNUInstallDirs module included above.
install(DIRECTORY include/ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
|
||||
install(TARGETS base64
|
||||
EXPORT base64-targets
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
|
||||
)
|
||||
if (BASE64_BUILD_CLI)
|
||||
install(TARGETS base64-bin EXPORT base64-targets DESTINATION ${CMAKE_INSTALL_BINDIR})
|
||||
endif()
|
||||
|
||||
include(CMakePackageConfigHelpers)
|
||||
configure_package_config_file(cmake/base64-config.cmake.in
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/base64-config.cmake"
|
||||
|
||||
INSTALL_DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}"
|
||||
)
|
||||
# Generate base64-config-version.cmake so find_package() can perform version
# checks (any requested version with the same major number is accepted).
# PROJECT_VERSION is set by the project() call; the previously referenced
# BASE64_VERSION is never defined (CMake variable names are case-sensitive),
# so the call only worked through the helper's fallback for an empty VERSION.
write_basic_package_version_file(
    "${CMAKE_CURRENT_BINARY_DIR}/base64-config-version.cmake"
    VERSION ${PROJECT_VERSION}
    COMPATIBILITY SameMajorVersion
)
|
||||
|
||||
install(FILES
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/base64-config.cmake"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/base64-config-version.cmake"
|
||||
DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}"
|
||||
)
|
||||
|
||||
install(EXPORT base64-targets
|
||||
NAMESPACE aklomp::
|
||||
DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}"
|
||||
)
|
||||
|
||||
########################################################################
|
||||
feature_summary(WHAT PACKAGES_FOUND PACKAGES_NOT_FOUND ENABLED_FEATURES DISABLED_FEATURES)
|
28
deps/base64/base64/LICENSE
vendored
Normal file
28
deps/base64/base64/LICENSE
vendored
Normal file
@ -0,0 +1,28 @@
|
||||
Copyright (c) 2005-2007, Nick Galbreath
|
||||
Copyright (c) 2013-2019, Alfred Klomp
|
||||
Copyright (c) 2015-2017, Wojciech Mula
|
||||
Copyright (c) 2016-2017, Matthieu Darbois
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
93
deps/base64/base64/Makefile
vendored
Normal file
93
deps/base64/base64/Makefile
vendored
Normal file
@ -0,0 +1,93 @@
|
||||
CFLAGS += -std=c99 -O3 -Wall -Wextra -pedantic
|
||||
|
||||
# Set OBJCOPY if not defined by environment:
|
||||
OBJCOPY ?= objcopy
|
||||
|
||||
OBJS = \
|
||||
lib/arch/avx2/codec.o \
|
||||
lib/arch/generic/codec.o \
|
||||
lib/arch/neon32/codec.o \
|
||||
lib/arch/neon64/codec.o \
|
||||
lib/arch/ssse3/codec.o \
|
||||
lib/arch/sse41/codec.o \
|
||||
lib/arch/sse42/codec.o \
|
||||
lib/arch/avx/codec.o \
|
||||
lib/lib.o \
|
||||
lib/codec_choose.o \
|
||||
lib/tables/tables.o
|
||||
|
||||
HAVE_AVX2 = 0
|
||||
HAVE_NEON32 = 0
|
||||
HAVE_NEON64 = 0
|
||||
HAVE_SSSE3 = 0
|
||||
HAVE_SSE41 = 0
|
||||
HAVE_SSE42 = 0
|
||||
HAVE_AVX = 0
|
||||
|
||||
# The user should supply compiler flags for the codecs they want to build.
|
||||
# Check which codecs we're going to include:
|
||||
ifdef AVX2_CFLAGS
|
||||
HAVE_AVX2 = 1
|
||||
endif
|
||||
ifdef NEON32_CFLAGS
|
||||
HAVE_NEON32 = 1
|
||||
endif
|
||||
ifdef NEON64_CFLAGS
|
||||
HAVE_NEON64 = 1
|
||||
endif
|
||||
ifdef SSSE3_CFLAGS
|
||||
HAVE_SSSE3 = 1
|
||||
endif
|
||||
ifdef SSE41_CFLAGS
|
||||
HAVE_SSE41 = 1
|
||||
endif
|
||||
ifdef SSE42_CFLAGS
|
||||
HAVE_SSE42 = 1
|
||||
endif
|
||||
ifdef AVX_CFLAGS
|
||||
HAVE_AVX = 1
|
||||
endif
|
||||
ifdef OPENMP
|
||||
CFLAGS += -fopenmp
|
||||
endif
|
||||
|
||||
|
||||
.PHONY: all analyze clean
|
||||
|
||||
all: bin/base64 lib/libbase64.o
|
||||
|
||||
bin/base64: bin/base64.o lib/libbase64.o
|
||||
$(CC) $(CFLAGS) -o $@ $^
|
||||
|
||||
lib/libbase64.o: $(OBJS)
|
||||
$(LD) -r -o $@ $^
|
||||
$(OBJCOPY) --keep-global-symbols=lib/exports.txt $@
|
||||
|
||||
lib/config.h:
|
||||
@echo "#define HAVE_AVX2 $(HAVE_AVX2)" > $@
|
||||
@echo "#define HAVE_NEON32 $(HAVE_NEON32)" >> $@
|
||||
@echo "#define HAVE_NEON64 $(HAVE_NEON64)" >> $@
|
||||
@echo "#define HAVE_SSSE3 $(HAVE_SSSE3)" >> $@
|
||||
@echo "#define HAVE_SSE41 $(HAVE_SSE41)" >> $@
|
||||
@echo "#define HAVE_SSE42 $(HAVE_SSE42)" >> $@
|
||||
@echo "#define HAVE_AVX $(HAVE_AVX)" >> $@
|
||||
|
||||
$(OBJS): lib/config.h
|
||||
$(OBJS): CFLAGS += -Ilib
|
||||
|
||||
lib/arch/avx2/codec.o: CFLAGS += $(AVX2_CFLAGS)
|
||||
lib/arch/neon32/codec.o: CFLAGS += $(NEON32_CFLAGS)
|
||||
lib/arch/neon64/codec.o: CFLAGS += $(NEON64_CFLAGS)
|
||||
lib/arch/ssse3/codec.o: CFLAGS += $(SSSE3_CFLAGS)
|
||||
lib/arch/sse41/codec.o: CFLAGS += $(SSE41_CFLAGS)
|
||||
lib/arch/sse42/codec.o: CFLAGS += $(SSE42_CFLAGS)
|
||||
lib/arch/avx/codec.o: CFLAGS += $(AVX_CFLAGS)
|
||||
|
||||
%.o: %.c
|
||||
$(CC) $(CFLAGS) -o $@ -c $<
|
||||
|
||||
analyze: clean
|
||||
scan-build --use-analyzer=`which clang` --status-bugs make
|
||||
|
||||
clean:
|
||||
rm -f bin/base64 bin/base64.o lib/libbase64.o lib/config.h $(OBJS)
|
474
deps/base64/base64/README.md
vendored
Normal file
474
deps/base64/base64/README.md
vendored
Normal file
@ -0,0 +1,474 @@
|
||||
# Fast Base64 stream encoder/decoder
|
||||
|
||||
[![Build Status](https://github.com/aklomp/base64/actions/workflows/test.yml/badge.svg)](https://github.com/aklomp/base64/actions/workflows/test.yml)
|
||||
|
||||
This is an implementation of a base64 stream encoding/decoding library in C99
|
||||
with SIMD (AVX2, NEON, AArch64/NEON, SSSE3, SSE4.1, SSE4.2, AVX) and
|
||||
[OpenMP](http://www.openmp.org) acceleration. It also contains wrapper functions
|
||||
to encode/decode simple length-delimited strings. This library aims to be:
|
||||
|
||||
- FAST;
|
||||
- easy to use;
|
||||
- elegant.
|
||||
|
||||
On x86, the library does runtime feature detection. The first time it's called,
|
||||
the library will determine the appropriate encoding/decoding routines for the
|
||||
machine. It then remembers them for the lifetime of the program. If your
|
||||
processor supports AVX2, SSSE3, SSE4.1, SSE4.2 or AVX instructions, the library
|
||||
will pick an optimized codec that lets it encode/decode 12 or 24 bytes at a
|
||||
time, which gives a speedup of four or more times compared to the "plain"
|
||||
bytewise codec.
|
||||
|
||||
NEON support is hardcoded to on or off at compile time, because portable
|
||||
runtime feature detection is unavailable on ARM.
|
||||
|
||||
Even if your processor does not support SIMD instructions, this is a very fast
|
||||
library. The fallback routine can process 32 or 64 bits of input in one round,
|
||||
depending on your processor's word width, which still makes it significantly
|
||||
faster than naive bytewise implementations. On some 64-bit machines, the 64-bit
|
||||
routines even outperform the SSSE3 ones.
|
||||
|
||||
To the author's knowledge, at the time of original release, this was the only
|
||||
Base64 library to offer SIMD acceleration. The author wrote
|
||||
[an article](http://www.alfredklomp.com/programming/sse-base64) explaining one
|
||||
possible SIMD approach to encoding/decoding Base64. The article can help figure
|
||||
out what the code is doing, and why.
|
||||
|
||||
Notable features:
|
||||
|
||||
- Really fast on x86 and ARM systems by using SIMD vector processing;
|
||||
- Can use [OpenMP](http://www.openmp.org) for even more parallel speedups;
|
||||
- Really fast on other 32 or 64-bit platforms through optimized routines;
|
||||
- Reads/writes blocks of streaming data;
|
||||
- Does not dynamically allocate memory;
|
||||
- Valid C99 that compiles with pedantic options on;
|
||||
- Re-entrant and threadsafe;
|
||||
- Unit tested;
|
||||
- Uses Duff's Device.
|
||||
|
||||
## Acknowledgements
|
||||
|
||||
The original AVX2, NEON and Aarch64/NEON codecs were generously contributed by
|
||||
[Inkymail](https://github.com/inkymail/base64), who, in their fork, also
|
||||
implemented some additional features. Their work is slowly being backported
|
||||
into this project.
|
||||
|
||||
The SSSE3 and AVX2 codecs were substantially improved by using some very clever
|
||||
optimizations described by Wojciech Muła in a
|
||||
[series](http://0x80.pl/notesen/2016-01-12-sse-base64-encoding.html) of
|
||||
[articles](http://0x80.pl/notesen/2016-01-17-sse-base64-decoding.html).
|
||||
His own code is [here](https://github.com/WojciechMula/toys/tree/master/base64).
|
||||
|
||||
The OpenMP implementation was added by Ferry Toth (@htot) from [Exalon Delft](http://www.exalondelft.nl).
|
||||
|
||||
## Building
|
||||
|
||||
The `lib` directory contains the code for the actual library.
|
||||
Typing `make` in the toplevel directory will build `lib/libbase64.o` and `bin/base64`.
|
||||
The first is a single, self-contained object file that you can link into your own project.
|
||||
The second is a standalone test binary that works similarly to the `base64` system utility.
|
||||
|
||||
The matching header file needed to use this library is in `include/libbase64.h`.
|
||||
|
||||
To compile just the "plain" library without SIMD codecs, type:
|
||||
|
||||
```sh
|
||||
make lib/libbase64.o
|
||||
```
|
||||
|
||||
Optional SIMD codecs can be included by specifying the `AVX2_CFLAGS`, `NEON32_CFLAGS`, `NEON64_CFLAGS`,
|
||||
`SSSE3_CFLAGS`, `SSE41_CFLAGS`, `SSE42_CFLAGS` and/or `AVX_CFLAGS` environment variables.
|
||||
A typical build invocation on x86 looks like this:
|
||||
|
||||
```sh
|
||||
AVX2_CFLAGS=-mavx2 SSSE3_CFLAGS=-mssse3 SSE41_CFLAGS=-msse4.1 SSE42_CFLAGS=-msse4.2 AVX_CFLAGS=-mavx make lib/libbase64.o
|
||||
```
|
||||
|
||||
### AVX2
|
||||
|
||||
To build and include the AVX2 codec, set the `AVX2_CFLAGS` environment variable to a value that will turn on AVX2 support in your compiler, typically `-mavx2`.
|
||||
Example:
|
||||
|
||||
```sh
|
||||
AVX2_CFLAGS=-mavx2 make
|
||||
```
|
||||
|
||||
The codec will only be used if runtime feature detection shows that the target machine supports AVX2.
|
||||
|
||||
### SSSE3
|
||||
|
||||
To build and include the SSSE3 codec, set the `SSSE3_CFLAGS` environment variable to a value that will turn on SSSE3 support in your compiler, typically `-mssse3`.
|
||||
Example:
|
||||
|
||||
```sh
|
||||
SSSE3_CFLAGS=-mssse3 make
|
||||
```
|
||||
|
||||
The codec will only be used if runtime feature detection shows that the target machine supports SSSE3.
|
||||
|
||||
### NEON
|
||||
|
||||
This library includes two NEON codecs: one for regular 32-bit ARM and one for the 64-bit AArch64 with NEON, which has double the amount of SIMD registers and can do full 64-byte table lookups.
|
||||
These codecs encode in 48-byte chunks and decode in massive 64-byte chunks, so they had to be augmented with a uint32/64 codec to stay fast on smaller inputs!
|
||||
|
||||
Use LLVM/Clang for compiling the NEON codecs.
|
||||
The code generation of at least GCC 4.6 (the version shipped with Raspbian and used for testing) contains a bug when compiling `vst4q_u8()`, and the generated assembly code is of low quality.
|
||||
NEON intrinsics are a known weak area of GCC.
|
||||
Clang does a better job.
|
||||
|
||||
NEON support can unfortunately not be portably detected at runtime from userland (the `mrc` instruction is privileged), so the default value for using the NEON codec is determined at compile-time.
|
||||
But you can do your own runtime detection.
|
||||
You can include the NEON codec and make it the default, then do a runtime check if the CPU has NEON support, and if not, force a downgrade to non-NEON with `BASE64_FORCE_PLAIN`.
|
||||
|
||||
These are your options:
|
||||
|
||||
1. Don't include NEON support;
|
||||
2. build NEON support and make it the default, but build all other code without NEON flags so that you can override the default at runtime with `BASE64_FORCE_PLAIN`;
|
||||
3. build everything with NEON support and make it the default;
|
||||
4. build everything with NEON support, but don't make it the default (which makes no sense).
|
||||
|
||||
For option 1, simply don't specify any NEON-specific compiler flags at all, like so:
|
||||
|
||||
```sh
|
||||
CC=clang CFLAGS="-march=armv6" make
|
||||
```
|
||||
|
||||
For option 2, keep your `CFLAGS` plain, but set the `NEON32_CFLAGS` environment variable to a value that will build NEON support.
|
||||
The line below, for instance, will build all the code at ARMv6 level, except for the NEON codec, which is built at ARMv7.
|
||||
It will also make the NEON codec the default.
|
||||
For ARMv6 platforms, override that default at runtime with the `BASE64_FORCE_PLAIN` flag.
|
||||
No ARMv7/NEON code will then be touched.
|
||||
|
||||
```sh
|
||||
CC=clang CFLAGS="-march=armv6" NEON32_CFLAGS="-march=armv7 -mfpu=neon" make
|
||||
```
|
||||
|
||||
For option 3, put everything in your `CFLAGS` and use a stub, but non-empty, `NEON32_CFLAGS`.
|
||||
This example works for the Raspberry Pi 2B V1.1, which has NEON support:
|
||||
|
||||
```sh
|
||||
CC=clang CFLAGS="-march=armv7 -mtune=cortex-a7" NEON32_CFLAGS="-mfpu=neon" make
|
||||
```
|
||||
|
||||
To build and include the NEON64 codec, use `CFLAGS` as usual to define the platform and set `NEON64_CFLAGS` to a nonempty stub.
|
||||
(The AArch64 target has mandatory NEON64 support.)
|
||||
Example:
|
||||
|
||||
```sh
|
||||
CC=clang CFLAGS="--target=aarch64-linux-gnu -march=armv8-a" NEON64_CFLAGS=" " make
|
||||
```
|
||||
|
||||
### OpenMP
|
||||
|
||||
To enable OpenMP on GCC you need to build with `-fopenmp`. This can be done by setting the `OPENMP` environment variable to `1`.
|
||||
|
||||
Example:
|
||||
|
||||
```sh
|
||||
OPENMP=1 make
|
||||
```
|
||||
|
||||
This will let the compiler define `_OPENMP`, which in turn will include the OpenMP optimized `lib_openmp.c` into `lib.c`.
|
||||
|
||||
By default the number of parallel threads will be equal to the number of cores of the processor.
|
||||
On a quad core with hyperthreading eight cores will be detected, but hyperthreading will not increase the performance.
|
||||
|
||||
To get verbose information about OpenMP start the program with `OMP_DISPLAY_ENV=VERBOSE`, for instance
|
||||
|
||||
```sh
|
||||
OMP_DISPLAY_ENV=VERBOSE test/benchmark
|
||||
```
|
||||
|
||||
To put a limit on the number of threads, start the program with `OMP_THREAD_LIMIT=n`, for instance
|
||||
|
||||
```sh
|
||||
OMP_THREAD_LIMIT=2 test/benchmark
|
||||
```
|
||||
|
||||
An example of running a benchmark with OpenMP, SSSE3 and AVX2 enabled:
|
||||
|
||||
```sh
|
||||
make clean && OPENMP=1 SSSE3_CFLAGS=-mssse3 AVX2_CFLAGS=-mavx2 make && OPENMP=1 make -C test
|
||||
```
|
||||
|
||||
## API reference
|
||||
|
||||
Strings are represented as a pointer and a length; they are not
|
||||
zero-terminated. This was a conscious design decision. In the decoding step,
|
||||
relying on zero-termination would make no sense since the output could contain
|
||||
legitimate zero bytes. In the encoding step, returning the length saves the
|
||||
overhead of calling `strlen()` on the output. If you insist on the trailing
|
||||
zero, you can easily add it yourself at the given offset.
|
||||
|
||||
### Flags
|
||||
|
||||
Some API calls take a `flags` argument.
|
||||
That argument can be used to force the use of a specific codec, even if that codec is a no-op in the current build.
|
||||
Mainly there for testing purposes, this is also useful on ARM where the only way to do runtime NEON detection is to ask the OS if it's available.
|
||||
The following constants can be used:
|
||||
|
||||
- `BASE64_FORCE_AVX2`
|
||||
- `BASE64_FORCE_NEON32`
|
||||
- `BASE64_FORCE_NEON64`
|
||||
- `BASE64_FORCE_PLAIN`
|
||||
- `BASE64_FORCE_SSSE3`
|
||||
- `BASE64_FORCE_SSE41`
|
||||
- `BASE64_FORCE_SSE42`
|
||||
- `BASE64_FORCE_AVX`
|
||||
|
||||
Set `flags` to `0` for the default behavior, which is runtime feature detection on x86, a compile-time fixed codec on ARM, and the plain codec on other platforms.
|
||||
|
||||
### Encoding
|
||||
|
||||
#### base64_encode
|
||||
|
||||
```c
|
||||
void base64_encode
|
||||
( const char *src
|
||||
, size_t srclen
|
||||
, char *out
|
||||
, size_t *outlen
|
||||
, int flags
|
||||
) ;
|
||||
```
|
||||
|
||||
Wrapper function to encode a plain string of given length.
|
||||
Output is written to `out` without trailing zero.
|
||||
Output length in bytes is written to `outlen`.
|
||||
The buffer in `out` has been allocated by the caller and is at least 4/3 the size of the input.
|
||||
|
||||
#### base64_stream_encode_init
|
||||
|
||||
```c
|
||||
void base64_stream_encode_init
|
||||
( struct base64_state *state
|
||||
, int flags
|
||||
) ;
|
||||
```
|
||||
|
||||
Call this before calling `base64_stream_encode()` to init the state.
|
||||
|
||||
#### base64_stream_encode
|
||||
|
||||
```c
|
||||
void base64_stream_encode
|
||||
( struct base64_state *state
|
||||
, const char *src
|
||||
, size_t srclen
|
||||
, char *out
|
||||
, size_t *outlen
|
||||
) ;
|
||||
```
|
||||
|
||||
Encodes the block of data of given length at `src`, into the buffer at `out`.
|
||||
Caller is responsible for allocating a large enough out-buffer; it must be at least 4/3 the size of the in-buffer, but take some margin.
|
||||
Places the number of new bytes written into `outlen` (which is set to zero when the function starts).
|
||||
Does not zero-terminate or finalize the output.
|
||||
|
||||
#### base64_stream_encode_final
|
||||
|
||||
```c
|
||||
void base64_stream_encode_final
|
||||
( struct base64_state *state
|
||||
, char *out
|
||||
, size_t *outlen
|
||||
) ;
|
||||
```
|
||||
|
||||
Finalizes the output begun by previous calls to `base64_stream_encode()`.
|
||||
Adds the required end-of-stream markers if appropriate.
|
||||
`outlen` is modified and will contain the number of new bytes written at `out` (which will quite often be zero).
|
||||
|
||||
### Decoding
|
||||
|
||||
#### base64_decode
|
||||
|
||||
```c
|
||||
int base64_decode
|
||||
( const char *src
|
||||
, size_t srclen
|
||||
, char *out
|
||||
, size_t *outlen
|
||||
, int flags
|
||||
) ;
|
||||
```
|
||||
|
||||
Wrapper function to decode a plain string of given length.
|
||||
Output is written to `out` without trailing zero. Output length in bytes is written to `outlen`.
|
||||
The buffer in `out` has been allocated by the caller and is at least 3/4 the size of the input.
|
||||
Returns `1` for success, and `0` when a decode error has occurred due to invalid input.
|
||||
Returns `-1` if the chosen codec is not included in the current build.
|
||||
|
||||
#### base64_stream_decode_init
|
||||
|
||||
```c
|
||||
void base64_stream_decode_init
|
||||
( struct base64_state *state
|
||||
, int flags
|
||||
) ;
|
||||
```
|
||||
|
||||
Call this before calling `base64_stream_decode()` to init the state.
|
||||
|
||||
#### base64_stream_decode
|
||||
|
||||
```c
|
||||
int base64_stream_decode
|
||||
( struct base64_state *state
|
||||
, const char *src
|
||||
, size_t srclen
|
||||
, char *out
|
||||
, size_t *outlen
|
||||
) ;
|
||||
```
|
||||
|
||||
Decodes the block of data of given length at `src`, into the buffer at `out`.
|
||||
Caller is responsible for allocating a large enough out-buffer; it must be at least 3/4 the size of the in-buffer, but take some margin.
|
||||
Places the number of new bytes written into `outlen` (which is set to zero when the function starts).
|
||||
Does not zero-terminate the output.
|
||||
Returns 1 if all is well, and 0 if a decoding error was found, such as an invalid character.
|
||||
Returns -1 if the chosen codec is not included in the current build.
|
||||
Used by the test harness to check whether a codec is available for testing.
|
||||
|
||||
## Examples
|
||||
|
||||
A simple example of encoding a static string to base64 and printing the output
|
||||
to stdout:
|
||||
|
||||
```c
|
||||
#include <stdio.h> /* fwrite */
|
||||
#include "libbase64.h"
|
||||
|
||||
int main ()
|
||||
{
|
||||
char src[] = "hello world";
|
||||
char out[20];
|
||||
size_t srclen = sizeof(src) - 1;
|
||||
size_t outlen;
|
||||
|
||||
base64_encode(src, srclen, out, &outlen, 0);
|
||||
|
||||
fwrite(out, outlen, 1, stdout);
|
||||
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
|
||||
A simple example (no error checking, etc) of stream encoding standard input to
|
||||
standard output:
|
||||
|
||||
```c
|
||||
#include <stdio.h>
|
||||
#include "libbase64.h"
|
||||
|
||||
int main ()
|
||||
{
|
||||
size_t nread, nout;
|
||||
char buf[12000], out[16000];
|
||||
struct base64_state state;
|
||||
|
||||
// Initialize stream encoder:
|
||||
base64_stream_encode_init(&state, 0);
|
||||
|
||||
// Read contents of stdin into buffer:
|
||||
while ((nread = fread(buf, 1, sizeof(buf), stdin)) > 0) {
|
||||
|
||||
// Encode buffer:
|
||||
base64_stream_encode(&state, buf, nread, out, &nout);
|
||||
|
||||
// If there's output, print it to stdout:
|
||||
if (nout) {
|
||||
fwrite(out, nout, 1, stdout);
|
||||
}
|
||||
|
||||
// If the end of the input was reached, exit the loop:
|
||||
if (feof(stdin)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Finalize encoding:
|
||||
base64_stream_encode_final(&state, out, &nout);
|
||||
|
||||
// If the finalizing resulted in extra output bytes, print them:
|
||||
if (nout) {
|
||||
fwrite(out, nout, 1, stdout);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
|
||||
Also see `bin/base64.c` for a simple re-implementation of the `base64` utility.
|
||||
A file or standard input is fed through the encoder/decoder, and the output is
|
||||
written to standard output.
|
||||
|
||||
## Tests
|
||||
|
||||
See `test/` for a small test suite. Testing is automated with
|
||||
[GitHub Actions](https://github.com/aklomp/base64/actions), which builds and
|
||||
tests the code across various architectures.
|
||||
|
||||
## Benchmarks
|
||||
|
||||
Benchmarks can be run with the built-in benchmark program as follows:
|
||||
|
||||
```sh
|
||||
make -C test benchmark <buildflags> && test/benchmark
|
||||
```
|
||||
|
||||
It will run an encoding and decoding benchmark for all of the compiled-in codecs.
|
||||
|
||||
The tables below contain some results on random machines. All numbers measured with a 10MB buffer in MB/sec, rounded to the nearest integer.
|
||||
|
||||
\*: Update needed
|
||||
|
||||
x86 processors
|
||||
|
||||
| Processor | Plain enc | Plain dec | SSSE3 enc | SSSE3 dec | AVX enc | AVX dec | AVX2 enc | AVX2 dec |
|
||||
|-------------------------------------------|----------:|----------:|----------:|----------:|--------:|--------:|---------:|---------:|
|
||||
| i7-4771 @ 3.5 GHz | 833\* | 1111\* | 3333\* | 4444\* | TBD | TBD | 4999\* | 6666\* |
|
||||
| i7-4770 @ 3.4 GHz DDR1600 | 1790\* | 3038\* | 4899\* | 4043\* | 4796\* | 5709\* | 4681\* | 6386\* |
|
||||
| i7-4770 @ 3.4 GHz DDR1600 OPENMP 1 thread | 1784\* | 3041\* | 4945\* | 4035\* | 4776\* | 5719\* | 4661\* | 6294\* |
|
||||
| i7-4770 @ 3.4 GHz DDR1600 OPENMP 2 thread | 3401\* | 5729\* | 5489\* | 7444\* | 5003\* | 8624\* | 5105\* | 8558\* |
|
||||
| i7-4770 @ 3.4 GHz DDR1600 OPENMP 4 thread | 4884\* | 7099\* | 4917\* | 7057\* | 4799\* | 7143\* | 4902\* | 7219\* |
|
||||
| i7-4770 @ 3.4 GHz DDR1600 OPENMP 8 thread | 5212\* | 8849\* | 5284\* | 9099\* | 5289\* | 9220\* | 4849\* | 9200\* |
|
||||
| i7-4870HQ @ 2.5 GHz | 1471\* | 3066\* | 6721\* | 6962\* | 7015\* | 8267\* | 8328\* | 11576\* |
|
||||
| i5-4590S @ 3.0 GHz | 3356 | 3197 | 4363 | 6104 | 4243 | 6233 | 4160 | 6344 |
|
||||
| Xeon X5570 @ 2.93 GHz | 2161 | 1508 | 3160 | 3915 | - | - | - | - |
|
||||
| Pentium4 @ 3.4 GHz | 896 | 740 | - | - | - | - | - | - |
|
||||
| Atom N270 | 243 | 266 | 508 | 387 | - | - | - | - |
|
||||
| AMD E-450 | 645 | 564 | 625 | 634 | - | - | - | - |
|
||||
| Intel Edison @ 500 MHz | 79\* | 92\* | 152\* | 172\* | - | - | - | - |
|
||||
| Intel Edison @ 500 MHz OPENMP 2 thread | 158\* | 184\* | 300\* | 343\* | - | - | - | - |
|
||||
| Intel Edison @ 500 MHz (x86-64) | 162 | 119 | 209 | 164 | - | - | - | - |
|
||||
| Intel Edison @ 500 MHz (x86-64) 2 thread | 319 | 237 | 412 | 329 | - | - | - | - |
|
||||
|
||||
ARM processors
|
||||
|
||||
| Processor | Plain enc | Plain dec | NEON32 enc | NEON32 dec | NEON64 enc | NEON64 dec |
|
||||
|-------------------------------------------|----------:|----------:|-----------:|-----------:|-----------:|-----------:|
|
||||
| Raspberry PI B+ V1.2 | 46\* | 40\* | - | - | - | - |
|
||||
| Raspberry PI 2 B V1.1 | 85 | 141 | 300 | 225 | - | - |
|
||||
| Apple iPhone SE armv7 | 1056\* | 895\* | 2943\* | 2618\* | - | - |
|
||||
| Apple iPhone SE arm64 | 1061\* | 1239\* | - | - | 4098\* | 3983\* |
|
||||
|
||||
PowerPC processors
|
||||
|
||||
| Processor | Plain enc | Plain dec |
|
||||
|-------------------------------------------|----------:|----------:|
|
||||
| PowerPC E6500 @ 1.8GHz | 270\* | 265\* |
|
||||
|
||||
|
||||
Benchmarks on i7-4770 @ 3.4 GHz DDR1600 with varying buffer sizes:
|
||||
![Benchmarks](base64-benchmarks.png)
|
||||
|
||||
Note: optimal buffer size to take advantage of the cache is in the range of 100 kB to 1 MB, leading to 12x faster AVX encoding/decoding compared to Plain, or a throughput of 24/27GB/sec.
|
||||
Also note the performance degradation when the buffer size is less than 10 kB due to thread creation overhead.
|
||||
To prevent this from happening `lib_openmp.c` defines `OMP_THRESHOLD 20000`, requiring at least a 20000 byte buffer to enable multithreading.
|
||||
|
||||
## License
|
||||
|
||||
This repository is licensed under the
|
||||
[BSD 2-clause License](http://opensource.org/licenses/BSD-2-Clause). See the
|
||||
LICENSE file.
|
BIN
deps/base64/base64/base64-benchmarks.png
vendored
Normal file
BIN
deps/base64/base64/base64-benchmarks.png
vendored
Normal file
Binary file not shown.
After Width: | Height: | Size: 21 KiB |
128
deps/base64/base64/bin/base64.c
vendored
Normal file
128
deps/base64/base64/bin/base64.c
vendored
Normal file
@ -0,0 +1,128 @@
|
||||
#include <stddef.h> // size_t
|
||||
#include <stdio.h> // fopen()
|
||||
#include <string.h> // strcmp()
|
||||
#include <getopt.h>
|
||||
#include "../include/libbase64.h"
|
||||
|
||||
// Size in bytes of one read from the input stream. The expansion is
// parenthesized so that the macro behaves as a single value inside larger
// expressions such as `x % BUFSIZE` or `x / BUFSIZE`.
#define BUFSIZE (1024 * 1024)

// Input chunk buffer, filled by fread() in enc() and dec():
static char buf[BUFSIZE];

// Output buffer for one converted chunk:
static char out[(BUFSIZE * 5) / 3];	// Technically 4/3 of input, but take some margin

// Number of bytes obtained by the most recent fread():
size_t nread;

// Number of bytes produced by the most recent encode/decode call:
size_t nout;
|
||||
|
||||
static int
|
||||
enc (FILE *fp)
|
||||
{
|
||||
int ret = 1;
|
||||
struct base64_state state;
|
||||
|
||||
base64_stream_encode_init(&state, 0);
|
||||
|
||||
while ((nread = fread(buf, 1, BUFSIZE, fp)) > 0) {
|
||||
base64_stream_encode(&state, buf, nread, out, &nout);
|
||||
if (nout) {
|
||||
fwrite(out, nout, 1, stdout);
|
||||
}
|
||||
if (feof(fp)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (ferror(fp)) {
|
||||
fprintf(stderr, "read error\n");
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
base64_stream_encode_final(&state, out, &nout);
|
||||
|
||||
if (nout) {
|
||||
fwrite(out, nout, 1, stdout);
|
||||
}
|
||||
out: fclose(fp);
|
||||
fclose(stdout);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int
|
||||
dec (FILE *fp)
|
||||
{
|
||||
int ret = 1;
|
||||
struct base64_state state;
|
||||
|
||||
base64_stream_decode_init(&state, 0);
|
||||
|
||||
while ((nread = fread(buf, 1, BUFSIZE, fp)) > 0) {
|
||||
if (!base64_stream_decode(&state, buf, nread, out, &nout)) {
|
||||
fprintf(stderr, "decoding error\n");
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
if (nout) {
|
||||
fwrite(out, nout, 1, stdout);
|
||||
}
|
||||
if (feof(fp)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (ferror(fp)) {
|
||||
fprintf(stderr, "read error\n");
|
||||
ret = 0;
|
||||
}
|
||||
out: fclose(fp);
|
||||
fclose(stdout);
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Entry point of the standalone base64 tool.
//
// Usage: base64 [-d|--decode] [<file>]
//
// Encodes (default) or decodes (-d / --decode) the named file, or standard
// input when no file is given or the file is "-". The converted data goes to
// standard output; diagnostics go to standard error so that they never end up
// mixed into the payload.
//
// Returns 0 on success and 1 on failure (shell convention).
int
main (int argc, char **argv)
{
	char *file;
	FILE *fp;
	int decode = 0;

	// Parse options:
	for (;;)
	{
		int c;
		int opt_index = 0;
		static struct option opt_long[] = {
			{ "decode", 0, 0, 'd' },
			{ 0, 0, 0, 0 }
		};
		if ((c = getopt_long(argc, argv, "d", opt_long, &opt_index)) == -1) {
			break;
		}
		switch (c)
		{
			case 'd':
				decode = 1;
				break;
		}
	}

	// No arguments left on command line? Read from stdin:
	if (optind >= argc) {
		fp = stdin;
	}

	// One argument left on command line? Treat it as a file:
	else if (optind + 1 == argc) {
		file = argv[optind];
		if (strcmp(file, "-") == 0) {
			fp = stdin;
		}
		else if ((fp = fopen(file, "rb")) == NULL) {
			// Report on stderr, like the read/decode errors in enc()/dec():
			fprintf(stderr, "cannot open %s\n", file);
			return 1;
		}
	}

	// More than one argument left on command line? Syntax error:
	else {
		fprintf(stderr, "Usage: %s <file>\n", argv[0]);
		return 1;
	}

	// enc()/dec() return 1 on success; invert to create shell return code:
	return (decode) ? !dec(fp) : !enc(fp);
}
|
30
deps/base64/base64/cmake/Modules/TargetArch.cmake
vendored
Normal file
30
deps/base64/base64/cmake/Modules/TargetArch.cmake
vendored
Normal file
@ -0,0 +1,30 @@
|
||||
# Written in 2017 by Henrik Steffen Gaßmann henrik@gassmann.onl
|
||||
#
|
||||
# To the extent possible under law, the author(s) have dedicated all
|
||||
# copyright and related and neighboring rights to this software to the
|
||||
# public domain worldwide. This software is distributed without any warranty.
|
||||
#
|
||||
# You should have received a copy of the CC0 Public Domain Dedication
|
||||
# along with this software. If not, see
|
||||
#
|
||||
# http://creativecommons.org/publicdomain/zero/1.0/
|
||||
#
|
||||
########################################################################
|
||||
|
||||
set(TARGET_ARCHITECTURE_TEST_FILE "${CMAKE_CURRENT_LIST_DIR}/../test-arch.c")
|
||||
|
||||
# detect_target_architecture(<output-variable>)
#
# Feeds the probe source named by TARGET_ARCHITECTURE_TEST_FILE to
# try_compile() and searches the captured compiler output for a marker of the
# form "##arch=<name>##" (the probe emits it through an #error directive).
#
# In the caller's scope this sets:
#   <output-variable>         to <name>
#   <output-variable>_<name>  to 1
# A warning is printed when <name> is "unknown".
function(detect_target_architecture OUTPUT_VARIABLE)
    message(STATUS "${CMAKE_CURRENT_LIST_DIR}")

    # Only the compiler output matters; the compile result itself is ignored.
    try_compile(_IGNORED "${CMAKE_CURRENT_BINARY_DIR}"
        "${TARGET_ARCHITECTURE_TEST_FILE}"
        OUTPUT_VARIABLE _probe_output
    )

    # Capture group 1 holds the architecture name.
    string(REGEX MATCH "##arch=([^#]+)##" _IGNORED "${_probe_output}")
    set(_arch "${CMAKE_MATCH_1}")

    if (_arch STREQUAL "unknown")
        message(WARNING "could not detect the target architecture.")
    endif()

    set(${OUTPUT_VARIABLE} "${_arch}" PARENT_SCOPE)
    set("${OUTPUT_VARIABLE}_${_arch}" 1 PARENT_SCOPE)
endfunction()
|
34
deps/base64/base64/cmake/Modules/TargetSIMDInstructionSet.cmake
vendored
Normal file
34
deps/base64/base64/cmake/Modules/TargetSIMDInstructionSet.cmake
vendored
Normal file
@ -0,0 +1,34 @@
|
||||
# Written in 2016-2017 by Henrik Steffen Gaßmann henrik@gassmann.onl
|
||||
#
|
||||
# To the extent possible under law, the author(s) have dedicated all
|
||||
# copyright and related and neighboring rights to this software to the
|
||||
# public domain worldwide. This software is distributed without any warranty.
|
||||
#
|
||||
# You should have received a copy of the CC0 Public Domain Dedication
|
||||
# along with this software. If not, see
|
||||
#
|
||||
# http://creativecommons.org/publicdomain/zero/1.0/
|
||||
#
|
||||
########################################################################
|
||||
|
||||
########################################################################
|
||||
# compiler flags definition
|
||||
# define_SIMD_compile_flags()
#
# Defines the COMPILE_FLAGS_<ISA> variables for the active C compiler. This is
# a macro, so the variables are set directly in the scope of the caller.
# Nothing is set for compilers other than GNU, Clang, AppleClang and MSVC.
macro(define_SIMD_compile_flags)
    if (CMAKE_C_COMPILER_ID STREQUAL "GNU"
            OR CMAKE_C_COMPILER_ID STREQUAL "Clang"
            OR CMAKE_C_COMPILER_ID STREQUAL "AppleClang")
        # arm
        set(COMPILE_FLAGS_NEON32 "-mfpu=neon")

        # x86
        set(COMPILE_FLAGS_AVX2  "-mavx2")
        set(COMPILE_FLAGS_AVX   "-mavx")
        set(COMPILE_FLAGS_SSE42 "-msse4.2")
        set(COMPILE_FLAGS_SSE41 "-msse4.1")
        set(COMPILE_FLAGS_SSSE3 "-mssse3")
    elseif(MSVC)
        # Only the AVX variants get an /arch switch here; the SSE variants
        # are set to a single space.
        # NOTE(review): presumably a stub so the value is non-empty - confirm.
        set(COMPILE_FLAGS_AVX2  "/arch:AVX2")
        set(COMPILE_FLAGS_AVX   "/arch:AVX")
        set(COMPILE_FLAGS_SSE42 " ")
        set(COMPILE_FLAGS_SSE41 " ")
        set(COMPILE_FLAGS_SSSE3 " ")
    endif()
endmacro(define_SIMD_compile_flags)
|
5
deps/base64/base64/cmake/base64-config.cmake.in
vendored
Normal file
5
deps/base64/base64/cmake/base64-config.cmake.in
vendored
Normal file
@ -0,0 +1,5 @@
|
||||
@PACKAGE_INIT@
|
||||
|
||||
include("${CMAKE_CURRENT_LIST_DIR}/base64-targets.cmake")
|
||||
|
||||
check_required_components(base64)
|
25
deps/base64/base64/cmake/config.h.in
vendored
Normal file
25
deps/base64/base64/cmake/config.h.in
vendored
Normal file
@ -0,0 +1,25 @@
|
||||
#ifndef BASE64_CONFIG_H
|
||||
#define BASE64_CONFIG_H
|
||||
|
||||
#cmakedefine01 BASE64_WITH_SSSE3
|
||||
#define HAVE_SSSE3 BASE64_WITH_SSSE3
|
||||
|
||||
#cmakedefine01 BASE64_WITH_SSE41
|
||||
#define HAVE_SSE41 BASE64_WITH_SSE41
|
||||
|
||||
#cmakedefine01 BASE64_WITH_SSE42
|
||||
#define HAVE_SSE42 BASE64_WITH_SSE42
|
||||
|
||||
#cmakedefine01 BASE64_WITH_AVX
|
||||
#define HAVE_AVX BASE64_WITH_AVX
|
||||
|
||||
#cmakedefine01 BASE64_WITH_AVX2
|
||||
#define HAVE_AVX2 BASE64_WITH_AVX2
|
||||
|
||||
#cmakedefine01 BASE64_WITH_NEON32
|
||||
#define HAVE_NEON32 BASE64_WITH_NEON32
|
||||
|
||||
#cmakedefine01 BASE64_WITH_NEON64
|
||||
#define HAVE_NEON64 BASE64_WITH_NEON64
|
||||
|
||||
#endif // BASE64_CONFIG_H
|
35
deps/base64/base64/cmake/test-arch.c
vendored
Normal file
35
deps/base64/base64/cmake/test-arch.c
vendored
Normal file
@ -0,0 +1,35 @@
|
||||
// Written in 2017 by Henrik Steffen Gaßmann henrik@gassmann.onl
|
||||
//
|
||||
// To the extent possible under law, the author(s) have dedicated all
|
||||
// copyright and related and neighboring rights to this software to the
|
||||
// public domain worldwide. This software is distributed without any warranty.
|
||||
//
|
||||
// You should have received a copy of the CC0 Public Domain Dedication
|
||||
// along with this software. If not, see
|
||||
//
|
||||
// http://creativecommons.org/publicdomain/zero/1.0/
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// ARM 64-Bit
|
||||
#if defined(__aarch64__)
|
||||
#error ##arch=arm64##
|
||||
|
||||
// ARM 32-Bit
|
||||
#elif defined(__arm__) \
|
||||
|| defined(_M_ARM)
|
||||
#error ##arch=arm##
|
||||
|
||||
// x86 64-Bit
|
||||
#elif defined(__x86_64__) \
|
||||
|| defined(_M_X64)
|
||||
#error ##arch=x64##
|
||||
|
||||
// x86 32-Bit
|
||||
#elif defined(__i386__) \
|
||||
|| defined(_M_IX86)
|
||||
#error ##arch=x86##
|
||||
|
||||
#else
|
||||
#error ##arch=unknown##
|
||||
#endif
|
145
deps/base64/base64/include/libbase64.h
vendored
Normal file
145
deps/base64/base64/include/libbase64.h
vendored
Normal file
@ -0,0 +1,145 @@
|
||||
#ifndef LIBBASE64_H
|
||||
#define LIBBASE64_H
|
||||
|
||||
#include <stddef.h> /* size_t */
|
||||
|
||||
|
||||
#if defined(_WIN32) || defined(__CYGWIN__)
|
||||
#define BASE64_SYMBOL_IMPORT __declspec(dllimport)
|
||||
#define BASE64_SYMBOL_EXPORT __declspec(dllexport)
|
||||
#define BASE64_SYMBOL_PRIVATE
|
||||
|
||||
#elif __GNUC__ >= 4
|
||||
#define BASE64_SYMBOL_IMPORT __attribute__ ((visibility ("default")))
|
||||
#define BASE64_SYMBOL_EXPORT __attribute__ ((visibility ("default")))
|
||||
#define BASE64_SYMBOL_PRIVATE __attribute__ ((visibility ("hidden")))
|
||||
|
||||
#else
|
||||
#define BASE64_SYMBOL_IMPORT
|
||||
#define BASE64_SYMBOL_EXPORT
|
||||
#define BASE64_SYMBOL_PRIVATE
|
||||
#endif
|
||||
|
||||
#if defined(BASE64_STATIC_DEFINE)
|
||||
#define BASE64_EXPORT
|
||||
#define BASE64_NO_EXPORT
|
||||
|
||||
#else
|
||||
#if defined(BASE64_EXPORTS) // defined if we are building the shared library
|
||||
#define BASE64_EXPORT BASE64_SYMBOL_EXPORT
|
||||
|
||||
#else
|
||||
#define BASE64_EXPORT BASE64_SYMBOL_IMPORT
|
||||
#endif
|
||||
|
||||
#define BASE64_NO_EXPORT BASE64_SYMBOL_PRIVATE
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* These are the flags that can be passed in the `flags` argument. The values
|
||||
* below force the use of a given codec, even if that codec is a no-op in the
|
||||
* current build. Used in testing. Set to 0 for the default behavior, which is
|
||||
* runtime feature detection on x86, a compile-time fixed codec on ARM, and
|
||||
* the plain codec on other platforms: */
|
||||
#define BASE64_FORCE_AVX2 (1 << 0)
|
||||
#define BASE64_FORCE_NEON32 (1 << 1)
|
||||
#define BASE64_FORCE_NEON64 (1 << 2)
|
||||
#define BASE64_FORCE_PLAIN (1 << 3)
|
||||
#define BASE64_FORCE_SSSE3 (1 << 4)
|
||||
#define BASE64_FORCE_SSE41 (1 << 5)
|
||||
#define BASE64_FORCE_SSE42 (1 << 6)
|
||||
#define BASE64_FORCE_AVX (1 << 7)
|
||||
|
||||
/* State object handed to the base64_stream_*() functions declared in this
 * header. Initialize it with base64_stream_encode_init() or
 * base64_stream_decode_init() before the first streaming call.
 * NOTE(review): the fields look like internal bookkeeping (presumably `flags`
 * keeps the value passed to the init call); confirm against the
 * implementation before relying on any of them directly. */
struct base64_state {
|
||||
int eof;
|
||||
int bytes;
|
||||
int flags;
|
||||
unsigned char carry;
|
||||
};
|
||||
|
||||
/* Wrapper function to encode a plain string of given length. Output is written
|
||||
* to *out without trailing zero. Output length in bytes is written to *outlen.
|
||||
* The buffer in `out` has been allocated by the caller and is at least 4/3 the
|
||||
* size of the input. See above for `flags`; set to 0 for default operation: */
|
||||
void BASE64_EXPORT base64_encode
|
||||
( const char *src
|
||||
, size_t srclen
|
||||
, char *out
|
||||
, size_t *outlen
|
||||
, int flags
|
||||
) ;
|
||||
|
||||
/* Call this before calling base64_stream_encode() to init the state. See above
|
||||
* for `flags`; set to 0 for default operation: */
|
||||
void BASE64_EXPORT base64_stream_encode_init
|
||||
( struct base64_state *state
|
||||
, int flags
|
||||
) ;
|
||||
|
||||
/* Encodes the block of data of given length at `src`, into the buffer at
|
||||
* `out`. Caller is responsible for allocating a large enough out-buffer; it
|
||||
* must be at least 4/3 the size of the in-buffer, but take some margin. Places
|
||||
* the number of new bytes written into `outlen` (which is set to zero when the
|
||||
* function starts). Does not zero-terminate or finalize the output. */
|
||||
void BASE64_EXPORT base64_stream_encode
|
||||
( struct base64_state *state
|
||||
, const char *src
|
||||
, size_t srclen
|
||||
, char *out
|
||||
, size_t *outlen
|
||||
) ;
|
||||
|
||||
/* Finalizes the output begun by previous calls to `base64_stream_encode()`.
|
||||
* Adds the required end-of-stream markers if appropriate. `outlen` is modified
|
||||
* and will contain the number of new bytes written at `out` (which will quite
|
||||
* often be zero). */
|
||||
void BASE64_EXPORT base64_stream_encode_final
|
||||
( struct base64_state *state
|
||||
, char *out
|
||||
, size_t *outlen
|
||||
) ;
|
||||
|
||||
/* Wrapper function to decode a plain string of given length. Output is written
|
||||
* to *out without trailing zero. Output length in bytes is written to *outlen.
|
||||
* The buffer in `out` has been allocated by the caller and is at least 3/4 the
|
||||
* size of the input. See above for `flags`, set to 0 for default operation: */
|
||||
int BASE64_EXPORT base64_decode
|
||||
( const char *src
|
||||
, size_t srclen
|
||||
, char *out
|
||||
, size_t *outlen
|
||||
, int flags
|
||||
) ;
|
||||
|
||||
/* Call this before calling base64_stream_decode() to init the state. See above
|
||||
* for `flags`; set to 0 for default operation: */
|
||||
void BASE64_EXPORT base64_stream_decode_init
|
||||
( struct base64_state *state
|
||||
, int flags
|
||||
) ;
|
||||
|
||||
/* Decodes the block of data of given length at `src`, into the buffer at
|
||||
* `out`. Caller is responsible for allocating a large enough out-buffer; it
|
||||
* must be at least 3/4 the size of the in-buffer, but take some margin. Places
|
||||
* the number of new bytes written into `outlen` (which is set to zero when the
|
||||
* function starts). Does not zero-terminate the output. Returns 1 if all is
|
||||
* well, and 0 if a decoding error was found, such as an invalid character.
|
||||
* Returns -1 if the chosen codec is not included in the current build. Used by
|
||||
* the test harness to check whether a codec is available for testing. */
|
||||
int BASE64_EXPORT base64_stream_decode
|
||||
( struct base64_state *state
|
||||
, const char *src
|
||||
, size_t srclen
|
||||
, char *out
|
||||
, size_t *outlen
|
||||
) ;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* LIBBASE64_H */
|
42
deps/base64/base64/lib/arch/avx/codec.c
vendored
Normal file
42
deps/base64/base64/lib/arch/avx/codec.c
vendored
Normal file
@ -0,0 +1,42 @@
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "../../../include/libbase64.h"
|
||||
#include "../../tables/tables.h"
|
||||
#include "../../codecs.h"
|
||||
#include "config.h"
|
||||
#include "../../env.h"
|
||||
|
||||
#if HAVE_AVX
|
||||
#include <immintrin.h>
|
||||
|
||||
#include "../ssse3/dec_reshuffle.c"
|
||||
#include "../ssse3/dec_loop.c"
|
||||
#include "../ssse3/enc_translate.c"
|
||||
#include "../ssse3/enc_reshuffle.c"
|
||||
#include "../ssse3/enc_loop.c"
|
||||
|
||||
#endif // HAVE_AVX
|
||||
|
||||
BASE64_ENC_FUNCTION(avx)
|
||||
{
|
||||
#if HAVE_AVX
|
||||
#include "../generic/enc_head.c"
|
||||
enc_loop_ssse3(&s, &slen, &o, &olen);
|
||||
#include "../generic/enc_tail.c"
|
||||
#else
|
||||
BASE64_ENC_STUB
|
||||
#endif
|
||||
}
|
||||
|
||||
BASE64_DEC_FUNCTION(avx)
|
||||
{
|
||||
#if HAVE_AVX
|
||||
#include "../generic/dec_head.c"
|
||||
dec_loop_ssse3(&s, &slen, &o, &olen);
|
||||
#include "../generic/dec_tail.c"
|
||||
#else
|
||||
BASE64_DEC_STUB
|
||||
#endif
|
||||
}
|
42
deps/base64/base64/lib/arch/avx2/codec.c
vendored
Normal file
42
deps/base64/base64/lib/arch/avx2/codec.c
vendored
Normal file
@ -0,0 +1,42 @@
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "../../../include/libbase64.h"
|
||||
#include "../../tables/tables.h"
|
||||
#include "../../codecs.h"
|
||||
#include "config.h"
|
||||
#include "../../env.h"
|
||||
|
||||
#if HAVE_AVX2
|
||||
#include <immintrin.h>
|
||||
|
||||
#include "dec_reshuffle.c"
|
||||
#include "dec_loop.c"
|
||||
#include "enc_translate.c"
|
||||
#include "enc_reshuffle.c"
|
||||
#include "enc_loop.c"
|
||||
|
||||
#endif // HAVE_AVX2
|
||||
|
||||
BASE64_ENC_FUNCTION(avx2)
|
||||
{
|
||||
#if HAVE_AVX2
|
||||
#include "../generic/enc_head.c"
|
||||
enc_loop_avx2(&s, &slen, &o, &olen);
|
||||
#include "../generic/enc_tail.c"
|
||||
#else
|
||||
BASE64_ENC_STUB
|
||||
#endif
|
||||
}
|
||||
|
||||
BASE64_DEC_FUNCTION(avx2)
|
||||
{
|
||||
#if HAVE_AVX2
|
||||
#include "../generic/dec_head.c"
|
||||
dec_loop_avx2(&s, &slen, &o, &olen);
|
||||
#include "../generic/dec_tail.c"
|
||||
#else
|
||||
BASE64_DEC_STUB
|
||||
#endif
|
||||
}
|
110
deps/base64/base64/lib/arch/avx2/dec_loop.c
vendored
Normal file
110
deps/base64/base64/lib/arch/avx2/dec_loop.c
vendored
Normal file
@ -0,0 +1,110 @@
|
||||
static inline int
|
||||
dec_loop_avx2_inner (const uint8_t **s, uint8_t **o, size_t *rounds)
|
||||
{
|
||||
const __m256i lut_lo = _mm256_setr_epi8(
|
||||
0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11,
|
||||
0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A,
|
||||
0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11,
|
||||
0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A);
|
||||
|
||||
const __m256i lut_hi = _mm256_setr_epi8(
|
||||
0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08,
|
||||
0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
|
||||
0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08,
|
||||
0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10);
|
||||
|
||||
const __m256i lut_roll = _mm256_setr_epi8(
|
||||
0, 16, 19, 4, -65, -65, -71, -71,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 16, 19, 4, -65, -65, -71, -71,
|
||||
0, 0, 0, 0, 0, 0, 0, 0);
|
||||
|
||||
const __m256i mask_2F = _mm256_set1_epi8(0x2F);
|
||||
|
||||
// Load input:
|
||||
__m256i str = _mm256_loadu_si256((__m256i *) *s);
|
||||
|
||||
// See the SSSE3 decoder for an explanation of the algorithm.
|
||||
const __m256i hi_nibbles = _mm256_and_si256(_mm256_srli_epi32(str, 4), mask_2F);
|
||||
const __m256i lo_nibbles = _mm256_and_si256(str, mask_2F);
|
||||
const __m256i hi = _mm256_shuffle_epi8(lut_hi, hi_nibbles);
|
||||
const __m256i lo = _mm256_shuffle_epi8(lut_lo, lo_nibbles);
|
||||
|
||||
if (!_mm256_testz_si256(lo, hi)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
const __m256i eq_2F = _mm256_cmpeq_epi8(str, mask_2F);
|
||||
const __m256i roll = _mm256_shuffle_epi8(lut_roll, _mm256_add_epi8(eq_2F, hi_nibbles));
|
||||
|
||||
// Now simply add the delta values to the input:
|
||||
str = _mm256_add_epi8(str, roll);
|
||||
|
||||
// Reshuffle the input to packed 12-byte output format:
|
||||
str = dec_reshuffle(str);
|
||||
|
||||
// Store the output:
|
||||
_mm256_storeu_si256((__m256i *) *o, str);
|
||||
|
||||
*s += 32;
|
||||
*o += 24;
|
||||
*rounds -= 1;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Bulk AVX2 decoder: consumes as many whole 32-character blocks as is safe
// and leaves the rest (including any block the inner routine rejects) to the
// caller. *s / *o are advanced and *slen / *olen are updated to reflect
// exactly the blocks that were decoded.
static inline void
dec_loop_avx2 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
{
	size_t rounds;

	// Too short for even a single round:
	if (*slen < 45) {
		return;
	}

	// Each round stores 32 bytes but only 24 of them are real output, so 8
	// surplus bytes land past the decoded data. Keeping at least 13 input
	// bytes in reserve (11 data bytes plus up to two end-of-string markers)
	// guarantees later output covers that gap.
	rounds = (*slen - 13) / 32;

	// Book the full amount up front; undecoded rounds are refunded below.
	*slen -= rounds * 32;	// 32 bytes consumed per round
	*olen += rounds * 24;	// 24 bytes produced per round

	// Manually unrolled in batches of 8, 4, 2 and finally 1. The inner
	// routine decrements `rounds` itself, and the && chains stop at the
	// first block it rejects.
	for (;;) {
		int ok;

		if (rounds >= 8) {
			ok = dec_loop_avx2_inner(s, o, &rounds)
			  && dec_loop_avx2_inner(s, o, &rounds)
			  && dec_loop_avx2_inner(s, o, &rounds)
			  && dec_loop_avx2_inner(s, o, &rounds)
			  && dec_loop_avx2_inner(s, o, &rounds)
			  && dec_loop_avx2_inner(s, o, &rounds)
			  && dec_loop_avx2_inner(s, o, &rounds)
			  && dec_loop_avx2_inner(s, o, &rounds);
		}
		else if (rounds >= 4) {
			ok = dec_loop_avx2_inner(s, o, &rounds)
			  && dec_loop_avx2_inner(s, o, &rounds)
			  && dec_loop_avx2_inner(s, o, &rounds)
			  && dec_loop_avx2_inner(s, o, &rounds);
		}
		else if (rounds >= 2) {
			ok = dec_loop_avx2_inner(s, o, &rounds)
			  && dec_loop_avx2_inner(s, o, &rounds);
		}
		else {
			// Last single round; its result does not matter because
			// the loop ends either way.
			dec_loop_avx2_inner(s, o, &rounds);
			break;
		}

		if (!ok || rounds == 0) {
			break;
		}
	}

	// Refund the rounds that were booked but never decoded:
	*slen += rounds * 32;
	*olen -= rounds * 24;
}
|
34
deps/base64/base64/lib/arch/avx2/dec_reshuffle.c
vendored
Normal file
34
deps/base64/base64/lib/arch/avx2/dec_reshuffle.c
vendored
Normal file
@ -0,0 +1,34 @@
|
||||
static inline __m256i
|
||||
dec_reshuffle (const __m256i in)
|
||||
{
|
||||
// in, lower lane, bits, upper case are most significant bits, lower
|
||||
// case are least significant bits:
|
||||
// 00llllll 00kkkkLL 00jjKKKK 00JJJJJJ
|
||||
// 00iiiiii 00hhhhII 00ggHHHH 00GGGGGG
|
||||
// 00ffffff 00eeeeFF 00ddEEEE 00DDDDDD
|
||||
// 00cccccc 00bbbbCC 00aaBBBB 00AAAAAA
|
||||
|
||||
const __m256i merge_ab_and_bc = _mm256_maddubs_epi16(in, _mm256_set1_epi32(0x01400140));
|
||||
// 0000kkkk LLllllll 0000JJJJ JJjjKKKK
|
||||
// 0000hhhh IIiiiiii 0000GGGG GGggHHHH
|
||||
// 0000eeee FFffffff 0000DDDD DDddEEEE
|
||||
// 0000bbbb CCcccccc 0000AAAA AAaaBBBB
|
||||
|
||||
__m256i out = _mm256_madd_epi16(merge_ab_and_bc, _mm256_set1_epi32(0x00011000));
|
||||
// 00000000 JJJJJJjj KKKKkkkk LLllllll
|
||||
// 00000000 GGGGGGgg HHHHhhhh IIiiiiii
|
||||
// 00000000 DDDDDDdd EEEEeeee FFffffff
|
||||
// 00000000 AAAAAAaa BBBBbbbb CCcccccc
|
||||
|
||||
// Pack bytes together in each lane:
|
||||
out = _mm256_shuffle_epi8(out, _mm256_setr_epi8(
|
||||
2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1,
|
||||
2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1));
|
||||
// 00000000 00000000 00000000 00000000
|
||||
// LLllllll KKKKkkkk JJJJJJjj IIiiiiii
|
||||
// HHHHhhhh GGGGGGgg FFffffff EEEEeeee
|
||||
// DDDDDDdd CCcccccc BBBBbbbb AAAAAAaa
|
||||
|
||||
// Pack lanes:
|
||||
return _mm256_permutevar8x32_epi32(out, _mm256_setr_epi32(0, 1, 2, 4, 5, 6, -1, -1));
|
||||
}
|
89
deps/base64/base64/lib/arch/avx2/enc_loop.c
vendored
Normal file
89
deps/base64/base64/lib/arch/avx2/enc_loop.c
vendored
Normal file
@ -0,0 +1,89 @@
|
||||
static inline void
|
||||
enc_loop_avx2_inner_first (const uint8_t **s, uint8_t **o)
|
||||
{
|
||||
// First load is done at s - 0 to not get a segfault:
|
||||
__m256i src = _mm256_loadu_si256((__m256i *) *s);
|
||||
|
||||
// Shift by 4 bytes, as required by enc_reshuffle:
|
||||
src = _mm256_permutevar8x32_epi32(src, _mm256_setr_epi32(0, 0, 1, 2, 3, 4, 5, 6));
|
||||
|
||||
// Reshuffle, translate, store:
|
||||
src = enc_reshuffle(src);
|
||||
src = enc_translate(src);
|
||||
_mm256_storeu_si256((__m256i *) *o, src);
|
||||
|
||||
// Subsequent loads will be done at s - 4, set pointer for next round:
|
||||
*s += 20;
|
||||
*o += 32;
|
||||
}
|
||||
|
||||
static inline void
|
||||
enc_loop_avx2_inner (const uint8_t **s, uint8_t **o)
|
||||
{
|
||||
// Load input:
|
||||
__m256i src = _mm256_loadu_si256((__m256i *) *s);
|
||||
|
||||
// Reshuffle, translate, store:
|
||||
src = enc_reshuffle(src);
|
||||
src = enc_translate(src);
|
||||
_mm256_storeu_si256((__m256i *) *o, src);
|
||||
|
||||
*s += 24;
|
||||
*o += 32;
|
||||
}
|
||||
|
||||
// Bulk AVX2 encoder: turns as many whole 24-byte blocks as is safe into
// 32-character groups, advancing *s / *o and updating *slen / *olen. Any
// remaining input is left for the caller.
static inline void
enc_loop_avx2 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
{
	size_t todo;

	// A round loads 32 bytes; with less input there is nothing to do here.
	if (*slen < 32) {
		return;
	}

	// Blocks are 24 bytes, but each load fetches 32 bytes at an offset of
	// -4. Holding back at least 4 input bytes after the last round keeps
	// that final load inside the input buffer.
	todo = (*slen - 4) / 24;

	*slen -= todo * 24;	// 24 bytes consumed per round
	*olen += todo * 32;	// 32 bytes produced per round

	// The first round loads at offset 0 so as not to read in front of the
	// buffer; it leaves *s at the -4 offset used from here on.
	enc_loop_avx2_inner_first(s, o);
	todo -= 1;

	// Manually unrolled: batches of 8, then at most one batch each of 4,
	// 2 and 1.
	for (; todo >= 8; todo -= 8) {
		enc_loop_avx2_inner(s, o);
		enc_loop_avx2_inner(s, o);
		enc_loop_avx2_inner(s, o);
		enc_loop_avx2_inner(s, o);
		enc_loop_avx2_inner(s, o);
		enc_loop_avx2_inner(s, o);
		enc_loop_avx2_inner(s, o);
		enc_loop_avx2_inner(s, o);
	}
	if (todo >= 4) {
		enc_loop_avx2_inner(s, o);
		enc_loop_avx2_inner(s, o);
		enc_loop_avx2_inner(s, o);
		enc_loop_avx2_inner(s, o);
		todo -= 4;
	}
	if (todo >= 2) {
		enc_loop_avx2_inner(s, o);
		enc_loop_avx2_inner(s, o);
		todo -= 2;
	}
	if (todo != 0) {
		enc_loop_avx2_inner(s, o);
	}

	// Undo the -4 load offset so *s points at the first unread byte:
	*s += 4;
}
|
83
deps/base64/base64/lib/arch/avx2/enc_reshuffle.c
vendored
Normal file
83
deps/base64/base64/lib/arch/avx2/enc_reshuffle.c
vendored
Normal file
@ -0,0 +1,83 @@
|
||||
static inline __m256i
|
||||
enc_reshuffle (const __m256i input)
|
||||
{
|
||||
// Translation of the SSSE3 reshuffling algorithm to AVX2. This one
|
||||
// works with shifted (4 bytes) input in order to be able to work
|
||||
// efficiently in the two 128-bit lanes.
|
||||
|
||||
// Input, bytes MSB to LSB:
|
||||
// 0 0 0 0 x w v u t s r q p o n m
|
||||
// l k j i h g f e d c b a 0 0 0 0
|
||||
|
||||
const __m256i in = _mm256_shuffle_epi8(input, _mm256_set_epi8(
|
||||
10, 11, 9, 10,
|
||||
7, 8, 6, 7,
|
||||
4, 5, 3, 4,
|
||||
1, 2, 0, 1,
|
||||
|
||||
14, 15, 13, 14,
|
||||
11, 12, 10, 11,
|
||||
8, 9, 7, 8,
|
||||
5, 6, 4, 5));
|
||||
// in, bytes MSB to LSB:
|
||||
// w x v w
|
||||
// t u s t
|
||||
// q r p q
|
||||
// n o m n
|
||||
// k l j k
|
||||
// h i g h
|
||||
// e f d e
|
||||
// b c a b
|
||||
|
||||
const __m256i t0 = _mm256_and_si256(in, _mm256_set1_epi32(0x0FC0FC00));
|
||||
// bits, upper case are most significant bits, lower case are least
|
||||
// significant bits.
|
||||
// 0000wwww XX000000 VVVVVV00 00000000
|
||||
// 0000tttt UU000000 SSSSSS00 00000000
|
||||
// 0000qqqq RR000000 PPPPPP00 00000000
|
||||
// 0000nnnn OO000000 MMMMMM00 00000000
|
||||
// 0000kkkk LL000000 JJJJJJ00 00000000
|
||||
// 0000hhhh II000000 GGGGGG00 00000000
|
||||
// 0000eeee FF000000 DDDDDD00 00000000
|
||||
// 0000bbbb CC000000 AAAAAA00 00000000
|
||||
|
||||
const __m256i t1 = _mm256_mulhi_epu16(t0, _mm256_set1_epi32(0x04000040));
|
||||
// 00000000 00wwwwXX 00000000 00VVVVVV
|
||||
// 00000000 00ttttUU 00000000 00SSSSSS
|
||||
// 00000000 00qqqqRR 00000000 00PPPPPP
|
||||
// 00000000 00nnnnOO 00000000 00MMMMMM
|
||||
// 00000000 00kkkkLL 00000000 00JJJJJJ
|
||||
// 00000000 00hhhhII 00000000 00GGGGGG
|
||||
// 00000000 00eeeeFF 00000000 00DDDDDD
|
||||
// 00000000 00bbbbCC 00000000 00AAAAAA
|
||||
|
||||
const __m256i t2 = _mm256_and_si256(in, _mm256_set1_epi32(0x003F03F0));
|
||||
// 00000000 00xxxxxx 000000vv WWWW0000
|
||||
// 00000000 00uuuuuu 000000ss TTTT0000
|
||||
// 00000000 00rrrrrr 000000pp QQQQ0000
|
||||
// 00000000 00oooooo 000000mm NNNN0000
|
||||
// 00000000 00llllll 000000jj KKKK0000
|
||||
// 00000000 00iiiiii 000000gg HHHH0000
|
||||
// 00000000 00ffffff 000000dd EEEE0000
|
||||
// 00000000 00cccccc 000000aa BBBB0000
|
||||
|
||||
const __m256i t3 = _mm256_mullo_epi16(t2, _mm256_set1_epi32(0x01000010));
|
||||
// 00xxxxxx 00000000 00vvWWWW 00000000
|
||||
// 00uuuuuu 00000000 00ssTTTT 00000000
|
||||
// 00rrrrrr 00000000 00ppQQQQ 00000000
|
||||
// 00oooooo 00000000 00mmNNNN 00000000
|
||||
// 00llllll 00000000 00jjKKKK 00000000
|
||||
// 00iiiiii 00000000 00ggHHHH 00000000
|
||||
// 00ffffff 00000000 00ddEEEE 00000000
|
||||
// 00cccccc 00000000 00aaBBBB 00000000
|
||||
|
||||
return _mm256_or_si256(t1, t3);
|
||||
// 00xxxxxx 00wwwwXX 00vvWWWW 00VVVVVV
|
||||
// 00uuuuuu 00ttttUU 00ssTTTT 00SSSSSS
|
||||
// 00rrrrrr 00qqqqRR 00ppQQQQ 00PPPPPP
|
||||
// 00oooooo 00nnnnOO 00mmNNNN 00MMMMMM
|
||||
// 00llllll 00kkkkLL 00jjKKKK 00JJJJJJ
|
||||
// 00iiiiii 00hhhhII 00ggHHHH 00GGGGGG
|
||||
// 00ffffff 00eeeeFF 00ddEEEE 00DDDDDD
|
||||
// 00cccccc 00bbbbCC 00aaBBBB 00AAAAAA
|
||||
}
|
30
deps/base64/base64/lib/arch/avx2/enc_translate.c
vendored
Normal file
30
deps/base64/base64/lib/arch/avx2/enc_translate.c
vendored
Normal file
@ -0,0 +1,30 @@
|
||||
static inline __m256i
|
||||
enc_translate (const __m256i in)
|
||||
{
|
||||
// A lookup table containing the absolute offsets for all ranges:
|
||||
const __m256i lut = _mm256_setr_epi8(
|
||||
65, 71, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -19, -16, 0, 0,
|
||||
65, 71, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -19, -16, 0, 0);
|
||||
|
||||
// Translate values 0..63 to the Base64 alphabet. There are five sets:
|
||||
// # From To Abs Index Characters
|
||||
// 0 [0..25] [65..90] +65 0 ABCDEFGHIJKLMNOPQRSTUVWXYZ
|
||||
// 1 [26..51] [97..122] +71 1 abcdefghijklmnopqrstuvwxyz
|
||||
// 2 [52..61] [48..57] -4 [2..11] 0123456789
|
||||
// 3 [62] [43] -19 12 +
|
||||
// 4 [63] [47] -16 13 /
|
||||
|
||||
// Create LUT indices from the input. The index for range #0 is right,
|
||||
// others are 1 less than expected:
|
||||
__m256i indices = _mm256_subs_epu8(in, _mm256_set1_epi8(51));
|
||||
|
||||
// mask is 0xFF (-1) for range #[1..4] and 0x00 for range #0:
|
||||
const __m256i mask = _mm256_cmpgt_epi8(in, _mm256_set1_epi8(25));
|
||||
|
||||
// Subtract -1, so add 1 to indices for range #[1..4]. All indices are
|
||||
// now correct:
|
||||
indices = _mm256_sub_epi8(indices, mask);
|
||||
|
||||
// Add offsets to input values:
|
||||
return _mm256_add_epi8(in, _mm256_shuffle_epi8(lut, indices));
|
||||
}
|
86
deps/base64/base64/lib/arch/generic/32/dec_loop.c
vendored
Normal file
86
deps/base64/base64/lib/arch/generic/32/dec_loop.c
vendored
Normal file
@ -0,0 +1,86 @@
|
||||
static inline int
|
||||
dec_loop_generic_32_inner (const uint8_t **s, uint8_t **o, size_t *rounds)
|
||||
{
|
||||
const uint32_t str
|
||||
= base64_table_dec_32bit_d0[(*s)[0]]
|
||||
| base64_table_dec_32bit_d1[(*s)[1]]
|
||||
| base64_table_dec_32bit_d2[(*s)[2]]
|
||||
| base64_table_dec_32bit_d3[(*s)[3]];
|
||||
|
||||
#if BASE64_LITTLE_ENDIAN
|
||||
|
||||
// LUTs for little-endian set MSB in case of invalid character:
|
||||
if (str & UINT32_C(0x80000000)) {
|
||||
return 0;
|
||||
}
|
||||
#else
|
||||
// LUTs for big-endian set LSB in case of invalid character:
|
||||
if (str & UINT32_C(1)) {
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
// Store the output:
|
||||
memcpy(*o, &str, sizeof (str));
|
||||
|
||||
*s += 4;
|
||||
*o += 3;
|
||||
*rounds -= 1;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Bulk table-driven decoder: consumes as many whole 4-character groups as is
// safe and leaves the rest (including any group the inner routine rejects)
// to the caller. *s / *o are advanced and *slen / *olen are updated to
// reflect exactly the groups that were decoded.
static inline void
dec_loop_generic_32 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
{
	size_t rounds;

	if (*slen < 8) {
		return;
	}

	// Each round stores 4 bytes but only 3 are real output, so one surplus
	// byte lands past the decoded data. Keeping at least 4 input bytes in
	// reserve (two data bytes plus up to two end-of-string markers)
	// guarantees later output covers that gap.
	rounds = (*slen - 4) / 4;

	// Book the full amount up front; undecoded rounds are refunded below.
	*slen -= rounds * 4;	// 4 bytes consumed per round
	*olen += rounds * 3;	// 3 bytes produced per round

	// Manually unrolled in batches of 8, 4, 2 and finally 1. The inner
	// routine decrements `rounds` itself, and the && chains stop at the
	// first group it rejects.
	for (;;) {
		int ok;

		if (rounds >= 8) {
			ok = dec_loop_generic_32_inner(s, o, &rounds)
			  && dec_loop_generic_32_inner(s, o, &rounds)
			  && dec_loop_generic_32_inner(s, o, &rounds)
			  && dec_loop_generic_32_inner(s, o, &rounds)
			  && dec_loop_generic_32_inner(s, o, &rounds)
			  && dec_loop_generic_32_inner(s, o, &rounds)
			  && dec_loop_generic_32_inner(s, o, &rounds)
			  && dec_loop_generic_32_inner(s, o, &rounds);
		}
		else if (rounds >= 4) {
			ok = dec_loop_generic_32_inner(s, o, &rounds)
			  && dec_loop_generic_32_inner(s, o, &rounds)
			  && dec_loop_generic_32_inner(s, o, &rounds)
			  && dec_loop_generic_32_inner(s, o, &rounds);
		}
		else if (rounds >= 2) {
			ok = dec_loop_generic_32_inner(s, o, &rounds)
			  && dec_loop_generic_32_inner(s, o, &rounds);
		}
		else {
			// Last single round; its result does not matter because
			// the loop ends either way.
			dec_loop_generic_32_inner(s, o, &rounds);
			break;
		}

		if (!ok || rounds == 0) {
			break;
		}
	}

	// Refund the rounds that were booked but never decoded:
	*slen += rounds * 4;
	*olen -= rounds * 3;
}
|
73
deps/base64/base64/lib/arch/generic/32/enc_loop.c
vendored
Normal file
73
deps/base64/base64/lib/arch/generic/32/enc_loop.c
vendored
Normal file
@ -0,0 +1,73 @@
|
||||
static inline void
|
||||
enc_loop_generic_32_inner (const uint8_t **s, uint8_t **o)
|
||||
{
|
||||
uint32_t src;
|
||||
|
||||
// Load input:
|
||||
memcpy(&src, *s, sizeof (src));
|
||||
|
||||
// Reorder to 32-bit big-endian, if not already in that format. The
|
||||
// workset must be in big-endian, otherwise the shifted bits do not
|
||||
// carry over properly among adjacent bytes:
|
||||
src = BASE64_HTOBE32(src);
|
||||
|
||||
// Two indices for the 12-bit lookup table:
|
||||
const size_t index0 = (src >> 20) & 0xFFFU;
|
||||
const size_t index1 = (src >> 8) & 0xFFFU;
|
||||
|
||||
// Table lookup and store:
|
||||
memcpy(*o + 0, base64_table_enc_12bit + index0, 2);
|
||||
memcpy(*o + 2, base64_table_enc_12bit + index1, 2);
|
||||
|
||||
*s += 3;
|
||||
*o += 4;
|
||||
}
|
||||
|
||||
// Bulk table-driven encoder for 32-bit words: turns as many whole 3-byte
// groups as is safe into 4-character groups, advancing *s / *o and updating
// *slen / *olen. Any remaining input is left for the caller.
static inline void
enc_loop_generic_32 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
{
	size_t rounds;

	if (*slen < 4) {
		return;
	}

	// Each round consumes 3 bytes but loads 4. Holding back at least one
	// input byte after the last round keeps the final load inside the
	// input buffer.
	rounds = (*slen - 1) / 3;

	*slen -= rounds * 3;	// 3 bytes consumed per round
	*olen += rounds * 4;	// 4 bytes produced per round

	// Manually unrolled in batches of 8, 4, 2 and finally 1:
	for (;;) {
		if (rounds >= 8) {
			enc_loop_generic_32_inner(s, o);
			enc_loop_generic_32_inner(s, o);
			enc_loop_generic_32_inner(s, o);
			enc_loop_generic_32_inner(s, o);
			enc_loop_generic_32_inner(s, o);
			enc_loop_generic_32_inner(s, o);
			enc_loop_generic_32_inner(s, o);
			enc_loop_generic_32_inner(s, o);
			rounds -= 8;
		}
		else if (rounds >= 4) {
			enc_loop_generic_32_inner(s, o);
			enc_loop_generic_32_inner(s, o);
			enc_loop_generic_32_inner(s, o);
			enc_loop_generic_32_inner(s, o);
			rounds -= 4;
		}
		else if (rounds >= 2) {
			enc_loop_generic_32_inner(s, o);
			enc_loop_generic_32_inner(s, o);
			rounds -= 2;
		}
		else {
			// Final single round:
			enc_loop_generic_32_inner(s, o);
			break;
		}

		if (rounds == 0) {
			break;
		}
	}
}
|
77
deps/base64/base64/lib/arch/generic/64/enc_loop.c
vendored
Normal file
77
deps/base64/base64/lib/arch/generic/64/enc_loop.c
vendored
Normal file
@ -0,0 +1,77 @@
|
||||
static inline void
|
||||
enc_loop_generic_64_inner (const uint8_t **s, uint8_t **o)
|
||||
{
|
||||
uint64_t src;
|
||||
|
||||
// Load input:
|
||||
memcpy(&src, *s, sizeof (src));
|
||||
|
||||
// Reorder to 64-bit big-endian, if not already in that format. The
|
||||
// workset must be in big-endian, otherwise the shifted bits do not
|
||||
// carry over properly among adjacent bytes:
|
||||
src = BASE64_HTOBE64(src);
|
||||
|
||||
// Four indices for the 12-bit lookup table:
|
||||
const size_t index0 = (src >> 52) & 0xFFFU;
|
||||
const size_t index1 = (src >> 40) & 0xFFFU;
|
||||
const size_t index2 = (src >> 28) & 0xFFFU;
|
||||
const size_t index3 = (src >> 16) & 0xFFFU;
|
||||
|
||||
// Table lookup and store:
|
||||
memcpy(*o + 0, base64_table_enc_12bit + index0, 2);
|
||||
memcpy(*o + 2, base64_table_enc_12bit + index1, 2);
|
||||
memcpy(*o + 4, base64_table_enc_12bit + index2, 2);
|
||||
memcpy(*o + 6, base64_table_enc_12bit + index3, 2);
|
||||
|
||||
*s += 6;
|
||||
*o += 8;
|
||||
}
|
||||
|
||||
// Bulk table-driven encoder for 64-bit words: turns as many whole 6-byte
// groups as is safe into 8-character groups, advancing *s / *o and updating
// *slen / *olen. Any remaining input is left for the caller.
static inline void
enc_loop_generic_64 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
{
	size_t rounds;

	if (*slen < 8) {
		return;
	}

	// Each round consumes 6 bytes but loads 8. Holding back at least 2
	// input bytes after the last round keeps the final load inside the
	// input buffer.
	rounds = (*slen - 2) / 6;

	*slen -= rounds * 6;	// 6 bytes consumed per round
	*olen += rounds * 8;	// 8 bytes produced per round

	// Manually unrolled in batches of 8, 4, 2 and finally 1:
	for (;;) {
		if (rounds >= 8) {
			enc_loop_generic_64_inner(s, o);
			enc_loop_generic_64_inner(s, o);
			enc_loop_generic_64_inner(s, o);
			enc_loop_generic_64_inner(s, o);
			enc_loop_generic_64_inner(s, o);
			enc_loop_generic_64_inner(s, o);
			enc_loop_generic_64_inner(s, o);
			enc_loop_generic_64_inner(s, o);
			rounds -= 8;
		}
		else if (rounds >= 4) {
			enc_loop_generic_64_inner(s, o);
			enc_loop_generic_64_inner(s, o);
			enc_loop_generic_64_inner(s, o);
			enc_loop_generic_64_inner(s, o);
			rounds -= 4;
		}
		else if (rounds >= 2) {
			enc_loop_generic_64_inner(s, o);
			enc_loop_generic_64_inner(s, o);
			rounds -= 2;
		}
		else {
			// Final single round:
			enc_loop_generic_64_inner(s, o);
			break;
		}

		if (rounds == 0) {
			break;
		}
	}
}
|
39
deps/base64/base64/lib/arch/generic/codec.c
vendored
Normal file
39
deps/base64/base64/lib/arch/generic/codec.c
vendored
Normal file
@ -0,0 +1,39 @@
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "../../../include/libbase64.h"
|
||||
#include "../../tables/tables.h"
|
||||
#include "../../codecs.h"
|
||||
#include "config.h"
|
||||
#include "../../env.h"
|
||||
|
||||
#if BASE64_WORDSIZE == 32
|
||||
# include "32/enc_loop.c"
|
||||
#elif BASE64_WORDSIZE == 64
|
||||
# include "64/enc_loop.c"
|
||||
#endif
|
||||
|
||||
#if BASE64_WORDSIZE >= 32
|
||||
# include "32/dec_loop.c"
|
||||
#endif
|
||||
|
||||
// Plain (non-SIMD) encoder entry point. BASE64_ENC_FUNCTION (see codecs.h)
// supplies the signature; the included head/tail fragments read `state`,
// `src`, `srclen`, `out` and `outlen` from it and declare the locals `s`,
// `slen`, `o`, `olen`.
BASE64_ENC_FUNCTION(plain)
{
#include "enc_head.c"
	// Word-sized bulk loop where one exists for this word size; for any
	// other BASE64_WORDSIZE the byte-wise head/tail code does all the work.
#if BASE64_WORDSIZE == 64
	enc_loop_generic_64(&s, &slen, &o, &olen);
#elif BASE64_WORDSIZE == 32
	enc_loop_generic_32(&s, &slen, &o, &olen);
#endif
#include "enc_tail.c"
}
|
||||
|
||||
// Plain (non-SIMD) decoder entry point. BASE64_DEC_FUNCTION (see codecs.h)
// supplies the signature; the included head/tail fragments read `state`,
// `src`, `srclen`, `out` and `outlen` from it, declare the locals `s`,
// `slen`, `o`, `olen`, and return the decode status.
BASE64_DEC_FUNCTION(plain)
|
||||
{
|
||||
#include "dec_head.c"
|
||||
#if BASE64_WORDSIZE >= 32
|
||||
// Table-driven bulk loop, 4 characters per round; only compiled in when
// the word size is at least 32 bits. Otherwise the byte-wise head/tail
// code decodes everything.
dec_loop_generic_32(&s, &slen, &o, &olen);
|
||||
#endif
|
||||
#include "dec_tail.c"
|
||||
}
|
37
deps/base64/base64/lib/arch/generic/dec_head.c
vendored
Normal file
37
deps/base64/base64/lib/arch/generic/dec_head.c
vendored
Normal file
@ -0,0 +1,37 @@
|
||||
int ret = 0;
|
||||
const uint8_t *s = (const uint8_t *) src;
|
||||
uint8_t *o = (uint8_t *) out;
|
||||
uint8_t q;
|
||||
|
||||
// Use local temporaries to avoid cache thrashing:
|
||||
size_t olen = 0;
|
||||
size_t slen = srclen;
|
||||
struct base64_state st;
|
||||
st.eof = state->eof;
|
||||
st.bytes = state->bytes;
|
||||
st.carry = state->carry;
|
||||
|
||||
// If we previously saw an EOF or an invalid character, bail out:
|
||||
if (st.eof) {
|
||||
*outlen = 0;
|
||||
ret = 0;
|
||||
// If there was a trailing '=' to check, check it:
|
||||
if (slen && (st.eof == BASE64_AEOF)) {
|
||||
state->bytes = 0;
|
||||
state->eof = BASE64_EOF;
|
||||
ret = ((base64_table_dec_8bit[*s++] == 254) && (slen == 1)) ? 1 : 0;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Turn four 6-bit numbers into three bytes:
|
||||
// out[0] = 11111122
|
||||
// out[1] = 22223333
|
||||
// out[2] = 33444444
|
||||
|
||||
// Duff's device again:
|
||||
switch (st.bytes)
|
||||
{
|
||||
for (;;)
|
||||
{
|
||||
case 0:
|
91
deps/base64/base64/lib/arch/generic/dec_tail.c
vendored
Normal file
91
deps/base64/base64/lib/arch/generic/dec_tail.c
vendored
Normal file
@ -0,0 +1,91 @@
|
||||
if (slen-- == 0) {
|
||||
ret = 1;
|
||||
break;
|
||||
}
|
||||
if ((q = base64_table_dec_8bit[*s++]) >= 254) {
|
||||
st.eof = BASE64_EOF;
|
||||
// Treat character '=' as invalid for byte 0:
|
||||
break;
|
||||
}
|
||||
st.carry = q << 2;
|
||||
st.bytes++;
|
||||
|
||||
// Deliberate fallthrough:
|
||||
BASE64_FALLTHROUGH
|
||||
|
||||
case 1: if (slen-- == 0) {
|
||||
ret = 1;
|
||||
break;
|
||||
}
|
||||
if ((q = base64_table_dec_8bit[*s++]) >= 254) {
|
||||
st.eof = BASE64_EOF;
|
||||
// Treat character '=' as invalid for byte 1:
|
||||
break;
|
||||
}
|
||||
*o++ = st.carry | (q >> 4);
|
||||
st.carry = q << 4;
|
||||
st.bytes++;
|
||||
olen++;
|
||||
|
||||
// Deliberate fallthrough:
|
||||
BASE64_FALLTHROUGH
|
||||
|
||||
case 2: if (slen-- == 0) {
|
||||
ret = 1;
|
||||
break;
|
||||
}
|
||||
if ((q = base64_table_dec_8bit[*s++]) >= 254) {
|
||||
st.bytes++;
|
||||
// When q == 254, the input char is '='.
|
||||
// Check if next byte is also '=':
|
||||
if (q == 254) {
|
||||
if (slen-- != 0) {
|
||||
st.bytes = 0;
|
||||
// EOF:
|
||||
st.eof = BASE64_EOF;
|
||||
q = base64_table_dec_8bit[*s++];
|
||||
ret = ((q == 254) && (slen == 0)) ? 1 : 0;
|
||||
break;
|
||||
}
|
||||
else {
|
||||
// Almost EOF
|
||||
st.eof = BASE64_AEOF;
|
||||
ret = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// If we get here, there was an error:
|
||||
break;
|
||||
}
|
||||
*o++ = st.carry | (q >> 2);
|
||||
st.carry = q << 6;
|
||||
st.bytes++;
|
||||
olen++;
|
||||
|
||||
// Deliberate fallthrough:
|
||||
BASE64_FALLTHROUGH
|
||||
|
||||
case 3: if (slen-- == 0) {
|
||||
ret = 1;
|
||||
break;
|
||||
}
|
||||
if ((q = base64_table_dec_8bit[*s++]) >= 254) {
|
||||
st.bytes = 0;
|
||||
st.eof = BASE64_EOF;
|
||||
// When q == 254, the input char is '='. Return 1 and EOF.
|
||||
// When q == 255, the input char is invalid. Return 0 and EOF.
|
||||
ret = ((q == 254) && (slen == 0)) ? 1 : 0;
|
||||
break;
|
||||
}
|
||||
*o++ = st.carry | q;
|
||||
st.carry = 0;
|
||||
st.bytes = 0;
|
||||
olen++;
|
||||
}
|
||||
}
|
||||
|
||||
state->eof = st.eof;
|
||||
state->bytes = st.bytes;
|
||||
state->carry = st.carry;
|
||||
*outlen = olen;
|
||||
return ret;
|
24
deps/base64/base64/lib/arch/generic/enc_head.c
vendored
Normal file
24
deps/base64/base64/lib/arch/generic/enc_head.c
vendored
Normal file
@ -0,0 +1,24 @@
|
||||
// Assume that *out is large enough to contain the output.
|
||||
// Theoretically it should be 4/3 the length of src.
|
||||
const uint8_t *s = (const uint8_t *) src;
|
||||
uint8_t *o = (uint8_t *) out;
|
||||
|
||||
// Use local temporaries to avoid cache thrashing:
|
||||
size_t olen = 0;
|
||||
size_t slen = srclen;
|
||||
struct base64_state st;
|
||||
st.bytes = state->bytes;
|
||||
st.carry = state->carry;
|
||||
|
||||
// Turn three bytes into four 6-bit numbers:
|
||||
// in[0] = 00111111
|
||||
// in[1] = 00112222
|
||||
// in[2] = 00222233
|
||||
// in[3] = 00333333
|
||||
|
||||
// Duff's device, a for() loop inside a switch() statement. Legal!
|
||||
switch (st.bytes)
|
||||
{
|
||||
for (;;)
|
||||
{
|
||||
case 0:
|
34
deps/base64/base64/lib/arch/generic/enc_tail.c
vendored
Normal file
34
deps/base64/base64/lib/arch/generic/enc_tail.c
vendored
Normal file
@ -0,0 +1,34 @@
|
||||
if (slen-- == 0) {
|
||||
break;
|
||||
}
|
||||
*o++ = base64_table_enc_6bit[*s >> 2];
|
||||
st.carry = (*s++ << 4) & 0x30;
|
||||
st.bytes++;
|
||||
olen += 1;
|
||||
|
||||
// Deliberate fallthrough:
|
||||
BASE64_FALLTHROUGH
|
||||
|
||||
case 1: if (slen-- == 0) {
|
||||
break;
|
||||
}
|
||||
*o++ = base64_table_enc_6bit[st.carry | (*s >> 4)];
|
||||
st.carry = (*s++ << 2) & 0x3C;
|
||||
st.bytes++;
|
||||
olen += 1;
|
||||
|
||||
// Deliberate fallthrough:
|
||||
BASE64_FALLTHROUGH
|
||||
|
||||
case 2: if (slen-- == 0) {
|
||||
break;
|
||||
}
|
||||
*o++ = base64_table_enc_6bit[st.carry | (*s >> 6)];
|
||||
*o++ = base64_table_enc_6bit[*s++ & 0x3F];
|
||||
st.bytes = 0;
|
||||
olen += 2;
|
||||
}
|
||||
}
|
||||
state->bytes = st.bytes;
|
||||
state->carry = st.carry;
|
||||
*outlen = olen;
|
77
deps/base64/base64/lib/arch/neon32/codec.c
vendored
Normal file
77
deps/base64/base64/lib/arch/neon32/codec.c
vendored
Normal file
@ -0,0 +1,77 @@
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "../../../include/libbase64.h"
|
||||
#include "../../tables/tables.h"
|
||||
#include "../../codecs.h"
|
||||
#include "config.h"
|
||||
#include "../../env.h"
|
||||
|
||||
#ifdef __arm__
|
||||
# if (defined(__ARM_NEON__) || defined(__ARM_NEON)) && HAVE_NEON32
|
||||
# define BASE64_USE_NEON32
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef BASE64_USE_NEON32
|
||||
#include <arm_neon.h>
|
||||
|
||||
// Only enable inline assembly on supported compilers.
|
||||
#if defined(__GNUC__) || defined(__clang__)
|
||||
#define BASE64_NEON32_USE_ASM
|
||||
#endif
|
||||
|
||||
static inline uint8x16_t
|
||||
vqtbl1q_u8 (const uint8x16_t lut, const uint8x16_t indices)
|
||||
{
|
||||
// NEON32 only supports 64-bit wide lookups in 128-bit tables. Emulate
|
||||
// the NEON64 `vqtbl1q_u8` intrinsic to do 128-bit wide lookups.
|
||||
uint8x8x2_t lut2;
|
||||
uint8x8x2_t result;
|
||||
|
||||
lut2.val[0] = vget_low_u8(lut);
|
||||
lut2.val[1] = vget_high_u8(lut);
|
||||
|
||||
result.val[0] = vtbl2_u8(lut2, vget_low_u8(indices));
|
||||
result.val[1] = vtbl2_u8(lut2, vget_high_u8(indices));
|
||||
|
||||
return vcombine_u8(result.val[0], result.val[1]);
|
||||
}
|
||||
|
||||
#include "../generic/32/dec_loop.c"
|
||||
#include "../generic/32/enc_loop.c"
|
||||
#include "dec_loop.c"
|
||||
#include "enc_reshuffle.c"
|
||||
#include "enc_translate.c"
|
||||
#include "enc_loop.c"
|
||||
|
||||
#endif // BASE64_USE_NEON32
|
||||
|
||||
// Stride size is so large on these NEON 32-bit functions
|
||||
// (48 bytes encode, 64 bytes decode) that we inline the
|
||||
// uint32 codec to stay performant on smaller inputs.
|
||||
|
||||
// Encoder entry point for the NEON32 codec. The function signature is
// produced by the BASE64_ENC_FUNCTION macro, which is defined outside this
// file.
BASE64_ENC_FUNCTION(neon32)
|
||||
{
|
||||
#ifdef BASE64_USE_NEON32
|
||||
// enc_head.c declares the working locals (s, slen, o, olen, st) and opens
// a switch on st.bytes that wraps a for(;;) loop; the two calls below sit
// directly under its `case 0:` label.
#include "../generic/enc_head.c"
|
||||
// Bulk path: encode as many whole 48-byte blocks as the input holds.
enc_loop_neon32(&s, &slen, &o, &olen);
|
||||
// Inlined 32-bit generic loop (from ../generic/32/enc_loop.c) for input
// that the NEON loop left over.
enc_loop_generic_32(&s, &slen, &o, &olen);
|
||||
// enc_tail.c encodes the remaining bytes one at a time, closes the loop
// and the switch, and writes st.bytes, st.carry and olen back out.
#include "../generic/enc_tail.c"
|
||||
#else
|
||||
// NEON32 is not enabled for this build: the body is whatever the
// BASE64_ENC_STUB macro (defined outside this file) expands to.
BASE64_ENC_STUB
|
||||
#endif
|
||||
}
|
||||
|
||||
// Decoder entry point for the NEON32 codec. The function signature is
// produced by the BASE64_DEC_FUNCTION macro, which is defined outside this
// file.
BASE64_DEC_FUNCTION(neon32)
|
||||
{
|
||||
#ifdef BASE64_USE_NEON32
|
||||
// dec_head.c is a code fragment pasted in here; it has to provide the
// locals s, slen, o and olen used below (NOTE(review): presumably it also
// opens the switch/loop that dec_tail.c closes - confirm in dec_head.c).
#include "../generic/dec_head.c"
|
||||
// Bulk path: decode whole 64-byte blocks, stopping early at the first
// block that contains an invalid character.
dec_loop_neon32(&s, &slen, &o, &olen);
|
||||
// Inlined 32-bit generic loop (from ../generic/32/dec_loop.c) for input
// that the NEON loop left over.
dec_loop_generic_32(&s, &slen, &o, &olen);
|
||||
// dec_tail.c handles the remaining characters bytewise, including '='
// padding and invalid input, then stores the state and returns.
#include "../generic/dec_tail.c"
|
||||
#else
|
||||
// NEON32 is not enabled for this build: the body is whatever the
// BASE64_DEC_STUB macro (defined outside this file) expands to.
BASE64_DEC_STUB
|
||||
#endif
|
||||
}
|
106
deps/base64/base64/lib/arch/neon32/dec_loop.c
vendored
Normal file
106
deps/base64/base64/lib/arch/neon32/dec_loop.c
vendored
Normal file
@ -0,0 +1,106 @@
|
||||
static inline int
|
||||
is_nonzero (const uint8x16_t v)
|
||||
{
|
||||
uint64_t u64;
|
||||
const uint64x2_t v64 = vreinterpretq_u64_u8(v);
|
||||
const uint32x2_t v32 = vqmovn_u64(v64);
|
||||
|
||||
vst1_u64(&u64, vreinterpret_u64_u32(v32));
|
||||
return u64 != 0;
|
||||
}
|
||||
|
||||
static inline uint8x16_t
|
||||
delta_lookup (const uint8x16_t v)
|
||||
{
|
||||
const uint8x8_t lut = {
|
||||
0, 16, 19, 4, (uint8_t) -65, (uint8_t) -65, (uint8_t) -71, (uint8_t) -71,
|
||||
};
|
||||
|
||||
return vcombine_u8(
|
||||
vtbl1_u8(lut, vget_low_u8(v)),
|
||||
vtbl1_u8(lut, vget_high_u8(v)));
|
||||
}
|
||||
|
||||
static inline uint8x16_t
|
||||
dec_loop_neon32_lane (uint8x16_t *lane)
|
||||
{
|
||||
// See the SSSE3 decoder for an explanation of the algorithm.
|
||||
const uint8x16_t lut_lo = {
|
||||
0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11,
|
||||
0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A
|
||||
};
|
||||
|
||||
const uint8x16_t lut_hi = {
|
||||
0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08,
|
||||
0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10
|
||||
};
|
||||
|
||||
const uint8x16_t mask_0F = vdupq_n_u8(0x0F);
|
||||
const uint8x16_t mask_2F = vdupq_n_u8(0x2F);
|
||||
|
||||
const uint8x16_t hi_nibbles = vshrq_n_u8(*lane, 4);
|
||||
const uint8x16_t lo_nibbles = vandq_u8(*lane, mask_0F);
|
||||
const uint8x16_t eq_2F = vceqq_u8(*lane, mask_2F);
|
||||
|
||||
const uint8x16_t hi = vqtbl1q_u8(lut_hi, hi_nibbles);
|
||||
const uint8x16_t lo = vqtbl1q_u8(lut_lo, lo_nibbles);
|
||||
|
||||
// Now simply add the delta values to the input:
|
||||
*lane = vaddq_u8(*lane, delta_lookup(vaddq_u8(eq_2F, hi_nibbles)));
|
||||
|
||||
// Return the validity mask:
|
||||
return vandq_u8(lo, hi);
|
||||
}
|
||||
|
||||
static inline void
|
||||
dec_loop_neon32 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
|
||||
{
|
||||
if (*slen < 64) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Process blocks of 64 bytes per round. Unlike the SSE codecs, no
|
||||
// extra trailing zero bytes are written, so it is not necessary to
|
||||
// reserve extra input bytes:
|
||||
size_t rounds = *slen / 64;
|
||||
|
||||
*slen -= rounds * 64; // 64 bytes consumed per round
|
||||
*olen += rounds * 48; // 48 bytes produced per round
|
||||
|
||||
do {
|
||||
uint8x16x3_t dec;
|
||||
|
||||
// Load 64 bytes and deinterleave:
|
||||
uint8x16x4_t str = vld4q_u8(*s);
|
||||
|
||||
// Decode each lane, collect a mask of invalid inputs:
|
||||
const uint8x16_t classified
|
||||
= dec_loop_neon32_lane(&str.val[0])
|
||||
| dec_loop_neon32_lane(&str.val[1])
|
||||
| dec_loop_neon32_lane(&str.val[2])
|
||||
| dec_loop_neon32_lane(&str.val[3]);
|
||||
|
||||
// Check for invalid input: if any of the delta values are
|
||||
// zero, fall back on bytewise code to do error checking and
|
||||
// reporting:
|
||||
if (is_nonzero(classified)) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Compress four bytes into three:
|
||||
dec.val[0] = vorrq_u8(vshlq_n_u8(str.val[0], 2), vshrq_n_u8(str.val[1], 4));
|
||||
dec.val[1] = vorrq_u8(vshlq_n_u8(str.val[1], 4), vshrq_n_u8(str.val[2], 2));
|
||||
dec.val[2] = vorrq_u8(vshlq_n_u8(str.val[2], 6), str.val[3]);
|
||||
|
||||
// Interleave and store decoded result:
|
||||
vst3q_u8(*o, dec);
|
||||
|
||||
*s += 64;
|
||||
*o += 48;
|
||||
|
||||
} while (--rounds > 0);
|
||||
|
||||
// Adjust for any rounds that were skipped:
|
||||
*slen += rounds * 64;
|
||||
*olen -= rounds * 48;
|
||||
}
|
169
deps/base64/base64/lib/arch/neon32/enc_loop.c
vendored
Normal file
169
deps/base64/base64/lib/arch/neon32/enc_loop.c
vendored
Normal file
@ -0,0 +1,169 @@
|
||||
#ifdef BASE64_NEON32_USE_ASM
|
||||
static inline void
|
||||
enc_loop_neon32_inner_asm (const uint8_t **s, uint8_t **o)
|
||||
{
|
||||
// This function duplicates the functionality of enc_loop_neon32_inner,
|
||||
// but entirely with inline assembly. This gives a significant speedup
|
||||
// over using NEON intrinsics, which do not always generate very good
|
||||
// code. The logic of the assembly is directly lifted from the
|
||||
// intrinsics version, so it can be used as a guide to this code.
|
||||
|
||||
// Temporary registers, used as scratch space.
|
||||
uint8x16_t tmp0, tmp1, tmp2, tmp3;
|
||||
uint8x16_t mask0, mask1, mask2, mask3;
|
||||
|
||||
// A lookup table containing the absolute offsets for all ranges.
|
||||
const uint8x16_t lut = {
|
||||
65U, 71U, 252U, 252U,
|
||||
252U, 252U, 252U, 252U,
|
||||
252U, 252U, 252U, 252U,
|
||||
237U, 240U, 0U, 0U
|
||||
};
|
||||
|
||||
// Numeric constants.
|
||||
const uint8x16_t n51 = vdupq_n_u8(51);
|
||||
const uint8x16_t n25 = vdupq_n_u8(25);
|
||||
const uint8x16_t n63 = vdupq_n_u8(63);
|
||||
|
||||
__asm__ (
|
||||
|
||||
// Load 48 bytes and deinterleave. The bytes are loaded to
|
||||
// hard-coded registers q12, q13 and q14, to ensure that they
|
||||
// are contiguous. Increment the source pointer.
|
||||
"vld3.8 {d24, d26, d28}, [%[src]]! \n\t"
|
||||
"vld3.8 {d25, d27, d29}, [%[src]]! \n\t"
|
||||
|
||||
// Reshuffle the bytes using temporaries.
|
||||
"vshr.u8 %q[t0], q12, #2 \n\t"
|
||||
"vshr.u8 %q[t1], q13, #4 \n\t"
|
||||
"vshr.u8 %q[t2], q14, #6 \n\t"
|
||||
"vsli.8 %q[t1], q12, #4 \n\t"
|
||||
"vsli.8 %q[t2], q13, #2 \n\t"
|
||||
"vand.u8 %q[t1], %q[t1], %q[n63] \n\t"
|
||||
"vand.u8 %q[t2], %q[t2], %q[n63] \n\t"
|
||||
"vand.u8 %q[t3], q14, %q[n63] \n\t"
|
||||
|
||||
// t0..t3 are the reshuffled inputs. Create LUT indices.
|
||||
"vqsub.u8 q12, %q[t0], %q[n51] \n\t"
|
||||
"vqsub.u8 q13, %q[t1], %q[n51] \n\t"
|
||||
"vqsub.u8 q14, %q[t2], %q[n51] \n\t"
|
||||
"vqsub.u8 q15, %q[t3], %q[n51] \n\t"
|
||||
|
||||
// Create the mask for range #0.
|
||||
"vcgt.u8 %q[m0], %q[t0], %q[n25] \n\t"
|
||||
"vcgt.u8 %q[m1], %q[t1], %q[n25] \n\t"
|
||||
"vcgt.u8 %q[m2], %q[t2], %q[n25] \n\t"
|
||||
"vcgt.u8 %q[m3], %q[t3], %q[n25] \n\t"
|
||||
|
||||
// Subtract -1 to correct the LUT indices.
|
||||
"vsub.u8 q12, %q[m0] \n\t"
|
||||
"vsub.u8 q13, %q[m1] \n\t"
|
||||
"vsub.u8 q14, %q[m2] \n\t"
|
||||
"vsub.u8 q15, %q[m3] \n\t"
|
||||
|
||||
// Lookup the delta values.
|
||||
"vtbl.u8 d24, {%q[lut]}, d24 \n\t"
|
||||
"vtbl.u8 d25, {%q[lut]}, d25 \n\t"
|
||||
"vtbl.u8 d26, {%q[lut]}, d26 \n\t"
|
||||
"vtbl.u8 d27, {%q[lut]}, d27 \n\t"
|
||||
"vtbl.u8 d28, {%q[lut]}, d28 \n\t"
|
||||
"vtbl.u8 d29, {%q[lut]}, d29 \n\t"
|
||||
"vtbl.u8 d30, {%q[lut]}, d30 \n\t"
|
||||
"vtbl.u8 d31, {%q[lut]}, d31 \n\t"
|
||||
|
||||
// Add the delta values.
|
||||
"vadd.u8 q12, %q[t0] \n\t"
|
||||
"vadd.u8 q13, %q[t1] \n\t"
|
||||
"vadd.u8 q14, %q[t2] \n\t"
|
||||
"vadd.u8 q15, %q[t3] \n\t"
|
||||
|
||||
// Store 64 bytes and interleave. Increment the dest pointer.
|
||||
"vst4.8 {d24, d26, d28, d30}, [%[dst]]! \n\t"
|
||||
"vst4.8 {d25, d27, d29, d31}, [%[dst]]! \n\t"
|
||||
|
||||
// Outputs (modified).
|
||||
: [src] "+r" (*s),
|
||||
[dst] "+r" (*o),
|
||||
[t0] "=&w" (tmp0),
|
||||
[t1] "=&w" (tmp1),
|
||||
[t2] "=&w" (tmp2),
|
||||
[t3] "=&w" (tmp3),
|
||||
[m0] "=&w" (mask0),
|
||||
[m1] "=&w" (mask1),
|
||||
[m2] "=&w" (mask2),
|
||||
[m3] "=&w" (mask3)
|
||||
|
||||
// Inputs (not modified).
|
||||
: [lut] "w" (lut),
|
||||
[n25] "w" (n25),
|
||||
[n51] "w" (n51),
|
||||
[n63] "w" (n63)
|
||||
|
||||
// Clobbers.
|
||||
: "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31"
|
||||
);
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline void
|
||||
enc_loop_neon32_inner (const uint8_t **s, uint8_t **o)
|
||||
{
|
||||
#ifdef BASE64_NEON32_USE_ASM
|
||||
enc_loop_neon32_inner_asm(s, o);
|
||||
#else
|
||||
// Load 48 bytes and deinterleave:
|
||||
uint8x16x3_t src = vld3q_u8(*s);
|
||||
|
||||
// Reshuffle:
|
||||
uint8x16x4_t out = enc_reshuffle(src);
|
||||
|
||||
// Translate reshuffled bytes to the Base64 alphabet:
|
||||
out = enc_translate(out);
|
||||
|
||||
// Interleave and store output:
|
||||
vst4q_u8(*o, out);
|
||||
|
||||
*s += 48;
|
||||
*o += 64;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void
enc_loop_neon32 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
{
	// Encodes every whole 48-byte block available in the input. The
	// lengths are settled up front; the cursors are advanced by the inner
	// function, one block per call.
	size_t blocks = *slen / 48;

	*slen -= blocks * 48;	// 48 bytes consumed per block
	*olen += blocks * 64;	// 64 bytes produced per block

	// Manually unrolled: batches of eight first, then at most one batch
	// each of four, two and one block.
	for (; blocks >= 8; blocks -= 8) {
		enc_loop_neon32_inner(s, o);
		enc_loop_neon32_inner(s, o);
		enc_loop_neon32_inner(s, o);
		enc_loop_neon32_inner(s, o);
		enc_loop_neon32_inner(s, o);
		enc_loop_neon32_inner(s, o);
		enc_loop_neon32_inner(s, o);
		enc_loop_neon32_inner(s, o);
	}

	if (blocks >= 4) {
		enc_loop_neon32_inner(s, o);
		enc_loop_neon32_inner(s, o);
		enc_loop_neon32_inner(s, o);
		enc_loop_neon32_inner(s, o);
		blocks -= 4;
	}

	if (blocks >= 2) {
		enc_loop_neon32_inner(s, o);
		enc_loop_neon32_inner(s, o);
		blocks -= 2;
	}

	if (blocks >= 1) {
		enc_loop_neon32_inner(s, o);
	}
}
|
31
deps/base64/base64/lib/arch/neon32/enc_reshuffle.c
vendored
Normal file
31
deps/base64/base64/lib/arch/neon32/enc_reshuffle.c
vendored
Normal file
@ -0,0 +1,31 @@
|
||||
static inline uint8x16x4_t
|
||||
enc_reshuffle (uint8x16x3_t in)
|
||||
{
|
||||
uint8x16x4_t out;
|
||||
|
||||
// Input:
|
||||
// in[0] = a7 a6 a5 a4 a3 a2 a1 a0
|
||||
// in[1] = b7 b6 b5 b4 b3 b2 b1 b0
|
||||
// in[2] = c7 c6 c5 c4 c3 c2 c1 c0
|
||||
|
||||
// Output:
|
||||
// out[0] = 00 00 a7 a6 a5 a4 a3 a2
|
||||
// out[1] = 00 00 a1 a0 b7 b6 b5 b4
|
||||
// out[2] = 00 00 b3 b2 b1 b0 c7 c6
|
||||
// out[3] = 00 00 c5 c4 c3 c2 c1 c0
|
||||
|
||||
// Move the input bits to where they need to be in the outputs. Except
|
||||
// for the first output, the high two bits are not cleared.
|
||||
out.val[0] = vshrq_n_u8(in.val[0], 2);
|
||||
out.val[1] = vshrq_n_u8(in.val[1], 4);
|
||||
out.val[2] = vshrq_n_u8(in.val[2], 6);
|
||||
out.val[1] = vsliq_n_u8(out.val[1], in.val[0], 4);
|
||||
out.val[2] = vsliq_n_u8(out.val[2], in.val[1], 2);
|
||||
|
||||
// Clear the high two bits in the second, third and fourth output.
|
||||
out.val[1] = vandq_u8(out.val[1], vdupq_n_u8(0x3F));
|
||||
out.val[2] = vandq_u8(out.val[2], vdupq_n_u8(0x3F));
|
||||
out.val[3] = vandq_u8(in.val[2], vdupq_n_u8(0x3F));
|
||||
|
||||
return out;
|
||||
}
|
57
deps/base64/base64/lib/arch/neon32/enc_translate.c
vendored
Normal file
57
deps/base64/base64/lib/arch/neon32/enc_translate.c
vendored
Normal file
@ -0,0 +1,57 @@
|
||||
static inline uint8x16x4_t
|
||||
enc_translate (const uint8x16x4_t in)
|
||||
{
|
||||
// A lookup table containing the absolute offsets for all ranges:
|
||||
const uint8x16_t lut = {
|
||||
65U, 71U, 252U, 252U,
|
||||
252U, 252U, 252U, 252U,
|
||||
252U, 252U, 252U, 252U,
|
||||
237U, 240U, 0U, 0U
|
||||
};
|
||||
|
||||
const uint8x16_t offset = vdupq_n_u8(51);
|
||||
|
||||
uint8x16x4_t indices, mask, delta, out;
|
||||
|
||||
// Translate values 0..63 to the Base64 alphabet. There are five sets:
|
||||
// # From To Abs Index Characters
|
||||
// 0 [0..25] [65..90] +65 0 ABCDEFGHIJKLMNOPQRSTUVWXYZ
|
||||
// 1 [26..51] [97..122] +71 1 abcdefghijklmnopqrstuvwxyz
|
||||
// 2 [52..61] [48..57] -4 [2..11] 0123456789
|
||||
// 3 [62] [43] -19 12 +
|
||||
// 4 [63] [47] -16 13 /
|
||||
|
||||
// Create LUT indices from input:
|
||||
// the index for range #0 is right, others are 1 less than expected:
|
||||
indices.val[0] = vqsubq_u8(in.val[0], offset);
|
||||
indices.val[1] = vqsubq_u8(in.val[1], offset);
|
||||
indices.val[2] = vqsubq_u8(in.val[2], offset);
|
||||
indices.val[3] = vqsubq_u8(in.val[3], offset);
|
||||
|
||||
// mask is 0xFF (-1) for range #[1..4] and 0x00 for range #0:
|
||||
mask.val[0] = vcgtq_u8(in.val[0], vdupq_n_u8(25));
|
||||
mask.val[1] = vcgtq_u8(in.val[1], vdupq_n_u8(25));
|
||||
mask.val[2] = vcgtq_u8(in.val[2], vdupq_n_u8(25));
|
||||
mask.val[3] = vcgtq_u8(in.val[3], vdupq_n_u8(25));
|
||||
|
||||
// Subtract -1, so add 1 to indices for range #[1..4], All indices are
|
||||
// now correct:
|
||||
indices.val[0] = vsubq_u8(indices.val[0], mask.val[0]);
|
||||
indices.val[1] = vsubq_u8(indices.val[1], mask.val[1]);
|
||||
indices.val[2] = vsubq_u8(indices.val[2], mask.val[2]);
|
||||
indices.val[3] = vsubq_u8(indices.val[3], mask.val[3]);
|
||||
|
||||
// Lookup delta values:
|
||||
delta.val[0] = vqtbl1q_u8(lut, indices.val[0]);
|
||||
delta.val[1] = vqtbl1q_u8(lut, indices.val[1]);
|
||||
delta.val[2] = vqtbl1q_u8(lut, indices.val[2]);
|
||||
delta.val[3] = vqtbl1q_u8(lut, indices.val[3]);
|
||||
|
||||
// Add delta values:
|
||||
out.val[0] = vaddq_u8(in.val[0], delta.val[0]);
|
||||
out.val[1] = vaddq_u8(in.val[1], delta.val[1]);
|
||||
out.val[2] = vaddq_u8(in.val[2], delta.val[2]);
|
||||
out.val[3] = vaddq_u8(in.val[3], delta.val[3]);
|
||||
|
||||
return out;
|
||||
}
|
92
deps/base64/base64/lib/arch/neon64/codec.c
vendored
Normal file
92
deps/base64/base64/lib/arch/neon64/codec.c
vendored
Normal file
@ -0,0 +1,92 @@
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "../../../include/libbase64.h"
|
||||
#include "../../tables/tables.h"
|
||||
#include "../../codecs.h"
|
||||
#include "config.h"
|
||||
#include "../../env.h"
|
||||
|
||||
#ifdef __aarch64__
|
||||
# if (defined(__ARM_NEON__) || defined(__ARM_NEON)) && HAVE_NEON64
|
||||
# define BASE64_USE_NEON64
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef BASE64_USE_NEON64
|
||||
#include <arm_neon.h>
|
||||
|
||||
// Only enable inline assembly on supported compilers.
|
||||
#if defined(__GNUC__) || defined(__clang__)
|
||||
#define BASE64_NEON64_USE_ASM
|
||||
#endif
|
||||
|
||||
static inline uint8x16x4_t
|
||||
load_64byte_table (const uint8_t *p)
|
||||
{
|
||||
#ifdef BASE64_NEON64_USE_ASM
|
||||
|
||||
// Force the table to be loaded into contiguous registers. GCC will not
|
||||
// normally allocate contiguous registers for a `uint8x16x4_t'. These
|
||||
// registers are chosen to not conflict with the ones in the enc loop.
|
||||
register uint8x16_t t0 __asm__ ("v8");
|
||||
register uint8x16_t t1 __asm__ ("v9");
|
||||
register uint8x16_t t2 __asm__ ("v10");
|
||||
register uint8x16_t t3 __asm__ ("v11");
|
||||
|
||||
__asm__ (
|
||||
"ld1 {%[t0].16b, %[t1].16b, %[t2].16b, %[t3].16b}, [%[src]], #64 \n\t"
|
||||
: [src] "+r" (p),
|
||||
[t0] "=w" (t0),
|
||||
[t1] "=w" (t1),
|
||||
[t2] "=w" (t2),
|
||||
[t3] "=w" (t3)
|
||||
);
|
||||
|
||||
return (uint8x16x4_t) {
|
||||
.val[0] = t0,
|
||||
.val[1] = t1,
|
||||
.val[2] = t2,
|
||||
.val[3] = t3,
|
||||
};
|
||||
#else
|
||||
return vld1q_u8_x4(p);
|
||||
#endif
|
||||
}
|
||||
|
||||
#include "../generic/32/dec_loop.c"
|
||||
#include "../generic/64/enc_loop.c"
|
||||
#include "dec_loop.c"
|
||||
#include "enc_reshuffle.c"
|
||||
#include "enc_loop.c"
|
||||
|
||||
#endif // BASE64_USE_NEON64
|
||||
|
||||
// Stride size is so large on these NEON 64-bit functions
|
||||
// (48 bytes encode, 64 bytes decode) that we inline the
|
||||
// uint64 codec to stay performant on smaller inputs.
|
||||
|
||||
// Encoder entry point for the NEON64 codec. The function signature is
// produced by the BASE64_ENC_FUNCTION macro, which is defined outside this
// file.
BASE64_ENC_FUNCTION(neon64)
|
||||
{
|
||||
#ifdef BASE64_USE_NEON64
|
||||
// enc_head.c declares the working locals (s, slen, o, olen, st) and opens
// a switch on st.bytes that wraps a for(;;) loop; the two calls below sit
// directly under its `case 0:` label.
#include "../generic/enc_head.c"
|
||||
// Bulk path: encode as many whole 48-byte blocks as the input holds.
enc_loop_neon64(&s, &slen, &o, &olen);
|
||||
// Inlined 64-bit generic loop (from ../generic/64/enc_loop.c) for input
// that the NEON loop left over.
enc_loop_generic_64(&s, &slen, &o, &olen);
|
||||
// enc_tail.c encodes the remaining bytes one at a time, closes the loop
// and the switch, and writes st.bytes, st.carry and olen back out.
#include "../generic/enc_tail.c"
|
||||
#else
|
||||
// NEON64 is not enabled for this build: the body is whatever the
// BASE64_ENC_STUB macro (defined outside this file) expands to.
BASE64_ENC_STUB
|
||||
#endif
|
||||
}
|
||||
|
||||
// Decoder entry point for the NEON64 codec. The function signature is
// produced by the BASE64_DEC_FUNCTION macro, which is defined outside this
// file.
BASE64_DEC_FUNCTION(neon64)
|
||||
{
|
||||
#ifdef BASE64_USE_NEON64
|
||||
// dec_head.c is a code fragment pasted in here; it has to provide the
// locals s, slen, o and olen used below (NOTE(review): presumably it also
// opens the switch/loop that dec_tail.c closes - confirm in dec_head.c).
#include "../generic/dec_head.c"
|
||||
// Bulk path: decode whole 64-byte blocks, stopping early at the first
// block that contains an invalid character.
dec_loop_neon64(&s, &slen, &o, &olen);
|
||||
// Note that the decoder falls back on the 32-bit generic loop (from
// ../generic/32/dec_loop.c), not a 64-bit one, for leftover input.
dec_loop_generic_32(&s, &slen, &o, &olen);
|
||||
// dec_tail.c handles the remaining characters bytewise, including '='
// padding and invalid input, then stores the state and returns.
#include "../generic/dec_tail.c"
|
||||
#else
|
||||
// NEON64 is not enabled for this build: the body is whatever the
// BASE64_DEC_STUB macro (defined outside this file) expands to.
BASE64_DEC_STUB
|
||||
#endif
|
||||
}
|
129
deps/base64/base64/lib/arch/neon64/dec_loop.c
vendored
Normal file
129
deps/base64/base64/lib/arch/neon64/dec_loop.c
vendored
Normal file
@ -0,0 +1,129 @@
|
||||
// The input consists of five valid character sets in the Base64 alphabet,
|
||||
// which we need to map back to the 6-bit values they represent.
|
||||
// There are three ranges, two singles, and then there's the rest.
|
||||
//
|
||||
// # From To LUT Characters
|
||||
// 1 [0..42] [255] #1 invalid input
|
||||
// 2 [43] [62] #1 +
|
||||
// 3 [44..46] [255] #1 invalid input
|
||||
// 4 [47] [63] #1 /
|
||||
// 5 [48..57] [52..61] #1 0..9
|
||||
// 6 [58..63] [255] #1 invalid input
|
||||
// 7 [64] [255] #2 invalid input
|
||||
// 8 [65..90] [0..25] #2 A..Z
|
||||
// 9 [91..96] [255] #2 invalid input
|
||||
// 10 [97..122] [26..51] #2 a..z
|
||||
// 11 [123..126] [255] #2 invalid input
|
||||
// (12) Everything else => invalid input
|
||||
|
||||
// The first LUT will use the VTBL instruction (out of range indices are set to
|
||||
// 0 in destination).
|
||||
// Indexed directly by the input character, for characters 0..63. An entry
// of 255 marks an invalid character.
static const uint8_t dec_lut1[] = {
|
||||
// Characters 0..15: all invalid.
255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U,
|
||||
// Characters 16..31: all invalid.
255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U,
|
||||
// Characters 32..47: '+' (43) maps to 62 and '/' (47) maps to 63.
255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 62U, 255U, 255U, 255U, 63U,
|
||||
// Characters 48..63: '0'..'9' (48..57) map to 52..61.
52U, 53U, 54U, 55U, 56U, 57U, 58U, 59U, 60U, 61U, 255U, 255U, 255U, 255U, 255U, 255U,
|
||||
};
|
||||
|
||||
// The second LUT will use the VTBX instruction (out of range indices will be
|
||||
// unchanged in destination). Input [64..126] will be mapped to index [1..63]
|
||||
// in this LUT. Index 0 means that value comes from LUT #1.
|
||||
// Indexed by (input character - 63), with the subtraction saturating at
// zero. An entry of 255 marks an invalid character.
static const uint8_t dec_lut2[] = {
|
||||
// Index 0: character is 63 or lower, contributes 0 (value comes from
// LUT #1). Index 1: '@' (64), invalid. Indices 2..15: 'A'..'N' map to
// 0..13.
0U, 255U, 0U, 1U, 2U, 3U, 4U, 5U, 6U, 7U, 8U, 9U, 10U, 11U, 12U, 13U,
|
||||
// Indices 16..27: 'O'..'Z' map to 14..25. Indices 28..31: characters
// 91..94, invalid.
14U, 15U, 16U, 17U, 18U, 19U, 20U, 21U, 22U, 23U, 24U, 25U, 255U, 255U, 255U, 255U,
|
||||
// Indices 32..33: characters 95..96, invalid. Indices 34..47: 'a'..'n'
// map to 26..39.
255U, 255U, 26U, 27U, 28U, 29U, 30U, 31U, 32U, 33U, 34U, 35U, 36U, 37U, 38U, 39U,
|
||||
// Indices 48..59: 'o'..'z' map to 40..51. Indices 60..63: characters
// 123..126, invalid.
40U, 41U, 42U, 43U, 44U, 45U, 46U, 47U, 48U, 49U, 50U, 51U, 255U, 255U, 255U, 255U,
|
||||
};
|
||||
|
||||
// All input values in range for the first look-up will be 0U in the second
|
||||
// look-up result. All input values out of range for the first look-up will be
|
||||
// 0U in the first look-up result. Thus, the two results can be ORed without
|
||||
// conflicts.
|
||||
//
|
||||
// Invalid characters that are in the valid range for either look-up will be
|
||||
// set to 255U in the combined result. Other invalid characters will just be
|
||||
// passed through with the second look-up result (using the VTBX instruction).
|
||||
// Since the second LUT is 64 bytes, those passed-through values are guaranteed
|
||||
// to have a value greater than 63U. Therefore, valid characters will be mapped
|
||||
// to the valid [0..63] range and all invalid characters will be mapped to
|
||||
// values greater than 63.
|
||||
|
||||
static inline void
|
||||
dec_loop_neon64 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
|
||||
{
|
||||
if (*slen < 64) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Process blocks of 64 bytes per round. Unlike the SSE codecs, no
|
||||
// extra trailing zero bytes are written, so it is not necessary to
|
||||
// reserve extra input bytes:
|
||||
size_t rounds = *slen / 64;
|
||||
|
||||
*slen -= rounds * 64; // 64 bytes consumed per round
|
||||
*olen += rounds * 48; // 48 bytes produced per round
|
||||
|
||||
const uint8x16x4_t tbl_dec1 = load_64byte_table(dec_lut1);
|
||||
const uint8x16x4_t tbl_dec2 = load_64byte_table(dec_lut2);
|
||||
|
||||
do {
|
||||
const uint8x16_t offset = vdupq_n_u8(63U);
|
||||
uint8x16x4_t dec1, dec2;
|
||||
uint8x16x3_t dec;
|
||||
|
||||
// Load 64 bytes and deinterleave:
|
||||
uint8x16x4_t str = vld4q_u8((uint8_t *) *s);
|
||||
|
||||
// Get indices for second LUT:
|
||||
dec2.val[0] = vqsubq_u8(str.val[0], offset);
|
||||
dec2.val[1] = vqsubq_u8(str.val[1], offset);
|
||||
dec2.val[2] = vqsubq_u8(str.val[2], offset);
|
||||
dec2.val[3] = vqsubq_u8(str.val[3], offset);
|
||||
|
||||
// Get values from first LUT:
|
||||
dec1.val[0] = vqtbl4q_u8(tbl_dec1, str.val[0]);
|
||||
dec1.val[1] = vqtbl4q_u8(tbl_dec1, str.val[1]);
|
||||
dec1.val[2] = vqtbl4q_u8(tbl_dec1, str.val[2]);
|
||||
dec1.val[3] = vqtbl4q_u8(tbl_dec1, str.val[3]);
|
||||
|
||||
// Get values from second LUT:
|
||||
dec2.val[0] = vqtbx4q_u8(dec2.val[0], tbl_dec2, dec2.val[0]);
|
||||
dec2.val[1] = vqtbx4q_u8(dec2.val[1], tbl_dec2, dec2.val[1]);
|
||||
dec2.val[2] = vqtbx4q_u8(dec2.val[2], tbl_dec2, dec2.val[2]);
|
||||
dec2.val[3] = vqtbx4q_u8(dec2.val[3], tbl_dec2, dec2.val[3]);
|
||||
|
||||
// Get final values:
|
||||
str.val[0] = vorrq_u8(dec1.val[0], dec2.val[0]);
|
||||
str.val[1] = vorrq_u8(dec1.val[1], dec2.val[1]);
|
||||
str.val[2] = vorrq_u8(dec1.val[2], dec2.val[2]);
|
||||
str.val[3] = vorrq_u8(dec1.val[3], dec2.val[3]);
|
||||
|
||||
// Check for invalid input, any value larger than 63:
|
||||
const uint8x16_t classified
|
||||
= vcgtq_u8(str.val[0], vdupq_n_u8(63))
|
||||
| vcgtq_u8(str.val[1], vdupq_n_u8(63))
|
||||
| vcgtq_u8(str.val[2], vdupq_n_u8(63))
|
||||
| vcgtq_u8(str.val[3], vdupq_n_u8(63));
|
||||
|
||||
// Check that all bits are zero:
|
||||
if (vmaxvq_u8(classified) != 0U) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Compress four bytes into three:
|
||||
dec.val[0] = vshlq_n_u8(str.val[0], 2) | vshrq_n_u8(str.val[1], 4);
|
||||
dec.val[1] = vshlq_n_u8(str.val[1], 4) | vshrq_n_u8(str.val[2], 2);
|
||||
dec.val[2] = vshlq_n_u8(str.val[2], 6) | str.val[3];
|
||||
|
||||
// Interleave and store decoded result:
|
||||
vst3q_u8((uint8_t *) *o, dec);
|
||||
|
||||
*s += 64;
|
||||
*o += 48;
|
||||
|
||||
} while (--rounds > 0);
|
||||
|
||||
// Adjust for any rounds that were skipped:
|
||||
*slen += rounds * 64;
|
||||
*olen -= rounds * 48;
|
||||
}
|
133
deps/base64/base64/lib/arch/neon64/enc_loop.c
vendored
Normal file
133
deps/base64/base64/lib/arch/neon64/enc_loop.c
vendored
Normal file
@ -0,0 +1,133 @@
|
||||
#ifdef BASE64_NEON64_USE_ASM
|
||||
static inline void
|
||||
enc_loop_neon64_inner_asm (const uint8_t **s, uint8_t **o, const uint8x16x4_t tbl_enc)
|
||||
{
|
||||
// This function duplicates the functionality of enc_loop_neon64_inner,
|
||||
// but entirely with inline assembly. This gives a significant speedup
|
||||
// over using NEON intrinsics, which do not always generate very good
|
||||
// code. The logic of the assembly is directly lifted from the
|
||||
// intrinsics version, so it can be used as a guide to this code.
|
||||
|
||||
// Temporary registers, used as scratch space.
|
||||
uint8x16_t tmp0, tmp1, tmp2, tmp3;
|
||||
|
||||
// Numeric constant.
|
||||
const uint8x16_t n63 = vdupq_n_u8(63);
|
||||
|
||||
__asm__ (
|
||||
|
||||
// Load 48 bytes and deinterleave. The bytes are loaded to
|
||||
// hard-coded registers v12, v13 and v14, to ensure that they
|
||||
// are contiguous. Increment the source pointer.
|
||||
"ld3 {v12.16b, v13.16b, v14.16b}, [%[src]], #48 \n\t"
|
||||
|
||||
// Reshuffle the bytes using temporaries.
|
||||
"ushr %[t0].16b, v12.16b, #2 \n\t"
|
||||
"ushr %[t1].16b, v13.16b, #4 \n\t"
|
||||
"ushr %[t2].16b, v14.16b, #6 \n\t"
|
||||
"sli %[t1].16b, v12.16b, #4 \n\t"
|
||||
"sli %[t2].16b, v13.16b, #2 \n\t"
|
||||
"and %[t1].16b, %[t1].16b, %[n63].16b \n\t"
|
||||
"and %[t2].16b, %[t2].16b, %[n63].16b \n\t"
|
||||
"and %[t3].16b, v14.16b, %[n63].16b \n\t"
|
||||
|
||||
// Translate the values to the Base64 alphabet.
|
||||
"tbl v12.16b, {%[l0].16b, %[l1].16b, %[l2].16b, %[l3].16b}, %[t0].16b \n\t"
|
||||
"tbl v13.16b, {%[l0].16b, %[l1].16b, %[l2].16b, %[l3].16b}, %[t1].16b \n\t"
|
||||
"tbl v14.16b, {%[l0].16b, %[l1].16b, %[l2].16b, %[l3].16b}, %[t2].16b \n\t"
|
||||
"tbl v15.16b, {%[l0].16b, %[l1].16b, %[l2].16b, %[l3].16b}, %[t3].16b \n\t"
|
||||
|
||||
// Store 64 bytes and interleave. Increment the dest pointer.
|
||||
"st4 {v12.16b, v13.16b, v14.16b, v15.16b}, [%[dst]], #64 \n\t"
|
||||
|
||||
// Outputs (modified).
|
||||
: [src] "+r" (*s),
|
||||
[dst] "+r" (*o),
|
||||
[t0] "=&w" (tmp0),
|
||||
[t1] "=&w" (tmp1),
|
||||
[t2] "=&w" (tmp2),
|
||||
[t3] "=&w" (tmp3)
|
||||
|
||||
// Inputs (not modified).
|
||||
: [n63] "w" (n63),
|
||||
[l0] "w" (tbl_enc.val[0]),
|
||||
[l1] "w" (tbl_enc.val[1]),
|
||||
[l2] "w" (tbl_enc.val[2]),
|
||||
[l3] "w" (tbl_enc.val[3])
|
||||
|
||||
// Clobbers.
|
||||
: "v12", "v13", "v14", "v15"
|
||||
);
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline void
|
||||
enc_loop_neon64_inner (const uint8_t **s, uint8_t **o, const uint8x16x4_t tbl_enc)
|
||||
{
|
||||
#ifdef BASE64_NEON64_USE_ASM
|
||||
enc_loop_neon64_inner_asm(s, o, tbl_enc);
|
||||
#else
|
||||
// Load 48 bytes and deinterleave:
|
||||
uint8x16x3_t src = vld3q_u8(*s);
|
||||
|
||||
// Divide bits of three input bytes over four output bytes:
|
||||
uint8x16x4_t out = enc_reshuffle(src);
|
||||
|
||||
// The bits have now been shifted to the right locations;
|
||||
// translate their values 0..63 to the Base64 alphabet.
|
||||
// Use a 64-byte table lookup:
|
||||
out.val[0] = vqtbl4q_u8(tbl_enc, out.val[0]);
|
||||
out.val[1] = vqtbl4q_u8(tbl_enc, out.val[1]);
|
||||
out.val[2] = vqtbl4q_u8(tbl_enc, out.val[2]);
|
||||
out.val[3] = vqtbl4q_u8(tbl_enc, out.val[3]);
|
||||
|
||||
// Interleave and store output:
|
||||
vst4q_u8(*o, out);
|
||||
|
||||
*s += 48;
|
||||
*o += 64;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void
|
||||
enc_loop_neon64 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
|
||||
{
|
||||
size_t rounds = *slen / 48;
|
||||
|
||||
*slen -= rounds * 48; // 48 bytes consumed per round
|
||||
*olen += rounds * 64; // 64 bytes produced per round
|
||||
|
||||
// Load the encoding table:
|
||||
const uint8x16x4_t tbl_enc = load_64byte_table(base64_table_enc_6bit);
|
||||
|
||||
while (rounds > 0) {
|
||||
if (rounds >= 8) {
|
||||
enc_loop_neon64_inner(s, o, tbl_enc);
|
||||
enc_loop_neon64_inner(s, o, tbl_enc);
|
||||
enc_loop_neon64_inner(s, o, tbl_enc);
|
||||
enc_loop_neon64_inner(s, o, tbl_enc);
|
||||
enc_loop_neon64_inner(s, o, tbl_enc);
|
||||
enc_loop_neon64_inner(s, o, tbl_enc);
|
||||
enc_loop_neon64_inner(s, o, tbl_enc);
|
||||
enc_loop_neon64_inner(s, o, tbl_enc);
|
||||
rounds -= 8;
|
||||
continue;
|
||||
}
|
||||
if (rounds >= 4) {
|
||||
enc_loop_neon64_inner(s, o, tbl_enc);
|
||||
enc_loop_neon64_inner(s, o, tbl_enc);
|
||||
enc_loop_neon64_inner(s, o, tbl_enc);
|
||||
enc_loop_neon64_inner(s, o, tbl_enc);
|
||||
rounds -= 4;
|
||||
continue;
|
||||
}
|
||||
if (rounds >= 2) {
|
||||
enc_loop_neon64_inner(s, o, tbl_enc);
|
||||
enc_loop_neon64_inner(s, o, tbl_enc);
|
||||
rounds -= 2;
|
||||
continue;
|
||||
}
|
||||
enc_loop_neon64_inner(s, o, tbl_enc);
|
||||
break;
|
||||
}
|
||||
}
|
31
deps/base64/base64/lib/arch/neon64/enc_reshuffle.c
vendored
Normal file
31
deps/base64/base64/lib/arch/neon64/enc_reshuffle.c
vendored
Normal file
@ -0,0 +1,31 @@
|
||||
static inline uint8x16x4_t
|
||||
enc_reshuffle (const uint8x16x3_t in)
|
||||
{
|
||||
uint8x16x4_t out;
|
||||
|
||||
// Input:
|
||||
// in[0] = a7 a6 a5 a4 a3 a2 a1 a0
|
||||
// in[1] = b7 b6 b5 b4 b3 b2 b1 b0
|
||||
// in[2] = c7 c6 c5 c4 c3 c2 c1 c0
|
||||
|
||||
// Output:
|
||||
// out[0] = 00 00 a7 a6 a5 a4 a3 a2
|
||||
// out[1] = 00 00 a1 a0 b7 b6 b5 b4
|
||||
// out[2] = 00 00 b3 b2 b1 b0 c7 c6
|
||||
// out[3] = 00 00 c5 c4 c3 c2 c1 c0
|
||||
|
||||
// Move the input bits to where they need to be in the outputs. Except
|
||||
// for the first output, the high two bits are not cleared.
|
||||
out.val[0] = vshrq_n_u8(in.val[0], 2);
|
||||
out.val[1] = vshrq_n_u8(in.val[1], 4);
|
||||
out.val[2] = vshrq_n_u8(in.val[2], 6);
|
||||
out.val[1] = vsliq_n_u8(out.val[1], in.val[0], 4);
|
||||
out.val[2] = vsliq_n_u8(out.val[2], in.val[1], 2);
|
||||
|
||||
// Clear the high two bits in the second, third and fourth output.
|
||||
out.val[1] = vandq_u8(out.val[1], vdupq_n_u8(0x3F));
|
||||
out.val[2] = vandq_u8(out.val[2], vdupq_n_u8(0x3F));
|
||||
out.val[3] = vandq_u8(in.val[2], vdupq_n_u8(0x3F));
|
||||
|
||||
return out;
|
||||
}
|
42
deps/base64/base64/lib/arch/sse41/codec.c
vendored
Normal file
42
deps/base64/base64/lib/arch/sse41/codec.c
vendored
Normal file
@ -0,0 +1,42 @@
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "../../../include/libbase64.h"
|
||||
#include "../../tables/tables.h"
|
||||
#include "../../codecs.h"
|
||||
#include "config.h"
|
||||
#include "../../env.h"
|
||||
|
||||
#if HAVE_SSE41
|
||||
#include <smmintrin.h>
|
||||
|
||||
#include "../ssse3/dec_reshuffle.c"
|
||||
#include "../ssse3/dec_loop.c"
|
||||
#include "../ssse3/enc_translate.c"
|
||||
#include "../ssse3/enc_reshuffle.c"
|
||||
#include "../ssse3/enc_loop.c"
|
||||
|
||||
#endif // HAVE_SSE41
|
||||
|
||||
BASE64_ENC_FUNCTION(sse41)
|
||||
{
|
||||
#if HAVE_SSE41
|
||||
#include "../generic/enc_head.c"
|
||||
enc_loop_ssse3(&s, &slen, &o, &olen);
|
||||
#include "../generic/enc_tail.c"
|
||||
#else
|
||||
BASE64_ENC_STUB
|
||||
#endif
|
||||
}
|
||||
|
||||
BASE64_DEC_FUNCTION(sse41)
|
||||
{
|
||||
#if HAVE_SSE41
|
||||
#include "../generic/dec_head.c"
|
||||
dec_loop_ssse3(&s, &slen, &o, &olen);
|
||||
#include "../generic/dec_tail.c"
|
||||
#else
|
||||
BASE64_DEC_STUB
|
||||
#endif
|
||||
}
|
42
deps/base64/base64/lib/arch/sse42/codec.c
vendored
Normal file
42
deps/base64/base64/lib/arch/sse42/codec.c
vendored
Normal file
@ -0,0 +1,42 @@
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "../../../include/libbase64.h"
|
||||
#include "../../tables/tables.h"
|
||||
#include "../../codecs.h"
|
||||
#include "config.h"
|
||||
#include "../../env.h"
|
||||
|
||||
#if HAVE_SSE42
|
||||
#include <nmmintrin.h>
|
||||
|
||||
#include "../ssse3/dec_reshuffle.c"
|
||||
#include "../ssse3/dec_loop.c"
|
||||
#include "../ssse3/enc_translate.c"
|
||||
#include "../ssse3/enc_reshuffle.c"
|
||||
#include "../ssse3/enc_loop.c"
|
||||
|
||||
#endif // HAVE_SSE42
|
||||
|
||||
BASE64_ENC_FUNCTION(sse42)
|
||||
{
|
||||
#if HAVE_SSE42
|
||||
#include "../generic/enc_head.c"
|
||||
enc_loop_ssse3(&s, &slen, &o, &olen);
|
||||
#include "../generic/enc_tail.c"
|
||||
#else
|
||||
BASE64_ENC_STUB
|
||||
#endif
|
||||
}
|
||||
|
||||
BASE64_DEC_FUNCTION(sse42)
|
||||
{
|
||||
#if HAVE_SSE42
|
||||
#include "../generic/dec_head.c"
|
||||
dec_loop_ssse3(&s, &slen, &o, &olen);
|
||||
#include "../generic/dec_tail.c"
|
||||
#else
|
||||
BASE64_DEC_STUB
|
||||
#endif
|
||||
}
|
42
deps/base64/base64/lib/arch/ssse3/codec.c
vendored
Normal file
42
deps/base64/base64/lib/arch/ssse3/codec.c
vendored
Normal file
@ -0,0 +1,42 @@
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "../../../include/libbase64.h"
|
||||
#include "../../tables/tables.h"
|
||||
#include "../../codecs.h"
|
||||
#include "config.h"
|
||||
#include "../../env.h"
|
||||
|
||||
#if HAVE_SSSE3
|
||||
#include <tmmintrin.h>
|
||||
|
||||
#include "dec_reshuffle.c"
|
||||
#include "dec_loop.c"
|
||||
#include "enc_reshuffle.c"
|
||||
#include "enc_translate.c"
|
||||
#include "enc_loop.c"
|
||||
|
||||
#endif // HAVE_SSSE3
|
||||
|
||||
BASE64_ENC_FUNCTION(ssse3)
|
||||
{
|
||||
#if HAVE_SSSE3
|
||||
#include "../generic/enc_head.c"
|
||||
enc_loop_ssse3(&s, &slen, &o, &olen);
|
||||
#include "../generic/enc_tail.c"
|
||||
#else
|
||||
BASE64_ENC_STUB
|
||||
#endif
|
||||
}
|
||||
|
||||
BASE64_DEC_FUNCTION(ssse3)
|
||||
{
|
||||
#if HAVE_SSSE3
|
||||
#include "../generic/dec_head.c"
|
||||
dec_loop_ssse3(&s, &slen, &o, &olen);
|
||||
#include "../generic/dec_tail.c"
|
||||
#else
|
||||
BASE64_DEC_STUB
|
||||
#endif
|
||||
}
|
173
deps/base64/base64/lib/arch/ssse3/dec_loop.c
vendored
Normal file
173
deps/base64/base64/lib/arch/ssse3/dec_loop.c
vendored
Normal file
@ -0,0 +1,173 @@
|
||||
// The input consists of six character sets in the Base64 alphabet, which we
|
||||
// need to map back to the 6-bit values they represent. There are three ranges,
|
||||
// two singles, and then there's the rest.
|
||||
//
|
||||
// # From To Add Characters
|
||||
// 1 [43] [62] +19 +
|
||||
// 2 [47] [63] +16 /
|
||||
// 3 [48..57] [52..61] +4 0..9
|
||||
// 4 [65..90] [0..25] -65 A..Z
|
||||
// 5 [97..122] [26..51] -71 a..z
|
||||
// (6) Everything else => invalid input
|
||||
//
|
||||
// We will use lookup tables for character validation and offset computation.
|
||||
// Remember that 0x2X and 0x0X are the same index for _mm_shuffle_epi8, this
|
||||
// allows to mask with 0x2F instead of 0x0F and thus save one constant
|
||||
// declaration (register and/or memory access).
|
||||
//
|
||||
// For offsets:
|
||||
// Perfect hash for lut = ((src >> 4) & 0x2F) + ((src == 0x2F) ? 0xFF : 0x00)
|
||||
// 0000 = garbage
|
||||
// 0001 = /
|
||||
// 0010 = +
|
||||
// 0011 = 0-9
|
||||
// 0100 = A-Z
|
||||
// 0101 = A-Z
|
||||
// 0110 = a-z
|
||||
// 0111 = a-z
|
||||
// 1000 >= garbage
|
||||
//
|
||||
// For validation, here's the table.
|
||||
// A character is valid if and only if the AND of the 2 lookups equals 0:
|
||||
//
|
||||
// hi \ lo 0000 0001 0010 0011 0100 0101 0110 0111 1000 1001 1010 1011 1100 1101 1110 1111
|
||||
// LUT 0x15 0x11 0x11 0x11 0x11 0x11 0x11 0x11 0x11 0x11 0x13 0x1A 0x1B 0x1B 0x1B 0x1A
|
||||
//
|
||||
// 0000 0x10 char NUL SOH STX ETX EOT ENQ ACK BEL BS HT LF VT FF CR SO SI
|
||||
// andlut 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10
|
||||
//
|
||||
// 0001 0x10 char DLE DC1 DC2 DC3 DC4 NAK SYN ETB CAN EM SUB ESC FS GS RS US
|
||||
// andlut 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10
|
||||
//
|
||||
// 0010 0x01 char SP ! " # $ % & ' ( ) * + , - . /
|
||||
// andlut 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x00 0x01 0x01 0x01 0x00
|
||||
//
|
||||
// 0011 0x02 char 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
|
||||
// andlut 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x02 0x02 0x02 0x02 0x02 0x02
|
||||
//
|
||||
// 0100 0x04 char @ A B C D E F G H I J K L M N O
|
||||
// andlut 0x04 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
|
||||
//
|
||||
// 0101 0x08 char P Q R S T U V W X Y Z [ \ ] ^ _
|
||||
// andlut 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x08 0x08 0x08 0x08 0x08
|
||||
//
|
||||
// 0110 0x04 char ` a b c d e f g h i j k l m n o
|
||||
// andlut 0x04 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
|
||||
// 0111 0x08 char p q r s t u v w x y z { | } ~ DEL
|
||||
// andlut 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x08 0x08 0x08 0x08 0x08
|
||||
//
|
||||
// 1000 0x10 andlut 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10
|
||||
// 1001 0x10 andlut 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10
|
||||
// 1010 0x10 andlut 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10
|
||||
// 1011 0x10 andlut 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10
|
||||
// 1100 0x10 andlut 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10
|
||||
// 1101 0x10 andlut 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10
|
||||
// 1110 0x10 andlut 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10
|
||||
// 1111 0x10 andlut 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10
|
||||
|
||||
static inline int
|
||||
dec_loop_ssse3_inner (const uint8_t **s, uint8_t **o, size_t *rounds)
|
||||
{
|
||||
const __m128i lut_lo = _mm_setr_epi8(
|
||||
0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11,
|
||||
0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A);
|
||||
|
||||
const __m128i lut_hi = _mm_setr_epi8(
|
||||
0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08,
|
||||
0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10);
|
||||
|
||||
const __m128i lut_roll = _mm_setr_epi8(
|
||||
0, 16, 19, 4, -65, -65, -71, -71,
|
||||
0, 0, 0, 0, 0, 0, 0, 0);
|
||||
|
||||
const __m128i mask_2F = _mm_set1_epi8(0x2F);
|
||||
|
||||
// Load input:
|
||||
__m128i str = _mm_loadu_si128((__m128i *) *s);
|
||||
|
||||
// Table lookups:
|
||||
const __m128i hi_nibbles = _mm_and_si128(_mm_srli_epi32(str, 4), mask_2F);
|
||||
const __m128i lo_nibbles = _mm_and_si128(str, mask_2F);
|
||||
const __m128i hi = _mm_shuffle_epi8(lut_hi, hi_nibbles);
|
||||
const __m128i lo = _mm_shuffle_epi8(lut_lo, lo_nibbles);
|
||||
|
||||
// Check for invalid input: if any "and" values from lo and hi are not
|
||||
// zero, fall back on bytewise code to do error checking and reporting:
|
||||
if (_mm_movemask_epi8(_mm_cmpgt_epi8(_mm_and_si128(lo, hi), _mm_setzero_si128())) != 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
const __m128i eq_2F = _mm_cmpeq_epi8(str, mask_2F);
|
||||
const __m128i roll = _mm_shuffle_epi8(lut_roll, _mm_add_epi8(eq_2F, hi_nibbles));
|
||||
|
||||
// Now simply add the delta values to the input:
|
||||
str = _mm_add_epi8(str, roll);
|
||||
|
||||
// Reshuffle the input to packed 12-byte output format:
|
||||
str = dec_reshuffle(str);
|
||||
|
||||
// Store the output:
|
||||
_mm_storeu_si128((__m128i *) *o, str);
|
||||
|
||||
*s += 16;
|
||||
*o += 12;
|
||||
*rounds -= 1;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Decode as many whole 16-character blocks as possible from *s into *o,
// updating both pointers and both length counters. Does nothing when fewer
// than 24 input bytes are available. Stops early at the first block that
// fails validation, leaving that block for the caller.
static inline void
dec_loop_ssse3 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
{
	if (*slen < 24) {
		return;
	}

	// Each round consumes 16 bytes and stores 16 bytes, of which 12 are
	// payload. Keep at least 8 input bytes in reserve to cover for the 4
	// extra bytes written after the output:
	size_t rounds = (*slen - 8) / 16;

	// Account for all rounds up front; corrected below if we stop early:
	*slen -= rounds * 16;
	*olen += rounds * 12;

	for (;;) {
		int ok;

		if (rounds >= 8) {
			ok = dec_loop_ssse3_inner(s, o, &rounds) &&
			     dec_loop_ssse3_inner(s, o, &rounds) &&
			     dec_loop_ssse3_inner(s, o, &rounds) &&
			     dec_loop_ssse3_inner(s, o, &rounds) &&
			     dec_loop_ssse3_inner(s, o, &rounds) &&
			     dec_loop_ssse3_inner(s, o, &rounds) &&
			     dec_loop_ssse3_inner(s, o, &rounds) &&
			     dec_loop_ssse3_inner(s, o, &rounds);
		}
		else if (rounds >= 4) {
			ok = dec_loop_ssse3_inner(s, o, &rounds) &&
			     dec_loop_ssse3_inner(s, o, &rounds) &&
			     dec_loop_ssse3_inner(s, o, &rounds) &&
			     dec_loop_ssse3_inner(s, o, &rounds);
		}
		else if (rounds >= 2) {
			ok = dec_loop_ssse3_inner(s, o, &rounds) &&
			     dec_loop_ssse3_inner(s, o, &rounds);
		}
		else {
			// Last single round; its result does not matter here:
			dec_loop_ssse3_inner(s, o, &rounds);
			break;
		}

		// Stop on invalid input or when all rounds are done:
		if (!ok || rounds == 0) {
			break;
		}
	}

	// Give back the rounds that were not carried out:
	*slen += rounds * 16;
	*olen -= rounds * 12;
}
|
33
deps/base64/base64/lib/arch/ssse3/dec_reshuffle.c
vendored
Normal file
33
deps/base64/base64/lib/arch/ssse3/dec_reshuffle.c
vendored
Normal file
@ -0,0 +1,33 @@
|
||||
static inline __m128i
|
||||
dec_reshuffle (const __m128i in)
|
||||
{
|
||||
// in, bits, upper case are most significant bits, lower case are least significant bits
|
||||
// 00llllll 00kkkkLL 00jjKKKK 00JJJJJJ
|
||||
// 00iiiiii 00hhhhII 00ggHHHH 00GGGGGG
|
||||
// 00ffffff 00eeeeFF 00ddEEEE 00DDDDDD
|
||||
// 00cccccc 00bbbbCC 00aaBBBB 00AAAAAA
|
||||
|
||||
const __m128i merge_ab_and_bc = _mm_maddubs_epi16(in, _mm_set1_epi32(0x01400140));
|
||||
// 0000kkkk LLllllll 0000JJJJ JJjjKKKK
|
||||
// 0000hhhh IIiiiiii 0000GGGG GGggHHHH
|
||||
// 0000eeee FFffffff 0000DDDD DDddEEEE
|
||||
// 0000bbbb CCcccccc 0000AAAA AAaaBBBB
|
||||
|
||||
const __m128i out = _mm_madd_epi16(merge_ab_and_bc, _mm_set1_epi32(0x00011000));
|
||||
// 00000000 JJJJJJjj KKKKkkkk LLllllll
|
||||
// 00000000 GGGGGGgg HHHHhhhh IIiiiiii
|
||||
// 00000000 DDDDDDdd EEEEeeee FFffffff
|
||||
// 00000000 AAAAAAaa BBBBbbbb CCcccccc
|
||||
|
||||
// Pack bytes together:
|
||||
return _mm_shuffle_epi8(out, _mm_setr_epi8(
|
||||
2, 1, 0,
|
||||
6, 5, 4,
|
||||
10, 9, 8,
|
||||
14, 13, 12,
|
||||
-1, -1, -1, -1));
|
||||
// 00000000 00000000 00000000 00000000
|
||||
// LLllllll KKKKkkkk JJJJJJjj IIiiiiii
|
||||
// HHHHhhhh GGGGGGgg FFffffff EEEEeeee
|
||||
// DDDDDDdd CCcccccc BBBBbbbb AAAAAAaa
|
||||
}
|
67
deps/base64/base64/lib/arch/ssse3/enc_loop.c
vendored
Normal file
67
deps/base64/base64/lib/arch/ssse3/enc_loop.c
vendored
Normal file
@ -0,0 +1,67 @@
|
||||
static inline void
|
||||
enc_loop_ssse3_inner (const uint8_t **s, uint8_t **o)
|
||||
{
|
||||
// Load input:
|
||||
__m128i str = _mm_loadu_si128((__m128i *) *s);
|
||||
|
||||
// Reshuffle:
|
||||
str = enc_reshuffle(str);
|
||||
|
||||
// Translate reshuffled bytes to the Base64 alphabet:
|
||||
str = enc_translate(str);
|
||||
|
||||
// Store:
|
||||
_mm_storeu_si128((__m128i *) *o, str);
|
||||
|
||||
*s += 12;
|
||||
*o += 16;
|
||||
}
|
||||
|
||||
// Encode as many whole 12-byte blocks as possible from *s into *o,
// updating both pointers and both length counters. Does nothing when fewer
// than 16 input bytes are available.
static inline void
enc_loop_ssse3 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
{
	if (*slen < 16) {
		return;
	}

	// Every round loads 16 bytes but consumes only 12. Keep 4 bytes in
	// reserve so that the last load stays inside the input buffer:
	size_t rounds = (*slen - 4) / 12;

	*slen -= rounds * 12;	// 12 bytes consumed per round
	*olen += rounds * 16;	// 16 bytes produced per round

	// Manually unrolled in batches of 8, 4, 2 and 1:
	do {
		if (rounds >= 8) {
			enc_loop_ssse3_inner(s, o);
			enc_loop_ssse3_inner(s, o);
			enc_loop_ssse3_inner(s, o);
			enc_loop_ssse3_inner(s, o);
			enc_loop_ssse3_inner(s, o);
			enc_loop_ssse3_inner(s, o);
			enc_loop_ssse3_inner(s, o);
			enc_loop_ssse3_inner(s, o);
			rounds -= 8;
		}
		else if (rounds >= 4) {
			enc_loop_ssse3_inner(s, o);
			enc_loop_ssse3_inner(s, o);
			enc_loop_ssse3_inner(s, o);
			enc_loop_ssse3_inner(s, o);
			rounds -= 4;
		}
		else if (rounds >= 2) {
			enc_loop_ssse3_inner(s, o);
			enc_loop_ssse3_inner(s, o);
			rounds -= 2;
		}
		else {
			// Final single round:
			enc_loop_ssse3_inner(s, o);
			break;
		}
	} while (rounds > 0);
}
|
48
deps/base64/base64/lib/arch/ssse3/enc_reshuffle.c
vendored
Normal file
48
deps/base64/base64/lib/arch/ssse3/enc_reshuffle.c
vendored
Normal file
@ -0,0 +1,48 @@
|
||||
static inline __m128i
|
||||
enc_reshuffle (__m128i in)
|
||||
{
|
||||
// Input, bytes MSB to LSB:
|
||||
// 0 0 0 0 l k j i h g f e d c b a
|
||||
|
||||
in = _mm_shuffle_epi8(in, _mm_set_epi8(
|
||||
10, 11, 9, 10,
|
||||
7, 8, 6, 7,
|
||||
4, 5, 3, 4,
|
||||
1, 2, 0, 1));
|
||||
// in, bytes MSB to LSB:
|
||||
// k l j k
|
||||
// h i g h
|
||||
// e f d e
|
||||
// b c a b
|
||||
|
||||
const __m128i t0 = _mm_and_si128(in, _mm_set1_epi32(0x0FC0FC00));
|
||||
// bits, upper case are most significant bits, lower case are least significant bits
|
||||
// 0000kkkk LL000000 JJJJJJ00 00000000
|
||||
// 0000hhhh II000000 GGGGGG00 00000000
|
||||
// 0000eeee FF000000 DDDDDD00 00000000
|
||||
// 0000bbbb CC000000 AAAAAA00 00000000
|
||||
|
||||
const __m128i t1 = _mm_mulhi_epu16(t0, _mm_set1_epi32(0x04000040));
|
||||
// 00000000 00kkkkLL 00000000 00JJJJJJ
|
||||
// 00000000 00hhhhII 00000000 00GGGGGG
|
||||
// 00000000 00eeeeFF 00000000 00DDDDDD
|
||||
// 00000000 00bbbbCC 00000000 00AAAAAA
|
||||
|
||||
const __m128i t2 = _mm_and_si128(in, _mm_set1_epi32(0x003F03F0));
|
||||
// 00000000 00llllll 000000jj KKKK0000
|
||||
// 00000000 00iiiiii 000000gg HHHH0000
|
||||
// 00000000 00ffffff 000000dd EEEE0000
|
||||
// 00000000 00cccccc 000000aa BBBB0000
|
||||
|
||||
const __m128i t3 = _mm_mullo_epi16(t2, _mm_set1_epi32(0x01000010));
|
||||
// 00llllll 00000000 00jjKKKK 00000000
|
||||
// 00iiiiii 00000000 00ggHHHH 00000000
|
||||
// 00ffffff 00000000 00ddEEEE 00000000
|
||||
// 00cccccc 00000000 00aaBBBB 00000000
|
||||
|
||||
return _mm_or_si128(t1, t3);
|
||||
// 00llllll 00kkkkLL 00jjKKKK 00JJJJJJ
|
||||
// 00iiiiii 00hhhhII 00ggHHHH 00GGGGGG
|
||||
// 00ffffff 00eeeeFF 00ddEEEE 00DDDDDD
|
||||
// 00cccccc 00bbbbCC 00aaBBBB 00AAAAAA
|
||||
}
|
33
deps/base64/base64/lib/arch/ssse3/enc_translate.c
vendored
Normal file
33
deps/base64/base64/lib/arch/ssse3/enc_translate.c
vendored
Normal file
@ -0,0 +1,33 @@
|
||||
static inline __m128i
|
||||
enc_translate (const __m128i in)
|
||||
{
|
||||
// A lookup table containing the absolute offsets for all ranges:
|
||||
const __m128i lut = _mm_setr_epi8(
|
||||
65, 71, -4, -4,
|
||||
-4, -4, -4, -4,
|
||||
-4, -4, -4, -4,
|
||||
-19, -16, 0, 0
|
||||
);
|
||||
|
||||
// Translate values 0..63 to the Base64 alphabet. There are five sets:
|
||||
// # From To Abs Index Characters
|
||||
// 0 [0..25] [65..90] +65 0 ABCDEFGHIJKLMNOPQRSTUVWXYZ
|
||||
// 1 [26..51] [97..122] +71 1 abcdefghijklmnopqrstuvwxyz
|
||||
// 2 [52..61] [48..57] -4 [2..11] 0123456789
|
||||
// 3 [62] [43] -19 12 +
|
||||
// 4 [63] [47] -16 13 /
|
||||
|
||||
// Create LUT indices from the input. The index for range #0 is right,
|
||||
// others are 1 less than expected:
|
||||
__m128i indices = _mm_subs_epu8(in, _mm_set1_epi8(51));
|
||||
|
||||
// mask is 0xFF (-1) for range #[1..4] and 0x00 for range #0:
|
||||
__m128i mask = _mm_cmpgt_epi8(in, _mm_set1_epi8(25));
|
||||
|
||||
// Subtract -1, so add 1 to indices for range #[1..4]. All indices are
|
||||
// now correct:
|
||||
indices = _mm_sub_epi8(indices, mask);
|
||||
|
||||
// Add offsets to input values:
|
||||
return _mm_add_epi8(in, _mm_shuffle_epi8(lut, indices));
|
||||
}
|
281
deps/base64/base64/lib/codec_choose.c
vendored
Normal file
281
deps/base64/base64/lib/codec_choose.c
vendored
Normal file
@ -0,0 +1,281 @@
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "../include/libbase64.h"
|
||||
#include "codecs.h"
|
||||
#include "config.h"
|
||||
#include "env.h"
|
||||
|
||||
#if (__x86_64__ || __i386__ || _M_X86 || _M_X64)
|
||||
#define BASE64_X86
|
||||
#if (HAVE_SSSE3 || HAVE_SSE41 || HAVE_SSE42 || HAVE_AVX || HAVE_AVX2)
|
||||
#define BASE64_X86_SIMD
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef BASE64_X86
|
||||
#ifdef _MSC_VER
|
||||
#include <intrin.h>
|
||||
#define __cpuid_count(__level, __count, __eax, __ebx, __ecx, __edx) \
|
||||
{ \
|
||||
int info[4]; \
|
||||
__cpuidex(info, __level, __count); \
|
||||
__eax = info[0]; \
|
||||
__ebx = info[1]; \
|
||||
__ecx = info[2]; \
|
||||
__edx = info[3]; \
|
||||
}
|
||||
#define __cpuid(__level, __eax, __ebx, __ecx, __edx) \
|
||||
__cpuid_count(__level, 0, __eax, __ebx, __ecx, __edx)
|
||||
#else
|
||||
#include <cpuid.h>
|
||||
#if HAVE_AVX2 || HAVE_AVX
|
||||
#if ((__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 2) || (__clang_major__ >= 3))
|
||||
// Execute the XGETBV instruction for the extended control register given
// by `index` and return its 64-bit value (EDX holds the high half, EAX the
// low half).
static inline uint64_t _xgetbv (uint32_t index)
{
	uint32_t lo, hi;

	__asm__ __volatile__("xgetbv" : "=a"(lo), "=d"(hi) : "c"(index));

	return lo | ((uint64_t)hi << 32);
}
|
||||
#else
|
||||
#error "Platform not supported"
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef bit_AVX2
|
||||
#define bit_AVX2 (1 << 5)
|
||||
#endif
|
||||
#ifndef bit_SSSE3
|
||||
#define bit_SSSE3 (1 << 9)
|
||||
#endif
|
||||
#ifndef bit_SSE41
|
||||
#define bit_SSE41 (1 << 19)
|
||||
#endif
|
||||
#ifndef bit_SSE42
|
||||
#define bit_SSE42 (1 << 20)
|
||||
#endif
|
||||
#ifndef bit_AVX
|
||||
#define bit_AVX (1 << 28)
|
||||
#endif
|
||||
|
||||
#define bit_XSAVE_XRSTORE (1 << 27)
|
||||
|
||||
#ifndef _XCR_XFEATURE_ENABLED_MASK
|
||||
#define _XCR_XFEATURE_ENABLED_MASK 0
|
||||
#endif
|
||||
|
||||
#define _XCR_XMM_AND_YMM_STATE_ENABLED_BY_OS 0x6
|
||||
#endif
|
||||
|
||||
// Function declarations:
|
||||
#define BASE64_CODEC_FUNCS(arch) \
|
||||
BASE64_ENC_FUNCTION(arch); \
|
||||
BASE64_DEC_FUNCTION(arch); \
|
||||
|
||||
BASE64_CODEC_FUNCS(avx2)
|
||||
BASE64_CODEC_FUNCS(neon32)
|
||||
BASE64_CODEC_FUNCS(neon64)
|
||||
BASE64_CODEC_FUNCS(plain)
|
||||
BASE64_CODEC_FUNCS(ssse3)
|
||||
BASE64_CODEC_FUNCS(sse41)
|
||||
BASE64_CODEC_FUNCS(sse42)
|
||||
BASE64_CODEC_FUNCS(avx)
|
||||
|
||||
static bool
|
||||
codec_choose_forced (struct codec *codec, int flags)
|
||||
{
|
||||
// If the user wants to use a certain codec,
|
||||
// always allow it, even if the codec is a no-op.
|
||||
// For testing purposes.
|
||||
|
||||
if (!(flags & 0xFF)) {
|
||||
return false;
|
||||
}
|
||||
if (flags & BASE64_FORCE_AVX2) {
|
||||
codec->enc = base64_stream_encode_avx2;
|
||||
codec->dec = base64_stream_decode_avx2;
|
||||
return true;
|
||||
}
|
||||
if (flags & BASE64_FORCE_NEON32) {
|
||||
codec->enc = base64_stream_encode_neon32;
|
||||
codec->dec = base64_stream_decode_neon32;
|
||||
return true;
|
||||
}
|
||||
if (flags & BASE64_FORCE_NEON64) {
|
||||
codec->enc = base64_stream_encode_neon64;
|
||||
codec->dec = base64_stream_decode_neon64;
|
||||
return true;
|
||||
}
|
||||
if (flags & BASE64_FORCE_PLAIN) {
|
||||
codec->enc = base64_stream_encode_plain;
|
||||
codec->dec = base64_stream_decode_plain;
|
||||
return true;
|
||||
}
|
||||
if (flags & BASE64_FORCE_SSSE3) {
|
||||
codec->enc = base64_stream_encode_ssse3;
|
||||
codec->dec = base64_stream_decode_ssse3;
|
||||
return true;
|
||||
}
|
||||
if (flags & BASE64_FORCE_SSE41) {
|
||||
codec->enc = base64_stream_encode_sse41;
|
||||
codec->dec = base64_stream_decode_sse41;
|
||||
return true;
|
||||
}
|
||||
if (flags & BASE64_FORCE_SSE42) {
|
||||
codec->enc = base64_stream_encode_sse42;
|
||||
codec->dec = base64_stream_decode_sse42;
|
||||
return true;
|
||||
}
|
||||
if (flags & BASE64_FORCE_AVX) {
|
||||
codec->enc = base64_stream_encode_avx;
|
||||
codec->dec = base64_stream_decode_avx;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Select a NEON codec, based purely on the compile-time configuration.
// Returns true and fills in `codec` when NEON support was compiled in,
// false otherwise.
static bool
codec_choose_arm (struct codec *codec)
{
#if !((defined(__ARM_NEON__) || defined(__ARM_NEON)) && ((defined(__aarch64__) && HAVE_NEON64) || HAVE_NEON32))

	// No NEON codec available in this build:
	(void)codec;
	return false;

#elif defined(__aarch64__) && HAVE_NEON64

	// Unfortunately there is no portable way to check for NEON support
	// at runtime from userland in the same way that x86 has cpuid, so
	// just stick to the compile-time configuration:
	codec->enc = base64_stream_encode_neon64;
	codec->dec = base64_stream_decode_neon64;
	return true;

#else

	// Same reasoning as above, for the 32-bit NEON codec:
	codec->enc = base64_stream_encode_neon32;
	codec->dec = base64_stream_decode_neon32;
	return true;

#endif
}
|
||||
|
||||
// Select the best x86 SIMD codec that is both compiled in and supported by
// the CPU at runtime, in order of preference: AVX2, AVX, SSE4.2, SSE4.1,
// SSSE3.
//
// Returns true and fills in `codec` when a codec was selected, false when
// no SIMD codec is compiled in or the CPU supports none of them.
static bool
codec_choose_x86 (struct codec *codec)
{
#ifdef BASE64_X86_SIMD

	unsigned int eax, ebx = 0, ecx = 0, edx;
	unsigned int max_level;

	// Find the highest CPUID leaf that the CPU supports:
	#ifdef _MSC_VER
	int info[4];
	__cpuidex(info, 0, 0);
	max_level = info[0];
	#else
	max_level = __get_cpuid_max(0, NULL);
	#endif

	#if HAVE_AVX2 || HAVE_AVX
	// Check for AVX/AVX2 support:
	// Checking for AVX requires 3 things:
	// 1) CPUID indicates that the OS uses XSAVE and XRSTORE instructions
	//    (allowing saving YMM registers on context switch)
	// 2) CPUID indicates support for AVX
	// 3) XGETBV indicates the AVX registers will be saved and restored on
	//    context switch
	//
	// Note that XGETBV is only available on 686 or later CPUs, so the
	// instruction needs to be conditionally run.
	if (max_level >= 1) {
		__cpuid_count(1, 0, eax, ebx, ecx, edx);
		if (ecx & bit_XSAVE_XRSTORE) {
			uint64_t xcr_mask;
			xcr_mask = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);

			// Both the XMM and the YMM state bit must be set. Testing
			// for "any bit set" would wrongly accept an OS that only
			// saves the XMM state, and AVX code would then run with
			// YMM registers that are not preserved:
			if ((xcr_mask & _XCR_XMM_AND_YMM_STATE_ENABLED_BY_OS) == _XCR_XMM_AND_YMM_STATE_ENABLED_BY_OS) {
				#if HAVE_AVX2
				if (max_level >= 7) {
					__cpuid_count(7, 0, eax, ebx, ecx, edx);
					if (ebx & bit_AVX2) {
						codec->enc = base64_stream_encode_avx2;
						codec->dec = base64_stream_decode_avx2;
						return true;
					}
				}
				#endif
				#if HAVE_AVX
				// Query leaf 1 again: the registers may have been
				// overwritten by the leaf 7 query above.
				__cpuid_count(1, 0, eax, ebx, ecx, edx);
				if (ecx & bit_AVX) {
					codec->enc = base64_stream_encode_avx;
					codec->dec = base64_stream_decode_avx;
					return true;
				}
				#endif
			}
		}
	}
	#endif

	#if HAVE_SSE42
	// Check for SSE42 support:
	if (max_level >= 1) {
		__cpuid(1, eax, ebx, ecx, edx);
		if (ecx & bit_SSE42) {
			codec->enc = base64_stream_encode_sse42;
			codec->dec = base64_stream_decode_sse42;
			return true;
		}
	}
	#endif

	#if HAVE_SSE41
	// Check for SSE41 support:
	if (max_level >= 1) {
		__cpuid(1, eax, ebx, ecx, edx);
		if (ecx & bit_SSE41) {
			codec->enc = base64_stream_encode_sse41;
			codec->dec = base64_stream_decode_sse41;
			return true;
		}
	}
	#endif

	#if HAVE_SSSE3
	// Check for SSSE3 support:
	if (max_level >= 1) {
		__cpuid(1, eax, ebx, ecx, edx);
		if (ecx & bit_SSSE3) {
			codec->enc = base64_stream_encode_ssse3;
			codec->dec = base64_stream_decode_ssse3;
			return true;
		}
	}
	#endif

#else
	(void)codec;
#endif

	return false;
}
|
||||
|
||||
void
|
||||
codec_choose (struct codec *codec, int flags)
|
||||
{
|
||||
// User forced a codec:
|
||||
if (codec_choose_forced(codec, flags)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Runtime feature detection:
|
||||
if (codec_choose_arm(codec)) {
|
||||
return;
|
||||
}
|
||||
if (codec_choose_x86(codec)) {
|
||||
return;
|
||||
}
|
||||
codec->enc = base64_stream_encode_plain;
|
||||
codec->dec = base64_stream_decode_plain;
|
||||
}
|
65
deps/base64/base64/lib/codecs.h
vendored
Normal file
65
deps/base64/base64/lib/codecs.h
vendored
Normal file
@ -0,0 +1,65 @@
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
|
||||
#include "../include/libbase64.h"
|
||||
#include "config.h"
|
||||
|
||||
// Function parameters for encoding functions:
|
||||
#define BASE64_ENC_PARAMS \
|
||||
( struct base64_state *state \
|
||||
, const char *src \
|
||||
, size_t srclen \
|
||||
, char *out \
|
||||
, size_t *outlen \
|
||||
)
|
||||
|
||||
// Function parameters for decoding functions:
|
||||
#define BASE64_DEC_PARAMS \
|
||||
( struct base64_state *state \
|
||||
, const char *src \
|
||||
, size_t srclen \
|
||||
, char *out \
|
||||
, size_t *outlen \
|
||||
)
|
||||
|
||||
// Function signature for encoding functions:
|
||||
#define BASE64_ENC_FUNCTION(arch) \
|
||||
void \
|
||||
base64_stream_encode_ ## arch \
|
||||
BASE64_ENC_PARAMS
|
||||
|
||||
// Function signature for decoding functions:
|
||||
#define BASE64_DEC_FUNCTION(arch) \
|
||||
int \
|
||||
base64_stream_decode_ ## arch \
|
||||
BASE64_DEC_PARAMS
|
||||
|
||||
// Cast away unused variable, silence compiler:
|
||||
#define UNUSED(x) ((void)(x))
|
||||
|
||||
// Stub function when encoder arch unsupported:
|
||||
#define BASE64_ENC_STUB \
|
||||
UNUSED(state); \
|
||||
UNUSED(src); \
|
||||
UNUSED(srclen); \
|
||||
UNUSED(out); \
|
||||
\
|
||||
*outlen = 0;
|
||||
|
||||
// Stub function when decoder arch unsupported:
|
||||
#define BASE64_DEC_STUB \
|
||||
UNUSED(state); \
|
||||
UNUSED(src); \
|
||||
UNUSED(srclen); \
|
||||
UNUSED(out); \
|
||||
UNUSED(outlen); \
|
||||
\
|
||||
return -1;
|
||||
|
||||
// A matched pair of streaming encode/decode function pointers. The
// signatures are given by BASE64_ENC_PARAMS and BASE64_DEC_PARAMS; `enc`
// returns nothing, `dec` returns an int.
struct codec
|
||||
{
|
||||
void (* enc) BASE64_ENC_PARAMS;
|
||||
int (* dec) BASE64_DEC_PARAMS;
|
||||
};
|
||||
|
||||
extern void codec_choose (struct codec *, int flags);
|
1
deps/base64/base64/lib/config.h
vendored
Normal file
1
deps/base64/base64/lib/config.h
vendored
Normal file
@ -0,0 +1 @@
|
||||
// Intentionally empty
|
74
deps/base64/base64/lib/env.h
vendored
Normal file
74
deps/base64/base64/lib/env.h
vendored
Normal file
@ -0,0 +1,74 @@
|
||||
#ifndef BASE64_ENV_H
|
||||
#define BASE64_ENV_H
|
||||
|
||||
// This header file contains macro definitions that describe certain aspects of
|
||||
// the compile-time environment. Compatibility and portability macros go here.
|
||||
|
||||
// Define machine endianness. This is for GCC:
|
||||
#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
|
||||
# define BASE64_LITTLE_ENDIAN 1
|
||||
#else
|
||||
# define BASE64_LITTLE_ENDIAN 0
|
||||
#endif
|
||||
|
||||
// This is for Clang:
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
# define BASE64_LITTLE_ENDIAN 1
|
||||
#endif
|
||||
|
||||
#ifdef __BIG_ENDIAN__
|
||||
# define BASE64_LITTLE_ENDIAN 0
|
||||
#endif
|
||||
|
||||
// MSVC++ needs intrin.h for _byteswap_uint64 (issue #68):
|
||||
#if BASE64_LITTLE_ENDIAN && defined(_MSC_VER)
|
||||
# include <intrin.h>
|
||||
#endif
|
||||
|
||||
// Endian conversion functions:
|
||||
#if BASE64_LITTLE_ENDIAN
|
||||
# ifdef _MSC_VER
|
||||
// Microsoft Visual C++:
|
||||
# define BASE64_HTOBE32(x) _byteswap_ulong(x)
|
||||
# define BASE64_HTOBE64(x) _byteswap_uint64(x)
|
||||
# else
|
||||
// GCC and Clang:
|
||||
# define BASE64_HTOBE32(x) __builtin_bswap32(x)
|
||||
# define BASE64_HTOBE64(x) __builtin_bswap64(x)
|
||||
# endif
|
||||
#else
|
||||
// No conversion needed:
|
||||
# define BASE64_HTOBE32(x) (x)
|
||||
# define BASE64_HTOBE64(x) (x)
|
||||
#endif
|
||||
|
||||
// Detect word size:
|
||||
#if defined (__x86_64__)
|
||||
// This also works for the x32 ABI, which has a 64-bit word size.
|
||||
# define BASE64_WORDSIZE 64
|
||||
#elif defined (_INTEGRAL_MAX_BITS)
|
||||
# define BASE64_WORDSIZE _INTEGRAL_MAX_BITS
|
||||
#elif defined (__WORDSIZE)
|
||||
# define BASE64_WORDSIZE __WORDSIZE
|
||||
#elif defined (__SIZE_WIDTH__)
|
||||
# define BASE64_WORDSIZE __SIZE_WIDTH__
|
||||
#else
|
||||
# error BASE64_WORDSIZE_NOT_DEFINED
|
||||
#endif
|
||||
|
||||
// End-of-file definitions.
|
||||
// Almost end-of-file when waiting for the last '=' character:
|
||||
#define BASE64_AEOF 1
|
||||
// End-of-file when stream end has been reached or invalid input provided:
|
||||
#define BASE64_EOF 2
|
||||
|
||||
// GCC 7 defaults to issuing a warning for fallthrough in switch statements,
|
||||
// unless the fallthrough cases are marked with an attribute. As we use
|
||||
// fallthrough deliberately, define an alias for the attribute:
|
||||
#if __GNUC__ >= 7
|
||||
# define BASE64_FALLTHROUGH __attribute__((fallthrough));
|
||||
#else
|
||||
# define BASE64_FALLTHROUGH
|
||||
#endif
|
||||
|
||||
#endif // BASE64_ENV_H
|
7
deps/base64/base64/lib/exports.txt
vendored
Normal file
7
deps/base64/base64/lib/exports.txt
vendored
Normal file
@ -0,0 +1,7 @@
|
||||
base64_encode
|
||||
base64_stream_encode
|
||||
base64_stream_encode_init
|
||||
base64_stream_encode_final
|
||||
base64_decode
|
||||
base64_stream_decode
|
||||
base64_stream_decode_init
|
164
deps/base64/base64/lib/lib.c
vendored
Normal file
164
deps/base64/base64/lib/lib.c
vendored
Normal file
@ -0,0 +1,164 @@
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#ifdef _OPENMP
|
||||
#include <omp.h>
|
||||
#endif
|
||||
|
||||
#include "../include/libbase64.h"
|
||||
#include "tables/tables.h"
|
||||
#include "codecs.h"
|
||||
#include "env.h"
|
||||
|
||||
// These static function pointers are initialized once when the library is
|
||||
// first used, and remain in use for the remaining lifetime of the program.
|
||||
// The idea being that CPU features don't change at runtime.
|
||||
static struct codec codec = { NULL, NULL };
|
||||
|
||||
void
|
||||
base64_stream_encode_init (struct base64_state *state, int flags)
|
||||
{
|
||||
// If any of the codec flags are set, redo choice:
|
||||
if (codec.enc == NULL || flags & 0xFF) {
|
||||
codec_choose(&codec, flags);
|
||||
}
|
||||
state->eof = 0;
|
||||
state->bytes = 0;
|
||||
state->carry = 0;
|
||||
state->flags = flags;
|
||||
}
|
||||
|
||||
void
|
||||
base64_stream_encode
|
||||
( struct base64_state *state
|
||||
, const char *src
|
||||
, size_t srclen
|
||||
, char *out
|
||||
, size_t *outlen
|
||||
)
|
||||
{
|
||||
codec.enc(state, src, srclen, out, outlen);
|
||||
}
|
||||
|
||||
void
|
||||
base64_stream_encode_final
|
||||
( struct base64_state *state
|
||||
, char *out
|
||||
, size_t *outlen
|
||||
)
|
||||
{
|
||||
uint8_t *o = (uint8_t *)out;
|
||||
|
||||
if (state->bytes == 1) {
|
||||
*o++ = base64_table_enc_6bit[state->carry];
|
||||
*o++ = '=';
|
||||
*o++ = '=';
|
||||
*outlen = 3;
|
||||
return;
|
||||
}
|
||||
if (state->bytes == 2) {
|
||||
*o++ = base64_table_enc_6bit[state->carry];
|
||||
*o++ = '=';
|
||||
*outlen = 2;
|
||||
return;
|
||||
}
|
||||
*outlen = 0;
|
||||
}
|
||||
|
||||
void
|
||||
base64_stream_decode_init (struct base64_state *state, int flags)
|
||||
{
|
||||
// If any of the codec flags are set, redo choice:
|
||||
if (codec.dec == NULL || flags & 0xFF) {
|
||||
codec_choose(&codec, flags);
|
||||
}
|
||||
state->eof = 0;
|
||||
state->bytes = 0;
|
||||
state->carry = 0;
|
||||
state->flags = flags;
|
||||
}
|
||||
|
||||
int
|
||||
base64_stream_decode
|
||||
( struct base64_state *state
|
||||
, const char *src
|
||||
, size_t srclen
|
||||
, char *out
|
||||
, size_t *outlen
|
||||
)
|
||||
{
|
||||
return codec.dec(state, src, srclen, out, outlen);
|
||||
}
|
||||
|
||||
#if defined(_OPENMP)

// Starting up OpenMP and creating a team of threads has a cost of its own,
// so the parallel code paths are only taken when the input is at least this
// many bytes long:
#define OMP_THRESHOLD 20000

// Pull in the OpenMP-accelerated one-shot encoder and decoder. They are only
// compiled when the compiler has OpenMP enabled:
#include "lib_openmp.c"

#endif
|
||||
|
||||
void
|
||||
base64_encode
|
||||
( const char *src
|
||||
, size_t srclen
|
||||
, char *out
|
||||
, size_t *outlen
|
||||
, int flags
|
||||
)
|
||||
{
|
||||
size_t s;
|
||||
size_t t;
|
||||
struct base64_state state;
|
||||
|
||||
#ifdef _OPENMP
|
||||
if (srclen >= OMP_THRESHOLD) {
|
||||
base64_encode_openmp(src, srclen, out, outlen, flags);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Init the stream reader:
|
||||
base64_stream_encode_init(&state, flags);
|
||||
|
||||
// Feed the whole string to the stream reader:
|
||||
base64_stream_encode(&state, src, srclen, out, &s);
|
||||
|
||||
// Finalize the stream by writing trailer if any:
|
||||
base64_stream_encode_final(&state, out + s, &t);
|
||||
|
||||
// Final output length is stream length plus tail:
|
||||
*outlen = s + t;
|
||||
}
|
||||
|
||||
int
|
||||
base64_decode
|
||||
( const char *src
|
||||
, size_t srclen
|
||||
, char *out
|
||||
, size_t *outlen
|
||||
, int flags
|
||||
)
|
||||
{
|
||||
int ret;
|
||||
struct base64_state state;
|
||||
|
||||
#ifdef _OPENMP
|
||||
if (srclen >= OMP_THRESHOLD) {
|
||||
return base64_decode_openmp(src, srclen, out, outlen, flags);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Init the stream reader:
|
||||
base64_stream_decode_init(&state, flags);
|
||||
|
||||
// Feed the whole string to the stream reader:
|
||||
ret = base64_stream_decode(&state, src, srclen, out, outlen);
|
||||
|
||||
// If when decoding a whole block, we're still waiting for input then fail:
|
||||
if (ret && (state.bytes == 0)) {
|
||||
return ret;
|
||||
}
|
||||
return 0;
|
||||
}
|
149
deps/base64/base64/lib/lib_openmp.c
vendored
Normal file
149
deps/base64/base64/lib/lib_openmp.c
vendored
Normal file
@ -0,0 +1,149 @@
|
||||
// This code makes some assumptions on the implementation of
|
||||
// base64_stream_encode_init(), base64_stream_encode() and base64_stream_decode().
|
||||
// Basically these assumptions boil down to that when breaking the src into
|
||||
// parts, out parts can be written without side effects.
|
||||
// This is met when:
|
||||
// 1) base64_stream_encode() and base64_stream_decode() don't use globals;
|
||||
// 2) the shared variables src and out are not read or written outside of the
|
||||
// bounds of their parts, i.e. when base64_stream_encode() reads a multiple
|
||||
// of 3 bytes, it must write no more than a multiple of 4 bytes, not even
|
||||
// temporarily;
|
||||
// 3) the state flag can be discarded after base64_stream_encode() and
|
||||
// base64_stream_decode() on the parts.
|
||||
|
||||
static inline void
|
||||
base64_encode_openmp
|
||||
( const char *src
|
||||
, size_t srclen
|
||||
, char *out
|
||||
, size_t *outlen
|
||||
, int flags
|
||||
)
|
||||
{
|
||||
size_t s;
|
||||
size_t t;
|
||||
size_t sum = 0, len, last_len;
|
||||
struct base64_state state, initial_state;
|
||||
int num_threads, i;
|
||||
|
||||
// Request a number of threads but not necessarily get them:
|
||||
#pragma omp parallel
|
||||
{
|
||||
// Get the number of threads used from one thread only,
|
||||
// as num_threads is a shared var:
|
||||
#pragma omp single
|
||||
{
|
||||
num_threads = omp_get_num_threads();
|
||||
|
||||
// Split the input string into num_threads parts, each
|
||||
// part a multiple of 3 bytes. The remaining bytes will
|
||||
// be done later:
|
||||
len = srclen / (num_threads * 3);
|
||||
len *= 3;
|
||||
last_len = srclen - num_threads * len;
|
||||
|
||||
// Init the stream reader:
|
||||
base64_stream_encode_init(&state, flags);
|
||||
initial_state = state;
|
||||
}
|
||||
|
||||
// Single has an implicit barrier for all threads to wait here
|
||||
// for the above to complete:
|
||||
#pragma omp for firstprivate(state) private(s) reduction(+:sum) schedule(static,1)
|
||||
for (i = 0; i < num_threads; i++)
|
||||
{
|
||||
// Feed each part of the string to the stream reader:
|
||||
base64_stream_encode(&state, src + i * len, len, out + i * len * 4 / 3, &s);
|
||||
sum += s;
|
||||
}
|
||||
}
|
||||
|
||||
// As encoding should never fail and we encode an exact multiple
|
||||
// of 3 bytes, we can discard state:
|
||||
state = initial_state;
|
||||
|
||||
// Encode the remaining bytes:
|
||||
base64_stream_encode(&state, src + num_threads * len, last_len, out + num_threads * len * 4 / 3, &s);
|
||||
|
||||
// Finalize the stream by writing trailer if any:
|
||||
base64_stream_encode_final(&state, out + num_threads * len * 4 / 3 + s, &t);
|
||||
|
||||
// Final output length is stream length plus tail:
|
||||
sum += s + t;
|
||||
*outlen = sum;
|
||||
}
|
||||
|
||||
static inline int
|
||||
base64_decode_openmp
|
||||
( const char *src
|
||||
, size_t srclen
|
||||
, char *out
|
||||
, size_t *outlen
|
||||
, int flags
|
||||
)
|
||||
{
|
||||
int num_threads, result = 0, i;
|
||||
size_t sum = 0, len, last_len, s;
|
||||
struct base64_state state, initial_state;
|
||||
|
||||
// Request a number of threads but not necessarily get them:
|
||||
#pragma omp parallel
|
||||
{
|
||||
// Get the number of threads used from one thread only,
|
||||
// as num_threads is a shared var:
|
||||
#pragma omp single
|
||||
{
|
||||
num_threads = omp_get_num_threads();
|
||||
|
||||
// Split the input string into num_threads parts, each
|
||||
// part a multiple of 4 bytes. The remaining bytes will
|
||||
// be done later:
|
||||
len = srclen / (num_threads * 4);
|
||||
len *= 4;
|
||||
last_len = srclen - num_threads * len;
|
||||
|
||||
// Init the stream reader:
|
||||
base64_stream_decode_init(&state, flags);
|
||||
|
||||
initial_state = state;
|
||||
}
|
||||
|
||||
// Single has an implicit barrier to wait here for the above to
|
||||
// complete:
|
||||
#pragma omp for firstprivate(state) private(s) reduction(+:sum, result) schedule(static,1)
|
||||
for (i = 0; i < num_threads; i++)
|
||||
{
|
||||
int this_result;
|
||||
|
||||
// Feed each part of the string to the stream reader:
|
||||
this_result = base64_stream_decode(&state, src + i * len, len, out + i * len * 3 / 4, &s);
|
||||
sum += s;
|
||||
result += this_result;
|
||||
}
|
||||
}
|
||||
|
||||
// If `result' equals `-num_threads', then all threads returned -1,
|
||||
// indicating that the requested codec is not available:
|
||||
if (result == -num_threads) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// If `result' does not equal `num_threads', then at least one of the
|
||||
// threads hit a decode error:
|
||||
if (result != num_threads) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// So far so good, now decode whatever remains in the buffer. Reuse the
|
||||
// initial state, since we are at a 4-byte boundary:
|
||||
state = initial_state;
|
||||
result = base64_stream_decode(&state, src + num_threads * len, last_len, out + num_threads * len * 3 / 4, &s);
|
||||
sum += s;
|
||||
*outlen = sum;
|
||||
|
||||
// If when decoding a whole block, we're still waiting for input then fail:
|
||||
if (result && (state.bytes == 0)) {
|
||||
return result;
|
||||
}
|
||||
return 0;
|
||||
}
|
1
deps/base64/base64/lib/tables/.gitignore
vendored
Normal file
1
deps/base64/base64/lib/tables/.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
table_generator
|
17
deps/base64/base64/lib/tables/Makefile
vendored
Normal file
17
deps/base64/base64/lib/tables/Makefile
vendored
Normal file
@ -0,0 +1,17 @@
|
||||
# Regenerates the base64 lookup-table headers.
#   all    builds both headers and the table_generator helper program
#   clean  removes everything listed in TARGETS
.PHONY: all clean

# The headers are written through shell redirection, which creates the
# target file before the generator has produced any output. Without this
# special target a failing generator would leave an empty or truncated
# header behind that make then considers up to date.
.DELETE_ON_ERROR:

TARGETS := table_dec_32bit.h table_enc_12bit.h table_generator

all: $(TARGETS)

clean:
	$(RM) $(TARGETS)

# The decode tables are printed by the compiled generator program.
table_dec_32bit.h: table_generator
	./$^ > $@

# The encode table is printed by a script, which must be executable.
table_enc_12bit.h: table_enc_12bit.py
	./$^ > $@

table_generator: table_generator.c
	$(CC) $(CFLAGS) -o $@ $^
|
393
deps/base64/base64/lib/tables/table_dec_32bit.h
vendored
Normal file
393
deps/base64/base64/lib/tables/table_dec_32bit.h
vendored
Normal file
@ -0,0 +1,393 @@
|
||||
#include <stdint.h>
|
||||
#define CHAR62 '+'
|
||||
#define CHAR63 '/'
|
||||
#define CHARPAD '='
|
||||
|
||||
|
||||
#if BASE64_LITTLE_ENDIAN
|
||||
|
||||
|
||||
/* SPECIAL DECODE TABLES FOR LITTLE ENDIAN (INTEL) CPUS */
|
||||
|
||||
const uint32_t base64_table_dec_32bit_d0[256] = {
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0x000000f8, 0xffffffff, 0xffffffff, 0xffffffff, 0x000000fc,
|
||||
0x000000d0, 0x000000d4, 0x000000d8, 0x000000dc, 0x000000e0, 0x000000e4,
|
||||
0x000000e8, 0x000000ec, 0x000000f0, 0x000000f4, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000,
|
||||
0x00000004, 0x00000008, 0x0000000c, 0x00000010, 0x00000014, 0x00000018,
|
||||
0x0000001c, 0x00000020, 0x00000024, 0x00000028, 0x0000002c, 0x00000030,
|
||||
0x00000034, 0x00000038, 0x0000003c, 0x00000040, 0x00000044, 0x00000048,
|
||||
0x0000004c, 0x00000050, 0x00000054, 0x00000058, 0x0000005c, 0x00000060,
|
||||
0x00000064, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0x00000068, 0x0000006c, 0x00000070, 0x00000074, 0x00000078,
|
||||
0x0000007c, 0x00000080, 0x00000084, 0x00000088, 0x0000008c, 0x00000090,
|
||||
0x00000094, 0x00000098, 0x0000009c, 0x000000a0, 0x000000a4, 0x000000a8,
|
||||
0x000000ac, 0x000000b0, 0x000000b4, 0x000000b8, 0x000000bc, 0x000000c0,
|
||||
0x000000c4, 0x000000c8, 0x000000cc, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
|
||||
};
|
||||
|
||||
|
||||
const uint32_t base64_table_dec_32bit_d1[256] = {
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0x0000e003, 0xffffffff, 0xffffffff, 0xffffffff, 0x0000f003,
|
||||
0x00004003, 0x00005003, 0x00006003, 0x00007003, 0x00008003, 0x00009003,
|
||||
0x0000a003, 0x0000b003, 0x0000c003, 0x0000d003, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000,
|
||||
0x00001000, 0x00002000, 0x00003000, 0x00004000, 0x00005000, 0x00006000,
|
||||
0x00007000, 0x00008000, 0x00009000, 0x0000a000, 0x0000b000, 0x0000c000,
|
||||
0x0000d000, 0x0000e000, 0x0000f000, 0x00000001, 0x00001001, 0x00002001,
|
||||
0x00003001, 0x00004001, 0x00005001, 0x00006001, 0x00007001, 0x00008001,
|
||||
0x00009001, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0x0000a001, 0x0000b001, 0x0000c001, 0x0000d001, 0x0000e001,
|
||||
0x0000f001, 0x00000002, 0x00001002, 0x00002002, 0x00003002, 0x00004002,
|
||||
0x00005002, 0x00006002, 0x00007002, 0x00008002, 0x00009002, 0x0000a002,
|
||||
0x0000b002, 0x0000c002, 0x0000d002, 0x0000e002, 0x0000f002, 0x00000003,
|
||||
0x00001003, 0x00002003, 0x00003003, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
|
||||
};
|
||||
|
||||
|
||||
const uint32_t base64_table_dec_32bit_d2[256] = {
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0x00800f00, 0xffffffff, 0xffffffff, 0xffffffff, 0x00c00f00,
|
||||
0x00000d00, 0x00400d00, 0x00800d00, 0x00c00d00, 0x00000e00, 0x00400e00,
|
||||
0x00800e00, 0x00c00e00, 0x00000f00, 0x00400f00, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000,
|
||||
0x00400000, 0x00800000, 0x00c00000, 0x00000100, 0x00400100, 0x00800100,
|
||||
0x00c00100, 0x00000200, 0x00400200, 0x00800200, 0x00c00200, 0x00000300,
|
||||
0x00400300, 0x00800300, 0x00c00300, 0x00000400, 0x00400400, 0x00800400,
|
||||
0x00c00400, 0x00000500, 0x00400500, 0x00800500, 0x00c00500, 0x00000600,
|
||||
0x00400600, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0x00800600, 0x00c00600, 0x00000700, 0x00400700, 0x00800700,
|
||||
0x00c00700, 0x00000800, 0x00400800, 0x00800800, 0x00c00800, 0x00000900,
|
||||
0x00400900, 0x00800900, 0x00c00900, 0x00000a00, 0x00400a00, 0x00800a00,
|
||||
0x00c00a00, 0x00000b00, 0x00400b00, 0x00800b00, 0x00c00b00, 0x00000c00,
|
||||
0x00400c00, 0x00800c00, 0x00c00c00, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
|
||||
};
|
||||
|
||||
|
||||
const uint32_t base64_table_dec_32bit_d3[256] = {
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0x003e0000, 0xffffffff, 0xffffffff, 0xffffffff, 0x003f0000,
|
||||
0x00340000, 0x00350000, 0x00360000, 0x00370000, 0x00380000, 0x00390000,
|
||||
0x003a0000, 0x003b0000, 0x003c0000, 0x003d0000, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000,
|
||||
0x00010000, 0x00020000, 0x00030000, 0x00040000, 0x00050000, 0x00060000,
|
||||
0x00070000, 0x00080000, 0x00090000, 0x000a0000, 0x000b0000, 0x000c0000,
|
||||
0x000d0000, 0x000e0000, 0x000f0000, 0x00100000, 0x00110000, 0x00120000,
|
||||
0x00130000, 0x00140000, 0x00150000, 0x00160000, 0x00170000, 0x00180000,
|
||||
0x00190000, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0x001a0000, 0x001b0000, 0x001c0000, 0x001d0000, 0x001e0000,
|
||||
0x001f0000, 0x00200000, 0x00210000, 0x00220000, 0x00230000, 0x00240000,
|
||||
0x00250000, 0x00260000, 0x00270000, 0x00280000, 0x00290000, 0x002a0000,
|
||||
0x002b0000, 0x002c0000, 0x002d0000, 0x002e0000, 0x002f0000, 0x00300000,
|
||||
0x00310000, 0x00320000, 0x00330000, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
|
||||
};
|
||||
|
||||
|
||||
#else
|
||||
|
||||
|
||||
/* SPECIAL DECODE TABLES FOR BIG ENDIAN (IBM/MOTOROLA/SUN) CPUS */
|
||||
|
||||
const uint32_t base64_table_dec_32bit_d0[256] = {
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xf8000000, 0xffffffff, 0xffffffff, 0xffffffff, 0xfc000000,
|
||||
0xd0000000, 0xd4000000, 0xd8000000, 0xdc000000, 0xe0000000, 0xe4000000,
|
||||
0xe8000000, 0xec000000, 0xf0000000, 0xf4000000, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000,
|
||||
0x04000000, 0x08000000, 0x0c000000, 0x10000000, 0x14000000, 0x18000000,
|
||||
0x1c000000, 0x20000000, 0x24000000, 0x28000000, 0x2c000000, 0x30000000,
|
||||
0x34000000, 0x38000000, 0x3c000000, 0x40000000, 0x44000000, 0x48000000,
|
||||
0x4c000000, 0x50000000, 0x54000000, 0x58000000, 0x5c000000, 0x60000000,
|
||||
0x64000000, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0x68000000, 0x6c000000, 0x70000000, 0x74000000, 0x78000000,
|
||||
0x7c000000, 0x80000000, 0x84000000, 0x88000000, 0x8c000000, 0x90000000,
|
||||
0x94000000, 0x98000000, 0x9c000000, 0xa0000000, 0xa4000000, 0xa8000000,
|
||||
0xac000000, 0xb0000000, 0xb4000000, 0xb8000000, 0xbc000000, 0xc0000000,
|
||||
0xc4000000, 0xc8000000, 0xcc000000, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
|
||||
};
|
||||
|
||||
|
||||
const uint32_t base64_table_dec_32bit_d1[256] = {
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0x03e00000, 0xffffffff, 0xffffffff, 0xffffffff, 0x03f00000,
|
||||
0x03400000, 0x03500000, 0x03600000, 0x03700000, 0x03800000, 0x03900000,
|
||||
0x03a00000, 0x03b00000, 0x03c00000, 0x03d00000, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000,
|
||||
0x00100000, 0x00200000, 0x00300000, 0x00400000, 0x00500000, 0x00600000,
|
||||
0x00700000, 0x00800000, 0x00900000, 0x00a00000, 0x00b00000, 0x00c00000,
|
||||
0x00d00000, 0x00e00000, 0x00f00000, 0x01000000, 0x01100000, 0x01200000,
|
||||
0x01300000, 0x01400000, 0x01500000, 0x01600000, 0x01700000, 0x01800000,
|
||||
0x01900000, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0x01a00000, 0x01b00000, 0x01c00000, 0x01d00000, 0x01e00000,
|
||||
0x01f00000, 0x02000000, 0x02100000, 0x02200000, 0x02300000, 0x02400000,
|
||||
0x02500000, 0x02600000, 0x02700000, 0x02800000, 0x02900000, 0x02a00000,
|
||||
0x02b00000, 0x02c00000, 0x02d00000, 0x02e00000, 0x02f00000, 0x03000000,
|
||||
0x03100000, 0x03200000, 0x03300000, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
|
||||
};
|
||||
|
||||
|
||||
const uint32_t base64_table_dec_32bit_d2[256] = {
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0x000f8000, 0xffffffff, 0xffffffff, 0xffffffff, 0x000fc000,
|
||||
0x000d0000, 0x000d4000, 0x000d8000, 0x000dc000, 0x000e0000, 0x000e4000,
|
||||
0x000e8000, 0x000ec000, 0x000f0000, 0x000f4000, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000,
|
||||
0x00004000, 0x00008000, 0x0000c000, 0x00010000, 0x00014000, 0x00018000,
|
||||
0x0001c000, 0x00020000, 0x00024000, 0x00028000, 0x0002c000, 0x00030000,
|
||||
0x00034000, 0x00038000, 0x0003c000, 0x00040000, 0x00044000, 0x00048000,
|
||||
0x0004c000, 0x00050000, 0x00054000, 0x00058000, 0x0005c000, 0x00060000,
|
||||
0x00064000, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0x00068000, 0x0006c000, 0x00070000, 0x00074000, 0x00078000,
|
||||
0x0007c000, 0x00080000, 0x00084000, 0x00088000, 0x0008c000, 0x00090000,
|
||||
0x00094000, 0x00098000, 0x0009c000, 0x000a0000, 0x000a4000, 0x000a8000,
|
||||
0x000ac000, 0x000b0000, 0x000b4000, 0x000b8000, 0x000bc000, 0x000c0000,
|
||||
0x000c4000, 0x000c8000, 0x000cc000, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
|
||||
};
|
||||
|
||||
|
||||
const uint32_t base64_table_dec_32bit_d3[256] = {
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0x00003e00, 0xffffffff, 0xffffffff, 0xffffffff, 0x00003f00,
|
||||
0x00003400, 0x00003500, 0x00003600, 0x00003700, 0x00003800, 0x00003900,
|
||||
0x00003a00, 0x00003b00, 0x00003c00, 0x00003d00, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000,
|
||||
0x00000100, 0x00000200, 0x00000300, 0x00000400, 0x00000500, 0x00000600,
|
||||
0x00000700, 0x00000800, 0x00000900, 0x00000a00, 0x00000b00, 0x00000c00,
|
||||
0x00000d00, 0x00000e00, 0x00000f00, 0x00001000, 0x00001100, 0x00001200,
|
||||
0x00001300, 0x00001400, 0x00001500, 0x00001600, 0x00001700, 0x00001800,
|
||||
0x00001900, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0x00001a00, 0x00001b00, 0x00001c00, 0x00001d00, 0x00001e00,
|
||||
0x00001f00, 0x00002000, 0x00002100, 0x00002200, 0x00002300, 0x00002400,
|
||||
0x00002500, 0x00002600, 0x00002700, 0x00002800, 0x00002900, 0x00002a00,
|
||||
0x00002b00, 0x00002c00, 0x00002d00, 0x00002e00, 0x00002f00, 0x00003000,
|
||||
0x00003100, 0x00003200, 0x00003300, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
|
||||
};
|
||||
|
||||
|
||||
#endif
|
1031
deps/base64/base64/lib/tables/table_enc_12bit.h
vendored
Normal file
1031
deps/base64/base64/lib/tables/table_enc_12bit.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
45
deps/base64/base64/lib/tables/table_enc_12bit.py
vendored
Executable file
45
deps/base64/base64/lib/tables/table_enc_12bit.py
vendored
Executable file
@ -0,0 +1,45 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
def tr(x):
    """Return the ASCII code of the Base64 alphabet character at index x."""
    upper = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    alphabet = upper + upper.lower() + '0123456789' + '+/'
    return ord(alphabet[x])
|
||||
|
||||
def table(fn):
    """Render fn(n) for every 12-bit n as C hex literals, eight per line.

    Each entry is formatted as ``0x%04XU,``; lines start with a tab and
    entries on the same line are separated by a single space.
    """
    def prefix(n):
        # The very first entry gets only the tab; every later row start
        # gets a newline in front of it as well.
        if n == 0:
            return "\t"
        return "\n\t" if n % 8 == 0 else " "

    return "".join(
        "{}0x{:04X}U,".format(prefix(n), fn(n)) for n in range(2**12)
    )
|
||||
|
||||
def table_be():
    """Generate the 12-bit lookup table used when BASE64_LITTLE_ENDIAN is off.

    The character for the upper six bits lands in the high byte of each
    16-bit entry, the character for the lower six bits in the low byte.
    """
    def entry(n):
        low_byte = tr(n & 0x3F)
        high_byte = tr(n >> 6)
        return (high_byte << 8) | low_byte

    return table(entry)
|
||||
|
||||
def table_le():
    """Generate the 12-bit lookup table used when BASE64_LITTLE_ENDIAN is on.

    The character for the upper six bits lands in the low byte of each
    16-bit entry, the character for the lower six bits in the high byte.
    """
    def entry(n):
        low_byte = tr(n >> 6)
        high_byte = tr(n & 0x3F)
        return (high_byte << 8) | low_byte

    return table(entry)
|
||||
|
||||
def main():
    """Print the C source that defines base64_table_enc_12bit to stdout."""
    header = [
        "#include <stdint.h>",
        "",
        "const uint16_t base64_table_enc_12bit[] = {",
    ]
    # Both byte orders are emitted; the C preprocessor picks one.
    body = [
        "#if BASE64_LITTLE_ENDIAN",
        table_le(),
        "#else",
        table_be(),
        "#endif",
    ]
    footer = ["};"]
    print("\n".join(header + body + footer))
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
184
deps/base64/base64/lib/tables/table_generator.c
vendored
Normal file
184
deps/base64/base64/lib/tables/table_generator.c
vendored
Normal file
@ -0,0 +1,184 @@
|
||||
/**
|
||||
*
|
||||
* Copyright 2005, 2006 Nick Galbreath -- nickg [at] modp [dot] com
|
||||
* Copyright 2017 Matthieu Darbois
|
||||
* All rights reserved.
|
||||
*
|
||||
* http://modp.com/release/base64
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* - Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
* IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
* PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
*/
|
||||
|
||||
/****************************/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
static uint8_t b64chars[64] = {
|
||||
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
|
||||
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
|
||||
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
|
||||
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
|
||||
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'
|
||||
};
|
||||
|
||||
static uint8_t padchar = '=';
|
||||
|
||||
static void printStart(void)
|
||||
{
|
||||
printf("#include <stdint.h>\n");
|
||||
printf("#define CHAR62 '%c'\n", b64chars[62]);
|
||||
printf("#define CHAR63 '%c'\n", b64chars[63]);
|
||||
printf("#define CHARPAD '%c'\n", padchar);
|
||||
}
|
||||
|
||||
/* Mark all 256 entries of a decode table as invalid (0xFFFFFFFF). */
static void clearDecodeTable(uint32_t* ary)
{
	/* Every byte of the marker value is 0xFF, so a byte-wise fill is exact. */
	memset(ary, 0xFF, 256 * sizeof(uint32_t));
}
|
||||
|
||||
/*
 * Dump an array of uint32_t as a C array definition on stdout.
 *
 * ary:  values to print (not read when sz is 0)
 * sz:   number of elements in ary
 * name: identifier used for the generated array
 *
 * Values are written as 8-digit lowercase hex, six per line. An empty
 * array produces just the opening and closing lines.
 */
void uint32_array_to_c_hex(const uint32_t* ary, size_t sz, const char* name)
{
	size_t i;

	printf("const uint32_t %s[%d] = {\n", name, (int)sz);
	for (i = 0; i < sz; ++i) {
		/* Separator goes in front of every element but the first;
		 * each seventh element starts a new line. */
		if (i != 0) {
			fputs((i % 6 == 0) ? ",\n" : ", ", stdout);
		}
		printf("0x%08" PRIx32, ary[i]);
	}
	printf("\n};\n");
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
uint32_t x;
|
||||
uint32_t i = 0;
|
||||
uint32_t ary[256];
|
||||
|
||||
/* over-ride standard alphabet */
|
||||
if (argc == 2) {
|
||||
uint8_t* replacements = (uint8_t*)argv[1];
|
||||
if (strlen((char*)replacements) != 3) {
|
||||
fprintf(stderr, "input must be a string of 3 characters '-', '.' or '_'\n");
|
||||
exit(1);
|
||||
}
|
||||
fprintf(stderr, "fusing '%s' as replacements in base64 encoding\n", replacements);
|
||||
b64chars[62] = replacements[0];
|
||||
b64chars[63] = replacements[1];
|
||||
padchar = replacements[2];
|
||||
}
|
||||
|
||||
printStart();
|
||||
|
||||
printf("\n\n#if BASE64_LITTLE_ENDIAN\n");
|
||||
|
||||
printf("\n\n/* SPECIAL DECODE TABLES FOR LITTLE ENDIAN (INTEL) CPUS */\n\n");
|
||||
|
||||
clearDecodeTable(ary);
|
||||
for (i = 0; i < 64; ++i) {
|
||||
x = b64chars[i];
|
||||
ary[x] = i << 2;
|
||||
}
|
||||
uint32_array_to_c_hex(ary, sizeof(ary) / sizeof(uint32_t), "base64_table_dec_32bit_d0");
|
||||
printf("\n\n");
|
||||
|
||||
clearDecodeTable(ary);
|
||||
for (i = 0; i < 64; ++i) {
|
||||
x = b64chars[i];
|
||||
ary[x] = ((i & 0x30) >> 4) | ((i & 0x0F) << 12);
|
||||
}
|
||||
uint32_array_to_c_hex(ary, sizeof(ary) / sizeof(uint32_t), "base64_table_dec_32bit_d1");
|
||||
printf("\n\n");
|
||||
|
||||
clearDecodeTable(ary);
|
||||
for (i = 0; i < 64; ++i) {
|
||||
x = b64chars[i];
|
||||
ary[x] = ((i & 0x03) << 22) | ((i & 0x3c) << 6);
|
||||
}
|
||||
uint32_array_to_c_hex(ary, sizeof(ary) / sizeof(uint32_t), "base64_table_dec_32bit_d2");
|
||||
printf("\n\n");
|
||||
|
||||
clearDecodeTable(ary);
|
||||
for (i = 0; i < 64; ++i) {
|
||||
x = b64chars[i];
|
||||
ary[x] = i << 16;
|
||||
}
|
||||
uint32_array_to_c_hex(ary, sizeof(ary) / sizeof(uint32_t), "base64_table_dec_32bit_d3");
|
||||
printf("\n\n");
|
||||
|
||||
printf("#else\n");
|
||||
|
||||
printf("\n\n/* SPECIAL DECODE TABLES FOR BIG ENDIAN (IBM/MOTOROLA/SUN) CPUS */\n\n");
|
||||
|
||||
clearDecodeTable(ary);
|
||||
for (i = 0; i < 64; ++i) {
|
||||
x = b64chars[i];
|
||||
ary[x] = i << 26;
|
||||
}
|
||||
uint32_array_to_c_hex(ary, sizeof(ary) / sizeof(uint32_t), "base64_table_dec_32bit_d0");
|
||||
printf("\n\n");
|
||||
|
||||
clearDecodeTable(ary);
|
||||
for (i = 0; i < 64; ++i) {
|
||||
x = b64chars[i];
|
||||
ary[x] = i << 20;
|
||||
}
|
||||
uint32_array_to_c_hex(ary, sizeof(ary) / sizeof(uint32_t), "base64_table_dec_32bit_d1");
|
||||
printf("\n\n");
|
||||
|
||||
clearDecodeTable(ary);
|
||||
for (i = 0; i < 64; ++i) {
|
||||
x = b64chars[i];
|
||||
ary[x] = i << 14;
|
||||
}
|
||||
uint32_array_to_c_hex(ary, sizeof(ary) / sizeof(uint32_t), "base64_table_dec_32bit_d2");
|
||||
printf("\n\n");
|
||||
|
||||
clearDecodeTable(ary);
|
||||
for (i = 0; i < 64; ++i) {
|
||||
x = b64chars[i];
|
||||
ary[x] = i << 8;
|
||||
}
|
||||
uint32_array_to_c_hex(ary, sizeof(ary) / sizeof(uint32_t), "base64_table_dec_32bit_d3");
|
||||
printf("\n\n");
|
||||
|
||||
printf("#endif\n");
|
||||
|
||||
return 0;
|
||||
}
|
40
deps/base64/base64/lib/tables/tables.c
vendored
Normal file
40
deps/base64/base64/lib/tables/tables.c
vendored
Normal file
@ -0,0 +1,40 @@
|
||||
#include "tables.h"
|
||||
|
||||
const uint8_t
|
||||
base64_table_enc_6bit[] =
|
||||
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||||
"abcdefghijklmnopqrstuvwxyz"
|
||||
"0123456789"
|
||||
"+/";
|
||||
|
||||
// In the lookup table below, note that the value for '=' (character 61) is
|
||||
// 254, not 255. This character is used for in-band signaling of the end of
|
||||
// the datastream, and we will use that later. The characters A-Z, a-z, 0-9
|
||||
// and + / are mapped to their "decoded" values. The other bytes all map to
|
||||
// the value 255, which flags them as "invalid input".
|
||||
|
||||
const uint8_t
|
||||
base64_table_dec_8bit[] =
|
||||
{
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 0..15
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 16..31
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 255, 255, 255, 63, // 32..47
|
||||
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, 255, 254, 255, 255, // 48..63
|
||||
255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 64..79
|
||||
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 255, 255, 255, 255, 255, // 80..95
|
||||
255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 96..111
|
||||
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 255, 255, 255, 255, 255, // 112..127
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 128..143
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
||||
};
|
||||
|
||||
#if BASE64_WORDSIZE >= 32
|
||||
# include "table_dec_32bit.h"
|
||||
# include "table_enc_12bit.h"
|
||||
#endif
|
23
deps/base64/base64/lib/tables/tables.h
vendored
Normal file
23
deps/base64/base64/lib/tables/tables.h
vendored
Normal file
@ -0,0 +1,23 @@
|
||||
#ifndef BASE64_TABLES_H
|
||||
#define BASE64_TABLES_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "../env.h"
|
||||
|
||||
// These tables are used by all codecs for fallback plain encoding/decoding:
|
||||
extern const uint8_t base64_table_enc_6bit[];
|
||||
extern const uint8_t base64_table_dec_8bit[];
|
||||
|
||||
// These tables are used for the 32-bit and 64-bit generic decoders:
|
||||
#if BASE64_WORDSIZE >= 32
|
||||
extern const uint32_t base64_table_dec_32bit_d0[];
|
||||
extern const uint32_t base64_table_dec_32bit_d1[];
|
||||
extern const uint32_t base64_table_dec_32bit_d2[];
|
||||
extern const uint32_t base64_table_dec_32bit_d3[];
|
||||
|
||||
// This table is used by the 32 and 64-bit generic encoders:
|
||||
extern const uint16_t base64_table_enc_12bit[];
|
||||
#endif
|
||||
|
||||
#endif // BASE64_TABLES_H
|
45
deps/base64/base64/test/CMakeLists.txt
vendored
Normal file
45
deps/base64/base64/test/CMakeLists.txt
vendored
Normal file
@ -0,0 +1,45 @@
|
||||
# Written in 2016 by Henrik Steffen Gaßmann henrik@gassmann.onl
|
||||
#
|
||||
# To the extent possible under law, the author(s) have dedicated all
|
||||
# copyright and related and neighboring rights to this software to the
|
||||
# public domain worldwide. This software is distributed without any warranty.
|
||||
#
|
||||
# You should have received a copy of the CC0 Public Domain Dedication
|
||||
# along with this software. If not, see
|
||||
#
|
||||
# http://creativecommons.org/publicdomain/zero/1.0/
|
||||
#
|
||||
########################################################################
|
||||
|
||||
# add_base64_test(<name> <source>...)
#
# Builds executable <name> from the listed sources, links it against the
# base64 library, registers it with CTest and installs it into
# CMAKE_INSTALL_BINDIR.
function(add_base64_test TEST_NAME)
    # Build straight from ARGN instead of an accumulated list variable, so a
    # SRC_FILES value inherited from the calling scope can never end up in
    # the source list.
    add_executable(${TEST_NAME} ${ARGN})
    target_link_libraries(${TEST_NAME} PRIVATE base64)

    add_test(NAME ${TEST_NAME}
        COMMAND ${TEST_NAME}
    )
    install(TARGETS ${TEST_NAME} DESTINATION ${CMAKE_INSTALL_BINDIR})
endfunction()
|
||||
|
||||
|
||||
add_base64_test(test_base64
|
||||
codec_supported.c
|
||||
test_base64.c
|
||||
)
|
||||
|
||||
if (NOT WIN32)
|
||||
add_base64_test(benchmark
|
||||
codec_supported.c
|
||||
benchmark.c
|
||||
)
|
||||
endif()
|
||||
|
||||
if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
|
||||
target_link_libraries(benchmark PRIVATE rt)
|
||||
endif()
|
||||
|
33
deps/base64/base64/test/Makefile
vendored
Normal file
33
deps/base64/base64/test/Makefile
vendored
Normal file
@ -0,0 +1,33 @@
|
||||
CFLAGS += -std=c99 -O3 -Wall -Wextra -pedantic
|
||||
ifdef OPENMP
|
||||
CFLAGS += -fopenmp
|
||||
endif
|
||||
|
||||
TARGET := $(shell $(CC) -dumpmachine)
|
||||
ifneq (, $(findstring darwin, $(TARGET)))
|
||||
BENCH_LDFLAGS=
|
||||
else
|
||||
# default to linux, -lrt needed
|
||||
BENCH_LDFLAGS=-lrt
|
||||
endif
|
||||
|
||||
.PHONY: clean test
|
||||
|
||||
test: clean test_base64 benchmark
|
||||
./test_base64
|
||||
./benchmark
|
||||
|
||||
test_base64: test_base64.c codec_supported.o ../lib/libbase64.o
|
||||
$(CC) $(CFLAGS) -o $@ $^
|
||||
|
||||
benchmark: benchmark.c codec_supported.o ../lib/libbase64.o
|
||||
$(CC) $(CFLAGS) -o $@ $^ $(BENCH_LDFLAGS)
|
||||
|
||||
# Anything that lives in the parent directory is built by the parent Makefile.
# $(MAKE) rather than a literal "make" keeps the same make program and passes
# on command-line flags such as -j and -n.
../%:
	$(MAKE) -C .. $*
|
||||
|
||||
%.o: %.c
|
||||
$(CC) $(CFLAGS) -o $@ -c $<
|
||||
|
||||
clean:
|
||||
rm -f benchmark test_base64 *.o
|
233
deps/base64/base64/test/benchmark.c
vendored
Normal file
233
deps/base64/base64/test/benchmark.c
vendored
Normal file
@ -0,0 +1,233 @@
|
||||
// For clock_gettime(2):
|
||||
#ifndef _POSIX_C_SOURCE
|
||||
#define _POSIX_C_SOURCE 199309L
|
||||
#endif
|
||||
|
||||
// For CLOCK_REALTIME on FreeBSD:
|
||||
#ifndef _XOPEN_SOURCE
|
||||
#define _XOPEN_SOURCE 600
|
||||
#endif
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <time.h>
|
||||
|
||||
#ifdef __MACH__
|
||||
#include <mach/mach_time.h>
|
||||
#endif
|
||||
|
||||
#include "../include/libbase64.h"
|
||||
#include "codec_supported.h"
|
||||
|
||||
#define KB 1024
|
||||
#define MB (1024 * KB)
|
||||
|
||||
#define RANDOMDEV "/dev/urandom"
|
||||
|
||||
struct buffers {
|
||||
char *reg;
|
||||
char *enc;
|
||||
size_t regsz;
|
||||
size_t encsz;
|
||||
};
|
||||
|
||||
// Define buffer sizes to test with:
|
||||
static struct bufsize {
|
||||
char *label;
|
||||
size_t len;
|
||||
int repeat;
|
||||
int batch;
|
||||
}
|
||||
sizes[] = {
|
||||
{ "10 MB", MB * 10, 10, 1 },
|
||||
{ "1 MB", MB * 1, 10, 10 },
|
||||
{ "100 KB", KB * 100, 10, 100 },
|
||||
{ "10 KB", KB * 10, 100, 100 },
|
||||
{ "1 KB", KB * 1, 100, 1000 },
|
||||
};
|
||||
|
||||
static inline float
|
||||
bytes_to_mb (size_t bytes)
|
||||
{
|
||||
return bytes / (float) MB;
|
||||
}
|
||||
|
||||
static bool
|
||||
get_random_data (struct buffers *b, char **errmsg)
|
||||
{
|
||||
int fd;
|
||||
ssize_t nread;
|
||||
size_t total_read = 0;
|
||||
|
||||
// Open random device for semi-random data:
|
||||
if ((fd = open(RANDOMDEV, O_RDONLY)) < 0) {
|
||||
*errmsg = "Cannot open " RANDOMDEV;
|
||||
return false;
|
||||
}
|
||||
|
||||
printf("Filling buffer with %.1f MB of random data...\n", bytes_to_mb(b->regsz));
|
||||
|
||||
while (total_read < b->regsz) {
|
||||
if ((nread = read(fd, b->reg + total_read, b->regsz - total_read)) < 0) {
|
||||
*errmsg = "Read error";
|
||||
close(fd);
|
||||
return false;
|
||||
}
|
||||
total_read += nread;
|
||||
}
|
||||
close(fd);
|
||||
return true;
|
||||
}
|
||||
|
||||
#ifdef __MACH__
|
||||
typedef uint64_t base64_timespec;
|
||||
static void
|
||||
base64_gettime (base64_timespec * o_time)
|
||||
{
|
||||
*o_time = mach_absolute_time();
|
||||
}
|
||||
|
||||
static float
|
||||
timediff_sec (base64_timespec *start, base64_timespec *end)
|
||||
{
|
||||
uint64_t diff = *end - *start;
|
||||
mach_timebase_info_data_t tb = { 0, 0 };
|
||||
mach_timebase_info(&tb);
|
||||
|
||||
return (float)((diff * tb.numer) / tb.denom) / 1e9f;
|
||||
}
|
||||
#else
|
||||
typedef struct timespec base64_timespec;
|
||||
static void
|
||||
base64_gettime (base64_timespec * o_time)
|
||||
{
|
||||
clock_gettime(CLOCK_REALTIME, o_time);
|
||||
}
|
||||
|
||||
static float
|
||||
timediff_sec (base64_timespec *start, base64_timespec *end)
|
||||
{
|
||||
return (end->tv_sec - start->tv_sec) + ((float)(end->tv_nsec - start->tv_nsec)) / 1e9f;
|
||||
}
|
||||
#endif
|
||||
|
||||
static void
|
||||
codec_bench_enc (struct buffers *b, const struct bufsize *bs, const char *name, unsigned int flags)
|
||||
{
|
||||
float timediff, fastest = -1.0f;
|
||||
base64_timespec start, end;
|
||||
|
||||
// Reset buffer size:
|
||||
b->regsz = bs->len;
|
||||
|
||||
// Repeat benchmark a number of times for a fair test:
|
||||
for (int i = bs->repeat; i; i--) {
|
||||
|
||||
// Timing loop, use batches to increase timer resolution:
|
||||
base64_gettime(&start);
|
||||
for (int j = bs->batch; j; j--)
|
||||
base64_encode(b->reg, b->regsz, b->enc, &b->encsz, flags);
|
||||
base64_gettime(&end);
|
||||
|
||||
// Calculate average time of batch:
|
||||
timediff = timediff_sec(&start, &end) / bs->batch;
|
||||
|
||||
// Update fastest time seen:
|
||||
if (fastest < 0.0f || timediff < fastest)
|
||||
fastest = timediff;
|
||||
}
|
||||
|
||||
printf("%s\tencode\t%.02f MB/sec\n", name, bytes_to_mb(b->regsz) / fastest);
|
||||
}
|
||||
|
||||
static void
|
||||
codec_bench_dec (struct buffers *b, const struct bufsize *bs, const char *name, unsigned int flags)
|
||||
{
|
||||
float timediff, fastest = -1.0f;
|
||||
base64_timespec start, end;
|
||||
|
||||
// Reset buffer size:
|
||||
b->encsz = bs->len;
|
||||
|
||||
// Repeat benchmark a number of times for a fair test:
|
||||
for (int i = bs->repeat; i; i--) {
|
||||
|
||||
// Timing loop, use batches to increase timer resolution:
|
||||
base64_gettime(&start);
|
||||
for (int j = bs->batch; j; j--)
|
||||
base64_decode(b->enc, b->encsz, b->reg, &b->regsz, flags);
|
||||
base64_gettime(&end);
|
||||
|
||||
// Calculate average time of batch:
|
||||
timediff = timediff_sec(&start, &end) / bs->batch;
|
||||
|
||||
// Update fastest time seen:
|
||||
if (fastest < 0.0f || timediff < fastest)
|
||||
fastest = timediff;
|
||||
}
|
||||
|
||||
printf("%s\tdecode\t%.02f MB/sec\n", name, bytes_to_mb(b->encsz) / fastest);
|
||||
}
|
||||
|
||||
// Run the encode benchmark and then the decode benchmark for one codec and
// one buffer size. The order matters: the decode pass reads b->enc, which is
// the buffer the encode pass writes its output to.
static void
|
||||
codec_bench (struct buffers *b, const struct bufsize *bs, const char *name, unsigned int flags)
|
||||
{
|
||||
codec_bench_enc(b, bs, name, flags);
|
||||
codec_bench_dec(b, bs, name, flags);
|
||||
}
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
int ret = 0;
|
||||
char *errmsg = NULL;
|
||||
struct buffers b;
|
||||
|
||||
// Set buffer sizes to largest buffer length:
|
||||
b.regsz = sizes[0].len;
|
||||
b.encsz = sizes[0].len * 5 / 3;
|
||||
|
||||
// Allocate space for megabytes of random data:
|
||||
if ((b.reg = malloc(b.regsz)) == NULL) {
|
||||
errmsg = "Out of memory";
|
||||
ret = 1;
|
||||
goto err0;
|
||||
}
|
||||
|
||||
// Allocate space for encoded output:
|
||||
if ((b.enc = malloc(b.encsz)) == NULL) {
|
||||
errmsg = "Out of memory";
|
||||
ret = 1;
|
||||
goto err1;
|
||||
}
|
||||
|
||||
// Fill buffer with random data:
|
||||
if (get_random_data(&b, &errmsg) == false) {
|
||||
ret = 1;
|
||||
goto err2;
|
||||
}
|
||||
|
||||
// Loop over all buffer sizes:
|
||||
for (size_t i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
|
||||
printf("Testing with buffer size %s, fastest of %d * %d\n",
|
||||
sizes[i].label, sizes[i].repeat, sizes[i].batch);
|
||||
|
||||
// Loop over all codecs:
|
||||
for (size_t j = 0; codecs[j]; j++)
|
||||
if (codec_supported(1 << j))
|
||||
codec_bench(&b, &sizes[i], codecs[j], 1 << j);
|
||||
};
|
||||
|
||||
// Free memory:
|
||||
err2: free(b.enc);
|
||||
err1: free(b.reg);
|
||||
err0: if (errmsg)
|
||||
fputs(errmsg, stderr);
|
||||
|
||||
return ret;
|
||||
}
|
28
deps/base64/base64/test/ci/test.sh
vendored
Executable file
28
deps/base64/base64/test/ci/test.sh
vendored
Executable file
@ -0,0 +1,28 @@
|
||||
#!/bin/bash
# CI driver: pick the SIMD compiler flags that fit the host architecture,
# then build the library and run the test suite.
set -ve

MACHINE=$(uname -m)
if [ "${MACHINE}" == "x86_64" ]; then
	export SSSE3_CFLAGS=-mssse3
	export SSE41_CFLAGS=-msse4.1
	export SSE42_CFLAGS=-msse4.2
	export AVX_CFLAGS=-mavx
	# no AVX2 on GHA macOS
	if [ "$(uname -s)" != "Darwin" ]; then
		export AVX2_CFLAGS=-mavx2
	fi
elif [ "${MACHINE}" == "aarch64" ]; then
	export NEON64_CFLAGS="-march=armv8-a"
elif [ "${MACHINE}" == "armv7l" ]; then
	export NEON32_CFLAGS="-march=armv7-a -mfloat-abi=hard -mfpu=neon"
fi

# The test Makefile checks OPENMP with ifdef, so "0" has to become "unset":
if [ "${OPENMP:-}" == "0" ]; then
	unset OPENMP
fi

uname -a
# Fall back to cc when CC is not set; an empty expansion would otherwise try
# to run "--version" as a command and abort the script under set -e.
${CC:-cc} --version

make
make -C test
|
28
deps/base64/base64/test/codec_supported.c
vendored
Normal file
28
deps/base64/base64/test/codec_supported.c
vendored
Normal file
@ -0,0 +1,28 @@
|
||||
#include <string.h>
|
||||
|
||||
#include "../include/libbase64.h"
|
||||
|
||||
// Codec names, terminated by NULL. The benchmark pairs the name at index j
// with the flag value (1 << j), so the order here must not change.
static char *codec_names[] = {
	"AVX2",
	"NEON32",
	"NEON64",
	"plain",
	"SSSE3",
	"SSE41",
	"SSE42",
	"AVX",
	NULL
};

// Public handle to the name list, declared in codec_supported.h:
char **codecs = codec_names;
|
||||
|
||||
// Report whether the codec selected by `flags` is usable.
// Returns 1 when a trial decode does not return -1, otherwise 0.
int
codec_supported (int flags)
{
	// Short, valid Base64 input used purely as a probe; the decoded
	// bytes themselves are never inspected:
	static const char probe[] = "aGVsbG8=";
	char scratch[10];
	size_t scratch_len;
	const int rc = base64_decode(probe, strlen(probe), scratch, &scratch_len, flags);

	return rc != -1;
}
|
3
deps/base64/base64/test/codec_supported.h
vendored
Normal file
3
deps/base64/base64/test/codec_supported.h
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
// NULL-terminated array of codec name strings, defined in codec_supported.c.
extern char **codecs;
|
||||
|
||||
// Returns nonzero when a trial base64_decode() call with the given codec
// flags does not return -1, and zero otherwise.
int codec_supported (int flags);
|
41
deps/base64/base64/test/moby_dick.h
vendored
Normal file
41
deps/base64/base64/test/moby_dick.h
vendored
Normal file
@ -0,0 +1,41 @@
|
||||
static const char *moby_dick_plain =
|
||||
"Call me Ishmael. Some years ago--never mind how long precisely--having\n"
|
||||
"little or no money in my purse, and nothing particular to interest me on\n"
|
||||
"shore, I thought I would sail about a little and see the watery part of\n"
|
||||
"the world. It is a way I have of driving off the spleen and regulating\n"
|
||||
"the circulation. Whenever I find myself growing grim about the mouth;\n"
|
||||
"whenever it is a damp, drizzly November in my soul; whenever I find\n"
|
||||
"myself involuntarily pausing before coffin warehouses, and bringing up\n"
|
||||
"the rear of every funeral I meet; and especially whenever my hypos get\n"
|
||||
"such an upper hand of me, that it requires a strong moral principle to\n"
|
||||
"prevent me from deliberately stepping into the street, and methodically\n"
|
||||
"knocking people's hats off--then, I account it high time to get to sea\n"
|
||||
"as soon as I can. This is my substitute for pistol and ball. With a\n"
|
||||
"philosophical flourish Cato throws himself upon his sword; I quietly\n"
|
||||
"take to the ship. There is nothing surprising in this. If they but knew\n"
|
||||
"it, almost all men in their degree, some time or other, cherish very\n"
|
||||
"nearly the same feelings towards the ocean with me.\n";
|
||||
|
||||
static const char *moby_dick_base64 =
|
||||
"Q2FsbCBtZSBJc2htYWVsLiBTb21lIHllYXJzIGFnby0tbmV2ZXIgbWluZCBob3cgbG9uZ"
|
||||
"yBwcmVjaXNlbHktLWhhdmluZwpsaXR0bGUgb3Igbm8gbW9uZXkgaW4gbXkgcHVyc2UsIG"
|
||||
"FuZCBub3RoaW5nIHBhcnRpY3VsYXIgdG8gaW50ZXJlc3QgbWUgb24Kc2hvcmUsIEkgdGh"
|
||||
"vdWdodCBJIHdvdWxkIHNhaWwgYWJvdXQgYSBsaXR0bGUgYW5kIHNlZSB0aGUgd2F0ZXJ5"
|
||||
"IHBhcnQgb2YKdGhlIHdvcmxkLiBJdCBpcyBhIHdheSBJIGhhdmUgb2YgZHJpdmluZyBvZ"
|
||||
"mYgdGhlIHNwbGVlbiBhbmQgcmVndWxhdGluZwp0aGUgY2lyY3VsYXRpb24uIFdoZW5ldm"
|
||||
"VyIEkgZmluZCBteXNlbGYgZ3Jvd2luZyBncmltIGFib3V0IHRoZSBtb3V0aDsKd2hlbmV"
|
||||
"2ZXIgaXQgaXMgYSBkYW1wLCBkcml6emx5IE5vdmVtYmVyIGluIG15IHNvdWw7IHdoZW5l"
|
||||
"dmVyIEkgZmluZApteXNlbGYgaW52b2x1bnRhcmlseSBwYXVzaW5nIGJlZm9yZSBjb2Zma"
|
||||
"W4gd2FyZWhvdXNlcywgYW5kIGJyaW5naW5nIHVwCnRoZSByZWFyIG9mIGV2ZXJ5IGZ1bm"
|
||||
"VyYWwgSSBtZWV0OyBhbmQgZXNwZWNpYWxseSB3aGVuZXZlciBteSBoeXBvcyBnZXQKc3V"
|
||||
"jaCBhbiB1cHBlciBoYW5kIG9mIG1lLCB0aGF0IGl0IHJlcXVpcmVzIGEgc3Ryb25nIG1v"
|
||||
"cmFsIHByaW5jaXBsZSB0bwpwcmV2ZW50IG1lIGZyb20gZGVsaWJlcmF0ZWx5IHN0ZXBwa"
|
||||
"W5nIGludG8gdGhlIHN0cmVldCwgYW5kIG1ldGhvZGljYWxseQprbm9ja2luZyBwZW9wbG"
|
||||
"UncyBoYXRzIG9mZi0tdGhlbiwgSSBhY2NvdW50IGl0IGhpZ2ggdGltZSB0byBnZXQgdG8"
|
||||
"gc2VhCmFzIHNvb24gYXMgSSBjYW4uIFRoaXMgaXMgbXkgc3Vic3RpdHV0ZSBmb3IgcGlz"
|
||||
"dG9sIGFuZCBiYWxsLiBXaXRoIGEKcGhpbG9zb3BoaWNhbCBmbG91cmlzaCBDYXRvIHRoc"
|
||||
"m93cyBoaW1zZWxmIHVwb24gaGlzIHN3b3JkOyBJIHF1aWV0bHkKdGFrZSB0byB0aGUgc2"
|
||||
"hpcC4gVGhlcmUgaXMgbm90aGluZyBzdXJwcmlzaW5nIGluIHRoaXMuIElmIHRoZXkgYnV"
|
||||
"0IGtuZXcKaXQsIGFsbW9zdCBhbGwgbWVuIGluIHRoZWlyIGRlZ3JlZSwgc29tZSB0aW1l"
|
||||
"IG9yIG90aGVyLCBjaGVyaXNoIHZlcnkKbmVhcmx5IHRoZSBzYW1lIGZlZWxpbmdzIHRvd"
|
||||
"2FyZHMgdGhlIG9jZWFuIHdpdGggbWUuCg==";
|
1
deps/base64/base64/test/moby_dick_base64.txt
vendored
Normal file
1
deps/base64/base64/test/moby_dick_base64.txt
vendored
Normal file
@ -0,0 +1 @@
|
||||
Q2FsbCBtZSBJc2htYWVsLiBTb21lIHllYXJzIGFnby0tbmV2ZXIgbWluZCBob3cgbG9uZyBwcmVjaXNlbHktLWhhdmluZwpsaXR0bGUgb3Igbm8gbW9uZXkgaW4gbXkgcHVyc2UsIGFuZCBub3RoaW5nIHBhcnRpY3VsYXIgdG8gaW50ZXJlc3QgbWUgb24Kc2hvcmUsIEkgdGhvdWdodCBJIHdvdWxkIHNhaWwgYWJvdXQgYSBsaXR0bGUgYW5kIHNlZSB0aGUgd2F0ZXJ5IHBhcnQgb2YKdGhlIHdvcmxkLiBJdCBpcyBhIHdheSBJIGhhdmUgb2YgZHJpdmluZyBvZmYgdGhlIHNwbGVlbiBhbmQgcmVndWxhdGluZwp0aGUgY2lyY3VsYXRpb24uIFdoZW5ldmVyIEkgZmluZCBteXNlbGYgZ3Jvd2luZyBncmltIGFib3V0IHRoZSBtb3V0aDsKd2hlbmV2ZXIgaXQgaXMgYSBkYW1wLCBkcml6emx5IE5vdmVtYmVyIGluIG15IHNvdWw7IHdoZW5ldmVyIEkgZmluZApteXNlbGYgaW52b2x1bnRhcmlseSBwYXVzaW5nIGJlZm9yZSBjb2ZmaW4gd2FyZWhvdXNlcywgYW5kIGJyaW5naW5nIHVwCnRoZSByZWFyIG9mIGV2ZXJ5IGZ1bmVyYWwgSSBtZWV0OyBhbmQgZXNwZWNpYWxseSB3aGVuZXZlciBteSBoeXBvcyBnZXQKc3VjaCBhbiB1cHBlciBoYW5kIG9mIG1lLCB0aGF0IGl0IHJlcXVpcmVzIGEgc3Ryb25nIG1vcmFsIHByaW5jaXBsZSB0bwpwcmV2ZW50IG1lIGZyb20gZGVsaWJlcmF0ZWx5IHN0ZXBwaW5nIGludG8gdGhlIHN0cmVldCwgYW5kIG1ldGhvZGljYWxseQprbm9ja2luZyBwZW9wbGUncyBoYXRzIG9mZi0tdGhlbiwgSSBhY2NvdW50IGl0IGhpZ2ggdGltZSB0byBnZXQgdG8gc2VhCmFzIHNvb24gYXMgSSBjYW4uIFRoaXMgaXMgbXkgc3Vic3RpdHV0ZSBmb3IgcGlzdG9sIGFuZCBiYWxsLiBXaXRoIGEKcGhpbG9zb3BoaWNhbCBmbG91cmlzaCBDYXRvIHRocm93cyBoaW1zZWxmIHVwb24gaGlzIHN3b3JkOyBJIHF1aWV0bHkKdGFrZSB0byB0aGUgc2hpcC4gVGhlcmUgaXMgbm90aGluZyBzdXJwcmlzaW5nIGluIHRoaXMuIElmIHRoZXkgYnV0IGtuZXcKaXQsIGFsbW9zdCBhbGwgbWVuIGluIHRoZWlyIGRlZ3JlZSwgc29tZSB0aW1lIG9yIG90aGVyLCBjaGVyaXNoIHZlcnkKbmVhcmx5IHRoZSBzYW1lIGZlZWxpbmdzIHRvd2FyZHMgdGhlIG9jZWFuIHdpdGggbWUuCg==
|
16
deps/base64/base64/test/moby_dick_plain.txt
vendored
Normal file
16
deps/base64/base64/test/moby_dick_plain.txt
vendored
Normal file
@ -0,0 +1,16 @@
|
||||
Call me Ishmael. Some years ago--never mind how long precisely--having
|
||||
little or no money in my purse, and nothing particular to interest me on
|
||||
shore, I thought I would sail about a little and see the watery part of
|
||||
the world. It is a way I have of driving off the spleen and regulating
|
||||
the circulation. Whenever I find myself growing grim about the mouth;
|
||||
whenever it is a damp, drizzly November in my soul; whenever I find
|
||||
myself involuntarily pausing before coffin warehouses, and bringing up
|
||||
the rear of every funeral I meet; and especially whenever my hypos get
|
||||
such an upper hand of me, that it requires a strong moral principle to
|
||||
prevent me from deliberately stepping into the street, and methodically
|
||||
knocking people's hats off--then, I account it high time to get to sea
|
||||
as soon as I can. This is my substitute for pistol and ball. With a
|
||||
philosophical flourish Cato throws himself upon his sword; I quietly
|
||||
take to the ship. There is nothing surprising in this. If they but knew
|
||||
it, almost all men in their degree, some time or other, cherish very
|
||||
nearly the same feelings towards the ocean with me.
|
365
deps/base64/base64/test/test_base64.c
vendored
Normal file
365
deps/base64/base64/test/test_base64.c
vendored
Normal file
@ -0,0 +1,365 @@
|
||||
#include <stdbool.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include "../include/libbase64.h"
|
||||
#include "codec_supported.h"
|
||||
#include "moby_dick.h"
|
||||
|
||||
static char out[2000];
|
||||
static size_t outlen;
|
||||
|
||||
static bool
|
||||
assert_enc (int flags, const char *src, const char *dst)
|
||||
{
|
||||
size_t srclen = strlen(src);
|
||||
size_t dstlen = strlen(dst);
|
||||
|
||||
base64_encode(src, srclen, out, &outlen, flags);
|
||||
|
||||
if (outlen != dstlen) {
|
||||
printf("FAIL: encoding of '%s': length expected %lu, got %lu\n", src,
|
||||
(unsigned long)dstlen,
|
||||
(unsigned long)outlen
|
||||
);
|
||||
return true;
|
||||
}
|
||||
if (strncmp(dst, out, outlen) != 0) {
|
||||
out[outlen] = '\0';
|
||||
printf("FAIL: encoding of '%s': expected output '%s', got '%s'\n", src, dst, out);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool
|
||||
assert_dec (int flags, const char *src, const char *dst)
|
||||
{
|
||||
size_t srclen = strlen(src);
|
||||
size_t dstlen = strlen(dst);
|
||||
|
||||
if (!base64_decode(src, srclen, out, &outlen, flags)) {
|
||||
printf("FAIL: decoding of '%s': decoding error\n", src);
|
||||
return true;
|
||||
}
|
||||
if (outlen != dstlen) {
|
||||
printf("FAIL: encoding of '%s': "
|
||||
"length expected %lu, got %lu\n", src,
|
||||
(unsigned long)dstlen,
|
||||
(unsigned long)outlen
|
||||
);
|
||||
return true;
|
||||
}
|
||||
if (strncmp(dst, out, outlen) != 0) {
|
||||
out[outlen] = '\0';
|
||||
printf("FAIL: decoding of '%s': expected output '%s', got '%s'\n", src, dst, out);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static int
|
||||
assert_roundtrip (int flags, const char *src)
|
||||
{
|
||||
char tmp[1500];
|
||||
size_t tmplen;
|
||||
size_t srclen = strlen(src);
|
||||
|
||||
// Encode the input into global buffer:
|
||||
base64_encode(src, srclen, out, &outlen, flags);
|
||||
|
||||
// Decode the global buffer into local temp buffer:
|
||||
if (!base64_decode(out, outlen, tmp, &tmplen, flags)) {
|
||||
printf("FAIL: decoding of '%s': decoding error\n", out);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Check that 'src' is identical to 'tmp':
|
||||
if (srclen != tmplen) {
|
||||
printf("FAIL: roundtrip of '%s': "
|
||||
"length expected %lu, got %lu\n", src,
|
||||
(unsigned long)srclen,
|
||||
(unsigned long)tmplen
|
||||
);
|
||||
return true;
|
||||
}
|
||||
if (strncmp(src, tmp, tmplen) != 0) {
|
||||
tmp[tmplen] = '\0';
|
||||
printf("FAIL: roundtrip of '%s': got '%s'\n", src, tmp);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static int
|
||||
test_char_table (int flags)
|
||||
{
|
||||
bool fail = false;
|
||||
char chr[256];
|
||||
char enc[400], dec[400];
|
||||
size_t enclen, declen;
|
||||
|
||||
// Fill array with all characters 0..255:
|
||||
for (int i = 0; i < 256; i++)
|
||||
chr[i] = (unsigned char)i;
|
||||
|
||||
// Loop, using each char as a starting position to increase test coverage:
|
||||
for (int i = 0; i < 256; i++) {
|
||||
|
||||
size_t chrlen = 256 - i;
|
||||
|
||||
base64_encode(&chr[i], chrlen, enc, &enclen, BASE64_FORCE_PLAIN);
|
||||
|
||||
if (!base64_decode(enc, enclen, dec, &declen, flags)) {
|
||||
printf("FAIL: decoding @ %d: decoding error\n", i);
|
||||
fail = true;
|
||||
continue;
|
||||
}
|
||||
if (declen != chrlen) {
|
||||
printf("FAIL: roundtrip @ %d: "
|
||||
"length expected %lu, got %lu\n", i,
|
||||
(unsigned long)chrlen,
|
||||
(unsigned long)declen
|
||||
);
|
||||
fail = true;
|
||||
continue;
|
||||
}
|
||||
if (strncmp(&chr[i], dec, declen) != 0) {
|
||||
printf("FAIL: roundtrip @ %d: decoded output not same as input\n", i);
|
||||
fail = true;
|
||||
}
|
||||
}
|
||||
|
||||
return fail;
|
||||
}
|
||||
|
||||
static int
|
||||
test_streaming (int flags)
|
||||
{
|
||||
bool fail = false;
|
||||
char chr[256];
|
||||
char ref[400], enc[400];
|
||||
size_t reflen;
|
||||
struct base64_state state;
|
||||
|
||||
// Fill array with all characters 0..255:
|
||||
for (int i = 0; i < 256; i++)
|
||||
chr[i] = (unsigned char)i;
|
||||
|
||||
// Create reference base64 encoding:
|
||||
base64_encode(chr, 256, ref, &reflen, BASE64_FORCE_PLAIN);
|
||||
|
||||
// Encode the table with various block sizes and compare to reference:
|
||||
for (size_t bs = 1; bs < 255; bs++)
|
||||
{
|
||||
size_t inpos = 0;
|
||||
size_t partlen = 0;
|
||||
size_t enclen = 0;
|
||||
|
||||
base64_stream_encode_init(&state, flags);
|
||||
memset(enc, 0, 400);
|
||||
for (;;) {
|
||||
base64_stream_encode(&state, &chr[inpos], (inpos + bs > 256) ? 256 - inpos : bs, &enc[enclen], &partlen);
|
||||
enclen += partlen;
|
||||
if (inpos + bs > 256) {
|
||||
break;
|
||||
}
|
||||
inpos += bs;
|
||||
}
|
||||
base64_stream_encode_final(&state, &enc[enclen], &partlen);
|
||||
enclen += partlen;
|
||||
|
||||
if (enclen != reflen) {
|
||||
printf("FAIL: stream encoding gave incorrect size: "
|
||||
"%lu instead of %lu\n",
|
||||
(unsigned long)enclen,
|
||||
(unsigned long)reflen
|
||||
);
|
||||
fail = true;
|
||||
}
|
||||
if (strncmp(ref, enc, reflen) != 0) {
|
||||
printf("FAIL: stream encoding with blocksize %lu failed\n",
|
||||
(unsigned long)bs
|
||||
);
|
||||
fail = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Decode the reference encoding with various block sizes and
|
||||
// compare to input char table:
|
||||
for (size_t bs = 1; bs < 255; bs++)
|
||||
{
|
||||
size_t inpos = 0;
|
||||
size_t partlen = 0;
|
||||
size_t enclen = 0;
|
||||
|
||||
base64_stream_decode_init(&state, flags);
|
||||
memset(enc, 0, 400);
|
||||
while (base64_stream_decode(&state, &ref[inpos], (inpos + bs > reflen) ? reflen - inpos : bs, &enc[enclen], &partlen)) {
|
||||
enclen += partlen;
|
||||
inpos += bs;
|
||||
}
|
||||
if (enclen != 256) {
|
||||
printf("FAIL: stream decoding gave incorrect size: "
|
||||
"%lu instead of 255\n",
|
||||
(unsigned long)enclen
|
||||
);
|
||||
fail = true;
|
||||
}
|
||||
if (strncmp(chr, enc, 256) != 0) {
|
||||
printf("FAIL: stream decoding with blocksize %lu failed\n",
|
||||
(unsigned long)bs
|
||||
);
|
||||
fail = true;
|
||||
}
|
||||
}
|
||||
|
||||
return fail;
|
||||
}
|
||||
|
||||
static int
|
||||
test_invalid_dec_input (int flags)
|
||||
{
|
||||
// Subset of invalid characters to cover all ranges
|
||||
static const char invalid_set[] = { '\0', -1, '!', '-', ';', '_', '|' };
|
||||
static const char* invalid_strings[] = {
|
||||
"Zm9vYg=",
|
||||
"Zm9vYg",
|
||||
"Zm9vY",
|
||||
"Zm9vYmF=Zm9v"
|
||||
};
|
||||
|
||||
bool fail = false;
|
||||
char chr[256];
|
||||
char enc[400], dec[400];
|
||||
size_t enclen, declen;
|
||||
|
||||
// Fill array with all characters 0..255:
|
||||
for (int i = 0; i < 256; i++)
|
||||
chr[i] = (unsigned char)i;
|
||||
|
||||
// Create reference base64 encoding:
|
||||
base64_encode(chr, 256, enc, &enclen, BASE64_FORCE_PLAIN);
|
||||
|
||||
// Test invalid strings returns error.
|
||||
for (size_t i = 0U; i < sizeof(invalid_strings) / sizeof(invalid_strings[0]); ++i) {
|
||||
if (base64_decode(invalid_strings[i], strlen(invalid_strings[i]), dec, &declen, flags)) {
|
||||
printf("FAIL: decoding invalid input \"%s\": no decoding error\n", invalid_strings[i]);
|
||||
fail = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Loop, corrupting each char to increase test coverage:
|
||||
for (size_t c = 0U; c < sizeof(invalid_set); ++c) {
|
||||
for (size_t i = 0U; i < enclen; i++) {
|
||||
char backup = enc[i];
|
||||
|
||||
enc[i] = invalid_set[c];
|
||||
|
||||
if (base64_decode(enc, enclen, dec, &declen, flags)) {
|
||||
printf("FAIL: decoding invalid input @ %d: no decoding error\n", (int)i);
|
||||
fail = true;
|
||||
enc[i] = backup;
|
||||
continue;
|
||||
}
|
||||
enc[i] = backup;
|
||||
}
|
||||
}
|
||||
|
||||
// Loop, corrupting two chars to increase test coverage:
|
||||
for (size_t c = 0U; c < sizeof(invalid_set); ++c) {
|
||||
for (size_t i = 0U; i < enclen - 2U; i++) {
|
||||
char backup = enc[i+0];
|
||||
char backup2 = enc[i+2];
|
||||
|
||||
enc[i+0] = invalid_set[c];
|
||||
enc[i+2] = invalid_set[c];
|
||||
|
||||
if (base64_decode(enc, enclen, dec, &declen, flags)) {
|
||||
printf("FAIL: decoding invalid input @ %d: no decoding error\n", (int)i);
|
||||
fail = true;
|
||||
enc[i+0] = backup;
|
||||
enc[i+2] = backup2;
|
||||
continue;
|
||||
}
|
||||
enc[i+0] = backup;
|
||||
enc[i+2] = backup2;
|
||||
}
|
||||
}
|
||||
|
||||
return fail;
|
||||
}
|
||||
|
||||
static int
|
||||
test_one_codec (const char *codec, int flags)
|
||||
{
|
||||
bool fail = false;
|
||||
|
||||
printf("Codec %s:\n", codec);
|
||||
|
||||
// Skip if this codec is not supported:
|
||||
if (!codec_supported(flags)) {
|
||||
puts(" skipping");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Test vectors:
|
||||
struct {
|
||||
const char *in;
|
||||
const char *out;
|
||||
} vec[] = {
|
||||
|
||||
// These are the test vectors from RFC4648:
|
||||
{ "", "" },
|
||||
{ "f", "Zg==" },
|
||||
{ "fo", "Zm8=" },
|
||||
{ "foo", "Zm9v" },
|
||||
{ "foob", "Zm9vYg==" },
|
||||
{ "fooba", "Zm9vYmE=" },
|
||||
{ "foobar", "Zm9vYmFy" },
|
||||
|
||||
// The first paragraph from Moby Dick,
|
||||
// to test the SIMD codecs with larger blocksize:
|
||||
{ moby_dick_plain, moby_dick_base64 },
|
||||
};
|
||||
|
||||
for (size_t i = 0; i < sizeof(vec) / sizeof(vec[0]); i++) {
|
||||
|
||||
// Encode plain string, check against output:
|
||||
fail |= assert_enc(flags, vec[i].in, vec[i].out);
|
||||
|
||||
// Decode the output string, check if we get the input:
|
||||
fail |= assert_dec(flags, vec[i].out, vec[i].in);
|
||||
|
||||
// Do a roundtrip on the inputs and the outputs:
|
||||
fail |= assert_roundtrip(flags, vec[i].in);
|
||||
fail |= assert_roundtrip(flags, vec[i].out);
|
||||
}
|
||||
|
||||
fail |= test_char_table(flags);
|
||||
fail |= test_streaming(flags);
|
||||
fail |= test_invalid_dec_input(flags);
|
||||
|
||||
if (!fail)
|
||||
puts(" all tests passed.");
|
||||
|
||||
return fail;
|
||||
}
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
bool fail = false;
|
||||
|
||||
// Loop over all codecs:
|
||||
for (size_t i = 0; codecs[i]; i++) {
|
||||
|
||||
// Flags to invoke this codec:
|
||||
int codec_flags = (1 << i);
|
||||
|
||||
// Test this codec, merge the results:
|
||||
fail |= test_one_codec(codecs[i], codec_flags);
|
||||
}
|
||||
|
||||
return (fail) ? 1 : 0;
|
||||
}
|
2
node.gyp
2
node.gyp
@ -461,6 +461,7 @@
|
||||
'<(SHARED_INTERMEDIATE_DIR)' # for node_natives.h
|
||||
],
|
||||
'dependencies': [
|
||||
'deps/base64/base64.gyp:base64',
|
||||
'deps/googletest/googletest.gyp:gtest_prod',
|
||||
'deps/histogram/histogram.gyp:histogram',
|
||||
'deps/uvwasi/uvwasi.gyp:uvwasi',
|
||||
@ -1191,6 +1192,7 @@
|
||||
|
||||
'dependencies': [
|
||||
'<(node_lib_target_name)',
|
||||
'deps/base64/base64.gyp:base64',
|
||||
'deps/googletest/googletest.gyp:gtest',
|
||||
'deps/googletest/googletest.gyp:gtest_main',
|
||||
'deps/histogram/histogram.gyp:histogram',
|
||||
|
@ -4,6 +4,7 @@
|
||||
#if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS
|
||||
|
||||
#include "base64.h"
|
||||
#include "libbase64.h"
|
||||
#include "util.h"
|
||||
|
||||
namespace node {
|
||||
@ -131,6 +132,11 @@ inline size_t base64_encode(const char* src,
|
||||
|
||||
dlen = base64_encoded_size(slen, mode);
|
||||
|
||||
if (mode == Base64Mode::NORMAL) {
|
||||
::base64_encode(src, slen, dst, &dlen, 0);
|
||||
return dlen;
|
||||
}
|
||||
|
||||
unsigned a;
|
||||
unsigned b;
|
||||
unsigned c;
|
||||
|
@ -136,4 +136,6 @@ addlicense "nghttp3" "deps/ngtcp2/nghttp3/" "$licenseText"
|
||||
licenseText="$(curl -sL https://raw.githubusercontent.com/jprichardson/node-fs-extra/b34da2762a4865b025cac06d02d6a2f1f1027b65/LICENSE)"
|
||||
addlicense "node-fs-extra" "lib/internal/fs/cp" "$licenseText"
|
||||
|
||||
addlicense "base64" "deps/base64/base64/" "$(cat "${rootdir}"/deps/base64/base64/LICENSE)"
|
||||
|
||||
mv "$tmplicense" "$licensefile"
|
||||
|
Loading…
Reference in New Issue
Block a user