deps,src: use SIMD for normal base64 encoding

PR-URL: https://github.com/nodejs/node/pull/39775
Reviewed-By: Luigi Pinca <luigipinca@gmail.com>
2024-11-21 10:59:27 +00:00 · 2021-08-13 01:49:27 -04:00 · 2021-08-13 01:49:27 -04:00 · f561f31f1c
commit f561f31f1c
parent 71ca6d7d6a
78 changed files with 7161 additions and 0 deletions
--- a/32
+++ b/32
@ -1839,3 +1839,35 @@ The externally maintained libraries used by Node.js are:
    OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  """
+
+- base64, located at deps/base64/base64/, is licensed as follows:
+  """
+    Copyright (c) 2005-2007, Nick Galbreath
+    Copyright (c) 2013-2019, Alfred Klomp
+    Copyright (c) 2015-2017, Wojciech Mula
+    Copyright (c) 2016-2017, Matthieu Darbois
+    All rights reserved.
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+
+    - Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    - Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+    IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+    PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+    TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+  """
--- a/deps/base64/README.md
+++ b/deps/base64/README.md
@ -0,0 +1,14 @@
+# base64
+
+This project boosts base64 encoding/decoding performance by utilizing SIMD
+operations where possible.
+
+The source is pulled from: https://github.com/aklomp/base64
+
+Active development occurs in the default branch (currently named `master`).
+
+## Updating
+
+```sh
+$ git clone https://github.com/aklomp/base64
+```
--- a/deps/base64/base64.gyp
+++ b/deps/base64/base64.gyp
@ -0,0 +1,191 @@
+{
+  'variables': {
+    'arm_fpu%': '',
+    'target_arch%': '',
+  },
+  'targets': [
+    {
+      # Core static library: the generic codec, the lookup tables,
+      # codec_choose.c and lib.c, plus whichever SIMD codecs apply to the
+      # target architecture (see 'conditions' below).
+      'target_name': 'base64',
+      'type': 'static_library',
+      'include_dirs': [ 'base64/include', 'base64/lib' ],
+      # Settings exported to every target that depends directly on this one.
+      'direct_dependent_settings': {
+        'include_dirs': [ 'base64/include' ],
+        'defines': [ 'BASE64_STATIC_DEFINE' ],
+      },
+      'defines': [ 'BASE64_STATIC_DEFINE' ],
+      'sources': [
+        'base64/include/libbase64.h',
+        'base64/lib/arch/generic/codec.c',
+        'base64/lib/tables/tables.c',
+        'base64/lib/codec_choose.c',
+        'base64/lib/codecs.h',
+        'base64/lib/lib.c',
+      ],
+
+      # Each entry has the form [ condition, {if-true}, {if-false} ].
+      # If true: the HAVE_* macro is defined and the codec is linked in through
+      # its own target, which carries the codec-specific compiler flags.
+      # If false: the same codec.c is compiled directly into this target,
+      # without the HAVE_* macro and without the extra flags.
+      # NOTE(review): presumably each codec.c reduces to stub functions when
+      # its HAVE_* macro is unset, so the symbols still resolve at link time.
+      # Confirm against base64/lib/arch/*/codec.c.
+      'conditions': [
+        [ 'arm_fpu=="neon" and target_arch=="arm"', {
+          'defines': [ 'HAVE_NEON32=1' ],
+          'dependencies': [ 'base64_neon32' ],
+        }, {
+          'sources': [ 'base64/lib/arch/neon32/codec.c' ],
+        }],
+
+        # arm64 requires NEON, so it's safe to always use it
+        [ 'target_arch=="arm64"', {
+          'defines': [ 'HAVE_NEON64=1' ],
+          'dependencies': [ 'base64_neon64' ],
+        }, {
+          'sources': [ 'base64/lib/arch/neon64/codec.c' ],
+        }],
+
+        # Runtime detection will happen for x86 CPUs
+        [ 'target_arch in "ia32 x64 x32"', {
+          'defines': [
+            'HAVE_SSSE3=1',
+            'HAVE_SSE41=1',
+            'HAVE_SSE42=1',
+            'HAVE_AVX=1',
+            'HAVE_AVX2=1',
+          ],
+          'dependencies': [
+            'base64_ssse3',
+            'base64_sse41',
+            'base64_sse42',
+            'base64_avx',
+            'base64_avx2',
+          ],
+        }, {
+          'sources': [
+            'base64/lib/arch/ssse3/codec.c',
+            'base64/lib/arch/sse41/codec.c',
+            'base64/lib/arch/sse42/codec.c',
+            'base64/lib/arch/avx/codec.c',
+            'base64/lib/arch/avx2/codec.c',
+          ],
+        }],
+      ],
+    },
+
+    {
+      # SSSE3 codec in its own static library, so that -mssse3 is applied to
+      # this one source file only. No extra compiler flag is set on Windows.
+      'target_name': 'base64_ssse3',
+      'type': 'static_library',
+      'include_dirs': [ 'base64/include', 'base64/lib' ],
+      'sources': [ 'base64/lib/arch/ssse3/codec.c' ],
+      'defines': [ 'BASE64_STATIC_DEFINE', 'HAVE_SSSE3=1' ],
+      'conditions': [
+        [ 'OS!="win"', {
+          'cflags': [ '-mssse3' ],
+          'xcode_settings': {
+            'OTHER_CFLAGS': [ '-mssse3' ]
+          },
+        }],
+      ],
+    },
+
+    {
+      # SSE4.1 codec in its own static library, so that -msse4.1 is applied to
+      # this one source file only. No extra compiler flag is set on Windows.
+      'target_name': 'base64_sse41',
+      'type': 'static_library',
+      'include_dirs': [ 'base64/include', 'base64/lib' ],
+      'sources': [ 'base64/lib/arch/sse41/codec.c' ],
+      'defines': [ 'BASE64_STATIC_DEFINE', 'HAVE_SSE41=1' ],
+      'conditions': [
+        [ 'OS!="win"', {
+          'cflags': [ '-msse4.1' ],
+          'xcode_settings': {
+            'OTHER_CFLAGS': [ '-msse4.1' ]
+          },
+        }],
+      ],
+    },
+
+    {
+      # SSE4.2 codec in its own static library, so that -msse4.2 is applied to
+      # this one source file only. No extra compiler flag is set on Windows.
+      'target_name': 'base64_sse42',
+      'type': 'static_library',
+      'include_dirs': [ 'base64/include', 'base64/lib' ],
+      'sources': [ 'base64/lib/arch/sse42/codec.c' ],
+      'defines': [ 'BASE64_STATIC_DEFINE', 'HAVE_SSE42=1' ],
+      'conditions': [
+        [ 'OS!="win"', {
+          'cflags': [ '-msse4.2' ],
+          'xcode_settings': {
+            'OTHER_CFLAGS': [ '-msse4.2' ]
+          },
+        }],
+      ],
+    },
+
+    {
+      # AVX codec in its own static library. Non-Windows builds pass -mavx;
+      # Windows builds pass /arch:AVX to the MSVC compiler instead.
+      'target_name': 'base64_avx',
+      'type': 'static_library',
+      'include_dirs': [ 'base64/include', 'base64/lib' ],
+      'sources': [ 'base64/lib/arch/avx/codec.c' ],
+      'defines': [ 'BASE64_STATIC_DEFINE', 'HAVE_AVX=1' ],
+      'conditions': [
+        [ 'OS!="win"', {
+          'cflags': [ '-mavx' ],
+          'xcode_settings': {
+            'OTHER_CFLAGS': [ '-mavx' ]
+          },
+        }, {
+          'msvs_settings': {
+            'VCCLCompilerTool': {
+              'AdditionalOptions': [
+                '/arch:AVX'
+              ],
+            },
+          },
+        }],
+      ],
+    },
+
+    {
+      # AVX2 codec in its own static library. Non-Windows builds pass -mavx2;
+      # Windows builds pass /arch:AVX2 to the MSVC compiler instead.
+      'target_name': 'base64_avx2',
+      'type': 'static_library',
+      'include_dirs': [ 'base64/include', 'base64/lib' ],
+      'sources': [ 'base64/lib/arch/avx2/codec.c' ],
+      'defines': [ 'BASE64_STATIC_DEFINE', 'HAVE_AVX2=1' ],
+      'conditions': [
+        [ 'OS!="win"', {
+          'cflags': [ '-mavx2' ],
+          'xcode_settings': {
+            'OTHER_CFLAGS': [ '-mavx2' ]
+          },
+        }, {
+          'msvs_settings': {
+            'VCCLCompilerTool': {
+              'AdditionalOptions': [
+                '/arch:AVX2'
+              ],
+            },
+          },
+        }],
+      ],
+    },
+
+    {
+      # 32-bit ARM NEON codec in its own static library, so that -mfpu=neon is
+      # applied to this one source file only (non-Windows builds).
+      'target_name': 'base64_neon32',
+      'type': 'static_library',
+      'include_dirs': [ 'base64/include', 'base64/lib' ],
+      'sources': [ 'base64/lib/arch/neon32/codec.c' ],
+      'defines': [ 'BASE64_STATIC_DEFINE', 'HAVE_NEON32=1' ],
+      'conditions': [
+        [ 'OS!="win"', {
+          'cflags': [ '-mfpu=neon' ],
+          'xcode_settings': {
+            'OTHER_CFLAGS': [ '-mfpu=neon' ]
+          },
+        }],
+      ],
+    },
+
+    {
+      # 64-bit ARM NEON codec in its own static library; only the HAVE_NEON64
+      # define differs from a plain build of the file.
+      'target_name': 'base64_neon64',
+      'type': 'static_library',
+      'include_dirs': [ 'base64/include', 'base64/lib' ],
+      'sources': [ 'base64/lib/arch/neon64/codec.c' ],
+      'defines': [ 'BASE64_STATIC_DEFINE', 'HAVE_NEON64=1' ],
+      # NEON is required in arm64, so no -mfpu flag is needed
+    }
+
+  ]
+}
--- a/deps/base64/base64/.editorconfig
+++ b/deps/base64/base64/.editorconfig
@ -0,0 +1,22 @@
+# https://EditorConfig.org
+root = true
+
+[*]
+charset = utf-8
+insert_final_newline = true
+trim_trailing_whitespace = true
+
+indent_style = tab
+tab_width = 8
+indent_size = 8
+
+[CMakeLists.txt]
+tab_width = 4
+indent_style = space
+[*.cmake]
+tab_width = 4
+indent_style = space
+
+[*.py]
+tab_width = 4
+indent_style = space
--- a/deps/base64/base64/.github/workflows/test.yml
+++ b/deps/base64/base64/.github/workflows/test.yml
@ -0,0 +1,133 @@
+name: Test
+
+on: [push, pull_request]
+
+jobs:
+  # Makefile-based build and test on amd64: Ubuntu with gcc and clang, each
+  # with and without OpenMP, plus one macOS/clang run without OpenMP.
+  makefile-test:
+    # `cond && 'a' || 'b'` is the expression-language idiom for a ternary.
+    name: makefile-${{ matrix.runner }}-amd64-${{ matrix.compiler }} ${{ ((matrix.openmp == 1) && '+openmp') || '' }}
+    runs-on: ${{ matrix.runner }}
+    strategy:
+      fail-fast: false
+      matrix:
+        runner: ["ubuntu-18.04"]
+        compiler: ["gcc", "clang"]
+        openmp: ["0", "1"]
+        include:
+          - runner: "macos-11"
+            compiler: "clang"
+            openmp: "0"
+    env:
+      OPENMP: ${{ matrix.openmp }}
+      OMP_NUM_THREADS: ${{ ((matrix.openmp == 1) && '2') || '0' }}
+      CC: ${{ matrix.compiler }}
+      # On macOS runners OBJCOPY is set to `echo` instead of `objcopy`.
+      OBJCOPY: ${{ (startsWith(matrix.runner, 'macos') && 'echo') || 'objcopy' }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+      - name: Run tests
+        run: ./test/ci/test.sh
+
+  # CMake-based configure, build and CTest run on Ubuntu, macOS and Windows.
+  # CMAKE_BUILD_TYPE is passed only on non-Windows runners, and AVX2 is
+  # switched off on macOS.
+  # NOTE(review): the reason for disabling AVX2 on macOS is not stated here.
+  cmake-test:
+    name: cmake-${{ matrix.runner }}
+    runs-on: ${{ matrix.runner }}
+    strategy:
+      fail-fast: false
+      matrix:
+        runner: ["ubuntu-18.04", "macos-11", "windows-2019"]
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+      - name: CMake Configure
+        run: >
+          cmake
+          -B out
+          -Werror=dev
+          -DBASE64_BUILD_TESTS=ON
+          ${{ runner.os != 'Windows' && '-DCMAKE_BUILD_TYPE=Release' || '' }}
+          ${{ runner.os == 'macOS' && '-DBASE64_WITH_AVX2=OFF' || '' }}
+      - name: CMake Build
+        run: cmake --build out --config Release --verbose
+      - name: CTest
+        run: ctest --no-tests=error --test-dir out -VV --build-config Release
+
+  # Makefile-based test inside an alpine:3.12 container, compiled with gcc.
+  alpine-makefile-test:
+    name: makefile-alpine-amd64-gcc
+    runs-on: ubuntu-latest
+    container:
+      image: alpine:3.12
+      env:
+        CC: gcc
+    steps:
+      # The container image is minimal; install the build tools first.
+      - name: Install deps
+        run: apk add --update bash build-base git
+      - name: Checkout
+        uses: actions/checkout@v3
+      - name: Run tests
+        run: ./test/ci/test.sh
+
+  # CMake-based configure, build and CTest run inside an alpine:3.12 container.
+  alpine-cmake-test:
+    name: cmake-alpine-amd64-gcc
+    runs-on: ubuntu-latest
+    container:
+      image: alpine:3.12
+    steps:
+      - name: Install deps
+        run: apk add --update bash build-base cmake git
+      - name: Checkout
+        uses: actions/checkout@v3
+      - name: CMake Configure
+        run: cmake -B out -Werror=dev -DBASE64_BUILD_TESTS=ON -DCMAKE_BUILD_TYPE=Release
+      - name: CMake Build
+        run: cmake --build out --config Release --verbose
+      # CTest is run from inside the build directory rather than via --test-dir.
+      - name: CTest
+        run: ctest --no-tests=error -VV --build-config Release
+        working-directory: ./out
+
+  # Makefile-based test for armv7, aarch64, s390x and ppc64le, each with gcc
+  # and clang, run through uraimo/run-on-arch-action on an Alpine distro.
+  alpine-alt-arch-makefile-test:
+    name: makefile-alpine-${{matrix.arch}}-${{matrix.cc}}
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        arch: [armv7, aarch64, s390x, ppc64le]
+        cc: [gcc, clang]
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+      - uses: uraimo/run-on-arch-action@v2
+        with:
+          arch: ${{matrix.arch}}
+          distro: alpine_latest
+          env: |
+            CC: ${{matrix.cc}}
+          install: apk add --update bash build-base cmake git ${{matrix.cc}}
+          run: ./test/ci/test.sh
+
+  # CMake-based configure, build and CTest run for armv7, aarch64, s390x and
+  # ppc64le, each with gcc and clang, through uraimo/run-on-arch-action.
+  # All three phases run inside one `run` script; the ::group::/::endgroup::
+  # echo lines mark each phase in the job log.
+  alpine-alt-arch-cmake-test:
+    name: cmake-alpine-${{matrix.arch}}-${{matrix.cc}}
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        arch: [armv7, aarch64, s390x, ppc64le]
+        cc: [gcc, clang]
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+      - uses: uraimo/run-on-arch-action@v2
+        with:
+          arch: ${{matrix.arch}}
+          distro: alpine_latest
+          env: |
+            CC: ${{matrix.cc}}
+          install: apk add --update bash build-base cmake git ${{matrix.cc}}
+          run: |
+            echo "::group::CMake Configure"
+            cmake -B out -Werror=dev -DBASE64_BUILD_TESTS=ON -DCMAKE_BUILD_TYPE=Release
+            echo "::endgroup::CMake Configure"
+            echo "::group::CMake Build"
+            cmake --build out --config Release --verbose
+            echo "::endgroup::CMake Build"
+            echo "::group::CTest"
+            ctest --no-tests=error --test-dir out -VV --build-config Release
+            echo "::endgroup::CTest"
--- a/deps/base64/base64/.gitignore
+++ b/deps/base64/base64/.gitignore
@ -0,0 +1,12 @@
+*.o
+bin/base64
+lib/config.h
+test/benchmark
+test/test_base64
+
+# visual studio symbol db, etc.
+.vs/
+# build directory used by CMakePresets
+out/
+# private cmake presets
+CMakeUserPresets.json
--- a/deps/base64/base64/CMakeLists.txt
+++ b/deps/base64/base64/CMakeLists.txt
@ -0,0 +1,286 @@
+# Written in 2016-2017, 2021 by Henrik Steffen Gaßmann henrik@gassmann.onl
+#
+# To the extent possible under law, the author(s) have dedicated all
+# copyright and related and neighboring rights to this software to the
+# public domain worldwide. This software is distributed without any warranty.
+#
+# You should have received a copy of the CC0 Public Domain Dedication
+# along with this software. If not, see
+#
+#     http://creativecommons.org/publicdomain/zero/1.0/
+#
+########################################################################
+cmake_minimum_required(VERSION 3.10.2)
+
+# new dependent option syntax. We are already compliant
+if (POLICY CMP0127)
+    cmake_policy(SET CMP0127 NEW)
+endif()
+
+project(base64 LANGUAGES C VERSION 0.4.0)
+
+include(GNUInstallDirs)
+include(CMakeDependentOption)
+include(CheckIncludeFile)
+include(FeatureSummary)
+list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules")
+
+#######################################################################
+# platform detection
+include(TargetArch)
+detect_target_architecture(_TARGET_ARCH)
+
+check_include_file(getopt.h HAVE_GETOPT_H)
+cmake_dependent_option(BASE64_BUILD_CLI "Build the cli for encoding and decoding" ON "HAVE_GETOPT_H" OFF)
+add_feature_info(CLI BASE64_BUILD_CLI "enables the CLI executable for encoding and decoding")
+
+###################################################################
+# optional/conditional dependencies
+find_package(OpenMP)
+set_package_properties(OpenMP PROPERTIES
+    TYPE OPTIONAL
+    PURPOSE "Allows to utilize OpenMP"
+)
+
+
+########################################################################
+# Compilation options
+# Warnings are promoted to errors by default; tests are opt-in.
+option(BASE64_WERROR "Treat warnings as error" ON)
+option(BASE64_BUILD_TESTS "add test projects" OFF)
+# Selectable only when find_package(OpenMP) succeeded; forced OFF otherwise.
+cmake_dependent_option(BASE64_WITH_OpenMP "use OpenMP" OFF "OpenMP_FOUND" OFF)
+add_feature_info("OpenMP codec" BASE64_WITH_OpenMP "spreads codec work across multiple threads")
+# Selectable only when not cross-compiling; forced OFF otherwise.
+cmake_dependent_option(BASE64_REGENERATE_TABLES "regenerate the codec tables" OFF "NOT CMAKE_CROSSCOMPILING" OFF)
+
+# _IS_X86 holds a condition expression (not a boolean) that is handed to
+# cmake_dependent_option() for evaluation. Each x86 codec option defaults to ON
+# when the condition holds and is forced OFF otherwise.
+set(_IS_X86 "_TARGET_ARCH_x86 OR _TARGET_ARCH_x64")
+cmake_dependent_option(BASE64_WITH_SSSE3 "add SSSE 3 codepath" ON ${_IS_X86} OFF)
+add_feature_info(SSSE3 BASE64_WITH_SSSE3 "add SSSE 3 codepath")
+cmake_dependent_option(BASE64_WITH_SSE41 "add SSE 4.1 codepath" ON ${_IS_X86} OFF)
+add_feature_info(SSE4.1 BASE64_WITH_SSE41 "add SSE 4.1 codepath")
+cmake_dependent_option(BASE64_WITH_SSE42 "add SSE 4.2 codepath" ON ${_IS_X86} OFF)
+add_feature_info(SSE4.2 BASE64_WITH_SSE42 "add SSE 4.2 codepath")
+cmake_dependent_option(BASE64_WITH_AVX "add AVX codepath" ON ${_IS_X86} OFF)
+add_feature_info(AVX BASE64_WITH_AVX "add AVX codepath")
+cmake_dependent_option(BASE64_WITH_AVX2 "add AVX 2 codepath" ON ${_IS_X86} OFF)
+add_feature_info(AVX2 BASE64_WITH_AVX2 "add AVX2 codepath")
+
+# NEON32 is opt-in: it defaults to OFF even when targeting 32-bit ARM.
+cmake_dependent_option(BASE64_WITH_NEON32 "add NEON32 codepath" OFF _TARGET_ARCH_arm OFF)
+add_feature_info(NEON32 BASE64_WITH_NEON32 "add NEON32 codepath")
+
+# NEON64 defaults to ON when targeting arm64.
+cmake_dependent_option(BASE64_WITH_NEON64 "add NEON64 codepath" ON _TARGET_ARCH_arm64 OFF)
+add_feature_info(NEON64 BASE64_WITH_NEON64 "add NEON64 codepath")
+
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/bin")
+set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/bin")
+
+########################################################################
+# Regenerate headers
+
+if (BASE64_REGENERATE_TABLES)
+    # Generate tables in build folder and copy to source tree.
+    # Don't add the tables in the source tree to the outputs, to avoid `make clean` removing them.
+    add_executable(table_generator
+        lib/tables/table_generator.c
+    )
+
+    add_custom_command(OUTPUT table_dec_32bit.h "${CMAKE_CURRENT_SOURCE_DIR}/lib/tables/table_dec_32bit.h"
+        COMMAND table_generator > table_dec_32bit.h
+        COMMAND "${CMAKE_COMMAND}" -E copy table_dec_32bit.h "${CMAKE_CURRENT_SOURCE_DIR}/lib/tables/table_dec_32bit.h"
+        DEPENDS table_generator
+    )
+    set(Python_ADDITIONAL_VERSIONS 3)
+    find_package(PythonInterp REQUIRED)
+    add_custom_command(OUTPUT table_enc_12bit.h "${CMAKE_CURRENT_SOURCE_DIR}/lib/tables/table_enc_12bit.h"
+        COMMAND "${PYTHON_EXECUTABLE}" "${CMAKE_CURRENT_SOURCE_DIR}/lib/tables/table_enc_12bit.py" > table_enc_12bit.h
+        COMMAND "${CMAKE_COMMAND}" -E copy table_enc_12bit.h "${CMAKE_CURRENT_SOURCE_DIR}/lib/tables/table_enc_12bit.h"
+        DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/lib/tables/table_enc_12bit.py"
+    )
+endif()
+
+
+########################################################################
+# library project
+add_library(base64
+    # library files
+    lib/lib.c
+    lib/codec_choose.c
+    include/libbase64.h
+
+    lib/tables/tables.c
+    # Add generated headers explicitly to target, to insert them in the dependency tree
+    lib/tables/table_dec_32bit.h
+    lib/tables/table_enc_12bit.h
+
+    # codec implementations
+    lib/arch/generic/codec.c
+
+    lib/arch/ssse3/codec.c
+    lib/arch/sse41/codec.c
+    lib/arch/sse42/codec.c
+    lib/arch/avx/codec.c
+    lib/arch/avx2/codec.c
+
+    lib/arch/neon32/codec.c
+    lib/arch/neon64/codec.c
+)
+
+target_include_directories(base64
+    PUBLIC
+        $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
+        $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
+    PRIVATE
+        "${CMAKE_CURRENT_BINARY_DIR}"
+)
+
+####################################################################
+# platform/compiler specific configuration
+set_target_properties(base64 PROPERTIES
+    C_STANDARD 99
+    C_STANDARD_REQUIRED YES
+    C_EXTENSIONS OFF
+    DEFINE_SYMBOL BASE64_EXPORTS
+    VERSION ${PROJECT_VERSION}
+    SOVERSION ${PROJECT_VERSION_MAJOR}
+)
+
+#generate_export_header(base64)
+# the following definitions and those in libbase64.h have been
+# kept forward compatible in case we ever switch to generate_export_header
+if (BUILD_SHARED_LIBS)
+    set_target_properties(base64 PROPERTIES
+        C_VISIBILITY_PRESET hidden
+    )
+else()
+    target_compile_definitions(base64
+        PUBLIC
+            BASE64_STATIC_DEFINE
+    )
+endif()
+
+target_compile_options(base64 PRIVATE
+  $<$<C_COMPILER_ID:MSVC>:
+    /W4
+    /we4013 # Error warning C4013: 'function' undefined; assuming extern returning int
+    /we4700 # Error warning C4700: uninitialized local variable
+    /we4715 # not all control paths return a value
+    /we4003 # not enough actual parameters for macro
+    /wd4456 # disable warning C4456: declaration of 'xxx' hides previous local declaration
+  >
+  $<$<NOT:$<C_COMPILER_ID:MSVC>>:
+    -Wall
+    -Wextra
+    -Wpedantic
+  >
+  $<$<BOOL:${BASE64_WERROR}>:$<IF:$<C_COMPILER_ID:MSVC>,/WX,-Werror>>
+)
+
+target_compile_definitions(base64 PRIVATE
+  $<$<C_COMPILER_ID:MSVC>:
+    # remove unnecessary warnings about unchecked iterators
+    _SCL_SECURE_NO_WARNINGS
+  >
+)
+
+########################################################################
+# SIMD settings
+include(TargetSIMDInstructionSet)
+define_SIMD_compile_flags()
+
+if (_TARGET_ARCH STREQUAL "x86" OR _TARGET_ARCH STREQUAL "x64")
+    # configure_codec(<TYPE> [<definition>])
+    #
+    # When BASE64_WITH_<TYPE> is enabled, compile lib/arch/<type>/codec.c with
+    # the flags in COMPILE_FLAGS_<TYPE>. If a second argument is given and the
+    # compiler is MSVC, that argument is also set as a compile definition on
+    # the same source file.
+    macro(configure_codec _TYPE)
+        if (BASE64_WITH_${_TYPE})
+            string(TOLOWER "${_TYPE}" _DIR)
+            set_source_files_properties("lib/arch/${_DIR}/codec.c" PROPERTIES
+                COMPILE_FLAGS "${COMPILE_FLAGS_${_TYPE}}"
+            )
+
+            if (${ARGC} GREATER 1 AND MSVC)
+                set_source_files_properties("lib/arch/${_DIR}/codec.c" PROPERTIES
+                    COMPILE_DEFINITIONS ${ARGV1}
+                )
+            endif()
+        endif()
+    endmacro()
+
+    # The optional definitions use the feature-macro spellings that GCC and
+    # Clang predefine for these instruction sets: __SSSE3__, __SSE4_1__ and
+    # __SSE4_2__ (note: "SSE4", not "SSSE4").
+    configure_codec(SSSE3 __SSSE3__)
+    configure_codec(SSE41 __SSE4_1__)
+    configure_codec(SSE42 __SSE4_2__)
+    configure_codec(AVX)
+    configure_codec(AVX2)
+
+elseif (_TARGET_ARCH STREQUAL "arm")
+    # The NEON32 flags are exposed as an (advanced) cache variable so they can
+    # be overridden at configure time.
+    set(BASE64_NEON32_CFLAGS "${COMPILE_FLAGS_NEON32}" CACHE STRING "the NEON32 compile flags (for 'lib/arch/neon32/codec.c')")
+    mark_as_advanced(BASE64_NEON32_CFLAGS)
+
+    if (BASE64_WITH_NEON32)
+        set_source_files_properties("lib/arch/neon32/codec.c" PROPERTIES
+            COMPILE_FLAGS "${BASE64_NEON32_CFLAGS} "
+        )
+    endif()
+
+#elseif (_TARGET_ARCH STREQUAL "arm64" AND BASE64_WITH_NEON64)
+
+endif()
+
+configure_file("${CMAKE_CURRENT_LIST_DIR}/cmake/config.h.in" "${CMAKE_CURRENT_BINARY_DIR}/config.h" @ONLY)
+
+########################################################################
+# OpenMP Settings
+if (BASE64_WITH_OpenMP)
+    target_link_libraries(base64 PRIVATE OpenMP::OpenMP_C)
+endif()
+
+########################################################################
+if (BASE64_BUILD_TESTS)
+    enable_testing()
+    add_subdirectory(test)
+endif()
+
+########################################################################
+# base64
+if (BASE64_BUILD_CLI)
+    add_executable(base64-bin
+        bin/base64.c
+    )
+    target_link_libraries(base64-bin PRIVATE base64)
+    set_target_properties(base64-bin PROPERTIES
+        OUTPUT_NAME base64
+    )
+endif()
+
+########################################################################
+# cmake install
+install(DIRECTORY include/ TYPE INCLUDE)
+install(TARGETS base64
+    EXPORT base64-targets
+    DESTINATION ${CMAKE_INSTALL_LIBDIR}
+    RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
+)
+if (BASE64_BUILD_CLI)
+    install(TARGETS base64-bin EXPORT base64-targets DESTINATION ${CMAKE_INSTALL_BINDIR})
+endif()
+
+include(CMakePackageConfigHelpers)
+configure_package_config_file(cmake/base64-config.cmake.in
+    "${CMAKE_CURRENT_BINARY_DIR}/base64-config.cmake"
+
+    INSTALL_DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}"
+)
+# Use PROJECT_VERSION, which project() sets from its VERSION argument.
+# CMake variable names are case-sensitive: project(base64 ...) defines
+# base64_VERSION, not BASE64_VERSION.
+write_basic_package_version_file(
+    "${CMAKE_CURRENT_BINARY_DIR}/base64-config-version.cmake"
+    VERSION ${PROJECT_VERSION}
+    COMPATIBILITY SameMajorVersion
+)
+
+install(FILES
+        "${CMAKE_CURRENT_BINARY_DIR}/base64-config.cmake"
+        "${CMAKE_CURRENT_BINARY_DIR}/base64-config-version.cmake"
+    DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}"
+)
+
+install(EXPORT base64-targets
+    NAMESPACE aklomp::
+    DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}"
+)
+
+########################################################################
+feature_summary(WHAT PACKAGES_FOUND PACKAGES_NOT_FOUND ENABLED_FEATURES DISABLED_FEATURES)
--- a/deps/base64/base64/LICENSE
+++ b/deps/base64/base64/LICENSE
@ -0,0 +1,28 @@
+Copyright (c) 2005-2007, Nick Galbreath
+Copyright (c) 2013-2019, Alfred Klomp
+Copyright (c) 2015-2017, Wojciech Mula
+Copyright (c) 2016-2017, Matthieu Darbois
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+- Redistributions of source code must retain the above copyright notice,
+  this list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright
+  notice, this list of conditions and the following disclaimer in the
+  documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/deps/base64/base64/Makefile
+++ b/deps/base64/base64/Makefile
@ -0,0 +1,93 @@
+CFLAGS += -std=c99 -O3 -Wall -Wextra -pedantic
+
+# Set OBJCOPY if not defined by environment:
+OBJCOPY ?= objcopy
+
+OBJS = \
+  lib/arch/avx2/codec.o \
+  lib/arch/generic/codec.o \
+  lib/arch/neon32/codec.o \
+  lib/arch/neon64/codec.o \
+  lib/arch/ssse3/codec.o \
+  lib/arch/sse41/codec.o \
+  lib/arch/sse42/codec.o \
+  lib/arch/avx/codec.o \
+  lib/lib.o \
+  lib/codec_choose.o \
+  lib/tables/tables.o
+
+HAVE_AVX2   = 0
+HAVE_NEON32 = 0
+HAVE_NEON64 = 0
+HAVE_SSSE3  = 0
+HAVE_SSE41  = 0
+HAVE_SSE42  = 0
+HAVE_AVX    = 0
+
+# The user should supply compiler flags for the codecs they want to build.
+# Check which codecs we're going to include:
+ifdef AVX2_CFLAGS
+  HAVE_AVX2 = 1
+endif
+ifdef NEON32_CFLAGS
+  HAVE_NEON32 = 1
+endif
+ifdef NEON64_CFLAGS
+  HAVE_NEON64 = 1
+endif
+ifdef SSSE3_CFLAGS
+  HAVE_SSSE3 = 1
+endif
+ifdef SSE41_CFLAGS
+  HAVE_SSE41 = 1
+endif
+ifdef SSE42_CFLAGS
+  HAVE_SSE42 = 1
+endif
+ifdef AVX_CFLAGS
+  HAVE_AVX = 1
+endif
+ifdef OPENMP
+  CFLAGS += -fopenmp
+endif
+
+
+.PHONY: all analyze clean
+
+# Default goal: the command-line tool and the combined library object.
+all: bin/base64 lib/libbase64.o
+
+bin/base64: bin/base64.o lib/libbase64.o
+	$(CC) $(CFLAGS) -o $@ $^
+
+# Merge every object in $(OBJS) into one relocatable object (ld -r), then let
+# objcopy keep only the symbols listed in lib/exports.txt as globals.
+lib/libbase64.o: $(OBJS)
+	$(LD) -r -o $@ $^
+	$(OBJCOPY) --keep-global-symbols=lib/exports.txt $@
+
+# Generate the configuration header from the HAVE_* values, which are 1 only
+# for codecs whose *_CFLAGS variable was supplied.
+lib/config.h:
+	@echo "#define HAVE_AVX2   $(HAVE_AVX2)"    > $@
+	@echo "#define HAVE_NEON32 $(HAVE_NEON32)" >> $@
+	@echo "#define HAVE_NEON64 $(HAVE_NEON64)" >> $@
+	@echo "#define HAVE_SSSE3  $(HAVE_SSSE3)"  >> $@
+	@echo "#define HAVE_SSE41  $(HAVE_SSE41)"  >> $@
+	@echo "#define HAVE_SSE42  $(HAVE_SSE42)"  >> $@
+	@echo "#define HAVE_AVX    $(HAVE_AVX)"    >> $@
+
+# Every library object depends on the generated header and is compiled with
+# -Ilib so that the header can be found.
+$(OBJS): lib/config.h
+$(OBJS): CFLAGS += -Ilib
+
+# Per-codec flags: each SIMD codec object additionally receives the
+# user-supplied flags for its instruction set (empty when not supplied).
+lib/arch/avx2/codec.o:   CFLAGS += $(AVX2_CFLAGS)
+lib/arch/neon32/codec.o: CFLAGS += $(NEON32_CFLAGS)
+lib/arch/neon64/codec.o: CFLAGS += $(NEON64_CFLAGS)
+lib/arch/ssse3/codec.o:  CFLAGS += $(SSSE3_CFLAGS)
+lib/arch/sse41/codec.o:  CFLAGS += $(SSE41_CFLAGS)
+lib/arch/sse42/codec.o:  CFLAGS += $(SSE42_CFLAGS)
+lib/arch/avx/codec.o:    CFLAGS += $(AVX_CFLAGS)
+
+# Pattern rule used for every object file above.
+%.o: %.c
+	$(CC) $(CFLAGS) -o $@ -c $<
+
+analyze: clean
+	scan-build --use-analyzer=`which clang` --status-bugs make
+
+clean:
+	rm -f bin/base64 bin/base64.o lib/libbase64.o lib/config.h $(OBJS)
--- a/deps/base64/base64/README.md
+++ b/deps/base64/base64/README.md
@ -0,0 +1,474 @@
+# Fast Base64 stream encoder/decoder
+
+[![Build Status](https://github.com/aklomp/base64/actions/workflows/test.yml/badge.svg)](https://github.com/aklomp/base64/actions/workflows/test.yml)
+
+This is an implementation of a base64 stream encoding/decoding library in C99
+with SIMD (AVX2, NEON, AArch64/NEON, SSSE3, SSE4.1, SSE4.2, AVX) and
+[OpenMP](http://www.openmp.org) acceleration. It also contains wrapper functions
+to encode/decode simple length-delimited strings. This library aims to be:
+
+- FAST;
+- easy to use;
+- elegant.
+
+On x86, the library does runtime feature detection. The first time it's called,
+the library will determine the appropriate encoding/decoding routines for the
+machine. It then remembers them for the lifetime of the program. If your
+processor supports AVX2, SSSE3, SSE4.1, SSE4.2 or AVX instructions, the library
+will pick an optimized codec that lets it encode/decode 12 or 24 bytes at a
+time, which gives a speedup of four or more times compared to the "plain"
+bytewise codec.
+
+NEON support is hardcoded to on or off at compile time, because portable
+runtime feature detection is unavailable on ARM.
+
+Even if your processor does not support SIMD instructions, this is a very fast
+library. The fallback routine can process 32 or 64 bits of input in one round,
+depending on your processor's word width, which still makes it significantly
+faster than naive bytewise implementations. On some 64-bit machines, the 64-bit
+routines even outperform the SSSE3 ones.
+
+To the author's knowledge, at the time of original release, this was the only
+Base64 library to offer SIMD acceleration. The author wrote
+[an article](http://www.alfredklomp.com/programming/sse-base64) explaining one
+possible SIMD approach to encoding/decoding Base64. The article can help figure
+out what the code is doing, and why.
+
+Notable features:
+
+- Really fast on x86 and ARM systems by using SIMD vector processing;
+- Can use [OpenMP](http://www.openmp.org) for even more parallel speedups;
+- Really fast on other 32 or 64-bit platforms through optimized routines;
+- Reads/writes blocks of streaming data;
+- Does not dynamically allocate memory;
+- Valid C99 that compiles with pedantic options on;
+- Re-entrant and threadsafe;
+- Unit tested;
+- Uses Duff's Device.
+
+## Acknowledgements
+
+The original AVX2, NEON and Aarch64/NEON codecs were generously contributed by
+[Inkymail](https://github.com/inkymail/base64), who, in their fork, also
+implemented some additional features. Their work is slowly being backported
+into this project.
+
+The SSSE3 and AVX2 codecs were substantially improved by using some very clever
+optimizations described by Wojciech Muła in a
+[series](http://0x80.pl/notesen/2016-01-12-sse-base64-encoding.html) of
+[articles](http://0x80.pl/notesen/2016-01-17-sse-base64-decoding.html).
+His own code is [here](https://github.com/WojciechMula/toys/tree/master/base64).
+
+The OpenMP implementation was added by Ferry Toth (@htot) from [Exalon Delft](http://www.exalondelft.nl).
+
+## Building
+
+The `lib` directory contains the code for the actual library.
+Typing `make` in the toplevel directory will build `lib/libbase64.o` and `bin/base64`.
+The first is a single, self-contained object file that you can link into your own project.
+The second is a standalone test binary that works similarly to the `base64` system utility.
+
+The matching header file needed to use this library is in `include/libbase64.h`.
+
+To compile just the "plain" library without SIMD codecs, type:
+
+```sh
+make lib/libbase64.o
+```
+
+Optional SIMD codecs can be included by specifying the `AVX2_CFLAGS`, `NEON32_CFLAGS`, `NEON64_CFLAGS`,
+`SSSE3_CFLAGS`, `SSE41_CFLAGS`, `SSE42_CFLAGS` and/or `AVX_CFLAGS` environment variables.
+A typical build invocation on x86 looks like this:
+
+```sh
+AVX2_CFLAGS=-mavx2 SSSE3_CFLAGS=-mssse3 SSE41_CFLAGS=-msse4.1 SSE42_CFLAGS=-msse4.2 AVX_CFLAGS=-mavx make lib/libbase64.o
+```
+
+### AVX2
+
+To build and include the AVX2 codec, set the `AVX2_CFLAGS` environment variable to a value that will turn on AVX2 support in your compiler, typically `-mavx2`.
+Example:
+
+```sh
+AVX2_CFLAGS=-mavx2 make
+```
+
+The codec will only be used if runtime feature detection shows that the target machine supports AVX2.
+
+### SSSE3
+
+To build and include the SSSE3 codec, set the `SSSE3_CFLAGS` environment variable to a value that will turn on SSSE3 support in your compiler, typically `-mssse3`.
+Example:
+
+```sh
+SSSE3_CFLAGS=-mssse3 make
+```
+
+The codec will only be used if runtime feature detection shows that the target machine supports SSSE3.
+
+### NEON
+
+This library includes two NEON codecs: one for regular 32-bit ARM and one for the 64-bit AArch64 with NEON, which has double the amount of SIMD registers and can do full 64-byte table lookups.
+These codecs encode in 48-byte chunks and decode in massive 64-byte chunks, so they had to be augmented with a uint32/64 codec to stay fast on smaller inputs!
+
+Use LLVM/Clang for compiling the NEON codecs.
+The code generation of at least GCC 4.6 (the version shipped with Raspbian and used for testing) contains a bug when compiling `vst4q_u8()`, and the generated assembly code is of low quality.
+NEON intrinsics are a known weak area of GCC.
+Clang does a better job.
+
+NEON support can unfortunately not be portably detected at runtime from userland (the `mrc` instruction is privileged), so the default value for using the NEON codec is determined at compile-time.
+But you can do your own runtime detection.
+You can include the NEON codec and make it the default, then do a runtime check if the CPU has NEON support, and if not, force a downgrade to non-NEON with `BASE64_FORCE_PLAIN`.
+
+These are your options:
+
+1. Don't include NEON support;
+2. build NEON support and make it the default, but build all other code without NEON flags so that you can override the default at runtime with `BASE64_FORCE_PLAIN`;
+3. build everything with NEON support and make it the default;
+4. build everything with NEON support, but don't make it the default (which makes no sense).
+
+For option 1, simply don't specify any NEON-specific compiler flags at all, like so:
+
+```sh
+CC=clang CFLAGS="-march=armv6" make
+```
+
+For option 2, keep your `CFLAGS` plain, but set the `NEON32_CFLAGS` environment variable to a value that will build NEON support.
+The line below, for instance, will build all the code at ARMv6 level, except for the NEON codec, which is built at ARMv7.
+It will also make the NEON codec the default.
+For ARMv6 platforms, override that default at runtime with the `BASE64_FORCE_PLAIN` flag.
+No ARMv7/NEON code will then be touched.
+
+```sh
+CC=clang CFLAGS="-march=armv6" NEON32_CFLAGS="-march=armv7 -mfpu=neon" make
+```
+
+For option 3, put everything in your `CFLAGS` and use a stub, but non-empty, `NEON32_CFLAGS`.
+This example works for the Raspberry Pi 2B V1.1, which has NEON support:
+
+```sh
+CC=clang CFLAGS="-march=armv7 -mtune=cortex-a7" NEON32_CFLAGS="-mfpu=neon" make
+```
+
+To build and include the NEON64 codec, use `CFLAGS` as usual to define the platform and set `NEON64_CFLAGS` to a nonempty stub.
+(The AArch64 target has mandatory NEON64 support.)
+Example:
+
+```sh
+CC=clang CFLAGS="--target=aarch64-linux-gnu -march=armv8-a" NEON64_CFLAGS=" " make
+```
+
+### OpenMP
+
+To enable OpenMP on GCC you need to build with `-fopenmp`. This can be done by setting the `OPENMP` environment variable to `1`.
+
+Example:
+
+```sh
+OPENMP=1 make
+```
+
+This will let the compiler define `_OPENMP`, which in turn will include the OpenMP optimized `lib_openmp.c` into `lib.c`.
+
+By default the number of parallel threads will be equal to the number of cores of the processor.
+On a quad core with hyperthreading eight cores will be detected, but hyperthreading will not increase the performance.
+
+To get verbose information about OpenMP start the program with `OMP_DISPLAY_ENV=VERBOSE`, for instance
+
+```sh
+OMP_DISPLAY_ENV=VERBOSE test/benchmark
+```
+
+To put a limit on the number of threads, start the program with `OMP_THREAD_LIMIT=n`, for instance
+
+```sh
+OMP_THREAD_LIMIT=2 test/benchmark
+```
+
+An example of running a benchmark with OpenMP, SSSE3 and AVX2 enabled:
+
+```sh
+make clean && OPENMP=1 SSSE3_CFLAGS=-mssse3 AVX2_CFLAGS=-mavx2 make && OPENMP=1 make -C test
+```
+
+## API reference
+
+Strings are represented as a pointer and a length; they are not
+zero-terminated. This was a conscious design decision. In the decoding step,
+relying on zero-termination would make no sense since the output could contain
+legitimate zero bytes. In the encoding step, returning the length saves the
+overhead of calling `strlen()` on the output. If you insist on the trailing
+zero, you can easily add it yourself at the given offset.
+
+### Flags
+
+Some API calls take a `flags` argument.
+That argument can be used to force the use of a specific codec, even if that codec is a no-op in the current build.
+Mainly there for testing purposes, this is also useful on ARM where the only way to do runtime NEON detection is to ask the OS if it's available.
+The following constants can be used:
+
+- `BASE64_FORCE_AVX2`
+- `BASE64_FORCE_NEON32`
+- `BASE64_FORCE_NEON64`
+- `BASE64_FORCE_PLAIN`
+- `BASE64_FORCE_SSSE3`
+- `BASE64_FORCE_SSE41`
+- `BASE64_FORCE_SSE42`
+- `BASE64_FORCE_AVX`
+
+Set `flags` to `0` for the default behavior, which is runtime feature detection on x86, a compile-time fixed codec on ARM, and the plain codec on other platforms.
+
+### Encoding
+
+#### base64_encode
+
+```c
+void base64_encode
+    ( const char  *src
+    , size_t       srclen
+    , char        *out
+    , size_t      *outlen
+    , int          flags
+    ) ;
+```
+
+Wrapper function to encode a plain string of given length.
+Output is written to `out` without trailing zero.
+Output length in bytes is written to `outlen`.
+The buffer in `out` has been allocated by the caller and is at least 4/3 the size of the input.
+
+#### base64_stream_encode_init
+
+```c
+void base64_stream_encode_init
+    ( struct base64_state  *state
+    , int                   flags
+    ) ;
+```
+
+Call this before calling `base64_stream_encode()` to init the state.
+
+#### base64_stream_encode
+
+```c
+void base64_stream_encode
+    ( struct base64_state  *state
+    , const char           *src
+    , size_t                srclen
+    , char                 *out
+    , size_t               *outlen
+    ) ;
+```
+
+Encodes the block of data of given length at `src`, into the buffer at `out`.
+Caller is responsible for allocating a large enough out-buffer; it must be at least 4/3 the size of the in-buffer, but take some margin.
+Places the number of new bytes written into `outlen` (which is set to zero when the function starts).
+Does not zero-terminate or finalize the output.
+
+#### base64_stream_encode_final
+
+```c
+void base64_stream_encode_final
+    ( struct base64_state  *state
+    , char                 *out
+    , size_t               *outlen
+    ) ;
+```
+
+Finalizes the output begun by previous calls to `base64_stream_encode()`.
+Adds the required end-of-stream markers if appropriate.
+`outlen` is modified and will contain the number of new bytes written at `out` (which will quite often be zero).
+
+### Decoding
+
+#### base64_decode
+
+```c
+int base64_decode
+    ( const char  *src
+    , size_t       srclen
+    , char        *out
+    , size_t      *outlen
+    , int          flags
+    ) ;
+```
+
+Wrapper function to decode a plain string of given length.
+Output is written to `out` without trailing zero. Output length in bytes is written to `outlen`.
+The buffer in `out` has been allocated by the caller and is at least 3/4 the size of the input.
+Returns `1` for success, and `0` when a decode error has occurred due to invalid input.
+Returns `-1` if the chosen codec is not included in the current build.
+
+#### base64_stream_decode_init
+
+```c
+void base64_stream_decode_init
+    ( struct base64_state  *state
+    , int                   flags
+    ) ;
+```
+
+Call this before calling `base64_stream_decode()` to init the state.
+
+#### base64_stream_decode
+
+```c
+int base64_stream_decode
+    ( struct base64_state  *state
+    , const char           *src
+    , size_t                srclen
+    , char                 *out
+    , size_t               *outlen
+    ) ;
+```
+
+Decodes the block of data of given length at `src`, into the buffer at `out`.
+Caller is responsible for allocating a large enough out-buffer; it must be at least 3/4 the size of the in-buffer, but take some margin.
+Places the number of new bytes written into `outlen` (which is set to zero when the function starts).
+Does not zero-terminate the output.
+Returns 1 if all is well, and 0 if a decoding error was found, such as an invalid character.
+Returns -1 if the chosen codec is not included in the current build.
+Used by the test harness to check whether a codec is available for testing.
+
+## Examples
+
+A simple example of encoding a static string to base64 and printing the output
+to stdout:
+
+```c
+#include <stdio.h>	/* fwrite */
+#include "libbase64.h"
+
+int main ()
+{
+	char src[] = "hello world";
+	char out[20];
+	size_t srclen = sizeof(src) - 1;
+	size_t outlen;
+
+	base64_encode(src, srclen, out, &outlen, 0);
+
+	fwrite(out, outlen, 1, stdout);
+
+	return 0;
+}
+```
+
+A simple example (no error checking, etc) of stream encoding standard input to
+standard output:
+
+```c
+#include <stdio.h>
+#include "libbase64.h"
+
+int main ()
+{
+	size_t nread, nout;
+	char buf[12000], out[16000];
+	struct base64_state state;
+
+	// Initialize stream encoder:
+	base64_stream_encode_init(&state, 0);
+
+	// Read contents of stdin into buffer:
+	while ((nread = fread(buf, 1, sizeof(buf), stdin)) > 0) {
+
+		// Encode buffer:
+		base64_stream_encode(&state, buf, nread, out, &nout);
+
+		// If there's output, print it to stdout:
+		if (nout) {
+			fwrite(out, nout, 1, stdout);
+		}
+
+		// If the end of the input was reached, exit the loop:
+		if (feof(stdin)) {
+			break;
+		}
+	}
+
+	// Finalize encoding:
+	base64_stream_encode_final(&state, out, &nout);
+
+	// If the finalizing resulted in extra output bytes, print them:
+	if (nout) {
+		fwrite(out, nout, 1, stdout);
+	}
+
+	return 0;
+}
+```
+
+Also see `bin/base64.c` for a simple re-implementation of the `base64` utility.
+A file or standard input is fed through the encoder/decoder, and the output is
+written to standard output.
+
+## Tests
+
+See `test/` for a small test suite. Testing is automated with
+[GitHub Actions](https://github.com/aklomp/base64/actions), which builds and
+tests the code across various architectures.
+
+## Benchmarks
+
+Benchmarks can be run with the built-in benchmark program as follows:
+
+```sh
+make -C test benchmark <buildflags> && test/benchmark
+```
+
+It will run an encoding and decoding benchmark for all of the compiled-in codecs.
+
+The tables below contain some results on random machines. All numbers measured with a 10MB buffer in MB/sec, rounded to the nearest integer.
+
+\*: Update needed
+
+x86 processors
+
+| Processor                                 | Plain enc | Plain dec | SSSE3 enc | SSSE3 dec | AVX enc | AVX dec | AVX2 enc | AVX2 dec |
+|-------------------------------------------|----------:|----------:|----------:|----------:|--------:|--------:|---------:|---------:|
+| i7-4771 @ 3.5 GHz                         | 833\*     | 1111\*    | 3333\*    | 4444\*    | TBD     | TBD     | 4999\*   | 6666\*   |
+| i7-4770 @ 3.4 GHz DDR1600                 | 1790\*    | 3038\*    | 4899\*    | 4043\*    | 4796\*  | 5709\*  | 4681\*   | 6386\*   |
+| i7-4770 @ 3.4 GHz DDR1600 OPENMP 1 thread | 1784\*    | 3041\*    | 4945\*    | 4035\*    | 4776\*  | 5719\*  | 4661\*   | 6294\*   |
+| i7-4770 @ 3.4 GHz DDR1600 OPENMP 2 thread | 3401\*    | 5729\*    | 5489\*    | 7444\*    | 5003\*  | 8624\*  | 5105\*   | 8558\*   |
+| i7-4770 @ 3.4 GHz DDR1600 OPENMP 4 thread | 4884\*    | 7099\*    | 4917\*    | 7057\*    | 4799\*  | 7143\*  | 4902\*   | 7219\*   |
+| i7-4770 @ 3.4 GHz DDR1600 OPENMP 8 thread | 5212\*    | 8849\*    | 5284\*    | 9099\*    | 5289\*  | 9220\*  | 4849\*   | 9200\*   |
+| i7-4870HQ @ 2.5 GHz                       | 1471\*    | 3066\*    | 6721\*    | 6962\*    | 7015\*  | 8267\*  | 8328\*   | 11576\*  |
+| i5-4590S @ 3.0 GHz                        | 3356      | 3197      | 4363      | 6104      | 4243    | 6233    | 4160     | 6344     |
+| Xeon X5570 @ 2.93 GHz                     | 2161      | 1508      | 3160      | 3915      | -       | -       | -        | -        |
+| Pentium4 @ 3.4 GHz                        | 896       | 740       | -         | -         | -       | -       | -        | -        |
+| Atom N270                                 | 243       | 266       | 508       | 387       | -       | -       | -        | -        |
+| AMD E-450                                 | 645       | 564       | 625       | 634       | -       | -       | -        | -        |
+| Intel Edison @ 500 MHz                    | 79\*      | 92\*      | 152\*     | 172\*     | -       | -       | -        | -        |
+| Intel Edison @ 500 MHz OPENMP 2 thread    | 158\*     | 184\*     | 300\*     | 343\*     | -       | -       | -        | -        |
+| Intel Edison @ 500 MHz (x86-64)           | 162       | 119       | 209       | 164       | -       | -       | -        | -        |
+| Intel Edison @ 500 MHz (x86-64) 2 thread  | 319       | 237       | 412       | 329       | -       | -       | -        | -        |
+
+ARM processors
+
+| Processor                                 | Plain enc | Plain dec | NEON32 enc | NEON32 dec | NEON64 enc | NEON64 dec |
+|-------------------------------------------|----------:|----------:|-----------:|-----------:|-----------:|-----------:|
+| Raspberry PI B+ V1.2                      | 46\*      | 40\*      | -          | -          | -          | -          |
+| Raspberry PI 2 B V1.1                     | 85        | 141       | 300        | 225        | -          | -          |
+| Apple iPhone SE armv7                     | 1056\*    | 895\*     | 2943\*     | 2618\*     | -          | -          |
+| Apple iPhone SE arm64                     | 1061\*    | 1239\*    | -          | -          | 4098\*     | 3983\*     |
+
+PowerPC processors
+
+| Processor                                 | Plain enc | Plain dec |
+|-------------------------------------------|----------:|----------:|
+| PowerPC E6500 @ 1.8GHz                    | 270\*     | 265\*     |
+
+
+Benchmarks on i7-4770 @ 3.4 GHz DDR1600 with varying buffer sizes:
+![Benchmarks](base64-benchmarks.png)
+
+Note: optimal buffer size to take advantage of the cache is in the range of 100 kB to 1 MB, leading to 12x faster AVX encoding/decoding compared to Plain, or a throughput of 24/27GB/sec.
+Also note the performance degradation when the buffer size is less than 10 kB due to thread creation overhead.
+To prevent this from happening `lib_openmp.c` defines `OMP_THRESHOLD 20000`, requiring at least a 20000 byte buffer to enable multithreading.
+
+## License
+
+This repository is licensed under the
+[BSD 2-clause License](http://opensource.org/licenses/BSD-2-Clause). See the
+LICENSE file.
--- a/deps/base64/base64/base64-benchmarks.png
+++ b/deps/base64/base64/base64-benchmarks.png
--- a/deps/base64/base64/bin/base64.c
+++ b/deps/base64/base64/bin/base64.c
@ -0,0 +1,128 @@
+#include <stddef.h>	// size_t
+#include <stdio.h>	// fopen()
+#include <string.h>	// strlen()
+#include <getopt.h>
+#include "../include/libbase64.h"
+
+// Size in bytes of the input buffer. The value is parenthesized so that the
+// macro expands as a single operand in any expression (`x / BUFSIZE`,
+// `x % BUFSIZE`, ...), not only in the multiplications used below.
+#define BUFSIZE (1024 * 1024)
+
+static char buf[BUFSIZE];
+static char out[(BUFSIZE * 5) / 3];	// Technically 4/3 of input, but take some margin
+size_t nread;
+size_t nout;
+
+// Stream-encode the contents of `fp` as base64 to standard output.
+//
+// The input is read in chunks of at most BUFSIZE bytes through the
+// file-level `buf`/`out` buffers. Both `fp` and stdout are closed before
+// returning.
+//
+// Returns 1 on success and 0 if reading the input or writing the output
+// failed; a diagnostic is printed to stderr in the failure case.
+static int
+enc (FILE *fp)
+{
+	int ret = 1;
+	struct base64_state state;
+
+	base64_stream_encode_init(&state, 0);
+
+	while ((nread = fread(buf, 1, BUFSIZE, fp)) > 0) {
+		base64_stream_encode(&state, buf, nread, out, &nout);
+
+		// A failed write means the output is truncated; report it
+		// instead of carrying on and exiting with a success status:
+		if (nout && fwrite(out, nout, 1, stdout) != 1) {
+			fprintf(stderr, "write error\n");
+			ret = 0;
+			goto out;
+		}
+		if (feof(fp)) {
+			break;
+		}
+	}
+	if (ferror(fp)) {
+		fprintf(stderr, "read error\n");
+		ret = 0;
+		goto out;
+	}
+
+	// Emit the end-of-stream markers, if any are required:
+	base64_stream_encode_final(&state, out, &nout);
+
+	if (nout && fwrite(out, nout, 1, stdout) != 1) {
+		fprintf(stderr, "write error\n");
+		ret = 0;
+	}
+out:	fclose(fp);
+
+	// stdout is buffered, so data may only reach its destination when the
+	// stream is closed; a failing close is therefore a write error too:
+	if (fclose(stdout) != 0 && ret) {
+		fprintf(stderr, "write error\n");
+		ret = 0;
+	}
+	return ret;
+}
+
+// Stream-decode the base64 contents of `fp` to standard output.
+//
+// The input is read in chunks of at most BUFSIZE bytes through the
+// file-level `buf`/`out` buffers. Both `fp` and stdout are closed before
+// returning.
+//
+// Returns 1 on success and 0 on invalid input, a read error or a write
+// error; a diagnostic is printed to stderr in the failure case.
+static int
+dec (FILE *fp)
+{
+	int ret = 1;
+	struct base64_state state;
+
+	base64_stream_decode_init(&state, 0);
+
+	while ((nread = fread(buf, 1, BUFSIZE, fp)) > 0) {
+		if (!base64_stream_decode(&state, buf, nread, out, &nout)) {
+			fprintf(stderr, "decoding error\n");
+			ret = 0;
+			goto out;
+		}
+
+		// A failed write means the output is truncated; report it
+		// instead of carrying on and exiting with a success status:
+		if (nout && fwrite(out, nout, 1, stdout) != 1) {
+			fprintf(stderr, "write error\n");
+			ret = 0;
+			goto out;
+		}
+		if (feof(fp)) {
+			break;
+		}
+	}
+	if (ferror(fp)) {
+		fprintf(stderr, "read error\n");
+		ret = 0;
+	}
+out:	fclose(fp);
+
+	// stdout is buffered, so data may only reach its destination when the
+	// stream is closed; a failing close is therefore a write error too:
+	if (fclose(stdout) != 0 && ret) {
+		fprintf(stderr, "write error\n");
+		ret = 0;
+	}
+	return ret;
+}
+
+// Entry point. Encodes (default) or decodes (`-d` / `--decode`) the named
+// file, or standard input when no file or `-` is given, to standard output.
+//
+// All diagnostics go to stderr: stdout carries the encoded/decoded data, so
+// printing messages there would corrupt the output of a pipeline.
+//
+// Returns 0 on success and 1 on a usage error, an unopenable file, or a
+// failed encode/decode.
+int
+main (int argc, char **argv)
+{
+	char *file;
+	FILE *fp;
+	int decode = 0;
+
+	// Parse options:
+	for (;;)
+	{
+		int c;
+		int opt_index = 0;
+		static struct option opt_long[] = {
+			{ "decode", 0, 0, 'd' },
+			{ 0, 0, 0, 0 }
+		};
+		if ((c = getopt_long(argc, argv, "d", opt_long, &opt_index)) == -1) {
+			break;
+		}
+		switch (c)
+		{
+			case 'd':
+				decode = 1;
+				break;
+
+			// Unrecognized option: getopt_long() has already
+			// printed a diagnostic, so show the usage and stop
+			// rather than silently ignoring the option:
+			default:
+				fprintf(stderr, "Usage: %s <file>\n", argv[0]);
+				return 1;
+		}
+	}
+
+	// No options left on command line? Read from stdin:
+	if (optind >= argc) {
+		fp = stdin;
+	}
+
+	// One option left on command line? Treat it as a file:
+	else if (optind + 1 == argc) {
+		file = argv[optind];
+		if (strcmp(file, "-") == 0) {
+			fp = stdin;
+		}
+		else if ((fp = fopen(file, "rb")) == NULL) {
+			fprintf(stderr, "cannot open %s\n", file);
+			return 1;
+		}
+	}
+
+	// More than one option left on command line? Syntax error:
+	else {
+		fprintf(stderr, "Usage: %s <file>\n", argv[0]);
+		return 1;
+	}
+
+	// Invert return codes to create shell return code:
+	return (decode) ? !dec(fp) : !enc(fp);
+}
--- a/deps/base64/base64/cmake/Modules/TargetArch.cmake
+++ b/deps/base64/base64/cmake/Modules/TargetArch.cmake
@ -0,0 +1,30 @@
+# Written in 2017 by Henrik Steffen Gaßmann henrik@gassmann.onl
+#
+# To the extent possible under law, the author(s) have dedicated all
+# copyright and related and neighboring rights to this software to the
+# public domain worldwide. This software is distributed without any warranty.
+#
+# You should have received a copy of the CC0 Public Domain Dedication
+# along with this software. If not, see
+#
+#     http://creativecommons.org/publicdomain/zero/1.0/
+#
+########################################################################
+
+# Path of the probe source, resolved while this module file is processed so
+# that it is relative to this file's directory.
+set(TARGET_ARCHITECTURE_TEST_FILE "${CMAKE_CURRENT_LIST_DIR}/../test-arch.c")
+
+# detect_target_architecture(<output-variable>)
+#
+# Compiles the probe file test-arch.c, whose every preprocessor branch ends
+# in an `#error ##arch=<name>##` directive, and extracts <name> (arm64, arm,
+# x64, x86 or unknown) from the captured compiler output.
+#
+# Sets in the caller's scope:
+#   <output-variable>          the extracted architecture name
+#   <output-variable>_<name>   1
+# and emits a warning when the extracted name is "unknown".
+function(detect_target_architecture OUTPUT_VARIABLE)
+    message(STATUS "${CMAKE_CURRENT_LIST_DIR}")
+    # The compile result itself is not used (the probe always hits #error);
+    # only the compiler output captured in _LOG matters.
+    try_compile(_IGNORED "${CMAKE_CURRENT_BINARY_DIR}"
+        "${TARGET_ARCHITECTURE_TEST_FILE}"
+        OUTPUT_VARIABLE _LOG
+    )
+
+    # The capture group ends up in CMAKE_MATCH_1; the full match is discarded.
+    # NOTE(review): if the marker is absent from the log, CMAKE_MATCH_1 is
+    # presumably empty and no warning is emitted below - confirm.
+    string(REGEX MATCH "##arch=([^#]+)##" _IGNORED "${_LOG}")
+
+    set(${OUTPUT_VARIABLE} "${CMAKE_MATCH_1}" PARENT_SCOPE)
+    set("${OUTPUT_VARIABLE}_${CMAKE_MATCH_1}" 1 PARENT_SCOPE)
+    if (CMAKE_MATCH_1 STREQUAL "unknown")
+        message(WARNING "could not detect the target architecture.")
+    endif()
+endfunction()
--- a/deps/base64/base64/cmake/Modules/TargetSIMDInstructionSet.cmake
+++ b/deps/base64/base64/cmake/Modules/TargetSIMDInstructionSet.cmake
@ -0,0 +1,34 @@
+# Written in 2016-2017 by Henrik Steffen Gaßmann henrik@gassmann.onl
+#
+# To the extent possible under law, the author(s) have dedicated all
+# copyright and related and neighboring rights to this software to the
+# public domain worldwide. This software is distributed without any warranty.
+#
+# You should have received a copy of the CC0 Public Domain Dedication
+# along with this software. If not, see
+#
+#     http://creativecommons.org/publicdomain/zero/1.0/
+#
+########################################################################
+
+########################################################################
+# compiler flags definition
+# define_SIMD_compile_flags()
+#
+# Sets COMPILE_FLAGS_<CODEC> to the compiler option that enables the given
+# instruction set. Being a macro, it sets these variables directly in the
+# scope of the caller.
+#
+# GNU/Clang/AppleClang: SSSE3, SSE41, SSE42, AVX, AVX2 and NEON32 are set.
+# MSVC:                 SSSE3, SSE41 and SSE42 are set to a single space and
+#                       AVX/AVX2 to /arch: options; NEON32 is not set.
+#                       NOTE(review): the single-space values presumably act
+#                       as "no flag needed" placeholders - confirm at the
+#                       call site.
+# Any other compiler:   no variable is set. COMPILE_FLAGS_NEON64 is never set
+#                       by this macro.
+macro(define_SIMD_compile_flags)
+    if (CMAKE_C_COMPILER_ID STREQUAL "GNU" OR CMAKE_C_COMPILER_ID STREQUAL "Clang" OR CMAKE_C_COMPILER_ID STREQUAL "AppleClang")
+        # x86
+        set(COMPILE_FLAGS_SSSE3 "-mssse3")
+        set(COMPILE_FLAGS_SSE41 "-msse4.1")
+        set(COMPILE_FLAGS_SSE42 "-msse4.2")
+        set(COMPILE_FLAGS_AVX "-mavx")
+        set(COMPILE_FLAGS_AVX2 "-mavx2")
+
+        #arm
+        set(COMPILE_FLAGS_NEON32 "-mfpu=neon")
+    elseif(MSVC)
+        set(COMPILE_FLAGS_SSSE3 " ")
+        set(COMPILE_FLAGS_SSE41 " ")
+        set(COMPILE_FLAGS_SSE42 " ")
+        set(COMPILE_FLAGS_AVX "/arch:AVX")
+        set(COMPILE_FLAGS_AVX2 "/arch:AVX2")
+    endif()
+endmacro(define_SIMD_compile_flags)
--- a/deps/base64/base64/cmake/base64-config.cmake.in
+++ b/deps/base64/base64/cmake/base64-config.cmake.in
@ -0,0 +1,5 @@
+@PACKAGE_INIT@
+
+# Load the exported target definitions that sit next to this file.
+include("${CMAKE_CURRENT_LIST_DIR}/base64-targets.cmake")
+
+# NOTE(review): check_required_components() is presumably provided by the
+# text substituted for the PACKAGE_INIT placeholder above - confirm.
+check_required_components(base64)
--- a/deps/base64/base64/cmake/config.h.in
+++ b/deps/base64/base64/cmake/config.h.in
@ -0,0 +1,25 @@
+#ifndef BASE64_CONFIG_H
+#define BASE64_CONFIG_H
+
+// Template for the generated config.h used by CMake builds. Each
+// cmakedefine01 line below is turned into a 0/1 definition of the
+// BASE64_WITH_* option, and the HAVE_* name that follows aliases it. The
+// HAVE_* names are the ones the codec sources test (e.g. `#if HAVE_AVX`),
+// and the same names are written by the plain Makefile build.
+
+#cmakedefine01 BASE64_WITH_SSSE3
+#define HAVE_SSSE3 BASE64_WITH_SSSE3
+
+#cmakedefine01 BASE64_WITH_SSE41
+#define HAVE_SSE41 BASE64_WITH_SSE41
+
+#cmakedefine01 BASE64_WITH_SSE42
+#define HAVE_SSE42 BASE64_WITH_SSE42
+
+#cmakedefine01 BASE64_WITH_AVX
+#define HAVE_AVX BASE64_WITH_AVX
+
+#cmakedefine01 BASE64_WITH_AVX2
+#define HAVE_AVX2 BASE64_WITH_AVX2
+
+#cmakedefine01 BASE64_WITH_NEON32
+#define HAVE_NEON32 BASE64_WITH_NEON32
+
+#cmakedefine01 BASE64_WITH_NEON64
+#define HAVE_NEON64 BASE64_WITH_NEON64
+
+#endif // BASE64_CONFIG_H
--- a/deps/base64/base64/cmake/test-arch.c
+++ b/deps/base64/base64/cmake/test-arch.c
@ -0,0 +1,35 @@
+// Written in 2017 by Henrik Steffen Gaßmann henrik@gassmann.onl
+//
+// To the extent possible under law, the author(s) have dedicated all
+// copyright and related and neighboring rights to this software to the
+// public domain worldwide. This software is distributed without any warranty.
+//
+// You should have received a copy of the CC0 Public Domain Dedication
+// along with this software. If not, see
+//
+//     http://creativecommons.org/publicdomain/zero/1.0/
+//
+////////////////////////////////////////////////////////////////////////////////
+
+// Architecture probe for TargetArch.cmake. This file is not meant to compile:
+// every branch below ends in an #error whose message carries a
+// "##arch=<name>##" marker, and the CMake module extracts <name> from the
+// compiler output. The branches are tested in order; the first match wins.
+
+// ARM 64-Bit
+#if defined(__aarch64__)
+#error ##arch=arm64##
+
+// ARM 32-Bit
+#elif defined(__arm__) \
+    || defined(_M_ARM)
+#error ##arch=arm##
+
+// x86 64-Bit
+#elif defined(__x86_64__) \
+    || defined(_M_X64)
+#error ##arch=x64##
+
+// x86 32-Bit
+#elif defined(__i386__) \
+    || defined(_M_IX86)
+#error ##arch=x86##
+
+// None of the macros above is defined
+#else
+#error ##arch=unknown##
+#endif
--- a/deps/base64/base64/include/libbase64.h
+++ b/deps/base64/base64/include/libbase64.h
@ -0,0 +1,145 @@
+#ifndef LIBBASE64_H
+#define LIBBASE64_H
+
+#include <stddef.h>	/* size_t */
+
+
+/* Platform-specific symbol annotations:
+ *  - Windows / Cygwin: __declspec(dllimport) / __declspec(dllexport);
+ *  - GCC-compatible compilers with __GNUC__ >= 4: visibility attributes;
+ *  - anything else: the macros expand to nothing. */
+#if defined(_WIN32) || defined(__CYGWIN__)
+#define BASE64_SYMBOL_IMPORT __declspec(dllimport)
+#define BASE64_SYMBOL_EXPORT __declspec(dllexport)
+#define BASE64_SYMBOL_PRIVATE
+
+#elif __GNUC__ >= 4
+#define BASE64_SYMBOL_IMPORT   __attribute__ ((visibility ("default")))
+#define BASE64_SYMBOL_EXPORT   __attribute__ ((visibility ("default")))
+#define BASE64_SYMBOL_PRIVATE  __attribute__ ((visibility ("hidden")))
+
+#else
+#define BASE64_SYMBOL_IMPORT
+#define BASE64_SYMBOL_EXPORT
+#define BASE64_SYMBOL_PRIVATE
+#endif
+
+/* BASE64_EXPORT marks the public API declared below; BASE64_NO_EXPORT marks
+ * library-internal symbols. Define BASE64_STATIC_DEFINE to make both expand
+ * to nothing. Otherwise BASE64_EXPORT is the export annotation when
+ * BASE64_EXPORTS is defined and the import annotation when it is not. */
+#if defined(BASE64_STATIC_DEFINE)
+#define BASE64_EXPORT
+#define BASE64_NO_EXPORT
+
+#else
+#if defined(BASE64_EXPORTS) // defined if we are building the shared library
+#define BASE64_EXPORT BASE64_SYMBOL_EXPORT
+
+#else
+#define BASE64_EXPORT BASE64_SYMBOL_IMPORT
+#endif
+
+#define BASE64_NO_EXPORT BASE64_SYMBOL_PRIVATE
+#endif
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* These are the flags that can be passed in the `flags` argument. The values
+ * below force the use of a given codec, even if that codec is a no-op in the
+ * current build. Used in testing. Set to 0 for the default behavior, which is
+ * runtime feature detection on x86, a compile-time fixed codec on ARM, and
+ * the plain codec on other platforms: */
+#define BASE64_FORCE_AVX2	(1 << 0)
+#define BASE64_FORCE_NEON32	(1 << 1)
+#define BASE64_FORCE_NEON64	(1 << 2)
+#define BASE64_FORCE_PLAIN	(1 << 3)
+#define BASE64_FORCE_SSSE3	(1 << 4)
+#define BASE64_FORCE_SSE41	(1 << 5)
+#define BASE64_FORCE_SSE42	(1 << 6)
+#define BASE64_FORCE_AVX	(1 << 7)
+
+/* State carried between calls of the streaming API. Initialize it with
+ * base64_stream_encode_init() or base64_stream_decode_init() and pass the
+ * same object to every following base64_stream_* call on that stream.
+ * NOTE(review): the meaning of the individual fields is defined by the
+ * implementation, which is not visible from this header; `flags` presumably
+ * holds the `flags` value given to the init function - confirm. */
+struct base64_state {
+	int eof;
+	int bytes;
+	int flags;
+	unsigned char carry;
+};
+
+/* Wrapper function to encode a plain string of given length. Output is written
+ * to *out without trailing zero. Output length in bytes is written to *outlen.
+ * The buffer in `out` has been allocated by the caller and is at least 4/3 the
+ * size of the input. See above for `flags`; set to 0 for default operation: */
+void BASE64_EXPORT base64_encode
+	( const char		*src
+	, size_t		 srclen
+	, char			*out
+	, size_t		*outlen
+	, int			 flags
+	) ;
+
+/* Call this before calling base64_stream_encode() to init the state. See above
+ * for `flags`; set to 0 for default operation: */
+void BASE64_EXPORT base64_stream_encode_init
+	( struct base64_state	*state
+	, int			 flags
+	) ;
+
+/* Encodes the block of data of given length at `src`, into the buffer at
+ * `out`. Caller is responsible for allocating a large enough out-buffer; it
+ * must be at least 4/3 the size of the in-buffer, but take some margin. Places
+ * the number of new bytes written into `outlen` (which is set to zero when the
+ * function starts). Does not zero-terminate or finalize the output. */
+void BASE64_EXPORT base64_stream_encode
+	( struct base64_state	*state
+	, const char		*src
+	, size_t		 srclen
+	, char			*out
+	, size_t		*outlen
+	) ;
+
+/* Finalizes the output begun by previous calls to `base64_stream_encode()`.
+ * Adds the required end-of-stream markers if appropriate. `outlen` is modified
+ * and will contain the number of new bytes written at `out` (which will quite
+ * often be zero). */
+void BASE64_EXPORT base64_stream_encode_final
+	( struct base64_state	*state
+	, char			*out
+	, size_t		*outlen
+	) ;
+
+/* Wrapper function to decode a plain string of given length. Output is written
+ * to *out without trailing zero. Output length in bytes is written to *outlen.
+ * The buffer in `out` has been allocated by the caller and is at least 3/4 the
+ * size of the input. See above for `flags`, set to 0 for default operation.
+ * Returns 1 for success, and 0 when a decode error has occurred due to invalid
+ * input. Returns -1 if the chosen codec is not included in the current
+ * build: */
+int BASE64_EXPORT base64_decode
+	( const char		*src
+	, size_t		 srclen
+	, char			*out
+	, size_t		*outlen
+	, int			 flags
+	) ;
+
+/* Call this before calling base64_stream_decode() to init the state. See above
+ * for `flags`; set to 0 for default operation: */
+void BASE64_EXPORT base64_stream_decode_init
+	( struct base64_state	*state
+	, int			 flags
+	) ;
+
+/* Decodes the block of data of given length at `src`, into the buffer at
+ * `out`. Caller is responsible for allocating a large enough out-buffer; it
+ * must be at least 3/4 the size of the in-buffer, but take some margin. Places
+ * the number of new bytes written into `outlen` (which is set to zero when the
+ * function starts). Does not zero-terminate the output. Returns 1 if all is
+ * well, and 0 if a decoding error was found, such as an invalid character.
+ * Returns -1 if the chosen codec is not included in the current build. Used by
+ * the test harness to check whether a codec is available for testing. */
+int BASE64_EXPORT base64_stream_decode
+	( struct base64_state	*state
+	, const char		*src
+	, size_t		 srclen
+	, char			*out
+	, size_t		*outlen
+	) ;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* LIBBASE64_H */
--- a/deps/base64/base64/lib/arch/avx/codec.c
+++ b/deps/base64/base64/lib/arch/avx/codec.c
@ -0,0 +1,42 @@
+#include <stdint.h>
+#include <stddef.h>
+#include <stdlib.h>
+
+#include "../../../include/libbase64.h"
+#include "../../tables/tables.h"
+#include "../../codecs.h"
+#include "config.h"
+#include "../../env.h"
+
+#if HAVE_AVX
+#include <immintrin.h>
+
+#include "../ssse3/dec_reshuffle.c"
+#include "../ssse3/dec_loop.c"
+#include "../ssse3/enc_translate.c"
+#include "../ssse3/enc_reshuffle.c"
+#include "../ssse3/enc_loop.c"
+
+#endif	// HAVE_AVX
+
+// Encoder entry point of the AVX codec. The function signature is supplied by
+// the BASE64_ENC_FUNCTION macro, which is defined outside this file. When
+// HAVE_AVX is false the body is just BASE64_ENC_STUB.
+BASE64_ENC_FUNCTION(avx)
+{
+#if HAVE_AVX
+	// The body is stitched together textually and the order of the next
+	// three lines is significant: enc_head.c declares s, slen, o and olen
+	// and stops inside an open switch/for construct at `case 0:`; the bulk
+	// loop therefore runs at that position; enc_tail.c then handles the
+	// remaining bytes and closes the construct. The AVX codec reuses the
+	// SSSE3 loop, which this file compiles itself through the
+	// "../ssse3/*.c" includes above.
+	#include "../generic/enc_head.c"
+	enc_loop_ssse3(&s, &slen, &o, &olen);
+	#include "../generic/enc_tail.c"
+#else
+	BASE64_ENC_STUB
+#endif
+}
+
+// Decoder entry point of the AVX codec. The function signature is supplied by
+// the BASE64_DEC_FUNCTION macro, which is defined outside this file. When
+// HAVE_AVX is false the body is just BASE64_DEC_STUB.
+BASE64_DEC_FUNCTION(avx)
+{
+#if HAVE_AVX
+	// The body is stitched together textually and the order of the next
+	// three lines is significant: dec_head.c declares s, slen, o and olen
+	// and stops inside an open switch/for construct at `case 0:`; the bulk
+	// loop therefore runs at that position; dec_tail.c then decodes the
+	// remaining bytes one at a time, closes the construct and returns. The
+	// AVX codec reuses the SSSE3 loop, which this file compiles itself
+	// through the "../ssse3/*.c" includes above.
+	#include "../generic/dec_head.c"
+	dec_loop_ssse3(&s, &slen, &o, &olen);
+	#include "../generic/dec_tail.c"
+#else
+	BASE64_DEC_STUB
+#endif
+}
--- a/deps/base64/base64/lib/arch/avx2/codec.c
+++ b/deps/base64/base64/lib/arch/avx2/codec.c
@ -0,0 +1,42 @@
+#include <stdint.h>
+#include <stddef.h>
+#include <stdlib.h>
+
+#include "../../../include/libbase64.h"
+#include "../../tables/tables.h"
+#include "../../codecs.h"
+#include "config.h"
+#include "../../env.h"
+
+#if HAVE_AVX2
+#include <immintrin.h>
+
+#include "dec_reshuffle.c"
+#include "dec_loop.c"
+#include "enc_translate.c"
+#include "enc_reshuffle.c"
+#include "enc_loop.c"
+
+#endif	// HAVE_AVX2
+
+// Encoder entry point of the AVX2 codec. The function signature is supplied
+// by the BASE64_ENC_FUNCTION macro, which is defined outside this file. When
+// HAVE_AVX2 is false the body is just BASE64_ENC_STUB.
+BASE64_ENC_FUNCTION(avx2)
+{
+#if HAVE_AVX2
+	// The body is stitched together textually and the order of the next
+	// three lines is significant: enc_head.c declares s, slen, o and olen
+	// and stops inside an open switch/for construct at `case 0:`; the bulk
+	// AVX2 loop therefore runs at that position; enc_tail.c then handles
+	// the remaining bytes and closes the construct.
+	#include "../generic/enc_head.c"
+	enc_loop_avx2(&s, &slen, &o, &olen);
+	#include "../generic/enc_tail.c"
+#else
+	BASE64_ENC_STUB
+#endif
+}
+
+// Decoder entry point of the AVX2 codec. The function signature is supplied
+// by the BASE64_DEC_FUNCTION macro, which is defined outside this file. When
+// HAVE_AVX2 is false the body is just BASE64_DEC_STUB.
+BASE64_DEC_FUNCTION(avx2)
+{
+#if HAVE_AVX2
+	// The body is stitched together textually and the order of the next
+	// three lines is significant: dec_head.c declares s, slen, o and olen
+	// and stops inside an open switch/for construct at `case 0:`; the bulk
+	// AVX2 loop therefore runs at that position; dec_tail.c then decodes
+	// the remaining bytes one at a time, closes the construct and returns.
+	#include "../generic/dec_head.c"
+	dec_loop_avx2(&s, &slen, &o, &olen);
+	#include "../generic/dec_tail.c"
+#else
+	BASE64_DEC_STUB
+#endif
+}
--- a/deps/base64/base64/lib/arch/avx2/dec_loop.c
+++ b/deps/base64/base64/lib/arch/avx2/dec_loop.c
@ -0,0 +1,110 @@
+static inline int
+dec_loop_avx2_inner (const uint8_t **s, uint8_t **o, size_t *rounds)
+{
+	// Decodes a single block: the 32 Base64 characters at *s become 24
+	// bytes at *o. On success *s advances by 32, *o by 24, *rounds drops
+	// by one and 1 is returned. When the block fails validation nothing
+	// is stored, the three arguments stay untouched and 0 is returned.
+	// The store is a full 32 bytes wide, so 8 bytes beyond the 24 payload
+	// bytes must be writable at *o.
+	// See the SSSE3 decoder for an explanation of the algorithm.
+
+	// Classification tables indexed by the low and the high nibble of a
+	// character. Each table is repeated for both 128-bit lanes:
+	const __m256i lut_lo = _mm256_setr_epi8(
+		0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A,
+		0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A);
+
+	const __m256i lut_hi = _mm256_setr_epi8(
+		0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+		0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10);
+
+	// Offsets that are added to a character to obtain its 6-bit value:
+	const __m256i lut_roll = _mm256_setr_epi8(
+		0, 16, 19, 4, -65, -65, -71, -71, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 16, 19, 4, -65, -65, -71, -71, 0, 0, 0, 0, 0, 0, 0, 0);
+
+	const __m256i mask_2F = _mm256_set1_epi8(0x2F);
+
+	// Unaligned load of the 32 input characters:
+	const __m256i chars = _mm256_loadu_si256((__m256i *) *s);
+
+	// Derive the table indices. The byte shuffle only looks at the low
+	// four bits and the top bit of an index, and the 0x2F mask clears the
+	// top bit, so the same constant serves both nibbles:
+	const __m256i hi_nibbles = _mm256_and_si256(_mm256_srli_epi32(chars, 4), mask_2F);
+	const __m256i lo_nibbles = _mm256_and_si256(chars, mask_2F);
+	const __m256i class_hi   = _mm256_shuffle_epi8(lut_hi, hi_nibbles);
+	const __m256i class_lo   = _mm256_shuffle_epi8(lut_lo, lo_nibbles);
+
+	// testz yields 1 only if (class_lo & class_hi) is zero everywhere;
+	// anything else means the block is rejected:
+	if (_mm256_testz_si256(class_lo, class_hi) == 0) {
+		return 0;
+	}
+
+	// Pick the offset by high nibble. Bytes equal to 0x2F compare as -1,
+	// which moves them one slot down in the offset table:
+	const __m256i is_2F    = _mm256_cmpeq_epi8(chars, mask_2F);
+	const __m256i roll_idx = _mm256_add_epi8(is_2F, hi_nibbles);
+	const __m256i deltas   = _mm256_shuffle_epi8(lut_roll, roll_idx);
+
+	// Characters plus offsets give the 6-bit values; dec_reshuffle()
+	// packs them into the 24-byte output format:
+	const __m256i sextets = _mm256_add_epi8(chars, deltas);
+	const __m256i packed  = dec_reshuffle(sextets);
+
+	// Store the output:
+	_mm256_storeu_si256((__m256i *) *o, packed);
+
+	*rounds -= 1;
+	*o += 24;
+	*s += 32;
+
+	return 1;
+}
+
+static inline void
+dec_loop_avx2 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
+{
+	// Bulk AVX2 decoder. Consumes as many whole 32-byte blocks from *s as
+	// is safe, writing 24 bytes per block at *o, and updates *s, *slen,
+	// *o and *olen to reflect what was actually decoded. It stops at the
+	// first block that fails validation; that block and everything after
+	// it is left for the caller.
+
+	// Inputs shorter than 45 bytes are left to the caller entirely:
+	if (*slen < 45) {
+		return;
+	}
+
+	// Every round stores 32 bytes although only 24 are payload (the other
+	// 8 are zero). At least 13 input bytes are held back (11 data bytes
+	// and up to two end-of-string markers) to cover that gap:
+	size_t rounds = (*slen - 13) / 32;
+
+	// Book all planned rounds up front; the totals are corrected at the
+	// bottom if the loop stops early:
+	*slen -= rounds * 32;	// 32 bytes consumed per round
+	*olen += rounds * 24;	// 24 bytes produced per round
+
+	// The length check above guarantees at least one round. The rounds
+	// are run in manually unrolled batches of 8, 4, 2 or 1; each call
+	// decrements `rounds` itself and `&&` stops a batch at the first
+	// failing block:
+	while (rounds > 0) {
+		int ok;
+
+		if (rounds >= 8) {
+			ok = dec_loop_avx2_inner(s, o, &rounds)
+			  && dec_loop_avx2_inner(s, o, &rounds)
+			  && dec_loop_avx2_inner(s, o, &rounds)
+			  && dec_loop_avx2_inner(s, o, &rounds)
+			  && dec_loop_avx2_inner(s, o, &rounds)
+			  && dec_loop_avx2_inner(s, o, &rounds)
+			  && dec_loop_avx2_inner(s, o, &rounds)
+			  && dec_loop_avx2_inner(s, o, &rounds);
+		}
+		else if (rounds >= 4) {
+			ok = dec_loop_avx2_inner(s, o, &rounds)
+			  && dec_loop_avx2_inner(s, o, &rounds)
+			  && dec_loop_avx2_inner(s, o, &rounds)
+			  && dec_loop_avx2_inner(s, o, &rounds);
+		}
+		else if (rounds >= 2) {
+			ok = dec_loop_avx2_inner(s, o, &rounds)
+			  && dec_loop_avx2_inner(s, o, &rounds);
+		}
+		else {
+			ok = dec_loop_avx2_inner(s, o, &rounds);
+		}
+
+		if (!ok) {
+			break;
+		}
+	}
+
+	// Give back the rounds that were booked but not executed:
+	*slen += rounds * 32;
+	*olen -= rounds * 24;
+}
--- a/deps/base64/base64/lib/arch/avx2/dec_reshuffle.c
+++ b/deps/base64/base64/lib/arch/avx2/dec_reshuffle.c
@ -0,0 +1,34 @@
+static inline __m256i
+dec_reshuffle (const __m256i in)
+{
+	// Packs 32 six-bit values (one per byte, expected to have the top two
+	// bits clear) into 24 contiguous bytes at the low end of the result.
+	// The upper 8 bytes of the result are zero.
+	//
+	// One 32-bit group holds the values A, B, C, D in memory order, shown
+	// here from most to least significant byte:
+	// 00DDDDDD 00CCCCCC 00BBBBBB 00AAAAAA
+
+	// Step 1: per 16-bit pair compute (first << 6) + second; the
+	// multiplier bytes are 0x40 and 0x01:
+	const __m256i mul_pairs = _mm256_set1_epi32(0x01400140);
+	const __m256i pairs = _mm256_maddubs_epi16(in, mul_pairs);
+	// 0000CCCC CCDDDDDD 0000AAAA AABBBBBB
+
+	// Step 2: per 32-bit group compute (low pair << 12) + high pair; the
+	// multiplier words are 0x1000 and 0x0001:
+	const __m256i mul_quads = _mm256_set1_epi32(0x00011000);
+	const __m256i quads = _mm256_madd_epi16(pairs, mul_quads);
+	// 00000000 AAAAAABB BBBBCCCC CCDDDDDD
+
+	// Step 3: inside each 128-bit lane, keep the three payload bytes of
+	// every group in reversed byte order and zero the last four bytes of
+	// the lane (index -1 yields zero):
+	const __m256i byte_order = _mm256_setr_epi8(
+		2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1,
+		2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1);
+	const __m256i lanes = _mm256_shuffle_epi8(quads, byte_order);
+
+	// Step 4: close the gap between the lanes. Dwords 0-2 and 4-6 carry
+	// payload; the permute only uses the low three bits of an index, so
+	// -1 selects dword 7, which step 3 zeroed:
+	const __m256i dword_order = _mm256_setr_epi32(0, 1, 2, 4, 5, 6, -1, -1);
+
+	return _mm256_permutevar8x32_epi32(lanes, dword_order);
+}
--- a/deps/base64/base64/lib/arch/avx2/enc_loop.c
+++ b/deps/base64/base64/lib/arch/avx2/enc_loop.c
@ -0,0 +1,89 @@
+static inline void
+enc_loop_avx2_inner_first (const uint8_t **s, uint8_t **o)
+{
+	// First encoding round. enc_reshuffle() wants its 24 payload bytes to
+	// start at byte offset 4 of the register. Later rounds get that by
+	// loading from 4 bytes before the payload; the first round cannot,
+	// because that address may lie before the start of the buffer. So
+	// load at *s itself and move the data up by one dword in-register.
+	const __m256i shift_one_dword = _mm256_setr_epi32(0, 0, 1, 2, 3, 4, 5, 6);
+
+	const __m256i raw     = _mm256_loadu_si256((__m256i *) *s);
+	const __m256i shifted = _mm256_permutevar8x32_epi32(raw, shift_one_dword);
+
+	// Reshuffle to 6-bit values, translate to the alphabet, store the 32
+	// output characters:
+	const __m256i encoded = enc_translate(enc_reshuffle(shifted));
+	_mm256_storeu_si256((__m256i *) *o, encoded);
+
+	// 24 bytes were consumed, but the pointer only moves by 20 so that
+	// the following rounds load at "payload - 4":
+	*o += 32;
+	*s += 20;
+}
+
+static inline void
+enc_loop_avx2_inner (const uint8_t **s, uint8_t **o)
+{
+	// One regular encoding round: loads 32 bytes at *s, encodes them with
+	// enc_reshuffle() and enc_translate(), and stores 32 characters at
+	// *o. Afterwards *s has advanced by 24 and *o by 32. The caller keeps
+	// *s 4 bytes before the next unencoded byte, which is the input
+	// position enc_reshuffle() expects.
+	const __m256i raw     = _mm256_loadu_si256((__m256i *) *s);
+	const __m256i encoded = enc_translate(enc_reshuffle(raw));
+
+	_mm256_storeu_si256((__m256i *) *o, encoded);
+
+	*o += 32;
+	*s += 24;
+}
+
+static inline void
+enc_loop_avx2 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
+{
+	// Bulk AVX2 encoder. Encodes as many whole 24-byte blocks from *s as
+	// is safe, writing 32 characters per block at *o, and updates *s,
+	// *slen, *o and *olen accordingly. Whatever input remains is left for
+	// the caller.
+
+	// A single 32-byte load must fit in the input:
+	if (*slen < 32) {
+		return;
+	}
+
+	// Blocks of 24 bytes are processed per round, but each load is 32
+	// bytes wide and, from the second round on, starts at an offset of
+	// -4. Keeping at least 4 bytes unprocessed after the last round
+	// ensures the final load stays inside the input buffer:
+	size_t rounds = (*slen - 4) / 24;
+
+	*slen -= rounds * 24;   // 24 bytes consumed per round
+	*olen += rounds * 32;   // 32 bytes produced per round
+
+	// The first round is special: it loads at the very start of the
+	// input so that the offset load does not underflow the buffer.
+	enc_loop_avx2_inner_first(s, o);
+	rounds--;
+
+	// Remaining rounds, manually unrolled in batches of 8, 4, 2 and 1:
+	while (rounds > 0) {
+		if (rounds >= 8) {
+			enc_loop_avx2_inner(s, o);
+			enc_loop_avx2_inner(s, o);
+			enc_loop_avx2_inner(s, o);
+			enc_loop_avx2_inner(s, o);
+			enc_loop_avx2_inner(s, o);
+			enc_loop_avx2_inner(s, o);
+			enc_loop_avx2_inner(s, o);
+			enc_loop_avx2_inner(s, o);
+			rounds -= 8;
+		}
+		else if (rounds >= 4) {
+			enc_loop_avx2_inner(s, o);
+			enc_loop_avx2_inner(s, o);
+			enc_loop_avx2_inner(s, o);
+			enc_loop_avx2_inner(s, o);
+			rounds -= 4;
+		}
+		else if (rounds >= 2) {
+			enc_loop_avx2_inner(s, o);
+			enc_loop_avx2_inner(s, o);
+			rounds -= 2;
+		}
+		else {
+			// Exactly one round left; it is the last one.
+			enc_loop_avx2_inner(s, o);
+			break;
+		}
+	}
+
+	// Undo the -4 load offset so that *s points at the next unencoded
+	// byte again:
+	*s += 4;
+}
--- a/deps/base64/base64/lib/arch/avx2/enc_reshuffle.c
+++ b/deps/base64/base64/lib/arch/avx2/enc_reshuffle.c
@ -0,0 +1,83 @@
+static inline __m256i
+enc_reshuffle (const __m256i input)
+{
+	// AVX2 translation of the SSSE3 reshuffling algorithm: splits 24
+	// input bytes into 32 six-bit values, one per output byte. To be able
+	// to work inside the two 128-bit lanes, the input is expected shifted
+	// by 4 bytes: the payload occupies bytes 4..27 of `input` and the
+	// first and last 4 bytes are ignored.
+	//
+	// Below, a, b, c are three consecutive input bytes. In the bit
+	// diagrams an upper-case letter marks the most significant bits of a
+	// byte and a lower-case letter its least significant bits; bytes are
+	// shown from MSB to LSB of a 32-bit group.
+
+	// Step 1: spread every 3-byte group over a 32-bit group as "b c a b".
+	// The lower lane takes its payload from lane bytes 4..15, the upper
+	// lane from lane bytes 0..11:
+	const __m256i spread = _mm256_set_epi8(
+		10, 11,  9, 10,    7,  8,  6,  7,    4,  5,  3,  4,    1,  2,  0,  1,
+		14, 15, 13, 14,   11, 12, 10, 11,    8,  9,  7,  8,    5,  6,  4,  5);
+	const __m256i in = _mm256_shuffle_epi8(input, spread);
+
+	// Step 2: isolate the bits of the first and third output value ...
+	const __m256i bits_13 = _mm256_and_si256(in, _mm256_set1_epi32(0x0FC0FC00));
+	// 0000bbbb CC000000 AAAAAA00 00000000
+
+	// ... and shift them right by 6 and by 10 with a high multiply:
+	const __m256i vals_13 = _mm256_mulhi_epu16(bits_13, _mm256_set1_epi32(0x04000040));
+	// 00000000 00bbbbCC 00000000 00AAAAAA
+
+	// Step 3: isolate the bits of the second and fourth output value ...
+	const __m256i bits_24 = _mm256_and_si256(in, _mm256_set1_epi32(0x003F03F0));
+	// 00000000 00cccccc 000000aa BBBB0000
+
+	// ... and shift them left by 8 and by 4 with a low multiply:
+	const __m256i vals_24 = _mm256_mullo_epi16(bits_24, _mm256_set1_epi32(0x01000010));
+	// 00cccccc 00000000 00aaBBBB 00000000
+
+	// Step 4: merge both halves:
+	// 00cccccc 00bbbbCC 00aaBBBB 00AAAAAA
+	return _mm256_or_si256(vals_13, vals_24);
+}
--- a/deps/base64/base64/lib/arch/avx2/enc_translate.c
+++ b/deps/base64/base64/lib/arch/avx2/enc_translate.c
@ -0,0 +1,30 @@
+static inline __m256i
+enc_translate (const __m256i in)
+{
+	// Maps 32 values in the range 0..63 to their Base64 characters by
+	// adding a per-range offset to each value. There are five ranges:
+	// #  From      To         Offset  Index    Characters
+	// 0  [0..25]   [65..90]   +65     0        ABCDEFGHIJKLMNOPQRSTUVWXYZ
+	// 1  [26..51]  [97..122]  +71     1        abcdefghijklmnopqrstuvwxyz
+	// 2  [52..61]  [48..57]    -4     [2..11]  0123456789
+	// 3  [62]      [43]       -19     12       +
+	// 4  [63]      [47]       -16     13       /
+
+	// The offsets by table index, repeated for both 128-bit lanes:
+	const __m256i offsets = _mm256_setr_epi8(
+		65, 71, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -19, -16, 0, 0,
+		65, 71, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -19, -16, 0, 0);
+
+	// Saturating subtraction of 51 gives 0 for ranges #0 and #1 and
+	// 1..12 for ranges #2 to #4. That is the right index for range #0
+	// and one too low for all the others:
+	const __m256i rough = _mm256_subs_epu8(in, _mm256_set1_epi8(51));
+
+	// 0xFF (-1) for every value above 25, i.e. ranges #1 to #4, and 0x00
+	// for range #0:
+	const __m256i above_25 = _mm256_cmpgt_epi8(in, _mm256_set1_epi8(25));
+
+	// Subtracting -1 adds the missing 1 where it is needed:
+	const __m256i indices = _mm256_sub_epi8(rough, above_25);
+
+	// Look up the offsets and add them to the input values:
+	const __m256i deltas = _mm256_shuffle_epi8(offsets, indices);
+
+	return _mm256_add_epi8(in, deltas);
+}
--- a/deps/base64/base64/lib/arch/generic/32/dec_loop.c
+++ b/deps/base64/base64/lib/arch/generic/32/dec_loop.c
@ -0,0 +1,86 @@
+static inline int
+dec_loop_generic_32_inner (const uint8_t **s, uint8_t **o, size_t *rounds)
+{
+	// Decodes the four Base64 characters at *s into three bytes at *o
+	// with the help of four per-position 32-bit lookup tables. On success
+	// *s advances by 4, *o by 3, *rounds drops by one and 1 is returned.
+	// If the tables flag an invalid character, nothing is written, the
+	// arguments stay untouched and 0 is returned.
+	const uint8_t *in = *s;
+
+	// Each table contributes the bits of one character, already in their
+	// final position, so the results only have to be OR-ed together:
+	uint32_t word = base64_table_dec_32bit_d0[in[0]];
+
+	word |= base64_table_dec_32bit_d1[in[1]];
+	word |= base64_table_dec_32bit_d2[in[2]];
+	word |= base64_table_dec_32bit_d3[in[3]];
+
+	// The tables flag an invalid character in the MSB on little-endian
+	// builds and in the LSB on big-endian builds:
+#if BASE64_LITTLE_ENDIAN
+	const uint32_t invalid = word & UINT32_C(0x80000000);
+#else
+	const uint32_t invalid = word & UINT32_C(1);
+#endif
+
+	if (invalid) {
+		return 0;
+	}
+
+	// Store the whole word. That is one byte more than the three payload
+	// bytes; the caller holds back input so that the extra byte is
+	// overwritten by later output:
+	memcpy(*o, &word, sizeof (word));
+
+	*rounds -= 1;
+	*o += 3;
+	*s += 4;
+
+	return 1;
+}
+
+static inline void
+dec_loop_generic_32 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
+{
+	// Table-driven bulk decoder. Consumes as many whole 4-byte groups
+	// from *s as is safe, writing 3 bytes per group at *o, and updates
+	// *s, *slen, *o and *olen to reflect what was actually decoded. It
+	// stops at the first group that fails validation; that group and
+	// everything after it is left for the caller.
+
+	// Inputs shorter than 8 bytes are left to the caller entirely:
+	if (*slen < 8) {
+		return;
+	}
+
+	// Every round stores 4 bytes although only 3 are payload (the fourth
+	// is zero). At least 4 input bytes are held back (two data bytes and
+	// up to two end-of-string markers) to cover that gap:
+	size_t rounds = (*slen - 4) / 4;
+
+	// Book all planned rounds up front; the totals are corrected at the
+	// bottom if the loop stops early:
+	*slen -= rounds * 4;	// 4 bytes consumed per round
+	*olen += rounds * 3;	// 3 bytes produced per round
+
+	// The length check above guarantees at least one round. The rounds
+	// are run in manually unrolled batches of 8, 4, 2 or 1; each call
+	// decrements `rounds` itself and `&&` stops a batch at the first
+	// failing group:
+	while (rounds > 0) {
+		int ok;
+
+		if (rounds >= 8) {
+			ok = dec_loop_generic_32_inner(s, o, &rounds)
+			  && dec_loop_generic_32_inner(s, o, &rounds)
+			  && dec_loop_generic_32_inner(s, o, &rounds)
+			  && dec_loop_generic_32_inner(s, o, &rounds)
+			  && dec_loop_generic_32_inner(s, o, &rounds)
+			  && dec_loop_generic_32_inner(s, o, &rounds)
+			  && dec_loop_generic_32_inner(s, o, &rounds)
+			  && dec_loop_generic_32_inner(s, o, &rounds);
+		}
+		else if (rounds >= 4) {
+			ok = dec_loop_generic_32_inner(s, o, &rounds)
+			  && dec_loop_generic_32_inner(s, o, &rounds)
+			  && dec_loop_generic_32_inner(s, o, &rounds)
+			  && dec_loop_generic_32_inner(s, o, &rounds);
+		}
+		else if (rounds >= 2) {
+			ok = dec_loop_generic_32_inner(s, o, &rounds)
+			  && dec_loop_generic_32_inner(s, o, &rounds);
+		}
+		else {
+			ok = dec_loop_generic_32_inner(s, o, &rounds);
+		}
+
+		if (!ok) {
+			break;
+		}
+	}
+
+	// Give back the rounds that were booked but not executed:
+	*slen += rounds * 4;
+	*olen -= rounds * 3;
+}
--- a/deps/base64/base64/lib/arch/generic/32/enc_loop.c
+++ b/deps/base64/base64/lib/arch/generic/32/enc_loop.c
@ -0,0 +1,73 @@
+static inline void
+enc_loop_generic_32_inner (const uint8_t **s, uint8_t **o)
+{
+	// Encodes the three bytes at *s into four characters at *o, using a
+	// lookup table that is indexed with 12 input bits at a time. Four
+	// bytes are read from *s although only three are consumed, so the
+	// caller must guarantee one readable byte beyond the group.
+	// Afterwards *s has advanced by 3 and *o by 4.
+	uint32_t word;
+
+	memcpy(&word, *s, sizeof (word));
+
+	// The bit extraction below needs the first input byte in the most
+	// significant position, so convert to big-endian if necessary;
+	// otherwise the shifted bits would not carry over properly between
+	// adjacent bytes:
+	word = BASE64_HTOBE32(word);
+
+	// Bits 31..20 and bits 19..8 are the two table indices; the lowest
+	// byte belongs to the next group and is ignored:
+	const size_t first12  = (word >> 20) & 0xFFFU;
+	const size_t second12 = (word >>  8) & 0xFFFU;
+
+	// Copy two bytes out of the table per lookup:
+	memcpy(*o,     &base64_table_enc_12bit[first12],  2);
+	memcpy(*o + 2, &base64_table_enc_12bit[second12], 2);
+
+	*o += 4;
+	*s += 3;
+}
+
+static inline void
+enc_loop_generic_32 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
+{
+	// Table-driven bulk encoder for 32-bit words. Encodes as many whole
+	// 3-byte groups from *s as is safe, writing 4 characters per group at
+	// *o, and updates *s, *slen, *o and *olen accordingly. Whatever input
+	// remains is left for the caller.
+
+	// A single 4-byte load must fit in the input:
+	if (*slen < 4) {
+		return;
+	}
+
+	// Groups of 3 bytes are processed per round, but each load is 4 bytes
+	// wide. Keeping at least one byte unprocessed after the last round
+	// ensures the final load stays inside the input buffer:
+	size_t rounds = (*slen - 1) / 3;
+
+	*slen -= rounds * 3;	// 3 bytes consumed per round
+	*olen += rounds * 4;	// 4 bytes produced per round
+
+	// The length check above guarantees at least one round. Rounds are
+	// manually unrolled in batches of 8, 4, 2 and 1:
+	while (rounds > 0) {
+		if (rounds >= 8) {
+			enc_loop_generic_32_inner(s, o);
+			enc_loop_generic_32_inner(s, o);
+			enc_loop_generic_32_inner(s, o);
+			enc_loop_generic_32_inner(s, o);
+			enc_loop_generic_32_inner(s, o);
+			enc_loop_generic_32_inner(s, o);
+			enc_loop_generic_32_inner(s, o);
+			enc_loop_generic_32_inner(s, o);
+			rounds -= 8;
+		}
+		else if (rounds >= 4) {
+			enc_loop_generic_32_inner(s, o);
+			enc_loop_generic_32_inner(s, o);
+			enc_loop_generic_32_inner(s, o);
+			enc_loop_generic_32_inner(s, o);
+			rounds -= 4;
+		}
+		else if (rounds >= 2) {
+			enc_loop_generic_32_inner(s, o);
+			enc_loop_generic_32_inner(s, o);
+			rounds -= 2;
+		}
+		else {
+			// Exactly one round left; it is the last one.
+			enc_loop_generic_32_inner(s, o);
+			break;
+		}
+	}
+}
--- a/deps/base64/base64/lib/arch/generic/64/enc_loop.c
+++ b/deps/base64/base64/lib/arch/generic/64/enc_loop.c
@ -0,0 +1,77 @@
+static inline void
+enc_loop_generic_64_inner (const uint8_t **s, uint8_t **o)
+{
+	// Encodes the six bytes at *s into eight characters at *o, using a
+	// lookup table that is indexed with 12 input bits at a time. Eight
+	// bytes are read from *s although only six are consumed, so the
+	// caller must guarantee two readable bytes beyond the group.
+	// Afterwards *s has advanced by 6 and *o by 8.
+	uint64_t word;
+
+	memcpy(&word, *s, sizeof (word));
+
+	// The bit extraction below needs the first input byte in the most
+	// significant position, so convert to big-endian if necessary;
+	// otherwise the shifted bits would not carry over properly between
+	// adjacent bytes:
+	word = BASE64_HTOBE64(word);
+
+	// Four consecutive 12-bit table indices, starting at the top of the
+	// word; the lowest two bytes belong to the next group and are
+	// ignored:
+	const size_t idx_a = (word >> 52) & 0xFFFU;
+	const size_t idx_b = (word >> 40) & 0xFFFU;
+	const size_t idx_c = (word >> 28) & 0xFFFU;
+	const size_t idx_d = (word >> 16) & 0xFFFU;
+
+	// Copy two bytes out of the table per lookup:
+	memcpy(*o,     &base64_table_enc_12bit[idx_a], 2);
+	memcpy(*o + 2, &base64_table_enc_12bit[idx_b], 2);
+	memcpy(*o + 4, &base64_table_enc_12bit[idx_c], 2);
+	memcpy(*o + 6, &base64_table_enc_12bit[idx_d], 2);
+
+	*o += 8;
+	*s += 6;
+}
+
+static inline void
+enc_loop_generic_64 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
+{
+	// Table-driven bulk encoder for 64-bit words. Encodes as many whole
+	// 6-byte groups from *s as is safe, writing 8 characters per group at
+	// *o, and updates *s, *slen, *o and *olen accordingly. Whatever input
+	// remains is left for the caller.
+
+	// A single 8-byte load must fit in the input:
+	if (*slen < 8) {
+		return;
+	}
+
+	// Groups of 6 bytes are processed per round, but each load is 8 bytes
+	// wide. Keeping at least 2 bytes unprocessed after the last round
+	// ensures the final load stays inside the input buffer:
+	size_t rounds = (*slen - 2) / 6;
+
+	*slen -= rounds * 6;	// 6 bytes consumed per round
+	*olen += rounds * 8;	// 8 bytes produced per round
+
+	// The length check above guarantees at least one round. Rounds are
+	// manually unrolled in batches of 8, 4, 2 and 1:
+	while (rounds > 0) {
+		if (rounds >= 8) {
+			enc_loop_generic_64_inner(s, o);
+			enc_loop_generic_64_inner(s, o);
+			enc_loop_generic_64_inner(s, o);
+			enc_loop_generic_64_inner(s, o);
+			enc_loop_generic_64_inner(s, o);
+			enc_loop_generic_64_inner(s, o);
+			enc_loop_generic_64_inner(s, o);
+			enc_loop_generic_64_inner(s, o);
+			rounds -= 8;
+		}
+		else if (rounds >= 4) {
+			enc_loop_generic_64_inner(s, o);
+			enc_loop_generic_64_inner(s, o);
+			enc_loop_generic_64_inner(s, o);
+			enc_loop_generic_64_inner(s, o);
+			rounds -= 4;
+		}
+		else if (rounds >= 2) {
+			enc_loop_generic_64_inner(s, o);
+			enc_loop_generic_64_inner(s, o);
+			rounds -= 2;
+		}
+		else {
+			// Exactly one round left; it is the last one.
+			enc_loop_generic_64_inner(s, o);
+			break;
+		}
+	}
+}
--- a/deps/base64/base64/lib/arch/generic/codec.c
+++ b/deps/base64/base64/lib/arch/generic/codec.c
@ -0,0 +1,39 @@
+#include <stdint.h>
+#include <stddef.h>
+#include <string.h>
+
+#include "../../../include/libbase64.h"
+#include "../../tables/tables.h"
+#include "../../codecs.h"
+#include "config.h"
+#include "../../env.h"
+
+#if BASE64_WORDSIZE == 32
+#  include "32/enc_loop.c"
+#elif BASE64_WORDSIZE == 64
+#  include "64/enc_loop.c"
+#endif
+
+#if BASE64_WORDSIZE >= 32
+#  include "32/dec_loop.c"
+#endif
+
+// Encoder entry point of the plain (non-SIMD) codec. The function signature
+// is supplied by the BASE64_ENC_FUNCTION macro, which is defined outside this
+// file.
+BASE64_ENC_FUNCTION(plain)
+{
+	// The body is stitched together textually and the order is
+	// significant: enc_head.c declares s, slen, o and olen and stops
+	// inside an open switch/for construct at `case 0:`; the bulk loop, if
+	// any, runs at that position; enc_tail.c then handles the remaining
+	// bytes and closes the construct.
+	#include "enc_head.c"
+	// Pick the bulk loop that matches the word size. For any other value
+	// of BASE64_WORDSIZE no bulk loop is compiled in and all input goes
+	// through the bytewise code in enc_tail.c.
+#if BASE64_WORDSIZE == 32
+	enc_loop_generic_32(&s, &slen, &o, &olen);
+#elif BASE64_WORDSIZE == 64
+	enc_loop_generic_64(&s, &slen, &o, &olen);
+#endif
+	#include "enc_tail.c"
+}
+
+// Decoder entry point of the plain (non-SIMD) codec. The function signature
+// is supplied by the BASE64_DEC_FUNCTION macro, which is defined outside this
+// file.
+BASE64_DEC_FUNCTION(plain)
+{
+	// The body is stitched together textually and the order is
+	// significant: dec_head.c declares s, slen, o and olen and stops
+	// inside an open switch/for construct at `case 0:`; the bulk loop, if
+	// any, runs at that position; dec_tail.c then decodes the remaining
+	// bytes one at a time, closes the construct and returns.
+	#include "dec_head.c"
+	// The 32-bit table decoder is used for every word size of 32 bits or
+	// more. For smaller word sizes no bulk loop is compiled in.
+#if BASE64_WORDSIZE >= 32
+	dec_loop_generic_32(&s, &slen, &o, &olen);
+#endif
+	#include "dec_tail.c"
+}
--- a/deps/base64/base64/lib/arch/generic/dec_head.c
+++ b/deps/base64/base64/lib/arch/generic/dec_head.c
@ -0,0 +1,37 @@
+// Shared prologue of every decoder function. This fragment is textually
+// included at the top of a BASE64_DEC_FUNCTION body and expects `state`,
+// `src`, `srclen`, `out` and `outlen` to be in scope. It deliberately ends
+// inside an open switch/for construct at `case 0:`; the including function
+// may place a bulk decoding loop at that position and must close the
+// construct by including dec_tail.c.
+
+// Return value of the decoder; stays 0 unless a path below sets it to 1:
+int ret = 0;
+const uint8_t *s = (const uint8_t *) src;
+uint8_t *o = (uint8_t *) out;
+// Table value of the character that is currently being decoded:
+uint8_t q;
+
+// Use local temporaries to avoid cache thrashing:
+size_t olen = 0;
+size_t slen = srclen;
+struct base64_state st;
+st.eof = state->eof;
+st.bytes = state->bytes;
+st.carry = state->carry;
+
+// If we previously saw an EOF or an invalid character, bail out:
+if (st.eof) {
+	*outlen = 0;
+	ret = 0;
+	// If there was a trailing '=' to check, check it. BASE64_AEOF is set
+	// by dec_tail.c when the input ended right after the first '=' of a
+	// possible "==" ending. The call succeeds only if the whole input of
+	// this call is exactly one '=' (table value 254); in every case the
+	// state moves on to the final BASE64_EOF:
+	if (slen && (st.eof == BASE64_AEOF)) {
+		state->bytes = 0;
+		state->eof = BASE64_EOF;
+		ret = ((base64_table_dec_8bit[*s++] == 254) && (slen == 1)) ? 1 : 0;
+	}
+	return ret;
+}
+
+// Turn four 6-bit numbers into three bytes:
+// out[0] = 11111122
+// out[1] = 22223333
+// out[2] = 33444444
+
+// Duff's device again: the switch jumps into the middle of the endless loop,
+// to the case that matches the number of characters of the current group
+// that earlier calls have already consumed (st.bytes):
+switch (st.bytes)
+{
+	for (;;)
+	{
+	case 0:
--- a/deps/base64/base64/lib/arch/generic/dec_tail.c
+++ b/deps/base64/base64/lib/arch/generic/dec_tail.c
@ -0,0 +1,91 @@
+		// Shared epilogue of every decoder function; continues the
+		// `case 0:` that dec_head.c left open. Decodes one character
+		// per case; in the table, 254 stands for '=' and 255 for an
+		// invalid character. Running out of input at any position is
+		// not an error: the position (st.bytes) and the pending bits
+		// (st.carry) are saved so that the next call can continue.
+		if (slen-- == 0) {
+			ret = 1;
+			break;
+		}
+		if ((q = base64_table_dec_8bit[*s++]) >= 254) {
+			st.eof = BASE64_EOF;
+			// Treat character '=' as invalid for byte 0:
+			break;
+		}
+		// First character: its six bits are the top of output byte 0.
+		st.carry = q << 2;
+		st.bytes++;
+
+		// Deliberate fallthrough:
+		BASE64_FALLTHROUGH
+
+	case 1:	if (slen-- == 0) {
+			ret = 1;
+			break;
+		}
+		if ((q = base64_table_dec_8bit[*s++]) >= 254) {
+			st.eof = BASE64_EOF;
+			// Treat character '=' as invalid for byte 1:
+			break;
+		}
+		// Second character: completes output byte 0; its low four
+		// bits become the top of output byte 1.
+		*o++ = st.carry | (q >> 4);
+		st.carry = q << 4;
+		st.bytes++;
+		olen++;
+
+		// Deliberate fallthrough:
+		BASE64_FALLTHROUGH
+
+	case 2:	if (slen-- == 0) {
+			ret = 1;
+			break;
+		}
+		if ((q = base64_table_dec_8bit[*s++]) >= 254) {
+			st.bytes++;
+			// When q == 254, the input char is '='.
+			// Check if next byte is also '=':
+			if (q == 254) {
+				if (slen-- != 0) {
+					st.bytes = 0;
+					// EOF:
+					st.eof = BASE64_EOF;
+					// Success only if the next byte is '=' and
+					// is also the last byte of the input:
+					q = base64_table_dec_8bit[*s++];
+					ret = ((q == 254) && (slen == 0)) ? 1 : 0;
+					break;
+				}
+				else {
+					// Almost EOF: the input ended after the first
+					// '='; dec_head.c checks the second '=' at
+					// the start of the next call.
+					st.eof = BASE64_AEOF;
+					ret = 1;
+					break;
+				}
+			}
+			// If we get here, there was an error:
+			// NOTE(review): unlike the other positions, st.eof is
+			// not set here, so the state does not record this
+			// error for later calls -- confirm this is intended.
+			break;
+		}
+		// Third character: completes output byte 1; its low two bits
+		// become the top of output byte 2.
+		*o++ = st.carry | (q >> 2);
+		st.carry = q << 6;
+		st.bytes++;
+		olen++;
+
+		// Deliberate fallthrough:
+		BASE64_FALLTHROUGH
+
+	case 3:	if (slen-- == 0) {
+			ret = 1;
+			break;
+		}
+		if ((q = base64_table_dec_8bit[*s++]) >= 254) {
+			st.bytes = 0;
+			st.eof = BASE64_EOF;
+			// When q == 254, the input char is '='. Return 1 and EOF.
+			// When q == 255, the input char is invalid. Return 0 and EOF.
+			// The '=' is only accepted as the last byte of the input.
+			ret = ((q == 254) && (slen == 0)) ? 1 : 0;
+			break;
+		}
+		// Fourth character: completes output byte 2. The group is
+		// finished; the for loop continues at `case 0:`.
+		*o++ = st.carry | q;
+		st.carry = 0;
+		st.bytes = 0;
+		olen++;
+	}
+}
+
+// Write the local working copies back and report the amount of output:
+state->eof = st.eof;
+state->bytes = st.bytes;
+state->carry = st.carry;
+*outlen = olen;
+return ret;
--- a/deps/base64/base64/lib/arch/generic/enc_head.c
+++ b/deps/base64/base64/lib/arch/generic/enc_head.c
@ -0,0 +1,24 @@
+// Shared prologue of every encoder function. This fragment is textually
+// included at the top of a BASE64_ENC_FUNCTION body and expects `state`,
+// `src`, `srclen` and `out` to be in scope. It deliberately ends inside an
+// open switch/for construct at `case 0:`; the including function may place a
+// bulk encoding loop at that position and must close the construct by
+// including enc_tail.c.
+//
+// Assume that *out is large enough to contain the output.
+// Theoretically it should be 4/3 the length of src.
+const uint8_t *s = (const uint8_t *) src;
+uint8_t *o = (uint8_t *) out;
+
+// Use local temporaries to avoid cache thrashing. Only the `bytes` and
+// `carry` members of the local state are initialized and used:
+size_t olen = 0;
+size_t slen = srclen;
+struct base64_state st;
+st.bytes = state->bytes;
+st.carry = state->carry;
+
+// Turn three bytes into four 6-bit numbers:
+// in[0] = 00111111
+// in[1] = 00112222
+// in[2] = 00222233
+// in[3] = 00333333
+
+// Duff's device, a for() loop inside a switch() statement. Legal!
+// The switch jumps into the middle of the endless loop, to the case that
+// matches the number of bytes of the current 3-byte group that earlier calls
+// have already consumed (st.bytes):
+switch (st.bytes)
+{
+	for (;;)
+	{
+	case 0:
--- a/deps/base64/base64/lib/arch/generic/enc_tail.c
+++ b/deps/base64/base64/lib/arch/generic/enc_tail.c
@ -0,0 +1,34 @@
+		// Shared epilogue of every encoder function; continues the
+		// `case 0:` that enc_head.c left open. Encodes one input byte
+		// per case. Running out of input simply leaves the loop: the
+		// position in the group (st.bytes) and the pending bits
+		// (st.carry) are saved so that a later call can continue.
+		if (slen-- == 0) {
+			break;
+		}
+		// First byte: emit its top six bits; keep the low two bits as
+		// bits 5..4 of the next index.
+		*o++ = base64_table_enc_6bit[*s >> 2];
+		st.carry = (*s++ << 4) & 0x30;
+		st.bytes++;
+		olen += 1;
+
+		// Deliberate fallthrough:
+		BASE64_FALLTHROUGH
+
+	case 1:	if (slen-- == 0) {
+			break;
+		}
+		// Second byte: emit the carry plus its top four bits; keep
+		// the low four bits as bits 5..2 of the next index.
+		*o++ = base64_table_enc_6bit[st.carry | (*s >> 4)];
+		st.carry = (*s++ << 2) & 0x3C;
+		st.bytes++;
+		olen += 1;
+
+		// Deliberate fallthrough:
+		BASE64_FALLTHROUGH
+
+	case 2:	if (slen-- == 0) {
+			break;
+		}
+		// Third byte: emit the carry plus its top two bits, then its
+		// low six bits. The group is finished; the for loop continues
+		// at `case 0:`. st.carry is left as it is; it is reassigned
+		// when the next group starts.
+		*o++ = base64_table_enc_6bit[st.carry | (*s >> 6)];
+		*o++ = base64_table_enc_6bit[*s++ & 0x3F];
+		st.bytes = 0;
+		olen += 2;
+	}
+}
+// Write the local working copies back and report the amount of output:
+state->bytes = st.bytes;
+state->carry = st.carry;
+*outlen = olen;
--- a/deps/base64/base64/lib/arch/neon32/codec.c
+++ b/deps/base64/base64/lib/arch/neon32/codec.c
@ -0,0 +1,77 @@
+#include <stdint.h>
+#include <stddef.h>
+#include <string.h>
+
+#include "../../../include/libbase64.h"
+#include "../../tables/tables.h"
+#include "../../codecs.h"
+#include "config.h"
+#include "../../env.h"
+
+#ifdef __arm__
+#  if (defined(__ARM_NEON__) || defined(__ARM_NEON)) && HAVE_NEON32
+#    define BASE64_USE_NEON32
+#  endif
+#endif
+
+#ifdef BASE64_USE_NEON32
+#include <arm_neon.h>
+
+// Only enable inline assembly on supported compilers.
+#if defined(__GNUC__) || defined(__clang__)
+#define BASE64_NEON32_USE_ASM
+#endif
+
+static inline uint8x16_t
+vqtbl1q_u8 (const uint8x16_t lut, const uint8x16_t indices)
+{
+	// Emulation of the NEON64 `vqtbl1q_u8` intrinsic, which looks up 16
+	// indices in a 16-byte table. NEON32 only offers 64-bit wide lookups
+	// in a 128-bit table, so the lookup is done in two halves against the
+	// same table and the halves are joined afterwards.
+	uint8x8x2_t table;
+
+	table.val[0] = vget_low_u8(lut);
+	table.val[1] = vget_high_u8(lut);
+
+	const uint8x8_t lower = vtbl2_u8(table, vget_low_u8(indices));
+	const uint8x8_t upper = vtbl2_u8(table, vget_high_u8(indices));
+
+	return vcombine_u8(lower, upper);
+}
+
+#include "../generic/32/dec_loop.c"
+#include "../generic/32/enc_loop.c"
+#include "dec_loop.c"
+#include "enc_reshuffle.c"
+#include "enc_translate.c"
+#include "enc_loop.c"
+
+#endif	// BASE64_USE_NEON32
+
+// Stride size is so large on these NEON 32-bit functions
+// (48 bytes encode, 32 bytes decode) that we inline the
+// uint32 codec to stay performant on smaller inputs.
+
+// Encoder entry point of the NEON32 codec. The function signature is
+// supplied by the BASE64_ENC_FUNCTION macro, which is defined outside this
+// file. Without BASE64_USE_NEON32 the body is just BASE64_ENC_STUB.
+BASE64_ENC_FUNCTION(neon32)
+{
+#ifdef BASE64_USE_NEON32
+	// The body is stitched together textually and the order is
+	// significant: enc_head.c declares s, slen, o and olen and stops
+	// inside an open switch/for construct at `case 0:`; both bulk loops
+	// run at that position; enc_tail.c then handles the remaining bytes
+	// and closes the construct. The NEON loop goes first and the 32-bit
+	// table loop picks up what the NEON loop left over, before the
+	// bytewise code takes the rest.
+	#include "../generic/enc_head.c"
+	enc_loop_neon32(&s, &slen, &o, &olen);
+	enc_loop_generic_32(&s, &slen, &o, &olen);
+	#include "../generic/enc_tail.c"
+#else
+	BASE64_ENC_STUB
+#endif
+}
+
+// Decoder entry point of the NEON32 codec. The function signature is
+// supplied by the BASE64_DEC_FUNCTION macro, which is defined outside this
+// file. Without BASE64_USE_NEON32 the body is just BASE64_DEC_STUB.
+BASE64_DEC_FUNCTION(neon32)
+{
+#ifdef BASE64_USE_NEON32
+	// The body is stitched together textually and the order is
+	// significant: dec_head.c declares s, slen, o and olen and stops
+	// inside an open switch/for construct at `case 0:`; both bulk loops
+	// run at that position; dec_tail.c then decodes the remaining bytes
+	// one at a time, closes the construct and returns. The NEON loop goes
+	// first and the 32-bit table loop picks up what the NEON loop left
+	// over, including a block the NEON loop rejected.
+	#include "../generic/dec_head.c"
+	dec_loop_neon32(&s, &slen, &o, &olen);
+	dec_loop_generic_32(&s, &slen, &o, &olen);
+	#include "../generic/dec_tail.c"
+#else
+	BASE64_DEC_STUB
+#endif
+}
--- a/deps/base64/base64/lib/arch/neon32/dec_loop.c
+++ b/deps/base64/base64/lib/arch/neon32/dec_loop.c
@ -0,0 +1,106 @@
+static inline int
+is_nonzero (const uint8x16_t v)
+{
+	// Returns 1 if any bit of the 128-bit vector is set and 0 otherwise.
+	// Each 64-bit half is narrowed to 32 bits with saturation, which
+	// keeps a nonzero half nonzero; the two narrowed halves then fit in a
+	// single 64-bit scalar that can be tested directly.
+	const uint32x2_t narrowed = vqmovn_u64(vreinterpretq_u64_u8(v));
+	uint64_t folded;
+
+	vst1_u64(&folded, vreinterpret_u64_u32(narrowed));
+
+	return folded != 0;
+}
+
+static inline uint8x16_t
+delta_lookup (const uint8x16_t v)
+{
+	// Looks up each of the 16 indices in `v` in an 8-entry table of
+	// offsets. The table is only 8 bytes wide, so the lookup is done
+	// separately for the lower and the upper half of `v`. An index
+	// outside the table (8 or more) yields zero.
+	const uint8x8_t lut = {
+		0, 16, 19, 4, (uint8_t) -65, (uint8_t) -65, (uint8_t) -71, (uint8_t) -71,
+	};
+
+	const uint8x8_t lower = vtbl1_u8(lut, vget_low_u8(v));
+	const uint8x8_t upper = vtbl1_u8(lut, vget_high_u8(v));
+
+	return vcombine_u8(lower, upper);
+}
+
+static inline uint8x16_t
+dec_loop_neon32_lane (uint8x16_t *lane)
+{
+	// Handles one lane of 16 input characters: replaces every character
+	// in *lane by "character + offset", which is its 6-bit value for a
+	// valid character, and returns a mask that is nonzero in every byte
+	// position that held an invalid character. *lane is overwritten even
+	// if the mask is nonzero, so the caller has to check the mask before
+	// using the lane.
+	// See the SSSE3 decoder for an explanation of the algorithm.
+
+	// Classification tables indexed by the low and the high nibble of a
+	// character:
+	const uint8x16_t lut_lo = {
+		0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11,
+		0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A
+	};
+
+	const uint8x16_t lut_hi = {
+		0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08,
+		0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10
+	};
+
+	const uint8x16_t chars = *lane;
+
+	// Split every character into its two nibbles, and note which
+	// characters are equal to 0x2F (0xFF where equal, 0x00 elsewhere):
+	const uint8x16_t hi_nibbles = vshrq_n_u8(chars, 4);
+	const uint8x16_t lo_nibbles = vandq_u8(chars, vdupq_n_u8(0x0F));
+	const uint8x16_t is_2F      = vceqq_u8(chars, vdupq_n_u8(0x2F));
+
+	// A character is invalid if its two classification entries share a
+	// set bit:
+	const uint8x16_t class_hi = vqtbl1q_u8(lut_hi, hi_nibbles);
+	const uint8x16_t class_lo = vqtbl1q_u8(lut_lo, lo_nibbles);
+	const uint8x16_t validity = vandq_u8(class_lo, class_hi);
+
+	// The offset is selected by the high nibble; characters equal to
+	// 0x2F use the table slot one below their high nibble:
+	const uint8x16_t deltas = delta_lookup(vaddq_u8(is_2F, hi_nibbles));
+
+	*lane = vaddq_u8(chars, deltas);
+
+	return validity;
+}
+
+static inline void
+dec_loop_neon32 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
+{
+	// Bulk NEON32 decoder. Consumes whole 64-byte blocks from *s, writing
+	// 48 bytes per block at *o, and updates *s, *slen, *o and *olen to
+	// reflect what was actually decoded. It stops at the first block that
+	// contains an invalid character; that block and everything after it
+	// is left for the caller.
+
+	// Inputs shorter than one block are left to the caller entirely:
+	if (*slen < 64) {
+		return;
+	}
+
+	// Unlike the SSE codecs, no extra trailing zero bytes are written, so
+	// no input has to be held back and every whole block is a round:
+	size_t rounds = *slen / 64;
+
+	// Book all planned rounds up front; the totals are corrected at the
+	// bottom if the loop stops early:
+	*slen -= rounds * 64;	// 64 bytes consumed per round
+	*olen += rounds * 48;	// 48 bytes produced per round
+
+	// The length check above guarantees at least one round:
+	while (rounds > 0) {
+
+		// Load 64 bytes and deinterleave them, so that val[k] holds
+		// the k-th character of each of the 16 four-character groups:
+		uint8x16x4_t str = vld4q_u8(*s);
+
+		// Translate the four lanes in place and merge their masks of
+		// invalid characters:
+		uint8x16_t classified = dec_loop_neon32_lane(&str.val[0]);
+
+		classified = vorrq_u8(classified, dec_loop_neon32_lane(&str.val[1]));
+		classified = vorrq_u8(classified, dec_loop_neon32_lane(&str.val[2]));
+		classified = vorrq_u8(classified, dec_loop_neon32_lane(&str.val[3]));
+
+		// A nonzero mask means that the block contains at least one
+		// invalid character. Stop without storing anything and leave
+		// error checking and reporting to the code that follows this
+		// loop:
+		if (is_nonzero(classified)) {
+			break;
+		}
+
+		// Compress four 6-bit values into three bytes:
+		uint8x16x3_t dec;
+
+		dec.val[0] = vorrq_u8(vshlq_n_u8(str.val[0], 2), vshrq_n_u8(str.val[1], 4));
+		dec.val[1] = vorrq_u8(vshlq_n_u8(str.val[1], 4), vshrq_n_u8(str.val[2], 2));
+		dec.val[2] = vorrq_u8(vshlq_n_u8(str.val[2], 6), str.val[3]);
+
+		// Interleave and store the 48 decoded bytes:
+		vst3q_u8(*o, dec);
+
+		*o += 48;
+		*s += 64;
+		rounds--;
+	}
+
+	// Give back the rounds that were booked but not executed:
+	*slen += rounds * 64;
+	*olen -= rounds * 48;
+}
--- a/deps/base64/base64/lib/arch/neon32/enc_loop.c
+++ b/deps/base64/base64/lib/arch/neon32/enc_loop.c
@ -0,0 +1,169 @@
+#ifdef BASE64_NEON32_USE_ASM
+// Encode one 48-byte input block into 64 Base64 characters using hand-written
+// ARMv7 NEON assembly. *s is advanced by 48 and *o by 64 (both via the
+// post-incrementing loads/stores inside the asm).
+static inline void
+enc_loop_neon32_inner_asm (const uint8_t **s, uint8_t **o)
+{
+	// This function duplicates the functionality of enc_loop_neon32_inner,
+	// but entirely with inline assembly. This gives a significant speedup
+	// over using NEON intrinsics, which do not always generate very good
+	// code. The logic of the assembly is directly lifted from the
+	// intrinsics version, so it can be used as a guide to this code.
+
+	// Temporary registers, used as scratch space.
+	uint8x16_t tmp0, tmp1, tmp2, tmp3;
+	uint8x16_t mask0, mask1, mask2, mask3;
+
+	// A lookup table containing the absolute offsets for all ranges.
+	const uint8x16_t lut = {
+		  65U,  71U, 252U, 252U,
+		 252U, 252U, 252U, 252U,
+		 252U, 252U, 252U, 252U,
+		 237U, 240U,   0U,   0U
+	};
+
+	// Numeric constants.
+	const uint8x16_t n51 = vdupq_n_u8(51);
+	const uint8x16_t n25 = vdupq_n_u8(25);
+	const uint8x16_t n63 = vdupq_n_u8(63);
+
+	__asm__ (
+
+		// Load 48 bytes and deinterleave. The bytes are loaded to
+		// hard-coded registers q12, q13 and q14, to ensure that they
+		// are contiguous. Increment the source pointer.
+		"vld3.8 {d24, d26, d28}, [%[src]]! \n\t"
+		"vld3.8 {d25, d27, d29}, [%[src]]! \n\t"
+
+		// Reshuffle the bytes using temporaries.
+		"vshr.u8 %q[t0], q12,    #2      \n\t"
+		"vshr.u8 %q[t1], q13,    #4      \n\t"
+		"vshr.u8 %q[t2], q14,    #6      \n\t"
+		"vsli.8  %q[t1], q12,    #4      \n\t"
+		"vsli.8  %q[t2], q13,    #2      \n\t"
+		"vand.u8 %q[t1], %q[t1], %q[n63] \n\t"
+		"vand.u8 %q[t2], %q[t2], %q[n63] \n\t"
+		"vand.u8 %q[t3], q14,    %q[n63] \n\t"
+
+		// t0..t3 are the reshuffled inputs. Create LUT indices.
+		"vqsub.u8 q12, %q[t0], %q[n51] \n\t"
+		"vqsub.u8 q13, %q[t1], %q[n51] \n\t"
+		"vqsub.u8 q14, %q[t2], %q[n51] \n\t"
+		"vqsub.u8 q15, %q[t3], %q[n51] \n\t"
+
+		// Create the mask for range #0.
+		"vcgt.u8 %q[m0], %q[t0], %q[n25] \n\t"
+		"vcgt.u8 %q[m1], %q[t1], %q[n25] \n\t"
+		"vcgt.u8 %q[m2], %q[t2], %q[n25] \n\t"
+		"vcgt.u8 %q[m3], %q[t3], %q[n25] \n\t"
+
+		// Subtract -1 to correct the LUT indices.
+		"vsub.u8 q12, %q[m0] \n\t"
+		"vsub.u8 q13, %q[m1] \n\t"
+		"vsub.u8 q14, %q[m2] \n\t"
+		"vsub.u8 q15, %q[m3] \n\t"
+
+		// Lookup the delta values.
+		"vtbl.u8 d24, {%q[lut]}, d24 \n\t"
+		"vtbl.u8 d25, {%q[lut]}, d25 \n\t"
+		"vtbl.u8 d26, {%q[lut]}, d26 \n\t"
+		"vtbl.u8 d27, {%q[lut]}, d27 \n\t"
+		"vtbl.u8 d28, {%q[lut]}, d28 \n\t"
+		"vtbl.u8 d29, {%q[lut]}, d29 \n\t"
+		"vtbl.u8 d30, {%q[lut]}, d30 \n\t"
+		"vtbl.u8 d31, {%q[lut]}, d31 \n\t"
+
+		// Add the delta values.
+		"vadd.u8 q12, %q[t0] \n\t"
+		"vadd.u8 q13, %q[t1] \n\t"
+		"vadd.u8 q14, %q[t2] \n\t"
+		"vadd.u8 q15, %q[t3] \n\t"
+
+		// Store 64 bytes and interleave. Increment the dest pointer.
+		"vst4.8 {d24, d26, d28, d30}, [%[dst]]! \n\t"
+		"vst4.8 {d25, d27, d29, d31}, [%[dst]]! \n\t"
+
+		// Outputs (modified).
+		: [src] "+r"  (*s),
+		  [dst] "+r"  (*o),
+		  [t0]  "=&w" (tmp0),
+		  [t1]  "=&w" (tmp1),
+		  [t2]  "=&w" (tmp2),
+		  [t3]  "=&w" (tmp3),
+		  [m0]  "=&w" (mask0),
+		  [m1]  "=&w" (mask1),
+		  [m2]  "=&w" (mask2),
+		  [m3]  "=&w" (mask3)
+
+		// Inputs (not modified).
+		: [lut] "w" (lut),
+		  [n25] "w" (n25),
+		  [n51] "w" (n51),
+		  [n63] "w" (n63)
+
+		// Clobbers. Besides the hard-coded q12..q15 (d24..d31), the
+		// asm reads the input buffer and writes the output buffer
+		// through pointers that are only passed as register operands.
+		// The "memory" clobber tells the compiler about those
+		// accesses so it cannot reorder or drop surrounding loads and
+		// stores to the same buffers.
+		: "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31",
+		  "memory"
+	);
+}
+#endif
+
+// Encode a single 48-byte block into 64 Base64 characters and advance both
+// pointers (*s by 48, *o by 64).
+static inline void
+enc_loop_neon32_inner (const uint8_t **s, uint8_t **o)
+{
+#ifdef BASE64_NEON32_USE_ASM
+	// Inline-assembly variant; it advances *s and *o itself.
+	enc_loop_neon32_inner_asm(s, o);
+#else
+	// Deinterleaving load of 48 bytes, split into 6-bit groups, then map
+	// each group onto the Base64 alphabet:
+	const uint8x16x4_t encoded = enc_translate(enc_reshuffle(vld3q_u8(*s)));
+
+	// Interleaving store of the 64 output characters:
+	vst4q_u8(*o, encoded);
+
+	*s += 48;
+	*o += 64;
+#endif
+}
+
+// Encode as many whole 48-byte blocks as *slen allows. *slen is reduced by the
+// bytes consumed and *olen is increased by the bytes produced; the pointers
+// are advanced by the inner routine. The call sequence is manually unrolled
+// in groups of 8, 4, 2 and 1 rounds.
+static inline void
+enc_loop_neon32 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
+{
+	size_t rounds = *slen / 48;
+
+	*slen -= rounds * 48;	// 48 bytes consumed per round
+	*olen += rounds * 64;	// 64 bytes produced per round
+
+	while (rounds > 0) {
+		if (rounds >= 8) {
+			enc_loop_neon32_inner(s, o);
+			enc_loop_neon32_inner(s, o);
+			enc_loop_neon32_inner(s, o);
+			enc_loop_neon32_inner(s, o);
+			enc_loop_neon32_inner(s, o);
+			enc_loop_neon32_inner(s, o);
+			enc_loop_neon32_inner(s, o);
+			enc_loop_neon32_inner(s, o);
+			rounds -= 8;
+		}
+		else if (rounds >= 4) {
+			enc_loop_neon32_inner(s, o);
+			enc_loop_neon32_inner(s, o);
+			enc_loop_neon32_inner(s, o);
+			enc_loop_neon32_inner(s, o);
+			rounds -= 4;
+		}
+		else if (rounds >= 2) {
+			enc_loop_neon32_inner(s, o);
+			enc_loop_neon32_inner(s, o);
+			rounds -= 2;
+		}
+		else {
+			// Exactly one round left.
+			enc_loop_neon32_inner(s, o);
+			rounds -= 1;
+		}
+	}
+}
--- a/deps/base64/base64/lib/arch/neon32/enc_reshuffle.c
+++ b/deps/base64/base64/lib/arch/neon32/enc_reshuffle.c
@ -0,0 +1,31 @@
+// Spread the bits of three deinterleaved input byte vectors over four output
+// vectors, six payload bits per output byte:
+//
+//   in[0]  = a7 a6 a5 a4 a3 a2 a1 a0
+//   in[1]  = b7 b6 b5 b4 b3 b2 b1 b0
+//   in[2]  = c7 c6 c5 c4 c3 c2 c1 c0
+//
+//   out[0] = 00 00 a7 a6 a5 a4 a3 a2
+//   out[1] = 00 00 a1 a0 b7 b6 b5 b4
+//   out[2] = 00 00 b3 b2 b1 b0 c7 c6
+//   out[3] = 00 00 c5 c4 c3 c2 c1 c0
+static inline uint8x16x4_t
+enc_reshuffle (uint8x16x3_t in)
+{
+	// Keeps the low six bits of every byte.
+	const uint8x16_t low6 = vdupq_n_u8(0x3F);
+
+	uint8x16x4_t out;
+
+	// A plain right shift already leaves the top two bits clear:
+	out.val[0] = vshrq_n_u8(in.val[0], 2);
+
+	// Shift the high part down, shift-left-insert the low part of the
+	// previous byte above it, then mask off the two leftover high bits:
+	out.val[1] = vandq_u8(vsliq_n_u8(vshrq_n_u8(in.val[1], 4), in.val[0], 4), low6);
+	out.val[2] = vandq_u8(vsliq_n_u8(vshrq_n_u8(in.val[2], 6), in.val[1], 2), low6);
+
+	// The last output is just the low six bits of the third input:
+	out.val[3] = vandq_u8(in.val[2], low6);
+
+	return out;
+}
--- a/deps/base64/base64/lib/arch/neon32/enc_translate.c
+++ b/deps/base64/base64/lib/arch/neon32/enc_translate.c
@ -0,0 +1,57 @@
+// Translate four vectors of 6-bit values (0..63) to Base64 characters by
+// adding a per-range offset looked up from a small table.
+//
+//   #  From      To         Abs    Index  Characters
+//   0  [0..25]   [65..90]   +65        0  ABCDEFGHIJKLMNOPQRSTUVWXYZ
+//   1  [26..51]  [97..122]  +71        1  abcdefghijklmnopqrstuvwxyz
+//   2  [52..61]  [48..57]    -4  [2..11]  0123456789
+//   3  [62]      [43]       -19       12  +
+//   4  [63]      [47]       -16       13  /
+static inline uint8x16x4_t
+enc_translate (const uint8x16x4_t in)
+{
+	// Offsets for each range, indexed as in the table above (negative
+	// offsets are stored modulo 256):
+	const uint8x16_t lut = {
+		 65U,  71U, 252U, 252U,
+		252U, 252U, 252U, 252U,
+		252U, 252U, 252U, 252U,
+		237U, 240U,   0U,   0U
+	};
+
+	const uint8x16_t n51 = vdupq_n_u8(51);
+	const uint8x16_t n25 = vdupq_n_u8(25);
+
+	uint8x16x4_t out;
+
+	// Per lane:
+	//  - the saturating subtract of 51 gives the right index for range #0
+	//    and an index that is one too low for every other range;
+	//  - the compare yields 0xFF (-1) for ranges #1..#4 and 0x00 for range
+	//    #0, so subtracting it adds the missing 1;
+	//  - the offset found at that index is added to the input value.
+	out.val[0] = vaddq_u8(in.val[0], vqtbl1q_u8(lut,
+		vsubq_u8(vqsubq_u8(in.val[0], n51), vcgtq_u8(in.val[0], n25))));
+	out.val[1] = vaddq_u8(in.val[1], vqtbl1q_u8(lut,
+		vsubq_u8(vqsubq_u8(in.val[1], n51), vcgtq_u8(in.val[1], n25))));
+	out.val[2] = vaddq_u8(in.val[2], vqtbl1q_u8(lut,
+		vsubq_u8(vqsubq_u8(in.val[2], n51), vcgtq_u8(in.val[2], n25))));
+	out.val[3] = vaddq_u8(in.val[3], vqtbl1q_u8(lut,
+		vsubq_u8(vqsubq_u8(in.val[3], n51), vcgtq_u8(in.val[3], n25))));
+
+	return out;
+}
--- a/deps/base64/base64/lib/arch/neon64/codec.c
+++ b/deps/base64/base64/lib/arch/neon64/codec.c
@ -0,0 +1,92 @@
+#include <stdint.h>
+#include <stddef.h>
+#include <string.h>
+
+#include "../../../include/libbase64.h"
+#include "../../tables/tables.h"
+#include "../../codecs.h"
+#include "config.h"
+#include "../../env.h"
+
+#ifdef __aarch64__
+#  if (defined(__ARM_NEON__) || defined(__ARM_NEON)) && HAVE_NEON64
+#    define BASE64_USE_NEON64
+#  endif
+#endif
+
+#ifdef BASE64_USE_NEON64
+#include <arm_neon.h>
+
+// Only enable inline assembly on supported compilers.
+#if defined(__GNUC__) || defined(__clang__)
+#define BASE64_NEON64_USE_ASM
+#endif
+
+// Load the 64 bytes starting at `p` into four 128-bit vectors and return them
+// as a uint8x16x4_t (val[0] holds the first 16 bytes, val[3] the last 16).
+// `p` must point to at least 64 readable bytes.
+static inline uint8x16x4_t
+load_64byte_table (const uint8_t *p)
+{
+#ifdef BASE64_NEON64_USE_ASM
+
+	// Force the table to be loaded into contiguous registers. GCC will not
+	// normally allocate contiguous registers for a `uint8x16x4_t'. These
+	// registers are chosen to not conflict with the ones in the enc loop.
+	register uint8x16_t t0 __asm__ ("v8");
+	register uint8x16_t t1 __asm__ ("v9");
+	register uint8x16_t t2 __asm__ ("v10");
+	register uint8x16_t t3 __asm__ ("v11");
+
+	// A single four-register LD1 fills v8..v11. The post-increment (#64)
+	// updates the local copy of `p`, which is not used afterwards.
+	// NOTE(review): the memory read through `p` is not declared as an
+	// operand or clobber; the callers visible in this file pass static
+	// const tables, for which that is presumably harmless - confirm
+	// before using this with mutable data.
+	__asm__ (
+		"ld1 {%[t0].16b, %[t1].16b, %[t2].16b, %[t3].16b}, [%[src]], #64 \n\t"
+		: [src] "+r" (p),
+		  [t0]  "=w" (t0),
+		  [t1]  "=w" (t1),
+		  [t2]  "=w" (t2),
+		  [t3]  "=w" (t3)
+	);
+
+	// Bundle the four registers into the aggregate type expected by the
+	// table-lookup intrinsics:
+	return (uint8x16x4_t) {
+		.val[0] = t0,
+		.val[1] = t1,
+		.val[2] = t2,
+		.val[3] = t3,
+	};
+#else
+	// No inline assembly available: use the plain intrinsic load.
+	return vld1q_u8_x4(p);
+#endif
+}
+
+#include "../generic/32/dec_loop.c"
+#include "../generic/64/enc_loop.c"
+#include "dec_loop.c"
+#include "enc_reshuffle.c"
+#include "enc_loop.c"
+
+#endif	// BASE64_USE_NEON64
+
+// Stride size is so large on these NEON 64-bit functions
+// (48 bytes encode, 64 bytes decode) that we inline the
+// uint64 codec to stay performant on smaller inputs.
+
+// NEON64 encoder entry point. The signature and the local variables used
+// below (s, slen, o, olen) come from the BASE64_ENC_FUNCTION macro and the
+// textually included enc_head.c / enc_tail.c fragments, none of which are
+// visible in this file.
+BASE64_ENC_FUNCTION(neon64)
+{
+#ifdef BASE64_USE_NEON64
+	#include "../generic/enc_head.c"
+	// First the NEON loop, which handles whole 48-byte blocks, then the
+	// generic 64-bit loop on whatever the NEON loop left over:
+	enc_loop_neon64(&s, &slen, &o, &olen);
+	enc_loop_generic_64(&s, &slen, &o, &olen);
+	#include "../generic/enc_tail.c"
+#else
+	// NEON64 not enabled for this build: expand the stub macro instead
+	// (defined outside this file).
+	BASE64_ENC_STUB
+#endif
+}
+
+// NEON64 decoder entry point. The signature and the local variables used
+// below (s, slen, o, olen) come from the BASE64_DEC_FUNCTION macro and the
+// textually included dec_head.c / dec_tail.c fragments, none of which are
+// visible in this file.
+BASE64_DEC_FUNCTION(neon64)
+{
+#ifdef BASE64_USE_NEON64
+	#include "../generic/dec_head.c"
+	// First the NEON loop, which handles whole 64-byte blocks and stops
+	// early on invalid input, then the generic 32-bit loop on whatever
+	// the NEON loop left over:
+	dec_loop_neon64(&s, &slen, &o, &olen);
+	dec_loop_generic_32(&s, &slen, &o, &olen);
+	#include "../generic/dec_tail.c"
+#else
+	// NEON64 not enabled for this build: expand the stub macro instead
+	// (defined outside this file).
+	BASE64_DEC_STUB
+#endif
+}
--- a/deps/base64/base64/lib/arch/neon64/dec_loop.c
+++ b/deps/base64/base64/lib/arch/neon64/dec_loop.c
@ -0,0 +1,129 @@
+// The input consists of five valid character sets in the Base64 alphabet,
+// which we need to map back to the 6-bit values they represent.
+// There are three ranges, two singles, and then there's the rest.
+//
+//   #  From       To        LUT  Characters
+//   1  [0..42]    [255]      #1  invalid input
+//   2  [43]       [62]       #1  +
+//   3  [44..46]   [255]      #1  invalid input
+//   4  [47]       [63]       #1  /
+//   5  [48..57]   [52..61]   #1  0..9
+//   6  [58..63]   [255]      #1  invalid input
+//   7  [64]       [255]      #2  invalid input
+//   8  [65..90]   [0..25]    #2  A..Z
+//   9  [91..96]   [255]      #2  invalid input
+//  10  [97..122]  [26..51]   #2  a..z
+//  11  [123..126] [255]      #2  invalid input
+// (12) Everything else => invalid input
+
+// The first LUT covers input bytes [0..63] and is applied with a plain table
+// lookup (vqtbl4q_u8 in dec_loop_neon64), for which out of range indices are
+// set to 0 in the destination. Entries are the decoded 6-bit value, or 255
+// for an invalid character.
+static const uint8_t dec_lut1[] = {
+	255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U,
+	255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U,
+	255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U,  62U, 255U, 255U, 255U,  63U,
+	 52U,  53U,  54U,  55U,  56U,  57U,  58U,  59U,  60U,  61U, 255U, 255U, 255U, 255U, 255U, 255U,
+};
+
+// The second LUT is applied with a table lookup extension (vqtbx4q_u8 in
+// dec_loop_neon64), for which out of range indices leave the destination
+// unchanged. Input [64..126] will be mapped to index [1..63] in this LUT by a
+// saturating subtraction of 63. Index 0 (any input <= 63) yields 0, meaning
+// that the value comes from LUT #1.
+static const uint8_t dec_lut2[] = {
+	  0U, 255U,   0U,   1U,   2U,   3U,   4U,   5U,   6U,   7U,   8U,   9U,  10U,  11U,  12U,  13U,
+	 14U,  15U,  16U,  17U,  18U,  19U,  20U,  21U,  22U,  23U,  24U,  25U, 255U, 255U, 255U, 255U,
+	255U, 255U,  26U,  27U,  28U,  29U,  30U,  31U,  32U,  33U,  34U,  35U,  36U,  37U,  38U,  39U,
+	 40U,  41U,  42U,  43U,  44U,  45U,  46U,  47U,  48U,  49U,  50U,  51U, 255U, 255U, 255U, 255U,
+};
+
+// All input values in range for the first look-up will be 0U in the second
+// look-up result. All input values out of range for the first look-up will be
+// 0U in the first look-up result. Thus, the two results can be ORed without
+// conflicts.
+//
+// Invalid characters that are in the valid range for either look-up will be
+// set to 255U in the combined result. Other invalid characters will just be
+// passed through with the second look-up result (using the VTBX instruction).
+// Since the second LUT is 64 bytes, those passed-through values are guaranteed
+// to have a value greater than 63U. Therefore, valid characters will be mapped
+// to the valid [0..63] range and all invalid characters will be mapped to
+// values greater than 63.
+
+// Bulk-decode Base64 text with NEON, 64 input bytes -> 48 output bytes per
+// round. *s / *o are advanced past completed rounds and *slen / *olen account
+// for exactly those rounds. The loop stops at the first 64-byte group that
+// contains an invalid character, leaving it for the caller.
+static inline void
+dec_loop_neon64 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
+{
+	// Not even one full 64-byte group available.
+	if (*slen < 64) {
+		return;
+	}
+
+	// Unlike the SSE codecs, no extra trailing zero bytes are written, so
+	// no input bytes need to be held back:
+	size_t rounds = *slen / 64;
+
+	// Account for every round up front; corrected after the loop.
+	*slen -= rounds * 64;	// 64 bytes consumed per round
+	*olen += rounds * 48;	// 48 bytes produced per round
+
+	const uint8x16x4_t tbl_dec1 = load_64byte_table(dec_lut1);
+	const uint8x16x4_t tbl_dec2 = load_64byte_table(dec_lut2);
+
+	// Serves both as the index offset for the second LUT and as the
+	// largest valid decoded value.
+	const uint8x16_t n63 = vdupq_n_u8(63U);
+
+	do {
+		// Deinterleaving load of 64 input characters:
+		const uint8x16x4_t in = vld4q_u8(*s);
+
+		// Indices into the second LUT (saturating: inputs <= 63 give 0):
+		const uint8x16_t idx0 = vqsubq_u8(in.val[0], n63);
+		const uint8x16_t idx1 = vqsubq_u8(in.val[1], n63);
+		const uint8x16_t idx2 = vqsubq_u8(in.val[2], n63);
+		const uint8x16_t idx3 = vqsubq_u8(in.val[3], n63);
+
+		// Combine both lookups. The first yields 0 for out of range
+		// inputs; the second passes out of range indices through:
+		const uint8x16_t v0 = vorrq_u8(vqtbl4q_u8(tbl_dec1, in.val[0]), vqtbx4q_u8(idx0, tbl_dec2, idx0));
+		const uint8x16_t v1 = vorrq_u8(vqtbl4q_u8(tbl_dec1, in.val[1]), vqtbx4q_u8(idx1, tbl_dec2, idx1));
+		const uint8x16_t v2 = vorrq_u8(vqtbl4q_u8(tbl_dec1, in.val[2]), vqtbx4q_u8(idx2, tbl_dec2, idx2));
+		const uint8x16_t v3 = vorrq_u8(vqtbl4q_u8(tbl_dec1, in.val[3]), vqtbx4q_u8(idx3, tbl_dec2, idx3));
+
+		// Any value above 63 marks an invalid input character:
+		const uint8x16_t invalid = vorrq_u8(
+			vorrq_u8(vcgtq_u8(v0, n63), vcgtq_u8(v1, n63)),
+			vorrq_u8(vcgtq_u8(v2, n63), vcgtq_u8(v3, n63)));
+
+		// Bail out unless every byte of the mask is zero:
+		if (vmaxvq_u8(invalid) != 0U) {
+			break;
+		}
+
+		// Pack four 6-bit values into three bytes:
+		uint8x16x3_t packed;
+		packed.val[0] = vorrq_u8(vshlq_n_u8(v0, 2), vshrq_n_u8(v1, 4));
+		packed.val[1] = vorrq_u8(vshlq_n_u8(v1, 4), vshrq_n_u8(v2, 2));
+		packed.val[2] = vorrq_u8(vshlq_n_u8(v2, 6), v3);
+
+		// Interleaving store of the 48 decoded bytes:
+		vst3q_u8(*o, packed);
+
+		*s += 64;
+		*o += 48;
+
+	} while (--rounds > 0);
+
+	// Give back the rounds that were not executed:
+	*slen += rounds * 64;
+	*olen -= rounds * 48;
+}
--- a/deps/base64/base64/lib/arch/neon64/enc_loop.c
+++ b/deps/base64/base64/lib/arch/neon64/enc_loop.c
@ -0,0 +1,133 @@
+#ifdef BASE64_NEON64_USE_ASM
+// Encode one 48-byte input block into 64 Base64 characters using hand-written
+// AArch64 NEON assembly. *s is advanced by 48 and *o by 64 (both via the
+// post-indexed load/store inside the asm). `tbl_enc` is the 64-entry encoding
+// table.
+static inline void
+enc_loop_neon64_inner_asm (const uint8_t **s, uint8_t **o, const uint8x16x4_t tbl_enc)
+{
+	// This function duplicates the functionality of enc_loop_neon64_inner,
+	// but entirely with inline assembly. This gives a significant speedup
+	// over using NEON intrinsics, which do not always generate very good
+	// code. The logic of the assembly is directly lifted from the
+	// intrinsics version, so it can be used as a guide to this code.
+
+	// Temporary registers, used as scratch space.
+	uint8x16_t tmp0, tmp1, tmp2, tmp3;
+
+	// Numeric constant.
+	const uint8x16_t n63 = vdupq_n_u8(63);
+
+	__asm__ (
+
+		// Load 48 bytes and deinterleave. The bytes are loaded to
+		// hard-coded registers v12, v13 and v14, to ensure that they
+		// are contiguous. Increment the source pointer.
+		"ld3 {v12.16b, v13.16b, v14.16b}, [%[src]], #48 \n\t"
+
+		// Reshuffle the bytes using temporaries.
+		"ushr %[t0].16b, v12.16b,   #2         \n\t"
+		"ushr %[t1].16b, v13.16b,   #4         \n\t"
+		"ushr %[t2].16b, v14.16b,   #6         \n\t"
+		"sli  %[t1].16b, v12.16b,   #4         \n\t"
+		"sli  %[t2].16b, v13.16b,   #2         \n\t"
+		"and  %[t1].16b, %[t1].16b, %[n63].16b \n\t"
+		"and  %[t2].16b, %[t2].16b, %[n63].16b \n\t"
+		"and  %[t3].16b, v14.16b,   %[n63].16b \n\t"
+
+		// Translate the values to the Base64 alphabet.
+		// NOTE(review): the four-register table list relies on l0..l3
+		// being in consecutively numbered registers; load_64byte_table
+		// pins the table to v8..v11 for that purpose.
+		"tbl v12.16b, {%[l0].16b, %[l1].16b, %[l2].16b, %[l3].16b}, %[t0].16b \n\t"
+		"tbl v13.16b, {%[l0].16b, %[l1].16b, %[l2].16b, %[l3].16b}, %[t1].16b \n\t"
+		"tbl v14.16b, {%[l0].16b, %[l1].16b, %[l2].16b, %[l3].16b}, %[t2].16b \n\t"
+		"tbl v15.16b, {%[l0].16b, %[l1].16b, %[l2].16b, %[l3].16b}, %[t3].16b \n\t"
+
+		// Store 64 bytes and interleave. Increment the dest pointer.
+		"st4 {v12.16b, v13.16b, v14.16b, v15.16b}, [%[dst]], #64 \n\t"
+
+		// Outputs (modified).
+		: [src] "+r"  (*s),
+		  [dst] "+r"  (*o),
+		  [t0]  "=&w" (tmp0),
+		  [t1]  "=&w" (tmp1),
+		  [t2]  "=&w" (tmp2),
+		  [t3]  "=&w" (tmp3)
+
+		// Inputs (not modified).
+		: [n63] "w" (n63),
+		  [l0]  "w" (tbl_enc.val[0]),
+		  [l1]  "w" (tbl_enc.val[1]),
+		  [l2]  "w" (tbl_enc.val[2]),
+		  [l3]  "w" (tbl_enc.val[3])
+
+		// Clobbers. Besides the hard-coded v12..v15, the asm reads the
+		// input buffer and writes the output buffer through pointers
+		// that are only passed as register operands. The "memory"
+		// clobber tells the compiler about those accesses so it cannot
+		// reorder or drop surrounding loads and stores to the same
+		// buffers.
+		: "v12", "v13", "v14", "v15", "memory"
+	);
+}
+#endif
+
+// Encode a single 48-byte block into 64 Base64 characters using the 64-entry
+// table `tbl_enc`, and advance both pointers (*s by 48, *o by 64).
+static inline void
+enc_loop_neon64_inner (const uint8_t **s, uint8_t **o, const uint8x16x4_t tbl_enc)
+{
+#ifdef BASE64_NEON64_USE_ASM
+	// Inline-assembly variant; it advances *s and *o itself.
+	enc_loop_neon64_inner_asm(s, o, tbl_enc);
+#else
+	// Deinterleaving load of 48 bytes, then divide the bits of every
+	// three input bytes over four output bytes:
+	const uint8x16x4_t sextets = enc_reshuffle(vld3q_u8(*s));
+
+	// Each byte now holds a value 0..63; a 64-byte table lookup maps it
+	// straight to its Base64 character:
+	uint8x16x4_t encoded;
+	encoded.val[0] = vqtbl4q_u8(tbl_enc, sextets.val[0]);
+	encoded.val[1] = vqtbl4q_u8(tbl_enc, sextets.val[1]);
+	encoded.val[2] = vqtbl4q_u8(tbl_enc, sextets.val[2]);
+	encoded.val[3] = vqtbl4q_u8(tbl_enc, sextets.val[3]);
+
+	// Interleaving store of the 64 output characters:
+	vst4q_u8(*o, encoded);
+
+	*s += 48;
+	*o += 64;
+#endif
+}
+
+// Encode as many whole 48-byte blocks as *slen allows. *slen is reduced by the
+// bytes consumed and *olen is increased by the bytes produced; the pointers
+// are advanced by the inner routine. The call sequence is manually unrolled
+// in groups of 8, 4, 2 and 1 rounds.
+static inline void
+enc_loop_neon64 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
+{
+	size_t rounds = *slen / 48;
+
+	*slen -= rounds * 48;	// 48 bytes consumed per round
+	*olen += rounds * 64;	// 64 bytes produced per round
+
+	// The encoding table is loaded once and shared by all rounds:
+	const uint8x16x4_t tbl_enc = load_64byte_table(base64_table_enc_6bit);
+
+	while (rounds > 0) {
+		if (rounds >= 8) {
+			enc_loop_neon64_inner(s, o, tbl_enc);
+			enc_loop_neon64_inner(s, o, tbl_enc);
+			enc_loop_neon64_inner(s, o, tbl_enc);
+			enc_loop_neon64_inner(s, o, tbl_enc);
+			enc_loop_neon64_inner(s, o, tbl_enc);
+			enc_loop_neon64_inner(s, o, tbl_enc);
+			enc_loop_neon64_inner(s, o, tbl_enc);
+			enc_loop_neon64_inner(s, o, tbl_enc);
+			rounds -= 8;
+		}
+		else if (rounds >= 4) {
+			enc_loop_neon64_inner(s, o, tbl_enc);
+			enc_loop_neon64_inner(s, o, tbl_enc);
+			enc_loop_neon64_inner(s, o, tbl_enc);
+			enc_loop_neon64_inner(s, o, tbl_enc);
+			rounds -= 4;
+		}
+		else if (rounds >= 2) {
+			enc_loop_neon64_inner(s, o, tbl_enc);
+			enc_loop_neon64_inner(s, o, tbl_enc);
+			rounds -= 2;
+		}
+		else {
+			// Exactly one round left.
+			enc_loop_neon64_inner(s, o, tbl_enc);
+			rounds -= 1;
+		}
+	}
+}
--- a/deps/base64/base64/lib/arch/neon64/enc_reshuffle.c
+++ b/deps/base64/base64/lib/arch/neon64/enc_reshuffle.c
@ -0,0 +1,31 @@
+// Spread the bits of three deinterleaved input byte vectors over four output
+// vectors, six payload bits per output byte:
+//
+//   in[0]  = a7 a6 a5 a4 a3 a2 a1 a0
+//   in[1]  = b7 b6 b5 b4 b3 b2 b1 b0
+//   in[2]  = c7 c6 c5 c4 c3 c2 c1 c0
+//
+//   out[0] = 00 00 a7 a6 a5 a4 a3 a2
+//   out[1] = 00 00 a1 a0 b7 b6 b5 b4
+//   out[2] = 00 00 b3 b2 b1 b0 c7 c6
+//   out[3] = 00 00 c5 c4 c3 c2 c1 c0
+static inline uint8x16x4_t
+enc_reshuffle (const uint8x16x3_t in)
+{
+	// Keeps the low six bits of every byte.
+	const uint8x16_t low6 = vdupq_n_u8(0x3F);
+
+	uint8x16x4_t out;
+
+	// A plain right shift already leaves the top two bits clear:
+	out.val[0] = vshrq_n_u8(in.val[0], 2);
+
+	// Shift the high part down, shift-left-insert the low part of the
+	// previous byte above it, then mask off the two leftover high bits:
+	out.val[1] = vandq_u8(vsliq_n_u8(vshrq_n_u8(in.val[1], 4), in.val[0], 4), low6);
+	out.val[2] = vandq_u8(vsliq_n_u8(vshrq_n_u8(in.val[2], 6), in.val[1], 2), low6);
+
+	// The last output is just the low six bits of the third input:
+	out.val[3] = vandq_u8(in.val[2], low6);
+
+	return out;
+}
--- a/deps/base64/base64/lib/arch/sse41/codec.c
+++ b/deps/base64/base64/lib/arch/sse41/codec.c
@ -0,0 +1,42 @@
+#include <stdint.h>
+#include <stddef.h>
+#include <stdlib.h>
+
+#include "../../../include/libbase64.h"
+#include "../../tables/tables.h"
+#include "../../codecs.h"
+#include "config.h"
+#include "../../env.h"
+
+#if HAVE_SSE41
+#include <smmintrin.h>
+
+#include "../ssse3/dec_reshuffle.c"
+#include "../ssse3/dec_loop.c"
+#include "../ssse3/enc_translate.c"
+#include "../ssse3/enc_reshuffle.c"
+#include "../ssse3/enc_loop.c"
+
+#endif	// HAVE_SSE41
+
+// SSE4.1 encoder entry point. This translation unit includes the ../ssse3/
+// loop sources and calls enc_loop_ssse3(); no SSE4.1-specific loop is used.
+// The variables s, slen, o and olen come from the macro and the textually
+// included enc_head.c / enc_tail.c fragments, which are not visible here.
+BASE64_ENC_FUNCTION(sse41)
+{
+#if HAVE_SSE41
+	#include "../generic/enc_head.c"
+	enc_loop_ssse3(&s, &slen, &o, &olen);
+	#include "../generic/enc_tail.c"
+#else
+	// Built without SSE4.1 support: expand the stub macro instead
+	// (defined outside this file).
+	BASE64_ENC_STUB
+#endif
+}
+
+// SSE4.1 decoder entry point. This translation unit includes the ../ssse3/
+// loop sources and calls dec_loop_ssse3(); no SSE4.1-specific loop is used.
+// The variables s, slen, o and olen come from the macro and the textually
+// included dec_head.c / dec_tail.c fragments, which are not visible here.
+BASE64_DEC_FUNCTION(sse41)
+{
+#if HAVE_SSE41
+	#include "../generic/dec_head.c"
+	dec_loop_ssse3(&s, &slen, &o, &olen);
+	#include "../generic/dec_tail.c"
+#else
+	// Built without SSE4.1 support: expand the stub macro instead
+	// (defined outside this file).
+	BASE64_DEC_STUB
+#endif
+}
--- a/deps/base64/base64/lib/arch/sse42/codec.c
+++ b/deps/base64/base64/lib/arch/sse42/codec.c
@ -0,0 +1,42 @@
+#include <stdint.h>
+#include <stddef.h>
+#include <stdlib.h>
+
+#include "../../../include/libbase64.h"
+#include "../../tables/tables.h"
+#include "../../codecs.h"
+#include "config.h"
+#include "../../env.h"
+
+#if HAVE_SSE42
+#include <nmmintrin.h>
+
+#include "../ssse3/dec_reshuffle.c"
+#include "../ssse3/dec_loop.c"
+#include "../ssse3/enc_translate.c"
+#include "../ssse3/enc_reshuffle.c"
+#include "../ssse3/enc_loop.c"
+
+#endif	// HAVE_SSE42
+
+// SSE4.2 encoder entry point. This translation unit includes the ../ssse3/
+// loop sources and calls enc_loop_ssse3(); no SSE4.2-specific loop is used.
+// The variables s, slen, o and olen come from the macro and the textually
+// included enc_head.c / enc_tail.c fragments, which are not visible here.
+BASE64_ENC_FUNCTION(sse42)
+{
+#if HAVE_SSE42
+	#include "../generic/enc_head.c"
+	enc_loop_ssse3(&s, &slen, &o, &olen);
+	#include "../generic/enc_tail.c"
+#else
+	// Built without SSE4.2 support: expand the stub macro instead
+	// (defined outside this file).
+	BASE64_ENC_STUB
+#endif
+}
+
+// SSE4.2 decoder entry point. This translation unit includes the ../ssse3/
+// loop sources and calls dec_loop_ssse3(); no SSE4.2-specific loop is used.
+// The variables s, slen, o and olen come from the macro and the textually
+// included dec_head.c / dec_tail.c fragments, which are not visible here.
+BASE64_DEC_FUNCTION(sse42)
+{
+#if HAVE_SSE42
+	#include "../generic/dec_head.c"
+	dec_loop_ssse3(&s, &slen, &o, &olen);
+	#include "../generic/dec_tail.c"
+#else
+	// Built without SSE4.2 support: expand the stub macro instead
+	// (defined outside this file).
+	BASE64_DEC_STUB
+#endif
+}
--- a/deps/base64/base64/lib/arch/ssse3/codec.c
+++ b/deps/base64/base64/lib/arch/ssse3/codec.c
@ -0,0 +1,42 @@
+#include <stdint.h>
+#include <stddef.h>
+#include <stdlib.h>
+
+#include "../../../include/libbase64.h"
+#include "../../tables/tables.h"
+#include "../../codecs.h"
+#include "config.h"
+#include "../../env.h"
+
+#if HAVE_SSSE3
+#include <tmmintrin.h>
+
+#include "dec_reshuffle.c"
+#include "dec_loop.c"
+#include "enc_reshuffle.c"
+#include "enc_translate.c"
+#include "enc_loop.c"
+
+#endif	// HAVE_SSSE3
+
+// SSSE3 encoder entry point: runs enc_loop_ssse3() between the textually
+// included enc_head.c / enc_tail.c fragments. The variables s, slen, o and
+// olen come from the macro and those fragments, which are not visible here.
+BASE64_ENC_FUNCTION(ssse3)
+{
+#if HAVE_SSSE3
+	#include "../generic/enc_head.c"
+	enc_loop_ssse3(&s, &slen, &o, &olen);
+	#include "../generic/enc_tail.c"
+#else
+	// Built without SSSE3 support: expand the stub macro instead
+	// (defined outside this file).
+	BASE64_ENC_STUB
+#endif
+}
+
+// SSSE3 decoder entry point: runs dec_loop_ssse3() between the textually
+// included dec_head.c / dec_tail.c fragments. The variables s, slen, o and
+// olen come from the macro and those fragments, which are not visible here.
+BASE64_DEC_FUNCTION(ssse3)
+{
+#if HAVE_SSSE3
+	#include "../generic/dec_head.c"
+	dec_loop_ssse3(&s, &slen, &o, &olen);
+	#include "../generic/dec_tail.c"
+#else
+	// Built without SSSE3 support: expand the stub macro instead
+	// (defined outside this file).
+	BASE64_DEC_STUB
+#endif
+}
--- a/deps/base64/base64/lib/arch/ssse3/dec_loop.c
+++ b/deps/base64/base64/lib/arch/ssse3/dec_loop.c
@ -0,0 +1,173 @@
+// The input consists of six character sets in the Base64 alphabet, which we
+// need to map back to the 6-bit values they represent. There are three ranges,
+// two singles, and then there's the rest.
+//
+//  #  From       To        Add  Characters
+//  1  [43]       [62]      +19  +
+//  2  [47]       [63]      +16  /
+//  3  [48..57]   [52..61]   +4  0..9
+//  4  [65..90]   [0..25]   -65  A..Z
+//  5  [97..122]  [26..51]  -71  a..z
+// (6) Everything else => invalid input
+//
+// We will use lookup tables for character validation and offset computation.
+// Remember that 0x2X and 0x0X select the same index for _mm_shuffle_epi8
+// (only the low four bits and the top bit of each index byte are used), so we
+// can mask with 0x2F instead of 0x0F and thus save one constant declaration
+// (register and/or memory access).
+//
+// For offsets:
+// Perfect hash for lut = ((src >> 4) & 0x2F) + ((src == 0x2F) ? 0xFF : 0x00)
+// 0000 = garbage
+// 0001 = /
+// 0010 = +
+// 0011 = 0-9
+// 0100 = A-Z
+// 0101 = A-Z
+// 0110 = a-z
+// 0111 = a-z
+// 1000 >= garbage
+//
+// For validation, here's the table.
+// A character is valid if and only if the AND of the 2 lookups equals 0:
+//
+// hi \ lo              0000 0001 0010 0011 0100 0101 0110 0111 1000 1001 1010 1011 1100 1101 1110 1111
+//      LUT             0x15 0x11 0x11 0x11 0x11 0x11 0x11 0x11 0x11 0x11 0x13 0x1A 0x1B 0x1B 0x1B 0x1A
+//
+// 0000 0x10 char        NUL  SOH  STX  ETX  EOT  ENQ  ACK  BEL   BS   HT   LF   VT   FF   CR   SO   SI
+//           andlut     0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10
+//
+// 0001 0x10 char        DLE  DC1  DC2  DC3  DC4  NAK  SYN  ETB  CAN   EM  SUB  ESC   FS   GS   RS   US
+//           andlut     0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10
+//
+// 0010 0x01 char               !    "    #    $    %    &    '    (    )    *    +    ,    -    .    /
+//           andlut     0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x00 0x01 0x01 0x01 0x00
+//
+// 0011 0x02 char          0    1    2    3    4    5    6    7    8    9    :    ;    <    =    >    ?
+//           andlut     0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x02 0x02 0x02 0x02 0x02 0x02
+//
+// 0100 0x04 char          @    A    B    C    D    E    F    G    H    I    J    K    L    M    N    O
+//           andlut     0x04 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
+//
+// 0101 0x08 char          P    Q    R    S    T    U    V    W    X    Y    Z    [    \    ]    ^    _
+//           andlut     0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x08 0x08 0x08 0x08 0x08
+//
+// 0110 0x04 char          `    a    b    c    d    e    f    g    h    i    j    k    l    m    n    o
+//           andlut     0x04 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
+// 0111 0x08 char          p    q    r    s    t    u    v    w    x    y    z    {    |    }    ~
+//           andlut     0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x08 0x08 0x08 0x08 0x08
+//
+// 1000 0x10 andlut     0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10
+// 1001 0x10 andlut     0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10
+// 1010 0x10 andlut     0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10
+// 1011 0x10 andlut     0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10
+// 1100 0x10 andlut     0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10
+// 1101 0x10 andlut     0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10
+// 1110 0x10 andlut     0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10
+// 1111 0x10 andlut     0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10
+
+// Decode one group of 16 Base64 characters into 12 bytes.
+//
+// Returns 1 on success, after storing the result at *o, advancing *s by 16
+// and *o by 12, and decrementing *rounds. Returns 0, with nothing stored and
+// nothing advanced, if any of the 16 characters is invalid. Note that the
+// store writes a full 16 bytes at *o even though only 12 are counted.
+static inline int
+dec_loop_ssse3_inner (const uint8_t **s, uint8_t **o, size_t *rounds)
+{
+	// Validation tables, indexed by the low and the high nibble of a
+	// character; a character is valid iff the AND of both entries is 0:
+	const __m128i lut_lo = _mm_setr_epi8(
+		0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11,
+		0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A);
+
+	const __m128i lut_hi = _mm_setr_epi8(
+		0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08,
+		0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10);
+
+	// Offsets that turn a character into its 6-bit value:
+	const __m128i lut_roll = _mm_setr_epi8(
+		0,  16,  19,   4, -65, -65, -71, -71,
+		0,   0,   0,   0,   0,   0,   0,   0);
+
+	const __m128i mask_2F = _mm_set1_epi8(0x2F);
+
+	// Unaligned load of 16 input characters:
+	const __m128i in = _mm_loadu_si128((__m128i *) *s);
+
+	// Nibble-derived shuffle indices:
+	const __m128i hi_idx = _mm_and_si128(_mm_srli_epi32(in, 4), mask_2F);
+	const __m128i lo_idx = _mm_and_si128(in, mask_2F);
+
+	// Per-character validity flags (non-zero means invalid):
+	const __m128i flags = _mm_and_si128(
+		_mm_shuffle_epi8(lut_lo, lo_idx),
+		_mm_shuffle_epi8(lut_hi, hi_idx));
+
+	// Any invalid character: leave this group to the bytewise code, which
+	// does the error checking and reporting.
+	if (_mm_movemask_epi8(_mm_cmpgt_epi8(flags, _mm_setzero_si128())) != 0) {
+		return 0;
+	}
+
+	// '/' (0x2F) shares its high nibble with '+', so its index is lowered
+	// by one (the compare yields 0xFF, i.e. -1) to pick its own offset:
+	const __m128i roll_idx = _mm_add_epi8(_mm_cmpeq_epi8(in, mask_2F), hi_idx);
+
+	// Add the offsets to obtain the 6-bit values:
+	const __m128i sextets = _mm_add_epi8(in, _mm_shuffle_epi8(lut_roll, roll_idx));
+
+	// Pack into the 12-byte output format and store:
+	_mm_storeu_si128((__m128i *) *o, dec_reshuffle(sextets));
+
+	*s += 16;
+	*o += 12;
+	*rounds -= 1;
+
+	return 1;
+}
+
+// Bulk-decode Base64 text with SSSE3, 16 input bytes -> 12 output bytes per
+// round. *s / *o are advanced past completed rounds and *slen / *olen account
+// for exactly those rounds. Decoding stops at the first 16-byte group that
+// contains an invalid character. The call sequence is manually unrolled in
+// groups of 8, 4, 2 and 1 rounds.
+static inline void
+dec_loop_ssse3 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
+{
+	// Need one full round plus the 8-byte reserve described below.
+	if (*slen < 24) {
+		return;
+	}
+
+	// Every round stores 16 bytes, of which the last 4 are zero padding
+	// beyond the 12 payload bytes. Holding back at least 8 input bytes
+	// (6 data bytes and up to two end-of-string markers) ensures later
+	// output covers that gap.
+	size_t rounds = (*slen - 8) / 16;
+
+	// Account for every round up front; corrected after the loop.
+	*slen -= rounds * 16;	// 16 bytes consumed per round
+	*olen += rounds * 12;	// 12 bytes produced per round
+
+	// rounds is at least 1 at this point. The inner routine decrements
+	// rounds on success and returns 0 on invalid input.
+	while (rounds > 0) {
+		if (rounds >= 8) {
+			if (!dec_loop_ssse3_inner(s, o, &rounds) ||
+			    !dec_loop_ssse3_inner(s, o, &rounds) ||
+			    !dec_loop_ssse3_inner(s, o, &rounds) ||
+			    !dec_loop_ssse3_inner(s, o, &rounds) ||
+			    !dec_loop_ssse3_inner(s, o, &rounds) ||
+			    !dec_loop_ssse3_inner(s, o, &rounds) ||
+			    !dec_loop_ssse3_inner(s, o, &rounds) ||
+			    !dec_loop_ssse3_inner(s, o, &rounds)) {
+				break;
+			}
+			continue;
+		}
+		if (rounds >= 4) {
+			if (!dec_loop_ssse3_inner(s, o, &rounds) ||
+			    !dec_loop_ssse3_inner(s, o, &rounds) ||
+			    !dec_loop_ssse3_inner(s, o, &rounds) ||
+			    !dec_loop_ssse3_inner(s, o, &rounds)) {
+				break;
+			}
+			continue;
+		}
+		if (rounds >= 2) {
+			if (!dec_loop_ssse3_inner(s, o, &rounds) ||
+			    !dec_loop_ssse3_inner(s, o, &rounds)) {
+				break;
+			}
+			continue;
+		}
+
+		// Exactly one round left; done afterwards either way.
+		dec_loop_ssse3_inner(s, o, &rounds);
+		break;
+	}
+
+	// Give back the rounds that were not executed:
+	*slen += rounds * 16;
+	*olen -= rounds * 12;
+}
--- a/deps/base64/base64/lib/arch/ssse3/dec_reshuffle.c
+++ b/deps/base64/base64/lib/arch/ssse3/dec_reshuffle.c
@ -0,0 +1,33 @@
+// Pack sixteen 6-bit values (one per byte of `in`, top two bits clear as in
+// the layout diagrams below) into twelve contiguous bytes. The remaining
+// four bytes of the returned vector are zero.
+static inline __m128i
+dec_reshuffle (const __m128i in)
+{
+	// in, bits, upper case are most significant bits, lower case are least significant bits
+	// 00llllll 00kkkkLL 00jjKKKK 00JJJJJJ
+	// 00iiiiii 00hhhhII 00ggHHHH 00GGGGGG
+	// 00ffffff 00eeeeFF 00ddEEEE 00DDDDDD
+	// 00cccccc 00bbbbCC 00aaBBBB 00AAAAAA
+
+	// Per 16-bit lane: (low byte * 0x40) + (high byte * 0x01), i.e. the
+	// first value of each pair is shifted left by 6 and merged with the
+	// second:
+	const __m128i merge_ab_and_bc = _mm_maddubs_epi16(in, _mm_set1_epi32(0x01400140));
+	// 0000kkkk LLllllll 0000JJJJ JJjjKKKK
+	// 0000hhhh IIiiiiii 0000GGGG GGggHHHH
+	// 0000eeee FFffffff 0000DDDD DDddEEEE
+	// 0000bbbb CCcccccc 0000AAAA AAaaBBBB
+
+	// Per 32-bit lane: (low word * 0x1000) + (high word * 0x0001), which
+	// joins the two 12-bit halves into one 24-bit group:
+	const __m128i out = _mm_madd_epi16(merge_ab_and_bc, _mm_set1_epi32(0x00011000));
+	// 00000000 JJJJJJjj KKKKkkkk LLllllll
+	// 00000000 GGGGGGgg HHHHhhhh IIiiiiii
+	// 00000000 DDDDDDdd EEEEeeee FFffffff
+	// 00000000 AAAAAAaa BBBBbbbb CCcccccc
+
+	// Pack bytes together:
+	// (each 3-byte group is byte-reversed within its lane; the four -1
+	// indices produce the zero bytes at the top of the result)
+	return  _mm_shuffle_epi8(out, _mm_setr_epi8(
+		 2,  1,  0,
+		 6,  5,  4,
+		10,  9,  8,
+		14, 13, 12,
+		-1, -1, -1, -1));
+	// 00000000 00000000 00000000 00000000
+	// LLllllll KKKKkkkk JJJJJJjj IIiiiiii
+	// HHHHhhhh GGGGGGgg FFffffff EEEEeeee
+	// DDDDDDdd CCcccccc BBBBbbbb AAAAAAaa
+}
--- a/deps/base64/base64/lib/arch/ssse3/enc_loop.c
+++ b/deps/base64/base64/lib/arch/ssse3/enc_loop.c
@ -0,0 +1,67 @@
+// One SSSE3 encoding round: read 16 bytes at *s (12 of which are consumed),
+// write 16 Base64 characters at *o, then advance both cursors.
+static inline void
+enc_loop_ssse3_inner (const uint8_t **s, uint8_t **o)
+{
+	// Unaligned 16-byte load of the raw input:
+	const __m128i raw = _mm_loadu_si128((__m128i *) *s);
+
+	// Spread into 6-bit fields, then map those onto the Base64 alphabet:
+	const __m128i encoded = enc_translate(enc_reshuffle(raw));
+
+	// Unaligned 16-byte store of the encoded characters:
+	_mm_storeu_si128((__m128i *) *o, encoded);
+
+	*o += 16;
+	*s += 12;
+}
+
+// Bulk SSSE3 encode: run as many 12-byte rounds over *s as the input allows,
+// producing 16 characters per round at *o, and update *slen / *olen to
+// match.
+static inline void
+enc_loop_ssse3 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
+{
+	size_t left;
+
+	// Not enough input for even a single 16-byte load:
+	if (*slen < 16) {
+		return;
+	}
+
+	// Each round consumes 12 bytes but loads 16. Leave at least 4 bytes
+	// unconsumed after the last round so that the final load stays inside
+	// the input buffer:
+	left = (*slen - 4) / 12;
+
+	*olen += left * 16;	// 16 bytes produced per round
+	*slen -= left * 12;	// 12 bytes consumed per round
+
+	// Hand-unrolled in batches of 8, 4, 2 and 1:
+	for (;;) {
+		if (left >= 8) {
+			enc_loop_ssse3_inner(s, o);
+			enc_loop_ssse3_inner(s, o);
+			enc_loop_ssse3_inner(s, o);
+			enc_loop_ssse3_inner(s, o);
+			enc_loop_ssse3_inner(s, o);
+			enc_loop_ssse3_inner(s, o);
+			enc_loop_ssse3_inner(s, o);
+			enc_loop_ssse3_inner(s, o);
+			left -= 8;
+		}
+		else if (left >= 4) {
+			enc_loop_ssse3_inner(s, o);
+			enc_loop_ssse3_inner(s, o);
+			enc_loop_ssse3_inner(s, o);
+			enc_loop_ssse3_inner(s, o);
+			left -= 4;
+		}
+		else if (left >= 2) {
+			enc_loop_ssse3_inner(s, o);
+			enc_loop_ssse3_inner(s, o);
+			left -= 2;
+		}
+		else {
+			// Exactly one round remains:
+			enc_loop_ssse3_inner(s, o);
+			break;
+		}
+
+		if (left == 0) {
+			break;
+		}
+	}
+}
--- a/deps/base64/base64/lib/arch/ssse3/enc_reshuffle.c
+++ b/deps/base64/base64/lib/arch/ssse3/enc_reshuffle.c
@ -0,0 +1,48 @@
+// Spread the low twelve bytes of `in` over sixteen bytes that each carry one
+// 6-bit value (top two bits clear), following the layout diagrams below.
+// The shuffle only references input bytes 0..11; the top four are ignored.
+static inline __m128i
+enc_reshuffle (__m128i in)
+{
+	// Input, bytes MSB to LSB:
+	// 0 0 0 0 l k j i h g f e d c b a
+
+	// Give every 32-bit lane its own 3-byte group, with the middle byte
+	// duplicated so both 16-bit halves can reach the bits they need:
+	in = _mm_shuffle_epi8(in, _mm_set_epi8(
+		10, 11,  9, 10,
+		 7,  8,  6,  7,
+		 4,  5,  3,  4,
+		 1,  2,  0,  1));
+	// in, bytes MSB to LSB:
+	// k l j k
+	// h i g h
+	// e f d e
+	// b c a b
+
+	const __m128i t0 = _mm_and_si128(in, _mm_set1_epi32(0x0FC0FC00));
+	// bits, upper case are most significant bits, lower case are least significant bits
+	// 0000kkkk LL000000 JJJJJJ00 00000000
+	// 0000hhhh II000000 GGGGGG00 00000000
+	// 0000eeee FF000000 DDDDDD00 00000000
+	// 0000bbbb CC000000 AAAAAA00 00000000
+
+	// High half of a 16-bit multiply acts as a per-word right shift:
+	// by 10 for the low word (* 0x0040), by 6 for the high word (* 0x0400).
+	const __m128i t1 = _mm_mulhi_epu16(t0, _mm_set1_epi32(0x04000040));
+	// 00000000 00kkkkLL 00000000 00JJJJJJ
+	// 00000000 00hhhhII 00000000 00GGGGGG
+	// 00000000 00eeeeFF 00000000 00DDDDDD
+	// 00000000 00bbbbCC 00000000 00AAAAAA
+
+	const __m128i t2 = _mm_and_si128(in, _mm_set1_epi32(0x003F03F0));
+	// 00000000 00llllll 000000jj KKKK0000
+	// 00000000 00iiiiii 000000gg HHHH0000
+	// 00000000 00ffffff 000000dd EEEE0000
+	// 00000000 00cccccc 000000aa BBBB0000
+
+	// Low half of a 16-bit multiply acts as a per-word left shift:
+	// by 4 for the low word (* 0x0010), by 8 for the high word (* 0x0100).
+	const __m128i t3 = _mm_mullo_epi16(t2, _mm_set1_epi32(0x01000010));
+	// 00llllll 00000000 00jjKKKK 00000000
+	// 00iiiiii 00000000 00ggHHHH 00000000
+	// 00ffffff 00000000 00ddEEEE 00000000
+	// 00cccccc 00000000 00aaBBBB 00000000
+
+	// The two partial results occupy disjoint bytes, so OR merges them:
+	return _mm_or_si128(t1, t3);
+	// 00llllll 00kkkkLL 00jjKKKK 00JJJJJJ
+	// 00iiiiii 00hhhhII 00ggHHHH 00GGGGGG
+	// 00ffffff 00eeeeFF 00ddEEEE 00DDDDDD
+	// 00cccccc 00bbbbCC 00aaBBBB 00AAAAAA
+}
--- a/deps/base64/base64/lib/arch/ssse3/enc_translate.c
+++ b/deps/base64/base64/lib/arch/ssse3/enc_translate.c
@ -0,0 +1,33 @@
+// Map sixteen 6-bit values (0..63, one per byte of `in`) to their Base64
+// alphabet characters by adding a per-range offset taken from a 16-entry
+// lookup table.
+static inline __m128i
+enc_translate (const __m128i in)
+{
+	// A lookup table containing the absolute offsets for all ranges:
+	const __m128i lut = _mm_setr_epi8(
+		 65,  71, -4, -4,
+		 -4,  -4, -4, -4,
+		 -4,  -4, -4, -4,
+		-19, -16,  0,  0
+	);
+
+	// Translate values 0..63 to the Base64 alphabet. There are five sets:
+	// #  From      To         Abs    Index  Characters
+	// 0  [0..25]   [65..90]   +65        0  ABCDEFGHIJKLMNOPQRSTUVWXYZ
+	// 1  [26..51]  [97..122]  +71        1  abcdefghijklmnopqrstuvwxyz
+	// 2  [52..61]  [48..57]    -4  [2..11]  0123456789
+	// 3  [62]      [43]       -19       12  +
+	// 4  [63]      [47]       -16       13  /
+
+	// Create LUT indices from the input. The index for range #0 is right,
+	// others are 1 less than expected:
+	// (the subtraction saturates at zero, so 0..51 give 0 and 52..63
+	// give 1..12)
+	__m128i indices = _mm_subs_epu8(in, _mm_set1_epi8(51));
+
+	// mask is 0xFF (-1) for range #[1..4] and 0x00 for range #0:
+	__m128i mask = _mm_cmpgt_epi8(in, _mm_set1_epi8(25));
+
+	// Subtract -1, so add 1 to indices for range #[1..4]. All indices are
+	// now correct:
+	indices = _mm_sub_epi8(indices, mask);
+
+	// Add offsets to input values:
+	return _mm_add_epi8(in, _mm_shuffle_epi8(lut, indices));
+}
--- a/deps/base64/base64/lib/codec_choose.c
+++ b/deps/base64/base64/lib/codec_choose.c
@ -0,0 +1,281 @@
+#include <stdbool.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "../include/libbase64.h"
+#include "codecs.h"
+#include "config.h"
+#include "env.h"
+
+// Detect x86 targets. GCC/Clang define __x86_64__ / __i386__; MSVC defines
+// _M_X64 for 64-bit and _M_IX86 for 32-bit x86 builds. (_M_X86 is kept for
+// backward compatibility with the original spelling, although MSVC itself
+// does not define a macro of that name.)
+#if (__x86_64__ || __i386__ || _M_IX86 || _M_X86 || _M_X64)
+  #define BASE64_X86
+  // At least one SIMD codec was enabled by the build configuration:
+  #if (HAVE_SSSE3 || HAVE_SSE41 || HAVE_SSE42 || HAVE_AVX || HAVE_AVX2)
+    #define BASE64_X86_SIMD
+  #endif
+#endif
+
+// CPUID / XGETBV compatibility layer, only needed on x86 targets:
+#ifdef BASE64_X86
+#ifdef _MSC_VER
+	// MSVC: express the GCC-style __cpuid_count() / __cpuid() macros used
+	// further down in terms of the __cpuidex() intrinsic.
+	#include <intrin.h>
+	#define __cpuid_count(__level, __count, __eax, __ebx, __ecx, __edx) \
+	{						\
+		int info[4];				\
+		__cpuidex(info, __level, __count);	\
+		__eax = info[0];			\
+		__ebx = info[1];			\
+		__ecx = info[2];			\
+		__edx = info[3];			\
+	}
+	#define __cpuid(__level, __eax, __ebx, __ecx, __edx) \
+		__cpuid_count(__level, 0, __eax, __ebx, __ecx, __edx)
+#else
+	#include <cpuid.h>
+	#if HAVE_AVX2 || HAVE_AVX
+		#if ((__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 2) || (__clang_major__ >= 3))
+			// Execute XGETBV for the extended control register
+			// selected by `index` and return EDX:EAX as one
+			// 64-bit value.
+			static inline uint64_t _xgetbv (uint32_t index)
+			{
+				uint32_t eax, edx;
+				__asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index));
+				return ((uint64_t)edx << 32) | eax;
+			}
+		#else
+			#error "Platform not supported"
+		#endif
+	#endif
+#endif
+
+// Feature bits, defined here only when the toolchain headers did not
+// already provide them. bit_AVX2 is tested against EBX of CPUID leaf 7 in
+// codec_choose_x86(); the other bits are tested against ECX of leaf 1.
+#ifndef bit_AVX2
+#define bit_AVX2 (1 << 5)
+#endif
+#ifndef bit_SSSE3
+#define bit_SSSE3 (1 << 9)
+#endif
+#ifndef bit_SSE41
+#define bit_SSE41 (1 << 19)
+#endif
+#ifndef bit_SSE42
+#define bit_SSE42 (1 << 20)
+#endif
+#ifndef bit_AVX
+#define bit_AVX (1 << 28)
+#endif
+
+// Tested against ECX of CPUID leaf 1 before XGETBV is executed:
+#define bit_XSAVE_XRSTORE (1 << 27)
+
+// Register index handed to _xgetbv():
+#ifndef _XCR_XFEATURE_ENABLED_MASK
+#define _XCR_XFEATURE_ENABLED_MASK 0
+#endif
+
+// Mask applied to the XGETBV result (bits 1 and 2):
+#define _XCR_XMM_AND_YMM_STATE_ENABLED_BY_OS 0x6
+#endif
+
+// Function declarations:
+// BASE64_CODEC_FUNCS(arch) expands to the prototypes of both
+// base64_stream_encode_<arch> and base64_stream_decode_<arch>. The macro
+// body ends in a line continuation, so the blank line after it is part of
+// the definition.
+#define BASE64_CODEC_FUNCS(arch)	\
+	BASE64_ENC_FUNCTION(arch);	\
+	BASE64_DEC_FUNCTION(arch);	\
+
+// One encoder/decoder pair is declared per codec, whether or not that codec
+// is selected at runtime:
+BASE64_CODEC_FUNCS(avx2)
+BASE64_CODEC_FUNCS(neon32)
+BASE64_CODEC_FUNCS(neon64)
+BASE64_CODEC_FUNCS(plain)
+BASE64_CODEC_FUNCS(ssse3)
+BASE64_CODEC_FUNCS(sse41)
+BASE64_CODEC_FUNCS(sse42)
+BASE64_CODEC_FUNCS(avx)
+
+// Honour an explicit codec request in `flags`.
+//
+// Returns true and fills in `codec` when one of the BASE64_FORCE_* bits is
+// set, false when the low eight flag bits are all clear or none of the known
+// bits matches. A forced codec is always accepted, even if it is a no-op
+// stub on this build; this exists for testing purposes.
+static bool
+codec_choose_forced (struct codec *codec, int flags)
+{
+	// Checked top to bottom; the first matching flag wins:
+	static const struct {
+		int		mask;
+		struct codec	impl;
+	} forced[] = {
+		{ BASE64_FORCE_AVX2,   { base64_stream_encode_avx2,   base64_stream_decode_avx2   } },
+		{ BASE64_FORCE_NEON32, { base64_stream_encode_neon32, base64_stream_decode_neon32 } },
+		{ BASE64_FORCE_NEON64, { base64_stream_encode_neon64, base64_stream_decode_neon64 } },
+		{ BASE64_FORCE_PLAIN,  { base64_stream_encode_plain,  base64_stream_decode_plain  } },
+		{ BASE64_FORCE_SSSE3,  { base64_stream_encode_ssse3,  base64_stream_decode_ssse3  } },
+		{ BASE64_FORCE_SSE41,  { base64_stream_encode_sse41,  base64_stream_decode_sse41  } },
+		{ BASE64_FORCE_SSE42,  { base64_stream_encode_sse42,  base64_stream_decode_sse42  } },
+		{ BASE64_FORCE_AVX,    { base64_stream_encode_avx,    base64_stream_decode_avx    } },
+	};
+	size_t i;
+
+	// No codec bits set at all: nothing was forced.
+	if ((flags & 0xFF) == 0) {
+		return false;
+	}
+
+	for (i = 0; i < sizeof(forced) / sizeof(forced[0]); i++) {
+		if (flags & forced[i].mask) {
+			codec->enc = forced[i].impl.enc;
+			codec->dec = forced[i].impl.dec;
+			return true;
+		}
+	}
+	return false;
+}
+
+// Select a NEON codec when the build targets ARM with NEON enabled.
+//
+// Returns true and fills in `codec` on such builds, false otherwise. There
+// is no portable way to probe for NEON at runtime from userland (nothing
+// comparable to cpuid on x86), so the decision is made entirely from the
+// compile-time configuration.
+static bool
+codec_choose_arm (struct codec *codec)
+{
+#if (defined(__ARM_NEON__) || defined(__ARM_NEON)) && defined(__aarch64__) && HAVE_NEON64
+
+	// 64-bit ARM with the NEON64 codec built in:
+	codec->enc = base64_stream_encode_neon64;
+	codec->dec = base64_stream_decode_neon64;
+	return true;
+
+#elif (defined(__ARM_NEON__) || defined(__ARM_NEON)) && HAVE_NEON32
+
+	// Any other NEON build with the NEON32 codec built in:
+	codec->enc = base64_stream_encode_neon32;
+	codec->dec = base64_stream_decode_neon32;
+	return true;
+
+#else
+
+	// No NEON codec in this build:
+	(void)codec;
+	return false;
+
+#endif
+}
+
+// Select the best x86 SIMD codec that is both compiled in (HAVE_* macros)
+// and reported by CPUID at runtime.
+//
+// Candidates are tried in the order AVX2, AVX, SSE4.2, SSE4.1, SSSE3.
+// Returns true and fills in `codec` for the first match; returns false when
+// nothing matches or when the build has no x86 SIMD codec at all.
+static bool
+codec_choose_x86 (struct codec *codec)
+{
+#ifdef BASE64_X86_SIMD
+
+	unsigned int eax, ebx = 0, ecx = 0, edx;
+	unsigned int max_level;
+
+	// Highest basic CPUID leaf supported by this CPU:
+	#ifdef _MSC_VER
+	int info[4];
+	__cpuidex(info, 0, 0);
+	max_level = info[0];
+	#else
+	max_level = __get_cpuid_max(0, NULL);
+	#endif
+
+	#if HAVE_AVX2 || HAVE_AVX
+	// Check for AVX/AVX2 support:
+	// Checking for AVX requires 3 things:
+	// 1) CPUID indicates that the OS uses XSAVE and XRSTORE instructions
+	//    (allowing saving YMM registers on context switch)
+	// 2) CPUID indicates support for AVX
+	// 3) XGETBV indicates the AVX registers will be saved and restored on
+	//    context switch
+	//
+	// Note that XGETBV is only available on 686 or later CPUs, so the
+	// instruction needs to be conditionally run.
+	if (max_level >= 1) {
+		__cpuid_count(1, 0, eax, ebx, ecx, edx);
+		if (ecx & bit_XSAVE_XRSTORE) {
+			uint64_t xcr_mask;
+			xcr_mask = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
+			if (xcr_mask & _XCR_XMM_AND_YMM_STATE_ENABLED_BY_OS) {
+				#if HAVE_AVX2
+				if (max_level >= 7) {
+					__cpuid_count(7, 0, eax, ebx, ecx, edx);
+					if (ebx & bit_AVX2) {
+						codec->enc = base64_stream_encode_avx2;
+						codec->dec = base64_stream_decode_avx2;
+						return true;
+					}
+				}
+				#endif
+				#if HAVE_AVX
+				// Query leaf 1 again: the leaf 7 query above
+				// may have overwritten ecx.
+				__cpuid_count(1, 0, eax, ebx, ecx, edx);
+				if (ecx & bit_AVX) {
+					codec->enc = base64_stream_encode_avx;
+					codec->dec = base64_stream_decode_avx;
+					return true;
+				}
+				#endif
+			}
+		}
+	}
+	#endif
+
+	#if HAVE_SSE42
+	// Check for SSE42 support:
+	if (max_level >= 1) {
+		__cpuid(1, eax, ebx, ecx, edx);
+		if (ecx & bit_SSE42) {
+			codec->enc = base64_stream_encode_sse42;
+			codec->dec = base64_stream_decode_sse42;
+			return true;
+		}
+	}
+	#endif
+
+	#if HAVE_SSE41
+	// Check for SSE41 support:
+	if (max_level >= 1) {
+		__cpuid(1, eax, ebx, ecx, edx);
+		if (ecx & bit_SSE41) {
+			codec->enc = base64_stream_encode_sse41;
+			codec->dec = base64_stream_decode_sse41;
+			return true;
+		}
+	}
+	#endif
+
+	#if HAVE_SSSE3
+	// Check for SSSE3 support:
+	if (max_level >= 1) {
+		__cpuid(1, eax, ebx, ecx, edx);
+		if (ecx & bit_SSSE3) {
+			codec->enc = base64_stream_encode_ssse3;
+			codec->dec = base64_stream_decode_ssse3;
+			return true;
+		}
+	}
+	#endif
+
+#else
+	// No x86 SIMD codec in this build; silence the unused parameter:
+	(void)codec;
+#endif
+
+	return false;
+}
+
+// Fill in `codec` with an encoder/decoder pair.
+//
+// Precedence: a codec forced through `flags`, then the ARM compile-time
+// choice, then x86 runtime detection. If none of those applies, the plain
+// implementation is used.
+void
+codec_choose (struct codec *codec, int flags)
+{
+	// The selectors run left to right and evaluation stops at the first
+	// one that reports success:
+	if (codec_choose_forced(codec, flags)
+	 || codec_choose_arm(codec)
+	 || codec_choose_x86(codec)) {
+		return;
+	}
+
+	// Fallback:
+	codec->dec = base64_stream_decode_plain;
+	codec->enc = base64_stream_encode_plain;
+}
--- a/deps/base64/base64/lib/codecs.h
+++ b/deps/base64/base64/lib/codecs.h
@ -0,0 +1,65 @@
+#include <stdint.h>
+#include <stddef.h>
+
+#include "../include/libbase64.h"
+#include "config.h"
+
+// Function parameters for encoding functions:
+#define BASE64_ENC_PARAMS			\
+	( struct base64_state	*state		\
+	, const char		*src		\
+	, size_t		 srclen		\
+	, char			*out		\
+	, size_t		*outlen		\
+	)
+
+// Function parameters for decoding functions:
+#define BASE64_DEC_PARAMS			\
+	( struct base64_state	*state		\
+	, const char		*src		\
+	, size_t		 srclen		\
+	, char			*out		\
+	, size_t		*outlen		\
+	)
+
+// Function signature for encoding functions:
+// (expands to `void base64_stream_encode_<arch> (...)`)
+#define BASE64_ENC_FUNCTION(arch)		\
+	void					\
+	base64_stream_encode_ ## arch		\
+	BASE64_ENC_PARAMS
+
+// Function signature for decoding functions:
+// (expands to `int base64_stream_decode_<arch> (...)`)
+#define BASE64_DEC_FUNCTION(arch)		\
+	int					\
+	base64_stream_decode_ ## arch		\
+	BASE64_DEC_PARAMS
+
+// Cast away unused variable, silence compiler:
+#define UNUSED(x)		((void)(x))
+
+// Stub function when encoder arch unsupported:
+// (function body that ignores its input and reports zero bytes written)
+#define BASE64_ENC_STUB				\
+	UNUSED(state);				\
+	UNUSED(src);				\
+	UNUSED(srclen);				\
+	UNUSED(out);				\
+						\
+	*outlen = 0;
+
+// Stub function when decoder arch unsupported:
+// (function body that ignores all arguments and returns -1; *outlen is left
+// untouched)
+#define BASE64_DEC_STUB				\
+	UNUSED(state);				\
+	UNUSED(src);				\
+	UNUSED(srclen);				\
+	UNUSED(out);				\
+	UNUSED(outlen);				\
+						\
+	return -1;
+
+// An encoder/decoder pair, filled in by codec_choose():
+struct codec
+{
+	void (* enc) BASE64_ENC_PARAMS;
+	int  (* dec) BASE64_DEC_PARAMS;
+};
+
+// Select the codec to use; the low eight bits of `flags` can force a
+// specific one.
+extern void codec_choose (struct codec *, int flags);
--- a/deps/base64/base64/lib/config.h
+++ b/deps/base64/base64/lib/config.h
@ -0,0 +1 @@
+// Intentionally empty
--- a/deps/base64/base64/lib/env.h
+++ b/deps/base64/base64/lib/env.h
@ -0,0 +1,74 @@
+#ifndef BASE64_ENV_H
+#define BASE64_ENV_H
+
+// This header file contains macro definitions that describe certain aspects of
+// the compile-time environment. Compatibility and portability macros go here.
+
+// Define machine endianness. This is for GCC:
+// (When the compiler defines neither macro, both sides of the comparison
+// evaluate to 0 and little-endian is assumed.)
+#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+#  define BASE64_LITTLE_ENDIAN 1
+#else
+#  define BASE64_LITTLE_ENDIAN 0
+#endif
+
+// This is for Clang. These override the guess made above, so drop the
+// earlier definition first: redefining a macro with a different value
+// without #undef is an incompatible redefinition.
+#ifdef __LITTLE_ENDIAN__
+#  undef  BASE64_LITTLE_ENDIAN
+#  define BASE64_LITTLE_ENDIAN 1
+#endif
+
+#ifdef __BIG_ENDIAN__
+#  undef  BASE64_LITTLE_ENDIAN
+#  define BASE64_LITTLE_ENDIAN 0
+#endif
+
+// MSVC++ needs intrin.h for _byteswap_uint64 (issue #68):
+#if BASE64_LITTLE_ENDIAN && defined(_MSC_VER)
+#  include <intrin.h>
+#endif
+
+// Endian conversion functions:
+// (host to big-endian: a byte swap on little-endian hosts, a no-op otherwise)
+#if BASE64_LITTLE_ENDIAN
+#  ifdef _MSC_VER
+//   Microsoft Visual C++:
+#    define BASE64_HTOBE32(x)	_byteswap_ulong(x)
+#    define BASE64_HTOBE64(x)	_byteswap_uint64(x)
+#  else
+//   GCC and Clang:
+#    define BASE64_HTOBE32(x)	__builtin_bswap32(x)
+#    define BASE64_HTOBE64(x)	__builtin_bswap64(x)
+#  endif
+#else
+// No conversion needed:
+#  define BASE64_HTOBE32(x)	(x)
+#  define BASE64_HTOBE64(x)	(x)
+#endif
+
+// Detect word size:
+// The first macro found in this order decides; the build fails if none is
+// available.
+// NOTE(review): _INTEGRAL_MAX_BITS looks like the width of the largest
+// integer type rather than the machine word - confirm this is the intended
+// value on 32-bit targets.
+#if defined (__x86_64__)
+// This also works for the x32 ABI, which has a 64-bit word size.
+#  define BASE64_WORDSIZE 64
+#elif defined (_INTEGRAL_MAX_BITS)
+#  define BASE64_WORDSIZE _INTEGRAL_MAX_BITS
+#elif defined (__WORDSIZE)
+#  define BASE64_WORDSIZE __WORDSIZE
+#elif defined (__SIZE_WIDTH__)
+#  define BASE64_WORDSIZE __SIZE_WIDTH__
+#else
+#  error BASE64_WORDSIZE_NOT_DEFINED
+#endif
+
+// End-of-file definitions.
+// Almost end-of-file when waiting for the last '=' character:
+#define BASE64_AEOF 1
+// End-of-file when stream end has been reached or invalid input provided:
+#define BASE64_EOF 2
+
+// GCC 7 defaults to issuing a warning for fallthrough in switch statements,
+// unless the fallthrough cases are marked with an attribute. As we use
+// fallthrough deliberately, define an alias for the attribute:
+// (the GCC 7+ expansion already carries its own terminating semicolon)
+#if __GNUC__ >= 7
+#  define BASE64_FALLTHROUGH  __attribute__((fallthrough));
+#else
+#  define BASE64_FALLTHROUGH
+#endif
+
+#endif	// BASE64_ENV_H
--- a/deps/base64/base64/lib/exports.txt
+++ b/deps/base64/base64/lib/exports.txt
@ -0,0 +1,7 @@
+base64_encode
+base64_stream_encode
+base64_stream_encode_init
+base64_stream_encode_final
+base64_decode
+base64_stream_decode
+base64_stream_decode_init
--- a/deps/base64/base64/lib/lib.c
+++ b/deps/base64/base64/lib/lib.c
@ -0,0 +1,164 @@
+#include <stdint.h>
+#include <stddef.h>
+#ifdef _OPENMP
+#include <omp.h>
+#endif
+
+#include "../include/libbase64.h"
+#include "tables/tables.h"
+#include "codecs.h"
+#include "env.h"
+
+// These static function pointers are initialized once when the library is
+// first used, and remain in use for the remaining lifetime of the program.
+// The idea being that CPU features don't change at runtime.
+static struct codec codec = { NULL, NULL };
+
+// Prepare `state` for a new encoding stream.
+//
+// The shared codec is chosen on first use, and chosen again whenever the
+// caller sets any of the low eight (codec selection) bits in `flags`.
+void
+base64_stream_encode_init (struct base64_state *state, int flags)
+{
+	const int codec_requested = (flags & 0xFF) != 0;
+
+	if (codec_requested || codec.enc == NULL) {
+		codec_choose(&codec, flags);
+	}
+
+	// Start from a clean stream state, remembering the caller's flags:
+	state->flags = flags;
+	state->carry = 0;
+	state->bytes = 0;
+	state->eof = 0;
+}
+
+// Encode `srclen` bytes from `src` into `out` with the currently selected
+// codec, which reports the number of bytes written through `outlen`. The
+// codec pointer is set by base64_stream_encode_init().
+void
+base64_stream_encode
+	( struct base64_state	*state
+	, const char		*src
+	, size_t		 srclen
+	, char			*out
+	, size_t		*outlen
+	)
+{
+	(*codec.enc)(state, src, srclen, out, outlen);
+}
+
+// Write the stream trailer, if any, to `out` and store its length in
+// `outlen`:
+//   state->bytes == 1: the character for the carried bits, then "==" (3)
+//   state->bytes == 2: the character for the carried bits, then "="  (2)
+//   anything else:     nothing is written                            (0)
+void
+base64_stream_encode_final
+	( struct base64_state	*state
+	, char			*out
+	, size_t		*outlen
+	)
+{
+	uint8_t *dst = (uint8_t *)out;
+	size_t written = 0;
+
+	if (state->bytes == 1 || state->bytes == 2) {
+		// Both cases start with the carried bits and one pad character:
+		dst[written++] = base64_table_enc_6bit[state->carry];
+		dst[written++] = '=';
+
+		// A single leftover byte needs a second pad character:
+		if (state->bytes == 1) {
+			dst[written++] = '=';
+		}
+	}
+	*outlen = written;
+}
+
+// Prepare `state` for a new decoding stream.
+//
+// The shared codec is chosen on first use, and chosen again whenever the
+// caller sets any of the low eight (codec selection) bits in `flags`.
+void
+base64_stream_decode_init (struct base64_state *state, int flags)
+{
+	const int codec_requested = (flags & 0xFF) != 0;
+
+	if (codec_requested || codec.dec == NULL) {
+		codec_choose(&codec, flags);
+	}
+
+	// Start from a clean stream state, remembering the caller's flags:
+	state->flags = flags;
+	state->carry = 0;
+	state->bytes = 0;
+	state->eof = 0;
+}
+
+// Decode `srclen` bytes from `src` into `out` with the currently selected
+// codec and pass its return value through unchanged. The codec pointer is
+// set by base64_stream_decode_init().
+int
+base64_stream_decode
+	( struct base64_state	*state
+	, const char		*src
+	, size_t		 srclen
+	, char			*out
+	, size_t		*outlen
+	)
+{
+	const int status = (*codec.dec)(state, src, srclen, out, outlen);
+
+	return status;
+}
+
+#ifdef _OPENMP
+
+	// Due to the overhead of initializing OpenMP and creating a team of
+	// threads, we require the data length to be larger than a threshold:
+	#define OMP_THRESHOLD 20000
+
+	// Conditionally include OpenMP-accelerated codec implementations:
+	#include "lib_openmp.c"
+#endif
+
+// One-shot encoder: encode the whole of `src` into `out`, trailer included,
+// and store the total number of bytes written in `outlen`.
+//
+// In OpenMP builds, inputs of at least OMP_THRESHOLD bytes are handed to
+// the OpenMP implementation instead.
+void
+base64_encode
+	( const char	*src
+	, size_t	 srclen
+	, char		*out
+	, size_t	*outlen
+	, int		 flags
+	)
+{
+	struct base64_state state;
+	size_t body_len;
+	size_t tail_len;
+
+	#ifdef _OPENMP
+	if (srclen >= OMP_THRESHOLD) {
+		base64_encode_openmp(src, srclen, out, outlen, flags);
+		return;
+	}
+	#endif
+
+	// Set up the stream, push the entire input through it, then append
+	// the trailer right behind the streamed output:
+	base64_stream_encode_init(&state, flags);
+	base64_stream_encode(&state, src, srclen, out, &body_len);
+	base64_stream_encode_final(&state, &out[body_len], &tail_len);
+
+	// Total length is streamed output plus trailer:
+	*outlen = body_len + tail_len;
+}
+
+// One-shot decoder: decode the whole of `src` into `out`; the codec stores
+// the number of bytes written in `outlen`.
+//
+// Returns the stream decoder's result when it is non-zero and no partial
+// input group is left pending in the state; returns 0 in every other case.
+// In OpenMP builds, inputs of at least OMP_THRESHOLD bytes are handed to
+// the OpenMP implementation instead.
+int
+base64_decode
+	( const char	*src
+	, size_t	 srclen
+	, char		*out
+	, size_t	*outlen
+	, int		 flags
+	)
+{
+	struct base64_state state;
+	int status;
+
+	#ifdef _OPENMP
+	if (srclen >= OMP_THRESHOLD) {
+		return base64_decode_openmp(src, srclen, out, outlen, flags);
+	}
+	#endif
+
+	// Set up the stream and push the entire input through it:
+	base64_stream_decode_init(&state, flags);
+	status = base64_stream_decode(&state, src, srclen, out, outlen);
+
+	// A whole buffer was supplied, so still waiting for more input at
+	// this point counts as failure:
+	return (status != 0 && state.bytes == 0) ? status : 0;
+}
--- a/deps/base64/base64/lib/lib_openmp.c
+++ b/deps/base64/base64/lib/lib_openmp.c
@ -0,0 +1,149 @@
+// This code makes some assumptions on the implementation of
+// base64_stream_encode_init(), base64_stream_encode() and base64_stream_decode().
+// Basically these assumptions boil down to that when breaking the src into
+// parts, out parts can be written without side effects.
+// This is met when:
+// 1) base64_stream_encode() and base64_stream_decode() don't use globals;
+// 2) the shared variables src and out are not read or written outside of the
+//    bounds of their parts, i.e.  when base64_stream_encode() reads a multiple
+//    of 3 bytes, it must write no more than a multiple of 4 bytes, not even
+//    temporarily;
+// 3) the state flag can be discarded after base64_stream_encode() and
+//    base64_stream_decode() on the parts.
+
+// OpenMP variant of base64_encode().
+//
+// The input is cut into num_threads parts of equal length, each a multiple
+// of 3 bytes, and every part is encoded by one iteration of the parallel
+// loop. The bytes left over after that split, and the trailer, are encoded
+// serially afterwards. The total number of bytes written is stored in
+// `outlen`.
+static inline void
+base64_encode_openmp
+	( const char	*src
+	, size_t	 srclen
+	, char		*out
+	, size_t	*outlen
+	, int		 flags
+	)
+{
+	size_t s;
+	size_t t;
+	size_t sum = 0, len, last_len;
+	struct base64_state state, initial_state;
+	int num_threads, i;
+
+	// Request a number of threads but not necessarily get them:
+	#pragma omp parallel
+	{
+		// Get the number of threads used from one thread only,
+		// as num_threads is a shared var:
+		#pragma omp single
+		{
+			num_threads = omp_get_num_threads();
+
+			// Split the input string into num_threads parts, each
+			// part a multiple of 3 bytes. The remaining bytes will
+			// be done later:
+			len = srclen / (num_threads * 3);
+			len *= 3;
+			last_len = srclen - num_threads * len;
+
+			// Init the stream reader:
+			base64_stream_encode_init(&state, flags);
+			initial_state = state;
+		}
+
+		// Single has an implicit barrier for all threads to wait here
+		// for the above to complete:
+		#pragma omp for firstprivate(state) private(s) reduction(+:sum) schedule(static,1)
+		for (i = 0; i < num_threads; i++)
+		{
+			// Feed each part of the string to the stream reader:
+			// (len is a multiple of 3, so the output offset
+			// i * len * 4 / 3 is exact)
+			base64_stream_encode(&state, src + i * len, len, out + i * len * 4 / 3, &s);
+			sum += s;
+		}
+	}
+
+	// As encoding should never fail and we encode an exact multiple
+	// of 3 bytes, we can discard state:
+	state = initial_state;
+
+	// Encode the remaining bytes:
+	base64_stream_encode(&state, src + num_threads * len, last_len, out + num_threads * len * 4 / 3, &s);
+
+	// Finalize the stream by writing trailer if any:
+	base64_stream_encode_final(&state, out + num_threads * len * 4 / 3 + s, &t);
+
+	// Final output length is stream length plus tail:
+	sum += s + t;
+	*outlen = sum;
+}
+
+// OpenMP variant of base64_decode().
+//
+// The input is cut into num_threads parts of equal length, each a multiple
+// of 4 bytes, and every part is decoded by one iteration of the parallel
+// loop. The per-part results are summed; the bytes left over after the
+// split are decoded serially afterwards.
+//
+// Returns -1 when every part returned -1, 0 when the summed result differs
+// from num_threads, and otherwise follows the same rule as base64_decode()
+// for the final part. `*outlen` is not written on the two early-return
+// paths.
+static inline int
+base64_decode_openmp
+	( const char	*src
+	, size_t	 srclen
+	, char		*out
+	, size_t	*outlen
+	, int		 flags
+	)
+{
+	int num_threads, result = 0, i;
+	size_t sum = 0, len, last_len, s;
+	struct base64_state state, initial_state;
+
+	// Request a number of threads but not necessarily get them:
+	#pragma omp parallel
+	{
+		// Get the number of threads used from one thread only,
+		// as num_threads is a shared var:
+		#pragma omp single
+		{
+			num_threads = omp_get_num_threads();
+
+			// Split the input string into num_threads parts, each
+			// part a multiple of 4 bytes. The remaining bytes will
+			// be done later:
+			len = srclen / (num_threads * 4);
+			len *= 4;
+			last_len = srclen - num_threads * len;
+
+			// Init the stream reader:
+			base64_stream_decode_init(&state, flags);
+
+			initial_state = state;
+		}
+
+		// Single has an implicit barrier to wait here for the above to
+		// complete:
+		#pragma omp for firstprivate(state) private(s) reduction(+:sum, result) schedule(static,1)
+		for (i = 0; i < num_threads; i++)
+		{
+			int this_result;
+
+			// Feed each part of the string to the stream reader:
+			// (len is a multiple of 4, so the output offset
+			// i * len * 3 / 4 is exact)
+			this_result = base64_stream_decode(&state, src + i * len, len, out + i * len * 3 / 4, &s);
+			sum += s;
+			result += this_result;
+		}
+	}
+
+	// If `result' equals `-num_threads', then all threads returned -1,
+	// indicating that the requested codec is not available:
+	if (result == -num_threads) {
+		return -1;
+	}
+
+	// If `result' does not equal `num_threads', then at least one of the
+	// threads hit a decode error:
+	if (result != num_threads) {
+		return 0;
+	}
+
+	// So far so good, now decode whatever remains in the buffer. Reuse the
+	// initial state, since we are at a 4-byte boundary:
+	state = initial_state;
+	result = base64_stream_decode(&state, src + num_threads * len, last_len, out + num_threads * len * 3 / 4, &s);
+	sum += s;
+	*outlen = sum;
+
+	// If when decoding a whole block, we're still waiting for input then fail:
+	if (result && (state.bytes == 0)) {
+		return result;
+	}
+	return 0;
+}
--- a/deps/base64/base64/lib/tables/.gitignore
+++ b/deps/base64/base64/lib/tables/.gitignore
@ -0,0 +1 @@
+table_generator
--- a/deps/base64/base64/lib/tables/Makefile
+++ b/deps/base64/base64/lib/tables/Makefile
@ -0,0 +1,17 @@
+# Builds the generated lookup-table headers in this directory.
+.PHONY: all clean
+
+# Everything `all` builds and `clean` removes (the generator binary included):
+TARGETS := table_dec_32bit.h table_enc_12bit.h table_generator
+
+all: $(TARGETS)
+
+clean:
+	$(RM) $(TARGETS)
+
+# The decode table header is the standard output of the generator program:
+table_dec_32bit.h: table_generator
+	./$^ > $@
+
+# The encode table header is the standard output of the script, which is
+# invoked directly as ./table_enc_12bit.py:
+table_enc_12bit.h: table_enc_12bit.py
+	./$^ > $@
+
+# The generator is built from its single C source file:
+table_generator: table_generator.c
+	$(CC) $(CFLAGS) -o $@ $^
--- a/deps/base64/base64/lib/tables/table_dec_32bit.h
+++ b/deps/base64/base64/lib/tables/table_dec_32bit.h
@ -0,0 +1,393 @@
+#include <stdint.h>
+#define CHAR62 '+'
+#define CHAR63 '/'
+#define CHARPAD '='
+
+
+#if BASE64_LITTLE_ENDIAN
+
+
+/* SPECIAL DECODE TABLES FOR LITTLE ENDIAN (INTEL) CPUS */
+
+const uint32_t base64_table_dec_32bit_d0[256] = {
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0x000000f8, 0xffffffff, 0xffffffff, 0xffffffff, 0x000000fc,
+0x000000d0, 0x000000d4, 0x000000d8, 0x000000dc, 0x000000e0, 0x000000e4,
+0x000000e8, 0x000000ec, 0x000000f0, 0x000000f4, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000,
+0x00000004, 0x00000008, 0x0000000c, 0x00000010, 0x00000014, 0x00000018,
+0x0000001c, 0x00000020, 0x00000024, 0x00000028, 0x0000002c, 0x00000030,
+0x00000034, 0x00000038, 0x0000003c, 0x00000040, 0x00000044, 0x00000048,
+0x0000004c, 0x00000050, 0x00000054, 0x00000058, 0x0000005c, 0x00000060,
+0x00000064, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0x00000068, 0x0000006c, 0x00000070, 0x00000074, 0x00000078,
+0x0000007c, 0x00000080, 0x00000084, 0x00000088, 0x0000008c, 0x00000090,
+0x00000094, 0x00000098, 0x0000009c, 0x000000a0, 0x000000a4, 0x000000a8,
+0x000000ac, 0x000000b0, 0x000000b4, 0x000000b8, 0x000000bc, 0x000000c0,
+0x000000c4, 0x000000c8, 0x000000cc, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
+};
+
+
+const uint32_t base64_table_dec_32bit_d1[256] = {
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0x0000e003, 0xffffffff, 0xffffffff, 0xffffffff, 0x0000f003,
+0x00004003, 0x00005003, 0x00006003, 0x00007003, 0x00008003, 0x00009003,
+0x0000a003, 0x0000b003, 0x0000c003, 0x0000d003, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000,
+0x00001000, 0x00002000, 0x00003000, 0x00004000, 0x00005000, 0x00006000,
+0x00007000, 0x00008000, 0x00009000, 0x0000a000, 0x0000b000, 0x0000c000,
+0x0000d000, 0x0000e000, 0x0000f000, 0x00000001, 0x00001001, 0x00002001,
+0x00003001, 0x00004001, 0x00005001, 0x00006001, 0x00007001, 0x00008001,
+0x00009001, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0x0000a001, 0x0000b001, 0x0000c001, 0x0000d001, 0x0000e001,
+0x0000f001, 0x00000002, 0x00001002, 0x00002002, 0x00003002, 0x00004002,
+0x00005002, 0x00006002, 0x00007002, 0x00008002, 0x00009002, 0x0000a002,
+0x0000b002, 0x0000c002, 0x0000d002, 0x0000e002, 0x0000f002, 0x00000003,
+0x00001003, 0x00002003, 0x00003003, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
+};
+
+
+const uint32_t base64_table_dec_32bit_d2[256] = {
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0x00800f00, 0xffffffff, 0xffffffff, 0xffffffff, 0x00c00f00,
+0x00000d00, 0x00400d00, 0x00800d00, 0x00c00d00, 0x00000e00, 0x00400e00,
+0x00800e00, 0x00c00e00, 0x00000f00, 0x00400f00, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000,
+0x00400000, 0x00800000, 0x00c00000, 0x00000100, 0x00400100, 0x00800100,
+0x00c00100, 0x00000200, 0x00400200, 0x00800200, 0x00c00200, 0x00000300,
+0x00400300, 0x00800300, 0x00c00300, 0x00000400, 0x00400400, 0x00800400,
+0x00c00400, 0x00000500, 0x00400500, 0x00800500, 0x00c00500, 0x00000600,
+0x00400600, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0x00800600, 0x00c00600, 0x00000700, 0x00400700, 0x00800700,
+0x00c00700, 0x00000800, 0x00400800, 0x00800800, 0x00c00800, 0x00000900,
+0x00400900, 0x00800900, 0x00c00900, 0x00000a00, 0x00400a00, 0x00800a00,
+0x00c00a00, 0x00000b00, 0x00400b00, 0x00800b00, 0x00c00b00, 0x00000c00,
+0x00400c00, 0x00800c00, 0x00c00c00, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
+};
+
+
+const uint32_t base64_table_dec_32bit_d3[256] = {
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0x003e0000, 0xffffffff, 0xffffffff, 0xffffffff, 0x003f0000,
+0x00340000, 0x00350000, 0x00360000, 0x00370000, 0x00380000, 0x00390000,
+0x003a0000, 0x003b0000, 0x003c0000, 0x003d0000, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000,
+0x00010000, 0x00020000, 0x00030000, 0x00040000, 0x00050000, 0x00060000,
+0x00070000, 0x00080000, 0x00090000, 0x000a0000, 0x000b0000, 0x000c0000,
+0x000d0000, 0x000e0000, 0x000f0000, 0x00100000, 0x00110000, 0x00120000,
+0x00130000, 0x00140000, 0x00150000, 0x00160000, 0x00170000, 0x00180000,
+0x00190000, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0x001a0000, 0x001b0000, 0x001c0000, 0x001d0000, 0x001e0000,
+0x001f0000, 0x00200000, 0x00210000, 0x00220000, 0x00230000, 0x00240000,
+0x00250000, 0x00260000, 0x00270000, 0x00280000, 0x00290000, 0x002a0000,
+0x002b0000, 0x002c0000, 0x002d0000, 0x002e0000, 0x002f0000, 0x00300000,
+0x00310000, 0x00320000, 0x00330000, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
+};
+
+
+#else
+
+
+/* SPECIAL DECODE TABLES FOR BIG ENDIAN (IBM/MOTOROLA/SUN) CPUS */
+
+const uint32_t base64_table_dec_32bit_d0[256] = {
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xf8000000, 0xffffffff, 0xffffffff, 0xffffffff, 0xfc000000,
+0xd0000000, 0xd4000000, 0xd8000000, 0xdc000000, 0xe0000000, 0xe4000000,
+0xe8000000, 0xec000000, 0xf0000000, 0xf4000000, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000,
+0x04000000, 0x08000000, 0x0c000000, 0x10000000, 0x14000000, 0x18000000,
+0x1c000000, 0x20000000, 0x24000000, 0x28000000, 0x2c000000, 0x30000000,
+0x34000000, 0x38000000, 0x3c000000, 0x40000000, 0x44000000, 0x48000000,
+0x4c000000, 0x50000000, 0x54000000, 0x58000000, 0x5c000000, 0x60000000,
+0x64000000, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0x68000000, 0x6c000000, 0x70000000, 0x74000000, 0x78000000,
+0x7c000000, 0x80000000, 0x84000000, 0x88000000, 0x8c000000, 0x90000000,
+0x94000000, 0x98000000, 0x9c000000, 0xa0000000, 0xa4000000, 0xa8000000,
+0xac000000, 0xb0000000, 0xb4000000, 0xb8000000, 0xbc000000, 0xc0000000,
+0xc4000000, 0xc8000000, 0xcc000000, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
+};
+
+
+const uint32_t base64_table_dec_32bit_d1[256] = {
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0x03e00000, 0xffffffff, 0xffffffff, 0xffffffff, 0x03f00000,
+0x03400000, 0x03500000, 0x03600000, 0x03700000, 0x03800000, 0x03900000,
+0x03a00000, 0x03b00000, 0x03c00000, 0x03d00000, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000,
+0x00100000, 0x00200000, 0x00300000, 0x00400000, 0x00500000, 0x00600000,
+0x00700000, 0x00800000, 0x00900000, 0x00a00000, 0x00b00000, 0x00c00000,
+0x00d00000, 0x00e00000, 0x00f00000, 0x01000000, 0x01100000, 0x01200000,
+0x01300000, 0x01400000, 0x01500000, 0x01600000, 0x01700000, 0x01800000,
+0x01900000, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0x01a00000, 0x01b00000, 0x01c00000, 0x01d00000, 0x01e00000,
+0x01f00000, 0x02000000, 0x02100000, 0x02200000, 0x02300000, 0x02400000,
+0x02500000, 0x02600000, 0x02700000, 0x02800000, 0x02900000, 0x02a00000,
+0x02b00000, 0x02c00000, 0x02d00000, 0x02e00000, 0x02f00000, 0x03000000,
+0x03100000, 0x03200000, 0x03300000, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
+};
+
+
+const uint32_t base64_table_dec_32bit_d2[256] = {
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0x000f8000, 0xffffffff, 0xffffffff, 0xffffffff, 0x000fc000,
+0x000d0000, 0x000d4000, 0x000d8000, 0x000dc000, 0x000e0000, 0x000e4000,
+0x000e8000, 0x000ec000, 0x000f0000, 0x000f4000, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000,
+0x00004000, 0x00008000, 0x0000c000, 0x00010000, 0x00014000, 0x00018000,
+0x0001c000, 0x00020000, 0x00024000, 0x00028000, 0x0002c000, 0x00030000,
+0x00034000, 0x00038000, 0x0003c000, 0x00040000, 0x00044000, 0x00048000,
+0x0004c000, 0x00050000, 0x00054000, 0x00058000, 0x0005c000, 0x00060000,
+0x00064000, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0x00068000, 0x0006c000, 0x00070000, 0x00074000, 0x00078000,
+0x0007c000, 0x00080000, 0x00084000, 0x00088000, 0x0008c000, 0x00090000,
+0x00094000, 0x00098000, 0x0009c000, 0x000a0000, 0x000a4000, 0x000a8000,
+0x000ac000, 0x000b0000, 0x000b4000, 0x000b8000, 0x000bc000, 0x000c0000,
+0x000c4000, 0x000c8000, 0x000cc000, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
+};
+
+
+const uint32_t base64_table_dec_32bit_d3[256] = {
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0x00003e00, 0xffffffff, 0xffffffff, 0xffffffff, 0x00003f00,
+0x00003400, 0x00003500, 0x00003600, 0x00003700, 0x00003800, 0x00003900,
+0x00003a00, 0x00003b00, 0x00003c00, 0x00003d00, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000,
+0x00000100, 0x00000200, 0x00000300, 0x00000400, 0x00000500, 0x00000600,
+0x00000700, 0x00000800, 0x00000900, 0x00000a00, 0x00000b00, 0x00000c00,
+0x00000d00, 0x00000e00, 0x00000f00, 0x00001000, 0x00001100, 0x00001200,
+0x00001300, 0x00001400, 0x00001500, 0x00001600, 0x00001700, 0x00001800,
+0x00001900, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0x00001a00, 0x00001b00, 0x00001c00, 0x00001d00, 0x00001e00,
+0x00001f00, 0x00002000, 0x00002100, 0x00002200, 0x00002300, 0x00002400,
+0x00002500, 0x00002600, 0x00002700, 0x00002800, 0x00002900, 0x00002a00,
+0x00002b00, 0x00002c00, 0x00002d00, 0x00002e00, 0x00002f00, 0x00003000,
+0x00003100, 0x00003200, 0x00003300, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
+};
+
+
+#endif
--- a/deps/base64/base64/lib/tables/table_enc_12bit.h
+++ b/deps/base64/base64/lib/tables/table_enc_12bit.h
--- a/deps/base64/base64/lib/tables/table_enc_12bit.py
+++ b/deps/base64/base64/lib/tables/table_enc_12bit.py
@ -0,0 +1,45 @@
+#!/usr/bin/python3
+
+def tr(x):
+    """Translate a 6-bit value to the Base64 alphabet."""
+    s = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' \
+      + 'abcdefghijklmnopqrstuvwxyz' \
+      + '0123456789' \
+      + '+/'
+    return ord(s[x])
+
+def table(fn):
+    """Generate a 12-bit lookup table."""
+    ret = []
+    for n in range(0, 2**12):
+        pre = "\n\t" if n % 8 == 0 else " "
+        pre = "\t" if n == 0 else pre
+        ret.append("{}0x{:04X}U,".format(pre, fn(n)))
+    return "".join(ret)
+
+def table_be():
+    """Generate a 12-bit big-endian lookup table."""
+    return table(lambda n: (tr(n & 0x3F) << 0) | (tr(n >> 6) << 8))
+
+def table_le():
+    """Generate a 12-bit little-endian lookup table."""
+    return table(lambda n: (tr(n >> 6) << 0) | (tr(n & 0x3F) << 8))
+
+def main():
+    """Entry point."""
+    lines = [
+        "#include <stdint.h>",
+        "",
+        "const uint16_t base64_table_enc_12bit[] = {",
+        "#if BASE64_LITTLE_ENDIAN",
+        table_le(),
+        "#else",
+        table_be(),
+        "#endif",
+        "};"
+    ]
+    for line in lines:
+        print(line)
+
+if __name__ == "__main__":
+    main()
--- a/deps/base64/base64/lib/tables/table_generator.c
+++ b/deps/base64/base64/lib/tables/table_generator.c
@ -0,0 +1,184 @@
+/**
+ *
+ * Copyright 2005, 2006 Nick Galbreath -- nickg [at] modp [dot] com
+ * Copyright 2017 Matthieu Darbois
+ * All rights reserved.
+ *
+ * http://modp.com/release/base64
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ *  this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in the
+ *   documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+ * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/****************************/
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+
+/* Base64 alphabet, indexed by 6-bit value. Not const: main() overwrites
+ * entries 62 and 63 when a replacement string is given on the command line. */
+static uint8_t b64chars[64] = {
+	'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
+	'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
+	'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
+	'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
+	'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'
+};
+
+/* Padding character; main() may replace it from the command line, and
+ * printStart() writes it to the generated header as CHARPAD. */
+static uint8_t padchar = '=';
+
+/* Emit the header preamble: the stdint include plus the three character
+ * macros that record which alphabet tail and padding are in effect. */
+static void printStart(void)
+{
+	const uint8_t char62 = b64chars[62];
+	const uint8_t char63 = b64chars[63];
+	const uint8_t charpad = padchar;
+
+	fputs("#include <stdint.h>\n", stdout);
+	printf("#define CHAR62 '%c'\n", char62);
+	printf("#define CHAR63 '%c'\n", char63);
+	printf("#define CHARPAD '%c'\n", charpad);
+}
+
+/* Reset all 256 entries of a decode table to 0xFFFFFFFF. */
+static void clearDecodeTable(uint32_t* ary)
+{
+	uint32_t* const end = ary + 256;
+
+	while (ary != end) {
+		*ary++ = 0xFFFFFFFF;
+	}
+}
+
+/*
+ * Dump an array of uint32_t as a C definition `const uint32_t name[sz]`,
+ * written to stdout as zero-padded 8-digit hex values, six per line.
+ *
+ * ary:  values to print; must hold at least sz elements.
+ * sz:   number of elements. An empty array prints an empty initializer
+ *       instead of reading past the end of ary.
+ * name: identifier used for the emitted array.
+ */
+void uint32_array_to_c_hex(const uint32_t* ary, size_t sz, const char* name)
+{
+	size_t i;
+
+	printf("const uint32_t %s[%d] = {\n", name, (int)sz);
+	for (i = 0; i < sz; ++i) {
+		/* Separator goes before every element but the first, so no
+		 * trailing comma is produced; a newline ends each group of six. */
+		if (i != 0) {
+			fputs((i % 6 == 0) ? ",\n" : ", ", stdout);
+		}
+		printf("0x%08" PRIx32, ary[i]);
+	}
+	printf("\n};\n");
+}
+
+/*
+ * Print the contents of table_dec_32bit.h to stdout: a preamble followed by
+ * four 256-entry decode tables (d0..d3) for little-endian targets and, under
+ * #else, the same four tables for big-endian targets.
+ *
+ * Each table is indexed by input byte. Bytes outside the alphabet keep the
+ * 0xFFFFFFFF marker written by clearDecodeTable(); alphabet characters map to
+ * their 6-bit value shifted into the position used by that table.
+ *
+ * An optional single argument of exactly three characters replaces alphabet
+ * characters 62 and 63 and the padding character, in that order.
+ *
+ * Returns 0 on success; exits with status 1 on a malformed argument.
+ */
+int main(int argc, char** argv)
+{
+	uint32_t x;
+	uint32_t i = 0;
+	uint32_t ary[256];
+
+	/*  over-ride standard alphabet */
+	if (argc == 2) {
+		uint8_t* replacements = (uint8_t*)argv[1];
+		if (strlen((char*)replacements) != 3) {
+			fprintf(stderr, "input must be a string of 3 characters '-', '.' or '_'\n");
+			exit(1);
+		}
+		/* Diagnostics go to stderr so they never end up in the header. */
+		fprintf(stderr, "using '%s' as replacements in base64 encoding\n", replacements);
+		b64chars[62] = replacements[0];
+		b64chars[63] = replacements[1];
+		padchar = replacements[2];
+	}
+
+	printStart();
+
+	printf("\n\n#if BASE64_LITTLE_ENDIAN\n");
+
+	printf("\n\n/* SPECIAL DECODE TABLES FOR LITTLE ENDIAN (INTEL) CPUS */\n\n");
+
+	/* d0: 6-bit value in bits 2..7 of the lowest byte. */
+	clearDecodeTable(ary);
+	for (i = 0; i < 64; ++i) {
+		x = b64chars[i];
+		ary[x] = i << 2;
+	}
+	uint32_array_to_c_hex(ary, sizeof(ary) / sizeof(uint32_t), "base64_table_dec_32bit_d0");
+	printf("\n\n");
+
+	/* d1: top two bits into the lowest byte, low four bits into the top
+	 * nibble of the second byte. */
+	clearDecodeTable(ary);
+	for (i = 0; i < 64; ++i) {
+		x = b64chars[i];
+		ary[x] = ((i & 0x30) >> 4) | ((i & 0x0F) << 12);
+	}
+	uint32_array_to_c_hex(ary, sizeof(ary) / sizeof(uint32_t), "base64_table_dec_32bit_d1");
+	printf("\n\n");
+
+	/* d2: top four bits into the second byte, low two bits into the top
+	 * of the third byte. */
+	clearDecodeTable(ary);
+	for (i = 0; i < 64; ++i) {
+		x = b64chars[i];
+		ary[x] = ((i & 0x03) << 22) | ((i & 0x3c) << 6);
+	}
+	uint32_array_to_c_hex(ary, sizeof(ary) / sizeof(uint32_t), "base64_table_dec_32bit_d2");
+	printf("\n\n");
+
+	/* d3: 6-bit value in the low bits of the third byte. */
+	clearDecodeTable(ary);
+	for (i = 0; i < 64; ++i) {
+		x = b64chars[i];
+		ary[x] = i << 16;
+	}
+	uint32_array_to_c_hex(ary, sizeof(ary) / sizeof(uint32_t), "base64_table_dec_32bit_d3");
+	printf("\n\n");
+
+	printf("#else\n");
+
+	printf("\n\n/* SPECIAL DECODE TABLES FOR BIG ENDIAN (IBM/MOTOROLA/SUN) CPUS */\n\n");
+
+	/* Big-endian tables place the four 6-bit values contiguously from the
+	 * top of the word: shifts of 26, 20, 14 and 8 bits. */
+	clearDecodeTable(ary);
+	for (i = 0; i < 64; ++i) {
+		x = b64chars[i];
+		ary[x] = i << 26;
+	}
+	uint32_array_to_c_hex(ary, sizeof(ary) / sizeof(uint32_t), "base64_table_dec_32bit_d0");
+	printf("\n\n");
+
+	clearDecodeTable(ary);
+	for (i = 0; i < 64; ++i) {
+		x = b64chars[i];
+		ary[x] = i << 20;
+	}
+	uint32_array_to_c_hex(ary, sizeof(ary) / sizeof(uint32_t), "base64_table_dec_32bit_d1");
+	printf("\n\n");
+
+	clearDecodeTable(ary);
+	for (i = 0; i < 64; ++i) {
+		x = b64chars[i];
+		ary[x] = i << 14;
+	}
+	uint32_array_to_c_hex(ary, sizeof(ary) / sizeof(uint32_t), "base64_table_dec_32bit_d2");
+	printf("\n\n");
+
+	clearDecodeTable(ary);
+	for (i = 0; i < 64; ++i) {
+		x = b64chars[i];
+		ary[x] = i << 8;
+	}
+	uint32_array_to_c_hex(ary, sizeof(ary) / sizeof(uint32_t), "base64_table_dec_32bit_d3");
+	printf("\n\n");
+
+	printf("#endif\n");
+
+	return 0;
+}
--- a/deps/base64/base64/lib/tables/tables.c
+++ b/deps/base64/base64/lib/tables/tables.c
@ -0,0 +1,40 @@
+#include "tables.h"
+
+// Encoding alphabet: entry i is the output character for the 6-bit value i
+// (A-Z, a-z, 0-9, '+', '/', in that order).
+const uint8_t
+base64_table_enc_6bit[] =
+	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+	"abcdefghijklmnopqrstuvwxyz"
+	"0123456789"
+	"+/";
+
+// In the lookup table below, note that the value for '=' (character 61) is
+// 254, not 255. This character is used for in-band signaling of the end of
+// the datastream, and we will use that later. The characters A-Z, a-z, 0-9
+// and + / are mapped to their "decoded" values. The other bytes all map to
+// the value 255, which flags them as "invalid input".
+
+// Indexed by input byte value. Each row below holds 16 consecutive entries;
+// the trailing comment gives the index range covered by that row.
+const uint8_t
+base64_table_dec_8bit[] =
+{
+	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,		//   0..15
+	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,		//  16..31
+	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,  62, 255, 255, 255,  63,		//  32..47
+	 52,  53,  54,  55,  56,  57,  58,  59,  60,  61, 255, 255, 255, 254, 255, 255,		//  48..63
+	255,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,		//  64..79
+	 15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25, 255, 255, 255, 255, 255,		//  80..95
+	255,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,		//  96..111
+	 41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51, 255, 255, 255, 255, 255,		// 112..127
+	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,		// 128..143
+	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,		// 144..159
+	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,		// 160..175
+	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,		// 176..191
+	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,		// 192..207
+	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,		// 208..223
+	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,		// 224..239
+	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,		// 240..255
+};
+
+#if BASE64_WORDSIZE >= 32
+#  include "table_dec_32bit.h"
+#  include "table_enc_12bit.h"
+#endif
--- a/deps/base64/base64/lib/tables/tables.h
+++ b/deps/base64/base64/lib/tables/tables.h
@ -0,0 +1,23 @@
+#ifndef BASE64_TABLES_H
+#define BASE64_TABLES_H
+
+#include <stdint.h>
+
+#include "../env.h"
+
+// These tables are used by all codecs for fallback plain encoding/decoding:
+extern const uint8_t base64_table_enc_6bit[];
+extern const uint8_t base64_table_dec_8bit[];
+
+// These tables are used for the 32-bit and 64-bit generic decoders:
+#if BASE64_WORDSIZE >= 32
+extern const uint32_t base64_table_dec_32bit_d0[];
+extern const uint32_t base64_table_dec_32bit_d1[];
+extern const uint32_t base64_table_dec_32bit_d2[];
+extern const uint32_t base64_table_dec_32bit_d3[];
+
+// This table is used by the 32 and 64-bit generic encoders:
+extern const uint16_t base64_table_enc_12bit[];
+#endif
+
+#endif	// BASE64_TABLES_H
--- a/deps/base64/base64/test/CMakeLists.txt
+++ b/deps/base64/base64/test/CMakeLists.txt
@ -0,0 +1,45 @@
+# Written in 2016 by Henrik Steffen Gaßmann henrik@gassmann.onl
+#
+# To the extent possible under law, the author(s) have dedicated all
+# copyright and related and neighboring rights to this software to the
+# public domain worldwide. This software is distributed without any warranty.
+#
+# You should have received a copy of the CC0 Public Domain Dedication
+# along with this software. If not, see
+#
+#     http://creativecommons.org/publicdomain/zero/1.0/
+#
+########################################################################
+
+# add_base64_test(<TEST_NAME> <source>...)
+#
+# Builds an executable named TEST_NAME from the given source files, links it
+# against the `base64` target, registers it as a CTest test of the same name,
+# and installs the binary into CMAKE_INSTALL_BINDIR.
+function(add_base64_test TEST_NAME)
+    # Start from an empty list: a function can read variables of the calling
+    # scope, so a SRC_FILES defined by the caller would otherwise leak extra
+    # sources into this target.
+    unset(SRC_FILES)
+    foreach(SRC_FILE ${ARGN})
+        list(APPEND SRC_FILES "${SRC_FILE}")
+    endforeach()
+
+    add_executable(${TEST_NAME} ${SRC_FILES})
+    target_link_libraries(${TEST_NAME} PRIVATE base64)
+
+    add_test(NAME ${TEST_NAME}
+        COMMAND ${TEST_NAME}
+    )
+    install(TARGETS ${TEST_NAME} DESTINATION ${CMAKE_INSTALL_BINDIR})
+endfunction()
+
+
+# Unit tests: registered on every platform.
+add_base64_test(test_base64
+    codec_supported.c
+    test_base64.c
+)
+
+# Benchmark: not built on Windows. On Linux it additionally links against rt.
+if (NOT WIN32)
+    add_base64_test(benchmark
+        codec_supported.c
+        benchmark.c
+    )
+
+    if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
+        target_link_libraries(benchmark PRIVATE rt)
+    endif()
+endif()
+
--- a/deps/base64/base64/test/Makefile
+++ b/deps/base64/base64/test/Makefile
@ -0,0 +1,33 @@
+# Builds and runs the unit tests and the benchmark for the base64 library.
+#
+# Variables read from the environment / command line:
+#   CC      compiler; also queried with -dumpmachine to detect the target
+#   CFLAGS  extra compiler flags, appended to below
+#   OPENMP  when defined, compile with -fopenmp
+CFLAGS += -std=c99 -O3 -Wall -Wextra -pedantic
+ifdef OPENMP
+  CFLAGS += -fopenmp
+endif
+
+TARGET := $(shell $(CC) -dumpmachine)
+ifneq (, $(findstring darwin, $(TARGET)))
+  BENCH_LDFLAGS=
+else
+  # default to linux, -lrt needed
+  BENCH_LDFLAGS=-lrt
+endif
+
+.PHONY: clean test
+
+# Default goal: rebuild from scratch, then run both binaries.
+test: clean test_base64 benchmark
+	./test_base64
+	./benchmark
+
+test_base64: test_base64.c codec_supported.o ../lib/libbase64.o
+	$(CC) $(CFLAGS) -o $@ $^
+
+benchmark: benchmark.c codec_supported.o ../lib/libbase64.o
+	$(CC) $(CFLAGS) -o $@ $^ $(BENCH_LDFLAGS)
+
+# Anything that lives in the parent directory is built by the parent Makefile.
+# Use $(MAKE) rather than a literal `make` so that the same make program is
+# used and options such as -j and -n are passed on to the sub-make.
+../%:
+	$(MAKE) -C .. $*
+
+%.o: %.c
+	$(CC) $(CFLAGS) -o $@ -c $<
+
+clean:
+	rm -f benchmark test_base64 *.o
--- a/deps/base64/base64/test/benchmark.c
+++ b/deps/base64/base64/test/benchmark.c
@ -0,0 +1,233 @@
+// For clock_gettime(2):
+#ifndef _POSIX_C_SOURCE
+#define _POSIX_C_SOURCE 199309L
+#endif
+
+// For CLOCK_REALTIME on FreeBSD:
+#ifndef _XOPEN_SOURCE
+#define _XOPEN_SOURCE   600
+#endif
+
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <time.h>
+
+#ifdef __MACH__
+#include <mach/mach_time.h>
+#endif
+
+#include "../include/libbase64.h"
+#include "codec_supported.h"
+
+#define KB	1024
+#define MB	(1024 * KB)
+
+#define RANDOMDEV  "/dev/urandom"
+
+// Working buffers shared by all benchmark runs.
+struct buffers {
+	char *reg;	// plain data: input of the encoder, output of the decoder
+	char *enc;	// encoded data: output of the encoder, input of the decoder
+	size_t regsz;	// byte count that goes with 'reg'
+	size_t encsz;	// byte count that goes with 'enc'
+};
+
+// Define buffer sizes to test with:
+// NOTE: main() sizes its allocations from sizes[0], so the first entry must
+// stay the largest one.
+static struct bufsize {
+	char	*label;		// human-readable size, printed in the report
+	size_t	 len;		// number of bytes handed to the codec per call
+	int	 repeat;	// number of timed batches; the fastest one is reported
+	int	 batch;		// number of codec calls inside one timed batch
+}
+sizes[] = {
+	{ "10 MB",	MB * 10,	10,	1	},
+	{ "1 MB",	MB * 1,		10,	10	},
+	{ "100 KB",	KB * 100,	10,	100	},
+	{ "10 KB",	KB * 10,	100,	100	},
+	{ "1 KB",	KB * 1,		100,	1000	},
+};
+
+// Express a byte count as a (fractional) number of MB-sized units.
+static inline float
+bytes_to_mb (size_t bytes)
+{
+	const float bytes_per_mb = (float) MB;
+
+	return bytes / bytes_per_mb;
+}
+
+// Fill b->reg with b->regsz bytes read from RANDOMDEV.
+//
+// Returns true on success. On failure, returns false and points *errmsg at a
+// static string that describes the problem.
+static bool
+get_random_data (struct buffers *b, char **errmsg)
+{
+	int fd;
+	ssize_t nread;
+	size_t total_read = 0;
+
+	// Open random device for semi-random data:
+	if ((fd = open(RANDOMDEV, O_RDONLY)) < 0) {
+		*errmsg = "Cannot open " RANDOMDEV;
+		return false;
+	}
+
+	printf("Filling buffer with %.1f MB of random data...\n", bytes_to_mb(b->regsz));
+
+	// A single read() may return fewer bytes than requested, so keep
+	// reading until the buffer is full:
+	while (total_read < b->regsz) {
+		nread = read(fd, b->reg + total_read, b->regsz - total_read);
+
+		// A negative count is an error. A zero count is end-of-file: no
+		// more data will arrive, so give up instead of looping forever.
+		if (nread <= 0) {
+			*errmsg = (nread < 0)
+				? "Read error"
+				: "Unexpected end of file on " RANDOMDEV;
+			close(fd);
+			return false;
+		}
+		total_read += nread;
+	}
+	close(fd);
+	return true;
+}
+
+// Timing primitives. Mach targets take timestamps with mach_absolute_time();
+// all other targets use clock_gettime(CLOCK_REALTIME).
+#ifdef __MACH__
+typedef uint64_t base64_timespec;
+
+// Store the current timestamp in *o_time.
+static void
+base64_gettime (base64_timespec * o_time)
+{
+	*o_time = mach_absolute_time();
+}
+
+// Return the time elapsed between *start and *end, in seconds.
+static float
+timediff_sec (base64_timespec *start, base64_timespec *end)
+{
+	const uint64_t ticks = *end - *start;
+	mach_timebase_info_data_t timebase = { 0, 0 };
+
+	mach_timebase_info(&timebase);
+
+	// Scale the tick count by numer/denom, then divide by 1e9 to get seconds:
+	const uint64_t scaled = (ticks * timebase.numer) / timebase.denom;
+
+	return (float) scaled / 1e9f;
+}
+#else
+typedef struct timespec base64_timespec;
+
+// Store the current timestamp in *o_time.
+static void
+base64_gettime (base64_timespec * o_time)
+{
+	clock_gettime(CLOCK_REALTIME, o_time);
+}
+
+// Return the time elapsed between *start and *end, in seconds.
+static float
+timediff_sec (base64_timespec *start, base64_timespec *end)
+{
+	// The nanosecond difference may be negative; it is scaled to seconds
+	// and added to the whole-second difference.
+	const float nsec_part = (float)(end->tv_nsec - start->tv_nsec);
+
+	return (end->tv_sec - start->tv_sec) + nsec_part / 1e9f;
+}
+#endif
+
+// Benchmark encoding of bs->len bytes from b->reg with the codec selected by
+// 'flags', and print the best throughput seen, labelled with 'name'.
+static void
+codec_bench_enc (struct buffers *b, const struct bufsize *bs, const char *name, unsigned int flags)
+{
+	base64_timespec t_begin, t_end;
+	float best = -1.0f;
+
+	// Encode exactly as many input bytes as this size class asks for:
+	b->regsz = bs->len;
+
+	// Take bs->repeat measurements and keep the quickest one:
+	int rounds = bs->repeat;
+	while (rounds) {
+
+		// One measurement times a whole batch of calls, which increases
+		// the effective timer resolution:
+		int calls = bs->batch;
+		base64_gettime(&t_begin);
+		while (calls) {
+			base64_encode(b->reg, b->regsz, b->enc, &b->encsz, flags);
+			calls--;
+		}
+		base64_gettime(&t_end);
+
+		// Average duration of one call within the batch:
+		float per_call = timediff_sec(&t_begin, &t_end) / bs->batch;
+
+		// A negative 'best' means that no measurement was stored yet:
+		if (best < 0.0f || per_call < best)
+			best = per_call;
+
+		rounds--;
+	}
+
+	printf("%s\tencode\t%.02f MB/sec\n", name, bytes_to_mb(b->regsz) / best);
+}
+
+// Benchmark decoding of bs->len bytes from b->enc with the codec selected by
+// 'flags', and print the best throughput seen, labelled with 'name'.
+static void
+codec_bench_dec (struct buffers *b, const struct bufsize *bs, const char *name, unsigned int flags)
+{
+	base64_timespec t_begin, t_end;
+	float best = -1.0f;
+
+	// Decode exactly as many encoded bytes as this size class asks for:
+	b->encsz = bs->len;
+
+	// Take bs->repeat measurements and keep the quickest one:
+	int rounds = bs->repeat;
+	while (rounds) {
+
+		// One measurement times a whole batch of calls, which increases
+		// the effective timer resolution:
+		int calls = bs->batch;
+		base64_gettime(&t_begin);
+		while (calls) {
+			base64_decode(b->enc, b->encsz, b->reg, &b->regsz, flags);
+			calls--;
+		}
+		base64_gettime(&t_end);
+
+		// Average duration of one call within the batch:
+		float per_call = timediff_sec(&t_begin, &t_end) / bs->batch;
+
+		// A negative 'best' means that no measurement was stored yet:
+		if (best < 0.0f || per_call < best)
+			best = per_call;
+
+		rounds--;
+	}
+
+	printf("%s\tdecode\t%.02f MB/sec\n", name, bytes_to_mb(b->encsz) / best);
+}
+
+// Run the encode benchmark followed by the decode benchmark for one codec
+// ('name' / 'flags') at one buffer size ('bs'). The decode run reads whatever
+// the encode run left in b->enc.
+static void
+codec_bench (struct buffers *b, const struct bufsize *bs, const char *name, unsigned int flags)
+{
+	codec_bench_enc(b, bs, name, flags);
+	codec_bench_dec(b, bs, name, flags);
+}
+
+// Benchmark entry point: allocate the buffers, fill the input with random
+// data, then run every supported codec over every buffer size in 'sizes'.
+//
+// Returns 0 on success, or 1 if setup failed (the reason is printed to
+// stderr).
+int
+main ()
+{
+	int ret = 0;
+	char *errmsg = NULL;
+	struct buffers b;
+
+	// Set buffer sizes to largest buffer length. Base64 output is roughly
+	// 4/3 the size of its input, so 5/3 leaves headroom:
+	b.regsz = sizes[0].len;
+	b.encsz = sizes[0].len * 5 / 3;
+
+	// Allocate space for megabytes of random data:
+	if ((b.reg = malloc(b.regsz)) == NULL) {
+		errmsg = "Out of memory";
+		ret = 1;
+		goto err0;
+	}
+
+	// Allocate space for encoded output:
+	if ((b.enc = malloc(b.encsz)) == NULL) {
+		errmsg = "Out of memory";
+		ret = 1;
+		goto err1;
+	}
+
+	// Fill buffer with random data:
+	if (get_random_data(&b, &errmsg) == false) {
+		ret = 1;
+		goto err2;
+	}
+
+	// Loop over all buffer sizes:
+	for (size_t i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
+		printf("Testing with buffer size %s, fastest of %d * %d\n",
+			sizes[i].label, sizes[i].repeat, sizes[i].batch);
+
+		// Loop over all codecs. The index of a name in 'codecs' is also
+		// the bit position of the flag that selects that codec:
+		for (size_t j = 0; codecs[j]; j++)
+			if (codec_supported(1 << j))
+				codec_bench(&b, &sizes[i], codecs[j], 1 << j);
+	}
+
+	// Free memory. The labels unwind in reverse order of allocation, so
+	// each failure path only releases what was actually acquired:
+err2:	free(b.enc);
+err1:	free(b.reg);
+err0:	if (errmsg)
+		fprintf(stderr, "%s\n", errmsg);
+
+	return ret;
+}
--- a/deps/base64/base64/test/ci/test.sh
+++ b/deps/base64/base64/test/ci/test.sh
@ -0,0 +1,28 @@
+#!/bin/bash
+set -ve
+
+MACHINE=$(uname -m)
+if [ "${MACHINE}" == "x86_64" ]; then
+	export SSSE3_CFLAGS=-mssse3
+	export SSE41_CFLAGS=-msse4.1
+	export SSE42_CFLAGS=-msse4.2
+	export AVX_CFLAGS=-mavx
+	# no AVX2 on GHA macOS
+	if [ "$(uname -s)" != "Darwin" ]; then
+		export AVX2_CFLAGS=-mavx2
+	fi
+elif [ "${MACHINE}" == "aarch64" ]; then
+	export NEON64_CFLAGS="-march=armv8-a"
+elif [ "${MACHINE}" == "armv7l" ]; then
+	export NEON32_CFLAGS="-march=armv7-a -mfloat-abi=hard -mfpu=neon"
+fi
+
+if [ "${OPENMP:-}" == "0" ]; then
+	unset OPENMP
+fi
+
+uname -a
+${CC} --version
+
+make
+make -C test
--- a/deps/base64/base64/test/codec_supported.c
+++ b/deps/base64/base64/test/codec_supported.c
@ -0,0 +1,28 @@
+#include <string.h>
+
+#include "../include/libbase64.h"
+
+// Names of the codecs, terminated by a NULL entry.
+// NOTE(review): users of this table appear to derive the flag for entry i as
+// (1 << i), so the order presumably has to mirror the flag bit assignments in
+// libbase64.h; confirm before reordering.
+static char *_codecs[] =
+{ "AVX2"
+, "NEON32"
+, "NEON64"
+, "plain"
+, "SSSE3"
+, "SSE41"
+, "SSE42"
+, "AVX"
+, NULL
+} ;
+
+// Public handle to the table above (declared in codec_supported.h).
+char **codecs = _codecs;
+
+// Report whether the codec selected by 'flags' is usable.
+//
+// The check decodes a short, valid test string with that codec; the codec
+// counts as supported unless base64_decode() returns -1.
+int
+codec_supported (int flags)
+{
+	char *probe = "aGVsbG8=";
+	size_t probe_len = strlen(probe);
+	char decoded[10];
+	size_t decoded_len;
+
+	int rc = base64_decode(probe, probe_len, decoded, &decoded_len, flags);
+
+	return (rc != -1);
+}
--- a/deps/base64/base64/test/codec_supported.h
+++ b/deps/base64/base64/test/codec_supported.h
@ -0,0 +1,3 @@
+extern char **codecs;
+
+int codec_supported (int flags);
--- a/deps/base64/base64/test/moby_dick.h
+++ b/deps/base64/base64/test/moby_dick.h
@ -0,0 +1,41 @@
+static const char *moby_dick_plain =
+	"Call me Ishmael. Some years ago--never mind how long precisely--having\n"
+	"little or no money in my purse, and nothing particular to interest me on\n"
+	"shore, I thought I would sail about a little and see the watery part of\n"
+	"the world. It is a way I have of driving off the spleen and regulating\n"
+	"the circulation. Whenever I find myself growing grim about the mouth;\n"
+	"whenever it is a damp, drizzly November in my soul; whenever I find\n"
+	"myself involuntarily pausing before coffin warehouses, and bringing up\n"
+	"the rear of every funeral I meet; and especially whenever my hypos get\n"
+	"such an upper hand of me, that it requires a strong moral principle to\n"
+	"prevent me from deliberately stepping into the street, and methodically\n"
+	"knocking people's hats off--then, I account it high time to get to sea\n"
+	"as soon as I can. This is my substitute for pistol and ball. With a\n"
+	"philosophical flourish Cato throws himself upon his sword; I quietly\n"
+	"take to the ship. There is nothing surprising in this. If they but knew\n"
+	"it, almost all men in their degree, some time or other, cherish very\n"
+	"nearly the same feelings towards the ocean with me.\n";
+
+static const char *moby_dick_base64 =
+	"Q2FsbCBtZSBJc2htYWVsLiBTb21lIHllYXJzIGFnby0tbmV2ZXIgbWluZCBob3cgbG9uZ"
+	"yBwcmVjaXNlbHktLWhhdmluZwpsaXR0bGUgb3Igbm8gbW9uZXkgaW4gbXkgcHVyc2UsIG"
+	"FuZCBub3RoaW5nIHBhcnRpY3VsYXIgdG8gaW50ZXJlc3QgbWUgb24Kc2hvcmUsIEkgdGh"
+	"vdWdodCBJIHdvdWxkIHNhaWwgYWJvdXQgYSBsaXR0bGUgYW5kIHNlZSB0aGUgd2F0ZXJ5"
+	"IHBhcnQgb2YKdGhlIHdvcmxkLiBJdCBpcyBhIHdheSBJIGhhdmUgb2YgZHJpdmluZyBvZ"
+	"mYgdGhlIHNwbGVlbiBhbmQgcmVndWxhdGluZwp0aGUgY2lyY3VsYXRpb24uIFdoZW5ldm"
+	"VyIEkgZmluZCBteXNlbGYgZ3Jvd2luZyBncmltIGFib3V0IHRoZSBtb3V0aDsKd2hlbmV"
+	"2ZXIgaXQgaXMgYSBkYW1wLCBkcml6emx5IE5vdmVtYmVyIGluIG15IHNvdWw7IHdoZW5l"
+	"dmVyIEkgZmluZApteXNlbGYgaW52b2x1bnRhcmlseSBwYXVzaW5nIGJlZm9yZSBjb2Zma"
+	"W4gd2FyZWhvdXNlcywgYW5kIGJyaW5naW5nIHVwCnRoZSByZWFyIG9mIGV2ZXJ5IGZ1bm"
+	"VyYWwgSSBtZWV0OyBhbmQgZXNwZWNpYWxseSB3aGVuZXZlciBteSBoeXBvcyBnZXQKc3V"
+	"jaCBhbiB1cHBlciBoYW5kIG9mIG1lLCB0aGF0IGl0IHJlcXVpcmVzIGEgc3Ryb25nIG1v"
+	"cmFsIHByaW5jaXBsZSB0bwpwcmV2ZW50IG1lIGZyb20gZGVsaWJlcmF0ZWx5IHN0ZXBwa"
+	"W5nIGludG8gdGhlIHN0cmVldCwgYW5kIG1ldGhvZGljYWxseQprbm9ja2luZyBwZW9wbG"
+	"UncyBoYXRzIG9mZi0tdGhlbiwgSSBhY2NvdW50IGl0IGhpZ2ggdGltZSB0byBnZXQgdG8"
+	"gc2VhCmFzIHNvb24gYXMgSSBjYW4uIFRoaXMgaXMgbXkgc3Vic3RpdHV0ZSBmb3IgcGlz"
+	"dG9sIGFuZCBiYWxsLiBXaXRoIGEKcGhpbG9zb3BoaWNhbCBmbG91cmlzaCBDYXRvIHRoc"
+	"m93cyBoaW1zZWxmIHVwb24gaGlzIHN3b3JkOyBJIHF1aWV0bHkKdGFrZSB0byB0aGUgc2"
+	"hpcC4gVGhlcmUgaXMgbm90aGluZyBzdXJwcmlzaW5nIGluIHRoaXMuIElmIHRoZXkgYnV"
+	"0IGtuZXcKaXQsIGFsbW9zdCBhbGwgbWVuIGluIHRoZWlyIGRlZ3JlZSwgc29tZSB0aW1l"
+	"IG9yIG90aGVyLCBjaGVyaXNoIHZlcnkKbmVhcmx5IHRoZSBzYW1lIGZlZWxpbmdzIHRvd"
+	"2FyZHMgdGhlIG9jZWFuIHdpdGggbWUuCg==";
--- a/deps/base64/base64/test/moby_dick_base64.txt
+++ b/deps/base64/base64/test/moby_dick_base64.txt
@ -0,0 +1 @@
+Q2FsbCBtZSBJc2htYWVsLiBTb21lIHllYXJzIGFnby0tbmV2ZXIgbWluZCBob3cgbG9uZyBwcmVjaXNlbHktLWhhdmluZwpsaXR0bGUgb3Igbm8gbW9uZXkgaW4gbXkgcHVyc2UsIGFuZCBub3RoaW5nIHBhcnRpY3VsYXIgdG8gaW50ZXJlc3QgbWUgb24Kc2hvcmUsIEkgdGhvdWdodCBJIHdvdWxkIHNhaWwgYWJvdXQgYSBsaXR0bGUgYW5kIHNlZSB0aGUgd2F0ZXJ5IHBhcnQgb2YKdGhlIHdvcmxkLiBJdCBpcyBhIHdheSBJIGhhdmUgb2YgZHJpdmluZyBvZmYgdGhlIHNwbGVlbiBhbmQgcmVndWxhdGluZwp0aGUgY2lyY3VsYXRpb24uIFdoZW5ldmVyIEkgZmluZCBteXNlbGYgZ3Jvd2luZyBncmltIGFib3V0IHRoZSBtb3V0aDsKd2hlbmV2ZXIgaXQgaXMgYSBkYW1wLCBkcml6emx5IE5vdmVtYmVyIGluIG15IHNvdWw7IHdoZW5ldmVyIEkgZmluZApteXNlbGYgaW52b2x1bnRhcmlseSBwYXVzaW5nIGJlZm9yZSBjb2ZmaW4gd2FyZWhvdXNlcywgYW5kIGJyaW5naW5nIHVwCnRoZSByZWFyIG9mIGV2ZXJ5IGZ1bmVyYWwgSSBtZWV0OyBhbmQgZXNwZWNpYWxseSB3aGVuZXZlciBteSBoeXBvcyBnZXQKc3VjaCBhbiB1cHBlciBoYW5kIG9mIG1lLCB0aGF0IGl0IHJlcXVpcmVzIGEgc3Ryb25nIG1vcmFsIHByaW5jaXBsZSB0bwpwcmV2ZW50IG1lIGZyb20gZGVsaWJlcmF0ZWx5IHN0ZXBwaW5nIGludG8gdGhlIHN0cmVldCwgYW5kIG1ldGhvZGljYWxseQprbm9ja2luZyBwZW9wbGUncyBoYXRzIG9mZi0tdGhlbiwgSSBhY2NvdW50IGl0IGhpZ2ggdGltZSB0byBnZXQgdG8gc2VhCmFzIHNvb24gYXMgSSBjYW4uIFRoaXMgaXMgbXkgc3Vic3RpdHV0ZSBmb3IgcGlzdG9sIGFuZCBiYWxsLiBXaXRoIGEKcGhpbG9zb3BoaWNhbCBmbG91cmlzaCBDYXRvIHRocm93cyBoaW1zZWxmIHVwb24gaGlzIHN3b3JkOyBJIHF1aWV0bHkKdGFrZSB0byB0aGUgc2hpcC4gVGhlcmUgaXMgbm90aGluZyBzdXJwcmlzaW5nIGluIHRoaXMuIElmIHRoZXkgYnV0IGtuZXcKaXQsIGFsbW9zdCBhbGwgbWVuIGluIHRoZWlyIGRlZ3JlZSwgc29tZSB0aW1lIG9yIG90aGVyLCBjaGVyaXNoIHZlcnkKbmVhcmx5IHRoZSBzYW1lIGZlZWxpbmdzIHRvd2FyZHMgdGhlIG9jZWFuIHdpdGggbWUuCg==
--- a/deps/base64/base64/test/moby_dick_plain.txt
+++ b/deps/base64/base64/test/moby_dick_plain.txt
@ -0,0 +1,16 @@
+Call me Ishmael. Some years ago--never mind how long precisely--having
+little or no money in my purse, and nothing particular to interest me on
+shore, I thought I would sail about a little and see the watery part of
+the world. It is a way I have of driving off the spleen and regulating
+the circulation. Whenever I find myself growing grim about the mouth;
+whenever it is a damp, drizzly November in my soul; whenever I find
+myself involuntarily pausing before coffin warehouses, and bringing up
+the rear of every funeral I meet; and especially whenever my hypos get
+such an upper hand of me, that it requires a strong moral principle to
+prevent me from deliberately stepping into the street, and methodically
+knocking people's hats off--then, I account it high time to get to sea
+as soon as I can. This is my substitute for pistol and ball. With a
+philosophical flourish Cato throws himself upon his sword; I quietly
+take to the ship. There is nothing surprising in this. If they but knew
+it, almost all men in their degree, some time or other, cherish very
+nearly the same feelings towards the ocean with me.
--- a/deps/base64/base64/test/test_base64.c
+++ b/deps/base64/base64/test/test_base64.c
@ -0,0 +1,365 @@
+#include <stdbool.h>
+#include <string.h>
+#include <stdio.h>
+#include "../include/libbase64.h"
+#include "codec_supported.h"
+#include "moby_dick.h"
+
+static char out[2000];
+static size_t outlen;
+
+// Encode 'src' with the codec selected by 'flags' into the global 'out'
+// buffer and compare the result with the expected encoding 'dst'.
+//
+// Returns true on FAILURE (after printing a diagnostic), false on success.
+static bool
+assert_enc (int flags, const char *src, const char *dst)
+{
+	const size_t expected_len = strlen(dst);
+
+	base64_encode(src, strlen(src), out, &outlen, flags);
+
+	if (outlen != expected_len) {
+		printf("FAIL: encoding of '%s': length expected %lu, got %lu\n", src,
+			(unsigned long)expected_len,
+			(unsigned long)outlen
+		);
+		return true;
+	}
+
+	// The lengths agree, so only the contents can still differ:
+	if (strncmp(dst, out, outlen) == 0)
+		return false;
+
+	// Terminate the output so that it can be printed as a string:
+	out[outlen] = '\0';
+	printf("FAIL: encoding of '%s': expected output '%s', got '%s'\n", src, dst, out);
+	return true;
+}
+
+// Decode 'src' with the codec selected by 'flags' into the global 'out'
+// buffer and compare the result with the expected plain text 'dst'.
+//
+// Returns true on FAILURE (after printing a diagnostic), false on success.
+static bool
+assert_dec (int flags, const char *src, const char *dst)
+{
+	size_t srclen = strlen(src);
+	size_t dstlen = strlen(dst);
+
+	if (!base64_decode(src, srclen, out, &outlen, flags)) {
+		printf("FAIL: decoding of '%s': decoding error\n", src);
+		return true;
+	}
+	if (outlen != dstlen) {
+		// This is a decode check, so report it as one:
+		printf("FAIL: decoding of '%s': "
+			"length expected %lu, got %lu\n", src,
+			(unsigned long)dstlen,
+			(unsigned long)outlen
+		);
+		return true;
+	}
+	if (strncmp(dst, out, outlen) != 0) {
+		// Terminate the output so that it can be printed as a string:
+		out[outlen] = '\0';
+		printf("FAIL: decoding of '%s': expected output '%s', got '%s'\n", src, dst, out);
+		return true;
+	}
+	return false;
+}
+
+// Encode 'src' with the codec selected by 'flags', decode the result again
+// with the same codec, and check that the decoded text equals 'src'.
+//
+// Returns true on FAILURE (after printing a diagnostic), false on success.
+static int
+assert_roundtrip (int flags, const char *src)
+{
+	char tmp[1500];
+	size_t tmplen;
+	size_t srclen = strlen(src);
+
+	// Encode the input into global buffer:
+	base64_encode(src, srclen, out, &outlen, flags);
+
+	// Decode the global buffer into local temp buffer:
+	if (!base64_decode(out, outlen, tmp, &tmplen, flags)) {
+		// The encoder does not terminate its output. Do so before
+		// printing, or stale bytes from an earlier, longer encode
+		// would be printed along with it:
+		out[outlen] = '\0';
+		printf("FAIL: decoding of '%s': decoding error\n", out);
+		return true;
+	}
+
+	// Check that 'src' is identical to 'tmp':
+	if (srclen != tmplen) {
+		printf("FAIL: roundtrip of '%s': "
+			"length expected %lu, got %lu\n", src,
+			(unsigned long)srclen,
+			(unsigned long)tmplen
+		);
+		return true;
+	}
+	if (strncmp(src, tmp, tmplen) != 0) {
+		// Terminate the output so that it can be printed as a string:
+		tmp[tmplen] = '\0';
+		printf("FAIL: roundtrip of '%s': got '%s'\n", src, tmp);
+		return true;
+	}
+
+	return false;
+}
+
+// Roundtrip every byte value through the codec selected by 'flags'.
+//
+// The byte table 0..255 is encoded with the plain codec from every possible
+// starting offset, and each encoding is decoded with the codec under test.
+//
+// Returns true if any check FAILED, false otherwise.
+static int
+test_char_table (int flags)
+{
+	bool fail = false;
+	char chr[256];
+	char enc[400], dec[400];
+	size_t enclen, declen;
+
+	// Fill array with all characters 0..255:
+	for (int i = 0; i < 256; i++)
+		chr[i] = (unsigned char)i;
+
+	// Loop, using each char as a starting position to increase test coverage:
+	for (int i = 0; i < 256; i++) {
+
+		size_t chrlen = 256 - i;
+
+		base64_encode(&chr[i], chrlen, enc, &enclen, BASE64_FORCE_PLAIN);
+
+		if (!base64_decode(enc, enclen, dec, &declen, flags)) {
+			printf("FAIL: decoding @ %d: decoding error\n", i);
+			fail = true;
+			continue;
+		}
+		if (declen != chrlen) {
+			printf("FAIL: roundtrip @ %d: "
+				"length expected %lu, got %lu\n", i,
+				(unsigned long)chrlen,
+				(unsigned long)declen
+			);
+			fail = true;
+			continue;
+		}
+
+		// The data is binary and chr[0] is a NUL byte. A string compare
+		// would stop at that NUL and skip the rest of the buffer, so
+		// compare raw memory instead:
+		if (memcmp(&chr[i], dec, declen) != 0) {
+			printf("FAIL: roundtrip @ %d: decoded output not same as input\n", i);
+			fail = true;
+		}
+	}
+
+	return fail;
+}
+
+// Exercise the streaming encoder and decoder of the codec selected by
+// 'flags'.
+//
+// The byte table 0..255 is stream-encoded in blocks of every size from 1 to
+// 254 and compared with a one-shot reference encoding made by the plain
+// codec. The reference is then stream-decoded with the same block sizes and
+// compared with the byte table.
+//
+// Returns true if any check FAILED, false otherwise.
+static int
+test_streaming (int flags)
+{
+	bool fail = false;
+	char chr[256];
+	char ref[400], enc[400];
+	size_t reflen;
+	struct base64_state state;
+
+	// Fill array with all characters 0..255:
+	for (int i = 0; i < 256; i++)
+		chr[i] = (unsigned char)i;
+
+	// Create reference base64 encoding:
+	base64_encode(chr, 256, ref, &reflen, BASE64_FORCE_PLAIN);
+
+	// Encode the table with various block sizes and compare to reference:
+	for (size_t bs = 1; bs < 255; bs++)
+	{
+		size_t inpos   = 0;
+		size_t partlen = 0;
+		size_t enclen  = 0;
+
+		base64_stream_encode_init(&state, flags);
+		memset(enc, 0, 400);
+		for (;;) {
+			// The last block is clamped to what is left of the input:
+			base64_stream_encode(&state, &chr[inpos], (inpos + bs > 256) ? 256 - inpos : bs, &enc[enclen], &partlen);
+			enclen += partlen;
+			if (inpos + bs > 256) {
+				break;
+			}
+			inpos += bs;
+		}
+		base64_stream_encode_final(&state, &enc[enclen], &partlen);
+		enclen += partlen;
+
+		if (enclen != reflen) {
+			printf("FAIL: stream encoding gave incorrect size: "
+				"%lu instead of %lu\n",
+				(unsigned long)enclen,
+				(unsigned long)reflen
+			);
+			fail = true;
+		}
+		if (strncmp(ref, enc, reflen) != 0) {
+			printf("FAIL: stream encoding with blocksize %lu failed\n",
+				(unsigned long)bs
+			);
+			fail = true;
+		}
+	}
+
+	// Decode the reference encoding with various block sizes and
+	// compare to input char table:
+	for (size_t bs = 1; bs < 255; bs++)
+	{
+		size_t inpos   = 0;
+		size_t partlen = 0;
+		size_t enclen  = 0;
+
+		base64_stream_decode_init(&state, flags);
+		memset(enc, 0, 400);
+		while (base64_stream_decode(&state, &ref[inpos], (inpos + bs > reflen) ? reflen - inpos : bs, &enc[enclen], &partlen)) {
+			enclen += partlen;
+			inpos += bs;
+		}
+		if (enclen != 256) {
+			printf("FAIL: stream decoding gave incorrect size: "
+				"%lu instead of 256\n",
+				(unsigned long)enclen
+			);
+			fail = true;
+		}
+
+		// The decoded data is binary and starts with a NUL byte. A
+		// string compare would stop right there and compare nothing,
+		// so compare raw memory instead:
+		if (memcmp(chr, enc, 256) != 0) {
+			printf("FAIL: stream decoding with blocksize %lu failed\n",
+				(unsigned long)bs
+			);
+			fail = true;
+		}
+	}
+
+	return fail;
+}
+
+// Check that the decoder of the codec selected by 'flags' rejects invalid
+// input: a few malformed strings, and a valid encoding of the byte table
+// 0..255 in which one or two characters were replaced by invalid ones.
+//
+// Returns true if any invalid input was ACCEPTED, false otherwise.
+static int
+test_invalid_dec_input (int flags)
+{
+	// Subset of invalid characters to cover all ranges
+	static const char invalid_set[] = { '\0', -1, '!', '-', ';', '_', '|' };
+	static const char* invalid_strings[] = {
+		"Zm9vYg=",
+		"Zm9vYg",
+		"Zm9vY",
+		"Zm9vYmF=Zm9v"
+	};
+	const size_t n_strings = sizeof(invalid_strings) / sizeof(invalid_strings[0]);
+
+	bool fail = false;
+	char chr[256];
+	char enc[400], dec[400];
+	size_t enclen, declen;
+
+	// Fill array with all characters 0..255:
+	for (int i = 0; i < 256; i++)
+		chr[i] = (unsigned char)i;
+
+	// Create reference base64 encoding:
+	base64_encode(chr, 256, enc, &enclen, BASE64_FORCE_PLAIN);
+
+	// Each malformed string must produce a decoding error:
+	for (size_t s = 0U; s < n_strings; ++s) {
+		const char *bad = invalid_strings[s];
+
+		if (base64_decode(bad, strlen(bad), dec, &declen, flags)) {
+			printf("FAIL: decoding invalid input \"%s\": no decoding error\n", bad);
+			fail = true;
+		}
+	}
+
+	// Corrupt one character at a time, at every position:
+	for (size_t c = 0U; c < sizeof(invalid_set); ++c) {
+		for (size_t i = 0U; i < enclen; i++) {
+			const char saved = enc[i];
+
+			enc[i] = invalid_set[c];
+			const int accepted = base64_decode(enc, enclen, dec, &declen, flags);
+
+			// Restore the reference before looking at the result:
+			enc[i] = saved;
+
+			if (accepted) {
+				printf("FAIL: decoding invalid input @ %d: no decoding error\n", (int)i);
+				fail = true;
+			}
+		}
+	}
+
+	// Corrupt two characters at a time, at positions i and i + 2:
+	for (size_t c = 0U; c < sizeof(invalid_set); ++c) {
+		for (size_t i = 0U; i < enclen - 2U; i++) {
+			const char saved_a = enc[i+0];
+			const char saved_b = enc[i+2];
+
+			enc[i+0] = invalid_set[c];
+			enc[i+2] = invalid_set[c];
+			const int accepted = base64_decode(enc, enclen, dec, &declen, flags);
+
+			// Restore the reference before looking at the result:
+			enc[i+0] = saved_a;
+			enc[i+2] = saved_b;
+
+			if (accepted) {
+				printf("FAIL: decoding invalid input @ %d: no decoding error\n", (int)i);
+				fail = true;
+			}
+		}
+	}
+
+	return fail;
+}
+
+// Run the complete test suite against one codec.
+//
+// 'codec' is the name to print, 'flags' selects the codec. A codec that is
+// not supported is skipped, which counts as success.
+//
+// Returns true if any test FAILED, false otherwise.
+static int
+test_one_codec (const char *codec, int flags)
+{
+	// Pairs of plain input and expected base64 output:
+	struct {
+		const char *in;
+		const char *out;
+	} vectors[] = {
+
+		// These are the test vectors from RFC4648:
+		{ "",		""         },
+		{ "f",		"Zg=="     },
+		{ "fo",		"Zm8="     },
+		{ "foo",	"Zm9v"     },
+		{ "foob",	"Zm9vYg==" },
+		{ "fooba",	"Zm9vYmE=" },
+		{ "foobar",	"Zm9vYmFy" },
+
+		// The first paragraph from Moby Dick,
+		// to test the SIMD codecs with larger blocksize:
+		{ moby_dick_plain, moby_dick_base64 },
+	};
+	const size_t n_vectors = sizeof(vectors) / sizeof(vectors[0]);
+	bool fail = false;
+
+	printf("Codec %s:\n", codec);
+
+	// Nothing to test when the codec is not available:
+	if (!codec_supported(flags)) {
+		puts("  skipping");
+		return false;
+	}
+
+	for (size_t v = 0; v < n_vectors; v++) {
+		const char *plain   = vectors[v].in;
+		const char *encoded = vectors[v].out;
+
+		// Plain text must encode to the expected output:
+		if (assert_enc(flags, plain, encoded))
+			fail = true;
+
+		// The expected output must decode back to the plain text:
+		if (assert_dec(flags, encoded, plain))
+			fail = true;
+
+		// Both strings must survive an encode/decode roundtrip:
+		if (assert_roundtrip(flags, plain))
+			fail = true;
+		if (assert_roundtrip(flags, encoded))
+			fail = true;
+	}
+
+	// Every remaining test runs, even if an earlier one failed:
+	if (test_char_table(flags))
+		fail = true;
+	if (test_streaming(flags))
+		fail = true;
+	if (test_invalid_dec_input(flags))
+		fail = true;
+
+	if (!fail)
+		puts("  all tests passed.");
+
+	return fail;
+}
+
+// Test entry point: run the suite for every codec listed in 'codecs'.
+// The exit status is 1 if any codec failed, 0 otherwise.
+int
+main ()
+{
+	int status = 0;
+
+	// The position of a name in 'codecs' is also the bit position of the
+	// flag that invokes that codec:
+	for (size_t i = 0; codecs[i] != NULL; i++) {
+		if (test_one_codec(codecs[i], 1 << i))
+			status = 1;
+	}
+
+	return status;
+}
--- a/node.gyp
+++ b/node.gyp
@ -461,6 +461,7 @@
        '<(SHARED_INTERMEDIATE_DIR)' # for node_natives.h
      ],
      'dependencies': [
+        'deps/base64/base64.gyp:base64',
        'deps/googletest/googletest.gyp:gtest_prod',
        'deps/histogram/histogram.gyp:histogram',
        'deps/uvwasi/uvwasi.gyp:uvwasi',
@ -1191,6 +1192,7 @@

      'dependencies': [
        '<(node_lib_target_name)',
+        'deps/base64/base64.gyp:base64',
        'deps/googletest/googletest.gyp:gtest',
        'deps/googletest/googletest.gyp:gtest_main',
        'deps/histogram/histogram.gyp:histogram',
--- a/src/base64-inl.h
+++ b/src/base64-inl.h
@ -4,6 +4,7 @@
 #if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS

 #include "base64.h"
+#include "libbase64.h"
 #include "util.h"

 namespace node {
@ -131,6 +132,11 @@ inline size_t base64_encode(const char* src,

  dlen = base64_encoded_size(slen, mode);

+  if (mode == Base64Mode::NORMAL) {
+    ::base64_encode(src, slen, dst, &dlen, 0);
+    return dlen;
+  }
+
  unsigned a;
  unsigned b;
  unsigned c;
--- a/tools/license-builder.sh
+++ b/tools/license-builder.sh
@ -136,4 +136,6 @@ addlicense "nghttp3" "deps/ngtcp2/nghttp3/" "$licenseText"
 licenseText="$(curl -sL https://raw.githubusercontent.com/jprichardson/node-fs-extra/b34da2762a4865b025cac06d02d6a2f1f1027b65/LICENSE)"
 addlicense "node-fs-extra" "lib/internal/fs/cp" "$licenseText"

+addlicense "base64" "deps/base64/base64/" "$(cat "${rootdir}"/deps/base64/base64/LICENSE)"
+
 mv "$tmplicense" "$licensefile"
				`@ -0,0 +1 @@`
				Q2FsbCBtZSBJc2htYWVsLiBTb21lIHllYXJzIGFnby0tbmV2ZXIgbWluZCBob3cgbG9uZyBwcmVjaXNlbHktLWhhdmluZwpsaXR0bGUgb3Igbm8gbW9uZXkgaW4gbXkgcHVyc2UsIGFuZCBub3RoaW5nIHBhcnRpY3VsYXIgdG8gaW50ZXJlc3QgbWUgb24Kc2hvcmUsIEkgdGhvdWdodCBJIHdvdWxkIHNhaWwgYWJvdXQgYSBsaXR0bGUgYW5kIHNlZSB0aGUgd2F0ZXJ5IHBhcnQgb2YKdGhlIHdvcmxkLiBJdCBpcyBhIHdheSBJIGhhdmUgb2YgZHJpdmluZyBvZmYgdGhlIHNwbGVlbiBhbmQgcmVndWxhdGluZwp0aGUgY2lyY3VsYXRpb24uIFdoZW5ldmVyIEkgZmluZCBteXNlbGYgZ3Jvd2luZyBncmltIGFib3V0IHRoZSBtb3V0aDsKd2hlbmV2ZXIgaXQgaXMgYSBkYW1wLCBkcml6emx5IE5vdmVtYmVyIGluIG15IHNvdWw7IHdoZW5ldmVyIEkgZmluZApteXNlbGYgaW52b2x1bnRhcmlseSBwYXVzaW5nIGJlZm9yZSBjb2ZmaW4gd2FyZWhvdXNlcywgYW5kIGJyaW5naW5nIHVwCnRoZSByZWFyIG9mIGV2ZXJ5IGZ1bmVyYWwgSSBtZWV0OyBhbmQgZXNwZWNpYWxseSB3aGVuZXZlciBteSBoeXBvcyBnZXQKc3VjaCBhbiB1cHBlciBoYW5kIG9mIG1lLCB0aGF0IGl0IHJlcXVpcmVzIGEgc3Ryb25nIG1vcmFsIHByaW5jaXBsZSB0bwpwcmV2ZW50IG1lIGZyb20gZGVsaWJlcmF0ZWx5IHN0ZXBwaW5nIGludG8gdGhlIHN0cmVldCwgYW5kIG1ldGhvZGljYWxseQprbm9ja2luZyBwZW9wbGUncyBoYXRzIG9mZi0tdGhlbiwgSSBhY2NvdW50IGl0IGhpZ2ggdGltZSB0byBnZXQgdG8gc2VhCmFzIHNvb24gYXMgSSBjYW4uIFRoaXMgaXMgbXkgc3Vic3RpdHV0ZSBmb3IgcGlzdG9sIGFuZCBiYWxsLiBXaXRoIGEKcGhpbG9zb3BoaWNhbCBmbG91cmlzaCBDYXRvIHRocm93cyBoaW1zZWxmIHVwb24gaGlzIHN3b3JkOyBJIHF1aWV0bHkKdGFrZSB0byB0aGUgc2hpcC4gVGhlcmUgaXMgbm90aGluZyBzdXJwcmlzaW5nIGluIHRoaXMuIElmIHRoZXkgYnV0IGtuZXcKaXQsIGFsbW9zdCBhbGwgbWVuIGluIHRoZWlyIGRlZ3JlZSwgc29tZSB0aW1lIG9yIG90aGVyLCBjaGVyaXNoIHZlcnkKbmVhcmx5IHRoZSBzYW1lIGZlZWxpbmdzIHRvd2FyZHMgdGhlIG9jZWFuIHdpdGggbWUuCg==