kfr

Fast, modern C++ DSP framework, FFT, Sample Rate Conversion, FIR/IIR/Biquad Filters (SSE, AVX, AVX-512, ARM NEON)
Log | Files | Refs | README

commit 7866df07aa0d7c13b940fa81386086ca2d4dba45
Author: [email protected] <[email protected]>
Date:   Wed, 29 Jun 2016 20:59:54 +0300

Initial commit

Diffstat:
A.clang-format | 27+++++++++++++++++++++++++++
A.gitignore | 73+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
ACMakeLists.txt | 47+++++++++++++++++++++++++++++++++++++++++++++++
ALICENSE.txt | 674+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Abuild.py | 54++++++++++++++++++++++++++++++++++++++++++++++++++++++
Adspplot/dspplot/__init__.py | 2++
Adspplot/dspplot/dspplotting.py | 227+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Adspplot/setup.py | 26++++++++++++++++++++++++++
Aexamples/CMakeLists.txt | 34++++++++++++++++++++++++++++++++++
Aexamples/biquads.cpp | 85+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aexamples/dft.cpp | 66++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aexamples/fir.cpp | 73+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aexamples/resampling.cpp | 110+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aexamples/window.cpp | 76++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aformat-all.py | 26++++++++++++++++++++++++++
Aimg/fft_performance.png | 0
Ainclude/kfr/all.hpp | 84+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/base/abs.hpp | 138+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/base/asin_acos.hpp | 104+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/base/atan.hpp | 267+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/base/complex.hpp | 610++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/base/constants.hpp | 93+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/base/digitreverse.hpp | 121+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/base/dispatch.hpp | 200+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/base/expression.hpp | 315+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/base/function.hpp | 124+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/base/gamma.hpp | 108+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/base/intrinsics.h | 145+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/base/kfr.h | 134+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/base/log_exp.hpp | 575+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/base/logical.hpp | 339+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/base/memory.hpp | 209+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/base/min_max.hpp | 377+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/base/operators.hpp | 663+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/base/read_write.hpp | 201+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/base/round.hpp | 298+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/base/saturation.hpp | 172+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/base/select.hpp | 204+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/base/shuffle.hpp | 582++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/base/sin_cos.hpp | 586+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/base/sinh_cosh.hpp | 143+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/base/specializations.i | 113+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/base/sqrt.hpp | 85+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/base/tan.hpp | 187+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/base/types.hpp | 728+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/base/univector.hpp | 300+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/base/vec.hpp | 1324+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/cident.h | 357+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/cometa.hpp | 1819+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/cometa/string.hpp | 481+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/data/bitrev.hpp | 1057+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/data/sincos.hpp | 308+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/dft/bitrev.hpp | 387+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/dft/fft.hpp | 998+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/dft/ft.hpp | 1505+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/dft/reference_dft.hpp | 141+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/dispatch/cpuid.hpp | 305++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/dispatch/runtimedispatch.hpp | 173+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/dsp/biquad.hpp | 401+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/dsp/fir.hpp | 280+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/dsp/goertzel.hpp | 126+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/dsp/interpolation.hpp | 86+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/dsp/oscillators.hpp | 338+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/dsp/resample.hpp | 244+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/dsp/speaker.hpp | 91+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/dsp/units.hpp | 219+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/dsp/weighting.hpp | 122+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/dsp/window.hpp | 685+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/expressions/basic.hpp | 360+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/expressions/conversion.hpp | 57+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/expressions/generators.hpp | 279+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/expressions/operators.hpp | 66++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/expressions/pointer.hpp | 168+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/expressions/reduce.hpp | 265+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/io/audiofile.hpp | 370+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/io/file.hpp | 132+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/io/python_plot.hpp | 155+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/io/tostring.hpp | 131+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/math.hpp | 51+++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/misc/compiletime.hpp | 81+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/misc/random.hpp | 180+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/misc/small_buffer.hpp | 113+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/misc/sort.hpp | 98+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/kfr/vec.hpp | 25+++++++++++++++++++++++++
Ainclude/kfr/version.hpp | 35+++++++++++++++++++++++++++++++++++
Areadme.md | 101+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asources.cmake | 89+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asyntax-check.py | 28++++++++++++++++++++++++++++
Atests/CMakeLists.txt | 47+++++++++++++++++++++++++++++++++++++++++++++++
Atests/basic_vector_test.cpp | 152+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atests/complex_test.cpp | 200+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atests/dft_test.cpp | 56++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atests/empty_test.cpp | 7+++++++
Atests/test_output.py | 34++++++++++++++++++++++++++++++++++
Atests/testo/print_colored.hpp | 150+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atests/testo/testo.hpp | 549+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
96 files changed, 25231 insertions(+), 0 deletions(-)

diff --git a/.clang-format b/.clang-format @@ -0,0 +1,27 @@ +UseTab: Never +IndentWidth: 4 +Language : Cpp +BreakBeforeBraces: Allman +MaxEmptyLinesToKeep: 1 +IndentCaseLabels: false +NamespaceIndentation: None +AccessModifierOffset: -4 +SpacesInParentheses: false +SpaceInEmptyParentheses: false +SpacesInCStyleCastParentheses: false +PointerAlignment: Left +Cpp11BracedListStyle: false +AllowShortIfStatementsOnASingleLine: false +AllowShortFunctionsOnASingleLine : true +AlignOperands: true +Standard: Cpp11 +IndentCaseLabels: false +AlignTrailingComments : false +ConstructorInitializerAllOnOneLineOrOnePerLine : false +ColumnLimit: 110 +BinPackParameters : true +BinPackArguments : true +AlwaysBreakTemplateDeclarations : true +AlignConsecutiveAssignments : true +PenaltyReturnTypeOnItsOwnLine: 50000 +CommentPragmas: '^ >>>' diff --git a/.gitignore b/.gitignore @@ -0,0 +1,73 @@ +# Compiled Object files +*.slo +*.lo +*.o +*.obj + +# Precompiled Headers +*.gch +*.pch + +# Compiled Dynamic libraries +*.so +*.dylib +*.dll + +# Fortran module files +*.mod +*.smod + +# Compiled Static libraries +*.lai +*.la +*.a +*.lib + +# Executables +*.exe +*.out +*.app + +# CMake files +CMakeCache.txt +CMakeFiles +CMakeScripts +Makefile +cmake_install.cmake +install_manifest.txt +CTestTestfile.cmake + +# build directory +build/ + +# test directory +svg/ + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +*.egg-info/ +.installed.cfg +*.egg + +# Sphinx documentation +docs/_build/ + +# CLion +.idea/ diff --git a/CMakeLists.txt b/CMakeLists.txt @@ -0,0 +1,47 @@ +# Copyright (C) 2016 D Levin (http://www.kfrlib.com) +# This file is part of KFR +# +# KFR is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of 
the License, or +# (at your option) any later version. +# +# KFR is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with KFR. + + +cmake_minimum_required(VERSION 3.0) + +if (${CMAKE_GENERATOR} STREQUAL "MinGW Makefiles" OR ${CMAKE_GENERATOR} STREQUAL "MSYS Makefiles") + if (CMAKE_BUILD_TYPE_INITIALIZED_TO_DEFAULT) + set(CMAKE_BUILD_TYPE Release) + endif () + set(CMAKE_CXX_COMPILER clang++) + set(CMAKE_C_COMPILER clang) + set(CMAKE_CXX_FLAGS --target=x86_64-w64-windows-gnu CACHE STRING "compile flags" FORCE) + set(CMAKE_C_FLAGS --target=x86_64-w64-windows-gnu CACHE STRING "compile flags" FORCE) + set(CMAKE_EXE_LINKER_FLAGS --target=x86_64-w64-windows-gnu) + set(CMAKE_SHARED_LINKER_FLAGS --target=x86_64-w64-windows-gnu) + set(CMAKE_STATIC_LINKER_FLAGS --target=x86_64-w64-windows-gnu) +endif () + +project(kfr) + +include(sources.cmake) + +add_compile_options(-std=c++1y) + +set(ALL_WARNINGS -Weverything -Wno-c++98-compat -Wno-c++98-compat-pedantic -Wno-c99-extensions -Wno-padded) + +add_compile_options(-march=native) + +add_subdirectory(examples) +add_subdirectory(tests) + +file(MAKE_DIRECTORY ${PROJECT_BINARY_DIR}/svg) + diff --git a/LICENSE.txt b/LICENSE.txt @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/> + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. 
By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. 
+ + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. 
+ + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. 
+ + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. 
The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. 
+ + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. 
+ + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. 
This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. 
For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. 
Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. 
+ + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. 
+ + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. 
Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. 
+ + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + <program> Copyright (C) <year> <name of author> + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +<http://www.gnu.org/licenses/>. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. 
If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +<http://www.gnu.org/philosophy/why-not-lgpl.html>. diff --git a/build.py b/build.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python + +# Copyright (C) 2016 D Levin (http://www.kfrlib.com) +# This file is part of KFR +# +# KFR is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# KFR is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with KFR. + + +from __future__ import print_function + +import os +import subprocess +import sys + +path = os.path.dirname(os.path.realpath(__file__)) +build_dir = os.path.join(path, 'build') + +try: + os.makedirs(build_dir) +except: + pass + +print('Checking clang...', end=' ') +if subprocess.call(['clang', '--version'], stdout=subprocess.PIPE): + raise Exception('clang is not on your PATH') +print('ok') +print('Checking clang++...', end=' ') +if subprocess.call(['clang++', '--version'], stdout=subprocess.PIPE): + raise Exception('clang++ is not on your PATH') +print('ok') + +if sys.platform.startswith('win32'): + generator = 'MinGW Makefiles' +elif sys.platform.startswith('darwin'): + generator = 'Unix Makefiles' + +options = [ + '-DCMAKE_BUILD_TYPE=Release', + ] + +subprocess.call(['cmake', '-G', generator, '..'] + options, cwd=build_dir) +subprocess.call(['cmake', '--build', '.'], cwd=build_dir) +subprocess.call(['ctest'], cwd=os.path.join(build_dir, 'tests')) diff --git a/dspplot/dspplot/__init__.py b/dspplot/dspplot/__init__.py @@ -0,0 +1,2 @@ +from dspplotting import plot +from 
dspplotting import perfplot diff --git a/dspplot/dspplot/dspplotting.py b/dspplot/dspplot/dspplotting.py @@ -0,0 +1,227 @@ +from __future__ import division + +import wave +import matplotlib +import matplotlib.ticker as tck +import matplotlib.pyplot as plt +import numpy as np +import sys +import matplotlib.colors as clr +from scipy import signal +from scipy import interpolate + +def gen_ticks(stop, start=10): + yield start + for s in range(0, 10): + if start * s > stop: + yield stop + raise StopIteration + yield start * s + for t in gen_ticks(stop, start * 10): + yield t + +def gen_tick_labels(stop, start=10): + yield (str(start) + 'Hz').replace('000Hz', 'kHz') + for s in range(0, 10): + if start * s > stop: + yield (str(int(stop)) + 'Hz').replace('000Hz', 'kHz') + raise StopIteration + yield '' + for t in gen_tick_labels(stop, start * 10): + yield t + +def smooth_colormap(colors, name='cmap1'): + to_rgb = clr.ColorConverter().to_rgb + colors = [(p, to_rgb(c)) for p, c in colors] + result = {'red': [], 'green': [], 'blue': []} + for index, item in enumerate(colors): + pos, color = item + if pos is not None: + r, g, b = color + result['red'].append([pos, r, r]) + result['green'].append([pos, g, g]) + result['blue'].append([pos, b, b]) + cmap = clr.LinearSegmentedColormap(name, result) + plt.register_cmap(name=name, cmap=cmap) + return cmap + +def wavplot(wavfile, title='Title', file=None, segmentsize=512, overlap=8): + cmap = smooth_colormap([ + (0 , '#000000'), + (1/9, '#010325'), + (2/9, '#130246'), + (3/9, '#51026e'), + (4/9, '#9e0379'), + (5/9, '#d6033e'), + (6/9, '#fc4d21'), + (7/9, '#fdc967'), + (8/9, '#f3fab8'), + (1 , '#ffffff') + ]) + + w = wave.open(wavfile, 'rb') + + sr = w.getframerate() + data = np.fromstring(w.readframes(w.getnframes()), dtype=np.int32)/2147483647.0 + datalen = len(data) + + def fast_resample(data, newlen): + oldlen=len(data) + result=[] + for i in range(newlen): + result.append(data[i*oldlen//newlen]) + return np.array(result) + + + 
datalen = len(data) + segments=datalen//segmentsize-1 + + im=[] + + window = signal.hann(segmentsize * overlap) + + np.seterr(all='ignore') + + for segm in range(segments-overlap): + r = range(segm*datalen//segments, segm*datalen//segments+segmentsize*overlap) + subdata = data[r] + subdata = subdata * window + n = len(subdata) + Y = np.fft.fft(subdata)/n + Y = Y[range(len(Y) // 2)] + Yfreq = 20 * np.log10(np.absolute(Y)) + Yfreq = signal.resample(Yfreq, 512) + Yfreq = np.fmax(-300, Yfreq) + im.append(Yfreq) + + im = np.transpose(im) + + plt.imshow(im,cmap=cmap, aspect='auto', vmin=-160, vmax=0, origin='lower', extent=[0, datalen / sr, 0, sr / 2 ], interpolation='bicubic') + plt.colorbar() + + if not file: + plt.show() + else: + plt.savefig(file) + + +def plot(data, + title='Title', + horizontal=True, + normalized_freq=False, + Fs=48000, + padwidth=1024, + log_freq=False, + file=None, + freqresp=True, + phaseresp=False, + dots=False, + segmentsize=512, + overlap=8): + if isinstance(data, (list, tuple, np.ndarray)): + n = len(data) + num = 1 + freqresp + phaseresp + figsize = (10 if horizontal else 6 * num, 5 * num if horizontal else 6) + fig, a = plt.subplots(num, 1, figsize=figsize) if horizontal else plt.subplots(1, num, figsize=figsize) + fig.suptitle(title, fontsize=16) + fig.subplots_adjust(top=0.85) + rect = fig.patch + rect.set_facecolor('#f0f0f0') + style = {'linewidth': 1.4, 'color': '#0072bd'} + grid_style = {'color': '#777777'} + + dataplot = a[0] if freqresp or phaseresp else a + + dataplot.plot(np.linspace(0, n, n, False), data, marker='.' 
if dots else None, **style) + dataplot.set_xlabel('Samples') + dataplot.set_ylabel('Amplitude') + dataplot.grid(True, **grid_style) + dataplot.set_autoscalex_on(False) + dataplot.set_xlim([0, n - 1]) + dataplot.set_ylim(bottom=np.min(data)) + + np.seterr(all='ignore') + + if freqresp or phaseresp: + padwidth = max(padwidth, n) + Y = np.fft.fft(np.pad(data, (0, padwidth - n), 'constant', constant_values=(0, 0))) + Y = Y[range(padwidth // 2)] + Yfreq = 20 * np.log10(np.abs(Y)) + Yfreq = np.fmax(-300, Yfreq) + + freq_label = [r'Normalized Frequency ($\times \pi$ rad/sample)', 'Frequency (Hz)'] + + def set_freq(a): + if normalized_freq: + a.set_xlabel(freq_label[0]) + X = np.linspace(0, 1, len(Y), False) + a.set_xlim([0, 1]) + else: + a.set_xlabel(freq_label[1]) + if log_freq: + a.set_xscale('log') + a.set_xticks(list(gen_ticks(Fs / 2))) + a.set_xticklabels(list(gen_tick_labels(Fs / 2))) + X = np.linspace(0, Fs / 2, len(Y), False) + a.set_xlim([10, Fs / 2]) + return X + + if freqresp: + freqplot = a[1] + X = set_freq(freqplot) + freqplot.set_ylabel('Gain (dB)') + freqplot.grid(True, **grid_style) + freqplot.set_autoscalex_on(False) + freqplot.plot(X, Yfreq, **style) + + if phaseresp: + phaseplot = a[1 + freqresp] + Yphase = np.angle(Y, deg=True); + X = set_freq(phaseplot) + phaseplot.grid(True, **grid_style) + phaseplot.set_ylabel(r'Phase (${\circ}$)') + phaseplot.set_autoscaley_on(False) + phaseplot.set_ylim([-180, +180]) + phaseplot.plot(X, Yphase, **style) + + plt.tight_layout(rect=[0, 0.0, 1, 0.94]) + + if not file: + plt.show() + else: + plt.savefig(file) + else: + wavplot(data, title=title, file=file, segmentsize=segmentsize, overlap=overlap) + + +def perfplot(data, labels, title='Speed', xlabel='X', units='ms', file=None): + + styles = [ + {'color': '#F6511D', 'linestyle': '-', 'marker': 'o', 'markersize': 10.0, 'markeredgecolor': '#FFFFFF'}, + {'color': '#00A6ED', 'linestyle': '-', 'marker': 'o', 'markersize': 10.0, 'markeredgecolor': '#FFFFFF'}, + {'color': 
'#FFB400', 'linestyle': '-', 'marker': 'o', 'markersize': 10.0, 'markeredgecolor': '#FFFFFF'}, + {'color': '#7FB800', 'linestyle': '-', 'marker': 'o', 'markersize': 10.0, 'markeredgecolor': '#FFFFFF'}, + {'color': '#0D2C54', 'linestyle': '-', 'marker': 'o', 'markersize': 10.0, 'markeredgecolor': '#FFFFFF'}, + ] + grid_style = {'color': '#777777'} + fig, ax = plt.subplots() + ax.grid(True, **grid_style) + data = map(list, zip(*data)) + ticks = data[0] + data = data[1:] + for d, s, l in zip(data, styles, labels): + ax.set_xlabel(xlabel) + ax.set_ylabel(units) + x = np.linspace(0,len(d),len(d), False) + ax.plot(x, d, linewidth=1.6, label=l, **s) + + ax.set_ylim(bottom=0.0) + legend = ax.legend(loc='lower center', shadow=True) + + plt.xticks(x, ticks, rotation='vertical') + plt.tight_layout(rect=[0, 0.0, 1, 0.94]) + + if not file: + plt.show() + else: + plt.savefig(file) diff --git a/dspplot/setup.py b/dspplot/setup.py @@ -0,0 +1,26 @@ +from setuptools import setup + +setup(name='dspplot', + version='0.0.1', + use_2to3=False, + author='KFRLIB.COM', + author_email='[email protected]', + maintainer='KFRLIB.COM', + maintainer_email='[email protected]', + url='https://kfrlib.com/dspplot/', + description="Small python plotting library for DSP purposes", + long_description="Small python plotting library for DSP purposes", + classifiers=[ + 'Development Status :: 4 - Beta', + 'Programming Language :: Python :: 2', + 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.4', + 'Programming Language :: Python :: 3.5', + 'Topic :: Scientific/Engineering :: Visualization', + ], + license='MIT', + packages=['dspplot'], + package_data={'dspplot': []}, + install_requires=['matplotlib', 'numpy', 'scipy'], + zip_safe=False) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt @@ -0,0 +1,34 @@ +# Copyright (C) 2016 D Levin (http://www.kfrlib.com) +# This file is part of KFR +# +# KFR is free software: you can 
redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# KFR is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with KFR. + + +cmake_minimum_required(VERSION 3.0) + +add_compile_options(-fno-exceptions -fno-rtti) + +set(ALL_WARNINGS -Weverything -Wno-c++98-compat -Wno-c++98-compat-pedantic -Wno-c99-extensions -Wno-padded) + +add_compile_options(-march=native) + +link_libraries(stdc++ pthread) + +include_directories(../include) + +add_executable(biquads biquads.cpp ${KFR_SRC}) +add_executable(window window.cpp ${KFR_SRC}) +add_executable(fir fir.cpp ${KFR_SRC}) +add_executable(resampling resampling.cpp ${KFR_SRC}) +add_executable(dft dft.cpp ${KFR_SRC} ${DFT_SRC}) diff --git a/examples/biquads.cpp b/examples/biquads.cpp @@ -0,0 +1,85 @@ +/** + * KFR (http://kfrlib.com) + * Copyright (C) 2016 D Levin + * See LICENSE.txt for details + */ + +// library_version() +#include <kfr/version.hpp> + +// print(), format() +#include <kfr/cometa/string.hpp> + +#include <kfr/math.hpp> + +// simpleimpulse() +#include <kfr/dsp/oscillators.hpp> + +// biquad* +#include <kfr/dsp/biquad.hpp> + +// plot_save() +#include <kfr/io/python_plot.hpp> + +using namespace kfr; +using namespace kfr::native; + +int main(int argc, char** argv) +{ + println(library_version()); + + using namespace native; + const std::string options = "phaseresp=True"; + + univector<double, 128> output; + { + biquad_params<double> bq[] = { biquad_notch(0.1, 0.5), biquad_notch(0.2, 0.5), biquad_notch(0.3, 0.5), + biquad_notch(0.4, 0.5) }; + output = biquad(bq, simpleimpulse()); + } + plot_save("biquad_notch", 
output, options); + + { + biquad_params<double> bq[] = { biquad_lowpass(0.2, 0.9) }; + output = biquad(bq, simpleimpulse()); + } + plot_save("biquad_lowpass", output, options); + + { + biquad_params<double> bq[] = { biquad_highpass(0.3, 0.1) }; + output = biquad(bq, simpleimpulse()); + } + plot_save("biquad_highpass", output, options); + + { + biquad_params<double> bq[] = { biquad_peak(0.3, 0.5, +9.0) }; + output = biquad(bq, simpleimpulse()); + } + plot_save("biquad_peak", output, options); + + { + biquad_params<double> bq[] = { biquad_peak(0.3, 3.0, -2.0) }; + output = biquad(bq, simpleimpulse()); + } + plot_save("biquad_peak2", output, options); + + { + biquad_params<double> bq[] = { biquad_lowshelf(0.3, -1.0) }; + output = biquad(bq, simpleimpulse()); + } + plot_save("biquad_lowshelf", output, options); + + { + biquad_params<double> bq[] = { biquad_highshelf(0.3, +9.0) }; + output = biquad(bq, simpleimpulse()); + } + plot_save("biquad_highshelf", output, options); + + { + biquad_params<double> bq[] = { biquad_bandpass(0.25, 0.2) }; + output = biquad(bq, simpleimpulse()); + } + plot_save("biquad_bandpass", output, options); + + return 0; +} diff --git a/examples/dft.cpp b/examples/dft.cpp @@ -0,0 +1,66 @@ +/** + * KFR (http://kfrlib.com) + * Copyright (C) 2016 D Levin + * See LICENSE.txt for details + */ + +// library_version() +#include <kfr/version.hpp> + +#include <kfr/io/tostring.hpp> + +// print(), format() +#include <kfr/cometa/string.hpp> + +#include <kfr/dft/fft.hpp> +#include <kfr/dft/reference_dft.hpp> +#include <kfr/dsp/oscillators.hpp> +#include <kfr/dsp/units.hpp> +#include <kfr/expressions/basic.hpp> +#include <kfr/expressions/operators.hpp> +#include <kfr/expressions/reduce.hpp> +#include <kfr/math.hpp> +#include <kfr/misc/random.hpp> +#include <kfr/vec.hpp> + +using namespace kfr; + +int main(int argc, char** argv) +{ + println(library_version()); + + // fft size + const size_t size = 128; + using float_type = double; + + // initialize input & 
output buffers + univector<complex<float_type>, size> in = sin(linspace(0.0, c_pi<float_type, 2> * 4.0, size)); + univector<complex<float_type>, size> out = scalar(qnan); + + // initialize fft + const dft_plan<float_type> dft(size); + + // allocate work buffer for fft (if needed) + univector<u8> temp(dft.temp_size); + + // perform forward fft + dft.execute(out, in, temp); + + // scale output + out = out / size; + + // get magnitude and convert to decibels + univector<float_type, size> dB = amp_to_dB(cabs(out)); + + println("max = ", max(dB)); + println("min = ", min(dB)); + println("mean = ", mean(dB)); + println("rms = ", rms(dB)); + + println(in); + println(); + println(dB); + (void)argc; + (void)argv; + return 0; +} diff --git a/examples/fir.cpp b/examples/fir.cpp @@ -0,0 +1,73 @@ +/** + * KFR (http://kfrlib.com) + * Copyright (C) 2016 D Levin + * See LICENSE.txt for details + */ + +// library_version() +#include <kfr/version.hpp> + +// print(), format() +#include <kfr/cometa/string.hpp> + +#include <kfr/math.hpp> + +// expression_pointer<>, topointer() +#include <kfr/expressions/pointer.hpp> + +// simpleimpulse() +#include <kfr/dsp/oscillators.hpp> + +// fir* +#include <kfr/dsp/fir.hpp> + +// plot_save() +#include <kfr/io/python_plot.hpp> + +#include <iostream> + +using namespace kfr; +using namespace kfr::native; + +int main(int argc, char** argv) +{ + println(library_version()); + + using namespace native; + const std::string options = "phaseresp=False"; + + univector<double, 15> taps15; + univector<double, 127> taps127; + univector<double, 8191> taps8191; + + expression_pointer<double> hann = to_pointer(window_hann(taps15.size())); + + expression_pointer<double> kaiser = to_pointer(window_kaiser(taps127.size(), 3.0)); + + expression_pointer<double> blackman_harris = to_pointer(window_blackman_harris(taps8191.size())); + + fir_lowpass(taps15, 0.15, hann, true); + plot_save("fir_lowpass_hann", taps15, options + ", title='15-point lowpass FIR, Hann window'"); + 
+ fir_lowpass(taps127, 0.2, kaiser, true); + plot_save("fir_lowpass_kaiser", taps127, + options + ", title=r'127-point lowpass FIR, Kaiser window ($\\alpha=3.0$)'"); + + fir_highpass(taps127, 0.2, kaiser, true); + plot_save("fir_highpass_kaiser", taps127, + options + ", title=r'127-point highpass FIR, Kaiser window ($\\alpha=3.0$)'"); + + fir_bandpass(taps127, 0.2, 0.4, kaiser, true); + plot_save("fir_bandpass_kaiser", taps127, + options + ", title=r'127-point bandpass FIR, Kaiser window ($\\alpha=3.0$)'"); + + fir_bandstop(taps127, 0.2, 0.4, kaiser, true); + plot_save("fir_bandstop_kaiser", taps127, + options + ", title=r'127-point bandstop FIR, Kaiser window ($\\alpha=3.0$)'"); + + fir_lowpass(taps8191, 0.15, blackman_harris, true); + plot_save("fir_lowpass_blackman", taps8191, + options + ", title='8191-point lowpass FIR, Blackman-Harris window'"); + + return 0; +} diff --git a/examples/resampling.cpp b/examples/resampling.cpp @@ -0,0 +1,110 @@ +/** + * KFR (http://kfrlib.com) + * Copyright (C) 2016 D Levin + * See LICENSE.txt for details + */ + +// library_version() +#include <kfr/version.hpp> + +// print(), format() +#include <kfr/cometa/string.hpp> + +#include <kfr/math.hpp> + +// resample* +#include <kfr/dsp/resample.hpp> + +// file* +#include <kfr/io/audiofile.hpp> + +// swept +#include <kfr/dsp/oscillators.hpp> + +// operator overloading for expressions +#include <kfr/expressions/operators.hpp> + +// plot_save() +#include <kfr/io/python_plot.hpp> + +#include <iostream> + +using namespace kfr; +using namespace kfr::native; + +constexpr size_t input_sr = 96000; +constexpr size_t output_sr = 44100; +constexpr size_t len = 96000 * 6; +constexpr f64 i32max = 2147483647.0; + +int main(int argc, char** argv) +{ + println(library_version()); + + using namespace native; + const std::string options = "phaseresp=False"; + + univector<f64> swept_sine = swept(0.5, len); + + { + auto r = resampler(resample_quality::high, output_sr, input_sr, 1.0, 0.496); + 
univector<f64> resampled(len * output_sr / input_sr); + + const size_t destsize = r(resampled.data(), swept_sine); + + univector<i32> i32data = clamp(resampled.slice(0, destsize) * i32max, -i32max, +i32max); + univector2d<i32> data = { i32data }; + + auto wr = sequential_file_writer("audio_high_quality.wav"); + audio_encode(wr, data, audioformat(data, output_sr)); + + plot_save("audio_high_quality", "audio_high_quality.wav", ""); + } + + { + auto r = resampler(resample_quality::normal, output_sr, input_sr, 1.0, 0.496); + univector<f64> resampled(len * output_sr / input_sr); + + const size_t destsize = r(resampled.data(), swept_sine); + + univector<i32> i32data = clamp(resampled.slice(0, destsize) * i32max, -i32max, +i32max); + univector2d<i32> data = { i32data }; + + auto wr = sequential_file_writer("audio_normal_quality.wav"); + audio_encode(wr, data, audioformat(data, output_sr)); + + plot_save("audio_normal_quality", "audio_normal_quality.wav", ""); + } + + { + auto r = resampler(resample_quality::low, output_sr, input_sr, 1.0, 0.496); + univector<f64> resampled(len * output_sr / input_sr); + + const size_t destsize = r(resampled.data(), swept_sine); + + univector<i32> i32data = clamp(resampled.slice(0, destsize) * i32max, -i32max, +i32max); + univector2d<i32> data = { i32data }; + + auto wr = sequential_file_writer("audio_low_quality.wav"); + audio_encode(wr, data, audioformat(data, output_sr)); + + plot_save("audio_low_quality", "audio_low_quality.wav", ""); + } + + { + auto r = resampler(resample_quality::draft, output_sr, input_sr, 1.0, 0.496); + univector<f64> resampled(len * output_sr / input_sr); + + const size_t destsize = r(resampled.data(), swept_sine); + + univector<i32> i32data = clamp(resampled.slice(0, destsize) * i32max, -i32max, +i32max); + univector2d<i32> data = { i32data }; + + auto wr = sequential_file_writer("audio_draft_quality.wav"); + audio_encode(wr, data, audioformat(data, output_sr)); + + plot_save("audio_draft_quality", 
"audio_draft_quality.wav", ""); + } + + return 0; +} diff --git a/examples/window.cpp b/examples/window.cpp @@ -0,0 +1,76 @@ +/** + * KFR (http://kfrlib.com) + * Copyright (C) 2016 D Levin + * See LICENSE.txt for details + */ + +// library_version() +#include <kfr/version.hpp> + +// print(), format() +#include <kfr/cometa/string.hpp> + +#include <kfr/math.hpp> + +// simpleimpulse() +#include <kfr/dsp/oscillators.hpp> + +// window* +#include <kfr/dsp/window.hpp> + +// plot_save() +#include <kfr/io/python_plot.hpp> + +using namespace kfr; +using namespace kfr::native; + +int main(int argc, char** argv) +{ + println(library_version()); + + using namespace native; + const std::string options = "freqresp=True, dots=True, padwidth=1024, " + "log_freq=False, horizontal=False, normalized_freq=True"; + + univector<double, 64> output; + output = window_hann(output.size()); + plot_save("window_hann", output, options + ", title='Hann window'"); + + output = window_hamming(output.size()); + plot_save("window_hamming", output, options + ", title='Hamming window'"); + + output = window_blackman(output.size()); + plot_save("window_blackman", output, options + ", title='Blackman window'"); + + output = window_blackman_harris(output.size()); + plot_save("window_blackman_harris", output, options + ", title='Blackman-Harris window'"); + + output = window_gaussian(output.size()); + plot_save("window_gaussian", output, options + ", title='Gaussian window'"); + + output = window_triangular(output.size()); + plot_save("window_triangular", output, options + ", title='Triangular window'"); + + output = window_bartlett(output.size()); + plot_save("window_bartlett", output, options + ", title='Bartlett window'"); + + output = window_cosine(output.size()); + plot_save("window_cosine", output, options + ", title='Cosine window'"); + + output = window_bartlett_hann(output.size()); + plot_save("window_bartlett_hann", output, options + ", title='Bartlett-Hann window'"); + + output = 
window_bohman(output.size()); + plot_save("window_bohman", output, options + ", title='Bohman window'"); + + output = window_lanczos(output.size()); + plot_save("window_lanczos", output, options + ", title='Lanczos window'"); + + output = window_flattop(output.size()); + plot_save("window_flattop", output, options + ", title='Flat top window'"); + + output = window_kaiser(output.size(), 2.5); + plot_save("window_kaiser", output, options + ", title='Kaiser window'"); + + return 0; +} diff --git a/format-all.py b/format-all.py @@ -0,0 +1,26 @@ +#!/usr/bin/env python +from __future__ import print_function + +import fnmatch +import os +import subprocess +import sys +import glob + +path = os.path.dirname(os.path.realpath(__file__)) + +filenames = [] +for root, dirnames, files in os.walk(path, path): + for filename in fnmatch.filter(files, '*.hpp'): + filenames.append(os.path.join(root, filename)) + for filename in fnmatch.filter(files, '*.h'): + filenames.append(os.path.join(root, filename)) + for filename in fnmatch.filter(files, '*.cpp'): + filenames.append(os.path.join(root, filename)) + +for filename in filenames: + print( filename, '...' ) + subprocess.call(['clang-format', '-i', filename]) + # Fix clang-format bug: https://llvm.org/bugs/show_bug.cgi?id=26125 + for tmp_file in glob.glob(filename+'*.tmp'): + os.remove(tmp_file) diff --git a/img/fft_performance.png b/img/fft_performance.png Binary files differ. diff --git a/include/kfr/all.hpp b/include/kfr/all.hpp @@ -0,0 +1,84 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. + */ + +#include "base/abs.hpp" +#include "base/asin_acos.hpp" +#include "base/atan.hpp" +#include "base/complex.hpp" +#include "base/constants.hpp" +#include "base/digitreverse.hpp" +#include "base/dispatch.hpp" +#include "base/function.hpp" +#include "base/gamma.hpp" +#include "base/log_exp.hpp" +#include "base/logical.hpp" +#include "base/memory.hpp" +#include "base/min_max.hpp" +#include "base/operators.hpp" +#include "base/read_write.hpp" +#include "base/round.hpp" +#include "base/saturation.hpp" +#include "base/select.hpp" +#include "base/shuffle.hpp" +#include "base/sin_cos.hpp" +#include "base/sinh_cosh.hpp" +#include "base/sqrt.hpp" +#include "base/tan.hpp" +#include "base/types.hpp" +#include "base/univector.hpp" +#include "base/vec.hpp" +#include "data/bitrev.hpp" +#include "data/sincos.hpp" +#include "dft/bitrev.hpp" +#include "dft/fft.hpp" +#include "dft/ft.hpp" +#include "dft/reference_dft.hpp" +#include "dispatch/cpuid.hpp" +#include "dispatch/runtimedispatch.hpp" +#include "dsp/biquad.hpp" +#include "dsp/fir.hpp" +#include "dsp/goertzel.hpp" +#include "dsp/interpolation.hpp" +#include "dsp/oscillators.hpp" +#include "dsp/resample.hpp" +#include "dsp/speaker.hpp" +#include "dsp/units.hpp" +#include "dsp/weighting.hpp" +#include "dsp/window.hpp" +#include "expressions/basic.hpp" +#include "expressions/conversion.hpp" +#include 
"expressions/generators.hpp" +#include "expressions/operators.hpp" +#include "expressions/pointer.hpp" +#include "expressions/reduce.hpp" +#include "io/audiofile.hpp" +#include "io/file.hpp" +#include "io/python_plot.hpp" +#include "io/tostring.hpp" +#include "math.hpp" +#include "misc/compiletime.hpp" +#include "misc/random.hpp" +#include "misc/small_buffer.hpp" +#include "misc/sort.hpp" +#include "vec.hpp" +#include "version.hpp" diff --git a/include/kfr/base/abs.hpp b/include/kfr/base/abs.hpp @@ -0,0 +1,138 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. 
+ */ +#pragma once + +#include "function.hpp" +#include "operators.hpp" +#include "select.hpp" + +#pragma clang diagnostic push +#if CID_HAS_WARNING("-Winaccessible-base") +#pragma clang diagnostic ignored "-Winaccessible-base" +#endif + +namespace kfr +{ + +namespace internal +{ + +template <cpu_t cpu = cpu_t::native> +struct in_abs : in_abs<older(cpu)> +{ + struct fn_abs : in_abs<older(cpu)>::fn_abs, fn_disabled + { + }; +}; + +template <> +struct in_abs<cpu_t::sse2> : in_select<cpu_t::sse2> +{ + constexpr static cpu_t cpu = cpu_t::sse2; + +private: + using in_select<cpu_t::sse2>::select; + +public: + template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)> + KFR_SINTRIN vec<T, N> abs(vec<T, N> value) + { + return select(value >= T(), value, -value); + } + template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)> + KFR_SINTRIN vec<T, N> abs(vec<T, N> value) + { + return value & invhighbitmask<T>; + } + + KFR_HANDLE_ALL(abs) + KFR_HANDLE_SCALAR(abs) + KFR_SPEC_FN(in_abs, abs) +}; + +template <> +struct in_abs<cpu_t::ssse3> : in_abs<cpu_t::sse2>, in_select<cpu_t::sse2> +{ + constexpr static cpu_t cpu = cpu_t::ssse3; + +private: + using in_select<cpu_t::sse2>::select; + +public: + template <size_t N> + KFR_SINTRIN vec<i64, N> abs(vec<i64, N> value) + { + return select(value >= 0, value, -value); + } + + KFR_AINTRIN i32sse abs(i32sse value) { return _mm_abs_epi32(*value); } + KFR_AINTRIN i16sse abs(i16sse value) { return _mm_abs_epi16(*value); } + KFR_AINTRIN i8sse abs(i8sse value) { return _mm_abs_epi8(*value); } + + template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)> + KFR_SINTRIN vec<T, N> abs(vec<T, N> value) + { + return value & invhighbitmask<T>; + } + + KFR_HANDLE_ALL(abs) + KFR_HANDLE_SCALAR(abs) + KFR_SPEC_FN(in_abs, abs) +}; + +template <> +struct in_abs<cpu_t::avx2> : in_abs<cpu_t::ssse3> +{ + constexpr static cpu_t cpu = cpu_t::avx2; + using in_abs<cpu_t::ssse3>::abs; + + KFR_AINTRIN i32avx abs(i32avx value) { 
return _mm256_abs_epi32(*value); } + KFR_AINTRIN i16avx abs(i16avx value) { return _mm256_abs_epi16(*value); } + KFR_AINTRIN i8avx abs(i8avx value) { return _mm256_abs_epi8(*value); } + + KFR_HANDLE_ALL(abs) + KFR_HANDLE_SCALAR(abs) + KFR_SPEC_FN(in_abs, abs) +}; +} + +namespace native +{ +using fn_abs = internal::in_abs<>::fn_abs; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> + +KFR_INTRIN ftype<T1> abs(const T1& x) +{ + return internal::in_abs<>::abs(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> + +KFR_INTRIN expr_func<fn_abs, E1> abs(E1&& x) +{ + return { fn_abs(), std::forward<E1>(x) }; +} +} +} + +#pragma clang diagnostic pop diff --git a/include/kfr/base/asin_acos.hpp b/include/kfr/base/asin_acos.hpp @@ -0,0 +1,104 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. 
+ */ +#pragma once + +#include "abs.hpp" +#include "atan.hpp" +#include "constants.hpp" +#include "function.hpp" +#include "min_max.hpp" +#include "operators.hpp" +#include "select.hpp" +#include "shuffle.hpp" +#include "sqrt.hpp" + +#pragma clang diagnostic push +#if CID_HAS_WARNING("-Winaccessible-base") +#pragma clang diagnostic ignored "-Winaccessible-base" +#endif + +namespace kfr +{ + +namespace internal +{ + +template <cpu_t cpu = cpu_t::native> +struct in_asin_acos : private in_select<cpu>, private in_atan<cpu>, private in_sqrt<cpu> +{ +private: + using in_atan<cpu>::atan2; + using in_sqrt<cpu>::sqrt; + +public: + template <typename T, size_t N> + KFR_SINTRIN vec<T, N> asin(vec<T, N> x) + { + return atan2(x, sqrt(T(1) - x * x)); + } + + template <typename T, size_t N> + KFR_SINTRIN vec<T, N> acos(vec<T, N> x) + { + return atan2(sqrt(T(1) - x * x), x); + } + KFR_SPEC_FN(in_asin_acos, asin) + KFR_SPEC_FN(in_asin_acos, acos) +}; +} + +namespace native +{ +using fn_asin = internal::in_asin_acos<>::fn_asin; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> + +KFR_INTRIN ftype<T1> asin(const T1& x) +{ + return internal::in_asin_acos<>::asin(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> + +KFR_INTRIN expr_func<fn_asin, E1> asin(E1&& x) +{ + return { fn_asin(), std::forward<E1>(x) }; +} + +using fn_acos = internal::in_asin_acos<>::fn_acos; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> + +KFR_INTRIN ftype<T1> acos(const T1& x) +{ + return internal::in_asin_acos<>::acos(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> + +KFR_INTRIN expr_func<fn_acos, E1> acos(E1&& x) +{ + return { fn_acos(), std::forward<E1>(x) }; +} +} +} + +#pragma clang diagnostic pop diff --git a/include/kfr/base/atan.hpp b/include/kfr/base/atan.hpp @@ -0,0 +1,267 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute 
it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. + */ +#pragma once +#include "abs.hpp" +#include "constants.hpp" +#include "function.hpp" +#include "operators.hpp" +#include "select.hpp" +#include "sin_cos.hpp" + +#pragma clang diagnostic push +#if CID_HAS_WARNING("-Winaccessible-base") +#pragma clang diagnostic ignored "-Winaccessible-base" +#endif + +namespace kfr +{ +namespace internal +{ +template <cpu_t c = cpu_t::native, cpu_t cc = c> +struct in_atan : in_trig<cc>, in_select<cc>, in_round<cc>, in_abs<cc> +{ +private: + using in_abs<cc>::abs; + using in_round<cc>::floor; + using in_select<cc>::select; + using in_trig<cc>::mask_horner; + using in_select<cc>::sign; + +public: + template <size_t N> + KFR_SINTRIN vec<f32, N> atan2k(vec<f32, N> y, vec<f32, N> x) + { + vec<f32, N> s, t, u; + vec<i32, N> q; + q = select(x < 0, -2, 0); + x = select(x < 0, -x, x); + mask<i32, N> m; + m = y > x; + t = x; + x = select(m, y, x); + y = select(m, -t, y); + q = select(m, q + 1, q); + s = y / x; + t = s * s; + u = 0.00282363896258175373077393f; + u = fmadd(u, t, -0.0159569028764963150024414f); + u = fmadd(u, t, 0.0425049886107444763183594f); + u = fmadd(u, t, -0.0748900920152664184570312f); + u = 
fmadd(u, t, 0.106347933411598205566406f); + u = fmadd(u, t, -0.142027363181114196777344f); + u = fmadd(u, t, 0.199926957488059997558594f); + u = fmadd(u, t, -0.333331018686294555664062f); + t = u * t * s + s; + t = cast<f32>(q) * 1.5707963267948966192313216916398f + t; + return t; + } + template <size_t N> + KFR_SINTRIN vec<f64, N> atan2k(vec<f64, N> y, vec<f64, N> x) + { + vec<f64, N> s, t, u; + vec<i64, N> q; + q = select(x < 0, -2ll, 0ll); + x = select(x < 0, -x, x); + vec<i64, N> m; + m = y > x; + t = x; + x = select(m, y, x); + y = select(m, -t, y); + q = select(m, q + 1ll, q); + s = y / x; + t = s * s; + u = -1.88796008463073496563746e-05; + u = fmadd(u, t, 0.000209850076645816976906797); + u = fmadd(u, t, -0.00110611831486672482563471); + u = fmadd(u, t, 0.00370026744188713119232403); + u = fmadd(u, t, -0.00889896195887655491740809); + u = fmadd(u, t, 0.016599329773529201970117); + u = fmadd(u, t, -0.0254517624932312641616861); + u = fmadd(u, t, 0.0337852580001353069993897); + u = fmadd(u, t, -0.0407629191276836500001934); + u = fmadd(u, t, 0.0466667150077840625632675); + u = fmadd(u, t, -0.0523674852303482457616113); + u = fmadd(u, t, 0.0587666392926673580854313); + u = fmadd(u, t, -0.0666573579361080525984562); + u = fmadd(u, t, 0.0769219538311769618355029); + u = fmadd(u, t, -0.090908995008245008229153); + u = fmadd(u, t, 0.111111105648261418443745); + u = fmadd(u, t, -0.14285714266771329383765); + u = fmadd(u, t, 0.199999999996591265594148); + u = fmadd(u, t, -0.333333333333311110369124); + t = u * t * s + s; + t = cast<f64>(q) * 1.5707963267948966192313216916398 + t; + return t; + } + template <size_t N> + KFR_SINTRIN vec<f32, N> atan2(vec<f32, N> y, vec<f32, N> x) + { + vec<f32, N> r = atan2k(abs(y), x); + constexpr f32 pi = 3.1415926535897932384626433832795f; + constexpr f32 pi_over_2 = 1.5707963267948966192313216916398f; + constexpr f32 pi_over_4 = 0.78539816339744830961566084581988f; + r = mulsign(r, x); + r = select(isinf(x) || x == 0.0f, pi_over_2 
- select(x.asmask(), mulsign(pi_over_2, x), 0.0f), r); + r = select(isinf(y), pi_over_2 - select(x.asmask(), mulsign(pi_over_4, x), 0.0f), r); + r = select(y == 0.0f, fbitcast(ibitcast(sign(x) == -1.0f) & ibitcast(pi)), r); + r = fbitcast(ibitcast(isnan(x) || isnan(y)) | ibitcast(mulsign(r, y))); + return r; + } + template <size_t N> + KFR_SINTRIN vec<f64, N> atan2(vec<f64, N> y, vec<f64, N> x) + { + vec<f64, N> r = atan2k(abs(y), x); + constexpr f64 pi = 3.1415926535897932384626433832795; + constexpr f64 pi_over_2 = 1.5707963267948966192313216916398; + constexpr f64 pi_over_4 = 0.78539816339744830961566084581988; + r = mulsign(r, x); + r = select(isinf(x) || x == 0.0, pi_over_2 - select(x, mulsign(pi_over_2, x), 0.0), r); + r = select(isinf(y), pi_over_2 - select(x, mulsign(pi_over_4, x), 0.0), r); + r = select(y == 0.0, fbitcast(ibitcast(sign(x) == -1.0) & ibitcast(pi)), r); + r = fbitcast(ibitcast(isnan(x) || isnan(y)) | ibitcast(mulsign(r, y))); + return r; + } + template <size_t N> + KFR_SINTRIN vec<f32, N> atan(vec<f32, N> s) + { + vec<f32, N> t, u; + vec<i32, N> q; + q = select(s < 0.f, 2, 0); + s = select(s < 0.f, -s, s); + q = select(s > 1.f, q | 1, q); + s = select(s > 1.f, 1.0f / s, s); + t = s * s; + u = 0.00282363896258175373077393f; + u = fmadd(u, t, -0.0159569028764963150024414f); + u = fmadd(u, t, 0.0425049886107444763183594f); + u = fmadd(u, t, -0.0748900920152664184570312f); + u = fmadd(u, t, 0.106347933411598205566406f); + u = fmadd(u, t, -0.142027363181114196777344f); + u = fmadd(u, t, 0.199926957488059997558594f); + u = fmadd(u, t, -0.333331018686294555664062f); + t = s + s * (t * u); + t = select((q & 1) != 0, 1.570796326794896557998982f - t, t); + t = select((q & 2) != 0, -t, t); + return t; + } + template <size_t N> + KFR_SINTRIN vec<f64, N> atan(vec<f64, N> s) + { + vec<f64, N> t, u; + vec<i64, N> q; + q = select(s < 0.0, 2ll, 0ll); + s = select(s < 0.0, -s, s); + q = select(s > 1.0, q | 1, q); + s = select(s > 1.0, 1.0 / s, s); + t = s * 
s; + u = -1.88796008463073496563746e-05; + u = fmadd(u, t, 0.000209850076645816976906797); + u = fmadd(u, t, -0.00110611831486672482563471); + u = fmadd(u, t, 0.00370026744188713119232403); + u = fmadd(u, t, -0.00889896195887655491740809); + u = fmadd(u, t, 0.016599329773529201970117); + u = fmadd(u, t, -0.0254517624932312641616861); + u = fmadd(u, t, 0.0337852580001353069993897); + u = fmadd(u, t, -0.0407629191276836500001934); + u = fmadd(u, t, 0.0466667150077840625632675); + u = fmadd(u, t, -0.0523674852303482457616113); + u = fmadd(u, t, 0.0587666392926673580854313); + u = fmadd(u, t, -0.0666573579361080525984562); + u = fmadd(u, t, 0.0769219538311769618355029); + u = fmadd(u, t, -0.090908995008245008229153); + u = fmadd(u, t, 0.111111105648261418443745); + u = fmadd(u, t, -0.14285714266771329383765); + u = fmadd(u, t, 0.199999999996591265594148); + u = fmadd(u, t, -0.333333333333311110369124); + t = s + s * (t * u); + t = select((q & 1) != 0, 1.570796326794896557998982 - t, t); + t = select((q & 2) != 0, -t, t); + return t; + } + template <typename T> + KFR_SINTRIN T atandeg(const T& x) + { + return atan(x) * c_radtodeg<T>; + } + template <typename T1, typename T2> + KFR_SINTRIN common_type<T1, T2> atan2deg(const T1& y, const T2& x) + { + return atan2(y, x) * c_radtodeg<common_type<T1, T2>>; + } + KFR_HANDLE_SCALAR(atan) + KFR_HANDLE_SCALAR(atan2) + KFR_SPEC_FN(in_atan, atan) + KFR_SPEC_FN(in_atan, atandeg) + KFR_SPEC_FN(in_atan, atan2) + KFR_SPEC_FN(in_atan, atan2deg) +}; +} +namespace native +{ +using fn_atan = internal::in_atan<>::fn_atan; +template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)> +KFR_INLINE ftype<common_type<T1, T2>> atan(const T1& y, const T2& x) +{ + return internal::in_atan<>::atan(y, x); +} +template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)> +KFR_INLINE expr_func<fn_atan, E1, E2> atan(E1&& y, E2&& x) +{ + return { fn_atan(), std::forward<E1>(y), std::forward<E2>(x) }; +} 
+using fn_atan2 = internal::in_atan<>::fn_atan2; +template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)> +KFR_INLINE ftype<common_type<T1, T2>> atan2(const T1& y, const T2& x) +{ + return internal::in_atan<>::atan2(y, x); +} +template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)> +KFR_INLINE expr_func<fn_atan2, E1, E2> atan2(E1&& y, E2&& x) +{ + return { fn_atan2(), std::forward<E1>(y), std::forward<E2>(x) }; +} +using fn_atandeg = internal::in_atan<>::fn_atandeg; +template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)> +KFR_INLINE ftype<common_type<T1, T2>> atandeg(const T1& y, const T2& x) +{ + return internal::in_atan<>::atandeg(y, x); +} +template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)> +KFR_INLINE expr_func<fn_atandeg, E1, E2> atandeg(E1&& y, E2&& x) +{ + return { fn_atandeg(), std::forward<E1>(y), std::forward<E2>(x) }; +} +using fn_atan2deg = internal::in_atan<>::fn_atan2deg; +template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)> +KFR_INLINE ftype<common_type<T1, T2>> atan2deg(const T1& y, const T2& x) +{ + return internal::in_atan<>::atan2deg(y, x); +} +template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)> +KFR_INLINE expr_func<fn_atan2deg, E1, E2> atan2deg(E1&& y, E2&& x) +{ + return { fn_atan2deg(), std::forward<E1>(y), std::forward<E2>(x) }; +} +} +} +#pragma clang diagnostic pop diff --git a/include/kfr/base/complex.hpp b/include/kfr/base/complex.hpp @@ -0,0 +1,610 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. + */ +#pragma once +#include "abs.hpp" +#include "atan.hpp" +#include "constants.hpp" +#include "function.hpp" +#include "log_exp.hpp" +#include "min_max.hpp" +#include "operators.hpp" +#include "select.hpp" +#include "sin_cos.hpp" +#include "sinh_cosh.hpp" +#include "sqrt.hpp" + +#pragma clang diagnostic push +#if CID_HAS_WARNING("-Winaccessible-base") +#pragma clang diagnostic ignored "-Winaccessible-base" +#endif + +namespace kfr +{ + +template <typename T> +struct complex +{ + constexpr static bool is_pod = true; + constexpr complex() noexcept = default; + constexpr complex(T re) noexcept : re(re), im(0) {} + constexpr complex(T re, T im) noexcept : re(re), im(im) {} + constexpr complex(const complex&) noexcept = default; + constexpr complex(complex&&) noexcept = default; + template <typename U> + constexpr complex(const complex<U>& other) noexcept : re(static_cast<T>(other.re)), + im(static_cast<T>(other.im)) + { + } + template <typename U> + constexpr complex(complex<U>&& other) noexcept : re(std::move(other.re)), im(std::move(other.im)) + { + } + constexpr complex& operator=(const complex&) noexcept = default; + constexpr complex& operator=(complex&&) noexcept = default; + constexpr const T& real() const noexcept { return re; } + constexpr const T& imag() const noexcept { return im; } + constexpr void real(T value) noexcept { re = 
value; } + constexpr void imag(T value) noexcept { im = value; } + T re; + T im; +}; + +using c32 = complex<f32>; +using c64 = complex<f64>; +using cbase = complex<fbase>; + +template <typename T> +struct vec_op<complex<T>> : private vec_op<T> +{ + using scalar_type = T; + using vec_op<scalar_type>::add; + using vec_op<scalar_type>::sub; + using vec_op<scalar_type>::eq; + using vec_op<scalar_type>::ne; + using vec_op<scalar_type>::band; + using vec_op<scalar_type>::bor; + using vec_op<scalar_type>::bxor; + using vec_op<scalar_type>::bnot; + using vec_op<scalar_type>::neg; + + template <simdindex N> + constexpr static simd<scalar_type, N> mul(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept + { + const vec<scalar_type, N> xx = x; + const vec<scalar_type, N> yy = y; + return *subadd(xx * dupeven(yy), swap<2>(xx) * dupodd(yy)); + } + + template <simdindex N> + constexpr static simd<scalar_type, N> div(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept + { + const vec<scalar_type, N> xx = x; + const vec<scalar_type, N> yy = y; + const vec<scalar_type, N> m = (sqr(dupeven(yy)) + sqr(dupodd(yy))); + return *swap<2>(subadd(swap<2>(xx) * dupeven(yy), xx * dupodd(yy)) / m); + } +}; + +template <typename T, size_t N> +KFR_INLINE vec<complex<T>, N> cdupreal(const vec<complex<T>, N>& x) +{ + return subcast<complex<T>>(dupeven(subcast<T>(x))); +} +KFR_FN(cdupreal) + +template <typename T, size_t N> +KFR_INLINE vec<complex<T>, N> cdupimag(const vec<complex<T>, N>& x) +{ + return subcast<complex<T>>(dupodd(subcast<T>(x))); +} +KFR_FN(cdupimag) + +template <typename T, size_t N> +KFR_INLINE vec<complex<T>, N> cswapreim(const vec<complex<T>, N>& x) +{ + return subcast<complex<T>>(swap<2>(subcast<T>(x))); +} +KFR_FN(cswapreim) + +template <typename T, size_t N> +KFR_INLINE vec<complex<T>, N> cnegreal(const vec<complex<T>, N>& x) +{ + return x ^ complex<T>(-T(), T()); +} +KFR_FN(cnegreal) +template <typename T, size_t N> +KFR_INLINE vec<complex<T>, N> cnegimag(const 
vec<complex<T>, N>& x) +{ + return x ^ complex<T>(T(), -T()); +} +KFR_FN(cnegimag) + +template <typename T, size_t N> +KFR_INLINE vec<complex<T>, N> cconj(const vec<complex<T>, N>& x) +{ + return cnegimag(x); +} +KFR_FN(cconj) + +namespace internal +{ +template <typename T> +struct is_complex_impl : std::false_type +{ +}; +template <typename T> +struct is_complex_impl<complex<T>> : std::true_type +{ +}; +} + +// real to complex +template <typename To, typename From, size_t N, KFR_ENABLE_IF(internal::is_complex_impl<To>::value)> +constexpr KFR_INLINE vec<To, N> cast(vec<From, N> value) noexcept +{ + const vec<subtype<To>, N> casted = cast<subtype<To>>(value); + return subcast<To>(interleave(casted, zerovector(casted))); +} + +// complex to complex +template <typename To, typename From, size_t N, KFR_ENABLE_IF(internal::is_complex_impl<To>::value)> +constexpr KFR_INLINE vec<To, N> cast(vec<complex<From>, N> value) noexcept +{ + return subcast<To>(cast<subtype<To>>(subcast<From>(value))); +} + +// complex to real +template <typename To, typename From, size_t N, KFR_ENABLE_IF(!internal::is_complex_impl<To>::value)> +constexpr KFR_INLINE vec<To, N> cast(vec<complex<From>, N> value) noexcept +{ + static_assert(sizeof(To) == 0, "Can't cast complex to real"); + return {}; +} + +template <typename T, size_t N> +constexpr KFR_INLINE vec<complex<T>, N / 2> ccomp(const vec<T, N>& x) +{ + return subcast<complex<T>>(x); +} + +template <typename T, size_t N> +constexpr KFR_INLINE vec<T, N * 2> cdecom(const vec<complex<T>, N>& x) +{ + return subcast<T>(x); +} + +template <typename T> +constexpr KFR_INLINE T real(const complex<T>& value) +{ + return value.real(); +} +template <typename T, size_t N> +constexpr KFR_INLINE vec<T, N> real(const vec<complex<T>, N>& value) +{ + return even(subcast<T>(value)); +} + +template <typename T> +using realtype = decltype(real(std::declval<T>())); +template <typename T> +using realftype = ftype<decltype(real(std::declval<T>()))>; + +KFR_FN(real) 
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> +KFR_INLINE expr_func<fn_real, E1> real(E1&& x) +{ + return { {}, std::forward<E1>(x) }; +} + +template <typename T> +constexpr KFR_INLINE T imag(const complex<T>& value) +{ + return value.imag(); +} +template <typename T, size_t N> +constexpr KFR_INLINE vec<T, N> imag(const vec<complex<T>, N>& value) +{ + return odd(subcast<T>(value)); +} +KFR_FN(imag) +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> +KFR_INLINE expr_func<fn_imag, E1> imag(E1&& x) +{ + return { {}, std::forward<E1>(x) }; +} + +template <typename T1, typename T2 = T1, size_t N, typename T = common_type<T1, T2>> +constexpr KFR_INLINE vec<complex<T>, N> make_complex(const vec<T1, N>& real, const vec<T2, N>& imag = T2(0)) +{ + return subcast<complex<T>>(interleave(cast<T>(real), cast<T>(imag))); +} + +template <typename T1, typename T2 = T1, typename T = common_type<T1, T2>> +constexpr KFR_INLINE complex<T> make_complex(T1 real, T2 imag = T2(0)) +{ + return complex<T>(cast<T>(real), cast<T>(imag)); +} + +namespace internal +{ + +template <cpu_t c = cpu_t::native> +struct in_complex : in_select<c>, in_sin_cos<c>, in_sinh_cosh<c>, in_sqrt<c>, in_atan<c>, in_log_exp<c> +{ + constexpr static cpu_t cur = c; + using in_sqrt<c>::sqrt; + using in_sin_cos<c>::sincos; + using in_sin_cos<c>::cossin; + using in_sinh_cosh<c>::sinhcosh; + using in_sinh_cosh<c>::coshsinh; + using in_atan<c>::atan2; + using in_log_exp<c>::log; + using in_log_exp<c>::log2; + using in_log_exp<c>::log10; + using in_log_exp<c>::exp; + using in_log_exp<c>::exp2; + using in_log_exp<c>::exp10; + + template <typename T, size_t N> + KFR_SINTRIN vec<complex<T>, N> csin(const vec<complex<T>, N>& x) + { + return ccomp(sincos(cdecom(cdupreal(x))) * coshsinh(cdecom(cdupimag(x)))); + } + template <typename T, size_t N> + KFR_SINTRIN vec<complex<T>, N> csinh(const vec<complex<T>, N>& x) + { + return ccomp(sinhcosh(cdecom(cdupreal(x))) * 
cossin(cdecom(cdupimag(x)))); + } + template <typename T, size_t N> + KFR_SINTRIN vec<complex<T>, N> ccos(const vec<complex<T>, N>& x) + { + return ccomp(negodd(cossin(cdecom(cdupreal(x))) * coshsinh(cdecom(cdupimag(x))))); + } + template <typename T, size_t N> + KFR_SINTRIN vec<complex<T>, N> ccosh(const vec<complex<T>, N>& x) + { + return ccomp(coshsinh(cdecom(cdupreal(x))) * cossin(cdecom(cdupimag(x)))); + } + + template <typename T, size_t N> + KFR_SINTRIN vec<T, N> cabs(const vec<complex<T>, N>& x) + { + const vec<T, N* 2> xx = sqr(cdecom(x)); + return sqrt(even(xx) + odd(xx)); + } + template <typename T, size_t N> + KFR_SINTRIN vec<T, N> carg(const vec<complex<T>, N>& x) + { + const vec<T, N* 2> xx = cdecom(x); + return atan2(even(xx), odd(xx)); + } + + template <typename T, size_t N> + KFR_SINTRIN vec<complex<T>, N> clog(const vec<complex<T>, N>& x) + { + return make_complex(log(cabs(x)), carg(x)); + } + template <typename T, size_t N> + KFR_SINTRIN vec<complex<T>, N> clog2(const vec<complex<T>, N>& x) + { + return clog(x) * c_recip_log_2<T>; + } + template <typename T, size_t N> + KFR_SINTRIN vec<complex<T>, N> clog10(const vec<complex<T>, N>& x) + { + return clog(x) * c_recip_log_10<T>; + } + + template <typename T, size_t N> + KFR_SINTRIN vec<complex<T>, N> cexp(const vec<complex<T>, N>& x) + { + return ccomp(exp(cdecom(cdupreal(x))) * cossin(cdecom(cdupimag(x)))); + } + template <typename T, size_t N> + KFR_SINTRIN vec<complex<T>, N> cexp2(const vec<complex<T>, N>& x) + { + return cexp(x * c_log_2<T>); + } + template <typename T, size_t N> + KFR_SINTRIN vec<complex<T>, N> cexp10(const vec<complex<T>, N>& x) + { + return cexp(x * c_log_10<T>); + } + + template <typename T, size_t N> + KFR_SINTRIN vec<complex<T>, N> polar(const vec<complex<T>, N>& x) + { + return make_complex(cabs(x), carg(x)); + } + template <typename T, size_t N> + KFR_SINTRIN vec<complex<T>, N> cartesian(const vec<complex<T>, N>& x) + { + return cdupreal(x) * 
ccomp(cossin(cdecom(cdupimag(x)))); + } + + template <typename T, size_t N> + KFR_SINTRIN vec<T, N> cabsdup(vec<T, N> x) + { + x = sqr(x); + return sqrt(x + swap<2>(x)); + } + + template <typename T, size_t N> + KFR_SINTRIN vec<complex<T>, N> csqrt(const vec<complex<T>, N>& x) + { + const vec<T, N> t = (cabsdup(cdecom(x)) + cdecom(cnegimag(cdupreal(x)))) * T(0.5); + return ccomp(select(dupodd(x) < T(), cdecom(cnegimag(ccomp(t))), t)); + } + + KFR_HANDLE_SCALAR(csin) + KFR_HANDLE_SCALAR(csinh) + KFR_HANDLE_SCALAR(ccos) + KFR_HANDLE_SCALAR(ccosh) + KFR_HANDLE_SCALAR(cabs) + KFR_HANDLE_SCALAR(carg) + KFR_HANDLE_SCALAR(clog) + KFR_HANDLE_SCALAR(clog2) + KFR_HANDLE_SCALAR(clog10) + KFR_HANDLE_SCALAR(cexp) + KFR_HANDLE_SCALAR(cexp2) + KFR_HANDLE_SCALAR(cexp10) + KFR_HANDLE_SCALAR(polar) + KFR_HANDLE_SCALAR(cartesian) + KFR_HANDLE_SCALAR(csqrt) + + KFR_SPEC_FN(in_complex, csin) + KFR_SPEC_FN(in_complex, csinh) + KFR_SPEC_FN(in_complex, ccos) + KFR_SPEC_FN(in_complex, ccosh) + KFR_SPEC_FN(in_complex, cabs) + KFR_SPEC_FN(in_complex, carg) + KFR_SPEC_FN(in_complex, clog) + KFR_SPEC_FN(in_complex, clog2) + KFR_SPEC_FN(in_complex, clog10) + KFR_SPEC_FN(in_complex, cexp) + KFR_SPEC_FN(in_complex, cexp2) + KFR_SPEC_FN(in_complex, cexp10) + KFR_SPEC_FN(in_complex, polar) + KFR_SPEC_FN(in_complex, cartesian) + KFR_SPEC_FN(in_complex, csqrt) +}; +} + +namespace native +{ +using fn_csin = internal::in_complex<>::fn_csin; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> +KFR_INTRIN ftype<T1> csin(const T1& x) +{ + return internal::in_complex<>::csin(x); +} +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> +KFR_INTRIN expr_func<fn_csin, E1> csin(E1&& x) +{ + return { {}, std::forward<E1>(x) }; +} + +using fn_csinh = internal::in_complex<>::fn_csinh; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> +KFR_INTRIN ftype<T1> csinh(const T1& x) +{ + return internal::in_complex<>::csinh(x); +} +template <typename E1, 
KFR_ENABLE_IF(is_input_expression<E1>::value)> +KFR_INTRIN expr_func<fn_csinh, E1> csinh(E1&& x) +{ + return { {}, std::forward<E1>(x) }; +} + +using fn_ccos = internal::in_complex<>::fn_ccos; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> +KFR_INTRIN ftype<T1> ccos(const T1& x) +{ + return internal::in_complex<>::ccos(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> +KFR_INTRIN expr_func<fn_ccos, E1> ccos(E1&& x) +{ + return { fn_ccos(), std::forward<E1>(x) }; +} + +using fn_ccosh = internal::in_complex<>::fn_ccosh; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> +KFR_INTRIN ftype<T1> ccosh(const T1& x) +{ + return internal::in_complex<>::ccosh(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> +KFR_INTRIN expr_func<fn_ccosh, E1> ccosh(E1&& x) +{ + return { fn_ccosh(), std::forward<E1>(x) }; +} + +using fn_cabs = internal::in_complex<>::fn_cabs; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> +KFR_INTRIN realftype<T1> cabs(const T1& x) +{ + return internal::in_complex<>::cabs(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> +KFR_INTRIN expr_func<fn_cabs, E1> cabs(E1&& x) +{ + return { fn_cabs(), std::forward<E1>(x) }; +} + +using fn_carg = internal::in_complex<>::fn_carg; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> +KFR_INTRIN realftype<T1> carg(const T1& x) +{ + return internal::in_complex<>::carg(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> +KFR_INTRIN expr_func<fn_carg, E1> carg(E1&& x) +{ + return { fn_carg(), std::forward<E1>(x) }; +} + +using fn_clog = internal::in_complex<>::fn_clog; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> +KFR_INTRIN ftype<T1> clog(const T1& x) +{ + return internal::in_complex<>::clog(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> +KFR_INTRIN expr_func<fn_clog, E1> clog(E1&& x) +{ + return { fn_clog(), 
std::forward<E1>(x) }; +} + +using fn_clog2 = internal::in_complex<>::fn_clog2; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> +KFR_INTRIN ftype<T1> clog2(const T1& x) +{ + return internal::in_complex<>::clog2(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> +KFR_INTRIN expr_func<fn_clog2, E1> clog2(E1&& x) +{ + return { fn_clog2(), std::forward<E1>(x) }; +} + +using fn_clog10 = internal::in_complex<>::fn_clog10; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> +KFR_INTRIN ftype<T1> clog10(const T1& x) +{ + return internal::in_complex<>::clog10(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> +KFR_INTRIN expr_func<fn_clog10, E1> clog10(E1&& x) +{ + return { fn_clog10(), std::forward<E1>(x) }; +} + +using fn_cexp = internal::in_complex<>::fn_cexp; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> +KFR_INTRIN ftype<T1> cexp(const T1& x) +{ + return internal::in_complex<>::cexp(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> +KFR_INTRIN expr_func<fn_cexp, E1> cexp(E1&& x) +{ + return { fn_cexp(), std::forward<E1>(x) }; +} + +using fn_cexp2 = internal::in_complex<>::fn_cexp2; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> +KFR_INTRIN ftype<T1> cexp2(const T1& x) +{ + return internal::in_complex<>::cexp2(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> +KFR_INTRIN expr_func<fn_cexp2, E1> cexp2(E1&& x) +{ + return { fn_cexp2(), std::forward<E1>(x) }; +} + +using fn_cexp10 = internal::in_complex<>::fn_cexp10; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> +KFR_INTRIN ftype<T1> cexp10(const T1& x) +{ + return internal::in_complex<>::cexp10(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> +KFR_INTRIN expr_func<fn_cexp10, E1> cexp10(E1&& x) +{ + return { fn_cexp10(), std::forward<E1>(x) }; +} + +using fn_polar = internal::in_complex<>::fn_polar; 
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> +KFR_INTRIN ftype<T1> polar(const T1& x) +{ + return internal::in_complex<>::polar(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> +KFR_INTRIN expr_func<fn_polar, E1> polar(E1&& x) +{ + return { fn_polar(), std::forward<E1>(x) }; +} + +using fn_cartesian = internal::in_complex<>::fn_cartesian; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> +KFR_INTRIN ftype<T1> cartesian(const T1& x) +{ + return internal::in_complex<>::cartesian(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> +KFR_INTRIN expr_func<fn_cartesian, E1> cartesian(E1&& x) +{ + return { fn_cartesian(), std::forward<E1>(x) }; +} + +using fn_csqrt = internal::in_complex<>::fn_csqrt; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> +KFR_INTRIN ftype<T1> csqrt(const T1& x) +{ + return internal::in_complex<>::csqrt(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> +KFR_INTRIN expr_func<fn_csqrt, E1> csqrt(E1&& x) +{ + return { fn_csqrt(), std::forward<E1>(x) }; +} +} +} +namespace cometa +{ +template <typename T> +struct compound_type_traits<kfr::complex<T>> +{ + constexpr static size_t width = 2; + using subtype = T; + using deep_subtype = cometa::deep_subtype<T>; + constexpr static bool is_scalar = false; + template <typename U> + using rebind = kfr::complex<U>; + template <typename U> + using deep_rebind = kfr::complex<cometa::deep_rebind<subtype, U>>; + + static constexpr const subtype& at(const kfr::complex<T>& value, size_t index) + { + return index == 0 ? 
value.real() : value.imag(); + } +}; +} + +#pragma clang diagnostic pop diff --git a/include/kfr/base/constants.hpp b/include/kfr/base/constants.hpp @@ -0,0 +1,93 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. 
+ */ +#pragma once + +#include "types.hpp" +#include <limits> + +namespace kfr +{ + +// π (pi) +// c_pi<f64, 4> = 4pi +// c_pi<f64, 3, 4> = 3/4pi +template <typename T, int m = 1, int d = 1, typename Tsub = subtype<T>> +constexpr Tsub c_pi = Tsub(3.1415926535897932384626433832795 * m / d); + +// π² (pi²) +// c_sqr_pi<f64, 4> = 4pi² +// c_sqr_pi<f64, 3, 4> = 3/4pi² +template <typename T, int m = 1, int d = 1, typename Tsub = subtype<T>> +constexpr Tsub c_sqr_pi = Tsub(9.8696044010893586188344909998762 * m / d); + +// 1/π (1/pi) +// c_recip_pi<f64> 1/pi +// c_recip_pi<f64, 4> 4/pi +template <typename T, int m = 1, int d = 1, typename Tsub = subtype<T>> +constexpr Tsub c_recip_pi = Tsub(0.31830988618379067153776752674503 * m / d); + +// degree to radian conversion factor +template <typename T, typename Tsub = subtype<T>> +constexpr Tsub c_degtorad = c_pi<T, 1, 180>; + +// radian to degree conversion factor +template <typename T, typename Tsub = subtype<T>> +constexpr Tsub c_radtodeg = c_recip_pi<T, 180>; + +// e, Euler's number +template <typename T, int m = 1, int d = 1, typename Tsub = subtype<T>> +constexpr Tsub c_e = Tsub(2.718281828459045235360287471352662 * m / d); + +template <typename T, typename Tsub = subtype<T>> +constexpr unsigned c_mantissa_bits = sizeof(Tsub) == 32 ? 
23 : 52; + +template <typename T, typename Tsub = usubtype<T>> +constexpr Tsub c_mantissa_mask = (Tsub(1) << c_mantissa_bits<T>)-1; + +template <typename T, typename Tsub = subtype<T>> +constexpr Tsub c_epsilon = (std::numeric_limits<Tsub>::epsilon()); + +template <typename T, typename Tsub = subtype<T>> +constexpr Tsub c_infinity = std::numeric_limits<Tsub>::infinity(); + +template <typename T, typename Tsub = subtype<T>> +constexpr Tsub c_neginfinity = -std::numeric_limits<Tsub>::infinity(); + +template <typename T, typename Tsub = subtype<T>> +constexpr Tsub c_qnan = std::numeric_limits<Tsub>::quiet_NaN(); + +template <typename T, typename Tsub = subtype<T>> +constexpr Tsub c_recip_log_2 = Tsub(1.442695040888963407359924681001892137426645954); + +template <typename T, typename Tsub = subtype<T>> +constexpr Tsub c_recip_log_10 = Tsub(0.43429448190325182765112891891661); + +template <typename T, typename Tsub = subtype<T>> +constexpr Tsub c_log_2 = Tsub(0.69314718055994530941723212145818); + +template <typename T, typename Tsub = subtype<T>> +constexpr Tsub c_log_10 = Tsub(2.3025850929940456840179914546844); + +template <typename T, int m = 1, int d = 1, typename Tsub = subtype<T>> +constexpr Tsub c_sqrt_2 = Tsub(1.4142135623730950488016887242097 * m / d); +} diff --git a/include/kfr/base/digitreverse.hpp b/include/kfr/base/digitreverse.hpp @@ -0,0 +1,121 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+#include "shuffle.hpp"
+#include "types.hpp"
+
+namespace kfr
+{
+
+namespace internal
+{
+
+// Radix-2 digit reversal (classic bit reversal): mirrors all 32 bits via
+// parallel mask-and-swap steps, then right-shifts so only the reversed
+// low `bits` bits remain.
+template <size_t radix, size_t bits>
+constexpr enable_if<radix == 2, u32> digitreverse(u32 x)
+{
+    x = (((x & 0xaaaaaaaa) >> 1) | ((x & 0x55555555) << 1));
+    x = (((x & 0xcccccccc) >> 2) | ((x & 0x33333333) << 2));
+    x = (((x & 0xf0f0f0f0) >> 4) | ((x & 0x0f0f0f0f) << 4));
+    x = (((x & 0xff00ff00) >> 8) | ((x & 0x00ff00ff) << 8));
+    return ((x >> 16) | (x << 16)) >> (32 - bits);
+}
+
+// Swaps the bit groups selected by mask m with the groups `shift` bits
+// above them (one delta-swap step of a bit permutation)
+constexpr inline u32 bit_permute_step_simple(u32 x, u32 m, u32 shift)
+{
+    return ((x & m) << shift) | ((x >> shift) & m);
+}
+
+// Radix-4 digit reversal: reverses the order of 2-bit digits in the low
++// `bits` bits. Implemented as progressively wider delta-swaps (only as
+// many steps as the digit count requires), then right-aligns the result.
+template <size_t radix, size_t bits>
+constexpr enable_if<radix == 4, u32> digitreverse(u32 x)
+{
+    if (bits <= 2)
+        return x;
+    if (bits <= 4)
+    {
+        x = bit_permute_step_simple(x, 0x33333333, 2); // Bit index complement 1 regroups 4 bits
+        return x >> (4 - bits);
+    }
+    if (bits <= 8)
+    {
+        x = bit_permute_step_simple(x, 0x33333333, 2); // Bit index complement 1 regroups 4 bits
+        x = bit_permute_step_simple(x, 0x0f0f0f0f, 4); // Bit index complement 2 regroups 8 bits
+        return x >> (8 - bits);
+    }
+    if (bits <= 16)
+    {
+        x = bit_permute_step_simple(x, 0x33333333, 2); // Bit index complement 1 regroups 4 bits
+        x = bit_permute_step_simple(x, 0x0f0f0f0f, 4); // Bit index complement 2 regroups 8 bits
+        x = bit_permute_step_simple(x, 0x00ff00ff, 8); // Bit index complement 3 regroups 16 bits
+        return x >> (16 - bits);
+    }
+    if (bits <= 32)
+    {
+        x = bit_permute_step_simple(x, 0x33333333, 2); // Bit index complement 1 regroups 4 bits
+        x = bit_permute_step_simple(x, 0x0f0f0f0f, 4); // Bit index complement 2 regroups 8 bits
+        x = bit_permute_step_simple(x, 0x00ff00ff, 8); // Bit index complement 3 regroups 16 bits
+        x = bit_permute_step_simple(x, 0x0000ffff, 16); // Bit index complement 4 regroups 32 bits
+        return x >> (32 - bits);
+    }
+    return x;
+}
+
+// Functor mapping a lane index to its digit-reversed index; used as the
+// index generator for shufflevector below
+template <size_t radix, size_t bits>
+struct shuffle_index_digitreverse
+{
+    constexpr inline size_t operator()(size_t index) const
+    {
+        return digitreverse<radix, bits>(static_cast<u32>(index));
+    }
+};
+}
+
+// Permutes vector lanes into digit-reversed order (groups of `groupsize`
+// lanes move as a unit, e.g. groupsize=2 keeps complex pairs together)
+template <size_t radix, size_t groupsize = 1, typename T, size_t N>
+KFR_INLINE vec<T, N> digitreverse(vec<T, N> x)
+{
+    return shufflevector<N, internal::shuffle_index_digitreverse<radix, ilog2(N / groupsize)>, groupsize>(x);
+}
+
+// Bit-reversed (radix-2) lane permutation
+template <size_t groupsize = 1, typename T, size_t N>
+KFR_INLINE vec<T, N> bitreverse(vec<T, N> x)
+{
+    return digitreverse<2, groupsize>(x);
+}
+
+// Radix-4 digit-reversed lane permutation
+template <size_t groupsize = 1, typename T, size_t N>
+KFR_INLINE vec<T, N> digitreverse4(vec<T, N> x)
+{
+    return digitreverse<4, groupsize>(x);
+}
+
+// Scalar overload: bit-reverses the low `bits` bits of x
+template <size_t bits>
+constexpr inline u32 bitreverse(u32 x)
+{
+    return internal::digitreverse<2, bits>(x);
+}
+
+// Scalar overload: radix-4 digit-reverses the low `bits` bits of x
+template <size_t bits>
+constexpr inline u32 digitreverse4(u32 x)
+{
+    return internal::digitreverse<4, bits>(x);
+}
+}
diff --git a/include/kfr/base/dispatch.hpp b/include/kfr/base/dispatch.hpp
@@ -0,0 +1,200 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. + */ +#pragma once + +#include "kfr.h" + +#include "types.hpp" + +namespace kfr +{ + +namespace internal +{ + +template <typename Fn, cpu_t newcpu, typename = void> +struct retarget_impl +{ + using type = Fn; +}; + +template <typename Fn, cpu_t newcpu> +struct retarget_impl<Fn, newcpu, void_t<typename Fn::template retarget_this<newcpu>>> +{ + using type = typename Fn::template retarget_this<newcpu>; +}; +} + +template <typename Fn, cpu_t newcpu> +using retarget = typename internal::retarget_impl<Fn, newcpu>::type; + +template <cpu_t newcpu, typename Fn, typename NewFn = retarget<Fn, newcpu>, + KFR_ENABLE_IF(std::is_constructible<NewFn, Fn&&>::value)> +KFR_INLINE NewFn retarget_func(Fn&& fn) +{ + return NewFn(std::move(fn)); +} + +template <cpu_t newcpu, typename Fn, typename NewEmptyFn = retarget<Fn, newcpu>, + KFR_ENABLE_IF(!std::is_constructible<NewEmptyFn, Fn&&>::value && std::is_empty<NewEmptyFn>::value && + std::is_constructible<NewEmptyFn>::value)> +KFR_INLINE NewEmptyFn retarget_func(Fn&&) +{ + return NewEmptyFn(); +} + +namespace internal +{ + +template <cpu_t a> +struct cpu_caller; + +template <> +struct cpu_caller<cpu_t::avx2> +{ + constexpr static cpu_t a = cpu_t::avx2; + + template <typename Fn, typename... Args> + KFR_NOINLINE static KFR_USE_CPU(avx2) result_of<Fn(Args...)> call(Fn&& fn, Args&&... args) + { + return fn(std::forward<Args>(args)...); + } + + template <typename Fn, typename... Args> + KFR_NOINLINE static KFR_USE_CPU(avx2) result_of<Fn(Args...)> retarget_call(Fn&& fn, Args&&... 
args) + { + return (retarget_func<a>(std::forward<Fn>(fn)))(std::forward<Args>(args)...); + } +}; + +template <> +struct cpu_caller<cpu_t::avx1> +{ + constexpr static cpu_t a = cpu_t::avx1; + + template <typename Fn, typename... Args> + KFR_NOINLINE static KFR_USE_CPU(avx) result_of<Fn(Args...)> call(Fn&& fn, Args&&... args) + { + return fn(std::forward<Args>(args)...); + } + + template <typename Fn, typename... Args> + KFR_NOINLINE static KFR_USE_CPU(avx) result_of<Fn(Args...)> retarget_call(Fn&& fn, Args&&... args) + { + return (retarget_func<a>(std::forward<Fn>(fn)))(std::forward<Args>(args)...); + } +}; + +template <> +struct cpu_caller<cpu_t::sse41> +{ + constexpr static cpu_t a = cpu_t::sse41; + + template <typename Fn, typename... Args> + KFR_NOINLINE static KFR_USE_CPU(sse41) result_of<Fn(Args...)> call(Fn&& fn, Args&&... args) + { + return fn(std::forward<Args>(args)...); + } + + template <typename Fn, typename... Args> + KFR_NOINLINE static KFR_USE_CPU(sse41) result_of<Fn(Args...)> retarget_call(Fn&& fn, Args&&... args) + { + return (retarget_func<a>(std::forward<Fn>(fn)))(std::forward<Args>(args)...); + } +}; + +template <> +struct cpu_caller<cpu_t::ssse3> +{ + constexpr static cpu_t a = cpu_t::ssse3; + + template <typename Fn, typename... Args> + KFR_NOINLINE static KFR_USE_CPU(ssse3) result_of<Fn(Args...)> call(Fn&& fn, Args&&... args) + { + return fn(std::forward<Args>(args)...); + } + + template <typename Fn, typename... Args> + KFR_NOINLINE static KFR_USE_CPU(ssse3) result_of<Fn(Args...)> retarget_call(Fn&& fn, Args&&... args) + { + return (retarget_func<a>(std::forward<Fn>(fn)))(std::forward<Args>(args)...); + } +}; + +template <> +struct cpu_caller<cpu_t::sse3> +{ + constexpr static cpu_t a = cpu_t::sse3; + + template <typename Fn, typename... Args> + KFR_NOINLINE static KFR_USE_CPU(sse3) result_of<Fn(Args...)> call(Fn&& fn, Args&&... args) + { + return fn(std::forward<Args>(args)...); + } + + template <typename Fn, typename... 
Args> + KFR_NOINLINE static KFR_USE_CPU(sse3) result_of<Fn(Args...)> retarget_call(Fn&& fn, Args&&... args) + { + return (retarget_func<a>(std::forward<Fn>(fn)))(std::forward<Args>(args)...); + } +}; + +template <> +struct cpu_caller<cpu_t::sse2> +{ + constexpr static cpu_t a = cpu_t::sse2; + + template <typename Fn, typename... Args> + KFR_NOINLINE static KFR_USE_CPU(sse2) result_of<Fn(Args...)> call(Fn&& fn, Args&&... args) + { + return fn(std::forward<Args>(args)...); + } + + template <typename Fn, typename... Args> + KFR_NOINLINE static KFR_USE_CPU(sse2) result_of<Fn(Args...)> retarget_call(Fn&& fn, Args&&... args) + { + return (retarget_func<a>(std::forward<Fn>(fn)))(std::forward<Args>(args)...); + } +}; + +template <cpu_t c, typename Fn, typename... Args, KFR_ENABLE_IF(c == cpu_t::native)> +KFR_INLINE auto dispatch_impl(Fn&& fn, Args&&... args) -> decltype(fn(std::forward<Args>(args)...)) +{ + using targetFn = retarget<Fn, cpu_t::native>; + targetFn newfn = retarget_func<c>(std::forward<Fn>(fn)); + return newfn(std::forward<Args>(args)...); +} + +template <cpu_t c, typename Fn, typename... Args, KFR_ENABLE_IF(c != cpu_t::native && c != cpu_t::runtime)> +KFR_INLINE auto dispatch_impl(Fn&& fn, Args&&... args) -> decltype(fn(std::forward<Args>(args)...)) +{ + return internal::cpu_caller<c>::retarget_call(std::forward<Fn>(fn), std::forward<Args>(args)...); +} +} + +template <cpu_t c, typename Fn, typename... Args> +KFR_INLINE auto dispatch(Fn&& fn, Args&&... 
args) -> decltype(fn(std::forward<Args>(args)...)) +{ + return internal::dispatch_impl<c>(std::forward<Fn>(fn), std::forward<Args>(args)...); +} +} diff --git a/include/kfr/base/expression.hpp b/include/kfr/base/expression.hpp @@ -0,0 +1,315 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. + */ +#pragma once + +#include "dispatch.hpp" +#include "types.hpp" +#include "vec.hpp" + +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wshadow" + +namespace kfr +{ + +template <typename T> +using is_generic = is_same<generic, typename decay<T>::value_type>; + +template <typename T> +using is_infinite = not_t<is_same<size_t, typename decay<T>::size_type>>; + +namespace internal +{ + +template <typename T1> +constexpr inline T1 minsize(T1 x) noexcept +{ + return x; +} + +template <typename T1, typename T2, typename... Ts> +constexpr inline common_type<T1, T2, Ts...> minsize(T1 x, T2 y, Ts... rest) noexcept +{ + return x < y ? minsize(x, rest...) : minsize(y, rest...); +} + +template <typename... 
Args> +struct expression : input_expression +{ + using value_type = common_type<typename decay<Args>::value_type...>; + + using size_type = common_type<typename decay<Args>::size_type...>; + + constexpr size_type size() const noexcept { return size_impl(indicesfor_t<Args...>()); } + + constexpr static size_t count = sizeof...(Args); + expression() = delete; + constexpr expression(Args&&... args) noexcept : args(std::forward<Args>(args)...) {} + + KFR_INLINE void begin_block(size_t size) { begin_block_impl(size, indicesfor_t<Args...>()); } + KFR_INLINE void end_block(size_t size) { end_block_impl(size, indicesfor_t<Args...>()); } + + KFR_INLINE void begin_block(size_t size) const { begin_block_impl(size, indicesfor_t<Args...>()); } + KFR_INLINE void end_block(size_t size) const { end_block_impl(size, indicesfor_t<Args...>()); } + +protected: + std::tuple<Args...> args; + + template <size_t... indices> + constexpr size_type size_impl(csizes_t<indices...>) const noexcept + { + return minsize(std::get<indices>(this->args).size()...); + } + + template <typename Fn, typename T, size_t N> + KFR_INLINE vec<T, N> call(Fn&& fn, size_t index, vec_t<T, N> x) const + { + return call_impl(std::forward<Fn>(fn), indicesfor_t<Args...>(), index, x); + } + template <size_t ArgIndex, typename T, size_t N> + KFR_INLINE vec<T, N> argument(csize_t<ArgIndex>, size_t index, vec_t<T, N> x) const + { + static_assert(ArgIndex < count, "Incorrect ArgIndex"); + return std::get<ArgIndex>(this->args)(cinput, index, x); + } + template <typename T, size_t N> + KFR_INLINE vec<T, N> argument_first(size_t index, vec_t<T, N> x) const + { + return std::get<0>(this->args)(cinput, index, x); + } + +private: + template <typename Arg, size_t N, typename Tin, + typename Tout1 = conditional<is_generic<Arg>::value, Tin, typename decay<Arg>::value_type>, + typename Tout = Tout1> + KFR_INLINE vec_t<Tout, N> vec_t_for() const + { + return {}; + } + template <typename Fn, typename T, size_t N, size_t... 
indices> + KFR_INLINE vec<T, N> call_impl(Fn&& fn, csizes_t<indices...>, size_t index, vec_t<T, N>) const + { + using ratio = func_ratio<Fn>; + constexpr size_t Nin = N * ratio::input / ratio::output; + using Tout = conditional<is_same<generic, value_type>::value, T, value_type>; + + return cast<T>(fn(cast<Tout>(std::get<indices>(this->args)( + cinput, index * ratio::input / ratio::output, vec_t_for<Args, Nin, Tout>()))...)); + } + template <size_t... indices> + KFR_INLINE void begin_block_impl(size_t size, csizes_t<indices...>) + { + swallow{ (std::get<indices>(args).begin_block(size), 0)... }; + } + template <size_t... indices> + KFR_INLINE void end_block_impl(size_t size, csizes_t<indices...>) + { + swallow{ (std::get<indices>(args).end_block(size), 0)... }; + } + template <size_t... indices> + KFR_INLINE void begin_block_impl(size_t size, csizes_t<indices...>) const + { + swallow{ (std::get<indices>(args).begin_block(size), 0)... }; + } + template <size_t... indices> + KFR_INLINE void end_block_impl(size_t size, csizes_t<indices...>) const + { + swallow{ (std::get<indices>(args).end_block(size), 0)... }; + } +}; + +template <typename T, size_t width = 1> +struct expression_scalar : input_expression +{ + using value_type = T; + expression_scalar() = delete; + constexpr expression_scalar(const T& val) noexcept : val(val) {} + constexpr expression_scalar(vec<T, width> val) noexcept : val(val) {} + const vec<T, width> val; + + template <typename U, size_t N> + KFR_INLINE vec<U, N> operator()(cinput_t, size_t, vec_t<U, N>) const + { + return resize<N>(cast<U>(val)); + } +}; + +template <typename Fn, typename Args, typename Enable = void> +struct generic_result +{ + using type = generic; +}; + +template <typename Fn, typename... 
Args> +struct generic_result<Fn, ctypes_t<Args...>, void_t<enable_if<!or_t<is_same<generic, Args>...>::value>>> +{ + using type = subtype<decltype(std::declval<Fn>()(std::declval<vec<decay<Args>, 1>>()...))>; +}; + +template <typename Fn, typename... Args> +struct expression_function : expression<Args...> +{ + using ratio = func_ratio<Fn>; + + using value_type = typename generic_result<Fn, ctypes_t<value_type_of<Args>...>>::type; + + template <cpu_t newcpu> + using retarget_this = expression_function<retarget<Fn, newcpu>, retarget<Args, newcpu>...>; + + expression_function(Fn&& fn, Args&&... args) noexcept : expression<Args...>(std::forward<Args>(args)...), + fn(std::forward<Fn>(fn)) + { + } + template <typename T, size_t N> + KFR_INLINE vec<T, N> operator()(cinput_t, size_t index, vec_t<T, N> x) const + { + static_assert(is_same<T, value_type_of<expression_function>>::value || + is_generic<expression_function>::value, + "Can't cast from value_type to T"); + return this->call(fn, index, x); + } + +protected: + Fn fn; +}; + +template <typename T> +using arg_impl = conditional<is_number<T>::value || is_vec<T>::value, + expression_scalar<subtype<decay<T>>, compound_type_traits<decay<T>>::width>, T>; + +template <typename T> +using arg = internal::arg_impl<T>; + +template <typename Tout, typename Tin, size_t width, typename OutFn, typename Fn> +KFR_INLINE void process_cycle(OutFn&& outfn, const Fn& fn, size_t& i, size_t size) +{ + const size_t count = size / width * width; + KFR_LOOP_NOUNROLL + for (; i < count; i += width) + { + outfn(coutput, i, cast<Tout>(fn(cinput, i, vec_t<Tin, width>()))); + } +} +} + +template <typename A> +KFR_INLINE internal::arg<A> e(A&& a) +{ + return internal::arg<A>(std::forward<A>(a)); +} + +template <typename T> +KFR_INLINE internal::expression_scalar<T> scalar(const T& val) +{ + return internal::expression_scalar<T>(val); +} + +template <typename T, size_t N> +KFR_INLINE internal::expression_scalar<T, N> scalar(vec<T, N> val) +{ + 
return internal::expression_scalar<T, N>(val); +} + +template <typename Fn, typename... Args> +KFR_INLINE internal::expression_function<decay<Fn>, internal::arg<Args>...> bind_expression(Fn&& fn, + Args&&... args) +{ + return internal::expression_function<decay<Fn>, internal::arg<Args>...>(std::forward<Fn>(fn), + std::forward<Args>(args)...); +} + +template <typename Tout, cpu_t c = cpu_t::native, size_t width = internal::get_vector_width<Tout, c>(2, 4), + typename OutFn, typename Fn> +KFR_INLINE void process(OutFn&& outfn, const Fn& fn, size_t size) +{ + static_assert(is_output_expression<OutFn>::value, "OutFn must be an expression"); + static_assert(is_input_expression<Fn>::value, "Fn must be an expression"); + constexpr size_t comp = lcm(func_ratio<OutFn>::input, func_ratio<Fn>::output); + size *= comp; + outfn.output_begin_block(size); + fn.begin_block(size); + + using Tin = conditional<is_generic<Fn>::value, Tout, value_type_of<Fn>>; + + size_t i = 0; + internal::process_cycle<Tout, Tin, width>(std::forward<OutFn>(outfn), fn, i, size); + internal::process_cycle<Tout, Tin, comp>(std::forward<OutFn>(outfn), fn, i, size); + + fn.end_block(size); + outfn.output_end_block(size); +} + +namespace internal +{ + +template <typename T, typename E1> +struct expressoin_typed : input_expression +{ + using value_type = T; + + expressoin_typed(E1&& e1) : e1(std::forward<E1>(e1)) {} + + template <typename U, size_t N> + KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) + { + return cast<U>(e1(cinput, index, vec_t<T, N>())); + } + E1 e1; +}; + +template <typename T, typename E1> +struct expressoin_sized : input_expression +{ + using value_type = T; + using size_type = size_t; + + expressoin_sized(E1&& e1, size_t size) : e1(std::forward<E1>(e1)), m_size(size) {} + + template <typename U, size_t N> + KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const + { + auto val = e1(cinput, index, vec_t<T, N>()); + return cast<U>(val); + } + + 
constexpr size_t size() const noexcept { return m_size; } + E1 e1; + size_t m_size; +}; +} + +template <typename T, typename E1> +inline internal::expressoin_typed<T, E1> typed(E1&& e1) +{ + return internal::expressoin_typed<T, E1>(std::forward<E1>(e1)); +} +template <typename T, typename E1> +inline internal::expressoin_sized<T, E1> typed(E1&& e1, size_t size) +{ + return internal::expressoin_sized<T, E1>(std::forward<E1>(e1), size); +} + +template <typename Fn, typename... Args> +using expr_func = internal::expression_function<Fn, internal::arg<Args>...>; +} +#pragma clang diagnostic pop diff --git a/include/kfr/base/function.hpp b/include/kfr/base/function.hpp @@ -0,0 +1,124 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. + */ +#pragma once + +#include "dispatch.hpp" +#include "expression.hpp" +#include "shuffle.hpp" +#include "types.hpp" +#include "vec.hpp" + +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wshadow" + +namespace kfr +{ + +#define KFR_HANDLE_ALL(fn) \ + template <typename T, size_t N, typename... 
Args> \ + KFR_SINTRIN vec<T, N> fn(vec<T, N> x, Args&&... args) \ + { \ + return handle_all<cpu, fn_##fn>(x, std::forward<Args>(args)...); \ + } +#define KFR_HANDLE_ALL_REDUCE(redfn, fn) \ + template <typename T, size_t N, typename... Args> \ + KFR_SINTRIN auto fn(vec<T, N> x, Args&&... args) \ + { \ + return handle_all_reduce<cpu, redfn, fn_##fn>(x, std::forward<Args>(args)...); \ + } + +#define KFR_HANDLE_SCALAR(fn) \ + template <typename T, typename... Ts, KFR_ENABLE_IF(!is_vec<T>::value)> \ + KFR_SINTRIN auto fn(const T& x, const Ts&... rest) \ + { \ + return fn(make_vector(x), make_vector(rest)...)[0]; \ + } + +namespace internal +{ + +struct fn_disabled +{ + constexpr static bool disabled = true; +}; + +template <cpu_t c, typename T> +constexpr inline size_t next_fast_width(size_t n) +{ + return n > vector_width<T, cpu_t::sse2> ? vector_width<T, c> : vector_width<T, cpu_t::sse2>; +} + +template <cpu_t c, typename T, size_t N, size_t Nout = next_fast_width<c, T>(N)> +KFR_INLINE vec<T, Nout> extend_reg(vec<T, N> x) +{ + return extend<Nout>(x); +} +template <cpu_t c, typename T, size_t N, size_t Nout = next_fast_width<c, T>(N)> +KFR_INLINE vec<T, Nout> extend_reg(vec<T, N> x, T value) +{ + return widen<Nout>(x, value); +} + +template <cpu_t cur, typename Fn, typename T, size_t N, typename... Args, + KFR_ENABLE_IF(N < vector_width<T, cur>)> +KFR_INLINE auto handle_all_f(Fn&& fn, vec<T, N> x, Args&&... args) +{ + return narrow<N>(fn(extend_reg<cur>(x), extend_reg<cur>(args)...)); +} +template <cpu_t cur, typename Fn, typename T, size_t N, typename... Args, + KFR_ENABLE_IF(N > vector_width<T, cur>)> +KFR_INLINE auto handle_all_f(Fn&& fn, vec<T, N> x, Args&&... args) +{ + return concat(fn(low(x), low(args)...), fn(high(x), high(args)...)); +} + +template <cpu_t cur, typename Fn, typename T, size_t N, typename... Args> +KFR_INLINE auto handle_all(vec<T, N> x, Args&&... 
args) +{ + Fn fn{}; + return handle_all_f<cur>(fn, x, std::forward<Args>(args)...); +} + +template <cpu_t cur, typename RedFn, typename Fn, typename T, size_t N, typename... Args, + typename = u8[N < vector_width<T, cur>]> +KFR_INLINE auto handle_all_reduce_f(RedFn&& redfn, Fn&& fn, vec<T, N> x, Args&&... args) +{ + return fn(extend_reg<cur>(x, redfn(initialvalue<T>())), + extend_reg<cur>(args, redfn(initialvalue<T>()))...); +} +template <cpu_t cur, typename RedFn, typename Fn, typename T, size_t N, typename... Args, + typename = u8[N > vector_width<T, cur>], typename = void> +KFR_INLINE auto handle_all_reduce_f(RedFn&& redfn, Fn&& fn, vec<T, N> x, Args&&... args) +{ + return redfn(fn(low(x), low(args)...), fn(high(x), high(args)...)); +} +template <cpu_t cur, typename RedFn, typename Fn, typename T, size_t N, typename... Args> +KFR_INLINE auto handle_all_reduce(vec<T, N> x, Args&&... args) +{ + RedFn redfn{}; + Fn fn{}; + return handle_all_reduce_f<cur>(redfn, fn, x, std::forward<Args>(args)...); +} +} +} +#pragma clang diagnostic pop diff --git a/include/kfr/base/gamma.hpp b/include/kfr/base/gamma.hpp @@ -0,0 +1,108 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. 
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. + */ +#pragma once +#include "function.hpp" +#include "log_exp.hpp" + +#pragma clang diagnostic push +#if CID_HAS_WARNING("-Wc99-extensions") +#pragma clang diagnostic ignored "-Wc99-extensions" +#endif + +namespace kfr +{ + +namespace internal +{ +template <typename T> +constexpr T gamma_precalc[] = { + 0x2.81b263fec4e08p+0, 0x3.07b4100e04448p+16, -0xa.a0da01d4d4e2p+16, 0xf.05ccb27bb9dbp+16, + -0xa.fa79616b7c6ep+16, 0x4.6dd6c10d4df5p+16, -0xf.a2304199eb4ap+12, 0x1.c21dd4aade3dp+12, + -0x1.62f981f01cf84p+8, 0x5.a937aa5c48d98p+0, -0x3.c640bf82e2104p-8, 0xc.914c540f959cp-24, +}; + +template <cpu_t c = cpu_t::native, cpu_t cc = c> +struct in_gamma : in_log_exp<cc> +{ +private: + using in_log_exp<cc>::exp; + using in_log_exp<cc>::pow; + +public: + template <typename T, size_t N> + KFR_SINTRIN vec<T, N> gamma(vec<T, N> z) + { + constexpr size_t Count = arraysize(internal::gamma_precalc<T>); + vec<T, N> accm = gamma_precalc<T>[0]; + KFR_LOOP_UNROLL + for (size_t k = 1; k < Count; k++) + accm += gamma_precalc<T>[k] / (z + cast<utype<T>>(k)); + accm *= exp(-(z + Count)) * pow(z + Count, z + 0.5); + return accm / z; + } + + template <typename T, size_t N> + KFR_SINTRIN vec<T, N> factorial_approx(vec<T, N> x) + { + return gamma(x + T(1)); + } + KFR_SPEC_FN(in_gamma, gamma) + KFR_SPEC_FN(in_gamma, factorial_approx) +}; +} + +namespace native +{ +using fn_gamma = internal::in_gamma<>::fn_gamma; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> + +KFR_INTRIN ftype<T1> gamma(const T1& x) +{ + return internal::in_gamma<>::gamma(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> + +KFR_INTRIN expr_func<fn_gamma, E1> gamma(E1&& x) +{ + return { fn_gamma(), std::forward<E1>(x) }; +} + +using fn_factorial_approx = 
internal::in_gamma<>::fn_factorial_approx; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> + +KFR_INTRIN ftype<T1> factorial_approx(const T1& x) +{ + return internal::in_gamma<>::factorial_approx(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> + +KFR_INTRIN expr_func<fn_factorial_approx, E1> factorial_approx(E1&& x) +{ + return { fn_factorial_approx(), std::forward<E1>(x) }; +} +} +} + +#pragma clang diagnostic pop diff --git a/include/kfr/base/intrinsics.h b/include/kfr/base/intrinsics.h @@ -0,0 +1,145 @@ +#pragma once + +#include "kfr.h" + +#if KFR_COMPILER_CLANG + +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wreserved-id-macro" + +#ifdef __AVX2__ +#define KFR_AVX2_DEFINED +#endif +#ifdef __AVX__ +#define KFR_AVX1_DEFINED +#endif +#ifdef __SSE4_2__ +#define KFR_SSE42_DEFINED +#endif +#ifdef __SSE4_1__ +#define KFR_SSE41_DEFINED +#endif +#ifdef __SSSE3__ +#define KFR_SSSE3_DEFINED +#endif +#ifdef __SSE3__ +#define KFR_SSE3_DEFINED +#endif +#ifdef __SSE2__ +#define KFR_SSE2_DEFINED +#endif +#ifdef __SSE__ +#define KFR_SSE1_DEFINED +#endif +#ifdef __MMX__ +#define KFR_MMX_DEFINED +#endif + +#ifndef KFR_AVX2_DEFINED +#define __AVX2__ +#endif +#ifndef KFR_AVX1_DEFINED +#define __AVX__ +#endif +#ifndef KFR_SSE42_DEFINED +#define __SSE4_2__ +#endif +#ifndef KFR_SSE41_DEFINED +#define __SSE4_1__ +#endif +#ifndef KFR_SSSE3_DEFINED +#define __SSSE3__ +#endif +#ifndef KFR_SSE3_DEFINED +#define __SSE3__ +#endif +#ifndef KFR_SSE2_DEFINED +#define __SSE2__ +#endif +#ifndef KFR_SSE1_DEFINED +#define __SSE__ +#endif +#ifndef KFR_MMX_DEFINED +#define __MMX__ +#endif + +#ifdef KFR_SKIP_AVX512 +#ifndef __AVX512FINTRIN_H +#define __AVX512FINTRIN_H +#endif +#ifndef __AVX512VLINTRIN_H +#define __AVX512VLINTRIN_H +#endif +#ifndef __AVX512BWINTRIN_H +#define __AVX512BWINTRIN_H +#endif +#ifndef __AVX512CDINTRIN_H +#define __AVX512CDINTRIN_H +#endif +#ifndef __AVX512DQINTRIN_H +#define __AVX512DQINTRIN_H +#endif +#ifndef 
__AVX512VLBWINTRIN_H +#define __AVX512VLBWINTRIN_H +#endif +#ifndef __AVX512VLDQINTRIN_H +#define __AVX512VLDQINTRIN_H +#endif +#ifndef __AVX512ERINTRIN_H +#define __AVX512ERINTRIN_H +#endif +#ifndef __IFMAINTRIN_H +#define __IFMAINTRIN_H +#endif +#ifndef __IFMAVLINTRIN_H +#define __IFMAVLINTRIN_H +#endif +#ifndef __VBMIINTRIN_H +#define __VBMIINTRIN_H +#endif +#ifndef __VBMIVLINTRIN_H +#define __VBMIVLINTRIN_H +#endif + +#endif + +#include <immintrin.h> +#ifdef KFR_OS_WIN +#include <intrin.h> +#endif + +#ifndef KFR_AVX2_DEFINED +#undef __AVX2__ +#endif +#ifndef KFR_AVX1_DEFINED +#undef __AVX__ +#endif +#ifndef KFR_SSE42_DEFINED +#undef __SSE4_2__ +#endif +#ifndef KFR_SSE41_DEFINED +#undef __SSE4_1__ +#endif +#ifndef KFR_SSSE3_DEFINED +#undef __SSSE3__ +#endif +#ifndef KFR_SSE3_DEFINED +#undef __SSE3__ +#endif +#ifndef KFR_SSE2_DEFINED +#undef __SSE2__ +#endif +#ifndef KFR_SSE1_DEFINED +#undef __SSE__ +#endif +#ifndef KFR_MMX_DEFINED +#undef __MMX__ +#endif + +#pragma clang diagnostic pop + +#else + +#include <intrin.h> + +#endif diff --git a/include/kfr/base/kfr.h b/include/kfr/base/kfr.h @@ -0,0 +1,134 @@ +#pragma once + +#include <stddef.h> +#include <stdint.h> + +#include "../cident.h" + +#define KFR_INLINE CID_INLINE +#define KFR_INLINE_MEMBER CID_INLINE_MEMBER +#define KFR_INLINE_LAMBDA CID_INLINE_LAMBDA +#define KFR_NOINLINE CID_NOINLINE +#define KFR_FLATTEN CID_FLATTEN +#define KFR_RESTRICT CID_RESTRICT + +#ifdef CID_COMPILER_CLANG +#define KFR_COMPILER_CLANG CID_COMPILER_CLANG +#endif + +#ifdef CID_OS_WIN +#define KFR_OS_WIN CID_OS_WIN +#endif + +#ifdef CID_OS_OSX +#define KFR_OS_OSX CID_OS_OSX +#endif + +#ifdef CID_OS_LINUX +#define KFR_OS_LINUX CID_OS_LINUX +#endif + +#ifdef CID_GNU_ATTRIBUTES +#define KFR_GNU_ATTRIBUTES CID_GNU_ATTRIBUTES +#endif + +#ifdef CID_MSVC_ATTRIBUTES +#define KFR_GNU_ATTRIBUTES CID_MSVC_ATTRIBUTES +#endif + +#ifdef CID_ARCH_X64 +#define KFR_ARCH_X64 CID_ARCH_X64 +#endif + +#ifdef CID_ARCH_X32 +#define KFR_ARCH_X32 CID_ARCH_X32 
+#endif + +#define KFR_ARCH_NAME CID_ARCH_NAME + +#define KFR_CDECL CID_CDECL + +#define KFR_PUBLIC_C CID_PUBLIC_C + +#ifdef __cplusplus +namespace kfr +{ +using ::cid::arraysize; +} +#endif + +#define KFR_VERSION_STRING "0.9.0" +#define KFR_VERSION_MAJOR 0 +#define KFR_VERSION_MINOR 9 +#define KFR_VERSION_BUILD 0 +#define KFR_VERSION 900 + +#ifdef __cplusplus +namespace kfr +{ +constexpr const char version_string[] = KFR_VERSION_STRING; +constexpr int version_major = KFR_VERSION_MAJOR; +constexpr int version_minor = KFR_VERSION_MINOR; +constexpr int version_build = KFR_VERSION_BUILD; +constexpr int version = KFR_VERSION; +} +#endif + +//#define KFR_MEMORY_ALIGNMENT 64 + +#if KFR_COMPILER_CLANG +#define KFR_LOOP_NOUNROLL \ + _Pragma("clang loop vectorize( disable )") _Pragma("clang loop interleave( disable )") \ + _Pragma("clang loop unroll( disable )") + +#define KFR_LOOP_UNROLL _Pragma("clang loop unroll( full )") + +#define KFR_VEC_CC __attribute__((vectorcall)) +#else +#define KFR_LOOP_NOUNROLL +#define KFR_LOOP_UNROLL +#ifdef KFR_COMPILER_MSVC +#define KFR_VEC_CC __vectorcall +#endif + +#endif + +#define KFR_AVAIL_AVX2 1 +#define KFR_AVAIL_AVX 1 +#define KFR_AVAIL_SSE42 1 +#define KFR_AVAIL_SSE41 1 +#define KFR_AVAIL_SSSE3 1 +#define KFR_AVAIL_SSE3 1 +#define KFR_AVAIL_SSE2 1 +#define KFR_AVAIL_SSE 1 + +#if defined(KFR_GNU_ATTRIBUTES) + +#define KFR_CPU_NAME_avx2 "avx2" +#define KFR_CPU_NAME_avx "avx" +#define KFR_CPU_NAME_sse42 "sse4.2" +#define KFR_CPU_NAME_sse41 "sse4.1" +#define KFR_CPU_NAME_ssse3 "ssse3" +#define KFR_CPU_NAME_sse3 "sse3" +#define KFR_CPU_NAME_sse2 "sse2" + +#if __has_attribute(target) +#define KFR_USE_CPU(arch) __attribute__((target(KFR_CPU_NAME_##arch))) +#else +#define KFR_USE_CPU(arch) +#endif + +#endif + +#if defined(KFR_GNU_ATTRIBUTES) +#define KFR_FAST_CC __attribute__((fastcall)) +#else +#define KFR_FAST_CC __fastcall +#endif + +#define KFR_INTRIN CID_INTRIN +#define KFR_SINTRIN CID_INTRIN CID_NODEBUG static +#define KFR_AINTRIN 
inline CID_NODEBUG static +#define KFR_FAST_NOINLINE CID_NOINLINE + +#define KFR_CPU_INTRIN(c) KFR_AINTRIN KFR_USE_CPU(c) diff --git a/include/kfr/base/log_exp.hpp b/include/kfr/base/log_exp.hpp @@ -0,0 +1,575 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. 
+ */ +#pragma once + +#include "abs.hpp" +#include "constants.hpp" +#include "function.hpp" +#include "min_max.hpp" +#include "operators.hpp" +#include "round.hpp" +#include "select.hpp" +#include "shuffle.hpp" + +#pragma clang diagnostic push +#if CID_HAS_WARNING("-Winaccessible-base") +#pragma clang diagnostic ignored "-Winaccessible-base" +#endif + +namespace kfr +{ + +namespace internal +{ + +template <cpu_t c = cpu_t::native> +struct in_log_exp : in_select<c>, in_min_max<c>, in_clamp<c>, in_round<c>, in_abs<c> +{ +private: + constexpr static cpu_t cur = c; + using in_select<c>::select; + using in_round<c>::floor; + using in_clamp<c>::clamp; + using in_abs<c>::abs; + +public: + template <size_t N> + KFR_SINTRIN vec<i32, N> vilogbp1(vec<f32, N> d) + { + mask<i32, N> m = d < 5.421010862427522E-20f; + d = select(m, 1.8446744073709552E19f * d, d); + vec<i32, N> q = (ibitcast(d) >> 23) & 0xff; + q = select(m, q - (64 + 0x7e), q - 0x7e); + return q; + } + + template <size_t N> + KFR_SINTRIN vec<i64, N> vilogbp1(vec<f64, N> d) + { + mask<i64, N> m = d < 4.9090934652977266E-91; + d = select(m, 2.037035976334486E90 * d, d); + vec<i64, N> q = (ibitcast(d) >> 52) & 0x7ff; + q = select(m, q - (300 + 0x03fe), q - 0x03fe); + return q; + } + + template <size_t N> + KFR_SINTRIN vec<f32, N> vldexpk(vec<f32, N> x, vec<i32, N> q) + { + vec<i32, N> m = q >> 31; + m = (((m + q) >> 6) - m) << 4; + q = q - (m << 2); + m = clamp(m + 0x7f, vec<i32, N>(0xff)); + vec<f32, N> u = pow4(bitcast<f32>(cast<i32>(m) << 23)); + return x * u * bitcast<f32>((cast<i32>(q + 0x7f)) << 23); + } + + template <size_t N> + KFR_SINTRIN vec<f64, N> vldexpk(vec<f64, N> x, vec<i64, N> q) + { + vec<i64, N> m = q >> 31; + m = (((m + q) >> 9) - m) << 7; + q = q - (m << 2); + m = clamp(m + 0x3ff, i64(0x7ff)); + vec<f64, N> u = pow4(bitcast<f64>(cast<i64>(m) << 52)); + return x * u * bitcast<f64>((cast<i64>(q + 0x3ff)) << 52); + } + + template <typename T, size_t N> + KFR_SINTRIN vec<T, N> logb(vec<T, N> x) + { + 
return select(x == T(), -c_infinity<T>, cast<T>(vilogbp1(x) - 1)); + } + + template <size_t N> + KFR_SINTRIN vec<f32, N> log(vec<f32, N> d) + { + vec<i32, N> e = vilogbp1(d * 0.7071); // 0678118654752440084436210485f ); + vec<f32, N> m = vldexpk(d, -e); + + vec<f32, N> x = (m - 1.0f) / (m + 1.0f); + vec<f32, N> x2 = x * x; + + vec<f32, N> sp = select(d < 0, c_qnan<f32>, c_neginfinity<f32>); + + vec<f32, N> t = 0.2371599674224853515625f; + t = fmadd(t, x2, 0.285279005765914916992188f); + t = fmadd(t, x2, 0.400005519390106201171875f); + t = fmadd(t, x2, 0.666666567325592041015625f); + t = fmadd(t, x2, 2.0f); + + x = x * t + c_log_2<f32> * cast<f32>(e); + x = select(d > 0, x, sp); + + return x; + } + + template <size_t N> + KFR_SINTRIN vec<f64, N> log(vec<f64, N> d) + { + vec<i64, N> e = vilogbp1(d * 0.7071); // 0678118654752440084436210485 ); + vec<f64, N> m = vldexpk(d, -e); + + vec<f64, N> x = (m - 1.0) / (m + 1.0); + vec<f64, N> x2 = x * x; + + vec<f64, N> sp = select(d < 0, c_qnan<f64>, c_neginfinity<f64>); + + vec<f64, N> t = 0.148197055177935105296783; + t = fmadd(t, x2, 0.153108178020442575739679); + t = fmadd(t, x2, 0.181837339521549679055568); + t = fmadd(t, x2, 0.22222194152736701733275); + t = fmadd(t, x2, 0.285714288030134544449368); + t = fmadd(t, x2, 0.399999999989941956712869); + t = fmadd(t, x2, 0.666666666666685503450651); + t = fmadd(t, x2, 2); + + x = x * t + c_log_2<f64> * cast<f64>(e); + x = select(d > 0, x, sp); + + return x; + } + + template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)> + KFR_SINTRIN vec<T, N> log2(vec<T, N> x) + { + return log(x) * c_recip_log_2<T>; + } + template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)> + KFR_SINTRIN vec<T, N> log10(vec<T, N> x) + { + return log(x) * c_recip_log_10<T>; + } + + template <size_t N> + KFR_SINTRIN vec<f32, N> exp(vec<f32, N> d) + { + const f32 ln2_part1 = 0.6931457519f; + const f32 ln2_part2 = 1.4286067653e-6f; + + vec<i32, N> q = cast<i32>(floor(d * 
c_recip_log_2<f32>)); + vec<f32, N> s, u; + + s = fmadd(cast<f32>(q), -ln2_part1, d); + s = fmadd(cast<f32>(q), -ln2_part2, s); + + const f32 c2 = 0.4999999105930328369140625f; + const f32 c3 = 0.166668415069580078125f; + const f32 c4 = 4.16539050638675689697265625e-2f; + const f32 c5 = 8.378830738365650177001953125e-3f; + const f32 c6 = 1.304379315115511417388916015625e-3f; + const f32 c7 = 2.7555381529964506626129150390625e-4f; + + u = c7; + u = fmadd(u, s, c6); + u = fmadd(u, s, c5); + u = fmadd(u, s, c4); + u = fmadd(u, s, c3); + u = fmadd(u, s, c2); + + u = s * s * u + s + 1.0f; + u = vldexpk(u, q); + + u = select(d == c_neginfinity<f32>, 0.f, u); + + return u; + } + + template <size_t N> + KFR_SINTRIN vec<f64, N> exp(vec<f64, N> d) + { + const f64 ln2_part1 = 0.69314717501401901245; + const f64 ln2_part2 = 5.545926273775592108e-009; + + vec<i64, N> q = cast<i64>(floor(d * c_recip_log_2<f64>)); + vec<f64, N> s, u; + + s = fmadd(cast<f64>(q), -ln2_part1, d); + s = fmadd(cast<f64>(q), -ln2_part2, s); + + const f64 c2 = 0.499999999999994948485237955537741072475910186767578; + const f64 c3 = 0.166666666667024204739888659787538927048444747924805; + const f64 c4 = 4.16666666578945840693215529881854308769106864929199e-2; + const f64 c5 = 8.3333334397461874404333670440792047884315252304077e-3; + const f64 c6 = 1.3888881489747750223179290074426717183087021112442e-3; + const f64 c7 = 1.9841587032493949419205414574918222569976933300495e-4; + const f64 c8 = 2.47929324077393282239802768662784160369483288377523e-5; + const f64 c9 = 2.77076037925831049422552981864598109496000688523054e-6; + const f64 c10 = 2.59589616274586264243611237120812340606335055781528e-7; + const f64 c11 = 3.43801438838789632454461529017381016259946591162588e-8; + + u = c11; + u = fmadd(u, s, c10); + u = fmadd(u, s, c9); + u = fmadd(u, s, c8); + u = fmadd(u, s, c7); + u = fmadd(u, s, c6); + u = fmadd(u, s, c5); + u = fmadd(u, s, c4); + u = fmadd(u, s, c3); + u = fmadd(u, s, c2); + + u = s * s * u + s 
+ 1.0; + u = vldexpk(u, q); + + u = select(d == c_neginfinity<f64>, 0.0, u); + + return u; + } + template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)> + KFR_SINTRIN vec<T, N> exp2(vec<T, N> x) + { + return exp(x * c_log_2<T>); + } + template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)> + KFR_SINTRIN vec<T, N> exp10(vec<T, N> x) + { + return exp(x * c_log_10<T>); + } + + template <typename T1, typename T2> + KFR_SINTRIN common_type<T1, T2> logn(const T1& a, const T2& b) + { + return log(a) / log(b); + } + + template <typename T1, typename T2> + KFR_SINTRIN common_type<T1, T2> logm(const T1& a, const T2& b) + { + return log(a) * b; + } + + template <typename T1, typename T2, typename T3> + KFR_SINTRIN common_type<T1, T2, T3> exp_fmadd(const T1& x, const T2& m, const T3& a) + { + return exp(fmadd(x, m, a)); + } + + template <typename T1, typename T2, typename T3> + KFR_SINTRIN common_type<T1, T2, T3> log_fmadd(const T1& x, const T2& m, const T3& a) + { + return fmadd(log(x), m, a); + } + + template <typename T, size_t N> + KFR_SINTRIN vec<T, N> pow(vec<T, N> a, vec<T, N> b) + { + const vec<T, N> t = exp(b * log(abs(a))); + const mask<T, N> isint = floor(b) == b; + const mask<T, N> iseven = (cast<itype<T>>(b) & 1) == 0; + return select(a > T(), t, + select(a == T(), T(1), select(isint, select(iseven, t, -t), broadcast<N>(c_qnan<T>)))); + } + + template <typename T, size_t N> + KFR_SINTRIN vec<T, N> root(vec<T, N> x, vec<T, N> b) + { + return exp(reciprocal(b) * log(x)); + } + + template <typename T, size_t N> + KFR_SINTRIN vec<T, N> cbrt(vec<T, N> x) + { + return pow<T, N>(x, T(0.333333333333333333333333333333333)); + } + + template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>> + KFR_SINTRIN vec<Tout, N> exp(vec<T, N> x) + { + return exp(cast<Tout>(x)); + } + template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>> + KFR_SINTRIN vec<Tout, N> exp2(vec<T, N> x) + 
{ + return exp2(cast<Tout>(x)); + } + template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>> + KFR_SINTRIN vec<Tout, N> exp10(vec<T, N> x) + { + return exp10(cast<Tout>(x)); + } + template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>> + KFR_SINTRIN vec<Tout, N> log(vec<T, N> x) + { + return log(cast<Tout>(x)); + } + template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>> + KFR_SINTRIN vec<Tout, N> log2(vec<T, N> x) + { + return log2(cast<Tout>(x)); + } + template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>> + KFR_SINTRIN vec<Tout, N> log10(vec<T, N> x) + { + return log10(cast<Tout>(x)); + } + template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>> + KFR_SINTRIN vec<Tout, N> cbrt(vec<T, N> x) + { + return cbrt(cast<Tout>(x)); + } + + KFR_HANDLE_SCALAR(exp) + KFR_HANDLE_SCALAR(exp2) + KFR_HANDLE_SCALAR(exp10) + KFR_HANDLE_SCALAR(log) + KFR_HANDLE_SCALAR(log2) + KFR_HANDLE_SCALAR(log10) + KFR_HANDLE_SCALAR(logb) + KFR_HANDLE_SCALAR(pow) + KFR_HANDLE_SCALAR(root) + KFR_HANDLE_SCALAR(cbrt) + + KFR_SPEC_FN(in_log_exp, exp) + KFR_SPEC_FN(in_log_exp, exp2) + KFR_SPEC_FN(in_log_exp, exp10) + KFR_SPEC_FN(in_log_exp, log) + KFR_SPEC_FN(in_log_exp, log2) + KFR_SPEC_FN(in_log_exp, log10) + KFR_SPEC_FN(in_log_exp, logb) + KFR_SPEC_FN(in_log_exp, logn) + KFR_SPEC_FN(in_log_exp, logm) + KFR_SPEC_FN(in_log_exp, exp_fmadd) + KFR_SPEC_FN(in_log_exp, log_fmadd) + KFR_SPEC_FN(in_log_exp, pow) + KFR_SPEC_FN(in_log_exp, root) + KFR_SPEC_FN(in_log_exp, cbrt) +}; +} +namespace native +{ +using fn_exp = internal::in_log_exp<>::fn_exp; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> + +KFR_INTRIN ftype<T1> exp(const T1& x) +{ + return internal::in_log_exp<>::exp(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> + +KFR_INTRIN expr_func<fn_exp, E1> 
exp(E1&& x) +{ + return { fn_exp(), std::forward<E1>(x) }; +} + +using fn_exp2 = internal::in_log_exp<>::fn_exp2; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> + +KFR_INTRIN ftype<T1> exp2(const T1& x) +{ + return internal::in_log_exp<>::exp2(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> + +KFR_INTRIN expr_func<fn_exp2, E1> exp2(E1&& x) +{ + return { fn_exp2(), std::forward<E1>(x) }; +} + +using fn_exp10 = internal::in_log_exp<>::fn_exp10; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> + +KFR_INTRIN ftype<T1> exp10(const T1& x) +{ + return internal::in_log_exp<>::exp10(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> + +KFR_INTRIN expr_func<fn_exp10, E1> exp10(E1&& x) +{ + return { fn_exp10(), std::forward<E1>(x) }; +} + +using fn_log = internal::in_log_exp<>::fn_log; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> + +KFR_INTRIN ftype<T1> log(const T1& x) +{ + return internal::in_log_exp<>::log(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> + +KFR_INTRIN expr_func<fn_log, E1> log(E1&& x) +{ + return { fn_log(), std::forward<E1>(x) }; +} + +using fn_log2 = internal::in_log_exp<>::fn_log2; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> + +KFR_INTRIN ftype<T1> log2(const T1& x) +{ + return internal::in_log_exp<>::log2(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> + +KFR_INTRIN expr_func<fn_log2, E1> log2(E1&& x) +{ + return { fn_log2(), std::forward<E1>(x) }; +} + +using fn_log10 = internal::in_log_exp<>::fn_log10; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> + +KFR_INTRIN ftype<T1> log10(const T1& x) +{ + return internal::in_log_exp<>::log10(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> + +KFR_INTRIN expr_func<fn_log10, E1> log10(E1&& x) +{ + return { fn_log10(), std::forward<E1>(x) }; +} + +using fn_logb = 
internal::in_log_exp<>::fn_logb; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> + +KFR_INTRIN ftype<T1> logb(const T1& x) +{ + return internal::in_log_exp<>::logb(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> + +KFR_INTRIN expr_func<fn_logb, E1> logb(E1&& x) +{ + return { fn_logb(), std::forward<E1>(x) }; +} + +using fn_logn = internal::in_log_exp<>::fn_logn; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> + +KFR_INTRIN ftype<T1> logn(const T1& x) +{ + return internal::in_log_exp<>::logn(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> + +KFR_INTRIN expr_func<fn_logn, E1> logn(E1&& x) +{ + return { fn_logn(), std::forward<E1>(x) }; +} + +using fn_logm = internal::in_log_exp<>::fn_logm; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> + +KFR_INTRIN ftype<T1> logm(const T1& x) +{ + return internal::in_log_exp<>::logm(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> + +KFR_INTRIN expr_func<fn_logm, E1> logm(E1&& x) +{ + return { fn_logm(), std::forward<E1>(x) }; +} + +using fn_exp_fmadd = internal::in_log_exp<>::fn_exp_fmadd; +template <typename T1, typename T2, typename T3, KFR_ENABLE_IF(is_numeric_args<T1, T2, T3>::value)> +KFR_INLINE ftype<common_type<T1, T2, T3>> + +exp_fmadd(const T1& x, const T2& m, const T3& a) +{ + return internal::in_log_exp<>::exp_fmadd(x, m, a); +} + +template <typename E1, typename E2, typename E3, KFR_ENABLE_IF(is_input_expressions<E1, E2, E3>::value)> +KFR_INLINE expr_func<fn_exp_fmadd, E1, E2, E3> exp_fmadd(E1&& x, E2&& m, E3&& a) +{ + return { fn_exp_fmadd(), std::forward<E1>(x), std::forward<E2>(m), std::forward<E3>(a) }; +} +using fn_log_fmadd = internal::in_log_exp<>::fn_log_fmadd; +template <typename T1, typename T2, typename T3, KFR_ENABLE_IF(is_numeric_args<T1, T2, T3>::value)> +KFR_INLINE ftype<common_type<T1, T2, T3>> + +log_fmadd(const T1& x, const T2& m, const T3& a) +{ + return 
internal::in_log_exp<>::log_fmadd(x, m, a); +} + +template <typename E1, typename E2, typename E3, KFR_ENABLE_IF(is_input_expressions<E1, E2, E3>::value)> +KFR_INLINE expr_func<fn_log_fmadd, E1, E2, E3> log_fmadd(E1&& x, E2&& m, E3&& a) +{ + return { fn_log_fmadd(), std::forward<E1>(x), std::forward<E2>(m), std::forward<E3>(a) + + }; +} + +using fn_pow = internal::in_log_exp<>::fn_pow; +template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)> +KFR_INLINE ftype<common_type<T1, T2>> + +pow(const T1& x, const T2& b) +{ + return internal::in_log_exp<>::pow(x, b); +} + +template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)> +KFR_INLINE expr_func<fn_pow, E1, E2> pow(E1&& x, E2&& b) +{ + return { fn_pow(), std::forward<E1>(x), std::forward<E2>(b) }; +} +using fn_root = internal::in_log_exp<>::fn_root; +template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)> +KFR_INLINE ftype<common_type<T1, T2>> + +root(const T1& x, const T2& b) +{ + return internal::in_log_exp<>::root(x, b); +} + +template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)> +KFR_INLINE expr_func<fn_root, E1, E2> root(E1&& x, E2&& b) +{ + return { fn_root(), std::forward<E1>(x), std::forward<E2>(b) + + }; +} + +using fn_cbrt = internal::in_log_exp<>::fn_cbrt; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> + +KFR_INTRIN ftype<T1> cbrt(const T1& x) +{ + return internal::in_log_exp<>::cbrt(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> + +KFR_INTRIN expr_func<fn_cbrt, E1> cbrt(E1&& x) +{ + return { fn_cbrt(), std::forward<E1>(x) }; +} +} +} + +#pragma clang diagnostic pop diff --git a/include/kfr/base/logical.hpp b/include/kfr/base/logical.hpp @@ -0,0 +1,339 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU 
General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. + */ +#pragma once +#include "abs.hpp" +#include "function.hpp" +#include "operators.hpp" + +namespace kfr +{ + +template <size_t bits> +struct bitmask +{ + using type = findinttype<0, (1ull << bits) - 1>; + bitmask(type val) : value(val) {} + template <typename Itype> + bitmask(Itype val) : value(static_cast<type>(val)) + { + } + type value; +}; + +namespace internal +{ + +template <cpu_t c = cpu_t::native> +struct in_bittest : in_bittest<older(c)> +{ + struct fn_bittestnone : fn_disabled + { + }; + struct fn_bittestall : fn_disabled + { + }; +}; + +struct logical_and +{ + template <typename T1, typename T2> + auto operator()(T1 x, T2 y) -> decltype(x && y) + { + return x && y; + } + template <typename T> + T operator()(initialvalue<T>) + { + return T(); + } +}; + +template <> +struct in_bittest<cpu_t::sse2> +{ + constexpr static cpu_t cpu = cpu_t::sse2; + + KFR_SINTRIN bitmask<4> getmask(f32sse x) { return bitmask<4>(_mm_movemask_pd(*x)); } + KFR_SINTRIN bitmask<4> getmask(f64sse x) { return bitmask<4>(_mm_movemask_pd(*x)); } + KFR_SINTRIN bitmask<16> getmask(u8sse x) { return bitmask<16>(_mm_movemask_epi8(*x)); } + KFR_SINTRIN bitmask<16> getmask(u16sse x) { return 
bitmask<16>(_mm_movemask_epi8(*x)); } + KFR_SINTRIN bitmask<16> getmask(u32sse x) { return bitmask<16>(_mm_movemask_epi8(*x)); } + KFR_SINTRIN bitmask<16> getmask(u64sse x) { return bitmask<16>(_mm_movemask_epi8(*x)); } + KFR_SINTRIN bitmask<16> getmask(i8sse x) { return bitmask<16>(_mm_movemask_epi8(*x)); } + KFR_SINTRIN bitmask<16> getmask(i16sse x) { return bitmask<16>(_mm_movemask_epi8(*x)); } + KFR_SINTRIN bitmask<16> getmask(i32sse x) { return bitmask<16>(_mm_movemask_epi8(*x)); } + KFR_SINTRIN bitmask<16> getmask(i64sse x) { return bitmask<16>(_mm_movemask_epi8(*x)); } + + KFR_SINTRIN bool bittestnone(f32sse x) { return !_mm_movemask_ps(*x); } + KFR_SINTRIN bool bittestnone(f64sse x) { return !_mm_movemask_pd(*x); } + KFR_SINTRIN bool bittestnone(u8sse x) { return !_mm_movemask_epi8(*x); } + KFR_SINTRIN bool bittestnone(u16sse x) { return !_mm_movemask_epi8(*x); } + KFR_SINTRIN bool bittestnone(u32sse x) { return !_mm_movemask_epi8(*x); } + KFR_SINTRIN bool bittestnone(u64sse x) { return !_mm_movemask_epi8(*x); } + KFR_SINTRIN bool bittestnone(i8sse x) { return !_mm_movemask_epi8(*x); } + KFR_SINTRIN bool bittestnone(i16sse x) { return !_mm_movemask_epi8(*x); } + KFR_SINTRIN bool bittestnone(i32sse x) { return !_mm_movemask_epi8(*x); } + KFR_SINTRIN bool bittestnone(i64sse x) { return !_mm_movemask_epi8(*x); } + + KFR_SINTRIN bool bittestnone(f32sse x, f32sse y) { return bittestnone(x & y); } + KFR_SINTRIN bool bittestnone(f64sse x, f64sse y) { return bittestnone(x & y); } + KFR_SINTRIN bool bittestnone(u8sse x, u8sse y) { return bittestnone(x & y); } + KFR_SINTRIN bool bittestnone(u16sse x, u16sse y) { return bittestnone(x & y); } + KFR_SINTRIN bool bittestnone(u32sse x, u32sse y) { return bittestnone(x & y); } + KFR_SINTRIN bool bittestnone(u64sse x, u64sse y) { return bittestnone(x & y); } + KFR_SINTRIN bool bittestnone(i8sse x, i8sse y) { return bittestnone(x & y); } + KFR_SINTRIN bool bittestnone(i16sse x, i16sse y) { return bittestnone(x & y); } + 
KFR_SINTRIN bool bittestnone(i32sse x, i32sse y) { return bittestnone(x & y); } + KFR_SINTRIN bool bittestnone(i64sse x, i64sse y) { return bittestnone(x & y); } + + KFR_SINTRIN bool bittestall(f32sse x) { return !_mm_movemask_ps(*~x); } + KFR_SINTRIN bool bittestall(f64sse x) { return !_mm_movemask_pd(*~x); } + KFR_SINTRIN bool bittestall(u8sse x) { return !_mm_movemask_epi8(*~x); } + KFR_SINTRIN bool bittestall(u16sse x) { return !_mm_movemask_epi8(*~x); } + KFR_SINTRIN bool bittestall(u32sse x) { return !_mm_movemask_epi8(*~x); } + KFR_SINTRIN bool bittestall(u64sse x) { return !_mm_movemask_epi8(*~x); } + KFR_SINTRIN bool bittestall(i8sse x) { return !_mm_movemask_epi8(*~x); } + KFR_SINTRIN bool bittestall(i16sse x) { return !_mm_movemask_epi8(*~x); } + KFR_SINTRIN bool bittestall(i32sse x) { return !_mm_movemask_epi8(*~x); } + KFR_SINTRIN bool bittestall(i64sse x) { return !_mm_movemask_epi8(*~x); } + + KFR_SINTRIN bool bittestall(f32sse x, f32sse y) { return bittestnone(~x & y); } + KFR_SINTRIN bool bittestall(f64sse x, f64sse y) { return bittestnone(~x & y); } + KFR_SINTRIN bool bittestall(u8sse x, u8sse y) { return bittestnone(~x & y); } + KFR_SINTRIN bool bittestall(u16sse x, u16sse y) { return bittestnone(~x & y); } + KFR_SINTRIN bool bittestall(u32sse x, u32sse y) { return bittestnone(~x & y); } + KFR_SINTRIN bool bittestall(u64sse x, u64sse y) { return bittestnone(~x & y); } + KFR_SINTRIN bool bittestall(i8sse x, i8sse y) { return bittestnone(~x & y); } + KFR_SINTRIN bool bittestall(i16sse x, i16sse y) { return bittestnone(~x & y); } + KFR_SINTRIN bool bittestall(i32sse x, i32sse y) { return bittestnone(~x & y); } + KFR_SINTRIN bool bittestall(i64sse x, i64sse y) { return bittestnone(~x & y); } + + KFR_HANDLE_ALL_REDUCE(logical_and, bittestnone) + KFR_HANDLE_ALL_REDUCE(logical_and, bittestall) + KFR_SPEC_FN(in_bittest, bittestnone) + KFR_SPEC_FN(in_bittest, bittestall) +}; + +template <> +struct in_bittest<cpu_t::sse41> : in_bittest<cpu_t::sse2> +{ + 
constexpr static cpu_t cpu = cpu_t::sse41; + + KFR_SINTRIN bool bittestnone(f32sse x, f32sse y) { return _mm_testz_ps(*x, *y); } + KFR_SINTRIN bool bittestnone(f64sse x, f64sse y) { return _mm_testz_pd(*x, *y); } + KFR_SINTRIN bool bittestnone(u8sse x, u8sse y) { return _mm_testz_si128(*x, *y); } + KFR_SINTRIN bool bittestnone(u16sse x, u16sse y) { return _mm_testz_si128(*x, *y); } + KFR_SINTRIN bool bittestnone(u32sse x, u32sse y) { return _mm_testz_si128(*x, *y); } + KFR_SINTRIN bool bittestnone(u64sse x, u64sse y) { return _mm_testz_si128(*x, *y); } + KFR_SINTRIN bool bittestnone(i8sse x, i8sse y) { return _mm_testz_si128(*x, *y); } + KFR_SINTRIN bool bittestnone(i16sse x, i16sse y) { return _mm_testz_si128(*x, *y); } + KFR_SINTRIN bool bittestnone(i32sse x, i32sse y) { return _mm_testz_si128(*x, *y); } + KFR_SINTRIN bool bittestnone(i64sse x, i64sse y) { return _mm_testz_si128(*x, *y); } + + KFR_SINTRIN bool bittestnone(f32sse x) { return _mm_testz_ps(*x, *x); } + KFR_SINTRIN bool bittestnone(f64sse x) { return _mm_testz_pd(*x, *x); } + KFR_SINTRIN bool bittestnone(u8sse x) { return _mm_testz_si128(*x, *x); } + KFR_SINTRIN bool bittestnone(u16sse x) { return _mm_testz_si128(*x, *x); } + KFR_SINTRIN bool bittestnone(u32sse x) { return _mm_testz_si128(*x, *x); } + KFR_SINTRIN bool bittestnone(u64sse x) { return _mm_testz_si128(*x, *x); } + KFR_SINTRIN bool bittestnone(i8sse x) { return _mm_testz_si128(*x, *x); } + KFR_SINTRIN bool bittestnone(i16sse x) { return _mm_testz_si128(*x, *x); } + KFR_SINTRIN bool bittestnone(i32sse x) { return _mm_testz_si128(*x, *x); } + KFR_SINTRIN bool bittestnone(i64sse x) { return _mm_testz_si128(*x, *x); } + + KFR_SINTRIN bool bittestall(f32sse x, f32sse y) { return _mm_testc_ps(*x, *y); } + KFR_SINTRIN bool bittestall(f64sse x, f64sse y) { return _mm_testc_pd(*x, *y); } + KFR_SINTRIN bool bittestall(u8sse x, u8sse y) { return _mm_testc_si128(*x, *y); } + KFR_SINTRIN bool bittestall(u16sse x, u16sse y) { return _mm_testc_si128(*x, 
*y); } + KFR_SINTRIN bool bittestall(u32sse x, u32sse y) { return _mm_testc_si128(*x, *y); } + KFR_SINTRIN bool bittestall(u64sse x, u64sse y) { return _mm_testc_si128(*x, *y); } + KFR_SINTRIN bool bittestall(i8sse x, i8sse y) { return _mm_testc_si128(*x, *y); } + KFR_SINTRIN bool bittestall(i16sse x, i16sse y) { return _mm_testc_si128(*x, *y); } + KFR_SINTRIN bool bittestall(i32sse x, i32sse y) { return _mm_testc_si128(*x, *y); } + KFR_SINTRIN bool bittestall(i64sse x, i64sse y) { return _mm_testc_si128(*x, *y); } + + KFR_SINTRIN bool bittestall(f32sse x) { return _mm_testc_ps(*x, *allonesvector(x)); } + KFR_SINTRIN bool bittestall(f64sse x) { return _mm_testc_pd(*x, *allonesvector(x)); } + KFR_SINTRIN bool bittestall(u8sse x) { return _mm_testc_si128(*x, *allonesvector(x)); } + KFR_SINTRIN bool bittestall(u16sse x) { return _mm_testc_si128(*x, *allonesvector(x)); } + KFR_SINTRIN bool bittestall(u32sse x) { return _mm_testc_si128(*x, *allonesvector(x)); } + KFR_SINTRIN bool bittestall(u64sse x) { return _mm_testc_si128(*x, *allonesvector(x)); } + KFR_SINTRIN bool bittestall(i8sse x) { return _mm_testc_si128(*x, *allonesvector(x)); } + KFR_SINTRIN bool bittestall(i16sse x) { return _mm_testc_si128(*x, *allonesvector(x)); } + KFR_SINTRIN bool bittestall(i32sse x) { return _mm_testc_si128(*x, *allonesvector(x)); } + KFR_SINTRIN bool bittestall(i64sse x) { return _mm_testc_si128(*x, *allonesvector(x)); } + + KFR_HANDLE_ALL_REDUCE(logical_and, bittestnone) + KFR_HANDLE_ALL_REDUCE(logical_and, bittestall) + KFR_SPEC_FN(in_bittest, bittestnone) + KFR_SPEC_FN(in_bittest, bittestall) +}; + +template <> +struct in_bittest<cpu_t::avx1> : in_bittest<cpu_t::sse41> +{ + constexpr static cpu_t cpu = cpu_t::avx1; + using in_bittest<cpu_t::sse41>::bittestnone; + using in_bittest<cpu_t::sse41>::bittestall; + + KFR_SINTRIN bitmask<8> getmask(f32avx x) { return bitmask<8>(_mm256_movemask_pd(*x)); } + KFR_SINTRIN bitmask<8> getmask(f64avx x) { return 
bitmask<8>(_mm256_movemask_pd(*x)); } + + KFR_SINTRIN bool bittestnone(f32avx x, f32avx y) { return _mm256_testz_ps(*x, *y); } + KFR_SINTRIN bool bittestnone(f64avx x, f64avx y) { return _mm256_testz_pd(*x, *y); } + KFR_SINTRIN bool bittestnone(f32avx x) { return _mm256_testz_ps(*x, *x); } + KFR_SINTRIN bool bittestnone(f64avx x) { return _mm256_testz_pd(*x, *x); } + KFR_SINTRIN bool bittestnall(f32avx x, f32avx y) { return _mm256_testc_ps(*x, *y); } + KFR_SINTRIN bool bittestnall(f64avx x, f64avx y) { return _mm256_testc_pd(*x, *y); } + KFR_SINTRIN bool bittestnall(f32avx x) { return _mm256_testc_ps(*x, *allonesvector(x)); } + KFR_SINTRIN bool bittestnall(f64avx x) { return _mm256_testc_pd(*x, *allonesvector(x)); } + + KFR_HANDLE_ALL_REDUCE(logical_and, bittestnone) + KFR_HANDLE_ALL_REDUCE(logical_and, bittestall) + KFR_SPEC_FN(in_bittest, bittestnone) + KFR_SPEC_FN(in_bittest, bittestall) +}; + +template <> +struct in_bittest<cpu_t::avx2> : in_bittest<cpu_t::avx1> +{ + constexpr static cpu_t cpu = cpu_t::avx2; + using in_bittest<cpu_t::avx1>::bittestnone; + using in_bittest<cpu_t::avx1>::bittestall; + + KFR_SINTRIN bitmask<32> getmask(u8avx x) { return bitmask<32>(_mm256_movemask_epi8(*x)); } + KFR_SINTRIN bitmask<32> getmask(u16avx x) { return bitmask<32>(_mm256_movemask_epi8(*x)); } + KFR_SINTRIN bitmask<32> getmask(u32avx x) { return bitmask<32>(_mm256_movemask_epi8(*x)); } + KFR_SINTRIN bitmask<32> getmask(u64avx x) { return bitmask<32>(_mm256_movemask_epi8(*x)); } + KFR_SINTRIN bitmask<32> getmask(i8avx x) { return bitmask<32>(_mm256_movemask_epi8(*x)); } + KFR_SINTRIN bitmask<32> getmask(i16avx x) { return bitmask<32>(_mm256_movemask_epi8(*x)); } + KFR_SINTRIN bitmask<32> getmask(i32avx x) { return bitmask<32>(_mm256_movemask_epi8(*x)); } + KFR_SINTRIN bitmask<32> getmask(i64avx x) { return bitmask<32>(_mm256_movemask_epi8(*x)); } + + KFR_SINTRIN bool bittestnone(u8avx x, u8avx y) { return _mm256_testz_si256(*x, *y); } + KFR_SINTRIN bool bittestnone(u16avx 
x, u16avx y) { return _mm256_testz_si256(*x, *y); } + KFR_SINTRIN bool bittestnone(u32avx x, u32avx y) { return _mm256_testz_si256(*x, *y); } + KFR_SINTRIN bool bittestnone(u64avx x, u64avx y) { return _mm256_testz_si256(*x, *y); } + KFR_SINTRIN bool bittestnone(i8avx x, i8avx y) { return _mm256_testz_si256(*x, *y); } + KFR_SINTRIN bool bittestnone(i16avx x, i16avx y) { return _mm256_testz_si256(*x, *y); } + KFR_SINTRIN bool bittestnone(i32avx x, i32avx y) { return _mm256_testz_si256(*x, *y); } + KFR_SINTRIN bool bittestnone(i64avx x, i64avx y) { return _mm256_testz_si256(*x, *y); } + + KFR_SINTRIN bool bittestnone(u8avx x) { return _mm256_testz_si256(*x, *x); } + KFR_SINTRIN bool bittestnone(u16avx x) { return _mm256_testz_si256(*x, *x); } + KFR_SINTRIN bool bittestnone(u32avx x) { return _mm256_testz_si256(*x, *x); } + KFR_SINTRIN bool bittestnone(u64avx x) { return _mm256_testz_si256(*x, *x); } + KFR_SINTRIN bool bittestnone(i8avx x) { return _mm256_testz_si256(*x, *x); } + KFR_SINTRIN bool bittestnone(i16avx x) { return _mm256_testz_si256(*x, *x); } + KFR_SINTRIN bool bittestnone(i32avx x) { return _mm256_testz_si256(*x, *x); } + KFR_SINTRIN bool bittestnone(i64avx x) { return _mm256_testz_si256(*x, *x); } + + KFR_SINTRIN bool bittestall(u8avx x, u8avx y) { return _mm256_testc_si256(*x, *y); } + KFR_SINTRIN bool bittestall(u16avx x, u16avx y) { return _mm256_testc_si256(*x, *y); } + KFR_SINTRIN bool bittestall(u32avx x, u32avx y) { return _mm256_testc_si256(*x, *y); } + KFR_SINTRIN bool bittestall(u64avx x, u64avx y) { return _mm256_testc_si256(*x, *y); } + KFR_SINTRIN bool bittestall(i8avx x, i8avx y) { return _mm256_testc_si256(*x, *y); } + KFR_SINTRIN bool bittestall(i16avx x, i16avx y) { return _mm256_testc_si256(*x, *y); } + KFR_SINTRIN bool bittestall(i32avx x, i32avx y) { return _mm256_testc_si256(*x, *y); } + KFR_SINTRIN bool bittestall(i64avx x, i64avx y) { return _mm256_testc_si256(*x, *y); } + + KFR_SINTRIN bool bittestall(u8avx x) { return 
_mm256_testc_si256(*x, *allonesvector(x)); } + KFR_SINTRIN bool bittestall(u16avx x) { return _mm256_testc_si256(*x, *allonesvector(x)); } + KFR_SINTRIN bool bittestall(u32avx x) { return _mm256_testc_si256(*x, *allonesvector(x)); } + KFR_SINTRIN bool bittestall(u64avx x) { return _mm256_testc_si256(*x, *allonesvector(x)); } + KFR_SINTRIN bool bittestall(i8avx x) { return _mm256_testc_si256(*x, *allonesvector(x)); } + KFR_SINTRIN bool bittestall(i16avx x) { return _mm256_testc_si256(*x, *allonesvector(x)); } + KFR_SINTRIN bool bittestall(i32avx x) { return _mm256_testc_si256(*x, *allonesvector(x)); } + KFR_SINTRIN bool bittestall(i64avx x) { return _mm256_testc_si256(*x, *allonesvector(x)); } + + KFR_HANDLE_ALL_REDUCE(logical_and, bittestnone) + KFR_HANDLE_ALL_REDUCE(logical_and, bittestall) + KFR_SPEC_FN(in_bittest, bittestnone) + KFR_SPEC_FN(in_bittest, bittestall) +}; +} + +namespace native +{ +using fn_bittestnone = internal::in_bittest<>::fn_bittestnone; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> + +KFR_INTRIN ftype<T1> bittestnone(const T1& x) +{ + return internal::in_bittest<>::bittestnone(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> + +KFR_INTRIN expr_func<fn_bittestnone, E1> bittestnone(E1&& x) +{ + return { fn_bittestnone(), std::forward<E1>(x) }; +} + +using fn_bittestall = internal::in_bittest<>::fn_bittestall; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> + +KFR_INTRIN ftype<T1> bittestall(const T1& x) +{ + return internal::in_bittest<>::bittestall(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> + +KFR_INTRIN expr_func<fn_bittestall, E1> bittestall(E1&& x) +{ + return { fn_bittestall(), std::forward<E1>(x) }; +} + +using fn_bittestnone = internal::in_bittest<>::fn_bittestnone; +template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)> +KFR_INLINE ftype<common_type<T1, T2>> + +bittestnone(const T1& x, const T2& y) +{ + return 
internal::in_bittest<>::bittestnone(x, y); +} + +template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)> +KFR_INLINE expr_func<fn_bittestnone, E1, E2> bittestnone(E1&& x, E2&& y) +{ + return { fn_bittestnone(), std::forward<E1>(x), std::forward<E2>(y) }; +} +using fn_bittestall = internal::in_bittest<>::fn_bittestall; +template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)> +KFR_INLINE ftype<common_type<T1, T2>> + +bittestall(const T1& x, const T2& y) +{ + return internal::in_bittest<>::bittestall(x, y); +} + +template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)> +KFR_INLINE expr_func<fn_bittestall, E1, E2> bittestall(E1&& x, E2&& y) +{ + return { fn_bittestall(), std::forward<E1>(x), std::forward<E2>(y) + + }; +} +} +} diff --git a/include/kfr/base/memory.hpp b/include/kfr/base/memory.hpp @@ -0,0 +1,209 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. 
+ */ +#pragma once + +#include "../base/read_write.hpp" +#include "../base/types.hpp" +#include <atomic> +#include <memory> + +namespace kfr +{ + +namespace internal +{ + +struct memory_statistics +{ + std::atomic_uintptr_t allocation_count = ATOMIC_VAR_INIT(0); + std::atomic_uintptr_t allocation_size = ATOMIC_VAR_INIT(0); + std::atomic_uintptr_t deallocation_count = ATOMIC_VAR_INIT(0); + std::atomic_uintptr_t deallocation_size = ATOMIC_VAR_INIT(0); +}; + +inline memory_statistics& get_memory_statistics() +{ + static memory_statistics ms; + return ms; +} + +struct mem_header +{ + u8 offset; + u8 alignment; + u8 reserved1; + u8 reserved2; + size_t size; +} __attribute__((__packed__)); + +inline mem_header* aligned_header(void* ptr) { return ptr_cast<mem_header>(ptr) - 1; } + +inline size_t aligned_size(void* ptr) { return aligned_header(ptr)->size; } + +inline void* aligned_malloc(size_t size, size_t alignment) +{ + get_memory_statistics().allocation_count++; + get_memory_statistics().allocation_size += size; + void* ptr = malloc(size + (alignment - 1) + sizeof(mem_header)); + if (ptr == nullptr) + return nullptr; + void* aligned_ptr = advance(ptr, sizeof(mem_header)); + aligned_ptr = align_up(aligned_ptr, alignment); + aligned_header(aligned_ptr)->alignment = static_cast<u8>(alignment > 255 ? 
255 : alignment); + aligned_header(aligned_ptr)->offset = static_cast<u8>(distance(aligned_ptr, ptr)); + aligned_header(aligned_ptr)->size = size; + return aligned_ptr; +} +inline void aligned_free(void* ptr) +{ + get_memory_statistics().deallocation_count++; + get_memory_statistics().deallocation_size += aligned_size(ptr); + free(advance(ptr, -static_cast<ptrdiff_t>(aligned_header(ptr)->offset))); +} +} + +template <typename T = void, size_t alignment = native_cache_alignment> +KFR_INLINE T* aligned_allocate(size_t size = 1) +{ + T* ptr = static_cast<T*>(__builtin_assume_aligned( + internal::aligned_malloc(std::max(alignment, size * details::elementsize<T>), alignment), alignment)); + return ptr; +} + +template <typename T = void> +KFR_INLINE void aligned_deallocate(T* ptr) +{ + return internal::aligned_free(ptr); +} + +namespace internal +{ +template <typename T> +struct aligned_deleter +{ + KFR_INLINE void operator()(T* ptr) const { aligned_deallocate(ptr); } +}; +} + +template <typename T> +struct autofree +{ + KFR_INLINE autofree() {} + explicit KFR_INLINE autofree(size_t size) : ptr(aligned_allocate<T>(size)) {} + autofree(const autofree&) = delete; + autofree& operator=(const autofree&) = delete; + autofree(autofree&&) noexcept = default; + autofree& operator=(autofree&&) noexcept = default; + KFR_INLINE T& operator[](size_t index) noexcept { return ptr[index]; } + KFR_INLINE const T& operator[](size_t index) const noexcept { return ptr[index]; } + + template <typename U = T> + KFR_INLINE U* data() noexcept + { + return ptr_cast<U>(ptr.get()); + } + template <typename U = T> + KFR_INLINE const U* data() const noexcept + { + return ptr_cast<U>(ptr.get()); + } + + std::unique_ptr<T[], internal::aligned_deleter<T>> ptr; +}; + +template <typename T> +struct allocator +{ + using value_type = T; + using pointer = T*; + using const_pointer = const T*; + using reference = T&; + using const_reference = const T&; + using size_type = std::size_t; + using 
difference_type = std::ptrdiff_t; + + template <typename U> + struct rebind + { + using other = allocator<U>; + }; + constexpr allocator() noexcept = default; + constexpr allocator(const allocator&) noexcept = default; + template <typename U> + constexpr allocator(const allocator<U>&) noexcept + { + } + pointer address(reference x) const noexcept { return std::addressof(x); } + const_pointer address(const_reference x) const noexcept { return std::addressof(x); } + pointer allocate(size_type n, std::allocator<void>::const_pointer = 0) const + { + pointer result = aligned_allocate<value_type>(n); + if (!result) + CID_THROW(std::bad_alloc()); + return result; + } + void deallocate(pointer p, size_type) { aligned_deallocate(p); } + size_type max_size() const { return std::numeric_limits<size_type>::max() / sizeof(value_type); } + template <typename U, typename... Args> + void construct(U* p, Args&&... args) + { + ::new (pvoid(p)) U(std::forward<Args>(args)...); + } + template <typename U> + void destroy(U* p) + { + p->~U(); + } +}; + +template <typename T1, typename T2> +constexpr inline bool operator==(const allocator<T1>&, const allocator<T2>&) noexcept +{ + return true; +} +template <typename T1, typename T2> +constexpr inline bool operator!=(const allocator<T1>&, const allocator<T2>&) noexcept +{ + return false; +} + +struct aligned_new +{ + inline static void* operator new(size_t size) { return aligned_allocate(size); } + inline static void operator delete(void* ptr) { return aligned_deallocate(ptr); } +}; + +#define KFR_CLASS_REFCOUNT(cl) \ +public: \ + void addref() const { m_refcount++; } \ + void release() const \ + { \ + if (--m_refcount == 0) \ + { \ + delete this; \ + } \ + } \ + \ +private: \ + mutable std::atomic_uintptr_t m_refcount = ATOMIC_VAR_INIT(0); +} diff --git a/include/kfr/base/min_max.hpp b/include/kfr/base/min_max.hpp @@ -0,0 +1,377 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free 
software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. + */ +#pragma once + +#include "abs.hpp" +#include "function.hpp" +#include "operators.hpp" +#include "select.hpp" + +#pragma clang diagnostic push +#if CID_HAS_WARNING("-Winaccessible-base") +#pragma clang diagnostic ignored "-Winaccessible-base" +#endif + +namespace kfr +{ + +namespace internal +{ + +template <cpu_t cpu = cpu_t::native> +struct in_min_max : in_min_max<older(cpu)> +{ + struct fn_min : in_min_max<older(cpu)>::fn_min, fn_disabled + { + }; + struct fn_max : in_min_max<older(cpu)>::fn_max, fn_disabled + { + }; +}; + +template <> +struct in_min_max<cpu_t::sse2> : in_select<cpu_t::sse2> +{ + constexpr static cpu_t cpu = cpu_t::sse2; + +private: + using in_select<cpu>::select; + +public: + template <typename T> + KFR_SINTRIN T min(initialvalue<T>) + { + return std::numeric_limits<T>::max(); + } + template <typename T> + KFR_SINTRIN T max(initialvalue<T>) + { + return std::numeric_limits<T>::min(); + } + + KFR_CPU_INTRIN(sse2) f32sse min(f32sse x, f32sse y) { return _mm_min_ps(*x, *y); } + KFR_CPU_INTRIN(sse2) f64sse min(f64sse x, f64sse y) { return _mm_min_pd(*x, *y); } + KFR_CPU_INTRIN(sse2) i8sse 
min(i8sse x, i8sse y) { return select(x < y, x, y); } + KFR_CPU_INTRIN(sse2) u16sse min(u16sse x, u16sse y) { return select(x < y, x, y); } + KFR_CPU_INTRIN(sse2) i32sse min(i32sse x, i32sse y) { return select(x < y, x, y); } + KFR_CPU_INTRIN(sse2) u32sse min(u32sse x, u32sse y) { return select(x < y, x, y); } + KFR_CPU_INTRIN(sse2) u8sse min(u8sse x, u8sse y) { return _mm_min_epu8(*x, *y); } + KFR_CPU_INTRIN(sse2) i16sse min(i16sse x, i16sse y) { return _mm_min_epi16(*x, *y); } + KFR_CPU_INTRIN(sse2) i64sse min(i64sse x, i64sse y) { return select(x < y, x, y); } + KFR_CPU_INTRIN(sse2) u64sse min(u64sse x, u64sse y) { return select(x < y, x, y); } + + KFR_CPU_INTRIN(sse2) f32sse max(f32sse x, f32sse y) { return _mm_max_ps(*x, *y); } + KFR_CPU_INTRIN(sse2) f64sse max(f64sse x, f64sse y) { return _mm_max_pd(*x, *y); } + KFR_CPU_INTRIN(sse2) i8sse max(i8sse x, i8sse y) { return select(x > y, x, y); } + KFR_CPU_INTRIN(sse2) u16sse max(u16sse x, u16sse y) { return select(x > y, x, y); } + KFR_CPU_INTRIN(sse2) i32sse max(i32sse x, i32sse y) { return select(x > y, x, y); } + KFR_CPU_INTRIN(sse2) u32sse max(u32sse x, u32sse y) { return select(x > y, x, y); } + KFR_CPU_INTRIN(sse2) u8sse max(u8sse x, u8sse y) { return _mm_max_epu8(*x, *y); } + KFR_CPU_INTRIN(sse2) i16sse max(i16sse x, i16sse y) { return _mm_max_epi16(*x, *y); } + KFR_CPU_INTRIN(sse2) i64sse max(i64sse x, i64sse y) { return select(x > y, x, y); } + KFR_CPU_INTRIN(sse2) u64sse max(u64sse x, u64sse y) { return select(x > y, x, y); } + + KFR_HANDLE_ALL(min) + KFR_HANDLE_ALL(max) + + KFR_SPEC_FN(in_min_max, min) + KFR_SPEC_FN(in_min_max, max) +}; + +template <> +struct in_min_max<cpu_t::sse41> : in_min_max<cpu_t::sse2> +{ + constexpr static cpu_t cpu = cpu_t::sse41; + using in_min_max<cpu_t::sse2>::min; + using in_min_max<cpu_t::sse2>::max; + + KFR_CPU_INTRIN(sse41) i8sse min(i8sse x, i8sse y) { return _mm_min_epi8(*x, *y); } + KFR_CPU_INTRIN(sse41) u16sse min(u16sse x, u16sse y) { return _mm_min_epu16(*x, *y); 
} + KFR_CPU_INTRIN(sse41) i32sse min(i32sse x, i32sse y) { return _mm_min_epi32(*x, *y); } + KFR_CPU_INTRIN(sse41) u32sse min(u32sse x, u32sse y) { return _mm_min_epu32(*x, *y); } + + KFR_CPU_INTRIN(sse41) i8sse max(i8sse x, i8sse y) { return _mm_max_epi8(*x, *y); } + KFR_CPU_INTRIN(sse41) u16sse max(u16sse x, u16sse y) { return _mm_max_epu16(*x, *y); } + KFR_CPU_INTRIN(sse41) i32sse max(i32sse x, i32sse y) { return _mm_max_epi32(*x, *y); } + KFR_CPU_INTRIN(sse41) u32sse max(u32sse x, u32sse y) { return _mm_max_epu32(*x, *y); } + + KFR_HANDLE_ALL(min) + KFR_HANDLE_ALL(max) + KFR_SPEC_FN(in_min_max, min) + KFR_SPEC_FN(in_min_max, max) +}; + +template <> +struct in_min_max<cpu_t::avx1> : in_min_max<cpu_t::sse41> +{ + constexpr static cpu_t cpu = cpu_t::avx1; + using in_min_max<cpu_t::sse41>::min; + using in_min_max<cpu_t::sse41>::max; + + KFR_CPU_INTRIN(avx) f32avx min(f32avx x, f32avx y) { return _mm256_min_ps(*x, *y); } + KFR_CPU_INTRIN(avx) f64avx min(f64avx x, f64avx y) { return _mm256_min_pd(*x, *y); } + KFR_CPU_INTRIN(avx) f32avx max(f32avx x, f32avx y) { return _mm256_max_ps(*x, *y); } + KFR_CPU_INTRIN(avx) f64avx max(f64avx x, f64avx y) { return _mm256_max_pd(*x, *y); } + + KFR_HANDLE_ALL(min) + KFR_HANDLE_ALL(max) + KFR_SPEC_FN(in_min_max, min) + KFR_SPEC_FN(in_min_max, max) +}; + +template <> +struct in_min_max<cpu_t::avx2> : in_min_max<cpu_t::avx1> +{ + constexpr static cpu_t cpu = cpu_t::avx2; + using in_min_max<cpu_t::avx1>::min; + using in_min_max<cpu_t::avx1>::max; + + KFR_CPU_INTRIN(avx2) u8avx min(u8avx x, u8avx y) { return _mm256_min_epu8(*x, *y); } + KFR_CPU_INTRIN(avx2) i16avx min(i16avx x, i16avx y) { return _mm256_min_epi16(*x, *y); } + KFR_CPU_INTRIN(avx2) i8avx min(i8avx x, i8avx y) { return _mm256_min_epi8(*x, *y); } + KFR_CPU_INTRIN(avx2) u16avx min(u16avx x, u16avx y) { return _mm256_min_epu16(*x, *y); } + KFR_CPU_INTRIN(avx2) i32avx min(i32avx x, i32avx y) { return _mm256_min_epi32(*x, *y); } + KFR_CPU_INTRIN(avx2) u32avx min(u32avx x, 
u32avx y) { return _mm256_min_epu32(*x, *y); } + + KFR_CPU_INTRIN(avx2) u8avx max(u8avx x, u8avx y) { return _mm256_max_epu8(*x, *y); } + KFR_CPU_INTRIN(avx2) i16avx max(i16avx x, i16avx y) { return _mm256_max_epi16(*x, *y); } + KFR_CPU_INTRIN(avx2) i8avx max(i8avx x, i8avx y) { return _mm256_max_epi8(*x, *y); } + KFR_CPU_INTRIN(avx2) u16avx max(u16avx x, u16avx y) { return _mm256_max_epu16(*x, *y); } + KFR_CPU_INTRIN(avx2) i32avx max(i32avx x, i32avx y) { return _mm256_max_epi32(*x, *y); } + KFR_CPU_INTRIN(avx2) u32avx max(u32avx x, u32avx y) { return _mm256_max_epu32(*x, *y); } + + KFR_HANDLE_ALL(min) + KFR_HANDLE_ALL(max) + KFR_SPEC_FN(in_min_max, min) + KFR_SPEC_FN(in_min_max, max) +}; + +template <cpu_t cpu = cpu_t::native> +struct in_minabs_maxabs +{ +public: + template <typename T, size_t N> + KFR_SINTRIN vec<T, N> minabs(vec<T, N> x, vec<T, N> y) + { + return in_min_max<cpu>::min(in_abs<cpu>::abs(x), in_abs<cpu>::abs(y)); + } + template <typename T, size_t N> + KFR_SINTRIN vec<T, N> maxabs(vec<T, N> x, vec<T, N> y) + { + return in_min_max<cpu>::max(in_abs<cpu>::abs(x), in_abs<cpu>::abs(y)); + } + + KFR_HANDLE_ALL(minabs) + KFR_HANDLE_ALL(maxabs) + KFR_SPEC_FN(in_minabs_maxabs, minabs) + KFR_SPEC_FN(in_minabs_maxabs, maxabs) +}; + +template <cpu_t cpu = cpu_t::native> +struct in_clamp : in_min_max<cpu> +{ + using in_min_max<cpu>::min; + using in_min_max<cpu>::max; + + template <typename T, size_t N> + KFR_SINTRIN vec<T, N> clamp(vec<T, N> x, T minimum, T maximum) + { + return clamp(x, broadcast<N>(minimum), broadcast<N>(maximum)); + } + template <typename T, size_t N> + KFR_SINTRIN vec<T, N> clamp(vec<T, N> x, T minimum, vec<T, N> maximum) + { + return clamp(x, broadcast<N>(minimum), maximum); + } + template <typename T, size_t N> + KFR_SINTRIN vec<T, N> clamp(vec<T, N> x, vec<T, N> minimum, T maximum) + { + return clamp(x, minimum, broadcast<N>(maximum)); + } + template <typename T, size_t N> + KFR_SINTRIN vec<T, N> clamp(vec<T, N> x, T maximum) + { + 
return clamp(x, broadcast<N>(maximum)); + } + + template <typename T, size_t N> + KFR_SINTRIN vec<T, N> clamp(vec<T, N> x, vec<T, N> minimum, vec<T, N> maximum) + { + return max(minimum, min(x, maximum)); + } + template <typename T, size_t N> + KFR_SINTRIN vec<T, N> clamp(vec<T, N> x, vec<T, N> maximum) + { + return max(zerovector<T, N>(), min(x, maximum)); + } + + template <typename T, size_t N> + KFR_SINTRIN vec<T, N> clampm1(vec<T, N> x, vec<T, N> minimum, vec<T, N> maximum) + { + return max(minimum, min(x, maximum - T(1))); + } + template <typename T, size_t N> + KFR_SINTRIN vec<T, N> clampm1(vec<T, N> x, vec<T, N> maximum) + { + return max(zerovector<T, N>(), min(x, maximum - T(1))); + } + KFR_HANDLE_ALL(clamp) + KFR_HANDLE_ALL(clampm1) + KFR_SPEC_FN(in_clamp, clamp) + KFR_SPEC_FN(in_clamp, clampm1) +}; +} + +namespace native +{ +using fn_min = internal::in_min_max<>::fn_min; +template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)> +KFR_INLINE ftype<common_type<T1, T2>> + +min(const T1& x, const T2& y) +{ + return internal::in_min_max<>::min(x, y); +} + +template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)> +KFR_INLINE expr_func<fn_min, E1, E2> min(E1&& x, E2&& y) +{ + return { fn_min(), std::forward<E1>(x), std::forward<E2>(y) }; +} +using fn_max = internal::in_min_max<>::fn_max; +template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)> +KFR_INLINE ftype<common_type<T1, T2>> + +max(const T1& x, const T2& y) +{ + return internal::in_min_max<>::max(x, y); +} + +template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)> +KFR_INLINE expr_func<fn_max, E1, E2> max(E1&& x, E2&& y) +{ + return { fn_max(), std::forward<E1>(x), std::forward<E2>(y) + + }; +} +using fn_minabs = internal::in_minabs_maxabs<>::fn_minabs; +template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)> +KFR_INLINE ftype<common_type<T1, T2>> + +minabs(const 
T1& x, const T2& y) +{ + return internal::in_minabs_maxabs<>::minabs(x, y); +} + +template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)> +KFR_INLINE expr_func<fn_minabs, E1, E2> minabs(E1&& x, E2&& y) +{ + return { fn_minabs(), std::forward<E1>(x), std::forward<E2>(y) + + }; +} +using fn_maxabs = internal::in_minabs_maxabs<>::fn_maxabs; +template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)> +KFR_INLINE ftype<common_type<T1, T2>> + +maxabs(const T1& x, const T2& y) +{ + return internal::in_minabs_maxabs<>::maxabs(x, y); +} + +template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)> +KFR_INLINE expr_func<fn_maxabs, E1, E2> maxabs(E1&& x, E2&& y) +{ + return { fn_maxabs(), std::forward<E1>(x), std::forward<E2>(y) + + }; +} +using fn_clamp = internal::in_clamp<>::fn_clamp; +template <typename T1, typename T2, typename T3, KFR_ENABLE_IF(is_numeric_args<T1, T2, T3>::value)> +KFR_INLINE ftype<common_type<T1, T2, T3>> + +clamp(const T1& x, const T2& l, const T3& h) +{ + return internal::in_clamp<>::clamp(x, l, h); +} + +template <typename E1, typename E2, typename E3, KFR_ENABLE_IF(is_input_expressions<E1, E2, E3>::value)> +KFR_INLINE expr_func<fn_clamp, E1, E2, E3> clamp(E1&& x, E2&& l, E3&& h) +{ + return { fn_clamp(), std::forward<E1>(x), std::forward<E2>(l), std::forward<E3>(h) + + }; +} +using fn_clampm1 = internal::in_clamp<>::fn_clampm1; +template <typename T1, typename T2, typename T3, KFR_ENABLE_IF(is_numeric_args<T1, T2, T3>::value)> +KFR_INLINE ftype<common_type<T1, T2, T3>> + +clampm1(const T1& x, const T2& l, const T3& h) +{ + return internal::in_clamp<>::clampm1(x, l, h); +} + +template <typename E1, typename E2, typename E3, KFR_ENABLE_IF(is_input_expressions<E1, E2, E3>::value)> +KFR_INLINE expr_func<fn_clampm1, E1, E2, E3> clampm1(E1&& x, E2&& l, E3&& h) +{ + return { fn_clampm1(), std::forward<E1>(x), std::forward<E2>(l), std::forward<E3>(h) + + }; +} + +using 
fn_clamp = internal::in_clamp<>::fn_clamp; +template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)> +KFR_INLINE ftype<common_type<T1, T2>> + +clamp(const T1& x, const T2& h) +{ + return internal::in_clamp<>::clamp(x, h); +} + +template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)> +KFR_INLINE expr_func<fn_clamp, E1, E2> clamp(E1&& x, E2&& h) +{ + return { fn_clamp(), std::forward<E1>(x), std::forward<E2>(h) + + }; +} +using fn_clampm1 = internal::in_clamp<>::fn_clampm1; +template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)> +KFR_INLINE ftype<common_type<T1, T2>> + +clampm1(const T1& x, const T2& h) +{ + return internal::in_clamp<>::clampm1(x, h); +} + +template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)> +KFR_INLINE expr_func<fn_clampm1, E1, E2> clampm1(E1&& x, E2&& h) +{ + return { fn_clampm1(), std::forward<E1>(x), std::forward<E2>(h) + + }; +} +} +} + +#pragma clang diagnostic pop diff --git a/include/kfr/base/operators.hpp b/include/kfr/base/operators.hpp @@ -0,0 +1,663 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. 
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. + */ +#pragma once + +#include "function.hpp" +#include <algorithm> +#include <utility> + +namespace kfr +{ +namespace internal +{ + +template <typename T, typename ReduceFn> +KFR_INLINE T horizontal_impl(vec<T, 1> value, ReduceFn&&) +{ + return T(value[0]); +} + +template <typename T, size_t N, typename ReduceFn, KFR_ENABLE_IF(N > 1 && is_poweroftwo(N))> +KFR_INLINE T horizontal_impl(vec<T, N> value, ReduceFn&& reduce) +{ + return horizontal_impl(reduce(low(value), high(value)), std::forward<ReduceFn>(reduce)); +} +template <typename T, size_t N, typename ReduceFn, KFR_ENABLE_IF(N > 1 && !is_poweroftwo(N))> +KFR_INLINE T horizontal_impl(vec<T, N> value, ReduceFn&& reduce) +{ + const T initial = reduce(initialvalue<T>()); + return horizontal_impl(widen<next_poweroftwo(N)>(value, initial), std::forward<ReduceFn>(reduce)); +} +} + +template <typename T, size_t N, typename ReduceFn> +KFR_INLINE T horizontal(vec<T, N> value, ReduceFn&& reduce) +{ + return internal::horizontal_impl(value, std::forward<ReduceFn>(reduce)); +} + +template <typename T> +constexpr inline T add(T x) +{ + return x; +} +template <typename T1, typename T2, typename... Ts> +constexpr inline common_type<T1, T2, Ts...> add(T1 x, T2 y, Ts... 
rest) +{ + return x + add(std::forward<T2>(y), std::forward<Ts>(rest)...); +} +/* Identity element of addition, used by horizontal reducers. */ +template <typename T> +constexpr inline T add(initialvalue<T>) +{ + return T(0); +} +KFR_FN(add) + +/* Expression overload: builds a lazy node that adds two input expressions. */ +template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)> +KFR_INLINE expr_func<fn_add, E1, E2> add(E1&& x, E2&& y) +{ + return { fn_add(), std::forward<E1>(x), std::forward<E2>(y) }; +} +/* fix: the ternary overload must constrain and carry all three operand expressions; was expr_func<fn_add, E1> with is_input_expressions<E1, E2>, which cannot hold the four brace-initializers below. */ +template <typename E1, typename E2, typename E3, KFR_ENABLE_IF(is_input_expressions<E1, E2, E3>::value)> +KFR_INLINE expr_func<fn_add, E1, E2, E3> add(E1&& x, E2&& y, E3&& z) +{ + return { fn_add(), std::forward<E1>(x), std::forward<E2>(y), std::forward<E3>(z) + + }; +} + +template <typename T1, typename T2> +constexpr inline common_type<T1, T2> sub(T1 x, T2 y) +{ + return x - y; +} +template <typename T> +constexpr inline T sub(initialvalue<T>) +{ + return T(0); +} +KFR_FN(sub) + +template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)> +KFR_INLINE expr_func<fn_sub, E1, E2> sub(E1&& x, E2&& y) +{ + return { fn_sub(), std::forward<E1>(x), std::forward<E2>(y) + + }; +} + +template <typename T1> +constexpr inline T1 mul(T1 x) +{ + return x; +} +template <typename T1, typename T2, typename... Ts> +constexpr inline common_type<T1, T2, Ts...> mul(T1 x, T2 y, Ts...
rest) +{ + return x * mul(std::forward<T2>(y), std::forward<Ts>(rest)...); +} + +/* Identity element of multiplication, used by horizontal reducers. */ +template <typename T> +constexpr inline T mul(initialvalue<T>) +{ + return T(1); +} +KFR_FN(mul) +template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)> +KFR_INLINE expr_func<fn_mul, E1, E2> mul(E1&& x, E2&& y) +{ + return { fn_mul(), std::forward<E1>(x), std::forward<E2>(y) }; +} +/* fix: the ternary overload must constrain and carry all three operand expressions; was expr_func<fn_mul, E1> with is_input_expressions<E1, E2>, which cannot hold the four brace-initializers below. */ +template <typename E1, typename E2, typename E3, KFR_ENABLE_IF(is_input_expressions<E1, E2, E3>::value)> +KFR_INLINE expr_func<fn_mul, E1, E2, E3> mul(E1&& x, E2&& y, E3&& z) +{ + return { fn_mul(), std::forward<E1>(x), std::forward<E2>(y), std::forward<E3>(z) }; +} + +/* Returns x squared. */ +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> +constexpr inline T1 sqr(T1 x) +{ + return x * x; +} +KFR_FN(sqr) +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> +KFR_INLINE expr_func<fn_sqr, E1> sqr(E1&& x) +{ + return { fn_sqr(), std::forward<E1>(x) }; +} + +/* Returns x cubed. */ +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> +constexpr inline T1 cub(T1 x) +{ + return sqr(x) * x; +} +KFR_FN(cub) + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> +KFR_INLINE expr_func<fn_cub, E1> cub(E1&& x) +{ + return { fn_cub(), std::forward<E1>(x) + + }; +} + +template <typename T> +constexpr inline T pow2(T x) +{ + return sqr(x); +} + +template <typename T> +constexpr inline T pow3(T x) +{ + return cub(x); +} + +template <typename T> +constexpr inline T pow4(T x) +{ + return sqr(sqr(x)); +} + +template <typename T> +constexpr inline T pow5(T x) +{ + return pow4(x) * x; +} +KFR_FN(pow2) +KFR_FN(pow3) +KFR_FN(pow4) +KFR_FN(pow5) + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> +KFR_INLINE expr_func<fn_pow2, E1> pow2(E1&& x) +{ + return { fn_pow2(), std::forward<E1>(x) + + }; +} +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> +KFR_INLINE expr_func<fn_pow3, E1> pow3(E1&& x) +{ + return { fn_pow3(), std::forward<E1>(x) + + }; +} +template 
<typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> +KFR_INLINE expr_func<fn_pow4, E1> pow4(E1&& x) +{ + return { fn_pow4(), std::forward<E1>(x) + + }; +} +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> +KFR_INLINE expr_func<fn_pow5, E1> pow5(E1&& x) +{ + return { fn_pow5(), std::forward<E1>(x) + + }; +} + +/// Raise x to the power base $x^{base}$ +/// @code +/// CHECK( ipow( 10, 3 ) == 1000 ); +/// CHECK( ipow( 0.5, 2 ) == 0.25 ); +/// @endcode +template <typename T> +constexpr inline T ipow(T x, int base) +{ + T result = T(1); + while (base) + { + if (base & 1) + result *= x; + base >>= 1; + x *= x; + } + return result; +} +KFR_FN(ipow) + +template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)> +KFR_INLINE expr_func<fn_ipow, E1, E2> ipow(E1&& x, E2&& b) +{ + return { fn_ipow(), std::forward<E1>(x), std::forward<E2>(b) + + }; +} + +/// Return square of the sum of all arguments +/// *Example*:: +/// +/// CHECK(sqrsum(1,2,3) == 36); +template <typename T1, typename... Ts> +constexpr inline common_type<T1, Ts...> sqrsum(T1 x, Ts... 
rest) +{ + return sqr(add(x, std::forward<Ts>(rest)...)); +} + +template <typename T1, typename T2> +constexpr inline common_type<T1, T2> sqrdiff(T1 x, T2 y) +{ + return sqr(x - y); +} +KFR_FN(sqrsum) +KFR_FN(sqrdiff) + +/// Division +template <typename T1, typename T2> +inline common_type<T1, T2> div(T1 x, T2 y) +{ + return x / y; +} +KFR_FN(div) + +/// Remainder +template <typename T1, typename T2> +inline common_type<T1, T2> rem(T1 x, T2 y) +{ + return x % y; +} +KFR_FN(rem) + +/// Negation +template <typename T1> +inline T1 neg(T1 x) +{ + return -x; +} +KFR_FN(neg) + +/// Bitwise Not +template <typename T1> +inline T1 bitwisenot(T1 x) +{ + return ~x; +} +KFR_FN(bitwisenot) + +/// Bitwise And +template <typename T1, typename T2> +inline common_type<T1, T2> bitwiseand(T1 x, T2 y) +{ + return x & y; +} +template <typename T> +constexpr inline T bitwiseand(initialvalue<T>) +{ + return internal::allones<subtype<T>>; +} +KFR_FN(bitwiseand) + +/// Bitwise And-Not +template <typename T1, typename T2> +inline common_type<T1, T2> bitwiseandnot(T1 x, T2 y) +{ + return x & ~y; +} +template <typename T> +constexpr inline T bitwiseandnot(initialvalue<T>) +{ + return internal::allones<subtype<T>>; +} +KFR_FN(bitwiseandnot) + +/// Bitwise Or +template <typename T1, typename T2> +inline common_type<T1, T2> bitwiseor(T1 x, T2 y) +{ + return x | y; +} +template <typename T> +constexpr inline T bitwiseor(initialvalue<T>) +{ + return subtype<T>(); +} +KFR_FN(bitwiseor) + +/// Bitwise Xor (Exclusive Or) +template <typename T1, typename T2> +inline common_type<T1, T2> bitwisexor(T1 x, T2 y) +{ + return x ^ y; +} +template <typename T> +constexpr inline T bitwisexor(initialvalue<T>) +{ + return subtype<T>(); +} +KFR_FN(bitwisexor) + +/// Bitwise Left shift +template <typename T1, typename T2> +inline common_type<T1, T2> shl(T1 left, T2 right) +{ + return left << right; +} +KFR_FN(shl) + +/// Bitwise Right shift +template <typename T1, typename T2> +inline common_type<T1, T2> shr(T1 
left, T2 right) +{ + return left >> right; +} +KFR_FN(shr) + +/// Bitwise Left Rotate +template <typename T1, typename T2> +inline common_type<T1, T2> rol(T1 left, T2 right) +{ + return shl(left, right) | shr(left, (static_cast<subtype<T1>>(typebits<T1>::bits) - right)); +} +KFR_FN(rol) + +/// Bitwise Right Rotate +template <typename T1, typename T2> +inline common_type<T1, T2> ror(T1 left, T2 right) +{ + return shr(left, right) | shl(left, (static_cast<subtype<T1>>(typebits<T1>::bits) - right)); +} +KFR_FN(ror) + +template <typename T1, typename T2> +inline common_type<T1, T2> equal(T1 x, T2 y) +{ + return bitcast<subtype<common_type<T1, T2>>>(x == y); +} +template <typename T1, typename T2> +inline common_type<T1, T2> notequal(T1 x, T2 y) +{ + return bitcast<subtype<common_type<T1, T2>>>(x != y); +} +template <typename T1, typename T2> +inline common_type<T1, T2> less(T1 x, T2 y) +{ + return bitcast<subtype<common_type<T1, T2>>>(x < y); +} +template <typename T1, typename T2> +inline common_type<T1, T2> greater(T1 x, T2 y) +{ + return bitcast<subtype<common_type<T1, T2>>>(x > y); +} +template <typename T1, typename T2> +inline common_type<T1, T2> lessorequal(T1 x, T2 y) +{ + return bitcast<subtype<common_type<T1, T2>>>(x <= y); +} +template <typename T1, typename T2> +inline common_type<T1, T2> greaterorequal(T1 x, T2 y) +{ + return bitcast<subtype<common_type<T1, T2>>>(x >= y); +} +KFR_FN(equal) +KFR_FN(notequal) +KFR_FN(less) +KFR_FN(greater) +KFR_FN(lessorequal) +KFR_FN(greaterorequal) + +/// Fused Multiply-Add +template <typename T1, typename T2, typename T3> +constexpr inline common_type<T1, T2, T3> fmadd(T1 x, T2 y, T3 z) +{ + return x * y + z; +} +/// Fused Multiply-Sub +template <typename T1, typename T2, typename T3> +constexpr inline common_type<T1, T2, T3> fmsub(T1 x, T2 y, T3 z) +{ + return x * y - z; +} +KFR_FN(fmadd) +KFR_FN(fmsub) + +/// Linear blend of `x` and `y` (`c` must be in the range 0...+1) +/// Returns `x + ( y - x ) * c` +template 
<typename T1, typename T2, typename T3> +constexpr inline common_type<T1, T2, T3> mix(T1 c, T2 x, T3 y) +{ + return fmadd(c, y - x, x); +} + +/// Linear blend of `x` and `y` (`c` must be in the range -1...+1) +template <typename T1, typename T2, typename T3> +constexpr inline common_type<T1, T2, T3> mixs(T1 c, T2 x, T3 y) +{ + return mix(fmadd(c, 0.5, 0.5), x, y); +} +KFR_FN(mix) +KFR_FN(mixs) + +namespace internal +{ + +template <typename T1, typename T2> +constexpr KFR_INLINE T1 horner(T1, T2 c0) +{ + return c0; +} + +template <typename T1, typename T2, typename T3, typename... Ts> +constexpr KFR_INLINE T1 horner(T1 x, T2 c0, T3 c1, Ts... values) +{ + return fmadd(horner(x, c1, values...), x, c0); +} +} + +/// Calculate polynomial using Horner's method +/// +/// ``horner(x, 1, 2, 3)`` is equivalent to \(3x^2 + 2x + 1\) +template <typename T1, typename... Ts> +constexpr KFR_INLINE T1 horner(T1 x, Ts... c) +{ + return internal::horner(x, c...); +} +KFR_FN(horner) + +/// Calculate Multiplicative Inverse of `x` +/// Returns `1/x` +template <typename T> +constexpr KFR_INLINE T reciprocal(T x) +{ + static_assert(std::is_floating_point<subtype<T>>::value, "T must be floating point type"); + return subtype<T>(1) / x; +} +KFR_FN(reciprocal) + +/// Multiplies x by the sign of y (flips the sign bit of x where y is negative). +template <typename T, size_t N> +KFR_INLINE vec<T, N> mulsign(vec<T, N> x, vec<T, N> y) +{ + return x ^ (y & internal::highbitmask<T>); +} +KFR_FN_S(mulsign) +KFR_FN(mulsign) + +/// Returns a value with the magnitude of `x` and the sign of `y`. +/* fix: keep x with its sign bit cleared and combine with y's sign bit; previously both operands were masked to their sign bits only, discarding the magnitude of x entirely. */ +template <typename T, size_t N> +constexpr KFR_INLINE vec<T, N> copysign(vec<T, N> x, vec<T, N> y) +{ + return (x ^ (x & internal::highbitmask<T>)) | (y & internal::highbitmask<T>); +} + +/* NOTE(review): truncation-based fmod; quotients exceeding the range of itype<T> will overflow — confirm intended input domain. */ +template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)> +KFR_INLINE vec<T, N> fmod(vec<T, N> x, vec<T, N> y) +{ + return x - cast<itype<T>>(x / y) * y; +} + +KFR_FN_S(fmod) +KFR_FN(fmod) + +template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)> +constexpr KFR_INLINE vec<T, N> rem(vec<T, N> x, vec<T, N> y) +{ + return x % y; +} +template <typename T,
size_t N, KFR_ENABLE_IF(is_f_class<T>::value)> +KFR_INLINE vec<T, N> rem(vec<T, N> x, vec<T, N> y) +{ + return fmod(x, y); +} + +template <typename T, size_t N> +KFR_INLINE mask<T, N> isnan(vec<T, N> x) +{ + return x != x; +} + +template <typename T, size_t N> +KFR_INLINE mask<T, N> isinf(vec<T, N> x) +{ + return x == c_infinity<T> || x == -c_infinity<T>; +} + +template <typename T, size_t N> +KFR_INLINE mask<T, N> isfinite(vec<T, N> x) +{ + return !isnan(x) && !isinf(x); +} + +template <typename T, size_t N> +KFR_INLINE mask<T, N> isnegative(vec<T, N> x) +{ + return (x & internal::highbitmask<T>) != 0; +} + +template <typename T, size_t N> +KFR_INLINE mask<T, N> ispositive(vec<T, N> x) +{ + return !isnegative(x); +} + +template <typename T, size_t N> +KFR_INLINE mask<T, N> iszero(vec<T, N> x) +{ + return x == T(); +} + +/// Swap byte order +template <typename T, size_t N, KFR_ENABLE_IF(sizeof(vec<T, N>) > 8)> +KFR_INLINE vec<T, N> swapbyteorder(vec<T, N> x) +{ + return bitcast<T>(swap<sizeof(T)>(bitcast<u8>(x))); +} +template <typename T, KFR_ENABLE_IF(sizeof(T) == 8)> +KFR_INLINE T swapbyteorder(T x) +{ + return reinterpret_cast<const T&>(__builtin_bswap64(reinterpret_cast<const u64&>(x))); +} +template <typename T, KFR_ENABLE_IF(sizeof(T) == 4)> +KFR_INLINE T swapbyteorder(T x) +{ + return reinterpret_cast<const T&>(__builtin_bswap32(reinterpret_cast<const u32&>(x))); +} +template <typename T, KFR_ENABLE_IF(sizeof(T) == 2)> +KFR_INLINE T swapbyteorder(T x) +{ + return reinterpret_cast<const T&>(__builtin_bswap16(reinterpret_cast<const u16&>(x))); +} +KFR_FN(swapbyteorder) + +/// Sum all elements of the vector +template <typename T, size_t N> +KFR_INLINE T hadd(vec<T, N> value) +{ + return horizontal(value, fn_add()); +} +KFR_FN(hadd) + +/// Multiply all elements of the vector +template <typename T, size_t N> +KFR_INLINE T hmul(vec<T, N> value) +{ + return horizontal(value, fn_mul()); +} +KFR_FN(hmul) + +template <typename T, size_t N> +KFR_INLINE T 
hbitwiseand(vec<T, N> value) +{ + return horizontal(value, fn_bitwiseand()); +} +KFR_FN(hbitwiseand) +template <typename T, size_t N> +KFR_INLINE T hbitwiseor(vec<T, N> value) +{ + return horizontal(value, fn_bitwiseor()); +} +KFR_FN(hbitwiseor) +template <typename T, size_t N> +KFR_INLINE T hbitwisexor(vec<T, N> value) +{ + return horizontal(value, fn_bitwisexor()); +} +KFR_FN(hbitwisexor) + +/// Calculate the Dot-Product of two vectors +template <typename T, size_t N> +KFR_INLINE T dot(vec<T, N> x, vec<T, N> y) +{ + return hadd(x * y); +} +KFR_FN(dot) + +/// Calculate the Arithmetic mean of all elements in the vector +template <typename T, size_t N> +KFR_INLINE T avg(vec<T, N> value) +{ + return hadd(value) / N; +} +KFR_FN(avg) + +/// Calculate the RMS of all elements in the vector +template <typename T, size_t N> +KFR_INLINE T rms(vec<T, N> value) +{ + return internal::builtin_sqrt(hadd(value * value) / N); +} +KFR_FN(rms) + +template <typename T, size_t N, KFR_ENABLE_IF(N >= 2)> +KFR_INLINE vec<T, N> subadd(vec<T, N> a, vec<T, N> b) +{ + return blend<1, 0>(a + b, a - b); +} +template <typename T, size_t N, KFR_ENABLE_IF(N >= 2)> +KFR_INLINE vec<T, N> addsub(vec<T, N> a, vec<T, N> b) +{ + return blend<0, 1>(a + b, a - b); +} +KFR_FN(subadd) +KFR_FN(addsub) + +template <typename T, size_t N> +KFR_INLINE vec<T, N> negeven(const vec<T, N>& x) +{ + return x ^ broadcast<N / 2>(-T(), T()); +} +template <typename T, size_t N> +KFR_INLINE vec<T, N> negodd(const vec<T, N>& x) +{ + return x ^ broadcast<N / 2>(T(), -T()); +} +} diff --git a/include/kfr/base/read_write.hpp b/include/kfr/base/read_write.hpp @@ -0,0 +1,201 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. + */ +#pragma once + +#include "shuffle.hpp" +#include "types.hpp" +#include "vec.hpp" + +namespace kfr +{ + +template <size_t N, bool A = false, typename T> +KFR_INLINE vec<T, N> read(const T* src) +{ + return internal_read_write::read<N, A, T>(src); +} + +template <bool A = false, size_t N, typename T> +KFR_INLINE void write(T* dest, vec<T, N> value) +{ + internal_read_write::write<A, N, T>(dest, value); +} + +template <typename... Indices, typename T, size_t Nout = 1 + sizeof...(Indices)> +KFR_INLINE vec<T, Nout> gather(const T* base, size_t index, Indices... indices) +{ + return make_vector(base[index], base[indices]...); +} + +template <size_t Index, size_t... Indices, typename T, size_t Nout = 1 + sizeof...(Indices)> +KFR_INLINE vec<T, Nout> gather(const T* base) +{ + return make_vector(base[Index], base[Indices]...); +} + +template <size_t Index, size_t... Indices, typename T, size_t N, size_t InIndex = 0> +KFR_INLINE void scatter(const T* base, vec<T, N> value) +{ + base[Index] = value[InIndex]; + scatter<Indices..., T, N, InIndex + 1>(base, value); +} + +namespace internal +{ +template <typename T, size_t N, size_t... Indices> +KFR_INLINE vec<T, N> gather(const T* base, vec<u32, N> indices, csizes_t<Indices...>) +{ + return make_vector(base[indices[Indices]]...); +} +template <size_t Nout, size_t Stride, typename T, size_t... 
Indices> +KFR_INLINE vec<T, Nout> gather_stride(const T* base, csizes_t<Indices...>) +{ + return make_vector(base[Indices * Stride]...); +} +template <size_t Nout, typename T, size_t... Indices> +KFR_INLINE vec<T, Nout> gather_stride_s(const T* base, size_t stride, csizes_t<Indices...>) +{ + return make_vector(base[Indices * stride]...); +} +} + +template <typename T, size_t N> +KFR_INLINE vec<T, N> gather(const T* base, vec<u32, N> indices) +{ + return internal::gather(base, indices, csizeseq<N>); +} + +template <size_t Nout, typename T> +KFR_INLINE vec<T, Nout> gather_stride(const T* base, size_t stride) +{ + return internal::gather_stride_s<Nout>(base, stride, csizeseq<Nout>); +} + +template <size_t Nout, size_t Stride, typename T> +KFR_INLINE vec<T, Nout> gather_stride(const T* base) +{ + return internal::gather_stride<Nout, Stride>(base, csizeseq<Nout>); +} + +template <size_t groupsize, typename T, size_t N, typename IT, size_t... Indices> +KFR_INLINE vec<T, N * groupsize> gather_helper(const T* base, vec<IT, N> offset, csizes_t<Indices...>) +{ + return concat(read<groupsize>(base + groupsize * (*offset)[Indices])...); +} +template <size_t groupsize = 1, typename T, size_t N, typename IT> +KFR_INLINE vec<T, N * groupsize> gather(const T* base, vec<IT, N> offset) +{ + return gather_helper<groupsize>(base, offset, csizeseq<N>); +} + +template <size_t groupsize, typename T, size_t N, size_t Nout = N* groupsize, typename IT, size_t... Indices> +KFR_INLINE void scatter_helper(T* base, vec<IT, N> offset, vec<T, Nout> value, csizes_t<Indices...>) +{ + swallow{ (write(base + groupsize * (*offset)[Indices], slice<Indices * groupsize, groupsize>(value)), + 0)... 
}; +} +template <size_t groupsize = 1, typename T, size_t N, size_t Nout = N* groupsize, typename IT> +KFR_INLINE void scatter(T* base, vec<IT, N> offset, vec<T, Nout> value) +{ + return scatter_helper<groupsize>(base, offset, value, csizeseq<N>); +} + +template <typename T> +constexpr T partial_masks[] = { internal::allones<T>, + internal::allones<T>, + internal::allones<T>, + internal::allones<T>, + internal::allones<T>, + internal::allones<T>, + internal::allones<T>, + internal::allones<T>, + internal::allones<T>, + internal::allones<T>, + internal::allones<T>, + internal::allones<T>, + internal::allones<T>, + internal::allones<T>, + internal::allones<T>, + internal::allones<T>, + internal::allones<T>, + internal::allones<T>, + internal::allones<T>, + internal::allones<T>, + internal::allones<T>, + internal::allones<T>, + internal::allones<T>, + internal::allones<T>, + internal::allones<T>, + internal::allones<T>, + internal::allones<T>, + internal::allones<T>, + internal::allones<T>, + internal::allones<T>, + internal::allones<T>, + internal::allones<T>, + T(), + T(), + T(), + T(), + T(), + T(), + T(), + T(), + T(), + T(), + T(), + T(), + T(), + T(), + T(), + T(), + T(), + T(), + T(), + T(), + T(), + T(), + T(), + T(), + T(), + T(), + T(), + T(), + T(), + T(), + T(), + T() }; + +template <typename T, size_t N> +KFR_INLINE vec<T, N> partial_mask(size_t index) +{ + static_assert(N <= arraysize(partial_masks<T>) / 2, + "N must not be greater than half of partial_masks expression_array"); + return read<N>(&partial_masks<T>[0] + arraysize(partial_masks<T>) / 2 - index); +} +template <typename T, size_t N> +KFR_INLINE vec<T, N> partial_mask(size_t index, vec_t<T, N>) +{ + return partial_mask<T, N>(index); +} +} diff --git a/include/kfr/base/round.hpp b/include/kfr/base/round.hpp @@ -0,0 +1,298 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the 
terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. + */ +#pragma once + +#include "function.hpp" +#include "operators.hpp" + +namespace kfr +{ + +#define KFR_mm_trunc_ps(V) _mm_round_ps((V), _MM_FROUND_TRUNC) +#define KFR_mm_roundnearest_ps(V) _mm_round_ps((V), _MM_FROUND_NINT) +#define KFR_mm_trunc_pd(V) _mm_round_pd((V), _MM_FROUND_TRUNC) +#define KFR_mm_roundnearest_pd(V) _mm_round_pd((V), _MM_FROUND_NINT) + +#define KFR_mm_trunc_ss(V) _mm_round_ss(_mm_setzero_ps(), (V), _MM_FROUND_TRUNC) +#define KFR_mm_roundnearest_ss(V) _mm_round_ss(_mm_setzero_ps(), (V), _MM_FROUND_NINT) +#define KFR_mm_trunc_sd(V) _mm_round_sd(_mm_setzero_pd(), (V), _MM_FROUND_TRUNC) +#define KFR_mm_roundnearest_sd(V) _mm_round_sd(_mm_setzero_pd(), (V), _MM_FROUND_NINT) + +#define KFR_mm_floor_ss(V) _mm_floor_ss(_mm_setzero_ps(), (V)) +#define KFR_mm_floor_sd(V) _mm_floor_sd(_mm_setzero_pd(), (V)) +#define KFR_mm_ceil_ss(V) _mm_ceil_ss(_mm_setzero_ps(), (V)) +#define KFR_mm_ceil_sd(V) _mm_ceil_sd(_mm_setzero_pd(), (V)) + +#define KFR_mm256_trunc_ps(V) _mm256_round_ps((V), _MM_FROUND_TRUNC) +#define KFR_mm256_roundnearest_ps(V) _mm256_round_ps((V), _MM_FROUND_NINT) +#define KFR_mm256_trunc_pd(V) _mm256_round_pd((V), _MM_FROUND_TRUNC) +#define 
KFR_mm256_roundnearest_pd(V) _mm256_round_pd((V), _MM_FROUND_NINT) + +namespace internal +{ + +template <cpu_t c = cpu_t::native> +struct in_round : in_round<older(c)> +{ + struct fn_floor : in_round<older(c)>::fn_floor, fn_disabled + { + }; + struct fn_ceil : in_round<older(c)>::fn_ceil, fn_disabled + { + }; + struct fn_round : in_round<older(c)>::fn_round, fn_disabled + { + }; + struct fn_trunc : in_round<older(c)>::fn_trunc, fn_disabled + { + }; + struct fn_fract : in_round<older(c)>::fn_fract, fn_disabled + { + }; +}; + +template <> +struct in_round<cpu_t::sse2> +{ + constexpr static cpu_t cpu = cpu_t::sse2; + + template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)> + KFR_SINTRIN vec<T, N> floor(vec<T, N> value) + { + return value; + } + template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)> + KFR_SINTRIN vec<T, N> ceil(vec<T, N> value) + { + return value; + } + template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)> + KFR_SINTRIN vec<T, N> trunc(vec<T, N> value) + { + return value; + } + template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)> + KFR_SINTRIN vec<T, N> round(vec<T, N> value) + { + return value; + } + template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)> + KFR_SINTRIN vec<T, N> fract(vec<T, N>) + { + return T(); + } + + KFR_SINTRIN f32sse floor(f32sse x) + { + f32sse t = cast<f32>(cast<i32>(x)); + return t - (bitcast<f32>(x < t) & 1.f); + } + KFR_SINTRIN f64sse floor(f64sse x) + { + f64sse t = cast<f64>(cast<i64>(x)); + return t - (bitcast<f64>(x < t) & 1.0); + } + KFR_SINTRIN f32sse ceil(f32sse x) + { + f32sse t = cast<f32>(cast<i32>(x)); + return t + (bitcast<f32>(x > t) & 1.f); + } + KFR_SINTRIN f64sse ceil(f64sse x) + { + f64sse t = cast<f64>(cast<i64>(x)); + return t + (bitcast<f64>(x > t) & 1.0); + } + KFR_SINTRIN f32sse round(f32sse x) { return cast<f32>(cast<i32>(x + mulsign(f32x4(0.5f), x))); } + KFR_SINTRIN f64sse round(f64sse x) { return cast<f64>(cast<i64>(x + 
mulsign(f64x2(0.5), x))); } + KFR_SINTRIN f32sse trunc(f32sse x) { return cast<f32>(cast<i32>(x)); } + KFR_SINTRIN f64sse trunc(f64sse x) { return cast<f64>(cast<i64>(x)); } + KFR_SINTRIN f32sse fract(f32sse x) { return x - floor(x); } + KFR_SINTRIN f64sse fract(f64sse x) { return x - floor(x); } + + KFR_HANDLE_ALL(floor) + KFR_HANDLE_ALL(ceil) + KFR_HANDLE_ALL(round) + KFR_HANDLE_ALL(trunc) + KFR_HANDLE_ALL(fract) + KFR_SPEC_FN(in_round, floor) + KFR_SPEC_FN(in_round, ceil) + KFR_SPEC_FN(in_round, round) + KFR_SPEC_FN(in_round, trunc) + KFR_SPEC_FN(in_round, fract) +}; + +template <> +struct in_round<cpu_t::sse41> : in_round<cpu_t::sse2> +{ + constexpr static cpu_t cpu = cpu_t::sse41; + + KFR_SINTRIN f32sse floor(f32sse value) { return _mm_floor_ps(*value); } + KFR_SINTRIN f32sse ceil(f32sse value) { return _mm_ceil_ps(*value); } + KFR_SINTRIN f32sse trunc(f32sse value) { return KFR_mm_trunc_ps(*value); } + KFR_SINTRIN f32sse round(f32sse value) { return KFR_mm_roundnearest_ps(*value); } + KFR_SINTRIN f64sse floor(f64sse value) { return _mm_floor_pd(*value); } + KFR_SINTRIN f64sse ceil(f64sse value) { return _mm_ceil_pd(*value); } + KFR_SINTRIN f64sse trunc(f64sse value) { return KFR_mm_trunc_pd(*value); } + KFR_SINTRIN f64sse round(f64sse value) { return KFR_mm_roundnearest_pd(*value); } + KFR_SINTRIN f32sse fract(f32sse x) { return x - floor(x); } + KFR_SINTRIN f64sse fract(f64sse x) { return x - floor(x); } + + KFR_HANDLE_ALL(floor) + KFR_HANDLE_ALL(ceil) + KFR_HANDLE_ALL(round) + KFR_HANDLE_ALL(trunc) + KFR_HANDLE_ALL(fract) + KFR_SPEC_FN(in_round, floor) + KFR_SPEC_FN(in_round, ceil) + KFR_SPEC_FN(in_round, round) + KFR_SPEC_FN(in_round, trunc) + KFR_SPEC_FN(in_round, fract) +}; + +template <> +struct in_round<cpu_t::avx1> : in_round<cpu_t::sse41> +{ + constexpr static cpu_t cpu = cpu_t::avx1; + using in_round<cpu_t::sse41>::floor; + using in_round<cpu_t::sse41>::ceil; + using in_round<cpu_t::sse41>::trunc; + using in_round<cpu_t::sse41>::round; + using 
in_round<cpu_t::sse41>::fract; + + KFR_SINTRIN f32avx floor(f32avx value) { return _mm256_floor_ps(*value); } + KFR_SINTRIN f32avx ceil(f32avx value) { return _mm256_ceil_ps(*value); } + KFR_SINTRIN f32avx trunc(f32avx value) { return KFR_mm256_trunc_ps(*value); } + KFR_SINTRIN f32avx round(f32avx value) { return KFR_mm256_roundnearest_ps(*value); } + KFR_SINTRIN f64avx floor(f64avx value) { return _mm256_floor_pd(*value); } + KFR_SINTRIN f64avx ceil(f64avx value) { return _mm256_ceil_pd(*value); } + KFR_SINTRIN f64avx trunc(f64avx value) { return KFR_mm256_trunc_pd(*value); } + KFR_SINTRIN f64avx round(f64avx value) { return KFR_mm256_roundnearest_pd(*value); } + KFR_SINTRIN f32avx fract(f32avx x) { return x - floor(x); } + KFR_SINTRIN f64avx fract(f64avx x) { return x - floor(x); } + + KFR_HANDLE_ALL(floor) + KFR_HANDLE_ALL(ceil) + KFR_HANDLE_ALL(round) + KFR_HANDLE_ALL(trunc) + KFR_HANDLE_ALL(fract) + KFR_SPEC_FN(in_round, floor) + KFR_SPEC_FN(in_round, ceil) + KFR_SPEC_FN(in_round, round) + KFR_SPEC_FN(in_round, trunc) + KFR_SPEC_FN(in_round, fract) +}; + +#undef KFR_mm_trunc_ps +#undef KFR_mm_roundnearest_ps +#undef KFR_mm_trunc_pd +#undef KFR_mm_roundnearest_pd +#undef KFR_mm_trunc_ss +#undef KFR_mm_roundnearest_ss +#undef KFR_mm_trunc_sd +#undef KFR_mm_roundnearest_sd +#undef KFR_mm_floor_ss +#undef KFR_mm_floor_sd +#undef KFR_mm_ceil_ss +#undef KFR_mm_ceil_sd +#undef KFR_mm256_trunc_ps +#undef KFR_mm256_roundnearest_ps +#undef KFR_mm256_trunc_pd +#undef KFR_mm256_roundnearest_pd +} + +namespace native +{ +using fn_floor = internal::in_round<>::fn_floor; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> + +KFR_INTRIN ftype<T1> floor(const T1& x) +{ + return internal::in_round<>::floor(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> + +KFR_INTRIN expr_func<fn_floor, E1> floor(E1&& x) +{ + return { fn_floor(), std::forward<E1>(x) }; +} + +using fn_ceil = internal::in_round<>::fn_ceil; +template <typename T1, 
KFR_ENABLE_IF(is_numeric<T1>::value)> + +KFR_INTRIN ftype<T1> ceil(const T1& x) +{ + return internal::in_round<>::ceil(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> + +KFR_INTRIN expr_func<fn_ceil, E1> ceil(E1&& x) +{ + return { fn_ceil(), std::forward<E1>(x) }; +} + +using fn_round = internal::in_round<>::fn_round; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> + +KFR_INTRIN ftype<T1> round(const T1& x) +{ + return internal::in_round<>::round(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> + +KFR_INTRIN expr_func<fn_round, E1> round(E1&& x) +{ + return { fn_round(), std::forward<E1>(x) }; +} + +using fn_trunc = internal::in_round<>::fn_trunc; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> + +KFR_INTRIN ftype<T1> trunc(const T1& x) +{ + return internal::in_round<>::trunc(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> + +KFR_INTRIN expr_func<fn_trunc, E1> trunc(E1&& x) +{ + return { fn_trunc(), std::forward<E1>(x) }; +} + +using fn_fract = internal::in_round<>::fn_fract; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> +KFR_INTRIN ftype<T1> fract(const T1& x) +{ + return internal::in_round<>::fract(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> +KFR_INTRIN expr_func<fn_fract, E1> fract(E1&& x) +{ + return { fn_fract(), std::forward<E1>(x) }; +} +} +} diff --git a/include/kfr/base/saturation.hpp b/include/kfr/base/saturation.hpp @@ -0,0 +1,172 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. + */ +#pragma once + +#include "function.hpp" +#include "select.hpp" + +#pragma clang diagnostic push +#if CID_HAS_WARNING("-Winaccessible-base") +#pragma clang diagnostic ignored "-Winaccessible-base" +#endif + +namespace kfr +{ + +namespace internal +{ + +template <cpu_t c = cpu_t::native, cpu_t cc = c> +struct in_saturated : in_saturated<older(c), cc> +{ + struct fn_satadd : in_saturated<older(c), cc>::fn_satadd, fn_disabled + { + }; +}; + +template <cpu_t cc> +struct in_saturated<cpu_t::sse2, cc> : in_select<cc> +{ + constexpr static cpu_t cpu = cpu_t::sse2; + +private: + using in_select<cc>::select; + +public: + KFR_SINTRIN u8sse satadd(u8sse x, u8sse y) { return _mm_adds_epu8(*x, *y); } + KFR_SINTRIN i8sse satadd(i8sse x, i8sse y) { return _mm_adds_epi8(*x, *y); } + KFR_SINTRIN u16sse satadd(u16sse x, u16sse y) { return _mm_adds_epu16(*x, *y); } + KFR_SINTRIN i16sse satadd(i16sse x, i16sse y) { return _mm_adds_epi16(*x, *y); } + + KFR_SINTRIN u8sse satsub(u8sse x, u8sse y) { return _mm_subs_epu8(*x, *y); } + KFR_SINTRIN i8sse satsub(i8sse x, i8sse y) { return _mm_subs_epi8(*x, *y); } + KFR_SINTRIN u16sse satsub(u16sse x, u16sse y) { return _mm_subs_epu16(*x, *y); } + KFR_SINTRIN i16sse satsub(i16sse x, i16sse y) { return _mm_subs_epi16(*x, *y); } + + KFR_SINTRIN i32sse satadd(i32sse a, i32sse b) { return saturated_signed_add(a, b); } + 
KFR_SINTRIN i64sse satadd(i64sse a, i64sse b) { return saturated_signed_add(a, b); } + KFR_SINTRIN u32sse satadd(u32sse a, u32sse b) { return saturated_unsigned_add(a, b); } + KFR_SINTRIN u64sse satadd(u64sse a, u64sse b) { return saturated_unsigned_add(a, b); } + + KFR_SINTRIN i32sse satsub(i32sse a, i32sse b) { return saturated_signed_sub(a, b); } + KFR_SINTRIN i64sse satsub(i64sse a, i64sse b) { return saturated_signed_sub(a, b); } + KFR_SINTRIN u32sse satsub(u32sse a, u32sse b) { return saturated_unsigned_sub(a, b); } + KFR_SINTRIN u64sse satsub(u64sse a, u64sse b) { return saturated_unsigned_sub(a, b); } + +private: + /* Generic fallbacks for lane widths without native saturating SSE instructions. */ + template <typename T, size_t N> + KFR_SINTRIN vec<T, N> saturated_signed_add(vec<T, N> a, vec<T, N> b) + { + constexpr size_t shift = typebits<T>::bits - 1; /* sign-bit index of T; was typebits<i32>, which yields 31 even for 64-bit lanes */ + const vec<T, N> sum = a + b; + a = (a >> shift) + allonesvector(a); /* NOTE(review): intended saturation bound is INT_MAX/INT_MIN keyed on a's sign; (a >> shift) + all-ones reads as -1/-2 — verify allonesvector semantics */ + + return select(((a ^ b) | ~(b ^ sum)) >= 0, a, sum); + } + template <typename T, size_t N> + KFR_SINTRIN vec<T, N> saturated_signed_sub(vec<T, N> a, vec<T, N> b) + { + constexpr size_t shift = typebits<T>::bits - 1; /* was typebits<i32>: wrong for 64-bit lanes */ + const vec<T, N> diff = a - b; + a = (a >> shift) + allonesvector(a); + + return select(((a ^ b) & (a ^ diff)) < 0, a, diff); + } + template <typename T, size_t N> + KFR_SINTRIN vec<T, N> saturated_unsigned_add(vec<T, N> a, vec<T, N> b) + { + const vec<T, N> t = allonesvector(a); /* 'constexpr' was ill-formed here: the initializer depends on a runtime argument */ + return select(a > t - b, t, a + b); + } + template <typename T, size_t N> + KFR_SINTRIN vec<T, N> saturated_unsigned_sub(vec<T, N> a, vec<T, N> b) + { + return select(a < b, zerovector(a), a - b); + } + +public: + KFR_HANDLE_ALL(satadd) + KFR_HANDLE_ALL(satsub) + KFR_SPEC_FN(in_saturated, satadd) + KFR_SPEC_FN(in_saturated, satsub) +}; + +template <cpu_t cc> +struct in_saturated<cpu_t::avx2, cc> : in_saturated<cpu_t::sse2, cc> +{ + constexpr static cpu_t cpu = cpu_t::avx2; + using in_saturated<cpu_t::sse2, cc>::satadd; /* was <cpu_t::sse41>, which is not a base of this specialization */ + using in_saturated<cpu_t::sse2, cc>::satsub; + + KFR_SINTRIN u8avx satadd(u8avx x, u8avx y) { return
_mm256_adds_epu8(*x, *y); } + KFR_SINTRIN i8avx satadd(i8avx x, i8avx y) { return _mm256_adds_epi8(*x, *y); } + KFR_SINTRIN u16avx satadd(u16avx x, u16avx y) { return _mm256_adds_epu16(*x, *y); } + KFR_SINTRIN i16avx satadd(i16avx x, i16avx y) { return _mm256_adds_epi16(*x, *y); } + + KFR_SINTRIN u8avx satsub(u8avx x, u8avx y) { return _mm256_subs_epu8(*x, *y); } + KFR_SINTRIN i8avx satsub(i8avx x, i8avx y) { return _mm256_subs_epi8(*x, *y); } + KFR_SINTRIN u16avx satsub(u16avx x, u16avx y) { return _mm256_subs_epu16(*x, *y); } + KFR_SINTRIN i16avx satsub(i16avx x, i16avx y) { return _mm256_subs_epi16(*x, *y); } + + KFR_HANDLE_ALL(satadd) + KFR_HANDLE_ALL(satsub) + KFR_SPEC_FN(in_saturated, satadd) + KFR_SPEC_FN(in_saturated, satsub) +}; +} +namespace native +{ +using fn_satadd = internal::in_saturated<>::fn_satadd; +template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)> +KFR_INLINE ftype<common_type<T1, T2>> + +satadd(const T1& x, const T2& y) +{ + return internal::in_saturated<>::satadd(x, y); +} + +template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)> +KFR_INLINE expr_func<fn_satadd, E1, E2> satadd(E1&& x, E2&& y) +{ + return { fn_satadd(), std::forward<E1>(x), std::forward<E2>(y) }; +} +using fn_satsub = internal::in_saturated<>::fn_satsub; +template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)> +KFR_INLINE ftype<common_type<T1, T2>> + +satsub(const T1& x, const T2& y) +{ + return internal::in_saturated<>::satsub(x, y); +} + +template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)> +KFR_INLINE expr_func<fn_satsub, E1, E2> satsub(E1&& x, E2&& y) +{ + return { fn_satsub(), std::forward<E1>(x), std::forward<E2>(y) + + }; +} +} +} + +#pragma clang diagnostic pop diff --git a/include/kfr/base/select.hpp b/include/kfr/base/select.hpp @@ -0,0 +1,204 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * 
KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. + */ +#pragma once + +#include "function.hpp" + +namespace kfr +{ +namespace internal +{ + +template <cpu_t c> +struct in_select_impl : in_select_impl<older(c)> +{ + struct fn_select : fn_disabled + { + }; +}; + +/* Baseline (sse2) fallback: bitwise blend y^((x^y)&m), valid for any element type. */ +template <> +struct in_select_impl<cpu_t::sse2> +{ + constexpr static cpu_t cpu = cpu_t::sse2; /* renamed from 'cur' for consistency with the other specializations */ + + template <typename T, size_t N> + KFR_SINTRIN vec<T, N> select(vec<T, N> m, vec<T, N> x, vec<T, N> y) + { + return y ^ ((x ^ y) & m); + } + KFR_SPEC_FN(in_select_impl, select) +}; + +template <> +struct in_select_impl<cpu_t::sse41> : in_select_impl<cpu_t::sse2> +{ + constexpr static cpu_t cpu = cpu_t::sse41; + + KFR_SINTRIN u8sse select(u8sse m, u8sse x, u8sse y) { return _mm_blendv_epi8(*y, *x, *m); } + KFR_SINTRIN u16sse select(u16sse m, u16sse x, u16sse y) { return _mm_blendv_epi8(*y, *x, *m); } + KFR_SINTRIN u32sse select(u32sse m, u32sse x, u32sse y) { return _mm_blendv_epi8(*y, *x, *m); } + KFR_SINTRIN u64sse select(u64sse m, u64sse x, u64sse y) { return _mm_blendv_epi8(*y, *x, *m); } + KFR_SINTRIN i8sse select(i8sse m, i8sse x, i8sse y) { return _mm_blendv_epi8(*y, *x, *m); } + KFR_SINTRIN
i16sse select(i16sse m, i16sse x, i16sse y) { return _mm_blendv_epi8(*y, *x, *m); } + KFR_SINTRIN i32sse select(i32sse m, i32sse x, i32sse y) { return _mm_blendv_epi8(*y, *x, *m); } + KFR_SINTRIN i64sse select(i64sse m, i64sse x, i64sse y) { return _mm_blendv_epi8(*y, *x, *m); } + KFR_SINTRIN f32sse select(f32sse m, f32sse x, f32sse y) { return _mm_blendv_ps(*y, *x, *m); } + KFR_SINTRIN f64sse select(f64sse m, f64sse x, f64sse y) { return _mm_blendv_pd(*y, *x, *m); } + + KFR_HANDLE_ALL(select) + KFR_SPEC_FN(in_select_impl, select) +}; + +template <> +struct in_select_impl<cpu_t::avx1> : in_select_impl<cpu_t::sse41> +{ + constexpr static cpu_t cpu = cpu_t::avx1; + using in_select_impl<cpu_t::sse41>::select; + + KFR_SINTRIN f64avx select(f64avx m, f64avx x, f64avx y) { return _mm256_blendv_pd(*y, *x, *m); } + KFR_SINTRIN f32avx select(f32avx m, f32avx x, f32avx y) { return _mm256_blendv_ps(*y, *x, *m); } + + KFR_HANDLE_ALL(select) + KFR_SPEC_FN(in_select_impl, select) +}; + +template <> +struct in_select_impl<cpu_t::avx2> : in_select_impl<cpu_t::avx1> +{ + constexpr static cpu_t cpu = cpu_t::avx2; + using in_select_impl<cpu_t::avx1>::select; + + KFR_SINTRIN KFR_USE_CPU(avx2) u8avx select(u8avx m, u8avx x, u8avx y) + { + return _mm256_blendv_epi8(*y, *x, *m); + } + KFR_SINTRIN KFR_USE_CPU(avx2) u16avx select(u16avx m, u16avx x, u16avx y) + { + return _mm256_blendv_epi8(*y, *x, *m); + } + KFR_SINTRIN KFR_USE_CPU(avx2) u32avx select(u32avx m, u32avx x, u32avx y) + { + return _mm256_blendv_epi8(*y, *x, *m); + } + KFR_SINTRIN KFR_USE_CPU(avx2) u64avx select(u64avx m, u64avx x, u64avx y) + { + return _mm256_blendv_epi8(*y, *x, *m); + } + KFR_SINTRIN KFR_USE_CPU(avx2) i8avx select(i8avx m, i8avx x, i8avx y) + { + return _mm256_blendv_epi8(*y, *x, *m); + } + KFR_SINTRIN KFR_USE_CPU(avx2) i16avx select(i16avx m, i16avx x, i16avx y) + { + return _mm256_blendv_epi8(*y, *x, *m); + } + KFR_SINTRIN KFR_USE_CPU(avx2) i32avx select(i32avx m, i32avx x, i32avx y) + { + return 
_mm256_blendv_epi8(*y, *x, *m); + } + KFR_SINTRIN KFR_USE_CPU(avx2) i64avx select(i64avx m, i64avx x, i64avx y) + { + return _mm256_blendv_epi8(*y, *x, *m); + } + + KFR_HANDLE_ALL(select) + KFR_SPEC_FN(in_select_impl, select) +}; + +template <cpu_t c = cpu_t::native> +struct in_select : in_select_impl<c> +{ + using in_select_impl<c>::select; + + template <typename T, size_t N, typename M> + KFR_SINTRIN vec<T, N> select(mask<M, N> m, vec<T, N> x, vec<T, N> y) + { + static_assert(sizeof(M) == sizeof(T), "select: Incompatible types"); + return in_select_impl<c>::select(bitcast<T>(m), x, y); + } + template <typename T, size_t N, typename M> + KFR_SINTRIN vec<T, N> select(mask<M, N> m, mask<T, N> x, mask<T, N> y) + { + static_assert(sizeof(M) == sizeof(T), "select: Incompatible types"); + return in_select_impl<c>::select(bitcast<T>(m), ref_cast<vec<T, N>>(x), ref_cast<vec<T, N>>(y)); + } + + template <typename T, size_t N, typename M> + KFR_SINTRIN vec<T, N> select(mask<M, N> m, T x, T y) + { + static_assert(sizeof(M) == sizeof(T), "select: Incompatible types"); + return in_select_impl<c>::select(bitcast<T>(m), broadcast<N>(x), broadcast<N>(y)); + } + + template <typename T, size_t N, typename M> + KFR_SINTRIN vec<T, N> select(mask<M, N> m, vec<T, N> x, T y) + { + static_assert(sizeof(M) == sizeof(T), "select: Incompatible types"); + return in_select_impl<c>::select(bitcast<T>(m), x, broadcast<N>(y)); + } + + template <typename T, size_t N, typename M> + KFR_SINTRIN vec<T, N> select(mask<M, N> m, T x, vec<T, N> y) + { + static_assert(sizeof(M) == sizeof(T), "select: Incompatible types"); + return in_select_impl<c>::select(bitcast<T>(m), broadcast<N>(x), y); + } + template <typename T, size_t N, typename M> + KFR_SINTRIN vec<T, N> select(mask<M, N> m, mask<T, N> x, T y) + { + static_assert(sizeof(M) == sizeof(T), "select: Incompatible types"); + return in_select_impl<c>::select(bitcast<T>(m), ref_cast<vec<T, N>>(x), broadcast<N>(y)); + } + + template <typename T, size_t 
N, typename M> + KFR_SINTRIN vec<T, N> select(mask<M, N> m, T x, mask<T, N> y) + { + static_assert(sizeof(M) == sizeof(T), "select: Incompatible types"); + return in_select_impl<c>::select(bitcast<T>(m), broadcast<N>(x), ref_cast<vec<T, N>>(y)); /* was passing m unconverted, unlike every sibling overload; normalize mask element type before dispatch */ + } + KFR_SPEC_FN(in_select, select) + + /* sign(x): -1, 0 or +1 per lane, built from two nested selects. */ + template <typename T, size_t N> + KFR_SINTRIN vec<T, N> sign(vec<T, N> x) + { + return select(x > T(), T(1), select(x < T(), T(-1), T(0))); + } +}; +} + +namespace native +{ +using fn_select = internal::in_select<>::fn_select; +template <typename T1, typename T2, typename T3, KFR_ENABLE_IF(is_numeric_args<T1, T2, T3>::value)> +KFR_INLINE ftype<common_type<T2, T3>> select(const T1& arg1, const T2& arg2, const T3& arg3) +{ + return internal::in_select<>::select(arg1, arg2, arg3); +} +template <typename E1, typename E2, typename E3, KFR_ENABLE_IF(is_input_expressions<E1, E2, E3>::value)> +KFR_INLINE expr_func<fn_select, E1, E2, E3> select(E1&& arg1, E2&& arg2, E3&& arg3) +{ + return { fn_select(), std::forward<E1>(arg1), std::forward<E2>(arg2), std::forward<E3>(arg3) }; +} +} +} diff --git a/include/kfr/base/shuffle.hpp b/include/kfr/base/shuffle.hpp @@ -0,0 +1,582 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. + */ +#pragma once +#include "constants.hpp" +#include "expression.hpp" +#include "types.hpp" +#include "vec.hpp" + +#include <utility> + +namespace kfr +{ + +namespace internal +{ + +template <size_t index, typename T> +constexpr KFR_INLINE T broadcast_get_nth() +{ + return c_qnan<T>; +} + +template <size_t index, typename T, typename... Ts> +constexpr KFR_INLINE T broadcast_get_nth(T x, Ts... rest) +{ + return index == 0 ? x : broadcast_get_nth<index - 1, T>(rest...); +} + +template <typename T, typename... Ts, size_t... indices, size_t Nin = 1 + sizeof...(Ts), + size_t Nout = sizeof...(indices)> +KFR_INLINE constexpr vec<T, Nout> broadcast_helper(csizes_t<indices...>, T x, Ts... rest) +{ + simd<T, Nout> result{ broadcast_get_nth<indices % Nin>(x, rest...)... }; + return result; +} +} + +template <size_t N, typename T, typename... Ts, size_t Nout = N*(2 + sizeof...(Ts))> +constexpr KFR_INLINE vec<T, Nout> broadcast(T x, T y, Ts... 
rest) +{ + return internal::broadcast_helper(csizeseq<Nout>, x, y, rest...); +} +KFR_FN(broadcast) + +template <size_t Ncount, typename T, size_t N> +KFR_INLINE vec<T, N + Ncount> padhigh(vec<T, N> x) +{ + return shufflevector<N + Ncount, internal::shuffle_index_extend<0, N>>(x); +} +KFR_FN(padhigh) + +template <size_t Ncount, typename T, size_t N> +KFR_INLINE vec<T, N + Ncount> padlow(vec<T, N> x) +{ + return shufflevector<N + Ncount, internal::shuffle_index_extend<Ncount, N>>(x); +} +KFR_FN(padlow) + +template <size_t Nout, typename T, size_t N, KFR_ENABLE_IF(N != Nout)> +KFR_INLINE vec<T, Nout> extend(vec<T, N> x) +{ + return shufflevector<Nout, internal::shuffle_index_extend<0, N>>(x); +} +template <size_t Nout, typename T, size_t N, KFR_ENABLE_IF(N == Nout)> +constexpr KFR_INLINE vec<T, Nout> extend(vec<T, N> x) +{ + return x; +} +KFR_FN(extend) + +template <size_t start, size_t count, typename T, size_t N> +KFR_INLINE vec<T, count> slice(vec<T, N> x) +{ + static_assert(start + count <= N, "start + count <= N"); + return shufflevector<count, internal::shuffle_index<start>>(x); +} +template <size_t start, size_t count, typename T, size_t N> +KFR_INLINE vec<T, count> slice(vec<T, N> x, vec<T, N> y) +{ + static_assert(start + count <= N * 2, "start + count <= N * 2"); + return shufflevector<count, internal::shuffle_index<start>>(x, y); +} +KFR_FN(slice) + +template <size_t, typename T, size_t N> +KFR_INLINE void split(vec<T, N>) +{ +} +template <size_t start = 0, typename T, size_t N, size_t Nout, typename... Args> +KFR_INLINE void split(vec<T, N> x, vec<T, Nout>& out, Args&&... 
args) +{ + out = slice<start, Nout>(x); + split<start + Nout>(x, std::forward<Args>(args)...); +} +KFR_FN(split) + +template <size_t total, size_t number, typename T, size_t N, size_t Nout = N / total> +KFR_INLINE vec<T, Nout> part(vec<T, N> x) +{ + static_assert(N % total == 0, "N % total == 0"); + return shufflevector<Nout, internal::shuffle_index<number * Nout>>(x); +} +KFR_FN(part) + +template <size_t start, size_t count, typename T, size_t N1, size_t N2> +KFR_INLINE vec<T, count> concat_and_slice(vec<T, N1> x, vec<T, N2> y) +{ + return internal::concattwo<start, count>(x, y); +} +KFR_FN(concat_and_slice) + +template <size_t Nout, typename T, size_t N> +KFR_INLINE vec<T, Nout> widen(vec<T, N> x, identity<T> newvalue = T()) +{ + static_assert(Nout > N, "Nout > N"); + return concat(x, broadcast<Nout - N>(newvalue)); +} +template <size_t Nout, typename T, typename TS> +constexpr KFR_INLINE vec<T, Nout> widen(vec<T, Nout> x, TS) +{ + return x; +} +KFR_FN(widen) + +template <size_t Nout, typename T, size_t N> +KFR_INLINE vec<T, Nout> narrow(vec<T, N> x) +{ + static_assert(Nout <= N, "Nout <= N"); + return slice<0, Nout>(x); +} +KFR_FN(narrow) + +template <size_t groupsize = 1, typename T, size_t N, size_t Nout = N / 2, + KFR_ENABLE_IF(N >= 2 && (N & 1) == 0)> +KFR_INLINE vec<T, Nout> even(vec<T, N> x) +{ + return shufflevector<Nout, internal::shuffle_index<0, 2>, groupsize>(x); +} +KFR_FNR(even, 2, 1) + +template <size_t groupsize = 1, typename T, size_t N, size_t Nout = N / 2, + KFR_ENABLE_IF(N >= 2 && (N & 1) == 0)> +KFR_INLINE vec<T, Nout> odd(vec<T, N> x) +{ + return shufflevector<Nout, internal::shuffle_index<1, 2>, groupsize>(x); +} +KFR_FNR(odd, 2, 1) + +namespace internal +{ +template <size_t groupsize = 2> +struct shuffle_index_dup1 +{ + constexpr inline size_t operator()(size_t index) const { return index / groupsize; } +}; + +template <size_t groupsize = 2, size_t start = 0> +struct shuffle_index_dup +{ + constexpr inline size_t operator()(size_t index) 
const { return start + index / groupsize * groupsize; } +}; +} + +template <typename T, size_t N> +KFR_INLINE vec<T, N> dupeven(vec<T, N> x) +{ + static_assert(N % 2 == 0, "N must be even"); + return shufflevector<N, internal::shuffle_index_dup<2, 0>>(x); +} +KFR_FN(dupeven) + +template <typename T, size_t N> +KFR_INLINE vec<T, N> dupodd(vec<T, N> x) +{ + static_assert(N % 2 == 0, "N must be even"); + return shufflevector<N, internal::shuffle_index_dup<2, 1>>(x); +} +KFR_FN(dupodd) + +template <typename T, size_t N> +KFR_INLINE vec<T, N * 2> duphalfs(vec<T, N> x) +{ + return concat(x, x); +} +KFR_FN(duphalfs) + +namespace internal +{ +template <size_t size, size_t... Indices> +struct shuffle_index_shuffle +{ + constexpr static size_t indexcount = sizeof...(Indices); + + template <size_t index> + constexpr inline size_t operator()() const + { + constexpr int result = csizes_t<Indices...>::get(csize<index % indexcount>); + return result + index / indexcount * indexcount; + } +}; +} + +template <size_t... Indices, typename T, size_t N> +KFR_INLINE vec<T, N> shuffle(vec<T, N> x, vec<T, N> y, elements_t<Indices...> = elements_t<Indices...>()) +{ + return shufflevector<N, internal::shuffle_index_shuffle<N, Indices...>>(x, y); +} +KFR_FN(shuffle) + +template <size_t groupsize, size_t... Indices, typename T, size_t N> +KFR_INLINE vec<T, N> shufflegroups(vec<T, N> x, vec<T, N> y, + elements_t<Indices...> = elements_t<Indices...>()) +{ + return shufflevector<N, internal::shuffle_index_shuffle<N, Indices...>, groupsize>(x, y); +} +KFR_FN(shufflegroups) + +namespace internal +{ +template <size_t size, size_t... 
Indices> +struct shuffle_index_permute +{ + constexpr static size_t indexcount = sizeof...(Indices); + + template <size_t index> + constexpr inline size_t operator()() const + { + constexpr size_t result = csizes_t<Indices...>::get(csize<index % indexcount>); + static_assert(result < size, "result < size"); + return result + index / indexcount * indexcount; + } +}; +} + +template <size_t... Indices, typename T, size_t N> +KFR_INLINE vec<T, N> permute(vec<T, N> x, elements_t<Indices...> = elements_t<Indices...>()) +{ + return shufflevector<N, internal::shuffle_index_permute<N, Indices...>>(x); +} +KFR_FN(permute) + +template <size_t groupsize, size_t... Indices, typename T, size_t N> +KFR_INLINE vec<T, N> permutegroups(vec<T, N> x, elements_t<Indices...> = elements_t<Indices...>()) +{ + return shufflevector<N, internal::shuffle_index_permute<N, Indices...>, groupsize>(x); +} +KFR_FN(permutegroups) + +namespace internal +{ + +template <typename T, size_t Nout, typename Fn, size_t... Indices> +constexpr KFR_INLINE vec<T, Nout> generate_vector(csizes_t<Indices...>) +{ + constexpr Fn fn{}; + return make_vector(static_cast<T>(fn(Indices))...); +} +} + +template <typename T, size_t Nout, typename Fn> +constexpr KFR_INLINE vec<T, Nout> generate_vector() +{ + return internal::generate_vector<T, Nout, Fn>(csizeseq<Nout>); +} +KFR_FN(generate_vector) + +namespace internal +{ +template <typename T, size_t N, typename = u8[N > 1]> +constexpr KFR_INLINE mask<T, N> evenmask() +{ + return broadcast<N / 2, T>(maskbits<T>(true), maskbits<T>(false)); +} +template <typename T, size_t N, typename = u8[N > 1]> +constexpr KFR_INLINE mask<T, N> oddmask() +{ + return broadcast<N / 2, T>(maskbits<T>(false), maskbits<T>(true)); +} +} + +template <typename T, size_t N, size_t Nout = N * 2> +KFR_INLINE vec<T, Nout> dup(vec<T, N> x) +{ + return shufflevector<Nout, internal::shuffle_index_dup1<2>>(x, x); +} +KFR_FNR(dup, 1, 2) + +namespace internal +{ +template <size_t count, size_t start = 0> 
+struct shuffle_index_duphalf +{ + constexpr inline size_t operator()(size_t index) const { return start + (index) % count; } +}; +} + +template <typename T, size_t N> +KFR_INLINE vec<T, N> duplow(vec<T, N> x) +{ + static_assert(N % 2 == 0, "N must be even"); + return shufflevector<N, internal::shuffle_index_duphalf<N / 2, 0>>(x); +} +KFR_FN(duplow) + +template <typename T, size_t N> +KFR_INLINE vec<T, N> duphigh(vec<T, N> x) +{ + static_assert(N % 2 == 0, "N must be even"); + return shufflevector<N, internal::shuffle_index_duphalf<N / 2, N / 2>>(x); +} +KFR_FN(duphigh) + +namespace internal +{ +template <size_t size, size_t... Indices> +struct shuffle_index_blend +{ + constexpr static size_t indexcount = sizeof...(Indices); + + template <size_t index> + constexpr inline size_t operator()() const + { + return (elements_t<Indices...>::get(csize<index % indexcount>) ? size : 0) + index % size; + } +}; +} + +template <size_t... Indices, typename T, size_t N> +KFR_INLINE vec<T, N> blend(vec<T, N> x, vec<T, N> y, elements_t<Indices...> = elements_t<Indices...>()) +{ + return shufflevector<N, internal::shuffle_index_blend<N, Indices...>, 1>(x, y); +} +KFR_FN(blend) + +namespace internal +{ +template <size_t elements> +struct shuffle_index_swap +{ + constexpr inline size_t operator()(size_t index) const + { + static_assert(is_poweroftwo(elements), "is_poweroftwo( elements )"); + return index ^ (elements - 1); + } +}; +template <size_t amount, size_t N> +struct shuffle_index_outputright +{ + constexpr inline size_t operator()(size_t index) const + { + return index < N - amount ? index : index + amount; + } +}; +} + +template <size_t elements, typename T, size_t N> +KFR_INLINE vec<T, N> swap(vec<T, N> x) +{ + return shufflevector<N, internal::shuffle_index_swap<elements>>(x); +} +KFR_FN(swap) + +template <size_t shift, typename T, size_t N> +KFR_INLINE vec<T, N> rotatetwo(vec<T, N> lo, vec<T, N> hi) +{ + return shift == 0 ? lo : (shift == N ? 
hi : shufflevector<N, internal::shuffle_index<N - shift>>(hi, lo)); +} + +template <size_t amount, typename T, size_t N> +KFR_INLINE vec<T, N> rotateright(vec<T, N> x, csize_t<amount> = csize_t<amount>()) +{ + static_assert(amount >= 0 && amount < N, "amount >= 0 && amount < N"); + return shufflevector<N, internal::shuffle_index_wrap<N, N - amount>>(x); +} +KFR_FN(rotateright) + +template <size_t amount, typename T, size_t N> +KFR_INLINE vec<T, N> rotateleft(vec<T, N> x, csize_t<amount> = csize_t<amount>()) +{ + static_assert(amount >= 0 && amount < N, "amount >= 0 && amount < N"); + return shufflevector<N, internal::shuffle_index_wrap<N, amount>>(x); +} +KFR_FN(rotateleft) + +template <typename T, size_t N> +KFR_INLINE vec<T, N> insertright(T x, vec<T, N> y) +{ + return concat_and_slice<1, N>(y, vec<T, 1>(x)); +} +KFR_FN(insertright) + +template <typename T, size_t N> +KFR_INLINE vec<T, N> insertleft(T x, vec<T, N> y) +{ + return concat_and_slice<0, N>(vec<T, 1>(x), y); +} +KFR_FN(insertleft) + +template <typename T, size_t N, size_t N2> +KFR_INLINE vec<T, N> outputright(vec<T, N> x, vec<T, N2> y) +{ + return shufflevector<N, internal::shuffle_index_outputright<N2, N>>(x, extend<N>(y)); +} +KFR_FN(outputright) + +namespace internal +{ +template <size_t size, size_t side1> +struct shuffle_index_transpose +{ + constexpr inline size_t operator()(size_t index) const + { + constexpr size_t side2 = size / side1; + return index % side2 * side1 + index / side2; + } +}; +} + +template <size_t side, size_t groupsize = 1, typename T, size_t N, KFR_ENABLE_IF(N / groupsize > 3)> +KFR_INLINE vec<T, N> transpose(vec<T, N> x) +{ + return shufflevector<N, internal::shuffle_index_transpose<N / groupsize, side>, groupsize>(x); +} +template <size_t side, size_t groupsize = 1, typename T, size_t N, KFR_ENABLE_IF(N / groupsize <= 3)> +KFR_INLINE vec<T, N> transpose(vec<T, N> x) +{ + return x; +} +KFR_FN(transpose) + +template <size_t side, size_t groupsize = 1, typename T, size_t N, 
KFR_ENABLE_IF(N / groupsize > 3)> +KFR_INLINE vec<T, N> transposeinverse(vec<T, N> x) +{ + return shufflevector<N, internal::shuffle_index_transpose<N / groupsize, N / groupsize / side>, + groupsize>(x); +} +template <size_t side, size_t groupsize = 1, typename T, size_t N, KFR_ENABLE_IF(N / groupsize <= 3)> +KFR_INLINE vec<T, N> transposeinverse(vec<T, N> x) +{ + return x; +} +KFR_FN(transposeinverse) + +template <size_t side, typename T, size_t N> +KFR_INLINE vec<T, N> ctranspose(vec<T, N> x) +{ + return transpose<side, 2>(x); +} +KFR_FN(ctranspose) + +template <size_t side, typename T, size_t N> +KFR_INLINE vec<T, N> ctransposeinverse(vec<T, N> x) +{ + return transposeinverse<side, 2>(x); +} +KFR_FN(ctransposeinverse) + +template <size_t groupsize = 1, typename T, size_t N, size_t Nout = N * 2> +KFR_INLINE vec<T, Nout> interleave(vec<T, N> x, vec<T, N> y) +{ + return shufflevector<Nout, internal::shuffle_index_transpose<Nout / groupsize, Nout / groupsize / 2>, + groupsize>(x, y); +} +KFR_FNR(interleave, 1, 2) + +template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)> +KFR_INLINE expr_func<fn_interleave, E1, E2> interleave(E1&& x, E2&& y) +{ + return { fn_interleave(), std::forward<E1>(x), std::forward<E2>(y) }; +} + +template <size_t groupsize = 1, typename T, size_t N> +KFR_INLINE vec<T, N> interleavehalfs(vec<T, N> x) +{ + return shufflevector<N, internal::shuffle_index_transpose<N / groupsize, N / groupsize / 2>, groupsize>( + x); +} +KFR_FN(interleavehalfs) + +template <size_t groupsize = 1, typename T, size_t N> +KFR_INLINE vec<T, N> splitpairs(vec<T, N> x) +{ + return shufflevector<N, internal::shuffle_index_transpose<N / groupsize, 2>, groupsize>(x); +} +KFR_FN(splitpairs) + +namespace internal +{ +template <size_t size> +struct shuffle_index_reverse +{ + constexpr inline size_t operator()(size_t index) const { return size - 1 - index; } +}; +} + +template <size_t groupsize = 1, typename T, size_t N> +KFR_INLINE vec<T, N> 
reverse(vec<T, N> x) +{ + return shufflevector<N, internal::shuffle_index_reverse<N / groupsize>, groupsize>(x); +} +KFR_FN(reverse) + +namespace internal +{ +template <size_t N1, size_t N2> +struct shuffle_index_combine +{ + constexpr inline size_t operator()(size_t index) const { return index >= N2 ? index : N1 + index; } +}; +} + +template <typename T, size_t N1, size_t N2> +KFR_INLINE vec<T, N1> combine(vec<T, N1> x, vec<T, N2> y) +{ + static_assert(N2 <= N1, "N2 <= N1"); + return shufflevector<N1, internal::shuffle_index_combine<N1, N2>>(x, extend<N1>(y)); +} +KFR_FN(combine) + +namespace internal +{ +template <size_t start, size_t stride> +struct generate_index +{ + constexpr size_t operator()(size_t index) const { return start + index * stride; } +}; +template <size_t start, size_t size, int on, int off> +struct generate_onoff +{ + constexpr size_t operator()(size_t index) const + { + return index >= start && index < start + size ? on : off; + } +}; +} + +template <typename T, size_t N, size_t start = 0, size_t stride = 1> +constexpr KFR_INLINE vec<T, N> enumerate() +{ + return generate_vector<T, N, internal::generate_index<start, stride>>(); +} +template <size_t start = 0, size_t stride = 1, typename T, size_t N> +constexpr KFR_INLINE vec<T, N> enumerate(vec_t<T, N>) +{ + return generate_vector<T, N, internal::generate_index<start, stride>>(); +} +KFR_FN(enumerate) + +template <typename T, size_t N, size_t start = 0, size_t size = 1, int on = 1, int off = 0> +constexpr KFR_INLINE vec<T, N> onoff(cint_t<on> = cint_t<on>(), cint_t<off> = cint_t<off>()) +{ + return generate_vector<T, N, internal::generate_onoff<start, size, on, off>>(); +} +template <size_t start = 0, size_t size = 1, int on = 1, int off = 0, typename T, size_t N> +constexpr KFR_INLINE vec<T, N> onoff(vec_t<T, N>, cint_t<on> = cint_t<on>(), cint_t<off> = cint_t<off>()) +{ + return generate_vector<T, N, internal::generate_onoff<start, size, on, off>>(); +} +KFR_FN(onoff) +} +#define 
KFR_SHUFFLE_SPECIALIZATIONS +#include "specializations.i" diff --git a/include/kfr/base/sin_cos.hpp b/include/kfr/base/sin_cos.hpp @@ -0,0 +1,586 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. + */ +#pragma once + +#include "abs.hpp" +#include "constants.hpp" +#include "function.hpp" +#include "min_max.hpp" +#include "operators.hpp" +#include "round.hpp" +#include "select.hpp" +#include "shuffle.hpp" + +#pragma clang diagnostic push +#if CID_HAS_WARNING("-Winaccessible-base") +#pragma clang diagnostic ignored "-Winaccessible-base" +#endif +#if CID_HAS_WARNING("-Wc99-extensions") +#pragma clang diagnostic ignored "-Wc99-extensions" +#endif + +namespace kfr +{ + +namespace internal +{ + +template <cpu_t c = cpu_t::native, cpu_t cc = c> +struct in_trig : in_select<cc> +{ +private: + using in_select<cc>::select; + +protected: + template <typename T, size_t N> + KFR_SINTRIN vec<T, N> mask_horner(vec<T, N>, mask<T, N> msk, T a0, T b0) + { + return select(msk, a0, b0); + } + + template <typename T, size_t N, typename... 
Ts> + KFR_SINTRIN vec<T, N> mask_horner(vec<T, N> x, mask<T, N> msk, T a0, T b0, T a1, T b1, Ts... values) + { + return fmadd(mask_horner(x, msk, a1, b1, values...), x, select(msk, a0, b0)); + } +}; + +template <cpu_t c = cpu_t::native, cpu_t cc = c> +struct in_sin_cos : private in_trig<cc>, private in_select<cc>, private in_round<cc>, private in_abs<cc> +{ + +private: + using in_abs<cc>::abs; + using in_round<cc>::floor; + using in_select<cc>::select; + using in_trig<cc>::mask_horner; + + template <typename T, size_t N, typename Tprecise = f64> + KFR_SINTRIN vec<T, N> trig_fold(vec<T, N> x, vec<itype<T>, N>& quadrant) + { + const vec<T, N> xabs = abs(x); + constexpr vec<T, N> div = fold_constant_div<T>; + vec<T, N> y = floor(xabs / div); + quadrant = cast<itype<T>>(y - floor(y * T(1.0 / 16.0)) * T(16.0)); + + const mask<T, N> msk = bitcast<T>((quadrant & 1) != 0); + quadrant = select(msk, quadrant + 1, quadrant); + y = select(msk, y + T(1.0), y); + quadrant = quadrant & 7; + + constexpr vec<Tprecise, N> hi = cast<Tprecise>(fold_constant_hi<T>); + constexpr vec<T, N> rem1 = fold_constant_rem1<T>; + constexpr vec<T, N> rem2 = fold_constant_rem2<T>; + return cast<T>(cast<Tprecise>(xabs) - cast<Tprecise>(y) * hi) - y * rem1 - y * rem2; + } + + template <size_t N> + KFR_SINTRIN vec<f32, N> trig_sincos(vec<f32, N> folded, mask<f32, N> cosmask) + { + constexpr f32 sin_c2 = -0x2.aaaaacp-4f; + constexpr f32 sin_c4 = 0x2.222334p-8f; + constexpr f32 sin_c6 = -0xd.0566ep-16f; + constexpr f32 sin_c8 = 0x3.64cc1cp-20f; + constexpr f32 sin_c10 = -0x5.6c4a4p-24f; + constexpr f32 cos_c2 = -0x8.p-4f; + constexpr f32 cos_c4 = 0xa.aaaabp-8f; + constexpr f32 cos_c6 = -0x5.b05d48p-12f; + constexpr f32 cos_c8 = 0x1.a065f8p-16f; + constexpr f32 cos_c10 = -0x4.cd156p-24f; + + const vec<f32, N> x2 = folded * folded; + + vec<f32, N> formula = mask_horner(x2, cosmask, 1.0f, 1.0f, cos_c2, sin_c2, cos_c4, sin_c4, cos_c6, + sin_c6, cos_c8, sin_c8, cos_c10, sin_c10); + + formula = 
select(cosmask, formula, formula * folded); + return formula; + } + + template <size_t N> + KFR_SINTRIN vec<f64, N> trig_sincos(vec<f64, N> folded, mask<f64, N> cosmask) + { + constexpr f64 sin_c2 = -0x2.aaaaaaaaaaaaap-4; + constexpr f64 sin_c4 = 0x2.22222222220cep-8; + constexpr f64 sin_c6 = -0xd.00d00cffd6618p-16; + constexpr f64 sin_c8 = 0x2.e3bc744fb879ep-20; + constexpr f64 sin_c10 = -0x6.b99034c1467a4p-28; + constexpr f64 sin_c12 = 0xb.0711ea8fe8ee8p-36; + constexpr f64 sin_c14 = -0xb.7e010897e55dp-44; + constexpr f64 sin_c16 = -0xb.64eac07f1d6bp-48; + constexpr f64 cos_c2 = -0x8.p-4; + constexpr f64 cos_c4 = 0xa.aaaaaaaaaaaa8p-8; + constexpr f64 cos_c6 = -0x5.b05b05b05ad28p-12; + constexpr f64 cos_c8 = 0x1.a01a01a0022e6p-16; + constexpr f64 cos_c10 = -0x4.9f93ed845de2cp-24; + constexpr f64 cos_c12 = 0x8.f76bc015abe48p-32; + constexpr f64 cos_c14 = -0xc.9bf2dbe00379p-40; + constexpr f64 cos_c16 = 0xd.1232ac32f7258p-48; + + vec<f64, N> x2 = folded * folded; + vec<f64, N> formula = + mask_horner(x2, cosmask, 1.0, 1.0, cos_c2, sin_c2, cos_c4, sin_c4, cos_c6, sin_c6, cos_c8, sin_c8, + cos_c10, sin_c10, cos_c12, sin_c12, cos_c14, sin_c14, cos_c16, sin_c16); + + formula = select(cosmask, formula, formula * folded); + return formula; + } + + template <typename T, size_t N, typename = u8[N > 1]> + KFR_SINTRIN vec<T, N> sincos_mask(vec<T, N> x_full, mask<T, N> cosmask) + { + vec<itype<T>, N> quadrant; + vec<T, N> folded = trig_fold(x_full, quadrant); + + mask<T, N> flip_sign = select(cosmask, (quadrant == 2) || (quadrant == 4), quadrant >= 4); + + mask<T, N> usecos = (quadrant == 2) || (quadrant == 6); + usecos = usecos ^ cosmask; + + vec<T, N> formula = trig_sincos(folded, usecos); + + mask<T, N> negmask = x_full < 0; + + flip_sign = flip_sign ^ (negmask & ~cosmask); + + formula = select(flip_sign, -formula, formula); + return formula; + } + + template <typename T> + constexpr static T fold_constant_div = choose_const<T>(0x1.921fb6p-1f, 0x1.921fb54442d18p-1); + + 
template <typename T> + constexpr static T fold_constant_hi = choose_const<T>(0x1.922000p-1f, 0x1.921fb40000000p-1); + template <typename T> + constexpr static T fold_constant_rem1 = choose_const<T>(-0x1.2ae000p-19f, 0x1.4442d00000000p-25); + template <typename T> + constexpr static T fold_constant_rem2 = choose_const<T>(-0x1.de973ep-32f, 0x1.8469898cc5170p-49); + constexpr static cpu_t cur = c; + +public: + template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)> + KFR_SINTRIN vec<T, N> sin(vec<T, N> x) + { + vec<itype<T>, N> quadrant; + vec<T, N> folded = trig_fold(x, quadrant); + + mask<T, N> flip_sign = quadrant >= 4; + mask<T, N> usecos = (quadrant == 2) || (quadrant == 6); + + vec<T, N> formula = trig_sincos(folded, usecos); + + formula = select(flip_sign ^ x.asmask(), -formula, formula); + return formula; + } + template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)> + KFR_SINTRIN vec<T, N> cos(vec<T, N> x) + { + vec<itype<T>, N> quadrant; + vec<T, N> folded = trig_fold(x, quadrant); + + mask<T, N> eq4 = (quadrant == 4); + mask<T, N> flip_sign = (quadrant == 2) || eq4; + mask<T, N> usecos = (quadrant == 0) || eq4; + + vec<T, N> formula = trig_sincos(folded, usecos); + + formula = select(flip_sign, -formula, formula); + return formula; + } + + template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)> + KFR_SINTRIN vec<T, N> fastsin(vec<T, N> x) + { + constexpr vec<T, N> msk = broadcast<N>(highbitmask<T>); + + constexpr static T c2 = -0.16665853559970855712890625; + constexpr static T c4 = +8.31427983939647674560546875e-3; + constexpr static T c6 = -1.85423981747590005397796630859375e-4; + + const vec<T, N> pi = c_pi<T>; + + x -= pi; + vec<T, N> y = abs(x); + y = select(y > c_pi<T, 1, 2>, pi - y, y); + y = y ^ (msk & ~x); + + vec<T, N> y2 = y * y; + vec<T, N> formula = c6; + vec<T, N> y3 = y2 * y; + formula = fmadd(formula, y2, c4); + formula = fmadd(formula, y2, c2); + formula = formula * y3 + y; + return formula; + } + + 
template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)> + KFR_SINTRIN vec<T, N> fastcos(vec<T, N> x) + { + x += c_pi<T, 1, 2>; + x = select(x >= c_pi<T, 2>, x - c_pi<T, 2>, x); + return fastsin(x); + } + template <typename T, size_t N, KFR_ENABLE_IF(N > 1 && is_f_class<T>::value)> + KFR_SINTRIN vec<T, N> sincos(vec<T, N> x) + { + return sincos_mask(x, internal::oddmask<T, N>()); + } + + template <typename T, size_t N, KFR_ENABLE_IF(N > 1 && is_f_class<T>::value)> + KFR_SINTRIN vec<T, N> cossin(vec<T, N> x) + { + return sincos_mask(x, internal::evenmask<T, N>()); + } + + template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)> + KFR_SINTRIN vec<T, N> sinc(vec<T, N> x) + { + return select(abs(x) <= c_epsilon<T>, T(1), sin(x) / x); + } + template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>> + KFR_SINTRIN vec<Tout, N> sin(vec<T, N> x) + { + return sin(cast<Tout>(x)); + } + template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>> + KFR_SINTRIN vec<Tout, N> cos(vec<T, N> x) + { + return cos(cast<Tout>(x)); + } + template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>> + KFR_SINTRIN vec<Tout, N> fastsin(vec<T, N> x) + { + return fastsin(cast<Tout>(x)); + } + template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>> + KFR_SINTRIN vec<Tout, N> fastcos(vec<T, N> x) + { + return fastcos(cast<Tout>(x)); + } + template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>> + KFR_SINTRIN vec<Tout, N> sincos(vec<T, N> x) + { + return sincos(cast<Tout>(x)); + } + template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>> + KFR_SINTRIN vec<Tout, N> cossin(vec<T, N> x) + { + return cossin(cast<Tout>(x)); + } + template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>> + KFR_SINTRIN vec<Tout, N> 
sinc(vec<T, N> x) + { + return sinc(cast<Tout>(x)); + } + + template <typename T> + KFR_SINTRIN T sindeg(const T& x) + { + return sin(x * c_degtorad<T>); + } + template <typename T> + KFR_SINTRIN T cosdeg(const T& x) + { + return cos(x * c_degtorad<T>); + } + + template <typename T> + KFR_SINTRIN T fastsindeg(const T& x) + { + return fastsin(x * c_degtorad<T>); + } + template <typename T> + KFR_SINTRIN T fastcosdeg(const T& x) + { + return fastcos(x * c_degtorad<T>); + } + + template <typename T> + KFR_SINTRIN T sincosdeg(const T& x) + { + return sincos(x * c_degtorad<T>); + } + template <typename T> + KFR_SINTRIN T cossindeg(const T& x) + { + return cossin(x * c_degtorad<T>); + } + + KFR_HANDLE_SCALAR(sin) + KFR_HANDLE_SCALAR(cos) + KFR_HANDLE_SCALAR(fastsin) + KFR_HANDLE_SCALAR(fastcos) + KFR_HANDLE_SCALAR(sincos) + KFR_HANDLE_SCALAR(cossin) + KFR_HANDLE_SCALAR(sinc) + + KFR_SPEC_FN(in_sin_cos, sin) + KFR_SPEC_FN(in_sin_cos, cos) + KFR_SPEC_FN(in_sin_cos, fastsin) + KFR_SPEC_FN(in_sin_cos, fastcos) + KFR_SPEC_FN(in_sin_cos, sincos_mask) + KFR_SPEC_FN(in_sin_cos, sincos) + KFR_SPEC_FN(in_sin_cos, cossin) + KFR_SPEC_FN(in_sin_cos, sinc) + KFR_SPEC_FN(in_sin_cos, sindeg) + KFR_SPEC_FN(in_sin_cos, cosdeg) + KFR_SPEC_FN(in_sin_cos, fastsindeg) + KFR_SPEC_FN(in_sin_cos, fastcosdeg) + KFR_SPEC_FN(in_sin_cos, sincosdeg) + KFR_SPEC_FN(in_sin_cos, cossindeg) +}; +} + +namespace native +{ +using fn_sin = internal::in_sin_cos<>::fn_sin; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> + +KFR_INTRIN ftype<T1> sin(const T1& x) +{ + return internal::in_sin_cos<>::sin(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> + +KFR_INTRIN expr_func<fn_sin, E1> sin(E1&& x) +{ + return { fn_sin(), std::forward<E1>(x) }; +} + +using fn_cos = internal::in_sin_cos<>::fn_cos; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> + +KFR_INTRIN ftype<T1> cos(const T1& x) +{ + return internal::in_sin_cos<>::cos(x); +} + +template <typename E1, 
KFR_ENABLE_IF(is_input_expression<E1>::value)> + +KFR_INTRIN expr_func<fn_cos, E1> cos(E1&& x) +{ + return { fn_cos(), std::forward<E1>(x) }; +} +using fn_fastsin = internal::in_sin_cos<>::fn_fastsin; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> +KFR_INTRIN ftype<T1> fastsin(const T1& x) +{ + return internal::in_sin_cos<>::fastsin(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> + +KFR_INTRIN expr_func<fn_fastsin, E1> fastsin(E1&& x) +{ + return { fn_fastsin(), std::forward<E1>(x) }; +} + +using fn_fastcos = internal::in_sin_cos<>::fn_fastcos; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> + +KFR_INTRIN ftype<T1> fastcos(const T1& x) +{ + return internal::in_sin_cos<>::fastcos(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> + +KFR_INTRIN expr_func<fn_fastcos, E1> fastcos(E1&& x) +{ + return { fn_fastcos(), std::forward<E1>(x) }; +} + +using fn_sincos_mask = internal::in_sin_cos<>::fn_sincos_mask; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> + +KFR_INTRIN ftype<T1> sincos_mask(const T1& x) +{ + return internal::in_sin_cos<>::sincos_mask(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> + +KFR_INTRIN expr_func<fn_sincos_mask, E1> sincos_mask(E1&& x) +{ + return { fn_sincos_mask(), std::forward<E1>(x) }; +} + +using fn_sincos = internal::in_sin_cos<>::fn_sincos; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> + +KFR_INTRIN ftype<T1> sincos(const T1& x) +{ + return internal::in_sin_cos<>::sincos(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> + +KFR_INTRIN expr_func<fn_sincos, E1> sincos(E1&& x) +{ + return { fn_sincos(), std::forward<E1>(x) }; +} + +using fn_cossin = internal::in_sin_cos<>::fn_cossin; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> + +KFR_INTRIN ftype<T1> cossin(const T1& x) +{ + return internal::in_sin_cos<>::cossin(x); +} + +template <typename E1, 
KFR_ENABLE_IF(is_input_expression<E1>::value)> + +KFR_INTRIN expr_func<fn_cossin, E1> cossin(E1&& x) +{ + return { fn_cossin(), std::forward<E1>(x) }; +} + +using fn_sindeg = internal::in_sin_cos<>::fn_sindeg; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> + +KFR_INTRIN ftype<T1> sindeg(const T1& x) +{ + return internal::in_sin_cos<>::sindeg(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> + +KFR_INTRIN expr_func<fn_sindeg, E1> sindeg(E1&& x) +{ + return { fn_sindeg(), std::forward<E1>(x) }; +} + +using fn_cosdeg = internal::in_sin_cos<>::fn_cosdeg; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> + +KFR_INTRIN ftype<T1> cosdeg(const T1& x) +{ + return internal::in_sin_cos<>::cosdeg(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> + +KFR_INTRIN expr_func<fn_cosdeg, E1> cosdeg(E1&& x) +{ + return { fn_cosdeg(), std::forward<E1>(x) }; +} + +using fn_fastsindeg = internal::in_sin_cos<>::fn_fastsindeg; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> + +KFR_INTRIN ftype<T1> fastsindeg(const T1& x) +{ + return internal::in_sin_cos<>::fastsindeg(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> + +KFR_INTRIN expr_func<fn_fastsindeg, E1> fastsindeg(E1&& x) +{ + return { fn_fastsindeg(), std::forward<E1>(x) }; +} + +using fn_fastcosdeg = internal::in_sin_cos<>::fn_fastcosdeg; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> + +KFR_INTRIN ftype<T1> fastcosdeg(const T1& x) +{ + return internal::in_sin_cos<>::fastcosdeg(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> + +KFR_INTRIN expr_func<fn_fastcosdeg, E1> fastcosdeg(E1&& x) +{ + return { fn_fastcosdeg(), std::forward<E1>(x) }; +} + +using fn_sincosdeg = internal::in_sin_cos<>::fn_sincosdeg; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> + +KFR_INTRIN ftype<T1> sincosdeg(const T1& x) +{ + return internal::in_sin_cos<>::sincosdeg(x); 
+} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> + +KFR_INTRIN expr_func<fn_sincosdeg, E1> sincosdeg(E1&& x) +{ + return { fn_sincosdeg(), std::forward<E1>(x) }; +} + +using fn_cossindeg = internal::in_sin_cos<>::fn_cossindeg; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> + +KFR_INTRIN ftype<T1> cossindeg(const T1& x) +{ + return internal::in_sin_cos<>::cossindeg(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> + +KFR_INTRIN expr_func<fn_cossindeg, E1> cossindeg(E1&& x) +{ + return { fn_cossindeg(), std::forward<E1>(x) }; +} + +using fn_sinc = internal::in_sin_cos<>::fn_sinc; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> +KFR_INTRIN ftype<T1> sinc(const T1& x) +{ + return internal::in_sin_cos<>::sinc(x); +} +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> +KFR_INTRIN expr_func<fn_sinc, E1> sinc(E1&& x) +{ + return { {}, std::forward<E1>(x) }; +} + +template <typename T> +inline T sin2x(const T& sinx, const T& cosx) +{ + return 2 * sinx * cosx; +} +template <typename T> +inline T sin3x(const T& sinx, const T& cosx) +{ + return sinx * (-1 + 4 * sqr(cosx)); +} + +template <typename T> +inline T cos2x(const T& sinx, const T& cosx) +{ + return sqr(cosx) - sqr(sinx); +} +template <typename T> +inline T cos3x(const T& sinx, const T& cosx) +{ + return cosx * (1 - 4 * sqr(sinx)); +} +} +} + +#pragma clang diagnostic pop diff --git a/include/kfr/base/sinh_cosh.hpp b/include/kfr/base/sinh_cosh.hpp @@ -0,0 +1,143 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. + */ +#pragma once +#include "abs.hpp" +#include "constants.hpp" +#include "function.hpp" +#include "log_exp.hpp" +#include "min_max.hpp" +#include "operators.hpp" +#include "select.hpp" + +namespace kfr +{ + +namespace internal +{ + +template <cpu_t c = cpu_t::native> +struct in_sinh_cosh : in_log_exp<c> +{ + constexpr static cpu_t cur = c; + +private: + using in_log_exp<c>::exp; + +public: + template <typename T, size_t N> + KFR_SINTRIN vec<T, N> sinh(vec<T, N> x) + { + return (exp(x) - exp(-x)) * T(0.5); + } + + template <typename T, size_t N> + KFR_SINTRIN vec<T, N> cosh(vec<T, N> x) + { + return (exp(x) + exp(-x)) * T(0.5); + } + + template <typename T, size_t N, KFR_ENABLE_IF(N > 1)> + KFR_SINTRIN vec<T, N> sinhcosh(vec<T, N> x) + { + const vec<T, N> a = exp(x); + const vec<T, N> b = exp(-x); + return subadd(a, b) * T(0.5); + } + + template <typename T, size_t N, KFR_ENABLE_IF(N > 1)> + KFR_SINTRIN vec<T, N> coshsinh(vec<T, N> x) + { + const vec<T, N> a = exp(x); + const vec<T, N> b = exp(-x); + return addsub(a, b) * T(0.5); + } + KFR_SPEC_FN(in_sinh_cosh, sinh) + KFR_SPEC_FN(in_sinh_cosh, cosh) + KFR_SPEC_FN(in_sinh_cosh, sinhcosh) + KFR_SPEC_FN(in_sinh_cosh, coshsinh) +}; +} + +namespace native +{ +using fn_sinh = internal::in_sinh_cosh<>::fn_sinh; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> + +KFR_INTRIN ftype<T1> 
sinh(const T1& x) +{ + return internal::in_sinh_cosh<>::sinh(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> + +KFR_INTRIN expr_func<fn_sinh, E1> sinh(E1&& x) +{ + return { fn_sinh(), std::forward<E1>(x) }; +} + +using fn_cosh = internal::in_sinh_cosh<>::fn_cosh; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> + +KFR_INTRIN ftype<T1> cosh(const T1& x) +{ + return internal::in_sinh_cosh<>::cosh(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> + +KFR_INTRIN expr_func<fn_cosh, E1> cosh(E1&& x) +{ + return { fn_cosh(), std::forward<E1>(x) }; +} + +using fn_sinhcosh = internal::in_sinh_cosh<>::fn_sinhcosh; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> + +KFR_INTRIN ftype<T1> sinhcosh(const T1& x) +{ + return internal::in_sinh_cosh<>::sinhcosh(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> + +KFR_INTRIN expr_func<fn_sinhcosh, E1> sinhcosh(E1&& x) +{ + return { fn_sinhcosh(), std::forward<E1>(x) }; +} + +using fn_coshsinh = internal::in_sinh_cosh<>::fn_coshsinh; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> + +KFR_INTRIN ftype<T1> coshsinh(const T1& x) +{ + return internal::in_sinh_cosh<>::coshsinh(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> + +KFR_INTRIN expr_func<fn_coshsinh, E1> coshsinh(E1&& x) +{ + return { fn_coshsinh(), std::forward<E1>(x) }; +} +} +} diff --git a/include/kfr/base/specializations.i b/include/kfr/base/specializations.i @@ -0,0 +1,113 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + */ +#pragma once + +#include "vec.hpp" +#ifndef KFR_SHUFFLE_SPECIALIZATIONS +#include "shuffle.hpp" +#endif + +namespace kfr +{ +namespace internal +{ +template <> +inline vec<f32, 32> shufflevector<f32, 32>( + csizes_t<0, 1, 8, 9, 16, 17, 24, 25, 2, 3, 10, 11, 18, 19, 26, 27, 4, 5, 12, 13, 20, 21, 28, 29, 6, 7, 14, + 15, 22, 23, 30, 31>, + vec<f32, 32> x, vec<f32, 32>) +{ + f32x32 w = x; + + w = concat(permute<0, 1, 8, 9, 4, 5, 12, 13, 2, 3, 10, 11, 6, 7, 14, 15>(low(w)), + permute<0, 1, 8, 9, 4, 5, 12, 13, 2, 3, 10, 11, 6, 7, 14, 15>(high(w))); + + w = permutegroups<(4), 0, 4, 2, 6, 1, 5, 3, 7>(w); // avx: vperm2f128 & vinsertf128, sse: no-op + return w; +} + +template <> +inline vec<f32, 32> shufflevector<f32, 32>( + csizes_t<0, 1, 16, 17, 8, 9, 24, 25, 4, 5, 20, 21, 12, 13, 28, 29, 2, 3, 18, 19, 10, 11, 26, 27, 6, 7, 22, + 23, 14, 15, 30, 31>, + vec<f32, 32> x, vec<f32, 32>) +{ + f32x32 w = x; + + w = concat(permute<0, 1, 8, 9, 4, 5, 12, 13, /**/ 2, 3, 10, 11, 6, 7, 14, 15>(even<8>(w)), + permute<0, 1, 8, 9, 4, 5, 12, 13, /**/ 2, 3, 10, 11, 6, 7, 14, 15>(odd<8>(w))); + + w = permutegroups<(4), 0, 4, 1, 5, 2, 6, 3, 7>(w); // avx: vperm2f128 & vinsertf128, sse: no-op + return w; +} + +inline vec<f32, 32> bitreverse_2(vec<f32, 32> x) +{ + return shufflevector<f32, 32>(csizes<0, 1, 16, 17, 8, 9, 24, 25, 4, 5, 20, 21, 12, 13, 28, 29, 2, 3, 18, + 19, 10, 11, 26, 27, 6, 7, 22, 23, 14, 15, 30, 31>, + x, x); +} + +template <> +inline vec<f32, 64> shufflevector<f32, 64>( + csizes_t<0, 1, 32, 33, 16, 17, 48, 49, 8, 9, 40, 41, 24, 25, 56, 57, 4, 5, 36, 37, 20, 21, 52, 53, 12, 13, + 44, 45, 28, 29, 60, 61, 2, 3, 34, 35, 18, 19, 50, 51, 10, 
11, 42, 43, 26, 27, 58, 59, 6, 7, 38, + 39, 22, 23, 54, 55, 14, 15, 46, 47, 30, 31, 62, 63>, + vec<f32, 64> x, vec<f32, 64>) +{ + x = concat(bitreverse_2(even<8>(x)), bitreverse_2(odd<8>(x))); + return permutegroups<(8), 0, 4, 1, 5, 2, 6, 3, 7>(x); +} + +template <> +inline vec<f32, 16> shufflevector<f32, 16>(csizes_t<0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15>, + vec<f32, 16> x, vec<f32, 16>) +{ +// asm volatile("int $3"); + x = permutegroups<(4), 0, 2, 1, 3>(x); + + x = concat(shuffle<0, 2, 8 + 0, 8 + 2>(low(x), high(x)), shuffle<1, 3, 8 + 1, 8 + 3>(low(x), high(x))); + + return x; +} + +template <> +inline vec<f32, 16> shufflevector<f32, 16>(csizes_t<0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15>, + vec<f32, 16> x, vec<f32, 16>) +{ + x = concat(shuffle<0, 8 + 0, 1, 8 + 1>(low(x), high(x)), shuffle<2, 8 + 2, 3, 8 + 3>(low(x), high(x))); + + x = permutegroups<(4), 0, 2, 1, 3>(x); + + return x; +} + +template <> +inline vec<f32, 32> shufflevector<f32, 32>( + csizes_t<0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, + 29, 14, 30, 15, 31>, + vec<f32, 32> x, vec<f32, 32>) +{ + x = permutegroups<(8), 0, 2, 1, 3>(x); + + x = concat(interleavehalfs(low(x)), interleavehalfs(high(x))); + + return x; +} +} +} diff --git a/include/kfr/base/sqrt.hpp b/include/kfr/base/sqrt.hpp @@ -0,0 +1,85 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. + */ +#pragma once + +#include "function.hpp" + +namespace kfr +{ + +namespace internal +{ + +template <cpu_t c = cpu_t::native> +struct in_sqrt : in_sqrt<older(c)> +{ + struct fn_sqrt : fn_disabled + { + }; +}; + +template <> +struct in_sqrt<cpu_t::sse2> +{ + constexpr static cpu_t cpu = cpu_t::sse2; + + KFR_SINTRIN f32sse sqrt(f32sse x) { return _mm_sqrt_ps(*x); } + KFR_SINTRIN f64sse sqrt(f64sse x) { return _mm_sqrt_pd(*x); } + + KFR_HANDLE_ALL(sqrt) + KFR_HANDLE_SCALAR(sqrt) + KFR_SPEC_FN(in_sqrt, sqrt) +}; + +template <> +struct in_sqrt<cpu_t::avx1> : in_sqrt<cpu_t::sse2> +{ + constexpr static cpu_t cpu = cpu_t::avx1; + using in_sqrt<cpu_t::sse2>::sqrt; + + KFR_SINTRIN f32avx KFR_USE_CPU(avx) sqrt(f32avx x) { return _mm256_sqrt_ps(*x); } + KFR_SINTRIN f64avx KFR_USE_CPU(avx) sqrt(f64avx x) { return _mm256_sqrt_pd(*x); } + + KFR_HANDLE_ALL(sqrt) + KFR_HANDLE_SCALAR(sqrt) + KFR_SPEC_FN(in_sqrt, sqrt) +}; +} +namespace native +{ +using fn_sqrt = internal::in_sqrt<>::fn_sqrt; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> + +KFR_INTRIN ftype<T1> sqrt(const T1& x) +{ + return internal::in_sqrt<>::sqrt(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> + +KFR_INTRIN expr_func<fn_sqrt, E1> sqrt(E1&& x) +{ + return { fn_sqrt(), std::forward<E1>(x) }; +} +} +} diff --git a/include/kfr/base/tan.hpp b/include/kfr/base/tan.hpp @@ -0,0 +1,187 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General 
Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. + */ +#pragma once +#include "abs.hpp" +#include "constants.hpp" +#include "function.hpp" +#include "operators.hpp" +#include "select.hpp" +#include "sin_cos.hpp" + +#pragma clang diagnostic push +#if CID_HAS_WARNING("-Winaccessible-base") +#pragma clang diagnostic ignored "-Winaccessible-base" +#endif +#if CID_HAS_WARNING("-Wc99-extensions") +#pragma clang diagnostic ignored "-Wc99-extensions" +#endif + +namespace kfr +{ + +namespace internal +{ + +template <cpu_t c = cpu_t::native, cpu_t cc = c> +struct in_tan : in_trig<cc>, in_select<cc>, in_round<cc>, in_abs<cc> +{ +private: + using in_abs<cc>::abs; + using in_round<cc>::floor; + using in_select<cc>::select; + using in_trig<cc>::mask_horner; + + template <typename T, size_t N, typename IT = itype<T>> + KFR_SINTRIN vec<T, N> trig_fold(vec<T, N> x_full, mask<T, N>& inverse) + { + constexpr T pi_14 = c_pi<T, 1, 4>; + + vec<T, N> y = abs(x_full); + vec<T, N> scaled = y / pi_14; + + vec<T, N> k_real = floor(scaled); + vec<IT, N> k = cast<IT>(k_real); + + vec<T, N> x = y - k_real * pi_14; + + mask<T, N> need_offset = (k & 1) != 0; + x = select(need_offset, x - pi_14, x); + + vec<IT, N> k_mod4 = k & 3; + inverse = (k_mod4 == 1) || (k_mod4 == 2); + return 
x; + } + +public: + template <size_t N> + KFR_SINTRIN vec<f32, N> tan(vec<f32, N> x_full) + { + mask<f32, N> inverse; + const vec<f32, N> x = trig_fold(x_full, inverse); + + constexpr f32 tan_c2 = 0x5.555378p-4; + constexpr f32 tan_c4 = 0x2.225bb8p-4; + constexpr f32 tan_c6 = 0xd.ac3fep-8; + constexpr f32 tan_c8 = 0x6.41644p-8; + constexpr f32 tan_c10 = 0xc.bfe7ep-12; + constexpr f32 tan_c12 = 0x2.6754dp-8; + + constexpr f32 cot_c2 = -0x5.555558p-4; + constexpr f32 cot_c4 = -0x5.b0581p-8; + constexpr f32 cot_c6 = -0x8.ac5ccp-12; + constexpr f32 cot_c8 = -0xd.aaa01p-16; + constexpr f32 cot_c10 = -0x1.a9a9b4p-16; + constexpr f32 cot_c12 = -0x6.f7d4dp-24; + + const vec<f32, N> x2 = x * x; + const vec<f32, N> val = mask_horner(x2, inverse, 1.0f, 1.0f, cot_c2, tan_c2, cot_c4, tan_c4, cot_c6, + tan_c6, cot_c8, tan_c8, cot_c10, tan_c10, cot_c12, tan_c12); + + const vec<f32, N> z = select(inverse, val / -x, val * x); + return mulsign(z, x_full); + } + + template <size_t N> + KFR_SINTRIN vec<f64, N> tan(vec<f64, N> x_full) + { + mask<f64, N> inverse; + const vec<f64, N> x = trig_fold(x_full, inverse); + + constexpr f64 tan_c2 = 0x5.5555554d8e5b8p-4; + constexpr f64 tan_c4 = 0x2.222224820264p-4; + constexpr f64 tan_c6 = 0xd.d0d90de32b3e8p-8; + constexpr f64 tan_c8 = 0x5.99723bdcf5cacp-8; + constexpr f64 tan_c10 = 0x2.434a142e413ap-8; + constexpr f64 tan_c12 = 0xf.2b59061305efp-12; + constexpr f64 tan_c14 = 0x4.a12565071a664p-12; + constexpr f64 tan_c16 = 0x4.dada3797ac1bcp-12; + constexpr f64 tan_c18 = -0x1.a74976b6ea3f3p-12; + constexpr f64 tan_c20 = 0x1.d06a5ae5e4a74p-12; + + constexpr f64 cot_c2 = -0x5.5555555555554p-4; + constexpr f64 cot_c4 = -0x5.b05b05b05b758p-8; + constexpr f64 cot_c6 = -0x8.ab355dffc79a8p-12; + constexpr f64 cot_c8 = -0xd.debbca405c9f8p-16; + constexpr f64 cot_c10 = -0x1.66a8edb99b15p-16; + constexpr f64 cot_c12 = -0x2.450239be0ee92p-20; + constexpr f64 cot_c14 = -0x3.ad6ddb4719438p-24; + constexpr f64 cot_c16 = -0x5.ff4c42741356p-28; + constexpr 
f64 cot_c18 = -0x9.06881bcdf3108p-32; + constexpr f64 cot_c20 = -0x1.644abedc113cap-32; + + const vec<f64, N> x2 = x * x; + const vec<f64, N> val = + mask_horner(x2, inverse, 1.0, 1.0, cot_c2, tan_c2, cot_c4, tan_c4, cot_c6, tan_c6, cot_c8, tan_c8, + cot_c10, tan_c10, cot_c12, tan_c12, cot_c14, tan_c14, cot_c16, tan_c16, cot_c18, + tan_c18, cot_c20, tan_c20); + + const vec<f64, N> z = select(inverse, val / -x, val * x); + return mulsign(z, x_full); + } + template <typename T> + KFR_SINTRIN T tandeg(const T& x) + { + return tan(x * c_degtorad<T>); + } + + KFR_HANDLE_SCALAR(tan) + KFR_SPEC_FN(in_tan, tan) + KFR_SPEC_FN(in_tan, tandeg) +}; +} + +namespace native +{ +using fn_tan = internal::in_tan<>::fn_tan; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> + +KFR_INTRIN ftype<T1> tan(const T1& x) +{ + return internal::in_tan<>::tan(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> + +KFR_INTRIN expr_func<fn_tan, E1> tan(E1&& x) +{ + return { fn_tan(), std::forward<E1>(x) }; +} + +using fn_tandeg = internal::in_tan<>::fn_tandeg; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> + +KFR_INTRIN ftype<T1> tandeg(const T1& x) +{ + return internal::in_tan<>::tandeg(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> + +KFR_INTRIN expr_func<fn_tandeg, E1> tandeg(E1&& x) +{ + return { fn_tandeg(), std::forward<E1>(x) }; +} +} +} + +#pragma clang diagnostic pop diff --git a/include/kfr/base/types.hpp b/include/kfr/base/types.hpp @@ -0,0 +1,728 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. + */ +#pragma once +#include "kfr.h" + +#include "intrinsics.h" + +#include <algorithm> +#include <tuple> +#include <type_traits> + +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wshadow" + +#include "../cometa.hpp" + +#define KFR_ENABLE_IF CMT_ENABLE_IF + +#define KFR_FN(fn) \ + struct fn_##fn \ + { \ + template <typename... Args> \ + CID_INLINE_MEMBER decltype(fn(std::declval<Args>()...)) operator()(Args&&... args) const \ + { \ + return fn(std::forward<Args>(args)...); \ + } \ + }; + +#define KFR_FNR(fn, in, out) \ + struct fn_##fn \ + { \ + using ratio = ioratio<in, out>; \ + template <typename... Args> \ + CID_INLINE_MEMBER decltype(fn(std::declval<Args>()...)) operator()(Args&&... args) const \ + { \ + return fn(std::forward<Args>(args)...); \ + } \ + }; + +#define KFR_SPEC_FN(tpl, fn) \ + struct fn_##fn \ + { \ + constexpr fn_##fn() noexcept = default; \ + template <cpu_t newcpu> \ + using retarget_this = typename tpl<newcpu>::fn_##fn; \ + template <typename... Args> \ + KFR_INLINE decltype(fn(std::declval<Args>()...)) operator()(Args&&... 
args) const \ + { \ + return fn(std::forward<Args>(args)...); \ + } \ + }; + +namespace kfr +{ +using namespace cometa; + +using f32 = float; +using f64 = double; +using i8 = int8_t; +using i16 = int16_t; +using i32 = int32_t; +using i64 = int64_t; +using u8 = uint8_t; +using u16 = uint16_t; +using u32 = uint32_t; +using u64 = uint64_t; +using umax = uint64_t; +using imax = int64_t; +using fmax = double; +using f80 = long double; + +#ifdef KFR_BASETYPE_F32 +using fbase = f32; +#else +using fbase = f64; +#endif + +constexpr ctype_t<f32> ctype_f32{}; +constexpr ctype_t<f64> ctype_f64{}; +constexpr ctype_t<i8> ctype_i8{}; +constexpr ctype_t<i16> ctype_i16{}; +constexpr ctype_t<i32> ctype_i32{}; +constexpr ctype_t<i64> ctype_i64{}; +constexpr ctype_t<u8> ctype_u8{}; +constexpr ctype_t<u16> ctype_u16{}; +constexpr ctype_t<u32> ctype_u32{}; +constexpr ctype_t<u64> ctype_u64{}; +constexpr ctype_t<umax> ctype_umax{}; +constexpr ctype_t<imax> ctype_imax{}; +constexpr ctype_t<fmax> ctype_fmax{}; +constexpr ctype_t<f80> ctype_f80{}; +constexpr ctype_t<fbase> ctype_base{}; + +struct u24 +{ + u8 raw[3]; +}; + +struct i24 +{ + u8 raw[3]; +}; + +struct f16 +{ + u16 raw; +}; + +template <typename T1> +struct range +{ + T1 min; + T1 max; + T1 distance() const { return max - min; } +}; + +template <size_t in, size_t out> +struct ioratio +{ + constexpr static size_t input = in; + constexpr static size_t output = out; +}; + +enum class datatype : int +{ + typebits_mask = 0xFF, + f = 0x100, + i = 0x200, + u = 0x300, + c = 0x400, + typeclass_mask = 0xF00, + x1 = 0x1000, + x2 = 0x2000, + x3 = 0x3000, + x4 = 0x4000, + typecomponents_mask = 0xF000, + f16 = static_cast<int>(f) | static_cast<int>(x1) | 16, + f32 = static_cast<int>(f) | static_cast<int>(x1) | 32, + f64 = static_cast<int>(f) | static_cast<int>(x1) | 64, + f80 = static_cast<int>(f) | static_cast<int>(x1) | 80, + i8 = static_cast<int>(i) | static_cast<int>(x1) | 8, + i16 = static_cast<int>(i) | static_cast<int>(x1) | 16, + i24 = 
static_cast<int>(i) | static_cast<int>(x1) | 24,
+    i32 = static_cast<int>(i) | static_cast<int>(x1) | 32,
+    i64 = static_cast<int>(i) | static_cast<int>(x1) | 64,
+    u8  = static_cast<int>(u) | static_cast<int>(x1) | 8,
+    u16 = static_cast<int>(u) | static_cast<int>(x1) | 16,
+    u24 = static_cast<int>(u) | static_cast<int>(x1) | 24,
+    u32 = static_cast<int>(u) | static_cast<int>(x1) | 32,
+    u64 = static_cast<int>(u) | static_cast<int>(x1) | 64,
+    c32 = static_cast<int>(c) | static_cast<int>(x2) | 32,
+    c64 = static_cast<int>(c) | static_cast<int>(x2) | 64
+};
+
+// Bitwise OR of two datatype flag sets.
+inline datatype operator|(datatype x, datatype y)
+{
+    using type = underlying_type<datatype>;
+    return static_cast<datatype>(static_cast<type>(x) | static_cast<type>(y));
+}
+
+// Bitwise AND of two datatype flag sets (used e.g. to extract a field with
+// typeclass_mask / typebits_mask).
+inline datatype operator&(datatype x, datatype y)
+{
+    using type = underlying_type<datatype>;
+    // FIX: previously used '|' (copy-paste from operator| above), which made
+    // masking with typeclass_mask/typebits_mask return the union of bits
+    // instead of the intersection. Bitwise AND is intended here.
+    return static_cast<datatype>(static_cast<type>(x) & static_cast<type>(y));
+}
+
+// Placeholder value convertible to any default-constructible type.
+struct generic
+{
+    template <typename T>
+    KFR_INLINE constexpr operator T() const noexcept
+    {
+        return T();
+    }
+};
+
+// Size type for unbounded expressions: compares greater than every size_t.
+struct infinite
+{
+    template <typename T>
+    KFR_INLINE constexpr operator T() const noexcept
+    {
+        return T();
+    }
+    constexpr friend bool operator<(infinite, size_t) noexcept { return false; }
+    constexpr friend bool operator<(size_t, infinite) noexcept { return true; }
+    constexpr friend bool operator<(infinite, infinite) noexcept { return false; }
+};
+
+enum class accuracy : int
+{
+    accuracy = 1,
+    speed = 2,
+    _accuracy_min = static_cast<int>(accuracy),
+    _accuracy_max = static_cast<int>(speed)
+};
+
+enum class archendianness : int
+{
+    littleendian = 1,
+    bigendian = 2,
+    _archendianness_min = static_cast<int>(littleendian),
+    _archendianness_max = static_cast<int>(bigendian)
+};
+
+typedef void*(KFR_CDECL* func_allocate)(size_t);
+
+typedef void(KFR_CDECL* func_deallocate)(void*);
+
+// Pluggable allocation callbacks with granularity/alignment requirements.
+struct mem_allocator
+{
+    func_allocate allocate;
+    func_deallocate deallocate;
+    size_t granularity;
+    size_t alignment;
+};
+
+struct mem_header
+{
+ size_t size; + mem_allocator* allocator; + uintptr_t refcount; + uintptr_t reserved; +}; + +enum class outputinput_t +{ + output, + input +}; +template <outputinput_t p> +using coutputinput_t = cval_t<outputinput_t, p>; + +template <outputinput_t p> +constexpr coutputinput_t<p> coutputinput{}; + +using coutput_t = coutputinput_t<outputinput_t::output>; +using cinput_t = coutputinput_t<outputinput_t::input>; + +constexpr coutput_t coutput{}; +constexpr cinput_t cinput{}; + +namespace internal +{ +template <typename Fn, typename enable = void_t<>> +struct func_ratio_impl +{ + using type = ioratio<1, 1>; +}; +template <typename Fn> +struct func_ratio_impl<Fn, void_t<typename Fn::ratio>> +{ + using type = typename Fn::ratio; +}; +} + +template <typename Fn> +using func_ratio = typename internal::func_ratio_impl<remove_reference<Fn>>::type; + +template <typename T> +constexpr inline T align_down(T x, identity<T> alignment) +{ + return (x) & ~(alignment - 1); +} +template <typename T> +constexpr inline T* align_down(T* x, size_t alignment) +{ + return reinterpret_cast<T*>(align_down(reinterpret_cast<size_t>(x), alignment)); +} + +template <typename T> +constexpr inline T align_up(T x, identity<T> alignment) +{ + return (x + alignment - 1) & ~(alignment - 1); +} +template <typename T> +constexpr inline T* align_up(T* x, size_t alignment) +{ + return reinterpret_cast<T*>(align_up(reinterpret_cast<size_t>(x), alignment)); +} + +template <typename T> +constexpr inline T* advance(T* x, ptrdiff_t offset) +{ + return x + offset; +} +constexpr inline void* advance(void* x, ptrdiff_t offset) +{ + return advance(static_cast<unsigned char*>(x), offset); +} + +constexpr inline ptrdiff_t distance(const void* x, const void* y) +{ + return static_cast<const unsigned char*>(x) - static_cast<const unsigned char*>(y); +} + +enum class cpu_t : int +{ + sse2 = 0, + sse3 = 1, + ssse3 = 2, + sse41 = 3, + sse42 = 4, + avx1 = 5, + avx2 = 6, + avx = static_cast<int>(avx1), + native = 
static_cast<int>(KFR_ARCH_NAME), + lowest = static_cast<int>(sse2), + highest = static_cast<int>(avx2), + runtime = -1, +}; + +template <cpu_t cpu> +using ccpu_t = cval_t<cpu_t, cpu>; + +template <cpu_t cpu> +constexpr ccpu_t<cpu> ccpu{}; + +namespace internal +{ +constexpr cpu_t older(cpu_t x) { return static_cast<cpu_t>(static_cast<int>(x) - 1); } +constexpr cpu_t newer(cpu_t x) { return static_cast<cpu_t>(static_cast<int>(x) + 1); } + +constexpr auto cpu_list = + cvals<cpu_t, cpu_t::avx2, cpu_t::avx1, cpu_t::sse41, cpu_t::ssse3, cpu_t::sse3, cpu_t::sse2>; +} + +template <cpu_t cpu> +using cpuval_t = cval_t<cpu_t, cpu>; +template <cpu_t cpu> +constexpr auto cpuval = cpuval_t<cpu>{}; + +constexpr auto cpu_all = cfilter(internal::cpu_list, internal::cpu_list >= cpuval<cpu_t::native>); +constexpr auto cpu_shuffle = + cfilter(cpu_all, cpu_all != cpuval<cpu_t::sse3> && cpu_all != cpuval<cpu_t::ssse3>); + +template <typename T> +constexpr datatype typeclass = std::is_floating_point<typename compound_type_traits<T>::subtype>::value + ? datatype::f + : std::is_integral<typename compound_type_traits<T>::subtype>::value + ? (std::is_unsigned<typename compound_type_traits<T>::subtype>::value + ? datatype::u + : datatype::i) + : datatype(); + +template <typename T> +using is_f_class = std::integral_constant<bool, typeclass<T> == datatype::f>; +template <typename T> +using is_u_class = std::integral_constant<bool, typeclass<T> == datatype::u>; +template <typename T> +using is_i_class = std::integral_constant<bool, typeclass<T> == datatype::i>; + +template <typename T> +struct typebits +{ + constexpr static size_t bits = sizeof(typename compound_type_traits<T>::subtype) * 8; + constexpr static size_t width = compound_type_traits<T>::is_scalar ? 
0 : compound_type_traits<T>::width; + using subtype = typename compound_type_traits<T>::subtype; +}; + +namespace internal +{ +template <size_t bits> +struct float_type_impl; +template <size_t bits> +struct int_type_impl; +template <size_t bits> +struct unsigned_type_impl; + +template <> +struct float_type_impl<32> +{ + using type = f32; +}; +template <> +struct float_type_impl<64> +{ + using type = f64; +}; + +template <> +struct int_type_impl<8> +{ + using type = i8; +}; +template <> +struct int_type_impl<16> +{ + using type = i16; +}; +template <> +struct int_type_impl<32> +{ + using type = i32; +}; +template <> +struct int_type_impl<64> +{ + using type = i64; +}; + +template <> +struct unsigned_type_impl<8> +{ + using type = u8; +}; +template <> +struct unsigned_type_impl<16> +{ + using type = u16; +}; +template <> +struct unsigned_type_impl<32> +{ + using type = u32; +}; +template <> +struct unsigned_type_impl<64> +{ + using type = u64; +}; +} + +template <size_t bits> +using float_type = typename internal::float_type_impl<bits>::type; +template <size_t bits> +using int_type = typename internal::int_type_impl<bits>::type; +template <size_t bits> +using unsigned_type = typename internal::unsigned_type_impl<bits>::type; + +template <typename T> +using ftype = deep_rebind<T, float_type<typebits<deep_subtype<T>>::bits>>; +template <typename T> +using itype = deep_rebind<T, int_type<typebits<deep_subtype<T>>::bits>>; +template <typename T> +using utype = deep_rebind<T, unsigned_type<typebits<deep_subtype<T>>::bits>>; + +template <typename T> +using fsubtype = ftype<subtype<T>>; +template <typename T> +using isubtype = itype<subtype<T>>; +template <typename T> +using usubtype = utype<subtype<T>>; + +template <typename T, size_t N> +struct vec_t +{ + using value_type = T; + constexpr static size_t size() noexcept { return N; } + constexpr vec_t() noexcept = default; + + using scalar_type = subtype<T>; + constexpr static size_t scalar_size() noexcept { return N * 
compound_type_traits<T>::width; } +}; + +template <typename T, typename R = T> +using enable_if_vec = enable_if<(typebits<T>::width > 0), R>; +template <typename T, typename R = T> +using enable_if_not_vec = enable_if<(typebits<T>::width == 0), R>; + +template <typename T, typename R = T> +using enable_if_i = enable_if<typeclass<T> == datatype::i, R>; +template <typename T, typename R = T> +using enable_if_u = enable_if<typeclass<T> == datatype::u, R>; +template <typename T, typename R = T> +using enable_if_f = enable_if<typeclass<T> == datatype::f, R>; + +template <typename T, typename R = T> +using enable_if_not_i = enable_if<typeclass<T> != datatype::i, R>; +template <typename T, typename R = T> +using enable_if_not_u = enable_if<typeclass<T> != datatype::u, R>; +template <typename T, typename R = T> +using enable_if_not_f = enable_if<typeclass<T> != datatype::f, R>; + +namespace internal +{ +KFR_INLINE f32 builtin_sqrt(f32 x) { return __builtin_sqrtf(x); } +KFR_INLINE f64 builtin_sqrt(f64 x) { return __builtin_sqrt(x); } +KFR_INLINE f80 builtin_sqrt(f80 x) { return __builtin_sqrtl(x); } +KFR_INLINE void builtin_memcpy(void* dest, const void* src, size_t size) +{ + __builtin_memcpy(dest, src, size); +} +KFR_INLINE void builtin_memset(void* dest, int val, size_t size) { __builtin_memset(dest, val, size); } +template <typename T1> +KFR_INLINE void zeroize(T1& value) +{ + builtin_memset(static_cast<void*>(std::addressof(value)), 0, sizeof(T1)); +} +} + +#pragma clang diagnostic push +#if CID_HAS_WARNING("-Wundefined-reinterpret-cast") +#pragma clang diagnostic ignored "-Wundefined-reinterpret-cast" +#endif + +template <typename T, typename U> +constexpr inline static T& ref_cast(U& ptr) +{ + return reinterpret_cast<T&>(ptr); +} + +template <typename T, typename U> +constexpr inline static const T& ref_cast(const U& ptr) +{ + return reinterpret_cast<const T&>(ptr); +} + +template <typename T, typename U> +constexpr inline static T* ptr_cast(U* ptr) +{ + return 
reinterpret_cast<T*>(ptr); +} + +template <typename T, typename U> +constexpr inline static const T* ptr_cast(const U* ptr) +{ + return reinterpret_cast<const T*>(ptr); +} + +template <typename T, typename U> +constexpr inline static T* ptr_cast(U* ptr, ptrdiff_t offset) +{ + return ptr_cast<T>(ptr_cast<u8>(ptr) + offset); +} + +#pragma clang diagnostic pop + +__attribute__((unused)) static const char* cpu_name(cpu_t set) +{ + static const char* names[] = { "sse2", "sse3", "ssse3", "sse41", "sse42", "avx1", "avx2" }; + if (set >= cpu_t::lowest && set <= cpu_t::highest) + return names[static_cast<size_t>(set)]; + return "-"; +} + +#define KFR_FN_S(fn) \ + template <typename Arg, typename... Args> \ + KFR_INLINE enable_if_not_vec<Arg> fn(Arg arg, Args... args) \ + { \ + return fn(make_vector(arg), make_vector(args)...)[0]; \ + } +#define KFR_FN_S_S(fn) \ + template <typename Arg, typename... Args, KFR_ENABLE_IF(is_number<Arg>::value)> \ + KFR_SINTRIN enable_if_not_vec<Arg> fn(Arg arg, Args... 
args) \ + { \ + return fn(make_vector(arg), make_vector(args)...)[0]; \ + } + +template <typename T> +struct initialvalue +{ +}; + +constexpr double infinity = __builtin_inf(); +constexpr double qnan = __builtin_nan(""); + +namespace internal +{ +constexpr f32 allones_f32 = -__builtin_nanf("0xFFFFFFFF"); +constexpr f64 allones_f64 = -__builtin_nan("0xFFFFFFFFFFFFFFFF"); + +template <typename T, typename Tsub = subtype<T>> +constexpr Tsub allones = choose_const<Tsub>(allones_f32, allones_f64, static_cast<Tsub>(-1)); + +template <typename T, typename Tsub = subtype<T>> +constexpr Tsub allzeros = Tsub(); + +template <typename T, typename Tsub = subtype<T>> +constexpr Tsub highbitmask = choose_const<Tsub>(-0.f, -0.0, 1ull << (typebits<T>::bits - 1)); + +template <typename T, typename Tsub = subtype<T>> +constexpr Tsub invhighbitmask = choose_const<Tsub>(__builtin_nanf("0xFFFFFFFF"), + __builtin_nan("0xFFFFFFFFFFFFFFFF"), + ~(1ull << (typebits<T>::bits - 1))); + +template <typename T> +constexpr inline T maskbits(bool value) +{ + return value ? internal::allones<T> : T(); +} +} + +template <typename T> +constexpr size_t widthof(T) +{ + return compound_type_traits<T>::width; +} +template <typename T> +constexpr size_t widthof() +{ + return compound_type_traits<T>::width; +} + +template <typename T> +constexpr inline T bitness_const(T x32, T x64) +{ +#ifdef KFR_ARCH_X64 + (void)x32; + return x64; +#else + (void)x64; + return x32; +#endif +} + +constexpr size_t native_cache_alignment = 64; +constexpr size_t native_cache_alignment_mask = native_cache_alignment - 1; +constexpr size_t maximum_vector_alignment = 32; +constexpr size_t maximum_vector_alignment_mask = maximum_vector_alignment - 1; +constexpr size_t native_register_count = bitness_const(8, 16); +template <cpu_t c> +constexpr size_t native_float_vector_size = c >= cpu_t::avx1 ? 32 : c >= cpu_t::sse2 ? 16 : 0; +template <cpu_t c> +constexpr size_t native_int_vector_size = c >= cpu_t::avx2 ? 32 : c >= cpu_t::sse2 ? 
16 : 0; + +struct input_expression +{ + using value_type = generic; + using size_type = infinite; + constexpr size_type size() const noexcept { return {}; } + + KFR_INLINE void begin_block(size_t) const {} + KFR_INLINE void end_block(size_t) const {} +}; + +struct output_expression +{ + using value_type = generic; + using size_type = infinite; + constexpr size_type size() const noexcept { return {}; } + + KFR_INLINE void output_begin_block(size_t) const {} + KFR_INLINE void output_end_block(size_t) const {} +}; + +template <typename E> +using is_input_expression = std::is_base_of<input_expression, decay<E>>; + +template <typename... Es> +using is_input_expressions = or_t<std::is_base_of<input_expression, decay<Es>>...>; + +template <typename E> +using is_output_expression = std::is_base_of<output_expression, decay<E>>; + +template <typename T> +using is_numeric = is_number<deep_subtype<T>>; + +template <typename... Ts> +using is_numeric_args = and_t<is_numeric<Ts>...>; + +template <typename T, cpu_t c = cpu_t::native> +constexpr size_t vector_width = typeclass<T> == datatype::f ? 
native_float_vector_size<c> / sizeof(T) + : native_int_vector_size<c> / sizeof(T); + +template <cpu_t c> +constexpr size_t vector_width<void, c> = 0; + +namespace internal +{ + +template <cpu_t c> +constexpr size_t native_vector_alignment = std::max(native_float_vector_size<c>, native_int_vector_size<c>); + +template <cpu_t c> +constexpr bool fast_unaligned = c >= cpu_t::avx1; + +template <cpu_t c> +constexpr size_t native_vector_alignment_mask = native_vector_alignment<c> - 1; + +template <typename T, cpu_t c> +constexpr inline size_t get_vector_width(size_t scale = 1) +{ + return scale * vector_width<T, c>; +} +template <typename T, cpu_t c> +constexpr inline size_t get_vector_width(size_t x32scale, size_t x64scale) +{ + return bitness_const(x32scale, x64scale) * vector_width<T, c>; +} + +template <typename T, cpu_t c> +constexpr auto vector_width_range = csize<1> << csizeseq<ilog2(vector_width<T, c>) + 1>; + +template <typename T, cpu_t c> +constexpr size_t vector_capacity = native_register_count* vector_width<T, c>; + +template <typename T, cpu_t c> +constexpr size_t maximum_vector_size = std::min(static_cast<size_t>(32), vector_capacity<T, c> / 4); +} +} +namespace cometa +{ + +template <typename T, size_t N> +struct compound_type_traits<kfr::vec_t<T, N>> +{ + constexpr static size_t width = N; + using subtype = T; + using deep_subtype = cometa::deep_subtype<T>; + constexpr static bool is_scalar = false; + + template <typename U> + using rebind = kfr::vec_t<U, N>; + template <typename U> + using deep_rebind = kfr::vec_t<cometa::deep_rebind<subtype, U>, N>; +}; +} + +#pragma clang diagnostic pop diff --git a/include/kfr/base/univector.hpp b/include/kfr/base/univector.hpp @@ -0,0 +1,300 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either 
version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. + */ +#pragma once + +#include "../base/function.hpp" +#include "../base/memory.hpp" +#include "../base/read_write.hpp" +#include "../base/types.hpp" + +namespace kfr +{ + +constexpr size_t tag_array_ref = 0; +constexpr size_t tag_dynamic_vector = max_size_t; + +template <typename T, size_t Size = tag_dynamic_vector> +struct univector; + +template <typename T, typename Class> +struct univector_base : input_expression, output_expression +{ + template <typename U, size_t N> + KFR_INLINE void operator()(coutput_t, size_t index, vec<U, N> value) + { + T* data = ptr_cast<Class>(this)->data(); + write(ptr_cast<T>(data) + index, cast<T>(value)); + } + template <typename U, size_t N> + KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const + { + const T* data = ptr_cast<Class>(this)->data(); + return cast<U>(read<N>(ptr_cast<T>(data) + index)); + } + + template <typename Input, KFR_ENABLE_IF(is_input_expression<Input>::value)> + KFR_INLINE Class& operator=(Input&& input) + { + assign_expr(std::forward<Input>(input)); + return *ptr_cast<Class>(this); + } + univector<T, 0> slice(size_t start = 0, size_t size = max_size_t) + { + T* data = ptr_cast<Class>(this)->data(); + const size_t this_size = ptr_cast<Class>(this)->size(); + return array_ref<T>(data + start, 
std::min(size, this_size - start));
+    }
+    // Const overload: view of [start, start + size) clamped to this buffer.
+    univector<const T, 0> slice(size_t start = 0, size_t size = max_size_t) const
+    {
+        const T* data = ptr_cast<Class>(this)->data();
+        const size_t this_size = ptr_cast<Class>(this)->size();
+        return array_ref<const T>(data + start, std::min(size, this_size - start));
+    }
+
+    // Non-owning reference to the whole buffer.
+    array_ref<T> ref()
+    {
+        T* data = get_data();
+        const size_t size = get_size();
+        return array_ref<T>(data, size);
+    }
+    array_ref<const T> ref() const
+    {
+        const T* data = get_data();
+        const size_t size = get_size();
+        return array_ref<const T>(data, size);
+    }
+
+    // Write srcsize elements into the ring buffer at cursor, wrapping at the
+    // end; cursor is advanced by the number of elements actually written.
+    void ringbuf_write(size_t& cursor, const T* src, size_t srcsize)
+    {
+        if (srcsize == 0)
+            return;
+        // skip redundant data
+        const size_t size = get_size();
+        T* data = get_data();
+        if (srcsize > size)
+        {
+            // FIX: only the last `size` source elements can survive in the
+            // ring buffer. The previous code advanced src by srcsize / size
+            // (a quotient, not an element offset) and kept srcsize % size
+            // elements, so it wrote the wrong portion of the source.
+            src = src + (srcsize - size);
+            srcsize = size;
+        }
+        const size_t fsize = size - cursor;
+        // one fragment
+        if (srcsize <= fsize)
+        {
+            std::copy_n(src, srcsize, data + cursor);
+        }
+        else // two fragments
+        {
+            std::copy_n(src, fsize, data + cursor);
+            std::copy_n(src + fsize, srcsize - fsize, data);
+        }
+        ringbuf_step(cursor, srcsize);
+    }
+
+    // Write a single element at cursor and advance it by one (with wrap).
+    void ringbuf_write(size_t& cursor, const T value)
+    {
+        T* data = get_data();
+        data[cursor] = value;
+
+        ringbuf_step(cursor, 1);
+    }
+    // Advance cursor by step modulo the buffer size (step <= size assumed).
+    void ringbuf_step(size_t& cursor, size_t step)
+    {
+        const size_t size = get_size();
+        cursor = cursor + step;
+        cursor = cursor >= size ?
cursor - size : cursor; + } + +protected: + template <typename Input> + KFR_INLINE void assign_expr(Input&& input) + { + process<T>(*this, std::forward<Input>(input), get_size()); + } + +private: + constexpr infinite size() const noexcept = delete; + KFR_INLINE size_t get_size() const { return ptr_cast<Class>(this)->size(); } + KFR_INLINE const T* get_data() const { return ptr_cast<Class>(this)->data(); } + KFR_INLINE T* get_data() { return ptr_cast<Class>(this)->data(); } +}; + +template <typename T, size_t Size> +struct alignas(maximum_vector_alignment) univector : std::array<T, Size>, + univector_base<T, univector<T, Size>> +{ + using std::array<T, Size>::size; + using size_type = size_t; + template <typename Input, KFR_ENABLE_IF(is_input_expression<Input>::value)> + univector(Input&& input) + { + this->assign_expr(std::forward<Input>(input)); + } + template <typename... Args> + constexpr univector(T x, Args... args) noexcept : std::array<T, Size>{ { x, static_cast<T>(args)... } } + { + } + + constexpr univector() noexcept(noexcept(std::array<T, Size>())) = default; + constexpr univector(size_t, const T& value) { std::fill(this->begin(), this->end(), value); } + constexpr static bool size_known = true; + constexpr static bool is_array = true; + constexpr static bool is_array_ref = false; + constexpr static bool is_vector = false; + constexpr static bool is_aligned = true; + constexpr static bool is_pod = kfr::is_pod<T>::value; + using value_type = T; + + using univector_base<T, univector>::operator=; +}; + +template <typename T> +struct univector<T, tag_array_ref> : array_ref<T>, univector_base<T, univector<T, tag_array_ref>> +{ + using array_ref<T>::size; + using array_ref<T>::array_ref; + using size_type = size_t; + constexpr univector(const array_ref<T>& other) : array_ref<T>(other) {} + constexpr univector(array_ref<T>&& other) : array_ref<T>(std::move(other)) {} + + template <size_t Tag> + constexpr univector(const univector<T, Tag>& other) : 
array_ref<T>(other.data(), other.size()) + { + } + template <size_t Tag> + constexpr univector(univector<T, Tag>& other) : array_ref<T>(other.data(), other.size()) + { + } + template <typename U, size_t Tag, KFR_ENABLE_IF(is_same<remove_const<T>, U>::value&& is_const<T>::value)> + constexpr univector(const univector<U, Tag>& other) : array_ref<T>(other.data(), other.size()) + { + } + template <typename U, size_t Tag, KFR_ENABLE_IF(is_same<remove_const<T>, U>::value&& is_const<T>::value)> + constexpr univector(univector<U, Tag>& other) : array_ref<T>(other.data(), other.size()) + { + } + constexpr static bool size_known = false; + constexpr static bool is_array = false; + constexpr static bool is_array_ref = true; + constexpr static bool is_vector = false; + constexpr static bool is_aligned = false; + using value_type = T; + + using univector_base<T, univector>::operator=; +}; + +template <typename T> +struct univector<T, tag_dynamic_vector> : std::vector<T, allocator<T>>, + univector_base<T, univector<T, tag_dynamic_vector>> +{ + using std::vector<T, allocator<T>>::size; + using std::vector<T, allocator<T>>::vector; + using size_type = size_t; + template <typename Input, KFR_ENABLE_IF(is_input_expression<Input>::value && !is_infinite<Input>::value)> + univector(Input&& input) + { + this->resize(input.size()); + this->assign_expr(std::forward<Input>(input)); + } + constexpr univector() noexcept = default; + constexpr univector(const std::vector<T>& other) : std::vector<T, allocator<T>>(other) {} + constexpr univector(std::vector<T>&& other) : std::vector<T, allocator<T>>(std::move(other)) {} + constexpr univector(const array_ref<T>& other) : std::vector<T, allocator<T>>(other.begin(), other.end()) + { + } + constexpr univector(const array_ref<const T>& other) + : std::vector<T, allocator<T>>(other.begin(), other.end()) + { + } + constexpr static bool size_known = false; + constexpr static bool is_array = false; + constexpr static bool is_array_ref = false; + 
constexpr static bool is_vector = true; + constexpr static bool is_aligned = true; + using value_type = T; + + using univector_base<T, univector>::operator=; +}; + +template <typename T> +using univector_ref = univector<T, tag_array_ref>; + +template <typename T> +using univector_dyn = univector<T, tag_dynamic_vector>; + +template <typename T, size_t Size1 = tag_dynamic_vector, size_t Size2 = tag_dynamic_vector> +using univector2d = univector<univector<T, Size2>, Size1>; + +template <typename T, size_t Size1 = tag_dynamic_vector, size_t Size2 = tag_dynamic_vector, + size_t Size3 = tag_dynamic_vector> +using univector3d = univector<univector<univector<T, Size3>, Size2>, Size1>; + +template <cpu_t c = cpu_t::native, size_t Tag, typename T, typename Fn> +KFR_INLINE void process(univector<T, Tag>& vector, Fn&& fn) +{ + static_assert(is_input_expression<Fn>::value, "Fn must be an expression"); + return process<T, c>(vector, std::forward<Fn>(fn), vector.size()); +} + +template <cpu_t c = cpu_t::native, typename T, size_t Nsize, typename Fn> +KFR_INLINE void process(T (&dest)[Nsize], Fn&& fn) +{ + static_assert(is_input_expression<Fn>::value, "Fn must be an expression"); + return process<T, c>(univector<T, tag_array_ref>(dest), std::forward<Fn>(fn), Nsize); +} +template <cpu_t c = cpu_t::native, typename T, typename Fn> +KFR_INLINE void process(const array_ref<T>& vector, Fn&& fn) +{ + static_assert(is_input_expression<Fn>::value, "Fn must be an expression"); + return process<T, c>(univector<T, tag_array_ref>(vector), std::forward<Fn>(fn), vector.size()); +} + +template <typename T> +KFR_INLINE univector_ref<T> make_univector(T* data, size_t size) +{ + return univector_ref<T>(data, size); +} + +template <typename T> +KFR_INLINE univector_ref<const T> make_univector(const T* data, size_t size) +{ + return univector_ref<const T>(data, size); +} + +template <typename Expr, typename T = value_type_of<Expr>> +KFR_INLINE univector<T> render(Expr&& expr) +{ + univector<T> 
result; + result.resize(expr.size()); + result = expr; + return result; +} + +template <typename Expr, typename T = value_type_of<Expr>> +KFR_INLINE univector<T> render(Expr&& expr, size_t size) +{ + univector<T> result; + result.resize(size); + result = expr; + return result; +} +} diff --git a/include/kfr/base/vec.hpp b/include/kfr/base/vec.hpp @@ -0,0 +1,1324 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. 
+ */ +#pragma once + +#include "kfr.h" + +#include "types.hpp" + +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wfloat-equal" +#pragma clang diagnostic ignored "-Wc++98-compat-local-type-template-args" +#pragma clang diagnostic ignored "-Wshadow" +#pragma clang diagnostic ignored "-Wpacked" + +namespace kfr +{ + +template <typename T, size_t N> +struct vec; +template <typename T, size_t N> +struct mask; + +using simdindex = int; + +template <typename T, simdindex N> +using simd = T __attribute__((ext_vector_type(N))); + +namespace internal +{ +template <typename T> +struct is_vec_impl : std::false_type +{ +}; + +template <typename T, size_t N> +struct is_vec_impl<vec<T, N>> : std::true_type +{ +}; + +template <typename T, size_t N> +struct is_vec_impl<mask<T, N>> : std::true_type +{ +}; + +template <typename T, bool A> +struct struct_with_alignment +{ + T value; + KFR_INTRIN void operator=(T value) { this->value = value; } +}; + +template <typename T> +struct struct_with_alignment<T, false> +{ + T value; + KFR_INTRIN void operator=(T value) { this->value = value; } +} __attribute__((__packed__, __may_alias__)); // +} + +template <typename T> +using is_vec = internal::is_vec_impl<T>; + +template <typename T, size_t N, bool A> +using vec_algn = internal::struct_with_alignment<simd<T, N>, A>; + +template <typename T, size_t N, bool A> +struct vec_ptr +{ + constexpr KFR_INLINE vec_ptr(T* data) noexcept : data(data) {} + constexpr KFR_INLINE vec_ptr(const T* data) noexcept : data(const_cast<T*>(data)) {} + KFR_INLINE const vec_algn<T, N, A>& operator[](size_t i) const + { + return *static_cast<vec_algn<T, N, A>*>(data + i); + } + KFR_INLINE vec_algn<T, N, A>& operator[](size_t i) { return *static_cast<vec_algn<T, N, A>*>(data + i); } + T* data; +}; + +template <typename To, typename From, size_t N, + KFR_ENABLE_IF(std::is_same<subtype<From>, subtype<To>>::value), + size_t Nout = N* compound_type_traits<From>::width / compound_type_traits<To>::width> 
+constexpr KFR_INLINE vec<To, Nout> subcast(vec<From, N> value) noexcept +{ + return *value; +} + +namespace internal +{ + +template <typename Fn, size_t index> +constexpr enable_if<std::is_same<size_t, decltype(std::declval<Fn>().operator()(size_t()))>::value, size_t> +get_vec_index() +{ + constexpr Fn fn{}; + return fn(index); +} + +template <typename Fn, size_t index> +constexpr enable_if< + std::is_same<size_t, decltype(std::declval<Fn>().template operator() < index > ())>::value, size_t> +get_vec_index(int = 0) +{ + constexpr Fn fn{}; + return fn.template operator()<index>(); +} + +constexpr size_t index_undefined = static_cast<size_t>(-1); + +template <typename T, size_t N, size_t... Indices, KFR_ENABLE_IF(!is_compound<T>::value)> +KFR_INLINE vec<T, sizeof...(Indices)> shufflevector(csizes_t<Indices...>, vec<T, N> x, vec<T, N> y) +{ + vec<T, sizeof...(Indices)> result = __builtin_shufflevector( + *x, *y, static_cast<intptr_t>(Indices == index_undefined ? -1 : static_cast<intptr_t>(Indices))...); + return result; +} + +template <size_t... indices, size_t... counter, size_t groupsize = sizeof...(counter) / sizeof...(indices)> +constexpr auto inflate_impl(csizes_t<indices...> ind, csizes_t<counter...> cnt) + -> csizes_t<(ind.get(csize<counter / groupsize>) == index_undefined + ? index_undefined + : (counter % groupsize + groupsize * ind.get(csize<counter / groupsize>)))...> +{ + return {}; +} + +template <size_t groupsize, size_t... indices> +constexpr auto inflate(csize_t<groupsize>, csizes_t<indices...>) +{ + return inflate_impl(csizes<indices...>, csizeseq<sizeof...(indices)*groupsize>); +} + +template <typename T, size_t N, size_t... Indices, KFR_ENABLE_IF(is_compound<T>::value)> +KFR_INLINE vec<T, sizeof...(Indices)> shufflevector(csizes_t<Indices...> indices, vec<T, N> x, vec<T, N> y) +{ + return subcast<T>( + shufflevector(inflate(csize<widthof<T>()>, indices), subcast<subtype<T>>(x), subcast<subtype<T>>(y))); +} + +template <size_t... 
// Functor-driven shuffle: Fn maps output lane -> input lane (per group of
// groupsize scalars); indices are expanded at compile time.
template <typename Fn, size_t groupsize, typename T, size_t N, size_t... Indices,
          size_t Nout = sizeof...(Indices)>
KFR_INLINE vec<T, Nout> shufflevector(vec<T, N> x, vec<T, N> y, cvals_t<size_t, Indices...>)
{
    static_assert(N % groupsize == 0, "N % groupsize == 0");
    return internal::shufflevector<T, N>(
        csizes<(get_vec_index<Fn, Indices / groupsize>() * groupsize + Indices % groupsize)...>, x, y);
}
}

// Public two-input shuffle driven by an index functor Fn.
template <size_t Nout, typename Fn, size_t groupsize = 1, typename T, size_t N>
KFR_INLINE vec<T, Nout> shufflevector(vec<T, N> x, vec<T, N> y)
{
    return internal::shufflevector<Fn, groupsize>(x, y, csizeseq<Nout>);
}

// Public one-input shuffle (x shuffled with itself).
template <size_t Nout, typename Fn, size_t groupsize = 1, typename T, size_t N>
KFR_INLINE vec<T, Nout> shufflevector(vec<T, N> x)
{
    return internal::shufflevector<Fn, groupsize>(x, x, csizeseq<Nout>);
}

// GLSL/OpenCL-style component tags (x/y/z/w, r/g/b/a, s/t/p/q, s0..s15)
// usable as compile-time lane selectors.
namespace swizzle
{
template <size_t>
struct swiz
{
    constexpr swiz() {}
};

constexpr swiz<0> x{};
constexpr swiz<1> y{};
constexpr swiz<2> z{};
constexpr swiz<3> w{};
constexpr swiz<0> r{};
constexpr swiz<1> g{};
constexpr swiz<2> b{};
constexpr swiz<3> a{};
constexpr swiz<0> s{};
constexpr swiz<1> t{};
constexpr swiz<2> p{};
constexpr swiz<3> q{};

constexpr swiz<0> s0{};
constexpr swiz<1> s1{};
constexpr swiz<2> s2{};
constexpr swiz<3> s3{};
constexpr swiz<4> s4{};
constexpr swiz<5> s5{};
constexpr swiz<6> s6{};
constexpr swiz<7> s7{};
constexpr swiz<8> s8{};
constexpr swiz<9> s9{};
constexpr swiz<10> s10{};
constexpr swiz<11> s11{};
constexpr swiz<12> s12{};
constexpr swiz<13> s13{};
constexpr swiz<14> s14{};
constexpr swiz<15> s15{};
}

// Scalar value conversion (numeric cast).
template <typename To, typename From, KFR_ENABLE_IF(!is_compound<From>::value)>
constexpr KFR_INLINE To cast(From value) noexcept
{
    return static_cast<To>(value);
}
// Scalar bit reinterpretation via union type punning (same size assumed).
template <typename To, typename From, KFR_ENABLE_IF(!is_compound<From>::value)>
constexpr KFR_INLINE To bitcast(From value) noexcept
{
    union {
        From from;
        To to;
    } u{ value };
    return u.to;
}

// Bit-reinterpret a scalar as its unsigned integer counterpart.
template <typename From, typename To = utype<From>, KFR_ENABLE_IF(!is_compound<From>::value)>
constexpr KFR_INLINE To ubitcast(From value) noexcept
{
    return bitcast<To>(value);
}

// Bit-reinterpret a scalar as its signed integer counterpart.
template <typename From, typename To = itype<From>, KFR_ENABLE_IF(!is_compound<From>::value)>
constexpr KFR_INLINE To ibitcast(From value) noexcept
{
    return bitcast<To>(value);
}

// Bit-reinterpret a scalar as its floating-point counterpart.
template <typename From, typename To = ftype<From>, KFR_ENABLE_IF(!is_compound<From>::value)>
constexpr KFR_INLINE To fbitcast(From value) noexcept
{
    return bitcast<To>(value);
}

// Element-wise numeric conversion of a vector (clang builtin).
template <typename To, typename From, size_t N, KFR_ENABLE_IF(!is_compound<To>::value)>
constexpr KFR_INLINE vec<To, N> cast(vec<From, N> value) noexcept
{
    return __builtin_convertvector(*value, simd<To, N>);
}
template <typename To, typename From, simdindex N>
constexpr KFR_INLINE simd<To, N> cast(simd<From, N> value) noexcept
{
    return __builtin_convertvector(value, simd<To, N>);
}
// Bit-reinterpret a vector; element count changes to preserve total size.
template <typename To, typename From, size_t N, size_t Nout = sizeof(From) * N / sizeof(To)>
constexpr KFR_INLINE vec<To, Nout> bitcast(vec<From, N> value) noexcept
{
    return reinterpret_cast<simd<To, Nout>>(*value);
}
template <typename To, typename From, simdindex N, simdindex Nout = sizeof(From) * N / sizeof(To)>
constexpr KFR_INLINE simd<To, Nout> bitcast(simd<From, N> value) noexcept
{
    return reinterpret_cast<simd<To, Nout>>(value);
}

template <typename From, size_t N, typename To = utype<From>, size_t Nout = sizeof(From) * N / sizeof(To)>
constexpr KFR_INLINE vec<To, Nout> ubitcast(vec<From, N> value) noexcept
{
    return reinterpret_cast<simd<To, Nout>>(*value);
}

template <typename From, size_t N, typename To = itype<From>, size_t Nout = sizeof(From) * N / sizeof(To)>
constexpr KFR_INLINE vec<To, Nout> ibitcast(vec<From, N> value) noexcept
{
    return reinterpret_cast<simd<To, Nout>>(*value);
}
template <typename From, size_t N, typename To = ftype<From>, size_t Nout = sizeof(From) * N / sizeof(To)>
constexpr KFR_INLINE vec<To, Nout> fbitcast(vec<From, N> value) noexcept
{
    return reinterpret_cast<simd<To, Nout>>(*value);
}

// Raw-simd overloads of the u/i/f bitcasts.
template <typename From, simdindex N, typename To = utype<From>,
          simdindex Nout = sizeof(From) * N / sizeof(To)>
constexpr KFR_INLINE simd<To, Nout> ubitcast(simd<From, N> value) noexcept
{
    return reinterpret_cast<simd<To, Nout>>(value);
}

template <typename From, simdindex N, typename To = itype<From>,
          simdindex Nout = sizeof(From) * N / sizeof(To)>
constexpr KFR_INLINE simd<To, Nout> ibitcast(simd<From, N> value) noexcept
{
    return reinterpret_cast<simd<To, Nout>>(value);
}

template <typename From, simdindex N, typename To = ftype<From>,
          simdindex Nout = sizeof(From) * N / sizeof(To)>
constexpr KFR_INLINE simd<To, Nout> fbitcast(simd<From, N> value) noexcept
{
    return reinterpret_cast<simd<To, Nout>>(value);
}

// Alignment required for a vector of the given byte size (next power of two).
constexpr KFR_INLINE size_t vector_alignment(size_t size) { return next_poweroftwo(size); }

// Forward declaration; defined after the internal concat helpers below.
template <typename T, size_t N, size_t... Sizes, size_t Nout = N + csum(csizes<Sizes...>)>
KFR_INLINE vec<T, Nout> concat(vec<T, N> x, vec<T, Sizes>... rest);

namespace internal
{
// Index functor: affine mapping i -> start + i * stride.
template <size_t start = 0, size_t stride = 1>
struct shuffle_index
{
    constexpr KFR_INLINE size_t operator()(size_t index) const { return start + index * stride; }
};

// Index functor: affine mapping wrapped modulo count (used for repeat/resize).
template <size_t count, size_t start = 0, size_t stride = 1>
struct shuffle_index_wrap
{
    constexpr inline size_t operator()(size_t index) const { return (start + index * stride) % count; }
};
}

// Tile the vector x `count` times: [x, x, ..., x].
template <size_t count, typename T, size_t N, size_t Nout = N* count>
KFR_INLINE vec<T, Nout> repeat(vec<T, N> x)
{
    return shufflevector<Nout, internal::shuffle_index_wrap<N, 0, 1>>(x);
}
KFR_FN(repeat)

#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wold-style-cast"

// Splat a scalar into all N lanes (OpenCL-style vector cast syntax).
template <size_t N, typename T>
constexpr KFR_INLINE vec<T, N> broadcast(T x)
{
    return (simd<T, N>)(x);
}

#pragma clang diagnostic pop

// Resize a vector, wrapping source lanes when growing; identity when Nout == N.
template <size_t Nout, typename T, size_t N, KFR_ENABLE_IF(Nout != N)>
KFR_INLINE vec<T, Nout> resize(vec<T, N> x)
{
    return shufflevector<Nout, internal::shuffle_index_wrap<N, 0, 1>>(x);
}
template <size_t Nout, typename T, size_t N, KFR_ENABLE_IF(Nout == N)>
constexpr KFR_INLINE vec<T, Nout> resize(vec<T, N> x)
{
    return x;
}
KFR_FN(resize)

namespace internal_read_write
{

// Load N elements from memory; A selects aligned access. Power-of-two sizes
// load in one shot; other sizes split into power-of-two pieces recursively.
template <size_t N, bool A = false, typename T, KFR_ENABLE_IF(is_poweroftwo(N))>
KFR_INLINE vec<T, N> read(const T* src)
{
    return ptr_cast<vec_algn<subtype<T>, vec<T, N>::scalar_size(), A>>(src)->value;
}

template <size_t N, bool A = false, typename T, KFR_ENABLE_IF(!is_poweroftwo(N))>
KFR_INLINE vec<T, N> read(const T* src)
{
    constexpr size_t first = prev_poweroftwo(N);
    constexpr size_t rest = N - first;
    return concat(internal_read_write::read<first, A>(src),
                  internal_read_write::read<rest, false>(src + first));
}

// Store N elements to memory; mirrors read() including the split strategy.
template <bool A = false, size_t N, typename T, KFR_ENABLE_IF(is_poweroftwo(N))>
KFR_INLINE void write(T* dest, vec<T, N> value)
{
    ptr_cast<vec_algn<subtype<T>, value.scalar_size(), A>>(dest)->value = *value;
}
*value; +} + +template <bool A = false, size_t N, typename T, KFR_ENABLE_IF(!is_poweroftwo(N))> +KFR_INLINE void write(T* dest, vec<T, N> value) +{ + constexpr size_t first = prev_poweroftwo(N); + constexpr size_t rest = N - first; + internal_read_write::write<A, first>(dest, shufflevector<first, internal::shuffle_index<0>>(value)); + internal_read_write::write<false, rest>(dest + first, + shufflevector<rest, internal::shuffle_index<first>>(value)); +} +} + +template <typename T, size_t N> +struct pkd_vec +{ + constexpr pkd_vec() noexcept {} + pkd_vec(const vec<T, N>& value) noexcept { internal_read_write::write(v, value); } + template <typename... Ts> + constexpr pkd_vec(Ts... init) noexcept : v{ static_cast<T>(init)... } + { + static_assert(N <= sizeof...(Ts), "Too few initializers for pkd_vec"); + } + +private: + T v[N]; + friend struct vec<T, N>; +} __attribute__((packed)); + +template <typename T> +struct vec_op +{ + using scalar_type = subtype<T>; + + template <simdindex N> + constexpr static simd<scalar_type, N> add(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept + { + return x + y; + } + template <simdindex N> + constexpr static simd<scalar_type, N> sub(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept + { + return x - y; + } + template <simdindex N> + constexpr static simd<scalar_type, N> mul(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept + { + return x * y; + } + template <simdindex N> + constexpr static simd<scalar_type, N> div(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept + { + return x / y; + } + template <simdindex N> + constexpr static simd<scalar_type, N> rem(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept + { + return x % y; + } + template <simdindex N> + constexpr static simd<scalar_type, N> shl(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept + { + return x << y; + } + template <simdindex N> + constexpr static simd<scalar_type, N> shr(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept + 
{ + return x >> y; + } + template <simdindex N> + constexpr static simd<scalar_type, N> neg(simd<scalar_type, N> x) noexcept + { + return -x; + } + template <simdindex N> + constexpr static simd<scalar_type, N> band(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept + { + return bitcast<scalar_type>(ubitcast(x) & ubitcast(y)); + } + template <simdindex N> + constexpr static simd<scalar_type, N> bor(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept + { + return bitcast<scalar_type>(ubitcast(x) | ubitcast(y)); + } + template <simdindex N> + constexpr static simd<scalar_type, N> bxor(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept + { + return bitcast<scalar_type>(ubitcast(x) ^ ubitcast(y)); + } + template <simdindex N> + constexpr static simd<scalar_type, N> bnot(simd<scalar_type, N> x) noexcept + { + return bitcast<scalar_type>(~ubitcast(x)); + } + + template <simdindex N> + constexpr static simd<scalar_type, N> eq(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept + { + return bitcast<scalar_type>(x == y); + } + template <simdindex N> + constexpr static simd<scalar_type, N> ne(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept + { + return bitcast<scalar_type>(x != y); + } + template <simdindex N> + constexpr static simd<scalar_type, N> lt(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept + { + return bitcast<scalar_type>(x < y); + } + template <simdindex N> + constexpr static simd<scalar_type, N> gt(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept + { + return bitcast<scalar_type>(x > y); + } + template <simdindex N> + constexpr static simd<scalar_type, N> le(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept + { + return bitcast<scalar_type>(x <= y); + } + template <simdindex N> + constexpr static simd<scalar_type, N> ge(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept + { + return bitcast<scalar_type>(x >= y); + } +}; + +namespace internal +{ +template <typename T, typename... Args, size_t... 
/// Create vector from scalar values
/// @code
/// CHECK( make_vector( 1, 2, 3, 4 ) == i32x4{1, 2, 3, 4} );
/// @encode
template <typename Type = void, typename Arg, typename... Args, size_t N = (sizeof...(Args) + 1),
          typename SubType = conditional<is_void<Type>::value, common_type<Arg, Args...>, Type>>
constexpr KFR_INLINE vec<SubType, N> make_vector(const Arg& x, const Args&... rest)
{
    return internal::make_vector_impl<SubType>(csizeseq<N * widthof<SubType>()>, static_cast<SubType>(x),
                                               static_cast<SubType>(rest)...);
}
// Identity overload: a vec passes through unchanged.
template <typename T, size_t N>
constexpr KFR_INLINE vec<T, N> make_vector(vec<T, N> x)
{
    return x;
}
// Build a vector from a compile-time value list.
template <typename T, T... Values, size_t N = sizeof...(Values)>
constexpr KFR_INLINE vec<T, N> make_vector(cvals_t<T, Values...>)
{
    return make_vector<T>(Values...);
}
KFR_FN(make_vector)

// The central SIMD vector type: N elements of T (T may itself be compound,
// e.g. complex; the underlying simd then holds N * width scalars).
template <typename T, size_t N>
struct vec : vec_t<T, N>
{
    static_assert(N > 0 && N <= 256, "Invalid vector size");

    using value_type = T;
    using scalar_type = subtype<T>;
    // Number of underlying scalars (accounts for compound element width).
    constexpr static size_t scalar_size() noexcept { return N * compound_type_traits<T>::width; }
    using simd_t = simd<scalar_type, scalar_size()>;
    using ref = vec&;
    using cref = const vec&;

    constexpr static bool is_pod = true;

    constexpr KFR_INLINE vec() noexcept {}
    constexpr KFR_INLINE vec(simd_t value) noexcept : v(value) {}
    // Load from a memory range (unaligned).
    constexpr KFR_INLINE vec(const array_ref<T>& value) noexcept
        : v(*internal_read_write::read<N, false>(value.data()))
    {
    }
    // Scalar broadcast for compound element types (width > 1).
    template <typename U,
              KFR_ENABLE_IF(std::is_convertible<U, T>::value&& compound_type_traits<T>::width > 1)>
    constexpr KFR_INLINE vec(const U& value) noexcept
        : v(*resize<scalar_size()>(bitcast<scalar_type>(make_vector(static_cast<T>(value)))))
    {
    }
    // Scalar broadcast for plain element types (width == 1).
    template <typename U,
              KFR_ENABLE_IF(std::is_convertible<U, T>::value&& compound_type_traits<T>::width == 1)>
    constexpr KFR_INLINE vec(const U& value) noexcept : v(static_cast<T>(value))
    {
    }
    // Element-wise initialization from at least two values.
    template <typename... Ts>
    constexpr KFR_INLINE vec(const T& x, const T& y, const Ts&... rest) noexcept
        : v(*make_vector<T>(x, y, rest...))
    {
        static_assert(N <= 2 + sizeof...(Ts), "Too few initializers for vec");
    }
    // Concatenating constructor: sizes of the parts must sum to N.
    template <size_t N1, size_t N2, size_t... Ns>
    constexpr KFR_INLINE vec(const vec<T, N1>& v1, const vec<T, N2>& v2,
                             const vec<T, Ns>&... vectors) noexcept : v(*concat(v1, v2, vectors...))
    {
        static_assert(csum(csizes<N1, N2, Ns...>) == N, "Can't concat vectors: invalid csizes");
    }
    constexpr KFR_INLINE vec(const vec&) noexcept = default;
    constexpr KFR_INLINE vec(vec&&) noexcept = default;
    constexpr KFR_INLINE vec& operator=(const vec&) noexcept = default;
    constexpr KFR_INLINE vec& operator=(vec&&) noexcept = default;

    // Arithmetic/bitwise operators delegate to vec_op<T>, which may be
    // specialized per element type.
    friend constexpr KFR_INLINE vec operator+(vec x, vec y) { return vec_op<T>::add(x.v, y.v); }
    friend constexpr KFR_INLINE vec operator-(vec x, vec y) { return vec_op<T>::sub(x.v, y.v); }
    friend constexpr KFR_INLINE vec operator*(vec x, vec y) { return vec_op<T>::mul(x.v, y.v); }
    friend constexpr KFR_INLINE vec operator/(vec x, vec y) { return vec_op<T>::div(x.v, y.v); }
    friend constexpr KFR_INLINE vec operator%(vec x, vec y) { return vec_op<T>::rem(x.v, y.v); }
    friend constexpr KFR_INLINE vec operator-(vec x) { return vec_op<T>::neg(x.v); }

    friend constexpr KFR_INLINE vec operator&(vec x, vec y) { return vec_op<T>::band(x.v, y.v); }
    friend constexpr KFR_INLINE vec operator|(vec x, vec y) { return vec_op<T>::bor(x.v, y.v); }
    friend constexpr KFR_INLINE vec operator^(vec x, vec y) { return vec_op<T>::bxor(x.v, y.v); }
    friend constexpr KFR_INLINE vec operator~(vec x) { return vec_op<T>::bnot(x.v); }

    friend constexpr KFR_INLINE vec operator<<(vec x, vec y) { return vec_op<T>::shl(x.v, y.v); }
    friend constexpr KFR_INLINE vec operator>>(vec x, vec y) { return vec_op<T>::shr(x.v, y.v); }

    // Comparisons return lane masks, not bool.
    friend constexpr KFR_INLINE mask<T, N> operator==(vec x, vec y) { return vec_op<T>::eq(x.v, y.v); }
    friend constexpr KFR_INLINE mask<T, N> operator!=(vec x, vec y) { return vec_op<T>::ne(x.v, y.v); }
    friend constexpr KFR_INLINE mask<T, N> operator<(vec x, vec y) { return vec_op<T>::lt(x.v, y.v); }
    friend constexpr KFR_INLINE mask<T, N> operator>(vec x, vec y) { return vec_op<T>::gt(x.v, y.v); }
    friend constexpr KFR_INLINE mask<T, N> operator<=(vec x, vec y) { return vec_op<T>::le(x.v, y.v); }
    friend constexpr KFR_INLINE mask<T, N> operator>=(vec x, vec y) { return vec_op<T>::ge(x.v, y.v); }

#define KFR_ASGN_OP(aop, op)                                                                                 \
    friend KFR_INLINE vec& operator aop(vec& x, vec y)                                                       \
    {                                                                                                        \
        x = x op y;                                                                                          \
        return x;                                                                                            \
    }
    KFR_ASGN_OP(+=, +)
    KFR_ASGN_OP(-=, -)
    KFR_ASGN_OP(*=, *)
    KFR_ASGN_OP(/=, /)
    KFR_ASGN_OP(%=, %)
    KFR_ASGN_OP(&=, &)
    KFR_ASGN_OP(|=, |)
    KFR_ASGN_OP(^=, ^)
    KFR_ASGN_OP(<<=, <<)
    KFR_ASGN_OP(>>=, >>)

    // operator* exposes the raw simd value (deref-style accessor used
    // throughout this header).
    constexpr KFR_INLINE simd_t operator*() const { return v; }
    constexpr KFR_INLINE simd_t& operator*() { return v; }
    KFR_INLINE mask<T, N>& asmask() { return ref_cast<mask<T, N>>(*this); }
    KFR_INLINE const mask<T, N>& asmask() const { return ref_cast<mask<T, N>>(*this); }
    KFR_INLINE value_type operator[](size_t index) const { return data()[index]; }

    KFR_INLINE value_type* data() { return ptr_cast<T>(&v); }
    KFR_INLINE const T* data() const { return ptr_cast<T>(&v); }
    using array_t = T (&)[N];
    KFR_INLINE array_t arr() { return ref_cast<array_t>(v); }

    template <typename U, KFR_ENABLE_IF(std::is_convertible<T, U>::value)>
    constexpr operator vec<U, N>() noexcept
    {
        return cast<U>(*this);
    }

private:
    struct getter_setter;

public:
    // operator() gives mutable single-lane access via a proxy object.
    getter_setter operator()(size_t index) { return { v, index }; }
    scalar_type operator()(size_t index) const { return v[index]; }

protected:
    template <typename U, size_t M>
    friend struct vec;
    template <typename U, size_t M>
    friend struct mask;
    simd_t v;

private:
    // Proxy enabling `v(i) = x` assignment into a single lane.
    struct getter_setter
    {
        constexpr getter_setter(simd_t& v, size_t index) noexcept : v(v), index(index) {}
        KFR_INLINE getter_setter& operator=(scalar_type value) noexcept
        {
            v[index] = value;
            return *this;
        }
        KFR_INLINE operator scalar_type() const { return v[index]; }
    private:
        friend struct vec;
        simd_t& v;
        const size_t index;
    };
};
// Lane mask: a vec whose elements are all-ones (true) or all-zeros (false)
// bit patterns; supports logical combination and per-lane bool access.
template <typename T, size_t N>
struct mask : public vec<T, N>
{
    using type = T;
    constexpr static size_t width = N;

    using base = vec<T, N>;

    constexpr KFR_INLINE mask() noexcept : base() {}

    constexpr KFR_INLINE mask(simd<T, N> value) noexcept : base(value) {}
    template <size_t N1, size_t... Ns>
    constexpr KFR_INLINE mask(const mask<T, N1>& mask1, const mask<T, Ns>&... masks) noexcept
        : base(*concat(mask1, masks...))
    {
    }
    // Per-lane construction from bools; maskbits expands each bool to the
    // all-ones/all-zeros bit pattern of T.
    template <typename... Ts, typename = enable_if<sizeof...(Ts) + 2 == N>>
    constexpr KFR_INLINE mask(bool x, bool y, Ts... rest) noexcept
        : base{ internal::maskbits<T>(x), internal::maskbits<T>(y), internal::maskbits<T>(rest)... }
    {
    }
    constexpr KFR_INLINE mask(const mask&) noexcept = default;
    constexpr KFR_INLINE mask(mask&&) noexcept = default;
    KFR_INLINE mask& operator=(const mask&) noexcept = default;
    KFR_INLINE mask& operator=(mask&&) noexcept = default;

    // Reinterpreting constructors from same-sized element types
    // (SFINAE via the array-type trick u8[sizeof(T) == sizeof(M)]).
    template <typename M, typename = u8[sizeof(T) == sizeof(M)]>
    constexpr KFR_INLINE mask(vec<M, N> value) : base(reinterpret_cast<const vec<T, N>&>(value))
    {
    }

    template <typename M, typename = u8[sizeof(T) == sizeof(M)]>
    constexpr KFR_INLINE mask(mask<M, N> value) : base(reinterpret_cast<const vec<T, N>&>(value))
    {
    }
    constexpr KFR_INLINE mask operator~() const { return bitcast<T>(~ubitcast(this->v)); }
    constexpr KFR_INLINE mask operator&(vec<T, N> x) const
    {
        return bitcast<T>(ubitcast(this->v) & ubitcast(x.v));
    }
    constexpr KFR_INLINE mask operator|(vec<T, N> x) const
    {
        return bitcast<T>(ubitcast(this->v) | ubitcast(x.v));
    }
    constexpr KFR_INLINE mask operator^(vec<T, N> x) const
    {
        return bitcast<T>(ubitcast(this->v) ^ ubitcast(x.v));
    }

    // Logical ops are bitwise on masks (no short-circuit semantics).
    constexpr KFR_INLINE mask operator&&(mask x) const { return *this & x; }
    constexpr KFR_INLINE mask operator||(mask x) const { return *this | x; }
    constexpr KFR_INLINE mask operator!() const { return ~*this; }

    constexpr KFR_INLINE simd<T, N> operator*() const { return this->v; }

    KFR_INLINE vec<T, N>& asvec() { return ref_cast<mask>(*this); }
    KFR_INLINE const vec<T, N>& asvec() const { return ref_cast<mask>(*this); }

    // Lane truth test: sign bit of the lane's integer representation.
    KFR_INLINE bool operator[](size_t index) const { return ibitcast(this->v[index]) < 0; }
};

// Complex-layout vector: N complex values stored as 2*N scalars.
template <typename T, size_t N>
using cvec = vec<T, N * 2>;

namespace internal
{

// Index functor selecting [start, start+count) and leaving other lanes
// undefined (used to zero-extend a vector for concatenation).
template <size_t start, size_t count>
struct shuffle_index_extend
{
    constexpr KFR_INLINE size_t operator()(size_t index) const
    {
        return index >= start && index < start + count ? index - start : index_undefined;
    }
};

// Take `count` lanes starting at `start` from the pair (x, y).
template <size_t start, size_t count, typename T, size_t N>
KFR_INLINE vec<T, count> concatexact(vec<T, N> x, vec<T, N> y)
{
    return kfr::shufflevector<count, internal::shuffle_index<start>>(x, y);
}

// Concatenate two vectors; unequal sizes are first extended to match.
template <size_t start, size_t count, typename T, size_t N1, size_t N2>
KFR_INLINE enable_if<(N1 == N2), vec<T, count>> concattwo(vec<T, N1> x, vec<T, N2> y)
{
    return concatexact<start, count>(x, y);
}

template <size_t start, size_t count, typename T, size_t N1, size_t N2>
KFR_INLINE enable_if<(N1 > N2), vec<T, count>> concattwo(vec<T, N1> x, vec<T, N2> y)
{
    return concatexact<start, count>(x, shufflevector<N1, internal::shuffle_index_extend<0, N2>>(y));
}
template <size_t start, size_t count, typename T, size_t N1, size_t N2>
KFR_INLINE enable_if<(N1 < N2), vec<T, count>> concattwo(vec<T, N1> x, vec<T, N2> y)
{
    return concatexact<N2 - N1 + start, count>(
        shufflevector<N2, internal::shuffle_index_extend<N2 - N1, N1>>(x), y);
}

// Build a mask whose first N1 lanes are true and the rest false.
template <typename T, size_t Nout, size_t N1, size_t... indices>
constexpr mask<T, Nout> partial_mask_helper(csizes_t<indices...>)
{
    return make_vector(maskbits<T>(indices < N1)...);
}
template <typename T, size_t Nout, size_t N1>
constexpr mask<T, Nout> partial_mask()
{
    return internal::partial_mask_helper<T, Nout, N1>(csizeseq<Nout>);
}

// Variadic concat implemented pairwise, right-to-left.
template <typename T, size_t N>
KFR_INLINE vec<T, N> concat(vec<T, N> x)
{
    return x;
}

template <typename T, size_t N1, size_t N2>
KFR_INLINE vec<T, N1 + N2> concat(vec<T, N1> x, vec<T, N2> y)
{
    return concattwo<0, N1 + N2>(x, y);
}

template <typename T, size_t N1, size_t N2, size_t... Sizes>
KFR_INLINE auto concat(vec<T, N1> x, vec<T, N2> y, vec<T, Sizes>... args)
{
    return concat(x, concat(y, args...));
}
}

// Public concat: joins any number of vectors into one (Nout defaulted in the
// forward declaration above).
template <typename T, size_t N, size_t... Sizes, size_t Nout>
KFR_INLINE vec<T, Nout> concat(vec<T, N> x, vec<T, Sizes>... rest)
{
    return internal::concat(x, rest...);
}
KFR_FN(concat)

// Short aliases for common vector types: <element><bits>x<lanes>.
using f32x1 = vec<f32, 1>;
using f32x2 = vec<f32, 2>;
using f32x3 = vec<f32, 3>;
using f32x4 = vec<f32, 4>;
using f32x8 = vec<f32, 8>;
using f32x16 = vec<f32, 16>;
using f32x32 = vec<f32, 32>;
using f64x1 = vec<f64, 1>;
using f64x2 = vec<f64, 2>;
using f64x3 = vec<f64, 3>;
using f64x4 = vec<f64, 4>;
using f64x8 = vec<f64, 8>;
using f64x16 = vec<f64, 16>;
using f64x32 = vec<f64, 32>;
using i8x1 = vec<i8, 1>;
using i8x2 = vec<i8, 2>;
using i8x3 = vec<i8, 3>;
using i8x4 = vec<i8, 4>;
using i8x8 = vec<i8, 8>;
using i8x16 = vec<i8, 16>;
using i8x32 = vec<i8, 32>;
using i16x1 = vec<i16, 1>;
using i16x2 = vec<i16, 2>;
using i16x3 = vec<i16, 3>;
using i16x4 = vec<i16, 4>;
using i16x8 = vec<i16, 8>;
using i16x16 = vec<i16, 16>;
using i16x32 = vec<i16, 32>;
using i32x1 = vec<i32, 1>;
using i32x2 = vec<i32, 2>;
using i32x3 = vec<i32, 3>;
using i32x4 = vec<i32, 4>;
using i32x8 = vec<i32, 8>;
using i32x16 = vec<i32, 16>;
using i32x32 = vec<i32, 32>;
using i64x1 = vec<i64, 1>;
using i64x2 = vec<i64, 2>;
using i64x3 = vec<i64, 3>;
+using i64x4 = vec<i64, 4>; +using i64x8 = vec<i64, 8>; +using i64x16 = vec<i64, 16>; +using i64x32 = vec<i64, 32>; +using u8x1 = vec<u8, 1>; +using u8x2 = vec<u8, 2>; +using u8x3 = vec<u8, 3>; +using u8x4 = vec<u8, 4>; +using u8x8 = vec<u8, 8>; +using u8x16 = vec<u8, 16>; +using u8x32 = vec<u8, 32>; +using u16x1 = vec<u16, 1>; +using u16x2 = vec<u16, 2>; +using u16x3 = vec<u16, 3>; +using u16x4 = vec<u16, 4>; +using u16x8 = vec<u16, 8>; +using u16x16 = vec<u16, 16>; +using u16x32 = vec<u16, 32>; +using u32x1 = vec<u32, 1>; +using u32x2 = vec<u32, 2>; +using u32x3 = vec<u32, 3>; +using u32x4 = vec<u32, 4>; +using u32x8 = vec<u32, 8>; +using u32x16 = vec<u32, 16>; +using u32x32 = vec<u32, 32>; +using u64x1 = vec<u64, 1>; +using u64x2 = vec<u64, 2>; +using u64x3 = vec<u64, 3>; +using u64x4 = vec<u64, 4>; +using u64x8 = vec<u64, 8>; +using u64x16 = vec<u64, 16>; +using u64x32 = vec<u64, 32>; + +using mf32x1 = mask<f32, 1>; +using mf32x2 = mask<f32, 2>; +using mf32x3 = mask<f32, 3>; +using mf32x4 = mask<f32, 4>; +using mf32x8 = mask<f32, 8>; +using mf32x16 = mask<f32, 16>; +using mf32x32 = mask<f32, 32>; +using mf64x1 = mask<f64, 1>; +using mf64x2 = mask<f64, 2>; +using mf64x3 = mask<f64, 3>; +using mf64x4 = mask<f64, 4>; +using mf64x8 = mask<f64, 8>; +using mf64x16 = mask<f64, 16>; +using mf64x32 = mask<f64, 32>; +using mi8x1 = mask<i8, 1>; +using mi8x2 = mask<i8, 2>; +using mi8x3 = mask<i8, 3>; +using mi8x4 = mask<i8, 4>; +using mi8x8 = mask<i8, 8>; +using mi8x16 = mask<i8, 16>; +using mi8x32 = mask<i8, 32>; +using mi16x1 = mask<i16, 1>; +using mi16x2 = mask<i16, 2>; +using mi16x3 = mask<i16, 3>; +using mi16x4 = mask<i16, 4>; +using mi16x8 = mask<i16, 8>; +using mi16x16 = mask<i16, 16>; +using mi16x32 = mask<i16, 32>; +using mi32x1 = mask<i32, 1>; +using mi32x2 = mask<i32, 2>; +using mi32x4 = mask<i32, 3>; +using mi32x3 = mask<i32, 4>; +using mi32x8 = mask<i32, 8>; +using mi32x16 = mask<i32, 16>; +using mi32x32 = mask<i32, 32>; +using mi64x1 = mask<i64, 1>; +using 
mi64x2 = mask<i64, 2>; +using mi64x3 = mask<i64, 3>; +using mi64x4 = mask<i64, 4>; +using mi64x8 = mask<i64, 8>; +using mi64x16 = mask<i64, 16>; +using mi64x32 = mask<i64, 32>; +using mu8x1 = mask<u8, 1>; +using mu8x2 = mask<u8, 2>; +using mu8x3 = mask<u8, 3>; +using mu8x4 = mask<u8, 4>; +using mu8x8 = mask<u8, 8>; +using mu8x16 = mask<u8, 16>; +using mu8x32 = mask<u8, 32>; +using mu16x1 = mask<u16, 1>; +using mu16x2 = mask<u16, 2>; +using mu16x3 = mask<u16, 3>; +using mu16x4 = mask<u16, 4>; +using mu16x8 = mask<u16, 8>; +using mu16x16 = mask<u16, 16>; +using mu16x32 = mask<u16, 32>; +using mu32x1 = mask<u32, 1>; +using mu32x2 = mask<u32, 2>; +using mu32x3 = mask<u32, 3>; +using mu32x4 = mask<u32, 4>; +using mu32x8 = mask<u32, 8>; +using mu32x16 = mask<u32, 16>; +using mu32x32 = mask<u32, 32>; +using mu64x1 = mask<u64, 1>; +using mu64x2 = mask<u64, 2>; +using mu64x3 = mask<u64, 3>; +using mu64x4 = mask<u64, 4>; +using mu64x8 = mask<u64, 8>; +using mu64x16 = mask<u64, 16>; +using mu64x32 = mask<u64, 32>; + +namespace glsl_names +{ +using vec2 = f32x2; +using vec3 = f32x3; +using vec4 = f32x4; +using dvec2 = f64x2; +using dvec3 = f64x3; +using dvec4 = f64x4; +using ivec2 = i32x2; +using ivec3 = i32x3; +using ivec4 = i32x4; +using uvec2 = u32x2; +using uvec3 = u32x3; +using uvec4 = u32x4; +} +namespace opencl_names +{ +using char2 = i8x2; +using char3 = i8x3; +using char4 = i8x4; +using char8 = i8x8; +using char16 = i8x16; +using uchar2 = u8x2; +using uchar3 = u8x3; +using uchar4 = u8x4; +using uchar8 = u8x8; +using uchar16 = u8x16; + +using short2 = i16x2; +using short3 = i16x3; +using short4 = i16x4; +using short8 = i16x8; +using short16 = i16x16; +using ushort2 = u16x2; +using ushort3 = u16x3; +using ushort4 = u16x4; +using ushort8 = u16x8; +using ushort16 = u16x16; + +using int2 = i32x2; +using int3 = i32x3; +using int4 = i32x4; +using int8 = i32x8; +using int16 = i32x16; +using uint2 = u32x2; +using uint3 = u32x3; +using uint4 = u32x4; +using uint8 = u32x8; 
using uint16 = u32x16;

using long2 = i64x2;
using long3 = i64x3;
using long4 = i64x4;
using long8 = i64x8;
using long16 = i64x16;
using ulong2 = u64x2;
using ulong3 = u64x3;
using ulong4 = u64x4;
using ulong8 = u64x8;
using ulong16 = u64x16;

using float2 = f32x2;
using float3 = f32x3;
using float4 = f32x4;
using float8 = f32x8;
using float16 = f32x16;

using double2 = f64x2;
using double3 = f64x3;
using double4 = f64x4;
using double8 = f64x8;
using double16 = f64x16;
}

namespace internal
{
// Architecture-native widths: one SSE2 / AVX register per vector.
using f32sse = vec<f32, vector_width<f32, cpu_t::sse2>>;
using f64sse = vec<f64, vector_width<f64, cpu_t::sse2>>;
using i8sse = vec<i8, vector_width<i8, cpu_t::sse2>>;
using i16sse = vec<i16, vector_width<i16, cpu_t::sse2>>;
using i32sse = vec<i32, vector_width<i32, cpu_t::sse2>>;
using i64sse = vec<i64, vector_width<i64, cpu_t::sse2>>;
using u8sse = vec<u8, vector_width<u8, cpu_t::sse2>>;
using u16sse = vec<u16, vector_width<u16, cpu_t::sse2>>;
using u32sse = vec<u32, vector_width<u32, cpu_t::sse2>>;
using u64sse = vec<u64, vector_width<u64, cpu_t::sse2>>;

using mf32sse = mask<f32, vector_width<f32, cpu_t::sse2>>;
using mf64sse = mask<f64, vector_width<f64, cpu_t::sse2>>;
using mi8sse = mask<i8, vector_width<i8, cpu_t::sse2>>;
using mi16sse = mask<i16, vector_width<i16, cpu_t::sse2>>;
using mi32sse = mask<i32, vector_width<i32, cpu_t::sse2>>;
using mi64sse = mask<i64, vector_width<i64, cpu_t::sse2>>;
using mu8sse = mask<u8, vector_width<u8, cpu_t::sse2>>;
using mu16sse = mask<u16, vector_width<u16, cpu_t::sse2>>;
using mu32sse = mask<u32, vector_width<u32, cpu_t::sse2>>;
using mu64sse = mask<u64, vector_width<u64, cpu_t::sse2>>;

// Float vectors only need AVX1; integer vectors need AVX2 registers.
using f32avx = vec<f32, vector_width<f32, cpu_t::avx1>>;
using f64avx = vec<f64, vector_width<f64, cpu_t::avx1>>;
using i8avx = vec<i8, vector_width<i8, cpu_t::avx2>>;
using i16avx = vec<i16, vector_width<i16, cpu_t::avx2>>;
using i32avx = vec<i32, vector_width<i32, cpu_t::avx2>>;
using i64avx = vec<i64, vector_width<i64, cpu_t::avx2>>;
using u8avx = vec<u8, vector_width<u8, cpu_t::avx2>>;
using u16avx = vec<u16, vector_width<u16, cpu_t::avx2>>;
using u32avx = vec<u32, vector_width<u32, cpu_t::avx2>>;
using u64avx = vec<u64, vector_width<u64, cpu_t::avx2>>;

using mf32avx = mask<f32, vector_width<f32, cpu_t::avx1>>;
using mf64avx = mask<f64, vector_width<f64, cpu_t::avx1>>;
using mi8avx = mask<i8, vector_width<i8, cpu_t::avx2>>;
using mi16avx = mask<i16, vector_width<i16, cpu_t::avx2>>;
using mi32avx = mask<i32, vector_width<i32, cpu_t::avx2>>;
using mi64avx = mask<i64, vector_width<i64, cpu_t::avx2>>;
using mu8avx = mask<u8, vector_width<u8, cpu_t::avx2>>;
using mu16avx = mask<u16, vector_width<u16, cpu_t::avx2>>;
using mu32avx = mask<u32, vector_width<u32, cpu_t::avx2>>;
using mu64avx = mask<u64, vector_width<u64, cpu_t::avx2>>;

template <typename T, size_t N>
struct vec_type
{
    using type = vec<T, N>;
};

// Fixed-capacity vector storage viewable as any smaller vec<T, N> (N <= Nmax).
template <typename T, size_t Nmax>
struct maxvec
{
    constexpr static size_t size = Nmax;
    vec<T, size> vmax;
    maxvec(T initial) : vmax(initial) {}
    template <int N>
    vec<T, N>& v()
    {
        static_assert(N <= size, "N <= size");
        return reinterpret_cast<vec<T, N>&>(*this);
    }
    template <int N>
    const vec<T, N>& v() const
    {
        static_assert(N <= size, "N <= size");
        return reinterpret_cast<const vec<T, N>&>(*this);
    }
};

// Apply fn to lane `Index` of each argument vector; yields one scalar.
template <size_t Index, typename T, size_t N, typename Fn, typename... Args,
          typename Tout = result_of<Fn(subtype<remove_reference<Args>>...)>>
constexpr KFR_INLINE Tout applyfn_helper(Fn&& fn, Args&&... args)
{
    return fn(args[Index]...);
}

// Apply fn lane-wise across all argument vectors, collecting into a vec.
template <typename T, size_t N, typename Fn, typename... Args,
          typename Tout = result_of<Fn(subtype<remove_reference<Args>>...)>, size_t... Indices>
constexpr KFR_INLINE vec<Tout, N> apply_helper(Fn&& fn, csizes_t<Indices...>, Args&&... args)
{
    return make_vector(applyfn_helper<Indices, T, N>(std::forward<Fn>(fn), std::forward<Args>(args)...)...);
}
// Call a nullary fn once per lane ((void)Indices forces the pack expansion).
template <typename T, size_t N, typename Fn, size_t... Indices>
constexpr KFR_INLINE vec<T, N> apply0_helper(Fn&& fn, csizes_t<Indices...>)
{
    return make_vector(((void)Indices, void(), fn())...);
}
}

// Apply a scalar function lane-wise to one or more vectors.
template <typename T, size_t N, typename Fn, typename... Args,
          typename Tout = result_of<Fn(T, subtype<remove_reference<Args>>...)>>
constexpr KFR_INLINE vec<Tout, N> apply(Fn&& fn, vec<T, N> arg, Args&&... args)
{
    return internal::apply_helper<T, N>(std::forward<Fn>(fn), csizeseq<N>, arg, std::forward<Args>(args)...);
}

// Fill a vector by invoking a nullary function once per lane.
template <size_t N, typename Fn, typename T = result_of<Fn()>>
constexpr KFR_INLINE vec<T, N> apply(Fn&& fn)
{
    return internal::apply0_helper<T, N>(std::forward<Fn>(fn), csizeseq<N>);
}

// Wrap raw simd / intrinsic register types into the corresponding vec.
template <typename T, int N>
KFR_INLINE vec<T, N> tovec(simd<T, N> x)
{
    return x;
}
KFR_INLINE f32x4 tovec(__m128 x) { return f32x4(x); }
KFR_INLINE f64x2 tovec(__m128d x) { return f64x2(x); }
KFR_INLINE f32x8 tovec(__m256 x) { return f32x8(x); }
KFR_INLINE f64x4 tovec(__m256d x) { return f64x4(x); }

// Build a mask from bool arguments (one per lane).
template <typename T, typename... Args, size_t Nout = (sizeof...(Args) + 1)>
constexpr KFR_INLINE mask<T, Nout> make_mask(bool arg, Args... args)
{
    simd<T, Nout> temp{ internal::maskbits<T>(arg), internal::maskbits<T>(static_cast<bool>(args))... };
    return temp;
}
KFR_FN(make_mask)

// All-zero-bits vector (value-initialized simd).
template <typename T, size_t N>
constexpr KFR_INLINE vec<T, N> zerovector()
{
    constexpr size_t width = N * compound_type_traits<T>::width;
    return subcast<T>(vec<subtype<T>, width>(simd<subtype<T>, width>()));
}

template <typename T, size_t N>
constexpr KFR_INLINE vec<T, N> zerovector(vec_t<T, N>)
{
    return zerovector<T, N>();
}
KFR_FN(zerovector)

// All-one-bits vector (equality of zeros yields all-ones lanes).
template <typename T, size_t N>
constexpr KFR_INLINE vec<T, N> allonesvector()
{
    return zerovector<T, N>() == zerovector<T, N>();
}
template <typename T, size_t N>
constexpr KFR_INLINE vec<T, N> allonesvector(vec_t<T, N>)
{
    return allonesvector<T, N>();
}
KFR_FN(allonesvector)

// Uninitialized vector (deliberately indeterminate contents).
template <typename T, size_t N>
constexpr KFR_INLINE vec<T, N> undefinedvector()
{
    return vec<T, N>{};
}
template <typename T, size_t N>
constexpr KFR_INLINE vec<T, N> undefinedvector(vec_t<T, N>)
{
    return undefinedvector<T, N>();
}
KFR_FN(undefinedvector)

// low/high split a vector into a power-of-two lower part and the remainder.
template <typename T, size_t N, size_t Nout = prev_poweroftwo(N - 1)>
KFR_INLINE vec<T, Nout> low(vec<T, N> x)
{
    return shufflevector<Nout, internal::shuffle_index<>>(x);
}

template <typename T, size_t N, size_t Nout = prev_poweroftwo(N - 1)>
KFR_INLINE vec_t<T, Nout> low(vec_t<T, N>)
{
    return {};
}

template <typename T, size_t N, size_t Nout = N - prev_poweroftwo(N - 1)>
KFR_INLINE vec<T, Nout> high(vec<T, N> x)
{
    return shufflevector<Nout, internal::shuffle_index<prev_poweroftwo(N - 1)>>(x);
}

template <typename T, size_t N, size_t Nout = N - prev_poweroftwo(N - 1)>
KFR_INLINE vec_t<T, Nout> high(vec_t<T, N>)
{
    return {};
}
KFR_FN(low)
KFR_FN(high)

namespace internal
{

// NOTE(review): definition continues beyond this chunk of the file; the
// remainder of expression_lambda is not visible here.
template <typename Fn>
struct expression_lambda : input_expression
{
    KFR_INLINE expression_lambda(Fn&& fn) : fn(std::move(fn)) {}

    template <typename T, size_t N, KFR_ENABLE_IF(is_callable<Fn, cinput_t, size_t, vec_t<T, N>>::value)>
    KFR_INLINE vec<T, N> operator()(cinput_t, size_t index, vec_t<T,
N> y) const + { + return fn(cinput, index, y); + } + + template <typename T, size_t N, KFR_ENABLE_IF(N&& is_callable<Fn, size_t>::value)> + KFR_INLINE vec<T, N> operator()(cinput_t, size_t index, vec_t<T, N>) const + { + vec<T, N> result; + for (size_t i = 0; i < N; i++) + { + result(i) = fn(index + i); + } + return result; + } + template <typename T, size_t N, KFR_ENABLE_IF(N&& is_callable<Fn>::value)> + KFR_INLINE vec<T, N> operator()(cinput_t, size_t, vec_t<T, N>) const + { + vec<T, N> result; + for (size_t i = 0; i < N; i++) + { + result(i) = fn(); + } + return result; + } + + Fn fn; +}; +} + +template <typename Fn> +internal::expression_lambda<decay<Fn>> lambda(Fn&& fn) +{ + return internal::expression_lambda<Fn>(std::move(fn)); +} +} + +#pragma clang diagnostic pop + +namespace cometa +{ + +template <typename T, size_t N> +struct compound_type_traits<kfr::simd<T, N>> +{ + using subtype = T; + using deep_subtype = cometa::deep_subtype<T>; + constexpr static size_t width = N; + constexpr static bool is_scalar = false; + template <typename U> + using rebind = kfr::simd<U, N>; + template <typename U> + using deep_rebind = kfr::simd<cometa::deep_rebind<subtype, U>, N>; + + static constexpr const subtype& at(const kfr::simd<T, N>& value, size_t index) { return value[index]; } +}; + +template <typename T, size_t N> +struct compound_type_traits<kfr::vec<T, N>> +{ + using subtype = T; + using deep_subtype = cometa::deep_subtype<T>; + constexpr static size_t width = N; + constexpr static bool is_scalar = false; + template <typename U> + using rebind = kfr::vec<U, N>; + template <typename U> + using deep_rebind = kfr::vec<cometa::deep_rebind<subtype, U>, N>; + + static constexpr subtype at(const kfr::vec<T, N>& value, size_t index) { return value[index]; } +}; + +template <typename T, size_t N> +struct compound_type_traits<kfr::mask<T, N>> +{ + using subtype = T; + using deep_subtype = cometa::deep_subtype<T>; + constexpr static size_t width = N; + constexpr static bool 
is_scalar = false; + template <typename U> + using rebind = kfr::mask<U, N>; + template <typename U> + using deep_rebind = kfr::mask<cometa::deep_rebind<subtype, U>, N>; + + static constexpr subtype at(const kfr::mask<T, N>& value, size_t index) { return value[index]; } +}; +} diff --git a/include/kfr/cident.h b/include/kfr/cident.h @@ -0,0 +1,357 @@ +#pragma once + +#if defined(_M_IX86) || defined(__i386__) || defined(_M_X64) || defined(__x86_64__) +#define CID_ARCH_X86 1 +#endif + +#ifdef CID_ARCH_X86 +#if defined(_M_X64) || defined(__x86_64__) +#define CID_ARCH_X64 1 +#else +#define CID_ARCH_X32 1 +#endif + +#if defined __AVX512F__ && !defined CID_ARCH_AVX512 +#define CID_ARCH_AVX512 1 +#define CID_ARCH_AVX2 1 +#define CID_ARCH_AVX 1 +#define CID_ARCH_SSE42 1 +#define CID_ARCH_SSE41 1 +#define CID_ARCH_SSSE3 1 +#define CID_ARCH_SSE3 1 +#define CID_ARCH_SSE2 1 +#define CID_ARCH_SSE 1 +#endif +#if defined __AVX2__ && !defined CID_ARCH_AVX2 +#define CID_ARCH_AVX2 1 +#define CID_ARCH_AVX 1 +#define CID_ARCH_SSE42 1 +#define CID_ARCH_SSE41 1 +#define CID_ARCH_SSSE3 1 +#define CID_ARCH_SSE3 1 +#define CID_ARCH_SSE2 1 +#define CID_ARCH_SSE 1 +#endif +#if defined __AVX__ && !defined CID_ARCH_AVX +#define CID_ARCH_AVX 1 +#define CID_ARCH_SSE42 1 +#define CID_ARCH_SSE41 1 +#define CID_ARCH_SSSE3 1 +#define CID_ARCH_SSE3 1 +#define CID_ARCH_SSE2 1 +#define CID_ARCH_SSE 1 +#endif +#if defined __SSE4_2__ && !defined CID_ARCH_SSE4_2 +#define CID_ARCH_SSE4_2 1 +#define CID_ARCH_SSE41 1 +#define CID_ARCH_SSSE3 1 +#define CID_ARCH_SSE3 1 +#define CID_ARCH_SSE2 1 +#define CID_ARCH_SSE 1 +#endif +#if defined __SSE4_1__ && !defined CID_ARCH_SSE4_1 +#define CID_ARCH_SSE4_1 1 +#define CID_ARCH_SSSE3 1 +#define CID_ARCH_SSE3 1 +#define CID_ARCH_SSE2 1 +#define CID_ARCH_SSE 1 +#endif +#if defined __SSSE3__ && !defined CID_ARCH_SSSE3 +#define CID_ARCH_SSSE3 1 +#define CID_ARCH_SSE3 1 +#define CID_ARCH_SSE2 1 +#define CID_ARCH_SSE 1 +#endif +#if defined __SSE3__ && !defined CID_ARCH_SSE3 
+#define CID_ARCH_SSE3 1 +#define CID_ARCH_SSE2 1 +#define CID_ARCH_SSE 1 +#endif +#if (defined CID_ARCH_X64 || defined __SSE2__) && !defined CID_ARCH_SSE2 +#define CID_ARCH_SSE2 1 +#define CID_ARCH_SSE 1 +#endif + +#if (defined CID_ARCH_X64 || defined __SSE__) && !defined CID_ARCH_SSE1 +#define CID_ARCH_SSE 1 +#endif + +#if defined __FMA__ && !defined CID_ARCH_FMA +#define CID_ARCH_FMA 1 +#endif + +#if defined __AES__ && !defined CID_ARCH_AES +#define CID_ARCH_AES 1 +#endif + +#if defined __BMI__ && !defined CID_ARCH_BMI +#define CID_ARCH_BMI 1 +#endif + +#if defined __BMI2__ && !defined CID_ARCH_BMI2 +#define CID_ARCH_BMI2 1 +#endif + +#if defined __LZCNT__ && !defined CID_ARCH_LZCNT +#define CID_ARCH_LZCNT 1 +#endif + +#if defined CID_ARCH_AVX512 +#define CID_ARCH_NAME avx512 +#elif defined CID_ARCH_AVX2 +#define CID_ARCH_NAME avx2 +#elif defined CID_ARCH_AVX +#define CID_ARCH_NAME avx +#elif defined CID_ARCH_SSE4_1 +#define CID_ARCH_NAME sse41 +#elif defined CID_ARCH_SSSE3 +#define CID_ARCH_NAME ssse3 +#elif defined CID_ARCH_SSE3 +#define CID_ARCH_NAME sse3 +#elif defined CID_ARCH_SSE2 +#define CID_ARCH_NAME sse2 +#elif defined CID_ARCH_SSE +#define CID_ARCH_NAME sse +#else +#define CID_ARCH_NAME legacy +#endif + +#endif + +#define CID_STRINGIFY2(x) #x +#define CID_STRINGIFY(x) CID_STRINGIFY2(x) + +#if defined(_WIN32) // Windows +#define CID_OS_WIN 1 +#endif + +#if defined(__APPLE__) +#include "TargetConditionals.h" +#ifdef TARGET_OS_IPHONE +#define CID_OS_IOS 1 +#define CID_OS_MOBILE 1 +#elif TARGET_IPHONE_SIMULATOR +#define CID_OS_IOS 1 +#define CID_OS_IOS_SIMULATOR 1 +#define CID_OS_MOBILE 1 +#elif TARGET_OS_MAC +#define CID_OS_MAC 1 +#define CID_OS_OSX 1 +#endif +#define CID_OS_POSIX 1 +#endif + +#if defined(__ANDROID__) +#define CID_OS_ANDROID 1 +#define CID_OS_MOBILE 1 +#define CID_OS_POSIX 1 +#endif + +#if defined(__linux__) +#define CID_OS_LINUX 1 +#define CID_OS_POSIX 1 +#endif + +#if defined(_MSC_VER) // Visual C/C++ +#define CID_COMPILER_MSVC 1 
+#define CID_MSVC_ATTRIBUTES 1 +#define CID_MSC_VER _MSC_VER +#else +#define CID_MSC_VER 0 +#endif + +#if defined(__GNUC__) // GCC, Clang +#define CID_COMPILER_GNU 1 +#define CID_GNU_ATTRIBUTES 1 +#define CID_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) +#if __cplusplus >= 201103L || defined __GXX_EXPERIMENTAL_CXX0X__ +#define CID_HAS_GXX_CXX11 1 +#endif +#else +#define CID_GCC_VERSION 0 +#endif + +#if defined(__INTEL_COMPILER) // Intel Compiler +#define CID_COMPILER_INTEL 1 +#define CID_ICC_VERSION __INTEL_COMPILER +#elif defined(__ICL) +#define CID_COMPILER_INTEL 1 +#define CID_ICC_VERSION __ICL +#else +#define CID_ICC_VERSION 0 +#endif + +#if defined(__clang__) // Clang +#define CID_COMPILER_CLANG 1 +#ifndef CID_GNU_ATTRIBUTES +#define CID_GNU_ATTRIBUTES 1 +#endif +#endif + +#if defined(CID_GNU_ATTRIBUTES) + +#define CID_NODEBUG +// __attribute__((__nodebug__)) +#define CID_INLINE __inline__ __attribute__((__always_inline__)) +#define CID_INTRIN CID_INLINE CID_NODEBUG +#define CID_INLINE_MEMBER __attribute__((__always_inline__)) +#define CID_INLINE_LAMBDA CID_INLINE_MEMBER +#define CID_NOINLINE __attribute__((__noinline__)) +#define CID_FLATTEN __attribute__((__flatten__)) +#define CID_RESTRICT __restrict__ + +#elif defined(CID_MSVC_ATTRIBUTES) + +#define CID_NODEBUG +#define CID_INLINE inline __forceinline +#define CID_INTRIN CID_INLINE CID_NODEBUG +#define CID_INLINE_MEMBER __forceinline +#define CID_INLINE_LAMBDA +#define CID_NOINLINE __declspec(noinline) +#define CID_FLATTEN +#define CID_RESTRICT __restrict + +#endif + +#define CID_INLINE_STATIC CID_INLINE static + +#define CID_EXTERN_C extern "C" + +#define CID_PUBLIC_C CID_EXTERN_C CID_NOINLINE + +#define CID_ALWAYS_INLINE_STATIC CID_ALWAYS_INLINE static + +#ifdef CID_OS_WIN +#define CID_CDECL __cdecl +#else +#define CID_CDECL __attribute__((cdecl)) +#endif + +#ifdef CID_OS_WIN +#if defined(CID_MSVC_ATTRIBUTES) +#define CID_DLL_EXPORT __declspec(dllexport) +#define CID_DLL_IMPORT __declspec(dllimport) 
+#else +#define CID_DLL_EXPORT __attribute__((dllexport)) +#define CID_DLL_IMPORT __attribute__((dllimport)) +#endif +#else +#define CID_DLL_EXPORT +#define CID_DLL_IMPORT +#endif + +#ifdef __has_builtin +#define CID_HAS_BUILTIN(builtin) __has_builtin(builtin) +#else +#define CID_HAS_BUILTIN(builtin) 0 +#endif + +#ifdef __has_feature +#define CID_HAS_FEATURE(feature) __has_feature(feature) +#else +#define CID_HAS_FEATURE(feature) 0 +#endif + +#ifdef __has_extension +#define CID_HAS_EXTENSION(extension) __has_extension(extension) +#else +#define CID_HAS_EXTENSION(extension) 0 +#endif + +#ifdef __has_attribute +#define CID_HAS_ATTRIBUTE(attribute) __has_attribute(attribute) +#else +#define CID_HAS_ATTRIBUTE(attribute) 0 +#endif + +#ifdef __has_warning +#define CID_HAS_WARNING(warning) __has_warning(warning) +#else +#define CID_HAS_WARNING(warning) 0 +#endif + +#define CID_HAS_VARIADIC_TEMPLATES \ + (CID_HAS_FEATURE(cxx_variadic_templates) || (CID_GCC_VERSION >= 404 && CID_HAS_GXX_CXX11) || \ + CID_MSC_VER >= 1800) + +#ifdef CID_BUILDING_DLL +#define CID_C_API CID_DLL_EXPORT +#else +#define CID_C_API CID_DLL_IMPORT +#endif + +#if __cplusplus >= 201103L || CID_MSC_VER >= 1900 || CID_HAS_FEATURE(cxx_constexpr) +#define CID_HAS_CONSTEXPR 1 +#endif + +#if __cpp_constexpr >= 201304 || CID_HAS_FEATURE(cxx_constexpr) +#define CID_HAS_FULL_CONSTEXPR 1 +#endif + +#if CID_HAS_CONSTEXPR +#define CID_CONSTEXPR constexpr +#else +#define CID_CONSTEXPR +#endif + +#if CID_HAS_FEATURE(cxx_noexcept) || (CID_GCC_VERSION >= 408 && CID_HAS_GXX_CXX11) || CID_MSC_VER >= 1900 +#define CID_HAS_NOEXCEPT 1 +#endif + +#if CID_HAS_NOEXCEPT +#define CID_NOEXCEPT noexcept +#else +#define CID_NOEXCEPT +#endif + +#if CID_COMPILER_GNU && !defined(__EXCEPTIONS) +#define CID_HAS_EXCEPTIONS 0 +#endif +#if CID_MSC_VER && !_HAS_EXCEPTIONS +#define CID_HAS_EXCEPTIONS 0 +#endif + +#ifndef CID_HAS_EXCEPTIONS +#define CID_HAS_EXCEPTIONS 1 +#endif + +#include <assert.h> + +#ifndef CID_THROW +#if 
CID_HAS_EXCEPTIONS +#define CID_THROW(x) throw x +#else +#define CID_THROW(x) assert(false) +#endif +#endif + +#if __cplusplus >= 201103L || CID_MSC_VER >= 1900 || CID_HAS_FEATURE(cxx_constexpr) + +#include <cstdint> +namespace cid +{ +template <typename T, size_t N> +constexpr inline static size_t arraysize(const T (&)[N]) noexcept +{ + return N; +} +} + +#define CID_ARRAYSIZE(arr) ::cid::arraysize(arr) +#elif CID_COMPILER_MSVC +#define CID_ARRAYSIZE(arr) _countof(arr) +#elif __cplusplus >= 199711L && \ + (defined(__INTEL_COMPILER) || defined(__clang__) || \ + (defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)))) +template <typename T, size_t N> +char (&COUNTOF_REQUIRES_ARRAY_ARGUMENT(T (&)[N]))[N]; +#define CID_ARRAYSIZE(x) sizeof(COUNTOF_REQUIRES_ARRAY_ARGUMENT(x)) +#else +#define CID_ARRAYSIZE(arr) sizeof(arr) / sizeof(arr[0]) +#endif + +#ifdef CID_COMPILER_MSVC +#define CID_FUNC_SIGNATURE __FUNCSIG__ +#else +#define CID_FUNC_SIGNATURE __PRETTY_FUNCTION__ +#endif diff --git a/include/kfr/cometa.hpp b/include/kfr/cometa.hpp @@ -0,0 +1,1819 @@ +#pragma once + +#include "cident.h" + +#include <algorithm> +#include <array> +#include <tuple> +#include <type_traits> +#include <vector> + +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wshadow" + +namespace cometa +{ + +using std::size_t; + +using pvoid = void*; + +template <typename...> +using void_t = void; + +namespace details +{ +constexpr inline bool args_or() { return false; } +template <typename... Ts> +constexpr inline bool args_or(bool x, Ts... rest) +{ + return x || args_or(rest...); +} + +constexpr inline bool args_and() { return true; } +template <typename... Ts> +constexpr inline bool args_and(bool x, Ts... 
rest) +{ + return x && args_or(rest...); +} + +template <typename T, typename Enable = void> +struct is_pod_impl : std::false_type +{ +}; + +template <typename T> +struct is_pod_impl<T, void_t<decltype(T::is_pod)>> : std::integral_constant<bool, T::is_pod> +{ +}; +} + +template <typename... Ts> +struct or_t : std::integral_constant<bool, details::args_or(Ts::value...)> +{ +}; + +template <typename... Ts> +struct and_t : std::integral_constant<bool, details::args_and(Ts::value...)> +{ +}; + +template <typename T> +struct not_t : std::integral_constant<bool, !T::value> +{ +}; + +constexpr size_t max_size_t = size_t(-1); + +template <typename... T> +using common_type = typename std::common_type<T...>::type; + +template <typename T> +using result_of = typename std::result_of<T>::type; + +template <bool Condition, typename Type = void> +using enable_if = typename std::enable_if<Condition, Type>::type; + +template <bool Condition, typename T, typename F> +using conditional = typename std::conditional<Condition, T, F>::type; + +template <typename T> +using remove_reference = typename std::remove_reference<T>::type; + +template <typename T> +using remove_cv = typename std::remove_cv<T>::type; + +template <typename T> +using remove_pointer = typename std::remove_pointer<T>::type; + +template <typename T> +using remove_extent = typename std::remove_extent<T>::type; + +template <typename T> +using remove_const = typename std::remove_const<T>::type; + +template <typename T> +using underlying_type = typename std::underlying_type<T>::type; + +template <typename T> +using is_pod = or_t<std::is_pod<T>, details::is_pod_impl<T>>; + +template <typename T> +using is_class = std::is_class<T>; + +template <typename T> +using is_const = std::is_const<T>; + +template <typename T> +using is_pointer = std::is_pointer<T>; + +template <typename T> +using is_array = std::is_array<T>; + +template <typename T> +using is_void = std::is_void<T>; + +template <typename T1, typename T2> +using 
is_same = std::is_same<T1, T2>; + +template <typename T> +using is_template_arg = std::integral_constant<bool, std::is_integral<T>::value || std::is_enum<T>::value>; + +template <typename T> +using decay = typename std::decay<T>::type; + +template <typename... T> +using decay_common = decay<common_type<T...>>; + +template <typename T1, typename T2 = void, typename... Ts> +constexpr size_t typeindex() +{ + return is_same<T1, T2>() ? 0 : 1 + typeindex<T1, Ts...>(); +} + +template <typename T> +struct compound_type_traits +{ + constexpr static size_t width = 1; + using subtype = T; + using deep_subtype = T; + constexpr static bool is_scalar = true; + + template <typename U> + using rebind = U; + template <typename U> + using deep_rebind = U; + + static constexpr const subtype& at(const T& value, size_t /*index*/) { return value; } +}; + +template <typename T> +using is_compound = std::integral_constant<bool, !compound_type_traits<decay<T>>::is_scalar>; + +template <typename T> +using subtype = typename compound_type_traits<T>::subtype; + +template <typename T> +using deep_subtype = typename compound_type_traits<T>::deep_subtype; + +template <typename T, typename SubType> +using rebind = typename compound_type_traits<T>::template rebind<SubType>; + +template <typename T, typename SubType> +using deep_rebind = typename compound_type_traits<T>::template deep_rebind<SubType>; + +template <typename T> +struct compound_type_traits<std::pair<T, T>> +{ + constexpr static size_t width = 2; + using subtype = T; + using deep_subtype = cometa::deep_subtype<T>; + constexpr static bool is_scalar = false; + + template <typename U> + using rebind = std::pair<U, U>; + template <typename U> + using deep_rebind = std::pair<cometa::deep_rebind<subtype, U>, cometa::deep_rebind<subtype, U>>; + + static constexpr const subtype& at(const std::pair<subtype, subtype>& value, size_t index) + { + return index == 0 ? 
value.first : value.second; + } +}; + +template <typename T, T val> +struct cval_t +{ + constexpr static T value = val; + constexpr cval_t() noexcept = default; + constexpr cval_t(const cval_t&) noexcept = default; + constexpr cval_t(cval_t&&) noexcept = default; + typedef T value_type; + typedef cval_t type; + constexpr operator value_type() const { return value; } + constexpr value_type operator()() const { return value; } +}; + +template <typename T, T value> +constexpr inline T val_of(cval_t<T, value>) +{ + return value; +} + +template <typename T> +constexpr inline T val_of(T value) +{ + return value; +} + +template <typename T> +constexpr inline bool is_constant_val(T) +{ + return false; +} + +template <typename T, T value> +constexpr inline bool is_constant_val(cval_t<T, value>) +{ + return true; +} + +namespace details +{ + +template <typename T> +struct inherit : T +{ +}; + +template <typename T, typename Enable = void> +struct is_inheritable_impl : std::false_type +{ +}; + +template <typename T> +struct is_inheritable_impl<T, void_t<inherit<T>>> : std::true_type +{ +}; + +template <typename T> +struct is_val_impl : std::false_type +{ +}; + +template <typename T, T val> +struct is_val_impl<cval_t<T, val>> : std::true_type +{ +}; +} + +template <typename T> +using is_inheritable = typename details::is_inheritable_impl<T>::type; + +template <typename T> +using is_val_t = typename details::is_val_impl<T>::type; + +template <bool val> +using cbool_t = cval_t<bool, val>; + +template <int val> +using cint_t = cval_t<int, val>; + +template <unsigned val> +using cuint_t = cval_t<unsigned, val>; + +template <size_t val> +using csize_t = cval_t<size_t, val>; + +template <typename T, T val> +constexpr cval_t<T, val> cval{}; + +template <bool val> +constexpr cbool_t<val> cbool{}; + +using cfalse_t = cbool_t<false>; +using ctrue_t = cbool_t<true>; + +constexpr ctrue_t ctrue{}; +constexpr cfalse_t cfalse{}; + +template <int val> +constexpr cint_t<val> cint{}; + 
+template <unsigned val> +constexpr cuint_t<val> cuint{}; + +template <size_t val> +constexpr csize_t<val> csize{}; + +namespace details +{ +template <size_t index, typename T, T first, T... rest> +struct get_nth : get_nth<index - 1, T, rest...> +{ +}; + +template <typename T, T first, T... rest> +struct get_nth<0, T, first, rest...> +{ + constexpr static T value = first; +}; + +template <size_t index, typename... Types> +struct get_nth_type; + +template <size_t index, typename first, typename... rest> +struct get_nth_type<index, first, rest...> : get_nth_type<index - 1, rest...> +{ +}; + +template <typename first, typename... rest> +struct get_nth_type<0, first, rest...> +{ + using type = first; +}; + +template <size_t index> +struct get_nth_type<index> +{ +}; +} + +template <typename T, T... values> +struct cvals_t +{ + using type = cvals_t<T, values...>; + constexpr static size_t size() { return sizeof...(values); } + template <size_t index> + constexpr T operator[](csize_t<index>) + { + return get(csize<index>); + } + template <size_t index> + constexpr static T get(csize_t<index> = csize_t<index>()) + { + return details::get_nth<index, T, values...>::value; + } + constexpr static T front() { return get(csize<0>); } + constexpr static T back() { return get(csize<size() - 1>); } + + static const T* begin() { return array(); } + static const T* end() { return array() + size(); } + + static const T* array() + { + static const T arr[] = { values... }; + return &arr[0]; + } + template <size_t... indices> + constexpr cvals_t<T, details::get_nth<indices, T, values...>::value...> operator[]( + cvals_t<size_t, indices...>) const + { + return {}; + } +}; + +template <typename T> +struct cvals_t<T> +{ + using type = cvals_t<T>; + constexpr static size_t size() { return 0; } +}; + +namespace details +{ +template <typename T1, typename T2> +struct concat_impl; + +template <typename T, T... values1, T... 
values2> +struct concat_impl<cvals_t<T, values1...>, cvals_t<T, values2...>> +{ + using type = cvals_t<T, values1..., values2...>; +}; +} +template <typename T1, typename T2> +using concat_lists = typename details::concat_impl<T1, T2>::type; + +template <typename T1, typename T2> +constexpr inline concat_lists<T1, T2> cconcat(T1, T2) +{ + return {}; +} + +template <bool... values> +using cbools_t = cvals_t<bool, values...>; + +template <int... values> +using cints_t = cvals_t<int, values...>; + +template <char... values> +using cchars_t = cvals_t<char, values...>; + +template <unsigned... values> +using cuints_t = cvals_t<unsigned, values...>; + +template <size_t... values> +using csizes_t = cvals_t<size_t, values...>; + +template <size_t... values> +using elements_t = cvals_t<size_t, values...>; + +template <typename T, T... values> +constexpr cvals_t<T, values...> cvals{}; + +template <bool... vals> +constexpr cbools_t<vals...> cbools{}; + +constexpr cbools_t<false, true> cfalse_true{}; + +template <int... vals> +constexpr cints_t<vals...> cints{}; + +template <char... vals> +constexpr cchars_t<vals...> cchars{}; + +template <unsigned... vals> +constexpr cuints_t<vals...> cuints{}; + +template <size_t... vals> +constexpr csizes_t<vals...> csizes{}; + +template <size_t... vals> +constexpr elements_t<vals...> elements{}; + +template <typename T> +constexpr inline T csum(cvals_t<T>) +{ + return 0; +} + +template <typename T, T first, T... rest> +constexpr inline T csum(cvals_t<T, first, rest...>) +{ + return first + csum(cvals<T, rest...>); +} + +template <typename T> +constexpr inline T cprod(cvals_t<T>) +{ + return 1; +} + +template <typename T, T first, T... 
rest> +constexpr inline T cprod(cvals_t<T, first, rest...>) +{ + return first * cprod(cvals<T, rest...>); +} + +template <typename T> +struct ctype_t +{ + using type = T; +}; + +template <typename T> +using type_of = typename T::type; + +template <typename T> +constexpr ctype_t<T> ctype{}; + +template <typename... Types> +struct ctypes_t +{ + constexpr static size_t size() { return sizeof...(Types); } + + template <size_t index> + using nth = typename details::get_nth_type<index, Types...>::type; + + template <size_t index> + constexpr static auto get(csize_t<index>) -> ctype_t<nth<index>> + { + return {}; + } +}; + +template <typename... Ts> +constexpr ctypes_t<Ts...> ctypes{}; + +namespace details +{ + +template <typename> +struct function_arguments_impl; + +template <typename Ret, typename... Args> +struct function_arguments_impl<Ret (*)(Args...)> +{ + using result = Ret; + using args = ctypes_t<Args...>; +}; + +template <typename Class, typename Ret, typename... Args> +struct function_arguments_impl<Ret (Class::*)(Args...)> +{ + using result = Ret; + using args = ctypes_t<Args...>; +}; + +template <typename Class, typename Ret, typename... Args> +struct function_arguments_impl<Ret (Class::*)(Args...) const> +{ + using result = Ret; + using args = ctypes_t<Args...>; +}; + +template <typename T1, typename T2> +struct filter_impl; + +template <typename T> +struct filter_impl<cvals_t<T>, cvals_t<bool>> +{ + using type = cvals_t<T>; +}; + +template <typename T, T value, T... values, bool flag, bool... 
flags> +struct filter_impl<cvals_t<T, value, values...>, cvals_t<bool, flag, flags...>> +{ + using filtered = typename filter_impl<cvals_t<T, values...>, cvals_t<bool, flags...>>::type; + using type = conditional<flag, concat_lists<cvals_t<T, value>, filtered>, filtered>; +}; +} + +template <typename Fn> +using function_arguments = typename details::function_arguments_impl<decltype(&Fn::operator())>::args; + +template <typename Fn> +using function_result = typename details::function_arguments_impl<decltype(&Fn::operator())>::result; + +template <typename T1, typename T2> +using cfilter_t = typename details::filter_impl<T1, T2>::type; + +template <typename T, T... vals, bool... flags, + typename Ret = cfilter_t<cvals_t<T, vals...>, cvals_t<bool, flags...>>> +constexpr inline Ret cfilter(cvals_t<T, vals...>, cvals_t<bool, flags...>) +{ + return Ret{}; +} + +#define CMT_UN_OP(op) \ + template <typename T1, T1... vals1, \ + typename Ret = cvals_t<decltype(op std::declval<T1>()), (op vals1)...>> \ + constexpr inline Ret operator op(cvals_t<T1, vals1...>) \ + { \ + return Ret{}; \ + } \ + template <typename T1, T1 val1, typename Ret = cval_t<decltype(op std::declval<T1>()), (op val1)>> \ + constexpr inline Ret operator op(cval_t<T1, val1>) \ + { \ + return Ret{}; \ + } + +#define CMT_BIN_OP(op) \ + template <typename T1, T1... vals1, typename T2, T2... vals2, \ + typename Ret = \ + cvals_t<decltype(std::declval<T1>() op std::declval<T2>()), (vals1 op vals2)...>> \ + constexpr inline Ret operator op(cvals_t<T1, vals1...>, cvals_t<T2, vals2...>) \ + { \ + return Ret{}; \ + } \ + template <typename T1, T1... vals1, typename T2, T2 val2, \ + typename Ret = \ + cvals_t<decltype(std::declval<T1>() op std::declval<T2>()), (vals1 op val2)...>> \ + constexpr inline Ret operator op(cvals_t<T1, vals1...>, cval_t<T2, val2>) \ + { \ + return Ret{}; \ + } \ + template <typename T1, T1 val1, typename T2, T2... 
vals2, \ + typename Ret = \ + cvals_t<decltype(std::declval<T1>() op std::declval<T2>()), (val1 op vals2)...>> \ + constexpr inline Ret operator op(cval_t<T1, val1>, cvals_t<T2, vals2...>) \ + { \ + return Ret{}; \ + } + +// clang-format off +CMT_UN_OP(-) +CMT_UN_OP(+) +CMT_UN_OP(~) +CMT_UN_OP(!) + +CMT_BIN_OP(&&) +CMT_BIN_OP(||) +CMT_BIN_OP(==) +CMT_BIN_OP(!=) +CMT_BIN_OP(<) +CMT_BIN_OP(>) +CMT_BIN_OP(<=) +CMT_BIN_OP(>=) +CMT_BIN_OP(+) +CMT_BIN_OP(-) +CMT_BIN_OP(*) +CMT_BIN_OP(/) +CMT_BIN_OP(%) +CMT_BIN_OP(<<) +CMT_BIN_OP(>>) +CMT_BIN_OP(&) +CMT_BIN_OP(|) +CMT_BIN_OP(^) +// clang-format on + +namespace details +{ +template <typename T, size_t Nsize, T Nstart, ptrdiff_t Nstep> +struct cvalseq_impl; + +template <typename T, size_t Nsize, T Nstart, ptrdiff_t Nstep> +using cgen_seq = typename cvalseq_impl<T, Nsize, Nstart, Nstep>::type; + +template <typename T, size_t Nsize, T Nstart, ptrdiff_t Nstep> +struct cvalseq_impl : concat_impl<cgen_seq<T, Nsize / 2, Nstart, Nstep>, + cgen_seq<T, Nsize - Nsize / 2, Nstart + (Nsize / 2) * Nstep, Nstep>> +{ +}; + +template <typename T, T Nstart, ptrdiff_t Nstep> +struct cvalseq_impl<T, 0, Nstart, Nstep> : cvals_t<T> +{ +}; +template <typename T, T Nstart, ptrdiff_t Nstep> +struct cvalseq_impl<T, 1, Nstart, Nstep> : cvals_t<T, static_cast<T>(Nstart)> +{ +}; +} + +template <typename T, size_t size, T start = T(), ptrdiff_t step = 1> +using cvalseq_t = typename details::cvalseq_impl<T, size, start, step>::type; + +template <typename T, T begin, T end> +constexpr cvalseq_t<T, end - begin, begin> cvalrange{}; + +template <size_t begin, size_t end> +constexpr cvalseq_t<size_t, end - begin, begin> csizerange{}; + +template <int begin, int end> +constexpr cvalseq_t<int, end - begin, begin> cintrange{}; + +template <unsigned begin, unsigned end> +constexpr cvalseq_t<unsigned, end - begin, begin> cuintrange{}; + +template <typename T, size_t size, T start = T(), ptrdiff_t step = 1> +constexpr cvalseq_t<T, size, start, step> cvalseq{}; + 
+template <size_t size, size_t start = 0, ptrdiff_t step = 1> +constexpr cvalseq_t<size_t, size, start, step> csizeseq{}; + +template <size_t size, int start = 0, ptrdiff_t step = 1> +constexpr cvalseq_t<int, size, start, step> cintseq{}; + +template <size_t size, unsigned start = 0, ptrdiff_t step = 1> +constexpr cvalseq_t<unsigned, size, start, step> cuintseq{}; + +template <typename... List> +using indicesfor_t = cvalseq_t<size_t, sizeof...(List), 0>; + +template <typename... List> +constexpr indicesfor_t<List...> indicesfor{}; + +namespace details +{ + +template <typename Ret, typename T, typename enable = void_t<>> +struct is_returning_type_impl : std::false_type +{ +}; + +template <typename Ret, typename Fn, typename... Args> +struct is_returning_type_impl<Ret, Fn(Args...), void_t<result_of<Fn(Args...)>>> + : std::is_same<Ret, result_of<Fn(Args...)>> +{ +}; + +template <typename Fn, typename Args, typename enable = void_t<>> +struct is_callable_impl : std::false_type +{ +}; + +template <typename Fn, typename... Args> +struct is_callable_impl<Fn, ctypes_t<Args...>, void_t<result_of<Fn(Args...)>>> : std::true_type +{ +}; + +template <typename T, typename enable = void_t<>> +struct is_enabled_impl : std::true_type +{ +}; + +template <typename Fn> +struct is_enabled_impl<Fn, void_t<decltype(Fn::disabled)>> : std::integral_constant<bool, !Fn::disabled> +{ +}; + +template <size_t N> +struct unique_enum_impl +{ + enum class type : size_t + { + value = N + }; +}; +template <size_t N> +using unique_enum = typename unique_enum_impl<N>::type; + +#define CMT_ENABLE_IF_IMPL(N, ...) \ + typename ::std::enable_if<(__VA_ARGS__), ::cometa::details::unique_enum<N>>::type = \ + ::cometa::details::unique_enum<N>::value + +#define CMT_ENABLE_IF(...) CMT_ENABLE_IF_IMPL(__LINE__, __VA_ARGS__) +} + +template <typename T> +struct is_enabled : details::is_enabled_impl<T> +{ +}; + +template <typename Fn, typename... 
Args> +struct is_callable : details::is_callable_impl<Fn, ctypes_t<Args...>> +{ +}; + +template <typename Ret, typename T> +struct is_returning_type : details::is_returning_type_impl<Ret, T> +{ +}; + +namespace details +{ +template <typename Fn, CMT_ENABLE_IF(is_callable<Fn()>())> +inline auto call_if_callable(Fn&& fn) +{ + return fn(); +} + +template <typename Fn, CMT_ENABLE_IF(!is_callable<Fn()>())> +inline auto call_if_callable(Fn&& fn) +{ + return std::forward<Fn>(fn); +} +} + +template <typename Fn, typename... Args> +inline auto bind_func(Fn&& fn, Args&&... args) +{ + return [=]() CID_INLINE_LAMBDA { return fn(details::call_if_callable(std::forward<Args>(args))...); }; +} + +template <typename T> +constexpr inline bool is_even(T x) +{ + return (x % 2) == 0; +} + +template <typename T> +constexpr inline bool is_odd(T x) +{ + return !is_even(x); +} + +template <typename T> +constexpr inline bool is_poweroftwo(T x) +{ + return ((x != 0) && !(x & (x - 1))); +} + +template <typename T> +constexpr inline unsigned ilog2(T n, unsigned p = 0) +{ + return (n <= 1) ? p : ilog2(n / 2, p + 1); +} + +template <typename T> +constexpr inline T next_poweroftwo(T n) +{ + return n > 2 ? T(1) << (ilog2(n - 1) + 1) : n; +} + +template <typename T> +constexpr inline T prev_poweroftwo(T n) +{ + return n > 2 ? T(1) << (ilog2(n)) : n; +} + +template <typename T> +constexpr inline bool is_divisible(T x, T divisor) +{ + return x % divisor == 0; +} + +template <typename T> +constexpr inline T gcd(T a) +{ + return a; +} + +template <typename T> +constexpr inline T gcd(T a, T b) +{ + return a < b ? gcd(b, a) : ((a % b == 0) ? b : gcd(b, a % b)); +} + +template <typename T, typename... Ts> +constexpr inline T gcd(T a, T b, T c, Ts... rest) +{ + return gcd(a, gcd(b, c, rest...)); +} + +template <typename T> +constexpr inline T lcm(T a) +{ + return a; +} + +template <typename T> +constexpr inline T lcm(T a, T b) +{ + return a * b / gcd(a, b); +} + +template <typename T, typename... 
Ts> +constexpr inline T lcm(T a, T b, T c, Ts... rest) +{ + return lcm(a, lcm(b, c, rest...)); +} + +namespace details +{ +template <int64_t min, int64_t max, typename... Types> +struct findinttype_impl +{ +}; +template <int64_t min, int64_t max, typename T, typename... Types> +struct findinttype_impl<min, max, T, Types...> +{ + using type = conditional<(std::numeric_limits<T>::min() <= min && std::numeric_limits<T>::max() >= max), + T, typename findinttype_impl<min, max, Types...>::type>; +}; +template <int64_t min, int64_t max> +struct findinttype_impl<min, max> +{ + using type = void; +}; + +template <typename T> +using is_number_impl = + std::integral_constant<bool, ((std::is_integral<T>::value) || (std::is_floating_point<T>::value)) && + !std::is_same<T, bool>::value>; +} + +template <int64_t min, int64_t max> +using findinttype = typename details::findinttype_impl<min, max, uint8_t, int8_t, uint16_t, int16_t, uint32_t, + int32_t, uint64_t, int64_t>::type; + +template <typename T> +using is_number = details::is_number_impl<decay<T>>; + +template <typename... Ts> +using is_numbers = and_t<details::is_number_impl<decay<Ts>>...>; + +namespace details +{ +template <typename T> +struct identity_impl +{ + using type = T; +}; + +template <typename T> +constexpr size_t elementsize = sizeof(T); + +template <> +constexpr size_t elementsize<void> = 1; +} + +template <typename T> +using identity = typename details::identity_impl<T>::type; + +struct swallow +{ + template <typename... T> + CID_INTRIN constexpr swallow(T&&...) 
noexcept + { + } +}; + +template <typename T, size_t N> +struct carray; + +template <typename T> +struct carray<T, 1> +{ + constexpr carray() noexcept = default; + constexpr carray(T val) noexcept : val(val) {} + + template <typename Fn, size_t index = 0> + constexpr carray(Fn&& fn, csize_t<index> = csize_t<index>{}) noexcept + : val(static_cast<T>(fn(csize<index>))) + { + } + + constexpr carray(const carray&) noexcept = default; + constexpr carray(carray&&) noexcept = default; + static constexpr size_t size() noexcept { return 1; } + + template <size_t index> + CID_INTRIN constexpr T& get(csize_t<index>) noexcept + { + static_assert(index == 0, "carray: Array index is out of range"); + return val; + } + template <size_t index> + CID_INTRIN constexpr const T& get(csize_t<index>) const noexcept + { + static_assert(index == 0, "carray: Array index is out of range"); + return val; + } + template <size_t index> + CID_INTRIN constexpr T& get() noexcept + { + return get(csize<index>); + } + template <size_t index> + CID_INTRIN constexpr const T& get() const noexcept + { + return get(csize<index>); + } + constexpr const T* front() const noexcept { return val; } + constexpr T* front() noexcept { return val; } + constexpr const T* back() const noexcept { return val; } + constexpr T* back() noexcept { return val; } + constexpr const T* begin() const noexcept { return &val; } + constexpr const T* end() const noexcept { return &val + 1; } + constexpr T* begin() noexcept { return &val; } + constexpr T* end() noexcept { return &val + 1; } + constexpr const T* data() const noexcept { return begin(); } + constexpr T* data() noexcept { return begin(); } + constexpr bool empty() const noexcept { return false; } + T val; +}; + +template <typename T, size_t N> +struct carray : carray<T, N - 1> +{ + template <typename... Ts> + constexpr carray(T first, Ts... 
list) noexcept : carray<T, N - 1>(list...), val(first) + { + static_assert(sizeof...(list) + 1 == N, "carray: Argument count is invalid"); + } + + template <typename Fn, size_t index = N - 1> + constexpr carray(Fn&& fn, csize_t<index> = csize_t<index>{}) noexcept + : carray<T, N - 1>(std::forward<Fn>(fn), csize<index - 1>), + val(static_cast<T>(fn(csize<index>))) + { + } + + constexpr carray() noexcept = default; + constexpr carray(const carray&) noexcept = default; + constexpr carray(carray&&) noexcept = default; + static constexpr size_t size() noexcept { return N; } + CID_INTRIN constexpr T& get(csize_t<N - 1>) noexcept { return val; } + template <size_t index> + CID_INTRIN constexpr T& get(csize_t<index>) noexcept + { + return carray<T, N - 1>::get(csize<index>); + } + template <size_t index> + CID_INTRIN constexpr T& get() noexcept + { + return get(csize<index>); + } + CID_INTRIN constexpr const T& get(csize_t<N - 1>) const noexcept { return val; } + template <size_t index> + CID_INTRIN constexpr const T& get(csize_t<index>) const noexcept + { + return carray<T, N - 1>::get(csize<index>); + } + template <size_t index> + CID_INTRIN constexpr const T& get() const noexcept + { + return get(csize<index>); + } + CID_INTRIN constexpr const T* front() const noexcept { return carray<T, N - 1>::front(); } + CID_INTRIN constexpr T* front() noexcept { return carray<T, N - 1>::front(); } + CID_INTRIN constexpr const T* back() const noexcept { return val; } + CID_INTRIN constexpr T* back() noexcept { return val; } + CID_INTRIN constexpr const T* begin() const noexcept { return carray<T, N - 1>::begin(); } + CID_INTRIN constexpr const T* end() const noexcept { return &val + 1; } + CID_INTRIN constexpr T* begin() noexcept { return carray<T, N - 1>::begin(); } + CID_INTRIN constexpr T* end() noexcept { return &val + 1; } + CID_INTRIN constexpr const T* data() const noexcept { return begin(); } + CID_INTRIN constexpr T* data() noexcept { return begin(); } + CID_INTRIN 
constexpr bool empty() const noexcept { return false; } +private: + T val; +}; + +#define CMT_FN(fn) \ + struct fn_##fn \ + { \ + template <typename... Args> \ + CID_INLINE_MEMBER decltype(fn(std::declval<Args>()...)) operator()(Args&&... args) const \ + { \ + return fn(std::forward<Args>(args)...); \ + } \ + }; + +#define CMT_ESC(...) __VA_ARGS__ + +#define CMT_FN_TPL(tpl_list, tpl_args, fn) \ + template <CMT_ESC tpl_list> \ + struct fn_##fn \ + { \ + template <typename... Args> \ + CID_INLINE_MEMBER decltype(fn<CMT_ESC tpl_args>(std::declval<Args>()...)) operator()( \ + Args&&... args) const \ + { \ + return fn<CMT_ESC tpl_args>(std::forward<Args>(args)...); \ + } \ + }; + +template <typename T> +inline auto pass_through(T&& x) noexcept +{ + return x; +} + +template <typename... Ts> +inline void noop(Ts...) noexcept +{ +} + +template <typename T1, typename... Ts> +constexpr inline T1&& get_first(T1&& x, Ts...) noexcept +{ + return std::forward<T1>(x); +} + +template <typename T1, typename T2, typename... Ts> +constexpr inline T2&& get_second(T1, T2&& x, Ts...) noexcept +{ + return std::forward<T2>(x); +} + +template <typename T1, typename T2, typename T3, typename... Ts> +constexpr inline T3&& get_third(T1, T2, T3&& x, Ts...) noexcept +{ + return std::forward<T3>(x); +} +template <typename T, typename... Ts> +constexpr inline T returns(Ts...) 
+{ + return T(); +} + +CMT_FN(pass_through) +CMT_FN(noop) +CMT_FN(get_first) +CMT_FN(get_second) +CMT_FN(get_third) +CMT_FN_TPL((typename T), (T), returns) + +template <typename T1, typename T2> +inline bool is_equal(const T1& x, const T2& y) +{ + return x == y; +} +template <typename T1, typename T2> +inline bool is_notequal(const T1& x, const T2& y) +{ + return x != y; +} +template <typename T1, typename T2> +inline bool is_less(const T1& x, const T2& y) +{ + return x < y; +} +template <typename T1, typename T2> +inline bool is_greater(const T1& x, const T2& y) +{ + return x > y; +} +template <typename T1, typename T2> +inline bool is_lessorequal(const T1& x, const T2& y) +{ + return x <= y; +} +template <typename T1, typename T2> +inline bool is_greaterorequal(const T1& x, const T2& y) +{ + return x >= y; +} +CMT_FN(is_equal) +CMT_FN(is_notequal) +CMT_FN(is_less) +CMT_FN(is_greater) +CMT_FN(is_lessorequal) +CMT_FN(is_greaterorequal) + +namespace details +{ +template <typename, typename = void> +struct has_begin_end_impl : std::false_type +{ +}; + +template <typename T> +struct has_begin_end_impl<T, void_t<decltype(std::declval<T>().begin()), decltype(std::declval<T>().end())>> + : std::true_type +{ +}; + +template <typename, typename = void> +struct has_value_type_impl : std::false_type +{ +}; + +template <typename T> +struct has_value_type_impl<T, void_t<typename T::value_type>> : std::true_type +{ +}; + +template <typename, typename = void> +struct has_data_size_impl : std::false_type +{ +}; + +template <typename T> +struct has_data_size_impl<T, void_t<decltype(std::declval<T>().size()), decltype(std::declval<T>().data())>> + : std::true_type +{ +}; + +template <typename, typename Fallback, typename = void> +struct value_type_impl +{ + using type = Fallback; +}; + +template <typename T, typename Fallback> +struct value_type_impl<T, Fallback, void_t<typename T::value_type>> +{ + using type = typename T::value_type; +}; +} + +template <typename T> +using 
has_begin_end = details::has_begin_end_impl<decay<T>>; + +template <typename T> +using has_data_size = details::has_data_size_impl<decay<T>>; + +template <typename T> +using value_type_of = typename decay<T>::value_type; + +template <typename T, typename Fn> +CID_INTRIN void cforeach(cvals_t<T>, Fn&&) +{ +} + +template <typename T, T v0, T... values, typename Fn> +CID_INTRIN void cforeach(cvals_t<T, v0, values...>, Fn&& fn) +{ + fn(cval<T, v0>); + cforeach(cvals_t<T, values...>(), std::forward<Fn>(fn)); +} + +template <typename Fn> +CID_INTRIN void cforeach(ctypes_t<>, Fn&&) +{ +} + +template <typename T0, typename... types, typename Fn> +CID_INTRIN void cforeach(ctypes_t<T0, types...>, Fn&& fn) +{ + fn(ctype<T0>); + cforeach(ctypes_t<types...>(), std::forward<Fn>(fn)); +} + +template <typename T, typename Fn, CMT_ENABLE_IF(has_begin_end<T>::value)> +CID_INTRIN void cforeach(T&& list, Fn&& fn) +{ + for (const auto& v : list) + { + fn(v); + } +} + +template <typename T, size_t N, typename Fn> +CID_INTRIN void cforeach(const T (&array)[N], Fn&& fn) +{ + for (size_t i = 0; i < N; i++) + { + fn(array[i]); + } +} + +namespace details +{ +template <typename... Ts, typename Fn, size_t... indices> +CID_INTRIN void cforeach_tuple_impl(const std::tuple<Ts...>& tuple, Fn&& fn, csizes_t<indices...>) +{ + swallow{ (fn(std::get<indices>(tuple)), void(), 0)... }; +} +} + +template <typename... 
Ts, typename Fn> +CID_INTRIN void cforeach(const std::tuple<Ts...>& tuple, Fn&& fn) +{ + details::cforeach_tuple_impl(tuple, std::forward<Fn>(fn), csizeseq<sizeof...(Ts)>); +} + +template <typename A0, typename A1, typename Fn> +CID_INTRIN void cforeach(A0&& a0, A1&& a1, Fn&& fn) +{ + cforeach(std::forward<A0>(a0), + [&](auto v0) { cforeach(std::forward<A1>(a1), [&](auto v1) { fn(v0, v1); }); }); +} + +template <typename A0, typename A1, typename A2, typename Fn> +CID_INTRIN void cforeach(A0&& a0, A1&& a1, A2&& a2, Fn&& fn) +{ + cforeach(std::forward<A0>(a0), [&](auto v0) { + cforeach(std::forward<A1>(a1), + [&](auto v1) { cforeach(std::forward<A2>(a2), [&](auto v2) { fn(v0, v1, v2); }); }); + }); +} + +template <typename T, typename Fn, typename DefFn = fn_noop, typename CmpFn = fn_is_equal> +CID_INTRIN decltype(auto) cswitch(cvals_t<T>, identity<T>, Fn&&, DefFn&& deffn = DefFn(), CmpFn&& = CmpFn()) +{ + return deffn(); +} + +template <typename T, T v0, T... values, typename Fn, typename DefFn = fn_noop, typename CmpFn = fn_is_equal> +CID_INTRIN decltype(auto) cswitch(cvals_t<T, v0, values...>, identity<T> value, Fn&& fn, + DefFn&& deffn = DefFn(), CmpFn&& cmpfn = CmpFn()) +{ + if (cmpfn(value, v0)) + { + return fn(cval<T, v0>); + } + else + { + return cswitch(cvals_t<T, values...>(), value, std::forward<Fn>(fn), std::forward<DefFn>(deffn), + std::forward<CmpFn>(cmpfn)); + } +} + +template <typename TrueFn, typename FalseFn = fn_noop> +CID_INTRIN decltype(auto) cif(cbool_t<true>, TrueFn&& truefn, FalseFn&& = FalseFn()) +{ + return truefn(cbool<true>); +} + +template <typename TrueFn, typename FalseFn = fn_noop> +CID_INTRIN decltype(auto) cif(cbool_t<false>, TrueFn&&, FalseFn&& falsefn = FalseFn()) +{ + return falsefn(cbool<false>); +} + +template <typename T, T start, T stop, typename BodyFn> +CID_INTRIN decltype(auto) cfor(cval_t<T, start>, cval_t<T, stop>, BodyFn&& bodyfn) +{ + return cforeach(cvalrange<T, start, stop>, std::forward<BodyFn>(bodyfn)); +} + 
+namespace details +{ + +template <typename T, typename Fn1, typename Fn2, typename... Fns> +inline decltype(auto) cmatch_impl(T&& value, Fn1&& first, Fn2&& second, Fns&&... rest); +template <typename T, typename Fn, typename... Ts> +inline decltype(auto) cmatch_impl(T&& value, Fn&& last); + +template <typename T, typename Fn, typename... Fns> +inline decltype(auto) cmatch_impl2(cbool_t<true>, T&& value, Fn&& fn, Fns&&...) +{ + return fn(std::forward<T>(value)); +} + +template <typename T, typename Fn, typename... Fns> +inline decltype(auto) cmatch_impl2(cbool_t<false>, T&& value, Fn&&, Fns&&... rest) +{ + return cmatch_impl(std::forward<T>(value), std::forward<Fns>(rest)...); +} + +template <typename T, typename Fn1, typename Fn2, typename... Fns> +inline decltype(auto) cmatch_impl(T&& value, Fn1&& first, Fn2&& second, Fns&&... rest) +{ + using first_arg = typename function_arguments<Fn1>::template nth<0>; + constexpr bool is_same = std::is_same<decay<T>, decay<first_arg>>::value; + return cmatch_impl2(cbool<is_same>, std::forward<T>(value), std::forward<Fn1>(first), + std::forward<Fn2>(second), std::forward<Fns>(rest)...); +} + +template <typename T, typename Fn, typename... Ts> +inline decltype(auto) cmatch_impl(T&& value, Fn&& last) +{ + return last(std::forward<T>(value)); +} +} + +template <typename T, typename Fn, typename... Args> +inline decltype(auto) cmatch(T&& value, Fn&& fn, Args... args) +{ + return details::cmatch_impl(std::forward<T>(value), std::forward<Fn>(fn), std::forward<Args>(args)...); +} + +namespace details +{ + +template <typename Result, typename... Args> +struct virtual_function +{ + virtual Result operator()(Args... args) = 0; + virtual virtual_function* make_copy() const = 0; + CID_INTRIN virtual ~virtual_function() = default; +}; + +template <typename Fn, typename Result, typename... 
Args> +struct virtual_function_impl : virtual_function<Result, Args...> +{ +public: + CID_INTRIN virtual_function_impl(const Fn& fn) : fn(fn) {} + CID_INTRIN Result operator()(Args... args) override final { return fn(args...); } + CID_INTRIN virtual_function<Result, Args...>* make_copy() const override final + { + return new virtual_function_impl{ fn }; + } + CID_INTRIN ~virtual_function_impl() {} + +private: + Fn fn; +}; + +template <typename Fn> +struct func_filter +{ + typedef Fn type; +}; +template <typename Result, typename... Args> +struct func_filter<Result(Args...)> +{ + typedef Result (*type)(Args...); +}; + +template <typename T> +constexpr CID_INTRIN T return_val() noexcept +{ + return {}; +} + +template <> +constexpr CID_INTRIN void return_val<void>() noexcept +{ +} +} + +template <typename> +struct function; + +/** + * @brief std::function-like lightweight function wrapper + * @code + * function<int( float )> f = []( float x ){ return static_cast<int>( x ); }; + * CHECK( f( 3.4f ) == 3 ) + * @encode + */ +template <typename Result, typename... Args> +struct function<Result(Args...)> +{ + using this_t = function<Result(Args...)>; + + function(function&& other) : fn(other.fn) { other.fn = nullptr; } + function& operator=(function&& other) + { + fn = other.fn; + other.fn = nullptr; + return *this; + } + + CID_INTRIN function() : fn(nullptr) {} + CID_INTRIN function(std::nullptr_t) : fn(nullptr) {} + template <typename Func> + CID_INTRIN function(const Func& x) + : fn(new details::virtual_function_impl<typename details::func_filter<Func>::type, Result, Args...>( + x)) + { + } + function(const this_t& other) : fn(other.fn ? 
other.fn->make_copy() : nullptr) {} + CID_INTRIN function& operator=(const this_t& other) + { + if ((&other != this) && (other.fn)) + { + auto* temp = other.fn->make_copy(); + delete fn; + fn = temp; + } + return *this; + } + CID_INTRIN function& operator=(std::nullptr_t) + { + delete fn; + fn = nullptr; + return *this; + } + template <typename Fn> + CID_INTRIN function& operator=(const Fn& x) + { + using FnImpl = + details::virtual_function_impl<typename details::func_filter<Fn>::type, Result, Args...>; + FnImpl* temp = new FnImpl(x); + delete fn; + fn = temp; + return *this; + } + CID_INTRIN Result operator()(Args... args) const + { + if (fn) + return (*fn)(args...); + else + return details::return_val<Result>(); + } + CID_INTRIN explicit operator bool() const noexcept { return !!fn; } + + CID_INTRIN ~function() { delete fn; } +private: + details::virtual_function<Result, Args...>* fn; +}; + +template <typename Ret, typename... Args, typename T, typename Fn, typename DefFn = fn_noop> +CID_INLINE function<Ret(Args...)> cdispatch(cvals_t<T>, identity<T>, Fn&&, DefFn&& deffn = DefFn()) +{ + return [=](Args... args) CID_INLINE_MEMBER -> Ret { return deffn(std::forward<Args>(args)...); }; +} + +template <typename Ret, typename... Args, typename T, T v0, T... values, typename Fn, + typename DefFn = fn_noop> +inline function<Ret(Args...)> cdispatch(cvals_t<T, v0, values...>, identity<T> value, Fn&& fn, + DefFn&& deffn = DefFn()) +{ + if (value == v0) + { + return [=](Args... args) + CID_INLINE_MEMBER -> Ret { return fn(cval<T, v0>, std::forward<Args>(args)...); }; + } + else + { + return cdispatch<Ret, Args...>(cvals_t<T, values...>(), value, std::forward<Fn>(fn), + std::forward<DefFn>(deffn)); + } +} + +template <typename T, T... values> +inline size_t cfind(cvals_t<T, values...>, identity<T> value) +{ + static const T temp[] = { values... 
}; + return static_cast<size_t>( + std::distance(std::begin(temp), std::find(std::begin(temp), std::end(temp), value))); +} + +template <typename Fn, typename... Args> +CID_NOINLINE static result_of<Fn(Args...)> noinline(Fn&& fn, Args&&... args) +{ + return fn(std::forward<Args>(args)...); +} + +template <typename Fn> +struct fn_noinline +{ + template <typename... Args> + CID_INTRIN result_of<Fn(Args...)> operator()(Args&&... args) const + { + return noinline(Fn{}, std::forward<Args>(args)...); + } +}; + +template <typename... Args, typename Fn, typename Ret = decltype(std::declval<Fn>()(std::declval<Args>()...)), + typename NonMemFn = Ret (*)(Fn*, Args...)> +CID_INTRIN NonMemFn make_nonmember(const Fn&) +{ + return [](Fn* fn, Args... args) -> Ret { return fn->operator()(std::forward<Args>(args)...); }; +} + +using type_id_t = const void*; + +namespace details +{ + +constexpr inline size_t strlen(const char* str) { return *str ? 1 + cometa::details::strlen(str + 1) : 0; } + +template <size_t... 
indices, size_t Nout = 1 + sizeof...(indices)> +constexpr inline std::array<char, Nout> gettypename_impl(const char* str, csizes_t<indices...>) +{ + std::array<char, Nout> arr{ { str[indices]..., 0 } }; + return arr; +} + +template <typename T> +constexpr inline const void* typeident_impl() noexcept +{ + return type_id_t(&typeident_impl<T>); +} +} + +/** + * @brief Gets the fully qualified name of the type, including namespace and template parameters (if any) + * @tparam T type + * @return name of the type + */ +template <typename T> +inline const char* type_name() noexcept +{ + constexpr size_t prefix = details::strlen("const char *cometa::type_name() [T = "); + constexpr size_t postfix = details::strlen("]"); + constexpr size_t length = sizeof(CID_FUNC_SIGNATURE) - 1 - prefix - postfix; + static const std::array<char, length + 1> name = + details::gettypename_impl(CID_FUNC_SIGNATURE + prefix, csizeseq<length>); + return name.data(); +} + +/** + * @brief Gets the fully qualified name of the type, including namespace and template parameters (if any) + * @param x value of specific type + * @return name of the type + */ +template <typename T> +inline const char* type_name(T x) noexcept +{ + (void)x; + return type_name<T>(); +} + +/** + * @brief Gets unique value associated with the type + * @tparam T type + * @return value of type that supports operator== and operator!= + */ +template <typename T> +constexpr inline type_id_t ctypeid() +{ + return details::typeident_impl<T>(); +} +/** + * @brief Gets unique value associated with the type + * @param x value of specific type + * @return value of type that supports operator== and operator!= + */ +template <typename T> +constexpr inline type_id_t ctypeid(T x) +{ + (void)x; + return details::typeident_impl<T>(); +} + +template <typename T> +struct array_ref +{ +public: + using value_type = T; + using pointer = value_type*; + using const_pointer = const value_type*; + using reference = value_type&; + using const_reference = 
const value_type&; + using iterator = pointer; + using const_iterator = const_pointer; + using reverse_iterator = std::reverse_iterator<pointer>; + using const_reverse_iterator = std::reverse_iterator<const_iterator>; + using size_type = std::size_t; + using difference_type = std::ptrdiff_t; + + constexpr array_ref() noexcept : m_data(nullptr), m_size(0) {} + constexpr array_ref(const array_ref&) noexcept = default; + constexpr array_ref(array_ref&&) noexcept = default; + constexpr array_ref& operator=(const array_ref&) noexcept = default; + constexpr array_ref& operator=(array_ref&&) noexcept = default; + + template <size_t N> + constexpr array_ref(value_type (&arr)[N]) noexcept : m_data(arr), m_size(N) + { + } + template <size_t N> + constexpr array_ref(const std::array<T, N>& arr) noexcept : m_data(arr.data()), m_size(N) + { + } + template <size_t N> + constexpr array_ref(std::array<T, N>& arr) noexcept : m_data(arr.data()), m_size(N) + { + } + template <typename... Ts> + constexpr array_ref(const std::vector<T, Ts...>& vec) noexcept : m_data(vec.data()), m_size(vec.size()) + { + } + template <typename... Ts, CMT_ENABLE_IF(sizeof...(Ts), is_const<T>::value)> + constexpr array_ref(const std::vector<remove_const<T>, Ts...>& vec) noexcept : m_data(vec.data()), + m_size(vec.size()) + { + } + template <typename... 
Ts> + constexpr array_ref(std::vector<T, Ts...>& vec) noexcept : m_data(vec.data()), m_size(vec.size()) + { + } + template <typename InputIter> + constexpr array_ref(InputIter first, InputIter last) noexcept : m_data(std::addressof(*first)), + m_size(std::distance(first, last)) + { + } + constexpr array_ref(T* data, size_type size) noexcept : m_data(data), m_size(size) {} + + constexpr reference front() const noexcept { return m_data[0]; } + constexpr reference back() const noexcept { return m_data[m_size - 1]; } + constexpr iterator begin() const noexcept { return m_data; } + constexpr iterator end() const noexcept { return m_data + m_size; } + constexpr const_iterator cbegin() const noexcept { return m_data; } + constexpr const_iterator cend() const noexcept { return m_data + m_size; } + constexpr pointer data() const noexcept { return m_data; } + constexpr std::size_t size() const noexcept { return m_size; } + constexpr bool empty() const noexcept { return !m_size; } + constexpr reference operator[](std::size_t index) const { return m_data[index]; } + +private: + pointer m_data; + size_type m_size; +}; + +template <typename T> +constexpr inline T choose_const() +{ + static_assert(sizeof(T) != 0, "T not found in the list of template arguments"); + return T(); +} + +/** + * Selects constant of the specific type + * @code + * CHECK( choose_const<f32>( 32.0f, 64.0 ) == 32.0f ); + * CHECK( choose_const<f64>( 32.0f, 64.0 ) == 64.0 ); + * @endcode + */ +template <typename T, typename C1, typename... Cs> +constexpr inline T choose_const(C1 c1, Cs... constants) +{ + return std::is_same<T, C1>::value ? 
static_cast<T>(c1) : choose_const<T>(constants...); +} + +template <typename T, std::size_t size> +inline array_ref<T> make_array_ref(T (&data)[size]) +{ + return array_ref<T>(data); +} + +template <typename T> +inline array_ref<T> make_array_ref(T* data, std::size_t size) +{ + return array_ref<T>(data, data + size); +} + +template <typename Container, CMT_ENABLE_IF(has_data_size<Container>::value), + typename T = remove_pointer<decltype(std::declval<Container>().data())>> +inline array_ref<T> make_array_ref(Container& cont) +{ + return array_ref<T>(cont.data(), cont.size()); +} + +template <typename Container, CMT_ENABLE_IF(has_data_size<Container>::value), + typename T = remove_pointer<decltype(std::declval<Container>().data())>> +inline array_ref<T> make_array_ref(const Container& cont) +{ + return array_ref<T>(cont.data(), cont.size()); +} + +template <typename T> +inline array_ref<T> make_array_ref(std::vector<T>& cont) +{ + return array_ref<T>(cont.data(), cont.size()); +} +template <typename T> +inline array_ref<const T> make_array_ref(const std::vector<T>& cont) +{ + return array_ref<const T>(cont.data(), cont.size()); +} + +template <typename Type, typename ErrEnum, ErrEnum OkValue = static_cast<ErrEnum>(0)> +struct result +{ + using value_type = Type; + using reference = value_type&; + using const_reference = const value_type&; + using pointer = value_type*; + using const_pointer = const value_type*; + + using error_type = ErrEnum; + + constexpr static error_type ok_value = OkValue; + + constexpr result(const result&) = default; + constexpr result(result&&) noexcept = default; + + constexpr result(ErrEnum error) noexcept : m_error(error) {} + + template <typename ValueInit, CMT_ENABLE_IF(std::is_constructible<value_type, ValueInit>::value)> + constexpr result(ValueInit&& value) noexcept : m_value(std::forward<ValueInit>(value)), m_error(OkValue) + { + } + + constexpr result(const Type& value) noexcept : m_value(value), m_error(OkValue) {} + constexpr 
result(Type&& value) noexcept : m_value(std::move(value)), m_error(OkValue) {} + + constexpr explicit operator bool() const { return m_error == OkValue; } + constexpr const_reference operator*() const { return m_value; } + constexpr reference operator*() { return m_value; } + constexpr const_pointer operator->() const { return &m_value; } + constexpr pointer operator->() { return &m_value; } + + constexpr const_reference value() const { return m_value; } + constexpr reference value() { return m_value; } + constexpr ErrEnum error() const { return m_error; } + constexpr bool ok() const { return m_error == OkValue; } +private: + Type m_value; + ErrEnum m_error; +}; + +template <typename Tfrom> +struct autocast_impl +{ + const Tfrom value; + template <typename T> + CID_INTRIN constexpr operator T() const noexcept + { + return static_cast<T>(value); + } +}; + +template <typename Tfrom> +CID_INTRIN constexpr autocast_impl<Tfrom> autocast(const Tfrom& value) noexcept +{ + return { value }; +} + +inline void stop_constexpr() {} + +namespace details +{ +template <typename T, typename = void> +struct signed_type_impl +{ + using type = T; +}; +template <typename T> +struct signed_type_impl<T, void_t<enable_if<std::is_unsigned<T>::value>>> +{ + using type = findinttype<std::numeric_limits<T>::min(), std::numeric_limits<T>::max()>; +}; +} + +template <typename T> +using signed_type = typename details::signed_type_impl<T>::type; + +template <typename T> +struct range +{ + using value_type = T; + using reference = T&; + using const_reference = const T&; + using pointer = T*; + using const_pointer = const T*; + + struct iterator + { + T value; + const_reference operator*() const { return value; } + const_pointer operator->() const { return &value; } + iterator& operator++() + { + ++value; + return *this; + } + iterator operator++(int) + { + iterator copy = *this; + ++(*this); + return copy; + } + bool operator!=(const iterator& other) const { return value != other.value; } + }; + 
T value_begin; + T value_end; + iterator begin() const { return iterator{ value_begin }; } + iterator end() const { return iterator{ value_end }; } +}; + +template <typename T1, typename T2> +range<common_type<T1, T2>> make_range(T1 begin, T2 end) +{ + return { begin, end }; +} + +template <typename T> +struct named_arg +{ + T value; + const char* name; +}; + +struct named +{ + constexpr named(const char* name) noexcept : name(name) {} + + template <typename T> + constexpr named_arg<T> operator=(T&& value) + { + return named_arg<T>{ std::forward<T>(value), name }; + } + const char* name; +}; + +inline named operator""_arg(const char* name, size_t) { return name; } +} + +#pragma clang diagnostic pop diff --git a/include/kfr/cometa/string.hpp b/include/kfr/cometa/string.hpp @@ -0,0 +1,481 @@ +#pragma once + +#include "../cometa.hpp" +#include <array> +#include <cstdio> +#include <string> + +#pragma clang diagnostic push +#if CID_HAS_WARNING("-Wformat-security") +#pragma clang diagnostic ignored "-Wformat-security" +#pragma clang diagnostic ignored "-Wused-but-marked-unused" +#endif + +namespace cometa +{ + +template <typename... Args> +CID_INLINE std::string as_string(const Args&... args); + +template <typename T> +constexpr inline const T& repr(const T& value) +{ + return value; +} + +template <typename T> +inline std::string repr(const named_arg<T>& value) +{ + return std::string(value.name) + " = " + as_string(value.value); +} + +template <typename T> +using repr_type = decay<decltype(repr(std::declval<T>()))>; + +template <size_t N> +using cstring = std::array<char, N>; + +namespace details +{ + +template <size_t N, size_t... indices> +CID_INLINE constexpr cstring<N> make_cstring_impl(const char (&str)[N], csizes_t<indices...>) +{ + return { { str[indices]..., 0 } }; +} + +template <size_t N1, size_t N2, size_t... 
indices> +CID_INLINE constexpr cstring<N1 - 1 + N2 - 1 + 1> concat_str_impl(const cstring<N1>& str1, + const cstring<N2>& str2, + csizes_t<indices...>) +{ + constexpr size_t L1 = N1 - 1; + return { { (indices < L1 ? str1[indices] : str2[indices - L1])..., 0 } }; +} +template <size_t N1, size_t N2, typename... Args> +CID_INLINE constexpr cstring<N1 - 1 + N2 - 1 + 1> concat_str_impl(const cstring<N1>& str1, + const cstring<N2>& str2) +{ + return concat_str_impl(str1, str2, csizeseq<N1 - 1 + N2 - 1>); +} +template <size_t N1, size_t Nfrom, size_t Nto, size_t... indices> +cstring<N1 - Nfrom + Nto> str_replace_impl(size_t pos, const cstring<N1>& str, const cstring<Nfrom>&, + const cstring<Nto>& to, csizes_t<indices...>) +{ + if (pos == size_t(-1)) + stop_constexpr(); + return { { (indices < pos ? str[indices] : (indices < pos + Nto - 1) ? to[indices - pos] + : str[indices - Nto + Nfrom])..., + 0 } }; +} +} + +CID_INLINE constexpr cstring<1> concat_cstring() { return { { 0 } }; } + +template <size_t N1> +CID_INLINE constexpr cstring<N1> concat_cstring(const cstring<N1>& str1) +{ + return str1; +} + +template <size_t N1, size_t N2, typename... Args> +CID_INLINE constexpr auto concat_cstring(const cstring<N1>& str1, const cstring<N2>& str2, + const Args&... args) +{ + return details::concat_str_impl(str1, concat_cstring(str2, args...)); +} + +template <size_t N> +CID_INLINE constexpr cstring<N> make_cstring(const char (&str)[N]) +{ + return details::make_cstring_impl(str, csizeseq<N - 1>); +} + +template <char... 
chars> +CID_INLINE constexpr cstring<sizeof...(chars) + 1> make_cstring(cchars_t<chars...>) +{ + return { { chars..., 0 } }; +} + +template <size_t N1, size_t Nneedle> +size_t str_find(const cstring<N1>& str, const cstring<Nneedle>& needle) +{ + size_t count = 0; + for (size_t i = 0; i < N1; i++) + { + if (str[i] == needle[count]) + count++; + else + count = 0; + if (count == Nneedle - 1) + return i + 1 - (Nneedle - 1); + } + return size_t(-1); +} + +template <size_t N1, size_t Nfrom, size_t Nto> +cstring<N1 - Nfrom + Nto> str_replace(const cstring<N1>& str, const cstring<Nfrom>& from, + const cstring<Nto>& to) +{ + return details::str_replace_impl(str_find(str, from), str, from, to, csizeseq<N1 - Nfrom + Nto - 1>); +} + +namespace details +{ +template <typename T, char t = -1, int width = -1, int prec = -1> +struct fmt_t +{ + const T& value; +}; + +template <int number, CMT_ENABLE_IF(number >= 0 && number < 10)> +constexpr cstring<2> itoa() +{ + return cstring<2>{ { static_cast<char>(number + '0'), 0 } }; +} +template <int number, CMT_ENABLE_IF(number >= 10)> +constexpr auto itoa() +{ + return concat_cstring(itoa<number / 10>(), itoa<number % 10>()); +} +template <int number, CMT_ENABLE_IF(number < 0)> +constexpr auto itoa() +{ + return concat_cstring(make_cstring("-"), itoa<-number>()); +} + +template <typename T, char t, int width, int prec, CMT_ENABLE_IF(width < 0 && prec >= 0)> +CID_INLINE constexpr auto value_fmt_arg(ctype_t<fmt_t<T, t, width, prec>>) +{ + return concat_cstring(make_cstring("."), itoa<prec>()); +} +template <typename T, char t, int width, int prec, CMT_ENABLE_IF(width >= 0 && prec < 0)> +CID_INLINE constexpr auto value_fmt_arg(ctype_t<fmt_t<T, t, width, prec>>) +{ + return itoa<width>(); +} +template <typename T, char t, int width, int prec, CMT_ENABLE_IF(width < 0 && prec < 0)> +CID_INLINE constexpr auto value_fmt_arg(ctype_t<fmt_t<T, t, width, prec>>) +{ + return make_cstring(""); +} +template <typename T, char t, int width, int prec, 
CMT_ENABLE_IF(width >= 0 && prec >= 0)> +CID_INLINE constexpr auto value_fmt_arg(ctype_t<fmt_t<T, t, width, prec>>) +{ + return concat_cstring(itoa<width>(), make_cstring("."), itoa<prec>()); +} + +CID_INLINE constexpr auto value_fmt(ctype_t<bool>) { return make_cstring("s"); } +CID_INLINE constexpr auto value_fmt(ctype_t<std::string>) { return make_cstring("s"); } +CID_INLINE constexpr auto value_fmt(ctype_t<char>) { return make_cstring("d"); } +CID_INLINE constexpr auto value_fmt(ctype_t<signed char>) { return make_cstring("d"); } +CID_INLINE constexpr auto value_fmt(ctype_t<unsigned char>) { return make_cstring("d"); } +CID_INLINE constexpr auto value_fmt(ctype_t<short>) { return make_cstring("d"); } +CID_INLINE constexpr auto value_fmt(ctype_t<unsigned short>) { return make_cstring("d"); } +CID_INLINE constexpr auto value_fmt(ctype_t<int>) { return make_cstring("d"); } +CID_INLINE constexpr auto value_fmt(ctype_t<long>) { return make_cstring("ld"); } +CID_INLINE constexpr auto value_fmt(ctype_t<long long>) { return make_cstring("lld"); } +CID_INLINE constexpr auto value_fmt(ctype_t<unsigned int>) { return make_cstring("u"); } +CID_INLINE constexpr auto value_fmt(ctype_t<unsigned long>) { return make_cstring("lu"); } +CID_INLINE constexpr auto value_fmt(ctype_t<unsigned long long>) { return make_cstring("llu"); } +CID_INLINE constexpr auto value_fmt(ctype_t<float>) { return make_cstring("g"); } +CID_INLINE constexpr auto value_fmt(ctype_t<double>) { return make_cstring("g"); } +CID_INLINE constexpr auto value_fmt(ctype_t<long double>) { return make_cstring("Lg"); } +CID_INLINE constexpr auto value_fmt(ctype_t<const char*>) { return make_cstring("s"); } +CID_INLINE constexpr auto value_fmt(ctype_t<char*>) { return make_cstring("s"); } +CID_INLINE constexpr auto value_fmt(ctype_t<void*>) { return make_cstring("p"); } +CID_INLINE constexpr auto value_fmt(ctype_t<const void*>) { return make_cstring("p"); } + +template <char... 
chars> +CID_INLINE constexpr auto value_fmt(ctype_t<cchars_t<chars...>>) +{ + return concat_cstring(make_cstring("s"), make_cstring(cchars<chars...>)); +} + +template <typename T> +CID_INLINE constexpr auto value_fmt(ctype_t<ctype_t<T>>) +{ + return make_cstring("s"); +} + +template <typename T, int width, int prec> +CID_INLINE constexpr auto value_fmt(ctype_t<fmt_t<T, -1, width, prec>> fmt) +{ + return concat_cstring(value_fmt_arg(fmt), value_fmt(ctype<repr_type<T>>)); +} +template <typename T, char t, int width, int prec> +CID_INLINE constexpr auto value_fmt(ctype_t<fmt_t<T, t, width, prec>> fmt) +{ + return concat_cstring(value_fmt_arg(fmt), cstring<2>{ { t, 0 } }); +} + +template <char... chars> +CID_INLINE const char* pack_value(const cchars_t<chars...>&) +{ + return ""; +} + +template <typename Arg> +CID_INLINE const Arg& pack_value(const Arg& value) +{ + return value; +} +CID_INLINE double pack_value(float value) { return static_cast<double>(value); } +CID_INLINE auto pack_value(bool value) { return value ? "true" : "false"; } +CID_INLINE auto pack_value(const std::string& value) { return value.c_str(); } + +template <typename T> +CID_INLINE const char* pack_value(ctype_t<T>) +{ + return type_name<T>(); +} + +template <typename T, char t, int width, int prec> +CID_INLINE auto pack_value(const fmt_t<T, t, width, prec>& value) +{ + return pack_value(repr(value.value)); +} + +template <size_t N1, size_t Nnew, size_t... 
indices> +CID_INLINE constexpr cstring<N1 - 3 + Nnew> fmt_replace_impl(const cstring<N1>& str, + const cstring<Nnew>& newfmt, + csizes_t<indices...>) +{ + size_t start = 0; + size_t end = 0; + cstring<N1 - 3 + Nnew> result; + for (size_t i = 0; i < N1; i++) + { + if (str[i] == '{') + start = i; + else if (str[i] == '}') + end = i; + } + + if (end - start == 1) // {} + { + for (size_t i = 0; i < N1; i++) + { + if (i < start) + result[i] = str[i]; + else if (i == start) + result[i] = '%'; + else if (i > start && i - start - 1 < Nnew - 1) + result[i] = newfmt[i - start - 1]; + else if (i - Nnew + 3 < N1 - 1) + result[i] = str[i - Nnew + 3]; + else + result[i] = 0; + } + } + return result; +} + +template <size_t N1, size_t Nto> +CID_INLINE constexpr cstring<N1 - 3 + Nto> fmt_replace(const cstring<N1>& str, const cstring<Nto>& newfmt) +{ + return fmt_replace_impl(str, newfmt, csizeseq<N1 - 3 + Nto - 1>); +} + +inline std::string replace_one(const std::string& str, const std::string& from, const std::string& to) +{ + std::string r = str; + size_t start_pos = 0; + if ((start_pos = r.find(from, start_pos)) != std::string::npos) + { + r.replace(start_pos, from.size(), to); + } + return r; +} + +CID_INLINE const std::string& build_fmt(const std::string& str, ctypes_t<>) { return str; } + +template <typename Arg, typename... 
Args> +CID_INLINE auto build_fmt(const std::string& str, ctypes_t<Arg, Args...>) +{ + constexpr auto fmt = value_fmt(ctype<decay<Arg>>); + return build_fmt(replace_one(str, "{}", "%" + std::string(fmt.data())), ctypes<Args...>); +} +} + +template <char t, int width = -1, int prec = -1, typename T> +CID_INLINE details::fmt_t<T, t, width, prec> fmt(const T& value) +{ + return { value }; +} + +template <int width = -1, int prec = -1, typename T> +CID_INLINE details::fmt_t<T, -1, width, prec> fmtwidth(const T& value) +{ + return { value }; +} + +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wgnu-string-literal-operator-template" + +constexpr auto build_fmt_str(cchars_t<>, ctypes_t<>) { return make_cstring(""); } + +template <char... chars, typename Arg, typename... Args> +constexpr auto build_fmt_str(cchars_t<'@', chars...>, ctypes_t<Arg, Args...>) +{ + return concat_cstring(make_cstring("%"), details::value_fmt(ctype<decay<Arg>>), + build_fmt_str(cchars<chars...>, ctypes<Args...>)); +} + +template <char ch, char... chars, typename... Args> +constexpr auto build_fmt_str(cchars_t<ch, chars...>, ctypes_t<Args...>) +{ + return concat_cstring(make_cstring(cchars<ch>), build_fmt_str(cchars<chars...>, ctypes<Args...>)); +} + +template <char... chars> +struct format_t +{ + template <typename... Args> + inline std::string operator()(const Args&... args) + { + constexpr auto format_str = build_fmt_str(cchars<chars...>, ctypes<repr_type<Args>...>); + + std::string result; + const int size = std::snprintf(nullptr, 0, format_str.data(), details::pack_value(args)...); + if (size <= 0) + return result; + result.resize(size_t(size + 1)); + result.resize(size_t(std::snprintf(&result[0], size_t(size + 1), format_str.data(), + details::pack_value(repr(args))...))); + return result; + } +}; + +template <char... chars> +struct print_t +{ + template <typename... Args> + CID_INLINE void operator()(const Args&... 
args) + { + constexpr auto format_str = build_fmt_str(cchars<chars...>, ctypes<repr_type<Args>...>); + + std::printf(format_str.data(), details::pack_value(args)...); + } +}; + +template <typename Char, Char... chars> +constexpr format_t<chars...> operator""_format() +{ + return {}; +} + +template <typename Char, Char... chars> +constexpr CID_INLINE print_t<chars...> operator""_print() +{ + return {}; +} + +#pragma clang diagnostic pop + +template <typename... Args> +CID_INLINE void printfmt(const std::string& fmt, const Args&... args) +{ + const auto format_str = details::build_fmt(fmt, ctypes<repr_type<Args>...>); + std::printf(format_str.data(), details::pack_value(repr(args))...); +} + +template <typename... Args> +CID_INLINE void fprintfmt(FILE* f, const std::string& fmt, const Args&... args) +{ + const auto format_str = details::build_fmt(fmt, ctypes<repr_type<Args>...>); + std::fprintf(f, format_str.data(), details::pack_value(repr(args))...); +} + +template <typename... Args> +CID_INLINE int snprintfmt(char* str, size_t size, const std::string& fmt, const Args&... args) +{ + const auto format_str = details::build_fmt(fmt, ctypes<repr_type<Args>...>); + return std::snprintf(str, size, format_str.data(), details::pack_value(repr(args))...); +} + +template <typename... Args> +CID_INLINE std::string format(const std::string& fmt, const Args&... args) +{ + std::string result; + const auto format_str = details::build_fmt(fmt, ctypes<repr_type<Args>...>); + const int size = std::snprintf(nullptr, 0, format_str.data(), details::pack_value(repr(args))...); + if (size <= 0) + return result; + result.resize(size_t(size + 1)); + result.resize(size_t( + std::snprintf(&result[0], size_t(size + 1), format_str.data(), details::pack_value(repr(args))...))); + return result; +} + +template <typename... Args> +CID_INLINE void print(const Args&... 
args) +{ + constexpr auto format_str = concat_cstring( + concat_cstring(make_cstring("%"), details::value_fmt(ctype<decay<repr_type<Args>>>))...); + std::printf(format_str.data(), details::pack_value(repr(args))...); +} + +template <typename... Args> +CID_INLINE void println(const Args&... args) +{ + constexpr auto format_str = concat_cstring( + concat_cstring(make_cstring("%"), details::value_fmt(ctype<decay<repr_type<Args>>>))..., + make_cstring("\n")); + std::printf(format_str.data(), details::pack_value(repr(args))...); +} + +template <typename... Args> +CID_INLINE std::string as_string(const Args&... args) +{ + std::string result; + constexpr auto format_str = concat_cstring( + concat_cstring(make_cstring("%"), details::value_fmt(ctype<decay<repr_type<Args>>>))...); + + const int size = std::snprintf(nullptr, 0, format_str.data(), details::pack_value(repr(args))...); + if (size <= 0) + return result; + result.resize(size_t(size + 1)); + result.resize(size_t( + std::snprintf(&result[0], size_t(size + 1), format_str.data(), details::pack_value(repr(args))...))); + return result; +} + +inline std::string padright(size_t size, const std::string& text, char character = ' ') +{ + const size_t pad = size >= text.size() ? size - text.size() : 0; + return std::string(pad, character) + text; +} + +inline std::string padleft(size_t size, const std::string& text, char character = ' ') +{ + const size_t pad = size >= text.size() ? size - text.size() : 0; + return text + std::string(pad, character); +} + +inline std::string padcenter(size_t size, const std::string& text, char character = ' ') +{ + const size_t pad = size >= text.size() ? 
size - text.size() : 0; + return std::string(pad / 2, character) + text + std::string(pad - pad / 2, character); +} + +template <typename T> +inline std::string q(T x) +{ + return "\"" + as_string(std::forward<T>(x)) + "\""; +} + +template <typename T> +inline std::string join(T x) +{ + return as_string(std::forward<T>(x)); +} + +template <typename T, typename U, typename... Ts> +inline std::string join(T x, U y, Ts... rest) +{ + return format("{}, {}", x, join(std::forward<U>(y), std::forward<Ts>(rest)...)); +} +} + +#pragma clang diagnostic pop diff --git a/include/kfr/data/bitrev.hpp b/include/kfr/data/bitrev.hpp @@ -0,0 +1,1057 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. 
+ */ +#pragma once + +namespace kfr +{ + +namespace data +{ +constexpr unsigned short bitrev_table[] = { + 0, 8192, 4096, 12288, 2048, 10240, 6144, 14336, 1024, 9216, 5120, 13312, 3072, 11264, 7168, 15360, + 512, 8704, 4608, 12800, 2560, 10752, 6656, 14848, 1536, 9728, 5632, 13824, 3584, 11776, 7680, 15872, + 256, 8448, 4352, 12544, 2304, 10496, 6400, 14592, 1280, 9472, 5376, 13568, 3328, 11520, 7424, 15616, + 768, 8960, 4864, 13056, 2816, 11008, 6912, 15104, 1792, 9984, 5888, 14080, 3840, 12032, 7936, 16128, + 128, 8320, 4224, 12416, 2176, 10368, 6272, 14464, 1152, 9344, 5248, 13440, 3200, 11392, 7296, 15488, + 640, 8832, 4736, 12928, 2688, 10880, 6784, 14976, 1664, 9856, 5760, 13952, 3712, 11904, 7808, 16000, + 384, 8576, 4480, 12672, 2432, 10624, 6528, 14720, 1408, 9600, 5504, 13696, 3456, 11648, 7552, 15744, + 896, 9088, 4992, 13184, 2944, 11136, 7040, 15232, 1920, 10112, 6016, 14208, 3968, 12160, 8064, 16256, + 64, 8256, 4160, 12352, 2112, 10304, 6208, 14400, 1088, 9280, 5184, 13376, 3136, 11328, 7232, 15424, + 576, 8768, 4672, 12864, 2624, 10816, 6720, 14912, 1600, 9792, 5696, 13888, 3648, 11840, 7744, 15936, + 320, 8512, 4416, 12608, 2368, 10560, 6464, 14656, 1344, 9536, 5440, 13632, 3392, 11584, 7488, 15680, + 832, 9024, 4928, 13120, 2880, 11072, 6976, 15168, 1856, 10048, 5952, 14144, 3904, 12096, 8000, 16192, + 192, 8384, 4288, 12480, 2240, 10432, 6336, 14528, 1216, 9408, 5312, 13504, 3264, 11456, 7360, 15552, + 704, 8896, 4800, 12992, 2752, 10944, 6848, 15040, 1728, 9920, 5824, 14016, 3776, 11968, 7872, 16064, + 448, 8640, 4544, 12736, 2496, 10688, 6592, 14784, 1472, 9664, 5568, 13760, 3520, 11712, 7616, 15808, + 960, 9152, 5056, 13248, 3008, 11200, 7104, 15296, 1984, 10176, 6080, 14272, 4032, 12224, 8128, 16320, + 32, 8224, 4128, 12320, 2080, 10272, 6176, 14368, 1056, 9248, 5152, 13344, 3104, 11296, 7200, 15392, + 544, 8736, 4640, 12832, 2592, 10784, 6688, 14880, 1568, 9760, 5664, 13856, 3616, 11808, 7712, 15904, + 288, 8480, 4384, 12576, 2336, 10528, 
6432, 14624, 1312, 9504, 5408, 13600, 3360, 11552, 7456, 15648, + 800, 8992, 4896, 13088, 2848, 11040, 6944, 15136, 1824, 10016, 5920, 14112, 3872, 12064, 7968, 16160, + 160, 8352, 4256, 12448, 2208, 10400, 6304, 14496, 1184, 9376, 5280, 13472, 3232, 11424, 7328, 15520, + 672, 8864, 4768, 12960, 2720, 10912, 6816, 15008, 1696, 9888, 5792, 13984, 3744, 11936, 7840, 16032, + 416, 8608, 4512, 12704, 2464, 10656, 6560, 14752, 1440, 9632, 5536, 13728, 3488, 11680, 7584, 15776, + 928, 9120, 5024, 13216, 2976, 11168, 7072, 15264, 1952, 10144, 6048, 14240, 4000, 12192, 8096, 16288, + 96, 8288, 4192, 12384, 2144, 10336, 6240, 14432, 1120, 9312, 5216, 13408, 3168, 11360, 7264, 15456, + 608, 8800, 4704, 12896, 2656, 10848, 6752, 14944, 1632, 9824, 5728, 13920, 3680, 11872, 7776, 15968, + 352, 8544, 4448, 12640, 2400, 10592, 6496, 14688, 1376, 9568, 5472, 13664, 3424, 11616, 7520, 15712, + 864, 9056, 4960, 13152, 2912, 11104, 7008, 15200, 1888, 10080, 5984, 14176, 3936, 12128, 8032, 16224, + 224, 8416, 4320, 12512, 2272, 10464, 6368, 14560, 1248, 9440, 5344, 13536, 3296, 11488, 7392, 15584, + 736, 8928, 4832, 13024, 2784, 10976, 6880, 15072, 1760, 9952, 5856, 14048, 3808, 12000, 7904, 16096, + 480, 8672, 4576, 12768, 2528, 10720, 6624, 14816, 1504, 9696, 5600, 13792, 3552, 11744, 7648, 15840, + 992, 9184, 5088, 13280, 3040, 11232, 7136, 15328, 2016, 10208, 6112, 14304, 4064, 12256, 8160, 16352, + 16, 8208, 4112, 12304, 2064, 10256, 6160, 14352, 1040, 9232, 5136, 13328, 3088, 11280, 7184, 15376, + 528, 8720, 4624, 12816, 2576, 10768, 6672, 14864, 1552, 9744, 5648, 13840, 3600, 11792, 7696, 15888, + 272, 8464, 4368, 12560, 2320, 10512, 6416, 14608, 1296, 9488, 5392, 13584, 3344, 11536, 7440, 15632, + 784, 8976, 4880, 13072, 2832, 11024, 6928, 15120, 1808, 10000, 5904, 14096, 3856, 12048, 7952, 16144, + 144, 8336, 4240, 12432, 2192, 10384, 6288, 14480, 1168, 9360, 5264, 13456, 3216, 11408, 7312, 15504, + 656, 8848, 4752, 12944, 2704, 10896, 6800, 14992, 1680, 9872, 5776, 13968, 
3728, 11920, 7824, 16016, + 400, 8592, 4496, 12688, 2448, 10640, 6544, 14736, 1424, 9616, 5520, 13712, 3472, 11664, 7568, 15760, + 912, 9104, 5008, 13200, 2960, 11152, 7056, 15248, 1936, 10128, 6032, 14224, 3984, 12176, 8080, 16272, + 80, 8272, 4176, 12368, 2128, 10320, 6224, 14416, 1104, 9296, 5200, 13392, 3152, 11344, 7248, 15440, + 592, 8784, 4688, 12880, 2640, 10832, 6736, 14928, 1616, 9808, 5712, 13904, 3664, 11856, 7760, 15952, + 336, 8528, 4432, 12624, 2384, 10576, 6480, 14672, 1360, 9552, 5456, 13648, 3408, 11600, 7504, 15696, + 848, 9040, 4944, 13136, 2896, 11088, 6992, 15184, 1872, 10064, 5968, 14160, 3920, 12112, 8016, 16208, + 208, 8400, 4304, 12496, 2256, 10448, 6352, 14544, 1232, 9424, 5328, 13520, 3280, 11472, 7376, 15568, + 720, 8912, 4816, 13008, 2768, 10960, 6864, 15056, 1744, 9936, 5840, 14032, 3792, 11984, 7888, 16080, + 464, 8656, 4560, 12752, 2512, 10704, 6608, 14800, 1488, 9680, 5584, 13776, 3536, 11728, 7632, 15824, + 976, 9168, 5072, 13264, 3024, 11216, 7120, 15312, 2000, 10192, 6096, 14288, 4048, 12240, 8144, 16336, + 48, 8240, 4144, 12336, 2096, 10288, 6192, 14384, 1072, 9264, 5168, 13360, 3120, 11312, 7216, 15408, + 560, 8752, 4656, 12848, 2608, 10800, 6704, 14896, 1584, 9776, 5680, 13872, 3632, 11824, 7728, 15920, + 304, 8496, 4400, 12592, 2352, 10544, 6448, 14640, 1328, 9520, 5424, 13616, 3376, 11568, 7472, 15664, + 816, 9008, 4912, 13104, 2864, 11056, 6960, 15152, 1840, 10032, 5936, 14128, 3888, 12080, 7984, 16176, + 176, 8368, 4272, 12464, 2224, 10416, 6320, 14512, 1200, 9392, 5296, 13488, 3248, 11440, 7344, 15536, + 688, 8880, 4784, 12976, 2736, 10928, 6832, 15024, 1712, 9904, 5808, 14000, 3760, 11952, 7856, 16048, + 432, 8624, 4528, 12720, 2480, 10672, 6576, 14768, 1456, 9648, 5552, 13744, 3504, 11696, 7600, 15792, + 944, 9136, 5040, 13232, 2992, 11184, 7088, 15280, 1968, 10160, 6064, 14256, 4016, 12208, 8112, 16304, + 112, 8304, 4208, 12400, 2160, 10352, 6256, 14448, 1136, 9328, 5232, 13424, 3184, 11376, 7280, 15472, + 624, 8816, 
4720, 12912, 2672, 10864, 6768, 14960, 1648, 9840, 5744, 13936, 3696, 11888, 7792, 15984, + 368, 8560, 4464, 12656, 2416, 10608, 6512, 14704, 1392, 9584, 5488, 13680, 3440, 11632, 7536, 15728, + 880, 9072, 4976, 13168, 2928, 11120, 7024, 15216, 1904, 10096, 6000, 14192, 3952, 12144, 8048, 16240, + 240, 8432, 4336, 12528, 2288, 10480, 6384, 14576, 1264, 9456, 5360, 13552, 3312, 11504, 7408, 15600, + 752, 8944, 4848, 13040, 2800, 10992, 6896, 15088, 1776, 9968, 5872, 14064, 3824, 12016, 7920, 16112, + 496, 8688, 4592, 12784, 2544, 10736, 6640, 14832, 1520, 9712, 5616, 13808, 3568, 11760, 7664, 15856, + 1008, 9200, 5104, 13296, 3056, 11248, 7152, 15344, 2032, 10224, 6128, 14320, 4080, 12272, 8176, 16368, + 8, 8200, 4104, 12296, 2056, 10248, 6152, 14344, 1032, 9224, 5128, 13320, 3080, 11272, 7176, 15368, + 520, 8712, 4616, 12808, 2568, 10760, 6664, 14856, 1544, 9736, 5640, 13832, 3592, 11784, 7688, 15880, + 264, 8456, 4360, 12552, 2312, 10504, 6408, 14600, 1288, 9480, 5384, 13576, 3336, 11528, 7432, 15624, + 776, 8968, 4872, 13064, 2824, 11016, 6920, 15112, 1800, 9992, 5896, 14088, 3848, 12040, 7944, 16136, + 136, 8328, 4232, 12424, 2184, 10376, 6280, 14472, 1160, 9352, 5256, 13448, 3208, 11400, 7304, 15496, + 648, 8840, 4744, 12936, 2696, 10888, 6792, 14984, 1672, 9864, 5768, 13960, 3720, 11912, 7816, 16008, + 392, 8584, 4488, 12680, 2440, 10632, 6536, 14728, 1416, 9608, 5512, 13704, 3464, 11656, 7560, 15752, + 904, 9096, 5000, 13192, 2952, 11144, 7048, 15240, 1928, 10120, 6024, 14216, 3976, 12168, 8072, 16264, + 72, 8264, 4168, 12360, 2120, 10312, 6216, 14408, 1096, 9288, 5192, 13384, 3144, 11336, 7240, 15432, + 584, 8776, 4680, 12872, 2632, 10824, 6728, 14920, 1608, 9800, 5704, 13896, 3656, 11848, 7752, 15944, + 328, 8520, 4424, 12616, 2376, 10568, 6472, 14664, 1352, 9544, 5448, 13640, 3400, 11592, 7496, 15688, + 840, 9032, 4936, 13128, 2888, 11080, 6984, 15176, 1864, 10056, 5960, 14152, 3912, 12104, 8008, 16200, + 200, 8392, 4296, 12488, 2248, 10440, 6344, 14536, 
1224, 9416, 5320, 13512, 3272, 11464, 7368, 15560, + 712, 8904, 4808, 13000, 2760, 10952, 6856, 15048, 1736, 9928, 5832, 14024, 3784, 11976, 7880, 16072, + 456, 8648, 4552, 12744, 2504, 10696, 6600, 14792, 1480, 9672, 5576, 13768, 3528, 11720, 7624, 15816, + 968, 9160, 5064, 13256, 3016, 11208, 7112, 15304, 1992, 10184, 6088, 14280, 4040, 12232, 8136, 16328, + 40, 8232, 4136, 12328, 2088, 10280, 6184, 14376, 1064, 9256, 5160, 13352, 3112, 11304, 7208, 15400, + 552, 8744, 4648, 12840, 2600, 10792, 6696, 14888, 1576, 9768, 5672, 13864, 3624, 11816, 7720, 15912, + 296, 8488, 4392, 12584, 2344, 10536, 6440, 14632, 1320, 9512, 5416, 13608, 3368, 11560, 7464, 15656, + 808, 9000, 4904, 13096, 2856, 11048, 6952, 15144, 1832, 10024, 5928, 14120, 3880, 12072, 7976, 16168, + 168, 8360, 4264, 12456, 2216, 10408, 6312, 14504, 1192, 9384, 5288, 13480, 3240, 11432, 7336, 15528, + 680, 8872, 4776, 12968, 2728, 10920, 6824, 15016, 1704, 9896, 5800, 13992, 3752, 11944, 7848, 16040, + 424, 8616, 4520, 12712, 2472, 10664, 6568, 14760, 1448, 9640, 5544, 13736, 3496, 11688, 7592, 15784, + 936, 9128, 5032, 13224, 2984, 11176, 7080, 15272, 1960, 10152, 6056, 14248, 4008, 12200, 8104, 16296, + 104, 8296, 4200, 12392, 2152, 10344, 6248, 14440, 1128, 9320, 5224, 13416, 3176, 11368, 7272, 15464, + 616, 8808, 4712, 12904, 2664, 10856, 6760, 14952, 1640, 9832, 5736, 13928, 3688, 11880, 7784, 15976, + 360, 8552, 4456, 12648, 2408, 10600, 6504, 14696, 1384, 9576, 5480, 13672, 3432, 11624, 7528, 15720, + 872, 9064, 4968, 13160, 2920, 11112, 7016, 15208, 1896, 10088, 5992, 14184, 3944, 12136, 8040, 16232, + 232, 8424, 4328, 12520, 2280, 10472, 6376, 14568, 1256, 9448, 5352, 13544, 3304, 11496, 7400, 15592, + 744, 8936, 4840, 13032, 2792, 10984, 6888, 15080, 1768, 9960, 5864, 14056, 3816, 12008, 7912, 16104, + 488, 8680, 4584, 12776, 2536, 10728, 6632, 14824, 1512, 9704, 5608, 13800, 3560, 11752, 7656, 15848, + 1000, 9192, 5096, 13288, 3048, 11240, 7144, 15336, 2024, 10216, 6120, 14312, 4072, 12264, 
8168, 16360, + 24, 8216, 4120, 12312, 2072, 10264, 6168, 14360, 1048, 9240, 5144, 13336, 3096, 11288, 7192, 15384, + 536, 8728, 4632, 12824, 2584, 10776, 6680, 14872, 1560, 9752, 5656, 13848, 3608, 11800, 7704, 15896, + 280, 8472, 4376, 12568, 2328, 10520, 6424, 14616, 1304, 9496, 5400, 13592, 3352, 11544, 7448, 15640, + 792, 8984, 4888, 13080, 2840, 11032, 6936, 15128, 1816, 10008, 5912, 14104, 3864, 12056, 7960, 16152, + 152, 8344, 4248, 12440, 2200, 10392, 6296, 14488, 1176, 9368, 5272, 13464, 3224, 11416, 7320, 15512, + 664, 8856, 4760, 12952, 2712, 10904, 6808, 15000, 1688, 9880, 5784, 13976, 3736, 11928, 7832, 16024, + 408, 8600, 4504, 12696, 2456, 10648, 6552, 14744, 1432, 9624, 5528, 13720, 3480, 11672, 7576, 15768, + 920, 9112, 5016, 13208, 2968, 11160, 7064, 15256, 1944, 10136, 6040, 14232, 3992, 12184, 8088, 16280, + 88, 8280, 4184, 12376, 2136, 10328, 6232, 14424, 1112, 9304, 5208, 13400, 3160, 11352, 7256, 15448, + 600, 8792, 4696, 12888, 2648, 10840, 6744, 14936, 1624, 9816, 5720, 13912, 3672, 11864, 7768, 15960, + 344, 8536, 4440, 12632, 2392, 10584, 6488, 14680, 1368, 9560, 5464, 13656, 3416, 11608, 7512, 15704, + 856, 9048, 4952, 13144, 2904, 11096, 7000, 15192, 1880, 10072, 5976, 14168, 3928, 12120, 8024, 16216, + 216, 8408, 4312, 12504, 2264, 10456, 6360, 14552, 1240, 9432, 5336, 13528, 3288, 11480, 7384, 15576, + 728, 8920, 4824, 13016, 2776, 10968, 6872, 15064, 1752, 9944, 5848, 14040, 3800, 11992, 7896, 16088, + 472, 8664, 4568, 12760, 2520, 10712, 6616, 14808, 1496, 9688, 5592, 13784, 3544, 11736, 7640, 15832, + 984, 9176, 5080, 13272, 3032, 11224, 7128, 15320, 2008, 10200, 6104, 14296, 4056, 12248, 8152, 16344, + 56, 8248, 4152, 12344, 2104, 10296, 6200, 14392, 1080, 9272, 5176, 13368, 3128, 11320, 7224, 15416, + 568, 8760, 4664, 12856, 2616, 10808, 6712, 14904, 1592, 9784, 5688, 13880, 3640, 11832, 7736, 15928, + 312, 8504, 4408, 12600, 2360, 10552, 6456, 14648, 1336, 9528, 5432, 13624, 3384, 11576, 7480, 15672, + 824, 9016, 4920, 13112, 
2872, 11064, 6968, 15160, 1848, 10040, 5944, 14136, 3896, 12088, 7992, 16184, + 184, 8376, 4280, 12472, 2232, 10424, 6328, 14520, 1208, 9400, 5304, 13496, 3256, 11448, 7352, 15544, + 696, 8888, 4792, 12984, 2744, 10936, 6840, 15032, 1720, 9912, 5816, 14008, 3768, 11960, 7864, 16056, + 440, 8632, 4536, 12728, 2488, 10680, 6584, 14776, 1464, 9656, 5560, 13752, 3512, 11704, 7608, 15800, + 952, 9144, 5048, 13240, 3000, 11192, 7096, 15288, 1976, 10168, 6072, 14264, 4024, 12216, 8120, 16312, + 120, 8312, 4216, 12408, 2168, 10360, 6264, 14456, 1144, 9336, 5240, 13432, 3192, 11384, 7288, 15480, + 632, 8824, 4728, 12920, 2680, 10872, 6776, 14968, 1656, 9848, 5752, 13944, 3704, 11896, 7800, 15992, + 376, 8568, 4472, 12664, 2424, 10616, 6520, 14712, 1400, 9592, 5496, 13688, 3448, 11640, 7544, 15736, + 888, 9080, 4984, 13176, 2936, 11128, 7032, 15224, 1912, 10104, 6008, 14200, 3960, 12152, 8056, 16248, + 248, 8440, 4344, 12536, 2296, 10488, 6392, 14584, 1272, 9464, 5368, 13560, 3320, 11512, 7416, 15608, + 760, 8952, 4856, 13048, 2808, 11000, 6904, 15096, 1784, 9976, 5880, 14072, 3832, 12024, 7928, 16120, + 504, 8696, 4600, 12792, 2552, 10744, 6648, 14840, 1528, 9720, 5624, 13816, 3576, 11768, 7672, 15864, + 1016, 9208, 5112, 13304, 3064, 11256, 7160, 15352, 2040, 10232, 6136, 14328, 4088, 12280, 8184, 16376, + 4, 8196, 4100, 12292, 2052, 10244, 6148, 14340, 1028, 9220, 5124, 13316, 3076, 11268, 7172, 15364, + 516, 8708, 4612, 12804, 2564, 10756, 6660, 14852, 1540, 9732, 5636, 13828, 3588, 11780, 7684, 15876, + 260, 8452, 4356, 12548, 2308, 10500, 6404, 14596, 1284, 9476, 5380, 13572, 3332, 11524, 7428, 15620, + 772, 8964, 4868, 13060, 2820, 11012, 6916, 15108, 1796, 9988, 5892, 14084, 3844, 12036, 7940, 16132, + 132, 8324, 4228, 12420, 2180, 10372, 6276, 14468, 1156, 9348, 5252, 13444, 3204, 11396, 7300, 15492, + 644, 8836, 4740, 12932, 2692, 10884, 6788, 14980, 1668, 9860, 5764, 13956, 3716, 11908, 7812, 16004, + 388, 8580, 4484, 12676, 2436, 10628, 6532, 14724, 1412, 9604, 
5508, 13700, 3460, 11652, 7556, 15748, + 900, 9092, 4996, 13188, 2948, 11140, 7044, 15236, 1924, 10116, 6020, 14212, 3972, 12164, 8068, 16260, + 68, 8260, 4164, 12356, 2116, 10308, 6212, 14404, 1092, 9284, 5188, 13380, 3140, 11332, 7236, 15428, + 580, 8772, 4676, 12868, 2628, 10820, 6724, 14916, 1604, 9796, 5700, 13892, 3652, 11844, 7748, 15940, + 324, 8516, 4420, 12612, 2372, 10564, 6468, 14660, 1348, 9540, 5444, 13636, 3396, 11588, 7492, 15684, + 836, 9028, 4932, 13124, 2884, 11076, 6980, 15172, 1860, 10052, 5956, 14148, 3908, 12100, 8004, 16196, + 196, 8388, 4292, 12484, 2244, 10436, 6340, 14532, 1220, 9412, 5316, 13508, 3268, 11460, 7364, 15556, + 708, 8900, 4804, 12996, 2756, 10948, 6852, 15044, 1732, 9924, 5828, 14020, 3780, 11972, 7876, 16068, + 452, 8644, 4548, 12740, 2500, 10692, 6596, 14788, 1476, 9668, 5572, 13764, 3524, 11716, 7620, 15812, + 964, 9156, 5060, 13252, 3012, 11204, 7108, 15300, 1988, 10180, 6084, 14276, 4036, 12228, 8132, 16324, + 36, 8228, 4132, 12324, 2084, 10276, 6180, 14372, 1060, 9252, 5156, 13348, 3108, 11300, 7204, 15396, + 548, 8740, 4644, 12836, 2596, 10788, 6692, 14884, 1572, 9764, 5668, 13860, 3620, 11812, 7716, 15908, + 292, 8484, 4388, 12580, 2340, 10532, 6436, 14628, 1316, 9508, 5412, 13604, 3364, 11556, 7460, 15652, + 804, 8996, 4900, 13092, 2852, 11044, 6948, 15140, 1828, 10020, 5924, 14116, 3876, 12068, 7972, 16164, + 164, 8356, 4260, 12452, 2212, 10404, 6308, 14500, 1188, 9380, 5284, 13476, 3236, 11428, 7332, 15524, + 676, 8868, 4772, 12964, 2724, 10916, 6820, 15012, 1700, 9892, 5796, 13988, 3748, 11940, 7844, 16036, + 420, 8612, 4516, 12708, 2468, 10660, 6564, 14756, 1444, 9636, 5540, 13732, 3492, 11684, 7588, 15780, + 932, 9124, 5028, 13220, 2980, 11172, 7076, 15268, 1956, 10148, 6052, 14244, 4004, 12196, 8100, 16292, + 100, 8292, 4196, 12388, 2148, 10340, 6244, 14436, 1124, 9316, 5220, 13412, 3172, 11364, 7268, 15460, + 612, 8804, 4708, 12900, 2660, 10852, 6756, 14948, 1636, 9828, 5732, 13924, 3684, 11876, 7780, 15972, 
+ 356, 8548, 4452, 12644, 2404, 10596, 6500, 14692, 1380, 9572, 5476, 13668, 3428, 11620, 7524, 15716, + 868, 9060, 4964, 13156, 2916, 11108, 7012, 15204, 1892, 10084, 5988, 14180, 3940, 12132, 8036, 16228, + 228, 8420, 4324, 12516, 2276, 10468, 6372, 14564, 1252, 9444, 5348, 13540, 3300, 11492, 7396, 15588, + 740, 8932, 4836, 13028, 2788, 10980, 6884, 15076, 1764, 9956, 5860, 14052, 3812, 12004, 7908, 16100, + 484, 8676, 4580, 12772, 2532, 10724, 6628, 14820, 1508, 9700, 5604, 13796, 3556, 11748, 7652, 15844, + 996, 9188, 5092, 13284, 3044, 11236, 7140, 15332, 2020, 10212, 6116, 14308, 4068, 12260, 8164, 16356, + 20, 8212, 4116, 12308, 2068, 10260, 6164, 14356, 1044, 9236, 5140, 13332, 3092, 11284, 7188, 15380, + 532, 8724, 4628, 12820, 2580, 10772, 6676, 14868, 1556, 9748, 5652, 13844, 3604, 11796, 7700, 15892, + 276, 8468, 4372, 12564, 2324, 10516, 6420, 14612, 1300, 9492, 5396, 13588, 3348, 11540, 7444, 15636, + 788, 8980, 4884, 13076, 2836, 11028, 6932, 15124, 1812, 10004, 5908, 14100, 3860, 12052, 7956, 16148, + 148, 8340, 4244, 12436, 2196, 10388, 6292, 14484, 1172, 9364, 5268, 13460, 3220, 11412, 7316, 15508, + 660, 8852, 4756, 12948, 2708, 10900, 6804, 14996, 1684, 9876, 5780, 13972, 3732, 11924, 7828, 16020, + 404, 8596, 4500, 12692, 2452, 10644, 6548, 14740, 1428, 9620, 5524, 13716, 3476, 11668, 7572, 15764, + 916, 9108, 5012, 13204, 2964, 11156, 7060, 15252, 1940, 10132, 6036, 14228, 3988, 12180, 8084, 16276, + 84, 8276, 4180, 12372, 2132, 10324, 6228, 14420, 1108, 9300, 5204, 13396, 3156, 11348, 7252, 15444, + 596, 8788, 4692, 12884, 2644, 10836, 6740, 14932, 1620, 9812, 5716, 13908, 3668, 11860, 7764, 15956, + 340, 8532, 4436, 12628, 2388, 10580, 6484, 14676, 1364, 9556, 5460, 13652, 3412, 11604, 7508, 15700, + 852, 9044, 4948, 13140, 2900, 11092, 6996, 15188, 1876, 10068, 5972, 14164, 3924, 12116, 8020, 16212, + 212, 8404, 4308, 12500, 2260, 10452, 6356, 14548, 1236, 9428, 5332, 13524, 3284, 11476, 7380, 15572, + 724, 8916, 4820, 13012, 2772, 10964, 
6868, 15060, 1748, 9940, 5844, 14036, 3796, 11988, 7892, 16084, + 468, 8660, 4564, 12756, 2516, 10708, 6612, 14804, 1492, 9684, 5588, 13780, 3540, 11732, 7636, 15828, + 980, 9172, 5076, 13268, 3028, 11220, 7124, 15316, 2004, 10196, 6100, 14292, 4052, 12244, 8148, 16340, + 52, 8244, 4148, 12340, 2100, 10292, 6196, 14388, 1076, 9268, 5172, 13364, 3124, 11316, 7220, 15412, + 564, 8756, 4660, 12852, 2612, 10804, 6708, 14900, 1588, 9780, 5684, 13876, 3636, 11828, 7732, 15924, + 308, 8500, 4404, 12596, 2356, 10548, 6452, 14644, 1332, 9524, 5428, 13620, 3380, 11572, 7476, 15668, + 820, 9012, 4916, 13108, 2868, 11060, 6964, 15156, 1844, 10036, 5940, 14132, 3892, 12084, 7988, 16180, + 180, 8372, 4276, 12468, 2228, 10420, 6324, 14516, 1204, 9396, 5300, 13492, 3252, 11444, 7348, 15540, + 692, 8884, 4788, 12980, 2740, 10932, 6836, 15028, 1716, 9908, 5812, 14004, 3764, 11956, 7860, 16052, + 436, 8628, 4532, 12724, 2484, 10676, 6580, 14772, 1460, 9652, 5556, 13748, 3508, 11700, 7604, 15796, + 948, 9140, 5044, 13236, 2996, 11188, 7092, 15284, 1972, 10164, 6068, 14260, 4020, 12212, 8116, 16308, + 116, 8308, 4212, 12404, 2164, 10356, 6260, 14452, 1140, 9332, 5236, 13428, 3188, 11380, 7284, 15476, + 628, 8820, 4724, 12916, 2676, 10868, 6772, 14964, 1652, 9844, 5748, 13940, 3700, 11892, 7796, 15988, + 372, 8564, 4468, 12660, 2420, 10612, 6516, 14708, 1396, 9588, 5492, 13684, 3444, 11636, 7540, 15732, + 884, 9076, 4980, 13172, 2932, 11124, 7028, 15220, 1908, 10100, 6004, 14196, 3956, 12148, 8052, 16244, + 244, 8436, 4340, 12532, 2292, 10484, 6388, 14580, 1268, 9460, 5364, 13556, 3316, 11508, 7412, 15604, + 756, 8948, 4852, 13044, 2804, 10996, 6900, 15092, 1780, 9972, 5876, 14068, 3828, 12020, 7924, 16116, + 500, 8692, 4596, 12788, 2548, 10740, 6644, 14836, 1524, 9716, 5620, 13812, 3572, 11764, 7668, 15860, + 1012, 9204, 5108, 13300, 3060, 11252, 7156, 15348, 2036, 10228, 6132, 14324, 4084, 12276, 8180, 16372, + 12, 8204, 4108, 12300, 2060, 10252, 6156, 14348, 1036, 9228, 5132, 13324, 
3084, 11276, 7180, 15372, + 524, 8716, 4620, 12812, 2572, 10764, 6668, 14860, 1548, 9740, 5644, 13836, 3596, 11788, 7692, 15884, + 268, 8460, 4364, 12556, 2316, 10508, 6412, 14604, 1292, 9484, 5388, 13580, 3340, 11532, 7436, 15628, + 780, 8972, 4876, 13068, 2828, 11020, 6924, 15116, 1804, 9996, 5900, 14092, 3852, 12044, 7948, 16140, + 140, 8332, 4236, 12428, 2188, 10380, 6284, 14476, 1164, 9356, 5260, 13452, 3212, 11404, 7308, 15500, + 652, 8844, 4748, 12940, 2700, 10892, 6796, 14988, 1676, 9868, 5772, 13964, 3724, 11916, 7820, 16012, + 396, 8588, 4492, 12684, 2444, 10636, 6540, 14732, 1420, 9612, 5516, 13708, 3468, 11660, 7564, 15756, + 908, 9100, 5004, 13196, 2956, 11148, 7052, 15244, 1932, 10124, 6028, 14220, 3980, 12172, 8076, 16268, + 76, 8268, 4172, 12364, 2124, 10316, 6220, 14412, 1100, 9292, 5196, 13388, 3148, 11340, 7244, 15436, + 588, 8780, 4684, 12876, 2636, 10828, 6732, 14924, 1612, 9804, 5708, 13900, 3660, 11852, 7756, 15948, + 332, 8524, 4428, 12620, 2380, 10572, 6476, 14668, 1356, 9548, 5452, 13644, 3404, 11596, 7500, 15692, + 844, 9036, 4940, 13132, 2892, 11084, 6988, 15180, 1868, 10060, 5964, 14156, 3916, 12108, 8012, 16204, + 204, 8396, 4300, 12492, 2252, 10444, 6348, 14540, 1228, 9420, 5324, 13516, 3276, 11468, 7372, 15564, + 716, 8908, 4812, 13004, 2764, 10956, 6860, 15052, 1740, 9932, 5836, 14028, 3788, 11980, 7884, 16076, + 460, 8652, 4556, 12748, 2508, 10700, 6604, 14796, 1484, 9676, 5580, 13772, 3532, 11724, 7628, 15820, + 972, 9164, 5068, 13260, 3020, 11212, 7116, 15308, 1996, 10188, 6092, 14284, 4044, 12236, 8140, 16332, + 44, 8236, 4140, 12332, 2092, 10284, 6188, 14380, 1068, 9260, 5164, 13356, 3116, 11308, 7212, 15404, + 556, 8748, 4652, 12844, 2604, 10796, 6700, 14892, 1580, 9772, 5676, 13868, 3628, 11820, 7724, 15916, + 300, 8492, 4396, 12588, 2348, 10540, 6444, 14636, 1324, 9516, 5420, 13612, 3372, 11564, 7468, 15660, + 812, 9004, 4908, 13100, 2860, 11052, 6956, 15148, 1836, 10028, 5932, 14124, 3884, 12076, 7980, 16172, + 172, 8364, 
4268, 12460, 2220, 10412, 6316, 14508, 1196, 9388, 5292, 13484, 3244, 11436, 7340, 15532, + 684, 8876, 4780, 12972, 2732, 10924, 6828, 15020, 1708, 9900, 5804, 13996, 3756, 11948, 7852, 16044, + 428, 8620, 4524, 12716, 2476, 10668, 6572, 14764, 1452, 9644, 5548, 13740, 3500, 11692, 7596, 15788, + 940, 9132, 5036, 13228, 2988, 11180, 7084, 15276, 1964, 10156, 6060, 14252, 4012, 12204, 8108, 16300, + 108, 8300, 4204, 12396, 2156, 10348, 6252, 14444, 1132, 9324, 5228, 13420, 3180, 11372, 7276, 15468, + 620, 8812, 4716, 12908, 2668, 10860, 6764, 14956, 1644, 9836, 5740, 13932, 3692, 11884, 7788, 15980, + 364, 8556, 4460, 12652, 2412, 10604, 6508, 14700, 1388, 9580, 5484, 13676, 3436, 11628, 7532, 15724, + 876, 9068, 4972, 13164, 2924, 11116, 7020, 15212, 1900, 10092, 5996, 14188, 3948, 12140, 8044, 16236, + 236, 8428, 4332, 12524, 2284, 10476, 6380, 14572, 1260, 9452, 5356, 13548, 3308, 11500, 7404, 15596, + 748, 8940, 4844, 13036, 2796, 10988, 6892, 15084, 1772, 9964, 5868, 14060, 3820, 12012, 7916, 16108, + 492, 8684, 4588, 12780, 2540, 10732, 6636, 14828, 1516, 9708, 5612, 13804, 3564, 11756, 7660, 15852, + 1004, 9196, 5100, 13292, 3052, 11244, 7148, 15340, 2028, 10220, 6124, 14316, 4076, 12268, 8172, 16364, + 28, 8220, 4124, 12316, 2076, 10268, 6172, 14364, 1052, 9244, 5148, 13340, 3100, 11292, 7196, 15388, + 540, 8732, 4636, 12828, 2588, 10780, 6684, 14876, 1564, 9756, 5660, 13852, 3612, 11804, 7708, 15900, + 284, 8476, 4380, 12572, 2332, 10524, 6428, 14620, 1308, 9500, 5404, 13596, 3356, 11548, 7452, 15644, + 796, 8988, 4892, 13084, 2844, 11036, 6940, 15132, 1820, 10012, 5916, 14108, 3868, 12060, 7964, 16156, + 156, 8348, 4252, 12444, 2204, 10396, 6300, 14492, 1180, 9372, 5276, 13468, 3228, 11420, 7324, 15516, + 668, 8860, 4764, 12956, 2716, 10908, 6812, 15004, 1692, 9884, 5788, 13980, 3740, 11932, 7836, 16028, + 412, 8604, 4508, 12700, 2460, 10652, 6556, 14748, 1436, 9628, 5532, 13724, 3484, 11676, 7580, 15772, + 924, 9116, 5020, 13212, 2972, 11164, 7068, 15260, 
1948, 10140, 6044, 14236, 3996, 12188, 8092, 16284, + 92, 8284, 4188, 12380, 2140, 10332, 6236, 14428, 1116, 9308, 5212, 13404, 3164, 11356, 7260, 15452, + 604, 8796, 4700, 12892, 2652, 10844, 6748, 14940, 1628, 9820, 5724, 13916, 3676, 11868, 7772, 15964, + 348, 8540, 4444, 12636, 2396, 10588, 6492, 14684, 1372, 9564, 5468, 13660, 3420, 11612, 7516, 15708, + 860, 9052, 4956, 13148, 2908, 11100, 7004, 15196, 1884, 10076, 5980, 14172, 3932, 12124, 8028, 16220, + 220, 8412, 4316, 12508, 2268, 10460, 6364, 14556, 1244, 9436, 5340, 13532, 3292, 11484, 7388, 15580, + 732, 8924, 4828, 13020, 2780, 10972, 6876, 15068, 1756, 9948, 5852, 14044, 3804, 11996, 7900, 16092, + 476, 8668, 4572, 12764, 2524, 10716, 6620, 14812, 1500, 9692, 5596, 13788, 3548, 11740, 7644, 15836, + 988, 9180, 5084, 13276, 3036, 11228, 7132, 15324, 2012, 10204, 6108, 14300, 4060, 12252, 8156, 16348, + 60, 8252, 4156, 12348, 2108, 10300, 6204, 14396, 1084, 9276, 5180, 13372, 3132, 11324, 7228, 15420, + 572, 8764, 4668, 12860, 2620, 10812, 6716, 14908, 1596, 9788, 5692, 13884, 3644, 11836, 7740, 15932, + 316, 8508, 4412, 12604, 2364, 10556, 6460, 14652, 1340, 9532, 5436, 13628, 3388, 11580, 7484, 15676, + 828, 9020, 4924, 13116, 2876, 11068, 6972, 15164, 1852, 10044, 5948, 14140, 3900, 12092, 7996, 16188, + 188, 8380, 4284, 12476, 2236, 10428, 6332, 14524, 1212, 9404, 5308, 13500, 3260, 11452, 7356, 15548, + 700, 8892, 4796, 12988, 2748, 10940, 6844, 15036, 1724, 9916, 5820, 14012, 3772, 11964, 7868, 16060, + 444, 8636, 4540, 12732, 2492, 10684, 6588, 14780, 1468, 9660, 5564, 13756, 3516, 11708, 7612, 15804, + 956, 9148, 5052, 13244, 3004, 11196, 7100, 15292, 1980, 10172, 6076, 14268, 4028, 12220, 8124, 16316, + 124, 8316, 4220, 12412, 2172, 10364, 6268, 14460, 1148, 9340, 5244, 13436, 3196, 11388, 7292, 15484, + 636, 8828, 4732, 12924, 2684, 10876, 6780, 14972, 1660, 9852, 5756, 13948, 3708, 11900, 7804, 15996, + 380, 8572, 4476, 12668, 2428, 10620, 6524, 14716, 1404, 9596, 5500, 13692, 3452, 11644, 
7548, 15740, + 892, 9084, 4988, 13180, 2940, 11132, 7036, 15228, 1916, 10108, 6012, 14204, 3964, 12156, 8060, 16252, + 252, 8444, 4348, 12540, 2300, 10492, 6396, 14588, 1276, 9468, 5372, 13564, 3324, 11516, 7420, 15612, + 764, 8956, 4860, 13052, 2812, 11004, 6908, 15100, 1788, 9980, 5884, 14076, 3836, 12028, 7932, 16124, + 508, 8700, 4604, 12796, 2556, 10748, 6652, 14844, 1532, 9724, 5628, 13820, 3580, 11772, 7676, 15868, + 1020, 9212, 5116, 13308, 3068, 11260, 7164, 15356, 2044, 10236, 6140, 14332, 4092, 12284, 8188, 16380, + 2, 8194, 4098, 12290, 2050, 10242, 6146, 14338, 1026, 9218, 5122, 13314, 3074, 11266, 7170, 15362, + 514, 8706, 4610, 12802, 2562, 10754, 6658, 14850, 1538, 9730, 5634, 13826, 3586, 11778, 7682, 15874, + 258, 8450, 4354, 12546, 2306, 10498, 6402, 14594, 1282, 9474, 5378, 13570, 3330, 11522, 7426, 15618, + 770, 8962, 4866, 13058, 2818, 11010, 6914, 15106, 1794, 9986, 5890, 14082, 3842, 12034, 7938, 16130, + 130, 8322, 4226, 12418, 2178, 10370, 6274, 14466, 1154, 9346, 5250, 13442, 3202, 11394, 7298, 15490, + 642, 8834, 4738, 12930, 2690, 10882, 6786, 14978, 1666, 9858, 5762, 13954, 3714, 11906, 7810, 16002, + 386, 8578, 4482, 12674, 2434, 10626, 6530, 14722, 1410, 9602, 5506, 13698, 3458, 11650, 7554, 15746, + 898, 9090, 4994, 13186, 2946, 11138, 7042, 15234, 1922, 10114, 6018, 14210, 3970, 12162, 8066, 16258, + 66, 8258, 4162, 12354, 2114, 10306, 6210, 14402, 1090, 9282, 5186, 13378, 3138, 11330, 7234, 15426, + 578, 8770, 4674, 12866, 2626, 10818, 6722, 14914, 1602, 9794, 5698, 13890, 3650, 11842, 7746, 15938, + 322, 8514, 4418, 12610, 2370, 10562, 6466, 14658, 1346, 9538, 5442, 13634, 3394, 11586, 7490, 15682, + 834, 9026, 4930, 13122, 2882, 11074, 6978, 15170, 1858, 10050, 5954, 14146, 3906, 12098, 8002, 16194, + 194, 8386, 4290, 12482, 2242, 10434, 6338, 14530, 1218, 9410, 5314, 13506, 3266, 11458, 7362, 15554, + 706, 8898, 4802, 12994, 2754, 10946, 6850, 15042, 1730, 9922, 5826, 14018, 3778, 11970, 7874, 16066, + 450, 8642, 4546, 12738, 
2498, 10690, 6594, 14786, 1474, 9666, 5570, 13762, 3522, 11714, 7618, 15810, + 962, 9154, 5058, 13250, 3010, 11202, 7106, 15298, 1986, 10178, 6082, 14274, 4034, 12226, 8130, 16322, + 34, 8226, 4130, 12322, 2082, 10274, 6178, 14370, 1058, 9250, 5154, 13346, 3106, 11298, 7202, 15394, + 546, 8738, 4642, 12834, 2594, 10786, 6690, 14882, 1570, 9762, 5666, 13858, 3618, 11810, 7714, 15906, + 290, 8482, 4386, 12578, 2338, 10530, 6434, 14626, 1314, 9506, 5410, 13602, 3362, 11554, 7458, 15650, + 802, 8994, 4898, 13090, 2850, 11042, 6946, 15138, 1826, 10018, 5922, 14114, 3874, 12066, 7970, 16162, + 162, 8354, 4258, 12450, 2210, 10402, 6306, 14498, 1186, 9378, 5282, 13474, 3234, 11426, 7330, 15522, + 674, 8866, 4770, 12962, 2722, 10914, 6818, 15010, 1698, 9890, 5794, 13986, 3746, 11938, 7842, 16034, + 418, 8610, 4514, 12706, 2466, 10658, 6562, 14754, 1442, 9634, 5538, 13730, 3490, 11682, 7586, 15778, + 930, 9122, 5026, 13218, 2978, 11170, 7074, 15266, 1954, 10146, 6050, 14242, 4002, 12194, 8098, 16290, + 98, 8290, 4194, 12386, 2146, 10338, 6242, 14434, 1122, 9314, 5218, 13410, 3170, 11362, 7266, 15458, + 610, 8802, 4706, 12898, 2658, 10850, 6754, 14946, 1634, 9826, 5730, 13922, 3682, 11874, 7778, 15970, + 354, 8546, 4450, 12642, 2402, 10594, 6498, 14690, 1378, 9570, 5474, 13666, 3426, 11618, 7522, 15714, + 866, 9058, 4962, 13154, 2914, 11106, 7010, 15202, 1890, 10082, 5986, 14178, 3938, 12130, 8034, 16226, + 226, 8418, 4322, 12514, 2274, 10466, 6370, 14562, 1250, 9442, 5346, 13538, 3298, 11490, 7394, 15586, + 738, 8930, 4834, 13026, 2786, 10978, 6882, 15074, 1762, 9954, 5858, 14050, 3810, 12002, 7906, 16098, + 482, 8674, 4578, 12770, 2530, 10722, 6626, 14818, 1506, 9698, 5602, 13794, 3554, 11746, 7650, 15842, + 994, 9186, 5090, 13282, 3042, 11234, 7138, 15330, 2018, 10210, 6114, 14306, 4066, 12258, 8162, 16354, + 18, 8210, 4114, 12306, 2066, 10258, 6162, 14354, 1042, 9234, 5138, 13330, 3090, 11282, 7186, 15378, + 530, 8722, 4626, 12818, 2578, 10770, 6674, 14866, 1554, 9746, 
5650, 13842, 3602, 11794, 7698, 15890, + 274, 8466, 4370, 12562, 2322, 10514, 6418, 14610, 1298, 9490, 5394, 13586, 3346, 11538, 7442, 15634, + 786, 8978, 4882, 13074, 2834, 11026, 6930, 15122, 1810, 10002, 5906, 14098, 3858, 12050, 7954, 16146, + 146, 8338, 4242, 12434, 2194, 10386, 6290, 14482, 1170, 9362, 5266, 13458, 3218, 11410, 7314, 15506, + 658, 8850, 4754, 12946, 2706, 10898, 6802, 14994, 1682, 9874, 5778, 13970, 3730, 11922, 7826, 16018, + 402, 8594, 4498, 12690, 2450, 10642, 6546, 14738, 1426, 9618, 5522, 13714, 3474, 11666, 7570, 15762, + 914, 9106, 5010, 13202, 2962, 11154, 7058, 15250, 1938, 10130, 6034, 14226, 3986, 12178, 8082, 16274, + 82, 8274, 4178, 12370, 2130, 10322, 6226, 14418, 1106, 9298, 5202, 13394, 3154, 11346, 7250, 15442, + 594, 8786, 4690, 12882, 2642, 10834, 6738, 14930, 1618, 9810, 5714, 13906, 3666, 11858, 7762, 15954, + 338, 8530, 4434, 12626, 2386, 10578, 6482, 14674, 1362, 9554, 5458, 13650, 3410, 11602, 7506, 15698, + 850, 9042, 4946, 13138, 2898, 11090, 6994, 15186, 1874, 10066, 5970, 14162, 3922, 12114, 8018, 16210, + 210, 8402, 4306, 12498, 2258, 10450, 6354, 14546, 1234, 9426, 5330, 13522, 3282, 11474, 7378, 15570, + 722, 8914, 4818, 13010, 2770, 10962, 6866, 15058, 1746, 9938, 5842, 14034, 3794, 11986, 7890, 16082, + 466, 8658, 4562, 12754, 2514, 10706, 6610, 14802, 1490, 9682, 5586, 13778, 3538, 11730, 7634, 15826, + 978, 9170, 5074, 13266, 3026, 11218, 7122, 15314, 2002, 10194, 6098, 14290, 4050, 12242, 8146, 16338, + 50, 8242, 4146, 12338, 2098, 10290, 6194, 14386, 1074, 9266, 5170, 13362, 3122, 11314, 7218, 15410, + 562, 8754, 4658, 12850, 2610, 10802, 6706, 14898, 1586, 9778, 5682, 13874, 3634, 11826, 7730, 15922, + 306, 8498, 4402, 12594, 2354, 10546, 6450, 14642, 1330, 9522, 5426, 13618, 3378, 11570, 7474, 15666, + 818, 9010, 4914, 13106, 2866, 11058, 6962, 15154, 1842, 10034, 5938, 14130, 3890, 12082, 7986, 16178, + 178, 8370, 4274, 12466, 2226, 10418, 6322, 14514, 1202, 9394, 5298, 13490, 3250, 11442, 7346, 15538, 
+ 690, 8882, 4786, 12978, 2738, 10930, 6834, 15026, 1714, 9906, 5810, 14002, 3762, 11954, 7858, 16050, + 434, 8626, 4530, 12722, 2482, 10674, 6578, 14770, 1458, 9650, 5554, 13746, 3506, 11698, 7602, 15794, + 946, 9138, 5042, 13234, 2994, 11186, 7090, 15282, 1970, 10162, 6066, 14258, 4018, 12210, 8114, 16306, + 114, 8306, 4210, 12402, 2162, 10354, 6258, 14450, 1138, 9330, 5234, 13426, 3186, 11378, 7282, 15474, + 626, 8818, 4722, 12914, 2674, 10866, 6770, 14962, 1650, 9842, 5746, 13938, 3698, 11890, 7794, 15986, + 370, 8562, 4466, 12658, 2418, 10610, 6514, 14706, 1394, 9586, 5490, 13682, 3442, 11634, 7538, 15730, + 882, 9074, 4978, 13170, 2930, 11122, 7026, 15218, 1906, 10098, 6002, 14194, 3954, 12146, 8050, 16242, + 242, 8434, 4338, 12530, 2290, 10482, 6386, 14578, 1266, 9458, 5362, 13554, 3314, 11506, 7410, 15602, + 754, 8946, 4850, 13042, 2802, 10994, 6898, 15090, 1778, 9970, 5874, 14066, 3826, 12018, 7922, 16114, + 498, 8690, 4594, 12786, 2546, 10738, 6642, 14834, 1522, 9714, 5618, 13810, 3570, 11762, 7666, 15858, + 1010, 9202, 5106, 13298, 3058, 11250, 7154, 15346, 2034, 10226, 6130, 14322, 4082, 12274, 8178, 16370, + 10, 8202, 4106, 12298, 2058, 10250, 6154, 14346, 1034, 9226, 5130, 13322, 3082, 11274, 7178, 15370, + 522, 8714, 4618, 12810, 2570, 10762, 6666, 14858, 1546, 9738, 5642, 13834, 3594, 11786, 7690, 15882, + 266, 8458, 4362, 12554, 2314, 10506, 6410, 14602, 1290, 9482, 5386, 13578, 3338, 11530, 7434, 15626, + 778, 8970, 4874, 13066, 2826, 11018, 6922, 15114, 1802, 9994, 5898, 14090, 3850, 12042, 7946, 16138, + 138, 8330, 4234, 12426, 2186, 10378, 6282, 14474, 1162, 9354, 5258, 13450, 3210, 11402, 7306, 15498, + 650, 8842, 4746, 12938, 2698, 10890, 6794, 14986, 1674, 9866, 5770, 13962, 3722, 11914, 7818, 16010, + 394, 8586, 4490, 12682, 2442, 10634, 6538, 14730, 1418, 9610, 5514, 13706, 3466, 11658, 7562, 15754, + 906, 9098, 5002, 13194, 2954, 11146, 7050, 15242, 1930, 10122, 6026, 14218, 3978, 12170, 8074, 16266, + 74, 8266, 4170, 12362, 2122, 10314, 
6218, 14410, 1098, 9290, 5194, 13386, 3146, 11338, 7242, 15434, + 586, 8778, 4682, 12874, 2634, 10826, 6730, 14922, 1610, 9802, 5706, 13898, 3658, 11850, 7754, 15946, + 330, 8522, 4426, 12618, 2378, 10570, 6474, 14666, 1354, 9546, 5450, 13642, 3402, 11594, 7498, 15690, + 842, 9034, 4938, 13130, 2890, 11082, 6986, 15178, 1866, 10058, 5962, 14154, 3914, 12106, 8010, 16202, + 202, 8394, 4298, 12490, 2250, 10442, 6346, 14538, 1226, 9418, 5322, 13514, 3274, 11466, 7370, 15562, + 714, 8906, 4810, 13002, 2762, 10954, 6858, 15050, 1738, 9930, 5834, 14026, 3786, 11978, 7882, 16074, + 458, 8650, 4554, 12746, 2506, 10698, 6602, 14794, 1482, 9674, 5578, 13770, 3530, 11722, 7626, 15818, + 970, 9162, 5066, 13258, 3018, 11210, 7114, 15306, 1994, 10186, 6090, 14282, 4042, 12234, 8138, 16330, + 42, 8234, 4138, 12330, 2090, 10282, 6186, 14378, 1066, 9258, 5162, 13354, 3114, 11306, 7210, 15402, + 554, 8746, 4650, 12842, 2602, 10794, 6698, 14890, 1578, 9770, 5674, 13866, 3626, 11818, 7722, 15914, + 298, 8490, 4394, 12586, 2346, 10538, 6442, 14634, 1322, 9514, 5418, 13610, 3370, 11562, 7466, 15658, + 810, 9002, 4906, 13098, 2858, 11050, 6954, 15146, 1834, 10026, 5930, 14122, 3882, 12074, 7978, 16170, + 170, 8362, 4266, 12458, 2218, 10410, 6314, 14506, 1194, 9386, 5290, 13482, 3242, 11434, 7338, 15530, + 682, 8874, 4778, 12970, 2730, 10922, 6826, 15018, 1706, 9898, 5802, 13994, 3754, 11946, 7850, 16042, + 426, 8618, 4522, 12714, 2474, 10666, 6570, 14762, 1450, 9642, 5546, 13738, 3498, 11690, 7594, 15786, + 938, 9130, 5034, 13226, 2986, 11178, 7082, 15274, 1962, 10154, 6058, 14250, 4010, 12202, 8106, 16298, + 106, 8298, 4202, 12394, 2154, 10346, 6250, 14442, 1130, 9322, 5226, 13418, 3178, 11370, 7274, 15466, + 618, 8810, 4714, 12906, 2666, 10858, 6762, 14954, 1642, 9834, 5738, 13930, 3690, 11882, 7786, 15978, + 362, 8554, 4458, 12650, 2410, 10602, 6506, 14698, 1386, 9578, 5482, 13674, 3434, 11626, 7530, 15722, + 874, 9066, 4970, 13162, 2922, 11114, 7018, 15210, 1898, 10090, 5994, 14186, 
3946, 12138, 8042, 16234, + 234, 8426, 4330, 12522, 2282, 10474, 6378, 14570, 1258, 9450, 5354, 13546, 3306, 11498, 7402, 15594, + 746, 8938, 4842, 13034, 2794, 10986, 6890, 15082, 1770, 9962, 5866, 14058, 3818, 12010, 7914, 16106, + 490, 8682, 4586, 12778, 2538, 10730, 6634, 14826, 1514, 9706, 5610, 13802, 3562, 11754, 7658, 15850, + 1002, 9194, 5098, 13290, 3050, 11242, 7146, 15338, 2026, 10218, 6122, 14314, 4074, 12266, 8170, 16362, + 26, 8218, 4122, 12314, 2074, 10266, 6170, 14362, 1050, 9242, 5146, 13338, 3098, 11290, 7194, 15386, + 538, 8730, 4634, 12826, 2586, 10778, 6682, 14874, 1562, 9754, 5658, 13850, 3610, 11802, 7706, 15898, + 282, 8474, 4378, 12570, 2330, 10522, 6426, 14618, 1306, 9498, 5402, 13594, 3354, 11546, 7450, 15642, + 794, 8986, 4890, 13082, 2842, 11034, 6938, 15130, 1818, 10010, 5914, 14106, 3866, 12058, 7962, 16154, + 154, 8346, 4250, 12442, 2202, 10394, 6298, 14490, 1178, 9370, 5274, 13466, 3226, 11418, 7322, 15514, + 666, 8858, 4762, 12954, 2714, 10906, 6810, 15002, 1690, 9882, 5786, 13978, 3738, 11930, 7834, 16026, + 410, 8602, 4506, 12698, 2458, 10650, 6554, 14746, 1434, 9626, 5530, 13722, 3482, 11674, 7578, 15770, + 922, 9114, 5018, 13210, 2970, 11162, 7066, 15258, 1946, 10138, 6042, 14234, 3994, 12186, 8090, 16282, + 90, 8282, 4186, 12378, 2138, 10330, 6234, 14426, 1114, 9306, 5210, 13402, 3162, 11354, 7258, 15450, + 602, 8794, 4698, 12890, 2650, 10842, 6746, 14938, 1626, 9818, 5722, 13914, 3674, 11866, 7770, 15962, + 346, 8538, 4442, 12634, 2394, 10586, 6490, 14682, 1370, 9562, 5466, 13658, 3418, 11610, 7514, 15706, + 858, 9050, 4954, 13146, 2906, 11098, 7002, 15194, 1882, 10074, 5978, 14170, 3930, 12122, 8026, 16218, + 218, 8410, 4314, 12506, 2266, 10458, 6362, 14554, 1242, 9434, 5338, 13530, 3290, 11482, 7386, 15578, + 730, 8922, 4826, 13018, 2778, 10970, 6874, 15066, 1754, 9946, 5850, 14042, 3802, 11994, 7898, 16090, + 474, 8666, 4570, 12762, 2522, 10714, 6618, 14810, 1498, 9690, 5594, 13786, 3546, 11738, 7642, 15834, + 986, 9178, 
5082, 13274, 3034, 11226, 7130, 15322, 2010, 10202, 6106, 14298, 4058, 12250, 8154, 16346, + 58, 8250, 4154, 12346, 2106, 10298, 6202, 14394, 1082, 9274, 5178, 13370, 3130, 11322, 7226, 15418, + 570, 8762, 4666, 12858, 2618, 10810, 6714, 14906, 1594, 9786, 5690, 13882, 3642, 11834, 7738, 15930, + 314, 8506, 4410, 12602, 2362, 10554, 6458, 14650, 1338, 9530, 5434, 13626, 3386, 11578, 7482, 15674, + 826, 9018, 4922, 13114, 2874, 11066, 6970, 15162, 1850, 10042, 5946, 14138, 3898, 12090, 7994, 16186, + 186, 8378, 4282, 12474, 2234, 10426, 6330, 14522, 1210, 9402, 5306, 13498, 3258, 11450, 7354, 15546, + 698, 8890, 4794, 12986, 2746, 10938, 6842, 15034, 1722, 9914, 5818, 14010, 3770, 11962, 7866, 16058, + 442, 8634, 4538, 12730, 2490, 10682, 6586, 14778, 1466, 9658, 5562, 13754, 3514, 11706, 7610, 15802, + 954, 9146, 5050, 13242, 3002, 11194, 7098, 15290, 1978, 10170, 6074, 14266, 4026, 12218, 8122, 16314, + 122, 8314, 4218, 12410, 2170, 10362, 6266, 14458, 1146, 9338, 5242, 13434, 3194, 11386, 7290, 15482, + 634, 8826, 4730, 12922, 2682, 10874, 6778, 14970, 1658, 9850, 5754, 13946, 3706, 11898, 7802, 15994, + 378, 8570, 4474, 12666, 2426, 10618, 6522, 14714, 1402, 9594, 5498, 13690, 3450, 11642, 7546, 15738, + 890, 9082, 4986, 13178, 2938, 11130, 7034, 15226, 1914, 10106, 6010, 14202, 3962, 12154, 8058, 16250, + 250, 8442, 4346, 12538, 2298, 10490, 6394, 14586, 1274, 9466, 5370, 13562, 3322, 11514, 7418, 15610, + 762, 8954, 4858, 13050, 2810, 11002, 6906, 15098, 1786, 9978, 5882, 14074, 3834, 12026, 7930, 16122, + 506, 8698, 4602, 12794, 2554, 10746, 6650, 14842, 1530, 9722, 5626, 13818, 3578, 11770, 7674, 15866, + 1018, 9210, 5114, 13306, 3066, 11258, 7162, 15354, 2042, 10234, 6138, 14330, 4090, 12282, 8186, 16378, + 6, 8198, 4102, 12294, 2054, 10246, 6150, 14342, 1030, 9222, 5126, 13318, 3078, 11270, 7174, 15366, + 518, 8710, 4614, 12806, 2566, 10758, 6662, 14854, 1542, 9734, 5638, 13830, 3590, 11782, 7686, 15878, + 262, 8454, 4358, 12550, 2310, 10502, 6406, 14598, 
1286, 9478, 5382, 13574, 3334, 11526, 7430, 15622, + 774, 8966, 4870, 13062, 2822, 11014, 6918, 15110, 1798, 9990, 5894, 14086, 3846, 12038, 7942, 16134, + 134, 8326, 4230, 12422, 2182, 10374, 6278, 14470, 1158, 9350, 5254, 13446, 3206, 11398, 7302, 15494, + 646, 8838, 4742, 12934, 2694, 10886, 6790, 14982, 1670, 9862, 5766, 13958, 3718, 11910, 7814, 16006, + 390, 8582, 4486, 12678, 2438, 10630, 6534, 14726, 1414, 9606, 5510, 13702, 3462, 11654, 7558, 15750, + 902, 9094, 4998, 13190, 2950, 11142, 7046, 15238, 1926, 10118, 6022, 14214, 3974, 12166, 8070, 16262, + 70, 8262, 4166, 12358, 2118, 10310, 6214, 14406, 1094, 9286, 5190, 13382, 3142, 11334, 7238, 15430, + 582, 8774, 4678, 12870, 2630, 10822, 6726, 14918, 1606, 9798, 5702, 13894, 3654, 11846, 7750, 15942, + 326, 8518, 4422, 12614, 2374, 10566, 6470, 14662, 1350, 9542, 5446, 13638, 3398, 11590, 7494, 15686, + 838, 9030, 4934, 13126, 2886, 11078, 6982, 15174, 1862, 10054, 5958, 14150, 3910, 12102, 8006, 16198, + 198, 8390, 4294, 12486, 2246, 10438, 6342, 14534, 1222, 9414, 5318, 13510, 3270, 11462, 7366, 15558, + 710, 8902, 4806, 12998, 2758, 10950, 6854, 15046, 1734, 9926, 5830, 14022, 3782, 11974, 7878, 16070, + 454, 8646, 4550, 12742, 2502, 10694, 6598, 14790, 1478, 9670, 5574, 13766, 3526, 11718, 7622, 15814, + 966, 9158, 5062, 13254, 3014, 11206, 7110, 15302, 1990, 10182, 6086, 14278, 4038, 12230, 8134, 16326, + 38, 8230, 4134, 12326, 2086, 10278, 6182, 14374, 1062, 9254, 5158, 13350, 3110, 11302, 7206, 15398, + 550, 8742, 4646, 12838, 2598, 10790, 6694, 14886, 1574, 9766, 5670, 13862, 3622, 11814, 7718, 15910, + 294, 8486, 4390, 12582, 2342, 10534, 6438, 14630, 1318, 9510, 5414, 13606, 3366, 11558, 7462, 15654, + 806, 8998, 4902, 13094, 2854, 11046, 6950, 15142, 1830, 10022, 5926, 14118, 3878, 12070, 7974, 16166, + 166, 8358, 4262, 12454, 2214, 10406, 6310, 14502, 1190, 9382, 5286, 13478, 3238, 11430, 7334, 15526, + 678, 8870, 4774, 12966, 2726, 10918, 6822, 15014, 1702, 9894, 5798, 13990, 3750, 11942, 
7846, 16038, + 422, 8614, 4518, 12710, 2470, 10662, 6566, 14758, 1446, 9638, 5542, 13734, 3494, 11686, 7590, 15782, + 934, 9126, 5030, 13222, 2982, 11174, 7078, 15270, 1958, 10150, 6054, 14246, 4006, 12198, 8102, 16294, + 102, 8294, 4198, 12390, 2150, 10342, 6246, 14438, 1126, 9318, 5222, 13414, 3174, 11366, 7270, 15462, + 614, 8806, 4710, 12902, 2662, 10854, 6758, 14950, 1638, 9830, 5734, 13926, 3686, 11878, 7782, 15974, + 358, 8550, 4454, 12646, 2406, 10598, 6502, 14694, 1382, 9574, 5478, 13670, 3430, 11622, 7526, 15718, + 870, 9062, 4966, 13158, 2918, 11110, 7014, 15206, 1894, 10086, 5990, 14182, 3942, 12134, 8038, 16230, + 230, 8422, 4326, 12518, 2278, 10470, 6374, 14566, 1254, 9446, 5350, 13542, 3302, 11494, 7398, 15590, + 742, 8934, 4838, 13030, 2790, 10982, 6886, 15078, 1766, 9958, 5862, 14054, 3814, 12006, 7910, 16102, + 486, 8678, 4582, 12774, 2534, 10726, 6630, 14822, 1510, 9702, 5606, 13798, 3558, 11750, 7654, 15846, + 998, 9190, 5094, 13286, 3046, 11238, 7142, 15334, 2022, 10214, 6118, 14310, 4070, 12262, 8166, 16358, + 22, 8214, 4118, 12310, 2070, 10262, 6166, 14358, 1046, 9238, 5142, 13334, 3094, 11286, 7190, 15382, + 534, 8726, 4630, 12822, 2582, 10774, 6678, 14870, 1558, 9750, 5654, 13846, 3606, 11798, 7702, 15894, + 278, 8470, 4374, 12566, 2326, 10518, 6422, 14614, 1302, 9494, 5398, 13590, 3350, 11542, 7446, 15638, + 790, 8982, 4886, 13078, 2838, 11030, 6934, 15126, 1814, 10006, 5910, 14102, 3862, 12054, 7958, 16150, + 150, 8342, 4246, 12438, 2198, 10390, 6294, 14486, 1174, 9366, 5270, 13462, 3222, 11414, 7318, 15510, + 662, 8854, 4758, 12950, 2710, 10902, 6806, 14998, 1686, 9878, 5782, 13974, 3734, 11926, 7830, 16022, + 406, 8598, 4502, 12694, 2454, 10646, 6550, 14742, 1430, 9622, 5526, 13718, 3478, 11670, 7574, 15766, + 918, 9110, 5014, 13206, 2966, 11158, 7062, 15254, 1942, 10134, 6038, 14230, 3990, 12182, 8086, 16278, + 86, 8278, 4182, 12374, 2134, 10326, 6230, 14422, 1110, 9302, 5206, 13398, 3158, 11350, 7254, 15446, + 598, 8790, 4694, 12886, 
2646, 10838, 6742, 14934, 1622, 9814, 5718, 13910, 3670, 11862, 7766, 15958, + 342, 8534, 4438, 12630, 2390, 10582, 6486, 14678, 1366, 9558, 5462, 13654, 3414, 11606, 7510, 15702, + 854, 9046, 4950, 13142, 2902, 11094, 6998, 15190, 1878, 10070, 5974, 14166, 3926, 12118, 8022, 16214, + 214, 8406, 4310, 12502, 2262, 10454, 6358, 14550, 1238, 9430, 5334, 13526, 3286, 11478, 7382, 15574, + 726, 8918, 4822, 13014, 2774, 10966, 6870, 15062, 1750, 9942, 5846, 14038, 3798, 11990, 7894, 16086, + 470, 8662, 4566, 12758, 2518, 10710, 6614, 14806, 1494, 9686, 5590, 13782, 3542, 11734, 7638, 15830, + 982, 9174, 5078, 13270, 3030, 11222, 7126, 15318, 2006, 10198, 6102, 14294, 4054, 12246, 8150, 16342, + 54, 8246, 4150, 12342, 2102, 10294, 6198, 14390, 1078, 9270, 5174, 13366, 3126, 11318, 7222, 15414, + 566, 8758, 4662, 12854, 2614, 10806, 6710, 14902, 1590, 9782, 5686, 13878, 3638, 11830, 7734, 15926, + 310, 8502, 4406, 12598, 2358, 10550, 6454, 14646, 1334, 9526, 5430, 13622, 3382, 11574, 7478, 15670, + 822, 9014, 4918, 13110, 2870, 11062, 6966, 15158, 1846, 10038, 5942, 14134, 3894, 12086, 7990, 16182, + 182, 8374, 4278, 12470, 2230, 10422, 6326, 14518, 1206, 9398, 5302, 13494, 3254, 11446, 7350, 15542, + 694, 8886, 4790, 12982, 2742, 10934, 6838, 15030, 1718, 9910, 5814, 14006, 3766, 11958, 7862, 16054, + 438, 8630, 4534, 12726, 2486, 10678, 6582, 14774, 1462, 9654, 5558, 13750, 3510, 11702, 7606, 15798, + 950, 9142, 5046, 13238, 2998, 11190, 7094, 15286, 1974, 10166, 6070, 14262, 4022, 12214, 8118, 16310, + 118, 8310, 4214, 12406, 2166, 10358, 6262, 14454, 1142, 9334, 5238, 13430, 3190, 11382, 7286, 15478, + 630, 8822, 4726, 12918, 2678, 10870, 6774, 14966, 1654, 9846, 5750, 13942, 3702, 11894, 7798, 15990, + 374, 8566, 4470, 12662, 2422, 10614, 6518, 14710, 1398, 9590, 5494, 13686, 3446, 11638, 7542, 15734, + 886, 9078, 4982, 13174, 2934, 11126, 7030, 15222, 1910, 10102, 6006, 14198, 3958, 12150, 8054, 16246, + 246, 8438, 4342, 12534, 2294, 10486, 6390, 14582, 1270, 9462, 
5366, 13558, 3318, 11510, 7414, 15606, + 758, 8950, 4854, 13046, 2806, 10998, 6902, 15094, 1782, 9974, 5878, 14070, 3830, 12022, 7926, 16118, + 502, 8694, 4598, 12790, 2550, 10742, 6646, 14838, 1526, 9718, 5622, 13814, 3574, 11766, 7670, 15862, + 1014, 9206, 5110, 13302, 3062, 11254, 7158, 15350, 2038, 10230, 6134, 14326, 4086, 12278, 8182, 16374, + 14, 8206, 4110, 12302, 2062, 10254, 6158, 14350, 1038, 9230, 5134, 13326, 3086, 11278, 7182, 15374, + 526, 8718, 4622, 12814, 2574, 10766, 6670, 14862, 1550, 9742, 5646, 13838, 3598, 11790, 7694, 15886, + 270, 8462, 4366, 12558, 2318, 10510, 6414, 14606, 1294, 9486, 5390, 13582, 3342, 11534, 7438, 15630, + 782, 8974, 4878, 13070, 2830, 11022, 6926, 15118, 1806, 9998, 5902, 14094, 3854, 12046, 7950, 16142, + 142, 8334, 4238, 12430, 2190, 10382, 6286, 14478, 1166, 9358, 5262, 13454, 3214, 11406, 7310, 15502, + 654, 8846, 4750, 12942, 2702, 10894, 6798, 14990, 1678, 9870, 5774, 13966, 3726, 11918, 7822, 16014, + 398, 8590, 4494, 12686, 2446, 10638, 6542, 14734, 1422, 9614, 5518, 13710, 3470, 11662, 7566, 15758, + 910, 9102, 5006, 13198, 2958, 11150, 7054, 15246, 1934, 10126, 6030, 14222, 3982, 12174, 8078, 16270, + 78, 8270, 4174, 12366, 2126, 10318, 6222, 14414, 1102, 9294, 5198, 13390, 3150, 11342, 7246, 15438, + 590, 8782, 4686, 12878, 2638, 10830, 6734, 14926, 1614, 9806, 5710, 13902, 3662, 11854, 7758, 15950, + 334, 8526, 4430, 12622, 2382, 10574, 6478, 14670, 1358, 9550, 5454, 13646, 3406, 11598, 7502, 15694, + 846, 9038, 4942, 13134, 2894, 11086, 6990, 15182, 1870, 10062, 5966, 14158, 3918, 12110, 8014, 16206, + 206, 8398, 4302, 12494, 2254, 10446, 6350, 14542, 1230, 9422, 5326, 13518, 3278, 11470, 7374, 15566, + 718, 8910, 4814, 13006, 2766, 10958, 6862, 15054, 1742, 9934, 5838, 14030, 3790, 11982, 7886, 16078, + 462, 8654, 4558, 12750, 2510, 10702, 6606, 14798, 1486, 9678, 5582, 13774, 3534, 11726, 7630, 15822, + 974, 9166, 5070, 13262, 3022, 11214, 7118, 15310, 1998, 10190, 6094, 14286, 4046, 12238, 8142, 16334, 
+ 46, 8238, 4142, 12334, 2094, 10286, 6190, 14382, 1070, 9262, 5166, 13358, 3118, 11310, 7214, 15406, + 558, 8750, 4654, 12846, 2606, 10798, 6702, 14894, 1582, 9774, 5678, 13870, 3630, 11822, 7726, 15918, + 302, 8494, 4398, 12590, 2350, 10542, 6446, 14638, 1326, 9518, 5422, 13614, 3374, 11566, 7470, 15662, + 814, 9006, 4910, 13102, 2862, 11054, 6958, 15150, 1838, 10030, 5934, 14126, 3886, 12078, 7982, 16174, + 174, 8366, 4270, 12462, 2222, 10414, 6318, 14510, 1198, 9390, 5294, 13486, 3246, 11438, 7342, 15534, + 686, 8878, 4782, 12974, 2734, 10926, 6830, 15022, 1710, 9902, 5806, 13998, 3758, 11950, 7854, 16046, + 430, 8622, 4526, 12718, 2478, 10670, 6574, 14766, 1454, 9646, 5550, 13742, 3502, 11694, 7598, 15790, + 942, 9134, 5038, 13230, 2990, 11182, 7086, 15278, 1966, 10158, 6062, 14254, 4014, 12206, 8110, 16302, + 110, 8302, 4206, 12398, 2158, 10350, 6254, 14446, 1134, 9326, 5230, 13422, 3182, 11374, 7278, 15470, + 622, 8814, 4718, 12910, 2670, 10862, 6766, 14958, 1646, 9838, 5742, 13934, 3694, 11886, 7790, 15982, + 366, 8558, 4462, 12654, 2414, 10606, 6510, 14702, 1390, 9582, 5486, 13678, 3438, 11630, 7534, 15726, + 878, 9070, 4974, 13166, 2926, 11118, 7022, 15214, 1902, 10094, 5998, 14190, 3950, 12142, 8046, 16238, + 238, 8430, 4334, 12526, 2286, 10478, 6382, 14574, 1262, 9454, 5358, 13550, 3310, 11502, 7406, 15598, + 750, 8942, 4846, 13038, 2798, 10990, 6894, 15086, 1774, 9966, 5870, 14062, 3822, 12014, 7918, 16110, + 494, 8686, 4590, 12782, 2542, 10734, 6638, 14830, 1518, 9710, 5614, 13806, 3566, 11758, 7662, 15854, + 1006, 9198, 5102, 13294, 3054, 11246, 7150, 15342, 2030, 10222, 6126, 14318, 4078, 12270, 8174, 16366, + 30, 8222, 4126, 12318, 2078, 10270, 6174, 14366, 1054, 9246, 5150, 13342, 3102, 11294, 7198, 15390, + 542, 8734, 4638, 12830, 2590, 10782, 6686, 14878, 1566, 9758, 5662, 13854, 3614, 11806, 7710, 15902, + 286, 8478, 4382, 12574, 2334, 10526, 6430, 14622, 1310, 9502, 5406, 13598, 3358, 11550, 7454, 15646, + 798, 8990, 4894, 13086, 2846, 11038, 
6942, 15134, 1822, 10014, 5918, 14110, 3870, 12062, 7966, 16158, + 158, 8350, 4254, 12446, 2206, 10398, 6302, 14494, 1182, 9374, 5278, 13470, 3230, 11422, 7326, 15518, + 670, 8862, 4766, 12958, 2718, 10910, 6814, 15006, 1694, 9886, 5790, 13982, 3742, 11934, 7838, 16030, + 414, 8606, 4510, 12702, 2462, 10654, 6558, 14750, 1438, 9630, 5534, 13726, 3486, 11678, 7582, 15774, + 926, 9118, 5022, 13214, 2974, 11166, 7070, 15262, 1950, 10142, 6046, 14238, 3998, 12190, 8094, 16286, + 94, 8286, 4190, 12382, 2142, 10334, 6238, 14430, 1118, 9310, 5214, 13406, 3166, 11358, 7262, 15454, + 606, 8798, 4702, 12894, 2654, 10846, 6750, 14942, 1630, 9822, 5726, 13918, 3678, 11870, 7774, 15966, + 350, 8542, 4446, 12638, 2398, 10590, 6494, 14686, 1374, 9566, 5470, 13662, 3422, 11614, 7518, 15710, + 862, 9054, 4958, 13150, 2910, 11102, 7006, 15198, 1886, 10078, 5982, 14174, 3934, 12126, 8030, 16222, + 222, 8414, 4318, 12510, 2270, 10462, 6366, 14558, 1246, 9438, 5342, 13534, 3294, 11486, 7390, 15582, + 734, 8926, 4830, 13022, 2782, 10974, 6878, 15070, 1758, 9950, 5854, 14046, 3806, 11998, 7902, 16094, + 478, 8670, 4574, 12766, 2526, 10718, 6622, 14814, 1502, 9694, 5598, 13790, 3550, 11742, 7646, 15838, + 990, 9182, 5086, 13278, 3038, 11230, 7134, 15326, 2014, 10206, 6110, 14302, 4062, 12254, 8158, 16350, + 62, 8254, 4158, 12350, 2110, 10302, 6206, 14398, 1086, 9278, 5182, 13374, 3134, 11326, 7230, 15422, + 574, 8766, 4670, 12862, 2622, 10814, 6718, 14910, 1598, 9790, 5694, 13886, 3646, 11838, 7742, 15934, + 318, 8510, 4414, 12606, 2366, 10558, 6462, 14654, 1342, 9534, 5438, 13630, 3390, 11582, 7486, 15678, + 830, 9022, 4926, 13118, 2878, 11070, 6974, 15166, 1854, 10046, 5950, 14142, 3902, 12094, 7998, 16190, + 190, 8382, 4286, 12478, 2238, 10430, 6334, 14526, 1214, 9406, 5310, 13502, 3262, 11454, 7358, 15550, + 702, 8894, 4798, 12990, 2750, 10942, 6846, 15038, 1726, 9918, 5822, 14014, 3774, 11966, 7870, 16062, + 446, 8638, 4542, 12734, 2494, 10686, 6590, 14782, 1470, 9662, 5566, 13758, 
3518, 11710, 7614, 15806, + 958, 9150, 5054, 13246, 3006, 11198, 7102, 15294, 1982, 10174, 6078, 14270, 4030, 12222, 8126, 16318, + 126, 8318, 4222, 12414, 2174, 10366, 6270, 14462, 1150, 9342, 5246, 13438, 3198, 11390, 7294, 15486, + 638, 8830, 4734, 12926, 2686, 10878, 6782, 14974, 1662, 9854, 5758, 13950, 3710, 11902, 7806, 15998, + 382, 8574, 4478, 12670, 2430, 10622, 6526, 14718, 1406, 9598, 5502, 13694, 3454, 11646, 7550, 15742, + 894, 9086, 4990, 13182, 2942, 11134, 7038, 15230, 1918, 10110, 6014, 14206, 3966, 12158, 8062, 16254, + 254, 8446, 4350, 12542, 2302, 10494, 6398, 14590, 1278, 9470, 5374, 13566, 3326, 11518, 7422, 15614, + 766, 8958, 4862, 13054, 2814, 11006, 6910, 15102, 1790, 9982, 5886, 14078, 3838, 12030, 7934, 16126, + 510, 8702, 4606, 12798, 2558, 10750, 6654, 14846, 1534, 9726, 5630, 13822, 3582, 11774, 7678, 15870, + 1022, 9214, 5118, 13310, 3070, 11262, 7166, 15358, 2046, 10238, 6142, 14334, 4094, 12286, 8190, 16382, + 1, 8193, 4097, 12289, 2049, 10241, 6145, 14337, 1025, 9217, 5121, 13313, 3073, 11265, 7169, 15361, + 513, 8705, 4609, 12801, 2561, 10753, 6657, 14849, 1537, 9729, 5633, 13825, 3585, 11777, 7681, 15873, + 257, 8449, 4353, 12545, 2305, 10497, 6401, 14593, 1281, 9473, 5377, 13569, 3329, 11521, 7425, 15617, + 769, 8961, 4865, 13057, 2817, 11009, 6913, 15105, 1793, 9985, 5889, 14081, 3841, 12033, 7937, 16129, + 129, 8321, 4225, 12417, 2177, 10369, 6273, 14465, 1153, 9345, 5249, 13441, 3201, 11393, 7297, 15489, + 641, 8833, 4737, 12929, 2689, 10881, 6785, 14977, 1665, 9857, 5761, 13953, 3713, 11905, 7809, 16001, + 385, 8577, 4481, 12673, 2433, 10625, 6529, 14721, 1409, 9601, 5505, 13697, 3457, 11649, 7553, 15745, + 897, 9089, 4993, 13185, 2945, 11137, 7041, 15233, 1921, 10113, 6017, 14209, 3969, 12161, 8065, 16257, + 65, 8257, 4161, 12353, 2113, 10305, 6209, 14401, 1089, 9281, 5185, 13377, 3137, 11329, 7233, 15425, + 577, 8769, 4673, 12865, 2625, 10817, 6721, 14913, 1601, 9793, 5697, 13889, 3649, 11841, 7745, 15937, + 321, 8513, 
4417, 12609, 2369, 10561, 6465, 14657, 1345, 9537, 5441, 13633, 3393, 11585, 7489, 15681, + 833, 9025, 4929, 13121, 2881, 11073, 6977, 15169, 1857, 10049, 5953, 14145, 3905, 12097, 8001, 16193, + 193, 8385, 4289, 12481, 2241, 10433, 6337, 14529, 1217, 9409, 5313, 13505, 3265, 11457, 7361, 15553, + 705, 8897, 4801, 12993, 2753, 10945, 6849, 15041, 1729, 9921, 5825, 14017, 3777, 11969, 7873, 16065, + 449, 8641, 4545, 12737, 2497, 10689, 6593, 14785, 1473, 9665, 5569, 13761, 3521, 11713, 7617, 15809, + 961, 9153, 5057, 13249, 3009, 11201, 7105, 15297, 1985, 10177, 6081, 14273, 4033, 12225, 8129, 16321, + 33, 8225, 4129, 12321, 2081, 10273, 6177, 14369, 1057, 9249, 5153, 13345, 3105, 11297, 7201, 15393, + 545, 8737, 4641, 12833, 2593, 10785, 6689, 14881, 1569, 9761, 5665, 13857, 3617, 11809, 7713, 15905, + 289, 8481, 4385, 12577, 2337, 10529, 6433, 14625, 1313, 9505, 5409, 13601, 3361, 11553, 7457, 15649, + 801, 8993, 4897, 13089, 2849, 11041, 6945, 15137, 1825, 10017, 5921, 14113, 3873, 12065, 7969, 16161, + 161, 8353, 4257, 12449, 2209, 10401, 6305, 14497, 1185, 9377, 5281, 13473, 3233, 11425, 7329, 15521, + 673, 8865, 4769, 12961, 2721, 10913, 6817, 15009, 1697, 9889, 5793, 13985, 3745, 11937, 7841, 16033, + 417, 8609, 4513, 12705, 2465, 10657, 6561, 14753, 1441, 9633, 5537, 13729, 3489, 11681, 7585, 15777, + 929, 9121, 5025, 13217, 2977, 11169, 7073, 15265, 1953, 10145, 6049, 14241, 4001, 12193, 8097, 16289, + 97, 8289, 4193, 12385, 2145, 10337, 6241, 14433, 1121, 9313, 5217, 13409, 3169, 11361, 7265, 15457, + 609, 8801, 4705, 12897, 2657, 10849, 6753, 14945, 1633, 9825, 5729, 13921, 3681, 11873, 7777, 15969, + 353, 8545, 4449, 12641, 2401, 10593, 6497, 14689, 1377, 9569, 5473, 13665, 3425, 11617, 7521, 15713, + 865, 9057, 4961, 13153, 2913, 11105, 7009, 15201, 1889, 10081, 5985, 14177, 3937, 12129, 8033, 16225, + 225, 8417, 4321, 12513, 2273, 10465, 6369, 14561, 1249, 9441, 5345, 13537, 3297, 11489, 7393, 15585, + 737, 8929, 4833, 13025, 2785, 10977, 6881, 15073, 
1761, 9953, 5857, 14049, 3809, 12001, 7905, 16097, + 481, 8673, 4577, 12769, 2529, 10721, 6625, 14817, 1505, 9697, 5601, 13793, 3553, 11745, 7649, 15841, + 993, 9185, 5089, 13281, 3041, 11233, 7137, 15329, 2017, 10209, 6113, 14305, 4065, 12257, 8161, 16353, + 17, 8209, 4113, 12305, 2065, 10257, 6161, 14353, 1041, 9233, 5137, 13329, 3089, 11281, 7185, 15377, + 529, 8721, 4625, 12817, 2577, 10769, 6673, 14865, 1553, 9745, 5649, 13841, 3601, 11793, 7697, 15889, + 273, 8465, 4369, 12561, 2321, 10513, 6417, 14609, 1297, 9489, 5393, 13585, 3345, 11537, 7441, 15633, + 785, 8977, 4881, 13073, 2833, 11025, 6929, 15121, 1809, 10001, 5905, 14097, 3857, 12049, 7953, 16145, + 145, 8337, 4241, 12433, 2193, 10385, 6289, 14481, 1169, 9361, 5265, 13457, 3217, 11409, 7313, 15505, + 657, 8849, 4753, 12945, 2705, 10897, 6801, 14993, 1681, 9873, 5777, 13969, 3729, 11921, 7825, 16017, + 401, 8593, 4497, 12689, 2449, 10641, 6545, 14737, 1425, 9617, 5521, 13713, 3473, 11665, 7569, 15761, + 913, 9105, 5009, 13201, 2961, 11153, 7057, 15249, 1937, 10129, 6033, 14225, 3985, 12177, 8081, 16273, + 81, 8273, 4177, 12369, 2129, 10321, 6225, 14417, 1105, 9297, 5201, 13393, 3153, 11345, 7249, 15441, + 593, 8785, 4689, 12881, 2641, 10833, 6737, 14929, 1617, 9809, 5713, 13905, 3665, 11857, 7761, 15953, + 337, 8529, 4433, 12625, 2385, 10577, 6481, 14673, 1361, 9553, 5457, 13649, 3409, 11601, 7505, 15697, + 849, 9041, 4945, 13137, 2897, 11089, 6993, 15185, 1873, 10065, 5969, 14161, 3921, 12113, 8017, 16209, + 209, 8401, 4305, 12497, 2257, 10449, 6353, 14545, 1233, 9425, 5329, 13521, 3281, 11473, 7377, 15569, + 721, 8913, 4817, 13009, 2769, 10961, 6865, 15057, 1745, 9937, 5841, 14033, 3793, 11985, 7889, 16081, + 465, 8657, 4561, 12753, 2513, 10705, 6609, 14801, 1489, 9681, 5585, 13777, 3537, 11729, 7633, 15825, + 977, 9169, 5073, 13265, 3025, 11217, 7121, 15313, 2001, 10193, 6097, 14289, 4049, 12241, 8145, 16337, + 49, 8241, 4145, 12337, 2097, 10289, 6193, 14385, 1073, 9265, 5169, 13361, 3121, 11313, 
7217, 15409, + 561, 8753, 4657, 12849, 2609, 10801, 6705, 14897, 1585, 9777, 5681, 13873, 3633, 11825, 7729, 15921, + 305, 8497, 4401, 12593, 2353, 10545, 6449, 14641, 1329, 9521, 5425, 13617, 3377, 11569, 7473, 15665, + 817, 9009, 4913, 13105, 2865, 11057, 6961, 15153, 1841, 10033, 5937, 14129, 3889, 12081, 7985, 16177, + 177, 8369, 4273, 12465, 2225, 10417, 6321, 14513, 1201, 9393, 5297, 13489, 3249, 11441, 7345, 15537, + 689, 8881, 4785, 12977, 2737, 10929, 6833, 15025, 1713, 9905, 5809, 14001, 3761, 11953, 7857, 16049, + 433, 8625, 4529, 12721, 2481, 10673, 6577, 14769, 1457, 9649, 5553, 13745, 3505, 11697, 7601, 15793, + 945, 9137, 5041, 13233, 2993, 11185, 7089, 15281, 1969, 10161, 6065, 14257, 4017, 12209, 8113, 16305, + 113, 8305, 4209, 12401, 2161, 10353, 6257, 14449, 1137, 9329, 5233, 13425, 3185, 11377, 7281, 15473, + 625, 8817, 4721, 12913, 2673, 10865, 6769, 14961, 1649, 9841, 5745, 13937, 3697, 11889, 7793, 15985, + 369, 8561, 4465, 12657, 2417, 10609, 6513, 14705, 1393, 9585, 5489, 13681, 3441, 11633, 7537, 15729, + 881, 9073, 4977, 13169, 2929, 11121, 7025, 15217, 1905, 10097, 6001, 14193, 3953, 12145, 8049, 16241, + 241, 8433, 4337, 12529, 2289, 10481, 6385, 14577, 1265, 9457, 5361, 13553, 3313, 11505, 7409, 15601, + 753, 8945, 4849, 13041, 2801, 10993, 6897, 15089, 1777, 9969, 5873, 14065, 3825, 12017, 7921, 16113, + 497, 8689, 4593, 12785, 2545, 10737, 6641, 14833, 1521, 9713, 5617, 13809, 3569, 11761, 7665, 15857, + 1009, 9201, 5105, 13297, 3057, 11249, 7153, 15345, 2033, 10225, 6129, 14321, 4081, 12273, 8177, 16369, + 9, 8201, 4105, 12297, 2057, 10249, 6153, 14345, 1033, 9225, 5129, 13321, 3081, 11273, 7177, 15369, + 521, 8713, 4617, 12809, 2569, 10761, 6665, 14857, 1545, 9737, 5641, 13833, 3593, 11785, 7689, 15881, + 265, 8457, 4361, 12553, 2313, 10505, 6409, 14601, 1289, 9481, 5385, 13577, 3337, 11529, 7433, 15625, + 777, 8969, 4873, 13065, 2825, 11017, 6921, 15113, 1801, 9993, 5897, 14089, 3849, 12041, 7945, 16137, + 137, 8329, 4233, 12425, 
2185, 10377, 6281, 14473, 1161, 9353, 5257, 13449, 3209, 11401, 7305, 15497, + 649, 8841, 4745, 12937, 2697, 10889, 6793, 14985, 1673, 9865, 5769, 13961, 3721, 11913, 7817, 16009, + 393, 8585, 4489, 12681, 2441, 10633, 6537, 14729, 1417, 9609, 5513, 13705, 3465, 11657, 7561, 15753, + 905, 9097, 5001, 13193, 2953, 11145, 7049, 15241, 1929, 10121, 6025, 14217, 3977, 12169, 8073, 16265, + 73, 8265, 4169, 12361, 2121, 10313, 6217, 14409, 1097, 9289, 5193, 13385, 3145, 11337, 7241, 15433, + 585, 8777, 4681, 12873, 2633, 10825, 6729, 14921, 1609, 9801, 5705, 13897, 3657, 11849, 7753, 15945, + 329, 8521, 4425, 12617, 2377, 10569, 6473, 14665, 1353, 9545, 5449, 13641, 3401, 11593, 7497, 15689, + 841, 9033, 4937, 13129, 2889, 11081, 6985, 15177, 1865, 10057, 5961, 14153, 3913, 12105, 8009, 16201, + 201, 8393, 4297, 12489, 2249, 10441, 6345, 14537, 1225, 9417, 5321, 13513, 3273, 11465, 7369, 15561, + 713, 8905, 4809, 13001, 2761, 10953, 6857, 15049, 1737, 9929, 5833, 14025, 3785, 11977, 7881, 16073, + 457, 8649, 4553, 12745, 2505, 10697, 6601, 14793, 1481, 9673, 5577, 13769, 3529, 11721, 7625, 15817, + 969, 9161, 5065, 13257, 3017, 11209, 7113, 15305, 1993, 10185, 6089, 14281, 4041, 12233, 8137, 16329, + 41, 8233, 4137, 12329, 2089, 10281, 6185, 14377, 1065, 9257, 5161, 13353, 3113, 11305, 7209, 15401, + 553, 8745, 4649, 12841, 2601, 10793, 6697, 14889, 1577, 9769, 5673, 13865, 3625, 11817, 7721, 15913, + 297, 8489, 4393, 12585, 2345, 10537, 6441, 14633, 1321, 9513, 5417, 13609, 3369, 11561, 7465, 15657, + 809, 9001, 4905, 13097, 2857, 11049, 6953, 15145, 1833, 10025, 5929, 14121, 3881, 12073, 7977, 16169, + 169, 8361, 4265, 12457, 2217, 10409, 6313, 14505, 1193, 9385, 5289, 13481, 3241, 11433, 7337, 15529, + 681, 8873, 4777, 12969, 2729, 10921, 6825, 15017, 1705, 9897, 5801, 13993, 3753, 11945, 7849, 16041, + 425, 8617, 4521, 12713, 2473, 10665, 6569, 14761, 1449, 9641, 5545, 13737, 3497, 11689, 7593, 15785, + 937, 9129, 5033, 13225, 2985, 11177, 7081, 15273, 1961, 10153, 
6057, 14249, 4009, 12201, 8105, 16297, + 105, 8297, 4201, 12393, 2153, 10345, 6249, 14441, 1129, 9321, 5225, 13417, 3177, 11369, 7273, 15465, + 617, 8809, 4713, 12905, 2665, 10857, 6761, 14953, 1641, 9833, 5737, 13929, 3689, 11881, 7785, 15977, + 361, 8553, 4457, 12649, 2409, 10601, 6505, 14697, 1385, 9577, 5481, 13673, 3433, 11625, 7529, 15721, + 873, 9065, 4969, 13161, 2921, 11113, 7017, 15209, 1897, 10089, 5993, 14185, 3945, 12137, 8041, 16233, + 233, 8425, 4329, 12521, 2281, 10473, 6377, 14569, 1257, 9449, 5353, 13545, 3305, 11497, 7401, 15593, + 745, 8937, 4841, 13033, 2793, 10985, 6889, 15081, 1769, 9961, 5865, 14057, 3817, 12009, 7913, 16105, + 489, 8681, 4585, 12777, 2537, 10729, 6633, 14825, 1513, 9705, 5609, 13801, 3561, 11753, 7657, 15849, + 1001, 9193, 5097, 13289, 3049, 11241, 7145, 15337, 2025, 10217, 6121, 14313, 4073, 12265, 8169, 16361, + 25, 8217, 4121, 12313, 2073, 10265, 6169, 14361, 1049, 9241, 5145, 13337, 3097, 11289, 7193, 15385, + 537, 8729, 4633, 12825, 2585, 10777, 6681, 14873, 1561, 9753, 5657, 13849, 3609, 11801, 7705, 15897, + 281, 8473, 4377, 12569, 2329, 10521, 6425, 14617, 1305, 9497, 5401, 13593, 3353, 11545, 7449, 15641, + 793, 8985, 4889, 13081, 2841, 11033, 6937, 15129, 1817, 10009, 5913, 14105, 3865, 12057, 7961, 16153, + 153, 8345, 4249, 12441, 2201, 10393, 6297, 14489, 1177, 9369, 5273, 13465, 3225, 11417, 7321, 15513, + 665, 8857, 4761, 12953, 2713, 10905, 6809, 15001, 1689, 9881, 5785, 13977, 3737, 11929, 7833, 16025, + 409, 8601, 4505, 12697, 2457, 10649, 6553, 14745, 1433, 9625, 5529, 13721, 3481, 11673, 7577, 15769, + 921, 9113, 5017, 13209, 2969, 11161, 7065, 15257, 1945, 10137, 6041, 14233, 3993, 12185, 8089, 16281, + 89, 8281, 4185, 12377, 2137, 10329, 6233, 14425, 1113, 9305, 5209, 13401, 3161, 11353, 7257, 15449, + 601, 8793, 4697, 12889, 2649, 10841, 6745, 14937, 1625, 9817, 5721, 13913, 3673, 11865, 7769, 15961, + 345, 8537, 4441, 12633, 2393, 10585, 6489, 14681, 1369, 9561, 5465, 13657, 3417, 11609, 7513, 15705, 
+ 857, 9049, 4953, 13145, 2905, 11097, 7001, 15193, 1881, 10073, 5977, 14169, 3929, 12121, 8025, 16217, + 217, 8409, 4313, 12505, 2265, 10457, 6361, 14553, 1241, 9433, 5337, 13529, 3289, 11481, 7385, 15577, + 729, 8921, 4825, 13017, 2777, 10969, 6873, 15065, 1753, 9945, 5849, 14041, 3801, 11993, 7897, 16089, + 473, 8665, 4569, 12761, 2521, 10713, 6617, 14809, 1497, 9689, 5593, 13785, 3545, 11737, 7641, 15833, + 985, 9177, 5081, 13273, 3033, 11225, 7129, 15321, 2009, 10201, 6105, 14297, 4057, 12249, 8153, 16345, + 57, 8249, 4153, 12345, 2105, 10297, 6201, 14393, 1081, 9273, 5177, 13369, 3129, 11321, 7225, 15417, + 569, 8761, 4665, 12857, 2617, 10809, 6713, 14905, 1593, 9785, 5689, 13881, 3641, 11833, 7737, 15929, + 313, 8505, 4409, 12601, 2361, 10553, 6457, 14649, 1337, 9529, 5433, 13625, 3385, 11577, 7481, 15673, + 825, 9017, 4921, 13113, 2873, 11065, 6969, 15161, 1849, 10041, 5945, 14137, 3897, 12089, 7993, 16185, + 185, 8377, 4281, 12473, 2233, 10425, 6329, 14521, 1209, 9401, 5305, 13497, 3257, 11449, 7353, 15545, + 697, 8889, 4793, 12985, 2745, 10937, 6841, 15033, 1721, 9913, 5817, 14009, 3769, 11961, 7865, 16057, + 441, 8633, 4537, 12729, 2489, 10681, 6585, 14777, 1465, 9657, 5561, 13753, 3513, 11705, 7609, 15801, + 953, 9145, 5049, 13241, 3001, 11193, 7097, 15289, 1977, 10169, 6073, 14265, 4025, 12217, 8121, 16313, + 121, 8313, 4217, 12409, 2169, 10361, 6265, 14457, 1145, 9337, 5241, 13433, 3193, 11385, 7289, 15481, + 633, 8825, 4729, 12921, 2681, 10873, 6777, 14969, 1657, 9849, 5753, 13945, 3705, 11897, 7801, 15993, + 377, 8569, 4473, 12665, 2425, 10617, 6521, 14713, 1401, 9593, 5497, 13689, 3449, 11641, 7545, 15737, + 889, 9081, 4985, 13177, 2937, 11129, 7033, 15225, 1913, 10105, 6009, 14201, 3961, 12153, 8057, 16249, + 249, 8441, 4345, 12537, 2297, 10489, 6393, 14585, 1273, 9465, 5369, 13561, 3321, 11513, 7417, 15609, + 761, 8953, 4857, 13049, 2809, 11001, 6905, 15097, 1785, 9977, 5881, 14073, 3833, 12025, 7929, 16121, + 505, 8697, 4601, 12793, 2553, 10745, 
6649, 14841, 1529, 9721, 5625, 13817, 3577, 11769, 7673, 15865, + 1017, 9209, 5113, 13305, 3065, 11257, 7161, 15353, 2041, 10233, 6137, 14329, 4089, 12281, 8185, 16377, + 5, 8197, 4101, 12293, 2053, 10245, 6149, 14341, 1029, 9221, 5125, 13317, 3077, 11269, 7173, 15365, + 517, 8709, 4613, 12805, 2565, 10757, 6661, 14853, 1541, 9733, 5637, 13829, 3589, 11781, 7685, 15877, + 261, 8453, 4357, 12549, 2309, 10501, 6405, 14597, 1285, 9477, 5381, 13573, 3333, 11525, 7429, 15621, + 773, 8965, 4869, 13061, 2821, 11013, 6917, 15109, 1797, 9989, 5893, 14085, 3845, 12037, 7941, 16133, + 133, 8325, 4229, 12421, 2181, 10373, 6277, 14469, 1157, 9349, 5253, 13445, 3205, 11397, 7301, 15493, + 645, 8837, 4741, 12933, 2693, 10885, 6789, 14981, 1669, 9861, 5765, 13957, 3717, 11909, 7813, 16005, + 389, 8581, 4485, 12677, 2437, 10629, 6533, 14725, 1413, 9605, 5509, 13701, 3461, 11653, 7557, 15749, + 901, 9093, 4997, 13189, 2949, 11141, 7045, 15237, 1925, 10117, 6021, 14213, 3973, 12165, 8069, 16261, + 69, 8261, 4165, 12357, 2117, 10309, 6213, 14405, 1093, 9285, 5189, 13381, 3141, 11333, 7237, 15429, + 581, 8773, 4677, 12869, 2629, 10821, 6725, 14917, 1605, 9797, 5701, 13893, 3653, 11845, 7749, 15941, + 325, 8517, 4421, 12613, 2373, 10565, 6469, 14661, 1349, 9541, 5445, 13637, 3397, 11589, 7493, 15685, + 837, 9029, 4933, 13125, 2885, 11077, 6981, 15173, 1861, 10053, 5957, 14149, 3909, 12101, 8005, 16197, + 197, 8389, 4293, 12485, 2245, 10437, 6341, 14533, 1221, 9413, 5317, 13509, 3269, 11461, 7365, 15557, + 709, 8901, 4805, 12997, 2757, 10949, 6853, 15045, 1733, 9925, 5829, 14021, 3781, 11973, 7877, 16069, + 453, 8645, 4549, 12741, 2501, 10693, 6597, 14789, 1477, 9669, 5573, 13765, 3525, 11717, 7621, 15813, + 965, 9157, 5061, 13253, 3013, 11205, 7109, 15301, 1989, 10181, 6085, 14277, 4037, 12229, 8133, 16325, + 37, 8229, 4133, 12325, 2085, 10277, 6181, 14373, 1061, 9253, 5157, 13349, 3109, 11301, 7205, 15397, + 549, 8741, 4645, 12837, 2597, 10789, 6693, 14885, 1573, 9765, 5669, 13861, 
3621, 11813, 7717, 15909, + 293, 8485, 4389, 12581, 2341, 10533, 6437, 14629, 1317, 9509, 5413, 13605, 3365, 11557, 7461, 15653, + 805, 8997, 4901, 13093, 2853, 11045, 6949, 15141, 1829, 10021, 5925, 14117, 3877, 12069, 7973, 16165, + 165, 8357, 4261, 12453, 2213, 10405, 6309, 14501, 1189, 9381, 5285, 13477, 3237, 11429, 7333, 15525, + 677, 8869, 4773, 12965, 2725, 10917, 6821, 15013, 1701, 9893, 5797, 13989, 3749, 11941, 7845, 16037, + 421, 8613, 4517, 12709, 2469, 10661, 6565, 14757, 1445, 9637, 5541, 13733, 3493, 11685, 7589, 15781, + 933, 9125, 5029, 13221, 2981, 11173, 7077, 15269, 1957, 10149, 6053, 14245, 4005, 12197, 8101, 16293, + 101, 8293, 4197, 12389, 2149, 10341, 6245, 14437, 1125, 9317, 5221, 13413, 3173, 11365, 7269, 15461, + 613, 8805, 4709, 12901, 2661, 10853, 6757, 14949, 1637, 9829, 5733, 13925, 3685, 11877, 7781, 15973, + 357, 8549, 4453, 12645, 2405, 10597, 6501, 14693, 1381, 9573, 5477, 13669, 3429, 11621, 7525, 15717, + 869, 9061, 4965, 13157, 2917, 11109, 7013, 15205, 1893, 10085, 5989, 14181, 3941, 12133, 8037, 16229, + 229, 8421, 4325, 12517, 2277, 10469, 6373, 14565, 1253, 9445, 5349, 13541, 3301, 11493, 7397, 15589, + 741, 8933, 4837, 13029, 2789, 10981, 6885, 15077, 1765, 9957, 5861, 14053, 3813, 12005, 7909, 16101, + 485, 8677, 4581, 12773, 2533, 10725, 6629, 14821, 1509, 9701, 5605, 13797, 3557, 11749, 7653, 15845, + 997, 9189, 5093, 13285, 3045, 11237, 7141, 15333, 2021, 10213, 6117, 14309, 4069, 12261, 8165, 16357, + 21, 8213, 4117, 12309, 2069, 10261, 6165, 14357, 1045, 9237, 5141, 13333, 3093, 11285, 7189, 15381, + 533, 8725, 4629, 12821, 2581, 10773, 6677, 14869, 1557, 9749, 5653, 13845, 3605, 11797, 7701, 15893, + 277, 8469, 4373, 12565, 2325, 10517, 6421, 14613, 1301, 9493, 5397, 13589, 3349, 11541, 7445, 15637, + 789, 8981, 4885, 13077, 2837, 11029, 6933, 15125, 1813, 10005, 5909, 14101, 3861, 12053, 7957, 16149, + 149, 8341, 4245, 12437, 2197, 10389, 6293, 14485, 1173, 9365, 5269, 13461, 3221, 11413, 7317, 15509, + 661, 8853, 
4757, 12949, 2709, 10901, 6805, 14997, 1685, 9877, 5781, 13973, 3733, 11925, 7829, 16021, + 405, 8597, 4501, 12693, 2453, 10645, 6549, 14741, 1429, 9621, 5525, 13717, 3477, 11669, 7573, 15765, + 917, 9109, 5013, 13205, 2965, 11157, 7061, 15253, 1941, 10133, 6037, 14229, 3989, 12181, 8085, 16277, + 85, 8277, 4181, 12373, 2133, 10325, 6229, 14421, 1109, 9301, 5205, 13397, 3157, 11349, 7253, 15445, + 597, 8789, 4693, 12885, 2645, 10837, 6741, 14933, 1621, 9813, 5717, 13909, 3669, 11861, 7765, 15957, + 341, 8533, 4437, 12629, 2389, 10581, 6485, 14677, 1365, 9557, 5461, 13653, 3413, 11605, 7509, 15701, + 853, 9045, 4949, 13141, 2901, 11093, 6997, 15189, 1877, 10069, 5973, 14165, 3925, 12117, 8021, 16213, + 213, 8405, 4309, 12501, 2261, 10453, 6357, 14549, 1237, 9429, 5333, 13525, 3285, 11477, 7381, 15573, + 725, 8917, 4821, 13013, 2773, 10965, 6869, 15061, 1749, 9941, 5845, 14037, 3797, 11989, 7893, 16085, + 469, 8661, 4565, 12757, 2517, 10709, 6613, 14805, 1493, 9685, 5589, 13781, 3541, 11733, 7637, 15829, + 981, 9173, 5077, 13269, 3029, 11221, 7125, 15317, 2005, 10197, 6101, 14293, 4053, 12245, 8149, 16341, + 53, 8245, 4149, 12341, 2101, 10293, 6197, 14389, 1077, 9269, 5173, 13365, 3125, 11317, 7221, 15413, + 565, 8757, 4661, 12853, 2613, 10805, 6709, 14901, 1589, 9781, 5685, 13877, 3637, 11829, 7733, 15925, + 309, 8501, 4405, 12597, 2357, 10549, 6453, 14645, 1333, 9525, 5429, 13621, 3381, 11573, 7477, 15669, + 821, 9013, 4917, 13109, 2869, 11061, 6965, 15157, 1845, 10037, 5941, 14133, 3893, 12085, 7989, 16181, + 181, 8373, 4277, 12469, 2229, 10421, 6325, 14517, 1205, 9397, 5301, 13493, 3253, 11445, 7349, 15541, + 693, 8885, 4789, 12981, 2741, 10933, 6837, 15029, 1717, 9909, 5813, 14005, 3765, 11957, 7861, 16053, + 437, 8629, 4533, 12725, 2485, 10677, 6581, 14773, 1461, 9653, 5557, 13749, 3509, 11701, 7605, 15797, + 949, 9141, 5045, 13237, 2997, 11189, 7093, 15285, 1973, 10165, 6069, 14261, 4021, 12213, 8117, 16309, + 117, 8309, 4213, 12405, 2165, 10357, 6261, 14453, 
1141, 9333, 5237, 13429, 3189, 11381, 7285, 15477, + 629, 8821, 4725, 12917, 2677, 10869, 6773, 14965, 1653, 9845, 5749, 13941, 3701, 11893, 7797, 15989, + 373, 8565, 4469, 12661, 2421, 10613, 6517, 14709, 1397, 9589, 5493, 13685, 3445, 11637, 7541, 15733, + 885, 9077, 4981, 13173, 2933, 11125, 7029, 15221, 1909, 10101, 6005, 14197, 3957, 12149, 8053, 16245, + 245, 8437, 4341, 12533, 2293, 10485, 6389, 14581, 1269, 9461, 5365, 13557, 3317, 11509, 7413, 15605, + 757, 8949, 4853, 13045, 2805, 10997, 6901, 15093, 1781, 9973, 5877, 14069, 3829, 12021, 7925, 16117, + 501, 8693, 4597, 12789, 2549, 10741, 6645, 14837, 1525, 9717, 5621, 13813, 3573, 11765, 7669, 15861, + 1013, 9205, 5109, 13301, 3061, 11253, 7157, 15349, 2037, 10229, 6133, 14325, 4085, 12277, 8181, 16373, + 13, 8205, 4109, 12301, 2061, 10253, 6157, 14349, 1037, 9229, 5133, 13325, 3085, 11277, 7181, 15373, + 525, 8717, 4621, 12813, 2573, 10765, 6669, 14861, 1549, 9741, 5645, 13837, 3597, 11789, 7693, 15885, + 269, 8461, 4365, 12557, 2317, 10509, 6413, 14605, 1293, 9485, 5389, 13581, 3341, 11533, 7437, 15629, + 781, 8973, 4877, 13069, 2829, 11021, 6925, 15117, 1805, 9997, 5901, 14093, 3853, 12045, 7949, 16141, + 141, 8333, 4237, 12429, 2189, 10381, 6285, 14477, 1165, 9357, 5261, 13453, 3213, 11405, 7309, 15501, + 653, 8845, 4749, 12941, 2701, 10893, 6797, 14989, 1677, 9869, 5773, 13965, 3725, 11917, 7821, 16013, + 397, 8589, 4493, 12685, 2445, 10637, 6541, 14733, 1421, 9613, 5517, 13709, 3469, 11661, 7565, 15757, + 909, 9101, 5005, 13197, 2957, 11149, 7053, 15245, 1933, 10125, 6029, 14221, 3981, 12173, 8077, 16269, + 77, 8269, 4173, 12365, 2125, 10317, 6221, 14413, 1101, 9293, 5197, 13389, 3149, 11341, 7245, 15437, + 589, 8781, 4685, 12877, 2637, 10829, 6733, 14925, 1613, 9805, 5709, 13901, 3661, 11853, 7757, 15949, + 333, 8525, 4429, 12621, 2381, 10573, 6477, 14669, 1357, 9549, 5453, 13645, 3405, 11597, 7501, 15693, + 845, 9037, 4941, 13133, 2893, 11085, 6989, 15181, 1869, 10061, 5965, 14157, 3917, 12109, 
8013, 16205, + 205, 8397, 4301, 12493, 2253, 10445, 6349, 14541, 1229, 9421, 5325, 13517, 3277, 11469, 7373, 15565, + 717, 8909, 4813, 13005, 2765, 10957, 6861, 15053, 1741, 9933, 5837, 14029, 3789, 11981, 7885, 16077, + 461, 8653, 4557, 12749, 2509, 10701, 6605, 14797, 1485, 9677, 5581, 13773, 3533, 11725, 7629, 15821, + 973, 9165, 5069, 13261, 3021, 11213, 7117, 15309, 1997, 10189, 6093, 14285, 4045, 12237, 8141, 16333, + 45, 8237, 4141, 12333, 2093, 10285, 6189, 14381, 1069, 9261, 5165, 13357, 3117, 11309, 7213, 15405, + 557, 8749, 4653, 12845, 2605, 10797, 6701, 14893, 1581, 9773, 5677, 13869, 3629, 11821, 7725, 15917, + 301, 8493, 4397, 12589, 2349, 10541, 6445, 14637, 1325, 9517, 5421, 13613, 3373, 11565, 7469, 15661, + 813, 9005, 4909, 13101, 2861, 11053, 6957, 15149, 1837, 10029, 5933, 14125, 3885, 12077, 7981, 16173, + 173, 8365, 4269, 12461, 2221, 10413, 6317, 14509, 1197, 9389, 5293, 13485, 3245, 11437, 7341, 15533, + 685, 8877, 4781, 12973, 2733, 10925, 6829, 15021, 1709, 9901, 5805, 13997, 3757, 11949, 7853, 16045, + 429, 8621, 4525, 12717, 2477, 10669, 6573, 14765, 1453, 9645, 5549, 13741, 3501, 11693, 7597, 15789, + 941, 9133, 5037, 13229, 2989, 11181, 7085, 15277, 1965, 10157, 6061, 14253, 4013, 12205, 8109, 16301, + 109, 8301, 4205, 12397, 2157, 10349, 6253, 14445, 1133, 9325, 5229, 13421, 3181, 11373, 7277, 15469, + 621, 8813, 4717, 12909, 2669, 10861, 6765, 14957, 1645, 9837, 5741, 13933, 3693, 11885, 7789, 15981, + 365, 8557, 4461, 12653, 2413, 10605, 6509, 14701, 1389, 9581, 5485, 13677, 3437, 11629, 7533, 15725, + 877, 9069, 4973, 13165, 2925, 11117, 7021, 15213, 1901, 10093, 5997, 14189, 3949, 12141, 8045, 16237, + 237, 8429, 4333, 12525, 2285, 10477, 6381, 14573, 1261, 9453, 5357, 13549, 3309, 11501, 7405, 15597, + 749, 8941, 4845, 13037, 2797, 10989, 6893, 15085, 1773, 9965, 5869, 14061, 3821, 12013, 7917, 16109, + 493, 8685, 4589, 12781, 2541, 10733, 6637, 14829, 1517, 9709, 5613, 13805, 3565, 11757, 7661, 15853, + 1005, 9197, 5101, 13293, 
3053, 11245, 7149, 15341, 2029, 10221, 6125, 14317, 4077, 12269, 8173, 16365, + 29, 8221, 4125, 12317, 2077, 10269, 6173, 14365, 1053, 9245, 5149, 13341, 3101, 11293, 7197, 15389, + 541, 8733, 4637, 12829, 2589, 10781, 6685, 14877, 1565, 9757, 5661, 13853, 3613, 11805, 7709, 15901, + 285, 8477, 4381, 12573, 2333, 10525, 6429, 14621, 1309, 9501, 5405, 13597, 3357, 11549, 7453, 15645, + 797, 8989, 4893, 13085, 2845, 11037, 6941, 15133, 1821, 10013, 5917, 14109, 3869, 12061, 7965, 16157, + 157, 8349, 4253, 12445, 2205, 10397, 6301, 14493, 1181, 9373, 5277, 13469, 3229, 11421, 7325, 15517, + 669, 8861, 4765, 12957, 2717, 10909, 6813, 15005, 1693, 9885, 5789, 13981, 3741, 11933, 7837, 16029, + 413, 8605, 4509, 12701, 2461, 10653, 6557, 14749, 1437, 9629, 5533, 13725, 3485, 11677, 7581, 15773, + 925, 9117, 5021, 13213, 2973, 11165, 7069, 15261, 1949, 10141, 6045, 14237, 3997, 12189, 8093, 16285, + 93, 8285, 4189, 12381, 2141, 10333, 6237, 14429, 1117, 9309, 5213, 13405, 3165, 11357, 7261, 15453, + 605, 8797, 4701, 12893, 2653, 10845, 6749, 14941, 1629, 9821, 5725, 13917, 3677, 11869, 7773, 15965, + 349, 8541, 4445, 12637, 2397, 10589, 6493, 14685, 1373, 9565, 5469, 13661, 3421, 11613, 7517, 15709, + 861, 9053, 4957, 13149, 2909, 11101, 7005, 15197, 1885, 10077, 5981, 14173, 3933, 12125, 8029, 16221, + 221, 8413, 4317, 12509, 2269, 10461, 6365, 14557, 1245, 9437, 5341, 13533, 3293, 11485, 7389, 15581, + 733, 8925, 4829, 13021, 2781, 10973, 6877, 15069, 1757, 9949, 5853, 14045, 3805, 11997, 7901, 16093, + 477, 8669, 4573, 12765, 2525, 10717, 6621, 14813, 1501, 9693, 5597, 13789, 3549, 11741, 7645, 15837, + 989, 9181, 5085, 13277, 3037, 11229, 7133, 15325, 2013, 10205, 6109, 14301, 4061, 12253, 8157, 16349, + 61, 8253, 4157, 12349, 2109, 10301, 6205, 14397, 1085, 9277, 5181, 13373, 3133, 11325, 7229, 15421, + 573, 8765, 4669, 12861, 2621, 10813, 6717, 14909, 1597, 9789, 5693, 13885, 3645, 11837, 7741, 15933, + 317, 8509, 4413, 12605, 2365, 10557, 6461, 14653, 1341, 9533, 
5437, 13629, 3389, 11581, 7485, 15677, + 829, 9021, 4925, 13117, 2877, 11069, 6973, 15165, 1853, 10045, 5949, 14141, 3901, 12093, 7997, 16189, + 189, 8381, 4285, 12477, 2237, 10429, 6333, 14525, 1213, 9405, 5309, 13501, 3261, 11453, 7357, 15549, + 701, 8893, 4797, 12989, 2749, 10941, 6845, 15037, 1725, 9917, 5821, 14013, 3773, 11965, 7869, 16061, + 445, 8637, 4541, 12733, 2493, 10685, 6589, 14781, 1469, 9661, 5565, 13757, 3517, 11709, 7613, 15805, + 957, 9149, 5053, 13245, 3005, 11197, 7101, 15293, 1981, 10173, 6077, 14269, 4029, 12221, 8125, 16317, + 125, 8317, 4221, 12413, 2173, 10365, 6269, 14461, 1149, 9341, 5245, 13437, 3197, 11389, 7293, 15485, + 637, 8829, 4733, 12925, 2685, 10877, 6781, 14973, 1661, 9853, 5757, 13949, 3709, 11901, 7805, 15997, + 381, 8573, 4477, 12669, 2429, 10621, 6525, 14717, 1405, 9597, 5501, 13693, 3453, 11645, 7549, 15741, + 893, 9085, 4989, 13181, 2941, 11133, 7037, 15229, 1917, 10109, 6013, 14205, 3965, 12157, 8061, 16253, + 253, 8445, 4349, 12541, 2301, 10493, 6397, 14589, 1277, 9469, 5373, 13565, 3325, 11517, 7421, 15613, + 765, 8957, 4861, 13053, 2813, 11005, 6909, 15101, 1789, 9981, 5885, 14077, 3837, 12029, 7933, 16125, + 509, 8701, 4605, 12797, 2557, 10749, 6653, 14845, 1533, 9725, 5629, 13821, 3581, 11773, 7677, 15869, + 1021, 9213, 5117, 13309, 3069, 11261, 7165, 15357, 2045, 10237, 6141, 14333, 4093, 12285, 8189, 16381, + 3, 8195, 4099, 12291, 2051, 10243, 6147, 14339, 1027, 9219, 5123, 13315, 3075, 11267, 7171, 15363, + 515, 8707, 4611, 12803, 2563, 10755, 6659, 14851, 1539, 9731, 5635, 13827, 3587, 11779, 7683, 15875, + 259, 8451, 4355, 12547, 2307, 10499, 6403, 14595, 1283, 9475, 5379, 13571, 3331, 11523, 7427, 15619, + 771, 8963, 4867, 13059, 2819, 11011, 6915, 15107, 1795, 9987, 5891, 14083, 3843, 12035, 7939, 16131, + 131, 8323, 4227, 12419, 2179, 10371, 6275, 14467, 1155, 9347, 5251, 13443, 3203, 11395, 7299, 15491, + 643, 8835, 4739, 12931, 2691, 10883, 6787, 14979, 1667, 9859, 5763, 13955, 3715, 11907, 7811, 16003, 
+ 387, 8579, 4483, 12675, 2435, 10627, 6531, 14723, 1411, 9603, 5507, 13699, 3459, 11651, 7555, 15747, + 899, 9091, 4995, 13187, 2947, 11139, 7043, 15235, 1923, 10115, 6019, 14211, 3971, 12163, 8067, 16259, + 67, 8259, 4163, 12355, 2115, 10307, 6211, 14403, 1091, 9283, 5187, 13379, 3139, 11331, 7235, 15427, + 579, 8771, 4675, 12867, 2627, 10819, 6723, 14915, 1603, 9795, 5699, 13891, 3651, 11843, 7747, 15939, + 323, 8515, 4419, 12611, 2371, 10563, 6467, 14659, 1347, 9539, 5443, 13635, 3395, 11587, 7491, 15683, + 835, 9027, 4931, 13123, 2883, 11075, 6979, 15171, 1859, 10051, 5955, 14147, 3907, 12099, 8003, 16195, + 195, 8387, 4291, 12483, 2243, 10435, 6339, 14531, 1219, 9411, 5315, 13507, 3267, 11459, 7363, 15555, + 707, 8899, 4803, 12995, 2755, 10947, 6851, 15043, 1731, 9923, 5827, 14019, 3779, 11971, 7875, 16067, + 451, 8643, 4547, 12739, 2499, 10691, 6595, 14787, 1475, 9667, 5571, 13763, 3523, 11715, 7619, 15811, + 963, 9155, 5059, 13251, 3011, 11203, 7107, 15299, 1987, 10179, 6083, 14275, 4035, 12227, 8131, 16323, + 35, 8227, 4131, 12323, 2083, 10275, 6179, 14371, 1059, 9251, 5155, 13347, 3107, 11299, 7203, 15395, + 547, 8739, 4643, 12835, 2595, 10787, 6691, 14883, 1571, 9763, 5667, 13859, 3619, 11811, 7715, 15907, + 291, 8483, 4387, 12579, 2339, 10531, 6435, 14627, 1315, 9507, 5411, 13603, 3363, 11555, 7459, 15651, + 803, 8995, 4899, 13091, 2851, 11043, 6947, 15139, 1827, 10019, 5923, 14115, 3875, 12067, 7971, 16163, + 163, 8355, 4259, 12451, 2211, 10403, 6307, 14499, 1187, 9379, 5283, 13475, 3235, 11427, 7331, 15523, + 675, 8867, 4771, 12963, 2723, 10915, 6819, 15011, 1699, 9891, 5795, 13987, 3747, 11939, 7843, 16035, + 419, 8611, 4515, 12707, 2467, 10659, 6563, 14755, 1443, 9635, 5539, 13731, 3491, 11683, 7587, 15779, + 931, 9123, 5027, 13219, 2979, 11171, 7075, 15267, 1955, 10147, 6051, 14243, 4003, 12195, 8099, 16291, + 99, 8291, 4195, 12387, 2147, 10339, 6243, 14435, 1123, 9315, 5219, 13411, 3171, 11363, 7267, 15459, + 611, 8803, 4707, 12899, 2659, 10851, 
6755, 14947, 1635, 9827, 5731, 13923, 3683, 11875, 7779, 15971, + 355, 8547, 4451, 12643, 2403, 10595, 6499, 14691, 1379, 9571, 5475, 13667, 3427, 11619, 7523, 15715, + 867, 9059, 4963, 13155, 2915, 11107, 7011, 15203, 1891, 10083, 5987, 14179, 3939, 12131, 8035, 16227, + 227, 8419, 4323, 12515, 2275, 10467, 6371, 14563, 1251, 9443, 5347, 13539, 3299, 11491, 7395, 15587, + 739, 8931, 4835, 13027, 2787, 10979, 6883, 15075, 1763, 9955, 5859, 14051, 3811, 12003, 7907, 16099, + 483, 8675, 4579, 12771, 2531, 10723, 6627, 14819, 1507, 9699, 5603, 13795, 3555, 11747, 7651, 15843, + 995, 9187, 5091, 13283, 3043, 11235, 7139, 15331, 2019, 10211, 6115, 14307, 4067, 12259, 8163, 16355, + 19, 8211, 4115, 12307, 2067, 10259, 6163, 14355, 1043, 9235, 5139, 13331, 3091, 11283, 7187, 15379, + 531, 8723, 4627, 12819, 2579, 10771, 6675, 14867, 1555, 9747, 5651, 13843, 3603, 11795, 7699, 15891, + 275, 8467, 4371, 12563, 2323, 10515, 6419, 14611, 1299, 9491, 5395, 13587, 3347, 11539, 7443, 15635, + 787, 8979, 4883, 13075, 2835, 11027, 6931, 15123, 1811, 10003, 5907, 14099, 3859, 12051, 7955, 16147, + 147, 8339, 4243, 12435, 2195, 10387, 6291, 14483, 1171, 9363, 5267, 13459, 3219, 11411, 7315, 15507, + 659, 8851, 4755, 12947, 2707, 10899, 6803, 14995, 1683, 9875, 5779, 13971, 3731, 11923, 7827, 16019, + 403, 8595, 4499, 12691, 2451, 10643, 6547, 14739, 1427, 9619, 5523, 13715, 3475, 11667, 7571, 15763, + 915, 9107, 5011, 13203, 2963, 11155, 7059, 15251, 1939, 10131, 6035, 14227, 3987, 12179, 8083, 16275, + 83, 8275, 4179, 12371, 2131, 10323, 6227, 14419, 1107, 9299, 5203, 13395, 3155, 11347, 7251, 15443, + 595, 8787, 4691, 12883, 2643, 10835, 6739, 14931, 1619, 9811, 5715, 13907, 3667, 11859, 7763, 15955, + 339, 8531, 4435, 12627, 2387, 10579, 6483, 14675, 1363, 9555, 5459, 13651, 3411, 11603, 7507, 15699, + 851, 9043, 4947, 13139, 2899, 11091, 6995, 15187, 1875, 10067, 5971, 14163, 3923, 12115, 8019, 16211, + 211, 8403, 4307, 12499, 2259, 10451, 6355, 14547, 1235, 9427, 5331, 13523, 
3283, 11475, 7379, 15571, + 723, 8915, 4819, 13011, 2771, 10963, 6867, 15059, 1747, 9939, 5843, 14035, 3795, 11987, 7891, 16083, + 467, 8659, 4563, 12755, 2515, 10707, 6611, 14803, 1491, 9683, 5587, 13779, 3539, 11731, 7635, 15827, + 979, 9171, 5075, 13267, 3027, 11219, 7123, 15315, 2003, 10195, 6099, 14291, 4051, 12243, 8147, 16339, + 51, 8243, 4147, 12339, 2099, 10291, 6195, 14387, 1075, 9267, 5171, 13363, 3123, 11315, 7219, 15411, + 563, 8755, 4659, 12851, 2611, 10803, 6707, 14899, 1587, 9779, 5683, 13875, 3635, 11827, 7731, 15923, + 307, 8499, 4403, 12595, 2355, 10547, 6451, 14643, 1331, 9523, 5427, 13619, 3379, 11571, 7475, 15667, + 819, 9011, 4915, 13107, 2867, 11059, 6963, 15155, 1843, 10035, 5939, 14131, 3891, 12083, 7987, 16179, + 179, 8371, 4275, 12467, 2227, 10419, 6323, 14515, 1203, 9395, 5299, 13491, 3251, 11443, 7347, 15539, + 691, 8883, 4787, 12979, 2739, 10931, 6835, 15027, 1715, 9907, 5811, 14003, 3763, 11955, 7859, 16051, + 435, 8627, 4531, 12723, 2483, 10675, 6579, 14771, 1459, 9651, 5555, 13747, 3507, 11699, 7603, 15795, + 947, 9139, 5043, 13235, 2995, 11187, 7091, 15283, 1971, 10163, 6067, 14259, 4019, 12211, 8115, 16307, + 115, 8307, 4211, 12403, 2163, 10355, 6259, 14451, 1139, 9331, 5235, 13427, 3187, 11379, 7283, 15475, + 627, 8819, 4723, 12915, 2675, 10867, 6771, 14963, 1651, 9843, 5747, 13939, 3699, 11891, 7795, 15987, + 371, 8563, 4467, 12659, 2419, 10611, 6515, 14707, 1395, 9587, 5491, 13683, 3443, 11635, 7539, 15731, + 883, 9075, 4979, 13171, 2931, 11123, 7027, 15219, 1907, 10099, 6003, 14195, 3955, 12147, 8051, 16243, + 243, 8435, 4339, 12531, 2291, 10483, 6387, 14579, 1267, 9459, 5363, 13555, 3315, 11507, 7411, 15603, + 755, 8947, 4851, 13043, 2803, 10995, 6899, 15091, 1779, 9971, 5875, 14067, 3827, 12019, 7923, 16115, + 499, 8691, 4595, 12787, 2547, 10739, 6643, 14835, 1523, 9715, 5619, 13811, 3571, 11763, 7667, 15859, + 1011, 9203, 5107, 13299, 3059, 11251, 7155, 15347, 2035, 10227, 6131, 14323, 4083, 12275, 8179, 16371, + 11, 8203, 
4107, 12299, 2059, 10251, 6155, 14347, 1035, 9227, 5131, 13323, 3083, 11275, 7179, 15371, + 523, 8715, 4619, 12811, 2571, 10763, 6667, 14859, 1547, 9739, 5643, 13835, 3595, 11787, 7691, 15883, + 267, 8459, 4363, 12555, 2315, 10507, 6411, 14603, 1291, 9483, 5387, 13579, 3339, 11531, 7435, 15627, + 779, 8971, 4875, 13067, 2827, 11019, 6923, 15115, 1803, 9995, 5899, 14091, 3851, 12043, 7947, 16139, + 139, 8331, 4235, 12427, 2187, 10379, 6283, 14475, 1163, 9355, 5259, 13451, 3211, 11403, 7307, 15499, + 651, 8843, 4747, 12939, 2699, 10891, 6795, 14987, 1675, 9867, 5771, 13963, 3723, 11915, 7819, 16011, + 395, 8587, 4491, 12683, 2443, 10635, 6539, 14731, 1419, 9611, 5515, 13707, 3467, 11659, 7563, 15755, + 907, 9099, 5003, 13195, 2955, 11147, 7051, 15243, 1931, 10123, 6027, 14219, 3979, 12171, 8075, 16267, + 75, 8267, 4171, 12363, 2123, 10315, 6219, 14411, 1099, 9291, 5195, 13387, 3147, 11339, 7243, 15435, + 587, 8779, 4683, 12875, 2635, 10827, 6731, 14923, 1611, 9803, 5707, 13899, 3659, 11851, 7755, 15947, + 331, 8523, 4427, 12619, 2379, 10571, 6475, 14667, 1355, 9547, 5451, 13643, 3403, 11595, 7499, 15691, + 843, 9035, 4939, 13131, 2891, 11083, 6987, 15179, 1867, 10059, 5963, 14155, 3915, 12107, 8011, 16203, + 203, 8395, 4299, 12491, 2251, 10443, 6347, 14539, 1227, 9419, 5323, 13515, 3275, 11467, 7371, 15563, + 715, 8907, 4811, 13003, 2763, 10955, 6859, 15051, 1739, 9931, 5835, 14027, 3787, 11979, 7883, 16075, + 459, 8651, 4555, 12747, 2507, 10699, 6603, 14795, 1483, 9675, 5579, 13771, 3531, 11723, 7627, 15819, + 971, 9163, 5067, 13259, 3019, 11211, 7115, 15307, 1995, 10187, 6091, 14283, 4043, 12235, 8139, 16331, + 43, 8235, 4139, 12331, 2091, 10283, 6187, 14379, 1067, 9259, 5163, 13355, 3115, 11307, 7211, 15403, + 555, 8747, 4651, 12843, 2603, 10795, 6699, 14891, 1579, 9771, 5675, 13867, 3627, 11819, 7723, 15915, + 299, 8491, 4395, 12587, 2347, 10539, 6443, 14635, 1323, 9515, 5419, 13611, 3371, 11563, 7467, 15659, + 811, 9003, 4907, 13099, 2859, 11051, 6955, 15147, 
1835, 10027, 5931, 14123, 3883, 12075, 7979, 16171, + 171, 8363, 4267, 12459, 2219, 10411, 6315, 14507, 1195, 9387, 5291, 13483, 3243, 11435, 7339, 15531, + 683, 8875, 4779, 12971, 2731, 10923, 6827, 15019, 1707, 9899, 5803, 13995, 3755, 11947, 7851, 16043, + 427, 8619, 4523, 12715, 2475, 10667, 6571, 14763, 1451, 9643, 5547, 13739, 3499, 11691, 7595, 15787, + 939, 9131, 5035, 13227, 2987, 11179, 7083, 15275, 1963, 10155, 6059, 14251, 4011, 12203, 8107, 16299, + 107, 8299, 4203, 12395, 2155, 10347, 6251, 14443, 1131, 9323, 5227, 13419, 3179, 11371, 7275, 15467, + 619, 8811, 4715, 12907, 2667, 10859, 6763, 14955, 1643, 9835, 5739, 13931, 3691, 11883, 7787, 15979, + 363, 8555, 4459, 12651, 2411, 10603, 6507, 14699, 1387, 9579, 5483, 13675, 3435, 11627, 7531, 15723, + 875, 9067, 4971, 13163, 2923, 11115, 7019, 15211, 1899, 10091, 5995, 14187, 3947, 12139, 8043, 16235, + 235, 8427, 4331, 12523, 2283, 10475, 6379, 14571, 1259, 9451, 5355, 13547, 3307, 11499, 7403, 15595, + 747, 8939, 4843, 13035, 2795, 10987, 6891, 15083, 1771, 9963, 5867, 14059, 3819, 12011, 7915, 16107, + 491, 8683, 4587, 12779, 2539, 10731, 6635, 14827, 1515, 9707, 5611, 13803, 3563, 11755, 7659, 15851, + 1003, 9195, 5099, 13291, 3051, 11243, 7147, 15339, 2027, 10219, 6123, 14315, 4075, 12267, 8171, 16363, + 27, 8219, 4123, 12315, 2075, 10267, 6171, 14363, 1051, 9243, 5147, 13339, 3099, 11291, 7195, 15387, + 539, 8731, 4635, 12827, 2587, 10779, 6683, 14875, 1563, 9755, 5659, 13851, 3611, 11803, 7707, 15899, + 283, 8475, 4379, 12571, 2331, 10523, 6427, 14619, 1307, 9499, 5403, 13595, 3355, 11547, 7451, 15643, + 795, 8987, 4891, 13083, 2843, 11035, 6939, 15131, 1819, 10011, 5915, 14107, 3867, 12059, 7963, 16155, + 155, 8347, 4251, 12443, 2203, 10395, 6299, 14491, 1179, 9371, 5275, 13467, 3227, 11419, 7323, 15515, + 667, 8859, 4763, 12955, 2715, 10907, 6811, 15003, 1691, 9883, 5787, 13979, 3739, 11931, 7835, 16027, + 411, 8603, 4507, 12699, 2459, 10651, 6555, 14747, 1435, 9627, 5531, 13723, 3483, 11675, 
7579, 15771, + 923, 9115, 5019, 13211, 2971, 11163, 7067, 15259, 1947, 10139, 6043, 14235, 3995, 12187, 8091, 16283, + 91, 8283, 4187, 12379, 2139, 10331, 6235, 14427, 1115, 9307, 5211, 13403, 3163, 11355, 7259, 15451, + 603, 8795, 4699, 12891, 2651, 10843, 6747, 14939, 1627, 9819, 5723, 13915, 3675, 11867, 7771, 15963, + 347, 8539, 4443, 12635, 2395, 10587, 6491, 14683, 1371, 9563, 5467, 13659, 3419, 11611, 7515, 15707, + 859, 9051, 4955, 13147, 2907, 11099, 7003, 15195, 1883, 10075, 5979, 14171, 3931, 12123, 8027, 16219, + 219, 8411, 4315, 12507, 2267, 10459, 6363, 14555, 1243, 9435, 5339, 13531, 3291, 11483, 7387, 15579, + 731, 8923, 4827, 13019, 2779, 10971, 6875, 15067, 1755, 9947, 5851, 14043, 3803, 11995, 7899, 16091, + 475, 8667, 4571, 12763, 2523, 10715, 6619, 14811, 1499, 9691, 5595, 13787, 3547, 11739, 7643, 15835, + 987, 9179, 5083, 13275, 3035, 11227, 7131, 15323, 2011, 10203, 6107, 14299, 4059, 12251, 8155, 16347, + 59, 8251, 4155, 12347, 2107, 10299, 6203, 14395, 1083, 9275, 5179, 13371, 3131, 11323, 7227, 15419, + 571, 8763, 4667, 12859, 2619, 10811, 6715, 14907, 1595, 9787, 5691, 13883, 3643, 11835, 7739, 15931, + 315, 8507, 4411, 12603, 2363, 10555, 6459, 14651, 1339, 9531, 5435, 13627, 3387, 11579, 7483, 15675, + 827, 9019, 4923, 13115, 2875, 11067, 6971, 15163, 1851, 10043, 5947, 14139, 3899, 12091, 7995, 16187, + 187, 8379, 4283, 12475, 2235, 10427, 6331, 14523, 1211, 9403, 5307, 13499, 3259, 11451, 7355, 15547, + 699, 8891, 4795, 12987, 2747, 10939, 6843, 15035, 1723, 9915, 5819, 14011, 3771, 11963, 7867, 16059, + 443, 8635, 4539, 12731, 2491, 10683, 6587, 14779, 1467, 9659, 5563, 13755, 3515, 11707, 7611, 15803, + 955, 9147, 5051, 13243, 3003, 11195, 7099, 15291, 1979, 10171, 6075, 14267, 4027, 12219, 8123, 16315, + 123, 8315, 4219, 12411, 2171, 10363, 6267, 14459, 1147, 9339, 5243, 13435, 3195, 11387, 7291, 15483, + 635, 8827, 4731, 12923, 2683, 10875, 6779, 14971, 1659, 9851, 5755, 13947, 3707, 11899, 7803, 15995, + 379, 8571, 4475, 12667, 
2427, 10619, 6523, 14715, 1403, 9595, 5499, 13691, 3451, 11643, 7547, 15739, + 891, 9083, 4987, 13179, 2939, 11131, 7035, 15227, 1915, 10107, 6011, 14203, 3963, 12155, 8059, 16251, + 251, 8443, 4347, 12539, 2299, 10491, 6395, 14587, 1275, 9467, 5371, 13563, 3323, 11515, 7419, 15611, + 763, 8955, 4859, 13051, 2811, 11003, 6907, 15099, 1787, 9979, 5883, 14075, 3835, 12027, 7931, 16123, + 507, 8699, 4603, 12795, 2555, 10747, 6651, 14843, 1531, 9723, 5627, 13819, 3579, 11771, 7675, 15867, + 1019, 9211, 5115, 13307, 3067, 11259, 7163, 15355, 2043, 10235, 6139, 14331, 4091, 12283, 8187, 16379, + 7, 8199, 4103, 12295, 2055, 10247, 6151, 14343, 1031, 9223, 5127, 13319, 3079, 11271, 7175, 15367, + 519, 8711, 4615, 12807, 2567, 10759, 6663, 14855, 1543, 9735, 5639, 13831, 3591, 11783, 7687, 15879, + 263, 8455, 4359, 12551, 2311, 10503, 6407, 14599, 1287, 9479, 5383, 13575, 3335, 11527, 7431, 15623, + 775, 8967, 4871, 13063, 2823, 11015, 6919, 15111, 1799, 9991, 5895, 14087, 3847, 12039, 7943, 16135, + 135, 8327, 4231, 12423, 2183, 10375, 6279, 14471, 1159, 9351, 5255, 13447, 3207, 11399, 7303, 15495, + 647, 8839, 4743, 12935, 2695, 10887, 6791, 14983, 1671, 9863, 5767, 13959, 3719, 11911, 7815, 16007, + 391, 8583, 4487, 12679, 2439, 10631, 6535, 14727, 1415, 9607, 5511, 13703, 3463, 11655, 7559, 15751, + 903, 9095, 4999, 13191, 2951, 11143, 7047, 15239, 1927, 10119, 6023, 14215, 3975, 12167, 8071, 16263, + 71, 8263, 4167, 12359, 2119, 10311, 6215, 14407, 1095, 9287, 5191, 13383, 3143, 11335, 7239, 15431, + 583, 8775, 4679, 12871, 2631, 10823, 6727, 14919, 1607, 9799, 5703, 13895, 3655, 11847, 7751, 15943, + 327, 8519, 4423, 12615, 2375, 10567, 6471, 14663, 1351, 9543, 5447, 13639, 3399, 11591, 7495, 15687, + 839, 9031, 4935, 13127, 2887, 11079, 6983, 15175, 1863, 10055, 5959, 14151, 3911, 12103, 8007, 16199, + 199, 8391, 4295, 12487, 2247, 10439, 6343, 14535, 1223, 9415, 5319, 13511, 3271, 11463, 7367, 15559, + 711, 8903, 4807, 12999, 2759, 10951, 6855, 15047, 1735, 9927, 
5831, 14023, 3783, 11975, 7879, 16071, + 455, 8647, 4551, 12743, 2503, 10695, 6599, 14791, 1479, 9671, 5575, 13767, 3527, 11719, 7623, 15815, + 967, 9159, 5063, 13255, 3015, 11207, 7111, 15303, 1991, 10183, 6087, 14279, 4039, 12231, 8135, 16327, + 39, 8231, 4135, 12327, 2087, 10279, 6183, 14375, 1063, 9255, 5159, 13351, 3111, 11303, 7207, 15399, + 551, 8743, 4647, 12839, 2599, 10791, 6695, 14887, 1575, 9767, 5671, 13863, 3623, 11815, 7719, 15911, + 295, 8487, 4391, 12583, 2343, 10535, 6439, 14631, 1319, 9511, 5415, 13607, 3367, 11559, 7463, 15655, + 807, 8999, 4903, 13095, 2855, 11047, 6951, 15143, 1831, 10023, 5927, 14119, 3879, 12071, 7975, 16167, + 167, 8359, 4263, 12455, 2215, 10407, 6311, 14503, 1191, 9383, 5287, 13479, 3239, 11431, 7335, 15527, + 679, 8871, 4775, 12967, 2727, 10919, 6823, 15015, 1703, 9895, 5799, 13991, 3751, 11943, 7847, 16039, + 423, 8615, 4519, 12711, 2471, 10663, 6567, 14759, 1447, 9639, 5543, 13735, 3495, 11687, 7591, 15783, + 935, 9127, 5031, 13223, 2983, 11175, 7079, 15271, 1959, 10151, 6055, 14247, 4007, 12199, 8103, 16295, + 103, 8295, 4199, 12391, 2151, 10343, 6247, 14439, 1127, 9319, 5223, 13415, 3175, 11367, 7271, 15463, + 615, 8807, 4711, 12903, 2663, 10855, 6759, 14951, 1639, 9831, 5735, 13927, 3687, 11879, 7783, 15975, + 359, 8551, 4455, 12647, 2407, 10599, 6503, 14695, 1383, 9575, 5479, 13671, 3431, 11623, 7527, 15719, + 871, 9063, 4967, 13159, 2919, 11111, 7015, 15207, 1895, 10087, 5991, 14183, 3943, 12135, 8039, 16231, + 231, 8423, 4327, 12519, 2279, 10471, 6375, 14567, 1255, 9447, 5351, 13543, 3303, 11495, 7399, 15591, + 743, 8935, 4839, 13031, 2791, 10983, 6887, 15079, 1767, 9959, 5863, 14055, 3815, 12007, 7911, 16103, + 487, 8679, 4583, 12775, 2535, 10727, 6631, 14823, 1511, 9703, 5607, 13799, 3559, 11751, 7655, 15847, + 999, 9191, 5095, 13287, 3047, 11239, 7143, 15335, 2023, 10215, 6119, 14311, 4071, 12263, 8167, 16359, + 23, 8215, 4119, 12311, 2071, 10263, 6167, 14359, 1047, 9239, 5143, 13335, 3095, 11287, 7191, 15383, 
+ 535, 8727, 4631, 12823, 2583, 10775, 6679, 14871, 1559, 9751, 5655, 13847, 3607, 11799, 7703, 15895, + 279, 8471, 4375, 12567, 2327, 10519, 6423, 14615, 1303, 9495, 5399, 13591, 3351, 11543, 7447, 15639, + 791, 8983, 4887, 13079, 2839, 11031, 6935, 15127, 1815, 10007, 5911, 14103, 3863, 12055, 7959, 16151, + 151, 8343, 4247, 12439, 2199, 10391, 6295, 14487, 1175, 9367, 5271, 13463, 3223, 11415, 7319, 15511, + 663, 8855, 4759, 12951, 2711, 10903, 6807, 14999, 1687, 9879, 5783, 13975, 3735, 11927, 7831, 16023, + 407, 8599, 4503, 12695, 2455, 10647, 6551, 14743, 1431, 9623, 5527, 13719, 3479, 11671, 7575, 15767, + 919, 9111, 5015, 13207, 2967, 11159, 7063, 15255, 1943, 10135, 6039, 14231, 3991, 12183, 8087, 16279, + 87, 8279, 4183, 12375, 2135, 10327, 6231, 14423, 1111, 9303, 5207, 13399, 3159, 11351, 7255, 15447, + 599, 8791, 4695, 12887, 2647, 10839, 6743, 14935, 1623, 9815, 5719, 13911, 3671, 11863, 7767, 15959, + 343, 8535, 4439, 12631, 2391, 10583, 6487, 14679, 1367, 9559, 5463, 13655, 3415, 11607, 7511, 15703, + 855, 9047, 4951, 13143, 2903, 11095, 6999, 15191, 1879, 10071, 5975, 14167, 3927, 12119, 8023, 16215, + 215, 8407, 4311, 12503, 2263, 10455, 6359, 14551, 1239, 9431, 5335, 13527, 3287, 11479, 7383, 15575, + 727, 8919, 4823, 13015, 2775, 10967, 6871, 15063, 1751, 9943, 5847, 14039, 3799, 11991, 7895, 16087, + 471, 8663, 4567, 12759, 2519, 10711, 6615, 14807, 1495, 9687, 5591, 13783, 3543, 11735, 7639, 15831, + 983, 9175, 5079, 13271, 3031, 11223, 7127, 15319, 2007, 10199, 6103, 14295, 4055, 12247, 8151, 16343, + 55, 8247, 4151, 12343, 2103, 10295, 6199, 14391, 1079, 9271, 5175, 13367, 3127, 11319, 7223, 15415, + 567, 8759, 4663, 12855, 2615, 10807, 6711, 14903, 1591, 9783, 5687, 13879, 3639, 11831, 7735, 15927, + 311, 8503, 4407, 12599, 2359, 10551, 6455, 14647, 1335, 9527, 5431, 13623, 3383, 11575, 7479, 15671, + 823, 9015, 4919, 13111, 2871, 11063, 6967, 15159, 1847, 10039, 5943, 14135, 3895, 12087, 7991, 16183, + 183, 8375, 4279, 12471, 2231, 10423, 
6327, 14519, 1207, 9399, 5303, 13495, 3255, 11447, 7351, 15543, + 695, 8887, 4791, 12983, 2743, 10935, 6839, 15031, 1719, 9911, 5815, 14007, 3767, 11959, 7863, 16055, + 439, 8631, 4535, 12727, 2487, 10679, 6583, 14775, 1463, 9655, 5559, 13751, 3511, 11703, 7607, 15799, + 951, 9143, 5047, 13239, 2999, 11191, 7095, 15287, 1975, 10167, 6071, 14263, 4023, 12215, 8119, 16311, + 119, 8311, 4215, 12407, 2167, 10359, 6263, 14455, 1143, 9335, 5239, 13431, 3191, 11383, 7287, 15479, + 631, 8823, 4727, 12919, 2679, 10871, 6775, 14967, 1655, 9847, 5751, 13943, 3703, 11895, 7799, 15991, + 375, 8567, 4471, 12663, 2423, 10615, 6519, 14711, 1399, 9591, 5495, 13687, 3447, 11639, 7543, 15735, + 887, 9079, 4983, 13175, 2935, 11127, 7031, 15223, 1911, 10103, 6007, 14199, 3959, 12151, 8055, 16247, + 247, 8439, 4343, 12535, 2295, 10487, 6391, 14583, 1271, 9463, 5367, 13559, 3319, 11511, 7415, 15607, + 759, 8951, 4855, 13047, 2807, 10999, 6903, 15095, 1783, 9975, 5879, 14071, 3831, 12023, 7927, 16119, + 503, 8695, 4599, 12791, 2551, 10743, 6647, 14839, 1527, 9719, 5623, 13815, 3575, 11767, 7671, 15863, + 1015, 9207, 5111, 13303, 3063, 11255, 7159, 15351, 2039, 10231, 6135, 14327, 4087, 12279, 8183, 16375, + 15, 8207, 4111, 12303, 2063, 10255, 6159, 14351, 1039, 9231, 5135, 13327, 3087, 11279, 7183, 15375, + 527, 8719, 4623, 12815, 2575, 10767, 6671, 14863, 1551, 9743, 5647, 13839, 3599, 11791, 7695, 15887, + 271, 8463, 4367, 12559, 2319, 10511, 6415, 14607, 1295, 9487, 5391, 13583, 3343, 11535, 7439, 15631, + 783, 8975, 4879, 13071, 2831, 11023, 6927, 15119, 1807, 9999, 5903, 14095, 3855, 12047, 7951, 16143, + 143, 8335, 4239, 12431, 2191, 10383, 6287, 14479, 1167, 9359, 5263, 13455, 3215, 11407, 7311, 15503, + 655, 8847, 4751, 12943, 2703, 10895, 6799, 14991, 1679, 9871, 5775, 13967, 3727, 11919, 7823, 16015, + 399, 8591, 4495, 12687, 2447, 10639, 6543, 14735, 1423, 9615, 5519, 13711, 3471, 11663, 7567, 15759, + 911, 9103, 5007, 13199, 2959, 11151, 7055, 15247, 1935, 10127, 6031, 14223, 
3983, 12175, 8079, 16271, + 79, 8271, 4175, 12367, 2127, 10319, 6223, 14415, 1103, 9295, 5199, 13391, 3151, 11343, 7247, 15439, + 591, 8783, 4687, 12879, 2639, 10831, 6735, 14927, 1615, 9807, 5711, 13903, 3663, 11855, 7759, 15951, + 335, 8527, 4431, 12623, 2383, 10575, 6479, 14671, 1359, 9551, 5455, 13647, 3407, 11599, 7503, 15695, + 847, 9039, 4943, 13135, 2895, 11087, 6991, 15183, 1871, 10063, 5967, 14159, 3919, 12111, 8015, 16207, + 207, 8399, 4303, 12495, 2255, 10447, 6351, 14543, 1231, 9423, 5327, 13519, 3279, 11471, 7375, 15567, + 719, 8911, 4815, 13007, 2767, 10959, 6863, 15055, 1743, 9935, 5839, 14031, 3791, 11983, 7887, 16079, + 463, 8655, 4559, 12751, 2511, 10703, 6607, 14799, 1487, 9679, 5583, 13775, 3535, 11727, 7631, 15823, + 975, 9167, 5071, 13263, 3023, 11215, 7119, 15311, 1999, 10191, 6095, 14287, 4047, 12239, 8143, 16335, + 47, 8239, 4143, 12335, 2095, 10287, 6191, 14383, 1071, 9263, 5167, 13359, 3119, 11311, 7215, 15407, + 559, 8751, 4655, 12847, 2607, 10799, 6703, 14895, 1583, 9775, 5679, 13871, 3631, 11823, 7727, 15919, + 303, 8495, 4399, 12591, 2351, 10543, 6447, 14639, 1327, 9519, 5423, 13615, 3375, 11567, 7471, 15663, + 815, 9007, 4911, 13103, 2863, 11055, 6959, 15151, 1839, 10031, 5935, 14127, 3887, 12079, 7983, 16175, + 175, 8367, 4271, 12463, 2223, 10415, 6319, 14511, 1199, 9391, 5295, 13487, 3247, 11439, 7343, 15535, + 687, 8879, 4783, 12975, 2735, 10927, 6831, 15023, 1711, 9903, 5807, 13999, 3759, 11951, 7855, 16047, + 431, 8623, 4527, 12719, 2479, 10671, 6575, 14767, 1455, 9647, 5551, 13743, 3503, 11695, 7599, 15791, + 943, 9135, 5039, 13231, 2991, 11183, 7087, 15279, 1967, 10159, 6063, 14255, 4015, 12207, 8111, 16303, + 111, 8303, 4207, 12399, 2159, 10351, 6255, 14447, 1135, 9327, 5231, 13423, 3183, 11375, 7279, 15471, + 623, 8815, 4719, 12911, 2671, 10863, 6767, 14959, 1647, 9839, 5743, 13935, 3695, 11887, 7791, 15983, + 367, 8559, 4463, 12655, 2415, 10607, 6511, 14703, 1391, 9583, 5487, 13679, 3439, 11631, 7535, 15727, + 879, 9071, 
4975, 13167, 2927, 11119, 7023, 15215, 1903, 10095, 5999, 14191, 3951, 12143, 8047, 16239, + 239, 8431, 4335, 12527, 2287, 10479, 6383, 14575, 1263, 9455, 5359, 13551, 3311, 11503, 7407, 15599, + 751, 8943, 4847, 13039, 2799, 10991, 6895, 15087, 1775, 9967, 5871, 14063, 3823, 12015, 7919, 16111, + 495, 8687, 4591, 12783, 2543, 10735, 6639, 14831, 1519, 9711, 5615, 13807, 3567, 11759, 7663, 15855, + 1007, 9199, 5103, 13295, 3055, 11247, 7151, 15343, 2031, 10223, 6127, 14319, 4079, 12271, 8175, 16367, + 31, 8223, 4127, 12319, 2079, 10271, 6175, 14367, 1055, 9247, 5151, 13343, 3103, 11295, 7199, 15391, + 543, 8735, 4639, 12831, 2591, 10783, 6687, 14879, 1567, 9759, 5663, 13855, 3615, 11807, 7711, 15903, + 287, 8479, 4383, 12575, 2335, 10527, 6431, 14623, 1311, 9503, 5407, 13599, 3359, 11551, 7455, 15647, + 799, 8991, 4895, 13087, 2847, 11039, 6943, 15135, 1823, 10015, 5919, 14111, 3871, 12063, 7967, 16159, + 159, 8351, 4255, 12447, 2207, 10399, 6303, 14495, 1183, 9375, 5279, 13471, 3231, 11423, 7327, 15519, + 671, 8863, 4767, 12959, 2719, 10911, 6815, 15007, 1695, 9887, 5791, 13983, 3743, 11935, 7839, 16031, + 415, 8607, 4511, 12703, 2463, 10655, 6559, 14751, 1439, 9631, 5535, 13727, 3487, 11679, 7583, 15775, + 927, 9119, 5023, 13215, 2975, 11167, 7071, 15263, 1951, 10143, 6047, 14239, 3999, 12191, 8095, 16287, + 95, 8287, 4191, 12383, 2143, 10335, 6239, 14431, 1119, 9311, 5215, 13407, 3167, 11359, 7263, 15455, + 607, 8799, 4703, 12895, 2655, 10847, 6751, 14943, 1631, 9823, 5727, 13919, 3679, 11871, 7775, 15967, + 351, 8543, 4447, 12639, 2399, 10591, 6495, 14687, 1375, 9567, 5471, 13663, 3423, 11615, 7519, 15711, + 863, 9055, 4959, 13151, 2911, 11103, 7007, 15199, 1887, 10079, 5983, 14175, 3935, 12127, 8031, 16223, + 223, 8415, 4319, 12511, 2271, 10463, 6367, 14559, 1247, 9439, 5343, 13535, 3295, 11487, 7391, 15583, + 735, 8927, 4831, 13023, 2783, 10975, 6879, 15071, 1759, 9951, 5855, 14047, 3807, 11999, 7903, 16095, + 479, 8671, 4575, 12767, 2527, 10719, 6623, 14815, 
1503, 9695, 5599, 13791, 3551, 11743, 7647, 15839, + 991, 9183, 5087, 13279, 3039, 11231, 7135, 15327, 2015, 10207, 6111, 14303, 4063, 12255, 8159, 16351, + 63, 8255, 4159, 12351, 2111, 10303, 6207, 14399, 1087, 9279, 5183, 13375, 3135, 11327, 7231, 15423, + 575, 8767, 4671, 12863, 2623, 10815, 6719, 14911, 1599, 9791, 5695, 13887, 3647, 11839, 7743, 15935, + 319, 8511, 4415, 12607, 2367, 10559, 6463, 14655, 1343, 9535, 5439, 13631, 3391, 11583, 7487, 15679, + 831, 9023, 4927, 13119, 2879, 11071, 6975, 15167, 1855, 10047, 5951, 14143, 3903, 12095, 7999, 16191, + 191, 8383, 4287, 12479, 2239, 10431, 6335, 14527, 1215, 9407, 5311, 13503, 3263, 11455, 7359, 15551, + 703, 8895, 4799, 12991, 2751, 10943, 6847, 15039, 1727, 9919, 5823, 14015, 3775, 11967, 7871, 16063, + 447, 8639, 4543, 12735, 2495, 10687, 6591, 14783, 1471, 9663, 5567, 13759, 3519, 11711, 7615, 15807, + 959, 9151, 5055, 13247, 3007, 11199, 7103, 15295, 1983, 10175, 6079, 14271, 4031, 12223, 8127, 16319, + 127, 8319, 4223, 12415, 2175, 10367, 6271, 14463, 1151, 9343, 5247, 13439, 3199, 11391, 7295, 15487, + 639, 8831, 4735, 12927, 2687, 10879, 6783, 14975, 1663, 9855, 5759, 13951, 3711, 11903, 7807, 15999, + 383, 8575, 4479, 12671, 2431, 10623, 6527, 14719, 1407, 9599, 5503, 13695, 3455, 11647, 7551, 15743, + 895, 9087, 4991, 13183, 2943, 11135, 7039, 15231, 1919, 10111, 6015, 14207, 3967, 12159, 8063, 16255, + 255, 8447, 4351, 12543, 2303, 10495, 6399, 14591, 1279, 9471, 5375, 13567, 3327, 11519, 7423, 15615, + 767, 8959, 4863, 13055, 2815, 11007, 6911, 15103, 1791, 9983, 5887, 14079, 3839, 12031, 7935, 16127, + 511, 8703, 4607, 12799, 2559, 10751, 6655, 14847, 1535, 9727, 5631, 13823, 3583, 11775, 7679, 15871, + 1023, 9215, 5119, 13311, 3071, 11263, 7167, 15359, 2047, 10239, 6143, 14335, 4095, 12287, 8191, 16383, +}; +} +} diff --git a/include/kfr/data/sincos.hpp b/include/kfr/data/sincos.hpp @@ -0,0 +1,308 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * 
+ * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. + */ +#pragma once + +#include "../base/kfr.h" +#include <cstdint> + +namespace kfr +{ + +namespace data +{ + +// data generated by mpfr +template <typename T> +constexpr T c_sin_table[256] = { + /* sin(2*pi* 0/ 256) */ T(0.0), + /* sin(2*pi* 1/ 256) */ T(0.02454122852291228803173452945928292506547), + /* sin(2*pi* 2/ 256) */ T(0.04906767432741801425495497694268265831475), + /* sin(2*pi* 3/ 256) */ T(0.0735645635996674235294656215752343218133), + /* sin(2*pi* 4/ 256) */ T(0.09801714032956060199419556388864184586114), + /* sin(2*pi* 5/ 256) */ T(0.1224106751992161984987044741509457875752), + /* sin(2*pi* 6/ 256) */ T(0.1467304744553617516588501296467178197062), + /* sin(2*pi* 7/ 256) */ T(0.1709618887603012263636423572082635319663), + /* sin(2*pi* 8/ 256) */ T(0.1950903220161282678482848684770222409277), + /* sin(2*pi* 9/ 256) */ T(0.2191012401568697972277375474973577988484), + /* sin(2*pi* 10/ 256) */ T(0.242980179903263889948274162077471118321), + /* sin(2*pi* 11/ 256) */ T(0.2667127574748983863252865151164363940421), + /* sin(2*pi* 12/ 256) */ T(0.2902846772544623676361923758173952746915), + /* 
sin(2*pi* 13/ 256) */ T(0.3136817403988914766564788459941003099934), + /* sin(2*pi* 14/ 256) */ T(0.3368898533922200506892532126191475704778), + /* sin(2*pi* 15/ 256) */ T(0.3598950365349881487751045723267564202023), + /* sin(2*pi* 16/ 256) */ T(0.3826834323650897717284599840303988667613), + /* sin(2*pi* 17/ 256) */ T(0.4052413140049898709084813055050524665119), + /* sin(2*pi* 18/ 256) */ T(0.4275550934302820943209668568887985343046), + /* sin(2*pi* 19/ 256) */ T(0.4496113296546066000462945794242270758832), + /* sin(2*pi* 20/ 256) */ T(0.4713967368259976485563876259052543776575), + /* sin(2*pi* 21/ 256) */ T(0.4928981922297840368730266887588092682397), + /* sin(2*pi* 22/ 256) */ T(0.514102744193221726593693838968815772608), + /* sin(2*pi* 23/ 256) */ T(0.5349976198870972106630769046370179155603), + /* sin(2*pi* 24/ 256) */ T(0.5555702330196022247428308139485328743749), + /* sin(2*pi* 25/ 256) */ T(0.575808191417845300745972453815730841776), + /* sin(2*pi* 26/ 256) */ T(0.5956993044924333434670365288299698895119), + /* sin(2*pi* 27/ 256) */ T(0.6152315905806268454849135634139842776594), + /* sin(2*pi* 28/ 256) */ T(0.6343932841636454982151716132254933706757), + /* sin(2*pi* 29/ 256) */ T(0.6531728429537767640842030136563054150769), + /* sin(2*pi* 30/ 256) */ T(0.6715589548470184006253768504274218032288), + /* sin(2*pi* 31/ 256) */ T(0.6895405447370669246167306299574847028455), + /* sin(2*pi* 32/ 256) */ T(0.7071067811865475244008443621048490392848), + /* sin(2*pi* 33/ 256) */ T(0.7242470829514669209410692432905531674831), + /* sin(2*pi* 34/ 256) */ T(0.740951125354959091175616897495162729729), + /* sin(2*pi* 35/ 256) */ T(0.7572088465064845475754640536057844730404), + /* sin(2*pi* 36/ 256) */ T(0.773010453362736960810906609758469800971), + /* sin(2*pi* 37/ 256) */ T(0.7883464276266062620091647053596892826565), + /* sin(2*pi* 38/ 256) */ T(0.8032075314806449098066765129631419238796), + /* sin(2*pi* 39/ 256) */ T(0.817584813151583696504920884130633809471), + /* 
sin(2*pi* 40/ 256) */ T(0.8314696123025452370787883776179057567386), + /* sin(2*pi* 41/ 256) */ T(0.8448535652497070732595712051049570977198), + /* sin(2*pi* 42/ 256) */ T(0.8577286100002720699022699842847701370425), + /* sin(2*pi* 43/ 256) */ T(0.8700869911087114186522924044838488439108), + /* sin(2*pi* 44/ 256) */ T(0.8819212643483550297127568636603883495084), + /* sin(2*pi* 45/ 256) */ T(0.8932243011955153203424164474933979780006), + /* sin(2*pi* 46/ 256) */ T(0.9039892931234433315862002972305370487101), + /* sin(2*pi* 47/ 256) */ T(0.9142097557035306546350148293935774010447), + /* sin(2*pi* 48/ 256) */ T(0.9238795325112867561281831893967882868224), + /* sin(2*pi* 49/ 256) */ T(0.932992798834738887711660255543302498295), + /* sin(2*pi* 50/ 256) */ T(0.9415440651830207784125094025995023571856), + /* sin(2*pi* 51/ 256) */ T(0.9495281805930366671959360741893450282522), + /* sin(2*pi* 52/ 256) */ T(0.9569403357322088649357978869802699694828), + /* sin(2*pi* 53/ 256) */ T(0.9637760657954398666864643555078351536631), + /* sin(2*pi* 54/ 256) */ T(0.9700312531945439926039842072861002514569), + /* sin(2*pi* 55/ 256) */ T(0.975702130038528544460395766419527971644), + /* sin(2*pi* 56/ 256) */ T(0.9807852804032304491261822361342390369739), + /* sin(2*pi* 57/ 256) */ T(0.9852776423889412447740184331785477871601), + /* sin(2*pi* 58/ 256) */ T(0.9891765099647809734516737380162430639837), + /* sin(2*pi* 59/ 256) */ T(0.9924795345987099981567672516611178200108), + /* sin(2*pi* 60/ 256) */ T(0.9951847266721968862448369531094799215755), + /* sin(2*pi* 61/ 256) */ T(0.9972904566786902161355971401825678211717), + /* sin(2*pi* 62/ 256) */ T(0.9987954562051723927147716047591006944432), + /* sin(2*pi* 63/ 256) */ T(0.9996988186962042201157656496661721968501), + /* sin(2*pi* 64/ 256) */ T(1.0), + /* sin(2*pi* 65/ 256) */ T(0.9996988186962042201157656496661721968501), + /* sin(2*pi* 66/ 256) */ T(0.9987954562051723927147716047591006944432), + /* sin(2*pi* 67/ 256) */ 
T(0.9972904566786902161355971401825678211717), + /* sin(2*pi* 68/ 256) */ T(0.9951847266721968862448369531094799215755), + /* sin(2*pi* 69/ 256) */ T(0.9924795345987099981567672516611178200108), + /* sin(2*pi* 70/ 256) */ T(0.9891765099647809734516737380162430639837), + /* sin(2*pi* 71/ 256) */ T(0.9852776423889412447740184331785477871601), + /* sin(2*pi* 72/ 256) */ T(0.9807852804032304491261822361342390369739), + /* sin(2*pi* 73/ 256) */ T(0.975702130038528544460395766419527971644), + /* sin(2*pi* 74/ 256) */ T(0.9700312531945439926039842072861002514569), + /* sin(2*pi* 75/ 256) */ T(0.9637760657954398666864643555078351536631), + /* sin(2*pi* 76/ 256) */ T(0.9569403357322088649357978869802699694828), + /* sin(2*pi* 77/ 256) */ T(0.9495281805930366671959360741893450282522), + /* sin(2*pi* 78/ 256) */ T(0.9415440651830207784125094025995023571856), + /* sin(2*pi* 79/ 256) */ T(0.932992798834738887711660255543302498295), + /* sin(2*pi* 80/ 256) */ T(0.9238795325112867561281831893967882868224), + /* sin(2*pi* 81/ 256) */ T(0.9142097557035306546350148293935774010447), + /* sin(2*pi* 82/ 256) */ T(0.9039892931234433315862002972305370487101), + /* sin(2*pi* 83/ 256) */ T(0.8932243011955153203424164474933979780006), + /* sin(2*pi* 84/ 256) */ T(0.8819212643483550297127568636603883495084), + /* sin(2*pi* 85/ 256) */ T(0.8700869911087114186522924044838488439108), + /* sin(2*pi* 86/ 256) */ T(0.8577286100002720699022699842847701370425), + /* sin(2*pi* 87/ 256) */ T(0.8448535652497070732595712051049570977198), + /* sin(2*pi* 88/ 256) */ T(0.8314696123025452370787883776179057567386), + /* sin(2*pi* 89/ 256) */ T(0.817584813151583696504920884130633809471), + /* sin(2*pi* 90/ 256) */ T(0.8032075314806449098066765129631419238796), + /* sin(2*pi* 91/ 256) */ T(0.7883464276266062620091647053596892826565), + /* sin(2*pi* 92/ 256) */ T(0.773010453362736960810906609758469800971), + /* sin(2*pi* 93/ 256) */ T(0.7572088465064845475754640536057844730404), + /* sin(2*pi* 94/ 256) */ 
T(0.740951125354959091175616897495162729729), + /* sin(2*pi* 95/ 256) */ T(0.7242470829514669209410692432905531674831), + /* sin(2*pi* 96/ 256) */ T(0.7071067811865475244008443621048490392848), + /* sin(2*pi* 97/ 256) */ T(0.6895405447370669246167306299574847028455), + /* sin(2*pi* 98/ 256) */ T(0.6715589548470184006253768504274218032288), + /* sin(2*pi* 99/ 256) */ T(0.6531728429537767640842030136563054150769), + /* sin(2*pi* 100/ 256) */ T(0.6343932841636454982151716132254933706757), + /* sin(2*pi* 101/ 256) */ T(0.6152315905806268454849135634139842776594), + /* sin(2*pi* 102/ 256) */ T(0.5956993044924333434670365288299698895119), + /* sin(2*pi* 103/ 256) */ T(0.575808191417845300745972453815730841776), + /* sin(2*pi* 104/ 256) */ T(0.5555702330196022247428308139485328743749), + /* sin(2*pi* 105/ 256) */ T(0.5349976198870972106630769046370179155603), + /* sin(2*pi* 106/ 256) */ T(0.514102744193221726593693838968815772608), + /* sin(2*pi* 107/ 256) */ T(0.4928981922297840368730266887588092682397), + /* sin(2*pi* 108/ 256) */ T(0.4713967368259976485563876259052543776575), + /* sin(2*pi* 109/ 256) */ T(0.4496113296546066000462945794242270758832), + /* sin(2*pi* 110/ 256) */ T(0.4275550934302820943209668568887985343046), + /* sin(2*pi* 111/ 256) */ T(0.4052413140049898709084813055050524665119), + /* sin(2*pi* 112/ 256) */ T(0.3826834323650897717284599840303988667613), + /* sin(2*pi* 113/ 256) */ T(0.3598950365349881487751045723267564202023), + /* sin(2*pi* 114/ 256) */ T(0.3368898533922200506892532126191475704778), + /* sin(2*pi* 115/ 256) */ T(0.3136817403988914766564788459941003099934), + /* sin(2*pi* 116/ 256) */ T(0.2902846772544623676361923758173952746915), + /* sin(2*pi* 117/ 256) */ T(0.2667127574748983863252865151164363940421), + /* sin(2*pi* 118/ 256) */ T(0.242980179903263889948274162077471118321), + /* sin(2*pi* 119/ 256) */ T(0.2191012401568697972277375474973577988484), + /* sin(2*pi* 120/ 256) */ T(0.1950903220161282678482848684770222409277), + /* 
sin(2*pi* 121/ 256) */ T(0.1709618887603012263636423572082635319663), + /* sin(2*pi* 122/ 256) */ T(0.1467304744553617516588501296467178197062), + /* sin(2*pi* 123/ 256) */ T(0.1224106751992161984987044741509457875752), + /* sin(2*pi* 124/ 256) */ T(0.09801714032956060199419556388864184586114), + /* sin(2*pi* 125/ 256) */ T(0.0735645635996674235294656215752343218133), + /* sin(2*pi* 126/ 256) */ T(0.04906767432741801425495497694268265831475), + /* sin(2*pi* 127/ 256) */ T(0.02454122852291228803173452945928292506547), + /* sin(2*pi* 128/ 256) */ T(0.0), + /* sin(2*pi* 129/ 256) */ T(-0.02454122852291228803173452945928292506547), + /* sin(2*pi* 130/ 256) */ T(-0.04906767432741801425495497694268265831475), + /* sin(2*pi* 131/ 256) */ T(-0.0735645635996674235294656215752343218133), + /* sin(2*pi* 132/ 256) */ T(-0.09801714032956060199419556388864184586114), + /* sin(2*pi* 133/ 256) */ T(-0.1224106751992161984987044741509457875752), + /* sin(2*pi* 134/ 256) */ T(-0.1467304744553617516588501296467178197062), + /* sin(2*pi* 135/ 256) */ T(-0.1709618887603012263636423572082635319663), + /* sin(2*pi* 136/ 256) */ T(-0.1950903220161282678482848684770222409277), + /* sin(2*pi* 137/ 256) */ T(-0.2191012401568697972277375474973577988484), + /* sin(2*pi* 138/ 256) */ T(-0.242980179903263889948274162077471118321), + /* sin(2*pi* 139/ 256) */ T(-0.2667127574748983863252865151164363940421), + /* sin(2*pi* 140/ 256) */ T(-0.2902846772544623676361923758173952746915), + /* sin(2*pi* 141/ 256) */ T(-0.3136817403988914766564788459941003099934), + /* sin(2*pi* 142/ 256) */ T(-0.3368898533922200506892532126191475704778), + /* sin(2*pi* 143/ 256) */ T(-0.3598950365349881487751045723267564202023), + /* sin(2*pi* 144/ 256) */ T(-0.3826834323650897717284599840303988667613), + /* sin(2*pi* 145/ 256) */ T(-0.4052413140049898709084813055050524665119), + /* sin(2*pi* 146/ 256) */ T(-0.4275550934302820943209668568887985343046), + /* sin(2*pi* 147/ 256) */ 
T(-0.4496113296546066000462945794242270758832), + /* sin(2*pi* 148/ 256) */ T(-0.4713967368259976485563876259052543776575), + /* sin(2*pi* 149/ 256) */ T(-0.4928981922297840368730266887588092682397), + /* sin(2*pi* 150/ 256) */ T(-0.514102744193221726593693838968815772608), + /* sin(2*pi* 151/ 256) */ T(-0.5349976198870972106630769046370179155603), + /* sin(2*pi* 152/ 256) */ T(-0.5555702330196022247428308139485328743749), + /* sin(2*pi* 153/ 256) */ T(-0.575808191417845300745972453815730841776), + /* sin(2*pi* 154/ 256) */ T(-0.5956993044924333434670365288299698895119), + /* sin(2*pi* 155/ 256) */ T(-0.6152315905806268454849135634139842776594), + /* sin(2*pi* 156/ 256) */ T(-0.6343932841636454982151716132254933706757), + /* sin(2*pi* 157/ 256) */ T(-0.6531728429537767640842030136563054150769), + /* sin(2*pi* 158/ 256) */ T(-0.6715589548470184006253768504274218032288), + /* sin(2*pi* 159/ 256) */ T(-0.6895405447370669246167306299574847028455), + /* sin(2*pi* 160/ 256) */ T(-0.7071067811865475244008443621048490392848), + /* sin(2*pi* 161/ 256) */ T(-0.7242470829514669209410692432905531674831), + /* sin(2*pi* 162/ 256) */ T(-0.740951125354959091175616897495162729729), + /* sin(2*pi* 163/ 256) */ T(-0.7572088465064845475754640536057844730404), + /* sin(2*pi* 164/ 256) */ T(-0.773010453362736960810906609758469800971), + /* sin(2*pi* 165/ 256) */ T(-0.7883464276266062620091647053596892826565), + /* sin(2*pi* 166/ 256) */ T(-0.8032075314806449098066765129631419238796), + /* sin(2*pi* 167/ 256) */ T(-0.817584813151583696504920884130633809471), + /* sin(2*pi* 168/ 256) */ T(-0.8314696123025452370787883776179057567386), + /* sin(2*pi* 169/ 256) */ T(-0.8448535652497070732595712051049570977198), + /* sin(2*pi* 170/ 256) */ T(-0.8577286100002720699022699842847701370425), + /* sin(2*pi* 171/ 256) */ T(-0.8700869911087114186522924044838488439108), + /* sin(2*pi* 172/ 256) */ T(-0.8819212643483550297127568636603883495084), + /* sin(2*pi* 173/ 256) */ 
T(-0.8932243011955153203424164474933979780006), + /* sin(2*pi* 174/ 256) */ T(-0.9039892931234433315862002972305370487101), + /* sin(2*pi* 175/ 256) */ T(-0.9142097557035306546350148293935774010447), + /* sin(2*pi* 176/ 256) */ T(-0.9238795325112867561281831893967882868224), + /* sin(2*pi* 177/ 256) */ T(-0.932992798834738887711660255543302498295), + /* sin(2*pi* 178/ 256) */ T(-0.9415440651830207784125094025995023571856), + /* sin(2*pi* 179/ 256) */ T(-0.9495281805930366671959360741893450282522), + /* sin(2*pi* 180/ 256) */ T(-0.9569403357322088649357978869802699694828), + /* sin(2*pi* 181/ 256) */ T(-0.9637760657954398666864643555078351536631), + /* sin(2*pi* 182/ 256) */ T(-0.9700312531945439926039842072861002514569), + /* sin(2*pi* 183/ 256) */ T(-0.975702130038528544460395766419527971644), + /* sin(2*pi* 184/ 256) */ T(-0.9807852804032304491261822361342390369739), + /* sin(2*pi* 185/ 256) */ T(-0.9852776423889412447740184331785477871601), + /* sin(2*pi* 186/ 256) */ T(-0.9891765099647809734516737380162430639837), + /* sin(2*pi* 187/ 256) */ T(-0.9924795345987099981567672516611178200108), + /* sin(2*pi* 188/ 256) */ T(-0.9951847266721968862448369531094799215755), + /* sin(2*pi* 189/ 256) */ T(-0.9972904566786902161355971401825678211717), + /* sin(2*pi* 190/ 256) */ T(-0.9987954562051723927147716047591006944432), + /* sin(2*pi* 191/ 256) */ T(-0.9996988186962042201157656496661721968501), + /* sin(2*pi* 192/ 256) */ T(-1.0), + /* sin(2*pi* 193/ 256) */ T(-0.9996988186962042201157656496661721968501), + /* sin(2*pi* 194/ 256) */ T(-0.9987954562051723927147716047591006944432), + /* sin(2*pi* 195/ 256) */ T(-0.9972904566786902161355971401825678211717), + /* sin(2*pi* 196/ 256) */ T(-0.9951847266721968862448369531094799215755), + /* sin(2*pi* 197/ 256) */ T(-0.9924795345987099981567672516611178200108), + /* sin(2*pi* 198/ 256) */ T(-0.9891765099647809734516737380162430639837), + /* sin(2*pi* 199/ 256) */ T(-0.9852776423889412447740184331785477871601), + /* sin(2*pi* 
200/ 256) */ T(-0.9807852804032304491261822361342390369739), + /* sin(2*pi* 201/ 256) */ T(-0.975702130038528544460395766419527971644), + /* sin(2*pi* 202/ 256) */ T(-0.9700312531945439926039842072861002514569), + /* sin(2*pi* 203/ 256) */ T(-0.9637760657954398666864643555078351536631), + /* sin(2*pi* 204/ 256) */ T(-0.9569403357322088649357978869802699694828), + /* sin(2*pi* 205/ 256) */ T(-0.9495281805930366671959360741893450282522), + /* sin(2*pi* 206/ 256) */ T(-0.9415440651830207784125094025995023571856), + /* sin(2*pi* 207/ 256) */ T(-0.932992798834738887711660255543302498295), + /* sin(2*pi* 208/ 256) */ T(-0.9238795325112867561281831893967882868224), + /* sin(2*pi* 209/ 256) */ T(-0.9142097557035306546350148293935774010447), + /* sin(2*pi* 210/ 256) */ T(-0.9039892931234433315862002972305370487101), + /* sin(2*pi* 211/ 256) */ T(-0.8932243011955153203424164474933979780006), + /* sin(2*pi* 212/ 256) */ T(-0.8819212643483550297127568636603883495084), + /* sin(2*pi* 213/ 256) */ T(-0.8700869911087114186522924044838488439108), + /* sin(2*pi* 214/ 256) */ T(-0.8577286100002720699022699842847701370425), + /* sin(2*pi* 215/ 256) */ T(-0.8448535652497070732595712051049570977198), + /* sin(2*pi* 216/ 256) */ T(-0.8314696123025452370787883776179057567386), + /* sin(2*pi* 217/ 256) */ T(-0.817584813151583696504920884130633809471), + /* sin(2*pi* 218/ 256) */ T(-0.8032075314806449098066765129631419238796), + /* sin(2*pi* 219/ 256) */ T(-0.7883464276266062620091647053596892826565), + /* sin(2*pi* 220/ 256) */ T(-0.773010453362736960810906609758469800971), + /* sin(2*pi* 221/ 256) */ T(-0.7572088465064845475754640536057844730404), + /* sin(2*pi* 222/ 256) */ T(-0.740951125354959091175616897495162729729), + /* sin(2*pi* 223/ 256) */ T(-0.7242470829514669209410692432905531674831), + /* sin(2*pi* 224/ 256) */ T(-0.7071067811865475244008443621048490392848), + /* sin(2*pi* 225/ 256) */ T(-0.6895405447370669246167306299574847028455), + /* sin(2*pi* 226/ 256) */ 
T(-0.6715589548470184006253768504274218032288), + /* sin(2*pi* 227/ 256) */ T(-0.6531728429537767640842030136563054150769), + /* sin(2*pi* 228/ 256) */ T(-0.6343932841636454982151716132254933706757), + /* sin(2*pi* 229/ 256) */ T(-0.6152315905806268454849135634139842776594), + /* sin(2*pi* 230/ 256) */ T(-0.5956993044924333434670365288299698895119), + /* sin(2*pi* 231/ 256) */ T(-0.575808191417845300745972453815730841776), + /* sin(2*pi* 232/ 256) */ T(-0.5555702330196022247428308139485328743749), + /* sin(2*pi* 233/ 256) */ T(-0.5349976198870972106630769046370179155603), + /* sin(2*pi* 234/ 256) */ T(-0.514102744193221726593693838968815772608), + /* sin(2*pi* 235/ 256) */ T(-0.4928981922297840368730266887588092682397), + /* sin(2*pi* 236/ 256) */ T(-0.4713967368259976485563876259052543776575), + /* sin(2*pi* 237/ 256) */ T(-0.4496113296546066000462945794242270758832), + /* sin(2*pi* 238/ 256) */ T(-0.4275550934302820943209668568887985343046), + /* sin(2*pi* 239/ 256) */ T(-0.4052413140049898709084813055050524665119), + /* sin(2*pi* 240/ 256) */ T(-0.3826834323650897717284599840303988667613), + /* sin(2*pi* 241/ 256) */ T(-0.3598950365349881487751045723267564202023), + /* sin(2*pi* 242/ 256) */ T(-0.3368898533922200506892532126191475704778), + /* sin(2*pi* 243/ 256) */ T(-0.3136817403988914766564788459941003099934), + /* sin(2*pi* 244/ 256) */ T(-0.2902846772544623676361923758173952746915), + /* sin(2*pi* 245/ 256) */ T(-0.2667127574748983863252865151164363940421), + /* sin(2*pi* 246/ 256) */ T(-0.242980179903263889948274162077471118321), + /* sin(2*pi* 247/ 256) */ T(-0.2191012401568697972277375474973577988484), + /* sin(2*pi* 248/ 256) */ T(-0.1950903220161282678482848684770222409277), + /* sin(2*pi* 249/ 256) */ T(-0.1709618887603012263636423572082635319663), + /* sin(2*pi* 250/ 256) */ T(-0.1467304744553617516588501296467178197062), + /* sin(2*pi* 251/ 256) */ T(-0.1224106751992161984987044741509457875752), + /* sin(2*pi* 252/ 256) */ 
T(-0.09801714032956060199419556388864184586114), + /* sin(2*pi* 253/ 256) */ T(-0.0735645635996674235294656215752343218133), + /* sin(2*pi* 254/ 256) */ T(-0.04906767432741801425495497694268265831475), + /* sin(2*pi* 255/ 256) */ T(-0.02454122852291228803173452945928292506547), + +}; +} + /* sin_using_table: returns sin(2*pi*k/size) by indexing the precomputed c_sin_table above; the index k*table_size/size is wrapped modulo table_size, so the result is exact only when size evenly divides the table length -- TODO confirm callers guarantee this */ +template <typename T> +constexpr inline T sin_using_table(size_t size, size_t k) +{ + constexpr size_t table_size = arraysize(data::c_sin_table<T>); + return data::c_sin_table<T>[(k * table_size / size) % table_size]; +} + /* cos_using_table: cos(x) = sin(x + pi/2), i.e. the sine table shifted by a quarter period (k + size/4) */ +template <typename T> +constexpr inline T cos_using_table(size_t size, size_t k) +{ + return sin_using_table<T>(size, k + size / 4); +} +} diff --git a/include/kfr/dft/bitrev.hpp b/include/kfr/dft/bitrev.hpp @@ -0,0 +1,387 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. 
+ */ +#pragma once + +#include "../base/complex.hpp" +#include "../base/constants.hpp" +#include "../base/digitreverse.hpp" +#include "../base/vec.hpp" + +#include "../data/bitrev.hpp" + +#include "ft.hpp" + +namespace kfr +{ + +namespace internal +{ + /* Helpers for the fft_reorder routines below: table-driven bit/digit-reversal permutations. */ +constexpr bool fft_reorder_aligned = false; + /* Reverses the low Bits bits of x. Uses the precomputed data::bitrev_table when Bits fits within the table's log2 size, otherwise falls back to the generic bitreverse<Bits>. */ +template <size_t Bits> +constexpr inline u32 bitrev_using_table(u32 x) +{ + constexpr size_t bitrev_table_log2N = ilog2(arraysize(data::bitrev_table)); + if (Bits > bitrev_table_log2N) + return bitreverse<Bits>(x); + + return data::bitrev_table[x] >> (bitrev_table_log2N - Bits); +} + /* Runtime-width variant: reverses the low 'bits' bits of x (full 32-bit reverse then shift when 'bits' exceeds the table size). */ +constexpr inline u32 bitrev_using_table(u32 x, size_t bits) +{ + constexpr size_t bitrev_table_log2N = ilog2(arraysize(data::bitrev_table)); + if (bits > bitrev_table_log2N) + return bitreverse<32>(x) >> (32 - bits); + + return data::bitrev_table[x] >> (bitrev_table_log2N - bits); +} + /* Base-4 digit reversal of the low 'bits' bits: bit-reverse via the table, then swap adjacent bit pairs (0xaaaaaaaa/0x55555555 masks) to turn bit reversal into 2-bit-digit reversal. */ +constexpr inline u32 dig4rev_using_table(u32 x, size_t bits) +{ + constexpr size_t bitrev_table_log2N = ilog2(arraysize(data::bitrev_table)); + if (bits > bitrev_table_log2N) + return digitreverse4<32>(x) >> (32 - bits); + + x = data::bitrev_table[x]; + x = (((x & 0xaaaaaaaa) >> 1) | ((x & 0x55555555) << 1)); + x = x >> (bitrev_table_log2N - bits); + return x; +} + /* In-place digit-reverse (radix selected by 'bitrev') of one 16-complex group read at scalar offset i; i is in T units (2 per complex), group stride derives from N = 1<<log2n. */ +template <size_t log2n, size_t bitrev, typename T> +KFR_INTRIN void fft_reorder_swap(T* inout, size_t i) +{ + using cxx = cvec<T, 16>; + constexpr size_t N = 1 << log2n; + constexpr size_t N4 = 2 * N / 4; + + cxx vi = cread_group<4, 4, N4 / 2, fft_reorder_aligned>(ptr_cast<complex<T>>(inout + i)); + vi = digitreverse<bitrev, 2>(vi); + cwrite_group<4, 4, N4 / 2, fft_reorder_aligned>(ptr_cast<complex<T>>(inout + i), vi); +} + /* Reorders two distinct groups at offsets i and j, each in place within itself (no exchange between them); __builtin_assume documents the i != j precondition to the optimizer. */ +template <size_t log2n, size_t bitrev, typename T> +KFR_INTRIN void fft_reorder_swap_two(T* inout, size_t i, size_t j) +{ + __builtin_assume(i != j); + using cxx = cvec<T, 16>; + constexpr size_t N = 1 << log2n; + constexpr size_t N4 = 2 * N / 4; + + cxx vi = cread_group<4, 4, N4 / 2, 
fft_reorder_aligned>(ptr_cast<complex<T>>(inout + i)); + cxx vj = cread_group<4, 4, N4 / 2, fft_reorder_aligned>(ptr_cast<complex<T>>(inout + j)); + + vi = digitreverse<bitrev, 2>(vi); + cwrite_group<4, 4, N4 / 2, fft_reorder_aligned>(ptr_cast<complex<T>>(inout + i), vi); + vj = digitreverse<bitrev, 2>(vj); + cwrite_group<4, 4, N4 / 2, fft_reorder_aligned>(ptr_cast<complex<T>>(inout + j), vj); +} + +template <size_t log2n, size_t bitrev, typename T> +KFR_INTRIN void fft_reorder_swap(T* inout, size_t i, size_t j) +{ + __builtin_assume(i != j); + using cxx = cvec<T, 16>; + constexpr size_t N = 1 << log2n; + constexpr size_t N4 = 2 * N / 4; + + cxx vi = cread_group<4, 4, N4 / 2, fft_reorder_aligned>(ptr_cast<complex<T>>(inout + i)); + cxx vj = cread_group<4, 4, N4 / 2, fft_reorder_aligned>(ptr_cast<complex<T>>(inout + j)); + + vi = digitreverse<bitrev, 2>(vi); + cwrite_group<4, 4, N4 / 2, fft_reorder_aligned>(ptr_cast<complex<T>>(inout + j), vi); + vj = digitreverse<bitrev, 2>(vj); + cwrite_group<4, 4, N4 / 2, fft_reorder_aligned>(ptr_cast<complex<T>>(inout + i), vj); +} + +template <size_t log2n, size_t bitrev, typename T> +KFR_INTRIN void fft_reorder_swap(complex<T>* inout, size_t i) +{ + fft_reorder_swap<log2n, bitrev>(ptr_cast<T>(inout), i * 2); +} + +template <size_t log2n, size_t bitrev, typename T> +KFR_INTRIN void fft_reorder_swap_two(complex<T>* inout, size_t i0, size_t i1) +{ + fft_reorder_swap_two<log2n, bitrev>(ptr_cast<T>(inout), i0 * 2, i1 * 2); +} + +template <size_t log2n, size_t bitrev, typename T> +KFR_INTRIN void fft_reorder_swap(complex<T>* inout, size_t i, size_t j) +{ + fft_reorder_swap<log2n, bitrev>(ptr_cast<T>(inout), i * 2, j * 2); +} + +template <typename T> +KFR_INTRIN void fft_reorder(complex<T>* inout, csize_t<11>) +{ + fft_reorder_swap_two<11>(inout, 0 * 4, 8 * 4); + fft_reorder_swap<11>(inout, 1 * 4, 64 * 4); + fft_reorder_swap<11>(inout, 2 * 4, 32 * 4); + fft_reorder_swap<11>(inout, 3 * 4, 96 * 4); + fft_reorder_swap<11>(inout, 4 * 4, 
16 * 4); + fft_reorder_swap<11>(inout, 5 * 4, 80 * 4); + fft_reorder_swap<11>(inout, 6 * 4, 48 * 4); + fft_reorder_swap<11>(inout, 7 * 4, 112 * 4); + fft_reorder_swap<11>(inout, 9 * 4, 72 * 4); + fft_reorder_swap<11>(inout, 10 * 4, 40 * 4); + fft_reorder_swap<11>(inout, 11 * 4, 104 * 4); + fft_reorder_swap<11>(inout, 12 * 4, 24 * 4); + fft_reorder_swap<11>(inout, 13 * 4, 88 * 4); + fft_reorder_swap<11>(inout, 14 * 4, 56 * 4); + fft_reorder_swap<11>(inout, 15 * 4, 120 * 4); + fft_reorder_swap<11>(inout, 17 * 4, 68 * 4); + fft_reorder_swap<11>(inout, 18 * 4, 36 * 4); + fft_reorder_swap<11>(inout, 19 * 4, 100 * 4); + fft_reorder_swap_two<11>(inout, 20 * 4, 28 * 4); + fft_reorder_swap<11>(inout, 21 * 4, 84 * 4); + fft_reorder_swap<11>(inout, 22 * 4, 52 * 4); + fft_reorder_swap<11>(inout, 23 * 4, 116 * 4); + fft_reorder_swap<11>(inout, 25 * 4, 76 * 4); + fft_reorder_swap<11>(inout, 26 * 4, 44 * 4); + fft_reorder_swap<11>(inout, 27 * 4, 108 * 4); + fft_reorder_swap<11>(inout, 29 * 4, 92 * 4); + fft_reorder_swap<11>(inout, 30 * 4, 60 * 4); + fft_reorder_swap<11>(inout, 31 * 4, 124 * 4); + fft_reorder_swap<11>(inout, 33 * 4, 66 * 4); + fft_reorder_swap_two<11>(inout, 34 * 4, 42 * 4); + fft_reorder_swap<11>(inout, 35 * 4, 98 * 4); + fft_reorder_swap<11>(inout, 37 * 4, 82 * 4); + fft_reorder_swap<11>(inout, 38 * 4, 50 * 4); + fft_reorder_swap<11>(inout, 39 * 4, 114 * 4); + fft_reorder_swap<11>(inout, 41 * 4, 74 * 4); + fft_reorder_swap<11>(inout, 43 * 4, 106 * 4); + fft_reorder_swap<11>(inout, 45 * 4, 90 * 4); + fft_reorder_swap<11>(inout, 46 * 4, 58 * 4); + fft_reorder_swap<11>(inout, 47 * 4, 122 * 4); + fft_reorder_swap<11>(inout, 49 * 4, 70 * 4); + fft_reorder_swap<11>(inout, 51 * 4, 102 * 4); + fft_reorder_swap<11>(inout, 53 * 4, 86 * 4); + fft_reorder_swap_two<11>(inout, 54 * 4, 62 * 4); + fft_reorder_swap<11>(inout, 55 * 4, 118 * 4); + fft_reorder_swap<11>(inout, 57 * 4, 78 * 4); + fft_reorder_swap<11>(inout, 59 * 4, 110 * 4); + fft_reorder_swap<11>(inout, 61 * 4, 94 * 
4); + fft_reorder_swap<11>(inout, 63 * 4, 126 * 4); + fft_reorder_swap_two<11>(inout, 65 * 4, 73 * 4); + fft_reorder_swap<11>(inout, 67 * 4, 97 * 4); + fft_reorder_swap<11>(inout, 69 * 4, 81 * 4); + fft_reorder_swap<11>(inout, 71 * 4, 113 * 4); + fft_reorder_swap<11>(inout, 75 * 4, 105 * 4); + fft_reorder_swap<11>(inout, 77 * 4, 89 * 4); + fft_reorder_swap<11>(inout, 79 * 4, 121 * 4); + fft_reorder_swap<11>(inout, 83 * 4, 101 * 4); + fft_reorder_swap_two<11>(inout, 85 * 4, 93 * 4); + fft_reorder_swap<11>(inout, 87 * 4, 117 * 4); + fft_reorder_swap<11>(inout, 91 * 4, 109 * 4); + fft_reorder_swap<11>(inout, 95 * 4, 125 * 4); + fft_reorder_swap_two<11>(inout, 99 * 4, 107 * 4); + fft_reorder_swap<11>(inout, 103 * 4, 115 * 4); + fft_reorder_swap<11>(inout, 111 * 4, 123 * 4); + fft_reorder_swap_two<11>(inout, 119 * 4, 127 * 4); +} + +template <typename T> +KFR_INTRIN void fft_reorder(complex<T>* inout, csize_t<7>) +{ + constexpr size_t bitrev = 2; + fft_reorder_swap_two<7, bitrev>(inout, 0 * 4, 2 * 4); + fft_reorder_swap<7, bitrev>(inout, 1 * 4, 4 * 4); + fft_reorder_swap<7, bitrev>(inout, 3 * 4, 6 * 4); + fft_reorder_swap_two<7, bitrev>(inout, 5 * 4, 7 * 4); +} + +template <typename T> +KFR_INTRIN void fft_reorder(complex<T>* inout, csize_t<8>) +{ + constexpr size_t bitrev = 4; + fft_reorder_swap_two<8, bitrev>(inout, 0 * 4, 5 * 4); + fft_reorder_swap<8, bitrev>(inout, 1 * 4, 4 * 4); + fft_reorder_swap<8, bitrev>(inout, 2 * 4, 8 * 4); + fft_reorder_swap<8, bitrev>(inout, 3 * 4, 12 * 4); + fft_reorder_swap<8, bitrev>(inout, 6 * 4, 9 * 4); + fft_reorder_swap<8, bitrev>(inout, 7 * 4, 13 * 4); + fft_reorder_swap_two<8, bitrev>(inout, 10 * 4, 15 * 4); + fft_reorder_swap<8, bitrev>(inout, 11 * 4, 14 * 4); +} + +template <typename T> +KFR_INTRIN void fft_reorder(complex<T>* inout, csize_t<9>) +{ + constexpr size_t bitrev = 2; + fft_reorder_swap_two<9, bitrev>(inout, 0 * 4, 4 * 4); + fft_reorder_swap<9, bitrev>(inout, 1 * 4, 16 * 4); + fft_reorder_swap<9, bitrev>(inout, 2 * 4, 
8 * 4); + fft_reorder_swap<9, bitrev>(inout, 3 * 4, 24 * 4); + fft_reorder_swap<9, bitrev>(inout, 5 * 4, 20 * 4); + fft_reorder_swap<9, bitrev>(inout, 6 * 4, 12 * 4); + fft_reorder_swap<9, bitrev>(inout, 7 * 4, 28 * 4); + fft_reorder_swap<9, bitrev>(inout, 9 * 4, 18 * 4); + fft_reorder_swap_two<9, bitrev>(inout, 10 * 4, 14 * 4); + fft_reorder_swap<9, bitrev>(inout, 11 * 4, 26 * 4); + fft_reorder_swap<9, bitrev>(inout, 13 * 4, 22 * 4); + fft_reorder_swap<9, bitrev>(inout, 15 * 4, 30 * 4); + fft_reorder_swap_two<9, bitrev>(inout, 17 * 4, 21 * 4); + fft_reorder_swap<9, bitrev>(inout, 19 * 4, 25 * 4); + fft_reorder_swap<9, bitrev>(inout, 23 * 4, 29 * 4); + fft_reorder_swap_two<9, bitrev>(inout, 27 * 4, 31 * 4); +} + +template <typename T, bool use_br2> +void cwrite_reordered(T* out, cvec<T, 16> value, size_t N4, cbool_t<use_br2>) +{ + value = digitreverse < use_br2 ? 2 : 4, 2 > (value); + cwrite_group<4, 4, fft_reorder_aligned>(ptr_cast<complex<T>>(out), N4, value); +} + +template <typename T, bool use_br2> +KFR_INTRIN void fft_reorder_swap_n4(T* inout, size_t i, size_t j, size_t N4, cbool_t<use_br2>) +{ + __builtin_assume(i != j); + const cvec<T, 16> vi = cread_group<4, 4, fft_reorder_aligned>(ptr_cast<complex<T>>(inout + i), N4); + const cvec<T, 16> vj = cread_group<4, 4, fft_reorder_aligned>(ptr_cast<complex<T>>(inout + j), N4); + cwrite_reordered(inout + j, vi, N4, cbool<use_br2>); + cwrite_reordered(inout + i, vj, N4, cbool<use_br2>); +} + +template <typename T> +KFR_INTRIN void fft_reorder(complex<T>* inout, size_t log2n, ctrue_t use_br2) +{ + const size_t N = 1 << log2n; + const size_t N4 = N / 4; + const size_t iend = N / 16 * 4 * 2; + constexpr size_t istep = 2 * 4; + const size_t jstep1 = (1 << (log2n - 5)) * 4 * 2; + const size_t jstep2 = size_t(1 << (log2n - 5)) * 4 * 2 - size_t(1 << (log2n - 6)) * 4 * 2; + T* io = ptr_cast<T>(inout); + + for (size_t i = 0; i < iend;) + { + size_t j = bitrev_using_table(static_cast<u32>(i >> 3), log2n - 4) << 3; + if (i >= 
j) + fft_reorder_swap_n4(io, i, j, N4, use_br2); + i += istep; + j = j + jstep1; + + if (i >= j) + fft_reorder_swap_n4(io, i, j, N4, use_br2); + i += istep; + j = j - jstep2; + + if (i >= j) + fft_reorder_swap_n4(io, i, j, N4, use_br2); + i += istep; + j = j + jstep1; + + if (i >= j) + fft_reorder_swap_n4(io, i, j, N4, use_br2); + i += istep; + } +} + +template <typename T> +KFR_INTRIN void fft_reorder(complex<T>* inout, size_t log2n, cfalse_t use_br2) +{ + const size_t N = size_t(1) << log2n; + const size_t N4 = N / 4; + const size_t N16 = N * 2 / 16; + size_t iend = N16; + constexpr size_t istep = 2 * 4; + const size_t jstep = N / 64 * 4 * 2; + T* io = ptr_cast<T>(inout); + + size_t i = 0; +#pragma clang loop unroll_count(2) + for (; i < iend;) + { + size_t j = dig4rev_using_table(static_cast<u32>(i >> 3), log2n - 4) << 3; + + if (i >= j) + fft_reorder_swap_n4(io, i, j, N4, use_br2); + i += istep * 4; + } + iend += N16; +#pragma clang loop unroll_count(2) + for (; i < iend;) + { + size_t j = dig4rev_using_table(static_cast<u32>(i >> 3), log2n - 4) << 3; + + fft_reorder_swap_n4(io, i, j, N4, use_br2); + + i += istep; + j = j + jstep; + + if (i >= j) + fft_reorder_swap_n4(io, i, j, N4, use_br2); + i += istep * 3; + } + iend += N16; +#pragma clang loop unroll_count(2) + for (; i < iend;) + { + size_t j = dig4rev_using_table(static_cast<u32>(i >> 3), log2n - 4) << 3; + + fft_reorder_swap_n4(io, i, j, N4, use_br2); + + i += istep; + j = j + jstep; + + fft_reorder_swap_n4(io, i, j, N4, use_br2); + + i += istep; + j = j + jstep; + + if (i >= j) + fft_reorder_swap_n4(io, i, j, N4, use_br2); + i += istep * 2; + } + iend += N16; +#pragma clang loop unroll_count(2) + for (; i < iend;) + { + size_t j = dig4rev_using_table(static_cast<u32>(i >> 3), log2n - 4) << 3; + + fft_reorder_swap_n4(io, i, j, N4, use_br2); + + i += istep; + j = j + jstep; + + fft_reorder_swap_n4(io, i, j, N4, use_br2); + + i += istep; + j = j + jstep; + + fft_reorder_swap_n4(io, i, j, N4, use_br2); + + 
i += istep; + j = j + jstep; + + if (i >= j) + fft_reorder_swap_n4(io, i, j, N4, use_br2); + i += istep; + } +} +} +} diff --git a/include/kfr/dft/fft.hpp b/include/kfr/dft/fft.hpp @@ -0,0 +1,998 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. 
+ */ +#pragma once + +#include "../base/complex.hpp" +#include "../base/constants.hpp" +#include "../base/memory.hpp" +#include "../base/read_write.hpp" +#include "../base/vec.hpp" +#include "../misc/small_buffer.hpp" + +#include "../cometa/string.hpp" + +#include "bitrev.hpp" +#include "ft.hpp" + +#pragma clang diagnostic push +#if CID_HAS_WARNING("-Wshadow") +#pragma clang diagnostic ignored "-Wshadow" +#endif + +namespace kfr +{ + /* Base class for one stage of a DFT plan: records stage/scratch sizes and a data pointer, and dispatches execution through the protected virtuals. 'name' and 'data' are left for the derived stage or plan builder to fill in -- TODO confirm ownership of 'data'. */ +template <typename T> +struct dft_stage +{ + size_t stage_size = 0; + size_t data_size = 0; + size_t temp_size = 0; + u8* data = nullptr; + size_t repeats = 1; + size_t out_offset = 0; + const char* name; /* stage label; not initialized here */ + bool recursion = false; + + void initialize(size_t size) { do_initialize(size); } + /* Runs the stage: out/in are complex buffers, temp is shared scratch of at least temp_size bytes -- TODO confirm sizing contract. */ + KFR_INTRIN void execute(complex<T>* out, const complex<T>* in, u8* temp) { do_execute(out, in, temp); } + virtual ~dft_stage() {} + +protected: + virtual void do_initialize(size_t) {} + virtual void do_execute(complex<T>*, const complex<T>*, u8* temp) = 0; +}; + +#pragma clang diagnostic push +#if CID_HAS_WARNING("-Wassume") +#pragma clang diagnostic ignored "-Wassume" +#endif + +namespace internal +{ + /* Twiddle multiply for interleaved (non-split) complex lanes: computes w * tw via dupeven/dupodd and subadd; when 'inverse', tw is negated before the odd-lane product (inverse-transform twiddles). */ +template <size_t width, bool inverse, typename T> +KFR_INTRIN cvec<T, width> radix4_apply_twiddle(csize_t<width>, cfalse_t /*split_format*/, cbool_t<inverse>, + cvec<T, width> w, cvec<T, width> tw) +{ + cvec<T, width> b1 = w * dupeven(tw); + w = swap<2>(w); + + if (inverse) + tw = -(tw); + w = subadd(b1, w * dupodd(tw)); + return w; +} + /* Radix-4 butterfly pass, interleaved (non-split) format: reads the four quarter-spaced inputs and writes twiddled outputs; output order of the middle two quarters depends on use_br2. */ +template <size_t width, bool use_br2, bool inverse, bool aligned, typename T> +KFR_INTRIN void radix4_body(size_t N, csize_t<width>, cfalse_t, cfalse_t, cfalse_t, cbool_t<use_br2>, + cbool_t<inverse>, cbool_t<aligned>, complex<T>* out, const complex<T>* in, + const complex<T>* twiddle) +{ + const size_t N4 = N / 4; + cvec<T, width> w1, w2, w3; + + cvec<T, width> sum02, sum13, diff02, diff13; + + cvec<T, width> a0, a1, a2, a3; + a0 = cread<width, aligned>(in + 0); + a2 = cread<width, aligned>(in + N4 * 2); + sum02 
= a0 + a2; + + a1 = cread<width, aligned>(in + N4); + a3 = cread<width, aligned>(in + N4 * 3); + sum13 = a1 + a3; + + cwrite<width, aligned>(out, sum02 + sum13); + w2 = sum02 - sum13; + cwrite<width, aligned>( + out + N4 * (use_br2 ? 1 : 2), + radix4_apply_twiddle(csize<width>, cfalse, cbool<inverse>, w2, cread<width, true>(twiddle + width))); + diff02 = a0 - a2; + diff13 = a1 - a3; + if (inverse) + { + diff13 = (diff13 ^ broadcast<width, T>(T(), -T())); + diff13 = swap<2>(diff13); + } + else + { + diff13 = swap<2>(diff13); + diff13 = (diff13 ^ broadcast<width, T>(T(), -T())); + } + + w1 = diff02 + diff13; + + cwrite<width, aligned>( + out + N4 * (use_br2 ? 2 : 1), + radix4_apply_twiddle(csize<width>, cfalse, cbool<inverse>, w1, cread<width, true>(twiddle + 0))); + w3 = diff02 - diff13; + cwrite<width, aligned>(out + N4 * 3, radix4_apply_twiddle(csize<width>, cfalse, cbool<inverse>, w3, + cread<width, true>(twiddle + width * 2))); +} + +template <size_t width, bool inverse, typename T> +KFR_INTRIN cvec<T, width> radix4_apply_twiddle(csize_t<width>, ctrue_t /*split_format*/, cbool_t<inverse>, + cvec<T, width> w, cvec<T, width> tw) +{ + vec<T, width> re1, im1, twre, twim; + split(w, re1, im1); + split(tw, twre, twim); + + const vec<T, width> b1re = re1 * twre; + const vec<T, width> b1im = im1 * twre; + if (inverse) + w = concat(b1re + im1 * twim, b1im - re1 * twim); + else + w = concat(b1re - im1 * twim, b1im + re1 * twim); + return w; +} + +template <size_t width, bool splitout, bool splitin, bool use_br2, bool inverse, bool aligned, typename T> +KFR_INTRIN void radix4_body(size_t N, csize_t<width>, ctrue_t, cbool_t<splitout>, cbool_t<splitin>, + cbool_t<use_br2>, cbool_t<inverse>, cbool_t<aligned>, complex<T>* out, + const complex<T>* in, const complex<T>* twiddle) +{ + const size_t N4 = N / 4; + cvec<T, width> w1, w2, w3; + constexpr bool read_split = !splitin && splitout; + constexpr bool write_split = splitin && !splitout; + + vec<T, width> re0, im0, re1, im1, 
re2, im2, re3, im3; + + split(cread_split<width, aligned, read_split>(in + N4 * 0), re0, im0); + split(cread_split<width, aligned, read_split>(in + N4 * 1), re1, im1); + split(cread_split<width, aligned, read_split>(in + N4 * 2), re2, im2); + split(cread_split<width, aligned, read_split>(in + N4 * 3), re3, im3); + + const vec<T, width> sum02re = re0 + re2; + const vec<T, width> sum02im = im0 + im2; + const vec<T, width> sum13re = re1 + re3; + const vec<T, width> sum13im = im1 + im3; + + cwrite_split<width, aligned, write_split>(out, concat(sum02re + sum13re, sum02im + sum13im)); + w2 = concat(sum02re - sum13re, sum02im - sum13im); + cwrite_split<width, aligned, write_split>( + out + N4 * (use_br2 ? 1 : 2), + radix4_apply_twiddle(csize<width>, ctrue, cbool<inverse>, w2, cread<width, true>(twiddle + width))); + + const vec<T, width> diff02re = re0 - re2; + const vec<T, width> diff02im = im0 - im2; + const vec<T, width> diff13re = re1 - re3; + const vec<T, width> diff13im = im1 - im3; + + (inverse ? w1 : w3) = concat(diff02re - diff13im, diff02im + diff13re); + (inverse ? w3 : w1) = concat(diff02re + diff13im, diff02im - diff13re); + + cwrite_split<width, aligned, write_split>( + out + N4 * (use_br2 ? 
2 : 1), + radix4_apply_twiddle(csize<width>, ctrue, cbool<inverse>, w1, cread<width, true>(twiddle + 0))); + cwrite_split<width, aligned, write_split>(out + N4 * 3, + radix4_apply_twiddle(csize<width>, ctrue, cbool<inverse>, w3, + cread<width, true>(twiddle + width * 2))); +} + +template <typename T> +KFR_NOINLINE cvec<T, 1> calculate_twiddle(size_t n, size_t size) +{ + if (n == 0) + { + return make_vector(static_cast<T>(1), static_cast<T>(0)); + } + else if (n == size / 4) + { + return make_vector(static_cast<T>(0), static_cast<T>(-1)); + } + else if (n == size / 2) + { + return make_vector(static_cast<T>(-1), static_cast<T>(0)); + } + else if (n == size * 3 / 4) + { + return make_vector(static_cast<T>(0), static_cast<T>(1)); + } + else + { + double kth = c_pi<double, 2> * (n / static_cast<double>(size)); + double tcos = +kfr::native::cos(kth); + double tsin = -kfr::native::sin(kth); + return make_vector(static_cast<T>(tcos), static_cast<T>(tsin)); + } +} + +template <typename T, size_t width> +KFR_INTRIN void initialize_twiddles_impl(complex<T>*& twiddle, size_t nn, size_t nnstep, size_t size, + bool split_format) +{ + vec<T, 2 * width> result = T(); + KFR_LOOP_UNROLL + for (size_t i = 0; i < width; i++) + { + const cvec<T, 1> r = calculate_twiddle<T>(nn + nnstep * i, size); + result(i * 2) = r[0]; + result(i * 2 + 1) = r[1]; + } + if (split_format) + ref_cast<cvec<T, width>>(twiddle[0]) = splitpairs(result); + else + ref_cast<cvec<T, width>>(twiddle[0]) = result; + twiddle += width; +} + +template <typename T, size_t width> +KFR_NOINLINE void initialize_twiddles(complex<T>*& twiddle, size_t stage_size, size_t size, bool split_format) +{ + size_t nnstep = size / stage_size; + KFR_LOOP_NOUNROLL + for (size_t n = 0; n < stage_size / 4; n += width) + { + initialize_twiddles_impl<T, width>(twiddle, n * nnstep * 1, nnstep * 1, size, split_format); + initialize_twiddles_impl<T, width>(twiddle, n * nnstep * 2, nnstep * 2, size, split_format); + 
initialize_twiddles_impl<T, width>(twiddle, n * nnstep * 3, nnstep * 3, size, split_format); + } +} + +template <typename T> +KFR_INTRIN void prefetch_one(const complex<T>* in) +{ + __builtin_prefetch(ptr_cast<void>(in), 0, _MM_HINT_T0); +} + +template <typename T> +KFR_INTRIN void prefetch_four(size_t stride, const complex<T>* in) +{ + __builtin_prefetch(ptr_cast<void>(in), 0, _MM_HINT_T0); + __builtin_prefetch(ptr_cast<void>(in + stride), 0, _MM_HINT_T0); + __builtin_prefetch(ptr_cast<void>(in + stride * 2), 0, _MM_HINT_T0); + __builtin_prefetch(ptr_cast<void>(in + stride * 3), 0, _MM_HINT_T0); +} + +template <typename Ntype, size_t width, bool splitout, bool splitin, bool prefetch, bool use_br2, + bool inverse, bool aligned, typename T> +KFR_INTRIN cfalse_t radix4_pass(Ntype N, size_t blocks, csize_t<width>, cbool_t<splitout>, cbool_t<splitin>, + cbool_t<use_br2>, cbool_t<prefetch>, cbool_t<inverse>, cbool_t<aligned>, + complex<T>* out, const complex<T>* in, const complex<T>*& twiddle) +{ + constexpr static size_t prefetch_offset = width * 8; + const auto N4 = N / csize<4>; + const auto N43 = N4 * csize<3>; + __builtin_assume(blocks > 0); + __builtin_assume(N > 0); + __builtin_assume(N4 > 0); + KFR_LOOP_NOUNROLL for (size_t b = 0; b < blocks; b++) + { +#pragma clang loop unroll_count(default_unroll_count) + for (size_t n2 = 0; n2 < N4; n2 += width) + { + if (prefetch) + prefetch_four(N4, in + prefetch_offset); + radix4_body(N, csize<width>, cbool < splitout || splitin >, cbool<splitout>, cbool<splitin>, + cbool<use_br2>, cbool<inverse>, cbool<aligned>, out, in, twiddle + n2 * 3); + in += width; + out += width; + } + in += N43; + out += N43; + } + twiddle += N43; + return {}; +} + +template <size_t width, bool prefetch, bool use_br2, bool inverse, bool aligned, typename T> +KFR_SINTRIN ctrue_t radix4_pass(csize_t<32>, size_t blocks, csize_t<width>, cfalse_t, cfalse_t, + cbool_t<use_br2>, cbool_t<prefetch>, cbool_t<inverse>, cbool_t<aligned>, + complex<T>* out, 
/* NOTE(review): continuation of the radix4_pass(csize_t<32>, ...) signature
   started on the previous chunk line. In-place terminal pass: 32-point
   transform per block via an 8x4 butterfly/transpose scheme, output in
   bit-reversed order. Returns ctrue (terminal-pass tag). */
const complex<T>*, const complex<T>*& /*twiddle*/)
{
    __builtin_assume(blocks > 0);
    constexpr static size_t prefetch_offset = 32 * 4;
    for (size_t b = 0; b < blocks; b++)
    {
        if (prefetch)
            prefetch_four(csize<64>, out + prefetch_offset);
        // Deinterleave 32 complex values into eight 4-wide columns.
        cvec<T, 4> w0, w1, w2, w3, w4, w5, w6, w7;
        split(cread<8, aligned>(out + 0), w0, w1);
        split(cread<8, aligned>(out + 8), w2, w3);
        split(cread<8, aligned>(out + 16), w4, w5);
        split(cread<8, aligned>(out + 24), w6, w7);

        butterfly8<4, inverse>(w0, w1, w2, w3, w4, w5, w6, w7);

        // Compile-time twiddles for the 8x4 decomposition of a 32-point DFT.
        w1 = cmul(w1, fixed_twiddle<T, 4, 32, 0, 1, inverse>);
        w2 = cmul(w2, fixed_twiddle<T, 4, 32, 0, 2, inverse>);
        w3 = cmul(w3, fixed_twiddle<T, 4, 32, 0, 3, inverse>);
        w4 = cmul(w4, fixed_twiddle<T, 4, 32, 0, 4, inverse>);
        w5 = cmul(w5, fixed_twiddle<T, 4, 32, 0, 5, inverse>);
        w6 = cmul(w6, fixed_twiddle<T, 4, 32, 0, 6, inverse>);
        w7 = cmul(w7, fixed_twiddle<T, 4, 32, 0, 7, inverse>);

        cvec<T, 8> z0, z1, z2, z3;
        transpose4x8(w0, w1, w2, w3, w4, w5, w6, w7, z0, z1, z2, z3);

        butterfly4<8, inverse>(cfalse, z0, z1, z2, z3, z0, z1, z2, z3);
        cwrite<32, aligned>(out, bitreverse<2>(concat(z0, z1, z2, z3)));
        out += 32;
    }
    return {};
}

// Terminal pass for N == 8: two in-place 8-point butterflies per iteration
// (hence b += 2), each followed by a fixed output permutation.
template <size_t width, bool prefetch, bool use_br2, bool inverse, bool aligned, typename T>
KFR_SINTRIN ctrue_t radix4_pass(csize_t<8>, size_t blocks, csize_t<width>, cfalse_t, cfalse_t,
                                cbool_t<use_br2>, cbool_t<prefetch>, cbool_t<inverse>, cbool_t<aligned>,
                                complex<T>* out, const complex<T>*, const complex<T>*& /*twiddle*/)
{
    __builtin_assume(blocks > 0);
    constexpr static size_t prefetch_offset = width * 16;
    for (size_t b = 0; b < blocks; b += 2)
    {
        if (prefetch)
            prefetch_one(out + prefetch_offset);

        cvec<T, 8> vlo = cread<8, aligned>(out + 0);
        cvec<T, 8> vhi = cread<8, aligned>(out + 8);
        butterfly8<inverse>(vlo);
        butterfly8<inverse>(vhi);
        // Reorder outputs in groups of 2 scalars (one complex each).
        vlo = permutegroups<(2), 0, 4, 2, 6, 1, 5, 3, 7>(vlo);
        vhi = permutegroups<(2), 0, 4, 2, 6, 1, 5, 3, 7>(vhi);
        cwrite<8, aligned>(out, vlo);
        cwrite<8, aligned>(out + 8, vhi);
        out += 16;
    }
    return {};
}

// Terminal pass for N == 16: two in-place 16-point transforms per iteration,
// each done as radix-4 / twiddle / digit-reverse / radix-4. The final store is
// bit-reversed or digit-reversed depending on use_br2 (parity of total log2n).
template <size_t width, bool prefetch, bool use_br2, bool inverse, bool aligned, typename T>
KFR_SINTRIN ctrue_t radix4_pass(csize_t<16>, size_t blocks, csize_t<width>, cfalse_t, cfalse_t,
                                cbool_t<use_br2>, cbool_t<prefetch>, cbool_t<inverse>, cbool_t<aligned>,
                                complex<T>* out, const complex<T>*, const complex<T>*& /*twiddle*/)
{
    __builtin_assume(blocks > 0);
    constexpr static size_t prefetch_offset = width * 4;
#pragma clang loop unroll_count(2)
    for (size_t b = 0; b < blocks; b += 2)
    {
        if (prefetch)
            prefetch_one(out + prefetch_offset);

        cvec<T, 16> vlo = cread<16, aligned>(out);
        cvec<T, 16> vhi = cread<16, aligned>(out + 16);
        butterfly4<4, inverse>(vlo);
        butterfly4<4, inverse>(vhi);
        apply_twiddles4<0, 4, 4, inverse>(vlo);
        apply_twiddles4<0, 4, 4, inverse>(vhi);
        vlo = digitreverse4<2>(vlo);
        vhi = digitreverse4<2>(vhi);
        butterfly4<4, inverse>(vlo);
        butterfly4<4, inverse>(vhi);

        use_br2 ? cbitreverse_write(out, vlo) : cdigitreverse4_write(out, vlo);
        use_br2 ? cbitreverse_write(out + 16, vhi) : cdigitreverse4_write(out + 16, vhi);
        out += 32;
    }
    return {};
}

// Terminal pass for N == 4: four independent 4-point butterflies per
// iteration (b += 4), read and written in digit-reversed order.
template <size_t width, bool prefetch, bool use_br2, bool inverse, bool aligned, typename T>
KFR_SINTRIN ctrue_t radix4_pass(csize_t<4>, size_t blocks, csize_t<width>, cfalse_t, cfalse_t,
                                cbool_t<use_br2>, cbool_t<prefetch>, cbool_t<inverse>, cbool_t<aligned>,
                                complex<T>* out, const complex<T>*, const complex<T>*& /*twiddle*/)
{
    constexpr static size_t prefetch_offset = width * 4;
    __builtin_assume(blocks > 0);
    KFR_LOOP_NOUNROLL
    for (size_t b = 0; b < blocks; b += 4)
    {
        if (prefetch)
            prefetch_one(out + prefetch_offset);

        cvec<T, 16> v16 = cdigitreverse4_read<16, aligned>(out);
        butterfly4<4, inverse>(v16);
        cdigitreverse4_write<aligned>(out, v16);

        out += 4 * 4;
    }
    return {};
}

// One generic recursive radix-4 stage of the plan (stage_size >= 2048).
// Splits the transform into 4 repeats; the twiddle table (3/4 * stage_size
// entries) lives in this->data and is precomputed in split format.
template <typename T, bool splitin, bool is_even, bool inverse>
struct fft_stage_impl : dft_stage<T>
{
    fft_stage_impl(size_t stage_size)
    {
        this->stage_size = stage_size;
        this->repeats    = 4;
        this->recursion  = true;
        this->data_size  = align_up(sizeof(complex<T>) * stage_size / 4 * 3, native_cache_alignment);
    }

protected:
    constexpr static bool prefetch = true;
    constexpr static bool aligned  = false;
    constexpr static size_t width  = vector_width<T, cpu_t::native>;

    virtual void do_initialize(size_t size) override final
    {
        complex<T>* twiddle = ptr_cast<complex<T>>(this->data);
        initialize_twiddles<T, width>(twiddle, this->stage_size, size, true);
    }

    virtual void do_execute(complex<T>* out, const complex<T>* in, u8* /*temp*/) override final
    {
        const complex<T>* twiddle = ptr_cast<complex<T>>(this->data);
        // Split-format input implies the previous stage already wrote to out.
        if (splitin)
            in = out;
        const size_t stage_size = this->stage_size;
        __builtin_assume(stage_size >= 2048);
        __builtin_assume(stage_size % 2048 == 0);
        radix4_pass(stage_size, 1, csize<width>, ctrue, cbool<splitin>, cbool<!is_even>, cbool<prefetch>,
                    cbool<inverse>, cbool<aligned>, out, in, twiddle);
    }
};
// Final (non-recursive tail) stage: runs the full cascade of radix-4 passes
// for a fixed sub-transform length `size` (512 or 1024). The width of the
// very last pass depends on element size and on the parity of log2(size).
template <typename T, bool splitin, size_t size, bool inverse>
struct fft_final_stage_impl : dft_stage<T>
{
    fft_final_stage_impl(size_t)
    {
        this->stage_size = size;
        this->out_offset = size;
        this->repeats    = 4;
        this->recursion  = true;
        // Geometric series of per-pass twiddle tables: 3/4 + 3/16 + ... < 3/2.
        this->data_size  = align_up(sizeof(complex<T>) * size * 3 / 2, native_cache_alignment);
    }

protected:
    constexpr static size_t width  = vector_width<T, cpu_t::native>;
    constexpr static bool is_even  = cometa::is_even(ilog2(size));
    constexpr static bool use_br2  = !is_even;
    constexpr static bool aligned  = false;
    constexpr static bool prefetch = splitin;

    // Precompute twiddles for every pass down to (but excluding) size 4.
    virtual void do_initialize(size_t total_size) override final
    {
        complex<T>* twiddle = ptr_cast<complex<T>>(this->data);
        size_t stage_size   = this->stage_size;
        while (stage_size > 4)
        {
            initialize_twiddles<T, width>(twiddle, stage_size, total_size, true);
            stage_size /= 4;
        }
    }

    virtual void do_execute(complex<T>* out, const complex<T>* in, u8* /*temp*/) override final
    {
        constexpr bool is_double    = sizeof(T) == 8;
        // Last-pass width: even log2n -> 4 (f64) / 16 (f32);
        //                  odd  log2n -> 8 (f64) / 32 (f32).
        constexpr size_t final_size = is_even ? (is_double ? 4 : 16) : (is_double ? 8 : 32);
        const complex<T>* twiddle   = ptr_cast<complex<T>>(this->data);
        final_pass(csize<final_size>, out, in, twiddle);
    }

    // size == 512, f64: 512 -> 128 -> 32 -> 8.
    KFR_INTRIN void final_pass(csize_t<8>, complex<T>* out, const complex<T>* in, const complex<T>* twiddle)
    {
        radix4_pass(csize<512>, 1, csize<width>, ctrue, cbool<splitin>, cbool<use_br2>, cbool<prefetch>,
                    cbool<inverse>, cbool<aligned>, out, in, twiddle);
        radix4_pass(csize<128>, 4, csize<width>, ctrue, ctrue, cbool<use_br2>, cbool<prefetch>,
                    cbool<inverse>, cbool<aligned>, out, out, twiddle);
        radix4_pass(csize<32>, 16, csize<width>, cfalse, ctrue, cbool<use_br2>, cbool<prefetch>,
                    cbool<inverse>, cbool<aligned>, out, out, twiddle);
        radix4_pass(csize<8>, 64, csize<width>, cfalse, cfalse, cbool<use_br2>, cbool<prefetch>,
                    cbool<inverse>, cbool<aligned>, out, out, twiddle);
    }

    // size == 512, f32: 512 -> 128 -> 32 (terminal 32-point pass).
    KFR_INTRIN void final_pass(csize_t<32>, complex<T>* out, const complex<T>* in, const complex<T>* twiddle)
    {
        radix4_pass(csize<512>, 1, csize<width>, ctrue, cbool<splitin>, cbool<use_br2>, cbool<prefetch>,
                    cbool<inverse>, cbool<aligned>, out, in, twiddle);
        radix4_pass(csize<128>, 4, csize<width>, cfalse, ctrue, cbool<use_br2>, cbool<prefetch>,
                    cbool<inverse>, cbool<aligned>, out, out, twiddle);
        radix4_pass(csize<32>, 16, csize<width>, cfalse, cfalse, cbool<use_br2>, cbool<prefetch>,
                    cbool<inverse>, cbool<aligned>, out, out, twiddle);
    }

    // size == 1024, f64: 1024 -> 256 -> 64 -> 16 -> 4.
    KFR_INTRIN void final_pass(csize_t<4>, complex<T>* out, const complex<T>* in, const complex<T>* twiddle)
    {
        radix4_pass(csize<1024>, 1, csize<width>, ctrue, cbool<splitin>, cbool<use_br2>, cbool<prefetch>,
                    cbool<inverse>, cbool<aligned>, out, in, twiddle);
        radix4_pass(csize<256>, 4, csize<width>, ctrue, ctrue, cbool<use_br2>, cbool<prefetch>,
                    cbool<inverse>, cbool<aligned>, out, out, twiddle);
        radix4_pass(csize<64>, 16, csize<width>, ctrue, ctrue, cbool<use_br2>, cbool<prefetch>,
                    cbool<inverse>, cbool<aligned>, out, out, twiddle);
        radix4_pass(csize<16>, 64, csize<width>, cfalse, ctrue, cbool<use_br2>, cbool<prefetch>,
                    cbool<inverse>, cbool<aligned>, out, out, twiddle);
        radix4_pass(csize<4>, 256, csize<width>, cfalse, cfalse, cbool<use_br2>, cbool<prefetch>,
                    cbool<inverse>, cbool<aligned>, out, out, twiddle);
    }

    // size == 1024, f32: 1024 -> 256 -> 64 -> 16.
    KFR_INTRIN void final_pass(csize_t<16>, complex<T>* out, const complex<T>* in, const complex<T>* twiddle)
    {
        radix4_pass(csize<1024>, 1, csize<width>, ctrue, cbool<splitin>, cbool<use_br2>, cbool<prefetch>,
                    cbool<inverse>, cbool<aligned>, out, in, twiddle);
        radix4_pass(csize<256>, 4, csize<width>, ctrue, ctrue, cbool<use_br2>, cbool<prefetch>,
                    cbool<inverse>, cbool<aligned>, out, out, twiddle);
        radix4_pass(csize<64>, 16, csize<width>, cfalse, ctrue, cbool<use_br2>, cbool<prefetch>,
                    cbool<inverse>, cbool<aligned>, out, out, twiddle);
        radix4_pass(csize<16>, 64, csize<width>, cfalse, cfalse, cbool<use_br2>, cbool<prefetch>,
                    cbool<inverse>, cbool<aligned>, out, out, twiddle);
    }
};

// Stand-alone reordering stage: converts the bit/digit-reversed output of the
// decimation passes back to natural order, in place.
template <typename T, bool is_even>
struct fft_reorder_stage_impl : dft_stage<T>
{
    fft_reorder_stage_impl(size_t stage_size)
    {
        this->stage_size = stage_size;
        log2n            = ilog2(stage_size);
        this->data_size  = 0; // no twiddles needed
    }

protected:
    size_t log2n;

    virtual void do_initialize(size_t) override final {}

    virtual void do_execute(complex<T>* out, const complex<T>*, u8* /*temp*/) override final
    {
        fft_reorder(out, log2n, cbool<!is_even>);
    }
};

// Fully specialized single-stage transforms for tiny sizes 2^log2n.
template <typename T, size_t log2n, bool inverse>
struct fft_specialization;

// 2-point DFT: out = { a0 + a1, a0 - a1 }.
template <typename T, bool inverse>
struct fft_specialization<T, 1, inverse> : dft_stage<T>
{
    fft_specialization(size_t) {}
protected:
    constexpr static bool aligned = false;
    virtual void do_execute(complex<T>* out, const complex<T>* in, u8*) override final
    {
        cvec<T, 1> a0, a1;
        split(cread<2, aligned>(in), a0, a1);
        cwrite<2, aligned>(out, concat(a0 + a1, a0 - a1));
    }
};

// 4-point DFT (body continues on the next chunk line).
template <typename T, bool inverse>
struct fft_specialization<T, 2, inverse> : dft_stage<T>
{
    /* continuation of fft_specialization<T, 2, inverse> from the previous
       chunk line: 4-point DFT via a single radix-4 butterfly. */
    fft_specialization(size_t) {}
protected:
    constexpr static bool aligned = false;
    virtual void do_execute(complex<T>* out, const complex<T>* in, u8*) override final
    {
        cvec<T, 1> a0, a1, a2, a3;
        split(cread<4>(in), a0, a1, a2, a3);
        butterfly(cbool<inverse>, a0, a1, a2, a3, a0, a1, a2, a3);
        cwrite<4>(out, concat(a0, a1, a2, a3));
    }
};

// 8-point DFT: single fused butterfly8 kernel.
template <typename T, bool inverse>
struct fft_specialization<T, 3, inverse> : dft_stage<T>
{
    fft_specialization(size_t) {}
protected:
    constexpr static bool aligned = false;
    virtual void do_execute(complex<T>* out, const complex<T>* in, u8*) override final
    {
        cvec<T, 8> v8 = cread<8, aligned>(in);
        butterfly8<inverse>(v8);
        cwrite<8, aligned>(out, v8);
    }
};

// 16-point DFT: single fused butterfly16 kernel.
template <typename T, bool inverse>
struct fft_specialization<T, 4, inverse> : dft_stage<T>
{
    fft_specialization(size_t) {}
protected:
    constexpr static bool aligned = false;
    virtual void do_execute(complex<T>* out, const complex<T>* in, u8*) override final
    {
        cvec<T, 16> v16 = cread<16, aligned>(in);
        butterfly16<inverse>(v16);
        cwrite<16, aligned>(out, v16);
    }
};

// 32-point DFT: single fused butterfly32 kernel.
template <typename T, bool inverse>
struct fft_specialization<T, 5, inverse> : dft_stage<T>
{
    fft_specialization(size_t) {}
protected:
    constexpr static bool aligned = false;
    virtual void do_execute(complex<T>* out, const complex<T>* in, u8*) override final
    {
        cvec<T, 32> v32 = cread<32, aligned>(in);
        butterfly32<inverse>(v32);
        cwrite<32, aligned>(out, v32);
    }
};

// 64-point DFT: delegates to the out-of-place butterfly64 kernel.
template <typename T, bool inverse>
struct fft_specialization<T, 6, inverse> : dft_stage<T>
{
    fft_specialization(size_t) {}
protected:
    constexpr static bool aligned = false;
    virtual void do_execute(complex<T>* out, const complex<T>* in, u8*) override final
    {
        butterfly64(cbool<inverse>, cbool<aligned>, out, in);
    }
};

// 128-point DFT: small cascade of radix-4 passes with precomputed twiddles,
// followed by an explicit reorder to natural order.
template <typename T, bool inverse>
struct fft_specialization<T, 7, inverse> : dft_stage<T>
{
    fft_specialization(size_t)
    {
        this->stage_size = 128;
        this->data_size  = align_up(sizeof(complex<T>) * 128 * 3 / 2, native_cache_alignment);
    }

protected:
    constexpr static bool aligned        = false;
    constexpr static size_t width        = vector_width<T, cpu_t::native>;
    constexpr static bool use_br2        = true;
    constexpr static bool prefetch       = false;
    constexpr static bool is_double      = sizeof(T) == 8;
    constexpr static size_t final_size   = is_double ? 8 : 32;
    // NOTE(review): declared size_t but holds a boolean (final_size == 8);
    // consider bool for clarity.
    constexpr static size_t split_format = final_size == 8;

    virtual void do_initialize(size_t total_size) override final
    {
        complex<T>* twiddle = ptr_cast<complex<T>>(this->data);
        initialize_twiddles<T, width>(twiddle, 128, total_size, split_format);
        initialize_twiddles<T, width>(twiddle, 32, total_size, split_format);
        initialize_twiddles<T, width>(twiddle, 8, total_size, split_format);
    }

    virtual void do_execute(complex<T>* out, const complex<T>* in, u8* /*temp*/) override final
    {
        const complex<T>* twiddle = ptr_cast<complex<T>>(this->data);
        final_pass(csize<final_size>, out, in, twiddle);
        fft_reorder(out, csize<7>);
    }

    // f64 path: 128 -> 32 -> 8.
    KFR_INTRIN void final_pass(csize_t<8>, complex<T>* out, const complex<T>* in, const complex<T>* twiddle)
    {
        radix4_pass(csize<128>, 1, csize<width>, ctrue, cfalse, cbool<use_br2>, cbool<prefetch>,
                    cbool<inverse>, cbool<aligned>, out, in, twiddle);
        radix4_pass(csize<32>, 4, csize<width>, cfalse, ctrue, cbool<use_br2>, cbool<prefetch>,
                    cbool<inverse>, cbool<aligned>, out, out, twiddle);
        radix4_pass(csize<8>, 16, csize<width>, cfalse, cfalse, cbool<use_br2>, cbool<prefetch>,
                    cbool<inverse>, cbool<aligned>, out, out, twiddle);
    }

    // f32 path: 128 -> 32 (terminal 32-point pass; tail continues on the
    // next chunk line).
    KFR_INTRIN void final_pass(csize_t<32>, complex<T>* out, const complex<T>* in, const complex<T>* twiddle)
    {
        radix4_pass(csize<128>, 1, csize<width>, cfalse, cfalse, cbool<use_br2>, cbool<prefetch>,
                    cbool<inverse>, cbool<aligned>, out, in, twiddle);
        radix4_pass(csize<32>, 4, csize<width>, cfalse, cfalse, cbool<use_br2>, cbool<prefetch>,
                    cbool<inverse>, cbool<aligned>, out,
/* continuation of fft_specialization<T, 7, inverse>::final_pass(csize_t<32>)
   from the previous chunk line. */
out, twiddle);
    }
};

// 256-point single-precision DFT built from 16x16 "four-step" passes.
// Needs a 256-complex scratch buffer only for the in-place case.
template <bool inverse>
struct fft_specialization<float, 8, inverse> : dft_stage<float>
{
    fft_specialization(size_t) { this->temp_size = sizeof(complex<float>) * 256; }
protected:
    virtual void do_execute(complex<float>* out, const complex<float>* in, u8* temp) override final
    {
        complex<float>* scratch = ptr_cast<complex<float>>(temp);
        if (out == in)
        {
            // In-place: flip into scratch, then back into out in natural order.
            butterfly16_multi_flip<0, inverse>(scratch, out);
            butterfly16_multi_flip<1, inverse>(scratch, out);
            butterfly16_multi_flip<2, inverse>(scratch, out);
            butterfly16_multi_flip<3, inverse>(scratch, out);

            butterfly16_multi_natural<0, inverse>(out, scratch);
            butterfly16_multi_natural<1, inverse>(out, scratch);
            butterfly16_multi_natural<2, inverse>(out, scratch);
            butterfly16_multi_natural<3, inverse>(out, scratch);
        }
        else
        {
            // Out-of-place: flip in -> out, then natural pass in place.
            butterfly16_multi_flip<0, inverse>(out, in);
            butterfly16_multi_flip<1, inverse>(out, in);
            butterfly16_multi_flip<2, inverse>(out, in);
            butterfly16_multi_flip<3, inverse>(out, in);

            butterfly16_multi_natural<0, inverse>(out, out);
            butterfly16_multi_natural<1, inverse>(out, out);
            butterfly16_multi_natural<2, inverse>(out, out);
            butterfly16_multi_natural<3, inverse>(out, out);
        }
    }
};

// 256-point double-precision DFT: radix-4 cascade 256 -> 64 -> 16 -> 4 with
// precomputed split-format twiddles, then reorder to natural order.
template <bool inverse>
struct fft_specialization<double, 8, inverse> : dft_stage<double>
{
    using T = double;
    fft_specialization(size_t)
    {
        this->stage_size = 256;
        this->data_size  = align_up(sizeof(complex<T>) * 256 * 3 / 2, native_cache_alignment);
    }

protected:
    constexpr static bool aligned  = false;
    constexpr static size_t width  = vector_width<T, cpu_t::native>;
    constexpr static bool use_br2  = false;
    constexpr static bool prefetch = false;
    // NOTE(review): declared size_t but holds a boolean; consider bool.
    constexpr static size_t split_format = true;

    virtual void do_initialize(size_t total_size) override final
    {
        complex<T>* twiddle = ptr_cast<complex<T>>(this->data);
        initialize_twiddles<T, width>(twiddle, 256, total_size, split_format);
        initialize_twiddles<T, width>(twiddle, 64, total_size, split_format);
        initialize_twiddles<T, width>(twiddle, 16, total_size, split_format);
    }

    virtual void do_execute(complex<T>* out, const complex<T>* in, u8* /*temp*/) override final
    {
        const complex<T>* twiddle = ptr_cast<complex<T>>(this->data);
        final_pass(csize<4>, out, in, twiddle);
        fft_reorder(out, csize<8>);
    }

    KFR_INTRIN void final_pass(csize_t<4>, complex<T>* out, const complex<T>* in, const complex<T>* twiddle)
    {
        radix4_pass(csize<256>, 1, csize<width>, ctrue, cfalse, cbool<use_br2>, cbool<prefetch>,
                    cbool<inverse>, cbool<aligned>, out, in, twiddle);
        radix4_pass(csize<64>, 4, csize<width>, ctrue, ctrue, cbool<use_br2>, cbool<prefetch>, cbool<inverse>,
                    cbool<aligned>, out, out, twiddle);
        radix4_pass(csize<16>, 16, csize<width>, cfalse, ctrue, cbool<use_br2>, cbool<prefetch>,
                    cbool<inverse>, cbool<aligned>, out, out, twiddle);
        radix4_pass(csize<4>, 64, csize<width>, cfalse, cfalse, cbool<use_br2>, cbool<prefetch>,
                    cbool<inverse>, cbool<aligned>, out, out, twiddle);
    }
};

// Adapters exposing each stage type through a uniform `template <bool inverse>
// using type = ...` interface, so dft_plan::add_stage can instantiate the
// direct and inverse variants from one template-template parameter.
template <typename T, bool splitin, bool is_even>
struct fft_stage_impl_t
{
    template <bool inverse>
    using type = internal::fft_stage_impl<T, splitin, is_even, inverse>;
};
template <typename T, bool splitin, size_t size>
struct fft_final_stage_impl_t
{
    template <bool inverse>
    using type = internal::fft_final_stage_impl<T, splitin, size, inverse>;
};
template <typename T, bool is_even>
struct fft_reorder_stage_impl_t
{
    template <bool>
    using type = internal::fft_reorder_stage_impl<T, is_even>;
};
template <typename T, size_t log2n, bool aligned>
struct fft_specialization_t
{
    template <bool inverse>
    using type = internal::fft_specialization<T, log2n, inverse>;
};
}

// Compile-time tags selecting which transform directions a plan prepares.
namespace dft_type
{
constexpr cbools_t<true, true> both{};
constexpr cbools_t<true, false> direct{};
constexpr cbools_t<false, true> inverse{};
}

// DFT plan: owns the stage list and twiddle storage for a fixed size
// (definition continues on the next chunk line).
template <typename T>
struct dft_plan
{
    using dft_stage_ptr =
/* continuation of `using dft_stage_ptr =` from the previous chunk line. */
std::unique_ptr<dft_stage<T>>;

    size_t size;      // transform length (complex points)
    size_t temp_size; // bytes of scratch the caller must provide to execute()

    // Builds the stage pipeline. Only power-of-two sizes are handled here:
    // log2n in [1, 8] uses a single fully-specialized stage; larger sizes use
    // recursive radix-4 stages plus a final reorder stage.
    // NOTE(review): for a non-power-of-two size the plan is left empty and
    // initialize() is never called — executing such a plan is not supported
    // by this code.
    template <bool direct = true, bool inverse = true>
    dft_plan(size_t size, cbools_t<direct, inverse> type = dft_type::both)
        : size(size), temp_size(0), data_size(0)
    {
        if (is_poweroftwo(size))
        {
            const size_t log2n = ilog2(size);
            cswitch(csizes<1, 2, 3, 4, 5, 6, 7, 8>, log2n,
                    [&](auto log2n) {
                        add_stage<internal::fft_specialization_t<T, val_of(log2n), false>::template type>(
                            size, type);
                    },
                    [&]() {
                        cswitch(cfalse_true, is_even(log2n), [&](auto is_even) {
                            make_fft(size, type, is_even, ctrue);
                            add_stage<internal::fft_reorder_stage_impl_t<T, val_of(is_even)>::template type>(
                                size, type);
                        });
                    });
            initialize(type);
        }
    }
    // Runtime-selected direction.
    KFR_INTRIN void execute(complex<T>* out, const complex<T>* in, u8* temp, bool inverse = false) const
    {
        if (inverse)
            execute_dft(ctrue, out, in, temp);
        else
            execute_dft(cfalse, out, in, temp);
    }
    // Compile-time-selected direction.
    template <bool inverse>
    KFR_INTRIN void execute(complex<T>* out, const complex<T>* in, u8* temp, cbool_t<inverse> inv) const
    {
        execute_dft(inv, out, in, temp);
    }

    // univector convenience overloads.
    template <size_t Tag1, size_t Tag2, size_t Tag3>
    KFR_INTRIN void execute(univector<complex<T>, Tag1>& out, const univector<complex<T>, Tag2>& in,
                            univector<u8, Tag3>& temp, bool inverse = false) const
    {
        if (inverse)
            execute_dft(ctrue, out.data(), in.data(), temp.data());
        else
            execute_dft(cfalse, out.data(), in.data(), temp.data());
    }
    template <bool inverse, size_t Tag1, size_t Tag2, size_t Tag3>
    KFR_INTRIN void execute(univector<complex<T>, Tag1>& out, const univector<complex<T>, Tag2>& in,
                            univector<u8, Tag3>& temp, cbool_t<inverse> inv) const
    {
        execute_dft(inv, out.data(), in.data(), temp.data());
    }

private:
    autofree<u8> data;  // shared twiddle storage for all stages
    size_t data_size;
    std::vector<dft_stage_ptr> stages[2]; // [0] = direct, [1] = inverse

    // add_stage: instantiate Stage<false>/Stage<true> per requested directions
    // and accumulate data/temp requirements. Both directions share data_size
    // bookkeeping from the direct stage in the both-directions case.
    template <template <bool inverse> class Stage>
    void add_stage(size_t stage_size, cbools_t<true, true>)
    {
        dft_stage<T>* direct_stage  = new Stage<false>(stage_size);
        direct_stage->name          = type_name<decltype(*direct_stage)>();
        dft_stage<T>* inverse_stage = new Stage<true>(stage_size);
        inverse_stage->name         = type_name<decltype(*inverse_stage)>();
        this->data_size += direct_stage->data_size;
        this->temp_size += direct_stage->temp_size;
        stages[0].push_back(dft_stage_ptr(direct_stage));
        stages[1].push_back(dft_stage_ptr(inverse_stage));
    }
    template <template <bool inverse> class Stage>
    void add_stage(size_t stage_size, cbools_t<true, false>)
    {
        dft_stage<T>* direct_stage = new Stage<false>(stage_size);
        direct_stage->name         = type_name<decltype(*direct_stage)>();
        this->data_size += direct_stage->data_size;
        this->temp_size += direct_stage->temp_size;
        stages[0].push_back(dft_stage_ptr(direct_stage));
    }
    template <template <bool inverse> class Stage>
    void add_stage(size_t stage_size, cbools_t<false, true>)
    {
        dft_stage<T>* inverse_stage = new Stage<true>(stage_size);
        inverse_stage->name         = type_name<decltype(*inverse_stage)>();
        this->data_size += inverse_stage->data_size;
        this->temp_size += inverse_stage->temp_size;
        stages[1].push_back(dft_stage_ptr(inverse_stage));
    }

    // Recursively appends radix-4 stages while stage_size >= 2048, then one
    // fft_final_stage_impl of fixed length (1024 for even log2n, else 512).
    template <bool direct, bool inverse, bool is_even, bool first>
    void make_fft(size_t stage_size, cbools_t<direct, inverse> type, cbool_t<is_even>, cbool_t<first>)
    {
        constexpr size_t final_size = is_even ? 1024 : 512;

        using fft_stage_impl_t       = internal::fft_stage_impl_t<T, !first, is_even>;
        using fft_final_stage_impl_t = internal::fft_final_stage_impl_t<T, !first, final_size>;

        if (stage_size >= 2048)
        {
            add_stage<fft_stage_impl_t::template type>(stage_size, type);

            make_fft(stage_size / 4, cbools<direct, inverse>, cbool<is_even>, cfalse);
        }
        else
        {
            add_stage<fft_final_stage_impl_t::template type>(final_size, type);
        }
    }

    // Allocates the shared twiddle buffer and lets each stage fill its slice.
    // In the both-directions case only the direct stages initialize the data
    // (the inverse stages share the same twiddle layout at the same offsets).
    template <bool direct, bool inverse>
    void initialize(cbools_t<direct, inverse>)
    {
        data = autofree<u8>(data_size);
        if (direct)
        {
            size_t offset = 0;
            for (dft_stage_ptr& stage : stages[0])
            {
                stage->data = data.data() + offset;
                stage->initialize(this->size);
                offset += stage->data_size;
            }
        }
        if (inverse)
        {
            size_t offset = 0;
            for (dft_stage_ptr& stage : stages[1])
            {
                stage->data = data.data() + offset;
                if (!direct)
                    stage->initialize(this->size);
                offset += stage->data_size;
            }
        }
    }

    // Runs the stage pipeline. Recursive stages are driven iteratively with an
    // explicit repeat-count stack: stack[d] counts executions of stage d; when
    // it reaches stage->repeats the walk pops back one level. Non-recursive
    // stages run once. After the first stage, input aliases output (in-place).
    template <bool inverse>
    KFR_INTRIN void execute_dft(cbool_t<inverse>, complex<T>* out, const complex<T>* in, u8* temp) const
    {
        size_t stack[32] = { 0 };

        const size_t count = stages[inverse].size();

        for (size_t depth = 0; depth < count;)
        {
            if (stages[inverse][depth]->recursion)
            {
                complex<T>* rout      = out;
                const complex<T>* rin = in;
                size_t rdepth         = depth;
                size_t maxdepth       = depth;
                do
                {
                    if (stack[rdepth] == stages[inverse][rdepth]->repeats)
                    {
                        stack[rdepth] = 0;
                        rdepth--;
                    }
                    else
                    {
                        stages[inverse][rdepth]->execute(rout, rin, temp);
                        rout += stages[inverse][rdepth]->out_offset;
                        rin = rout;
                        stack[rdepth]++;
                        if (rdepth < count - 1 && stages[inverse][rdepth + 1]->recursion)
                            rdepth++;
                        else
                            maxdepth = rdepth;
                    }
                } while (rdepth != depth);
                depth = maxdepth + 1;
            }
            else
            {
                stages[inverse][depth]->execute(out, in, temp);
                depth++;
            }
            in = out;
        }
    }
};
}

#pragma clang diagnostic pop
diff --git a/include/kfr/dft/ft.hpp
b/include/kfr/dft/ft.hpp @@ -0,0 +1,1505 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. 
 */
#pragma once

#include "../base/complex.hpp"
#include "../base/constants.hpp"
#include "../base/digitreverse.hpp"
#include "../base/read_write.hpp"
#include "../base/sin_cos.hpp"
#include "../base/univector.hpp"
#include "../base/vec.hpp"
#include "../misc/small_buffer.hpp"

#include "../base/memory.hpp"
#include "../data/sincos.hpp"

#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Winaccessible-base"

namespace kfr
{

namespace internal
{

// Elementwise complex multiply of interleaved (re, im) vectors:
// subadd(x*dupeven(y), swap<2>(x)*dupodd(y)) computes (a+bi)(c+di).
template <typename T, size_t N, KFR_ENABLE_IF(N >= 2)>
KFR_INLINE vec<T, N> cmul_impl(vec<T, N> x, vec<T, N> y)
{
    return subadd(x * dupeven(y), swap<2>(x) * dupodd(y));
}
// Mixed-width overloads: broadcast the single complex operand to N lanes.
template <typename T, size_t N, KFR_ENABLE_IF(N > 2)>
KFR_INLINE vec<T, N> cmul_impl(vec<T, N> x, vec<T, 2> y)
{
    vec<T, N> yy = resize<N>(y);
    return cmul_impl(x, yy);
}
template <typename T, size_t N, KFR_ENABLE_IF(N > 2)>
KFR_INLINE vec<T, N> cmul_impl(vec<T, 2> x, vec<T, N> y)
{
    vec<T, N> xx = resize<N>(x);
    return cmul_impl(xx, y);
}

/// Complex Multiplication
template <typename T, size_t N1, size_t N2>
KFR_INLINE vec<T, std::max(N1, N2)> cmul(vec<T, N1> x, vec<T, N2> y)
{
    return internal::cmul_impl(x, y);
}
KFR_FN(cmul)

// x * conj(y) for interleaved complex vectors.
template <typename T, size_t N, KFR_ENABLE_IF(N >= 2)>
KFR_INLINE vec<T, N> cmul_conj(vec<T, N> x, vec<T, N> y)
{
    return swap<2>(subadd(swap<2>(x) * cdupreal(y), x * cdupimag(y)));
}
// Computes in0*tw + in1*conj(tw) fused into one expression (used by
// real-FFT-style recombination passes — presumably; confirm against callers).
template <typename T, size_t N, KFR_ENABLE_IF(N >= 2)>
KFR_INLINE vec<T, N> cmul_2conj(vec<T, N> in0, vec<T, N> in1, vec<T, N> tw)
{
    return (in0 + in1) * cdupreal(tw) + swap<2>(cnegimag(in0 - in1)) * cdupimag(tw);
}
// Accumulating variant: adds the twiddled sum/difference into out0/out1.
template <typename T, size_t N, KFR_ENABLE_IF(N >= 2)>
KFR_INLINE void cmul_2conj(vec<T, N>& out0, vec<T, N>& out1, vec<T, 2> in0, vec<T, 2> in1, vec<T, N> tw)
{
    const vec<T, N> twr   = cdupreal(tw);
    const vec<T, N> twi   = cdupimag(tw);
    const vec<T, 2> sum   = (in0 + in1);
    const vec<T, 2> dif   = swap<2>(cnegimag(in0 - in1));
    const vec<T, N> sumtw = resize<N>(sum) * twr;
    const vec<T, N> diftw = resize<N>(dif) * twi;
    out0 += sumtw + diftw;
    out1 += sumtw - diftw;
}
template <typename T, size_t N, KFR_ENABLE_IF(N > 2)>
KFR_INLINE vec<T, N> cmul_conj(vec<T, N> x, vec<T, 2> y)
{
    vec<T, N> yy = resize<N>(y);
    return cmul_conj(x, yy);
}
template <typename T, size_t N, KFR_ENABLE_IF(N > 2)>
KFR_INLINE vec<T, N> cmul_conj(vec<T, 2> x, vec<T, N> y)
{
    vec<T, N> xx = resize<N>(x);
    return cmul_conj(xx, y);
}
KFR_FN(cmul_conj)
KFR_FN(cmul_2conj)

// Reads/writes N complex values as 2N interleaved scalars (A = aligned).
template <size_t N, bool A = false, typename T>
KFR_INLINE cvec<T, N> cread(const complex<T>* src)
{
    return internal_read_write::read<N * 2, A>(ptr_cast<T>(src));
}

template <size_t N, bool A = false, typename T>
KFR_INLINE void cwrite(complex<T>* dest, cvec<T, N> value)
{
    return internal_read_write::write<A>(ptr_cast<T>(dest), value);
}

// Strided group I/O: `count` runs of N complex values, `stride` complex
// elements apart. First pair takes the stride as a template constant, the
// second as a runtime argument.
template <size_t count, size_t N, size_t stride, bool A, typename T, size_t... indices>
KFR_INLINE cvec<T, count * N> cread_group_impl(const complex<T>* src, csizes_t<indices...>)
{
    return concat(read<N * 2, A>(ptr_cast<T>(src + stride * indices))...);
}
template <size_t count, size_t N, size_t stride, bool A, typename T, size_t... indices>
KFR_INLINE void cwrite_group_impl(complex<T>* dest, cvec<T, count * N> value, csizes_t<indices...>)
{
    swallow{ (write<A>(ptr_cast<T>(dest + stride * indices), slice<indices * N * 2, N * 2>(value)), 0)... };
}

template <size_t count, size_t N, bool A, typename T, size_t... indices>
KFR_INLINE cvec<T, count * N> cread_group_impl(const complex<T>* src, size_t stride, csizes_t<indices...>)
{
    return concat(read<N * 2, A>(ptr_cast<T>(src + stride * indices))...);
}
template <size_t count, size_t N, bool A, typename T, size_t... indices>
KFR_INLINE void cwrite_group_impl(complex<T>* dest, size_t stride, cvec<T, count * N> value,
                                  csizes_t<indices...>)
{
    swallow{ (write<A>(ptr_cast<T>(dest + stride * indices), slice<indices * N * 2, N * 2>(value)), 0)... };
}

template <size_t count, size_t N, size_t stride, bool A = false, typename T>
KFR_INLINE cvec<T, count * N> cread_group(const complex<T>* src)
{
    return cread_group_impl<count, N, stride, A>(src, csizeseq<count>);
}

template <size_t count, size_t N, size_t stride, bool A = false, typename T>
KFR_INLINE void cwrite_group(complex<T>* dest, cvec<T, count * N> value)
{
    return cwrite_group_impl<count, N, stride, A>(dest, value, csizeseq<count>);
}

template <size_t count, size_t N, bool A = false, typename T>
KFR_INLINE cvec<T, count * N> cread_group(const complex<T>* src, size_t stride)
{
    return cread_group_impl<count, N, A>(src, stride, csizeseq<count>);
}

template <size_t count, size_t N, bool A = false, typename T>
KFR_INLINE void cwrite_group(complex<T>* dest, size_t stride, cvec<T, count * N> value)
{
    return cwrite_group_impl<count, N, A>(dest, stride, value, csizeseq<count>);
}

// Split-format I/O: when `split` is true the vector is converted between
// interleaved (r0 i0 r1 i1 ...) memory and split (r0 r1 ... i0 i1 ...)
// register layout. Below are hand-tuned full specializations for the hot
// f32x8 / f64x4 cases.
template <size_t N, bool A = false, bool split = false, typename T>
KFR_INLINE cvec<T, N> cread_split(const complex<T>* src)
{
    cvec<T, N> temp = internal_read_write::read<N * 2, A>(ptr_cast<T>(src));
    if (split)
        temp = splitpairs(temp);
    return temp;
}

template <size_t N, bool A = false, bool split = false, typename T>
KFR_INLINE void cwrite_split(complex<T>* dest, cvec<T, N> value)
{
    if (split)
        value = interleavehalfs(value);
    internal_read_write::write<A>(ptr_cast<T>(dest), value);
}

template <>
inline cvec<f32, 8> cread_split<8, false, true, f32>(const complex<f32>* src)
{
    const cvec<f32, 4> l = concat(cread<2>(src), cread<2>(src + 4));
    const cvec<f32, 4> h = concat(cread<2>(src + 2), cread<2>(src + 6));

    return concat(shuffle<0, 2, 8 + 0, 8 + 2>(l, h), shuffle<1, 3, 8 + 1, 8 + 3>(l, h));
}
template <>
inline cvec<f32, 8> cread_split<8, true, true, f32>(const complex<f32>* src)
{
    const cvec<f32, 4> l = concat(cread<2, true>(src), cread<2, true>(src + 4));
    const cvec<f32, 4> h = concat(cread<2, true>(src + 2), cread<2, true>(src + 6));

    return concat(shuffle<0, 2, 8 + 0, 8 + 2>(l, h), shuffle<1, 3, 8 + 1, 8 + 3>(l, h));
}

template <>
inline cvec<f64, 4> cread_split<4, false, true, f64>(const complex<f64>* src)
{
    const cvec<f64, 2> l = concat(cread<1>(src), cread<1>(src + 2));
    const cvec<f64, 2> h = concat(cread<1>(src + 1), cread<1>(src + 3));

    return concat(shuffle<0, 4, 2, 6>(l, h), shuffle<1, 5, 3, 7>(l, h));
}

template <>
inline void cwrite_split<8, false, true, f32>(complex<f32>* dest, cvec<f32, 8> x)
{
    x = concat(shuffle<0, 8 + 0, 1, 8 + 1>(low(x), high(x)), shuffle<2, 8 + 2, 3, 8 + 3>(low(x), high(x)));

    cvec<f32, 2> a, b, c, d;
    split(x, a, b, c, d);
    cwrite<2>(dest, a);
    cwrite<2>(dest + 4, b);
    cwrite<2>(dest + 2, c);
    cwrite<2>(dest + 6, d);
}
template <>
inline void cwrite_split<8, true, true, f32>(complex<f32>* dest, cvec<f32, 8> x)
{
    x = concat(shuffle<0, 8 + 0, 1, 8 + 1>(low(x), high(x)), shuffle<2, 8 + 2, 3, 8 + 3>(low(x), high(x)));

    cvec<f32, 2> a, b, c, d;
    split(x, a, b, c, d);
    cwrite<2, true>(dest, a);
    cwrite<2, true>(dest + 4, b);
    cwrite<2, true>(dest + 2, c);
    cwrite<2, true>(dest + 6, d);
}

template <>
inline void cwrite_split<4, false, true, f64>(complex<f64>* dest, cvec<f64, 4> x)
{
    x = concat(shuffle<0, 4, 2, 6>(low(x), high(x)), shuffle<1, 5, 3, 7>(low(x), high(x)));
    cwrite<1>(dest, part<4, 0>(x));
    cwrite<1>(dest + 2, part<4, 1>(x));
    cwrite<1>(dest + 1, part<4, 2>(x));
    cwrite<1>(dest + 3, part<4, 3>(x));
}
template <>
inline void cwrite_split<4, true, true, f64>(complex<f64>* dest, cvec<f64, 4> x)
{
    x = concat(shuffle<0, 4, 2, 6>(low(x), high(x)), shuffle<1, 5, 3, 7>(low(x), high(x)));
    cwrite<1, true>(dest, part<4, 0>(x));
    cwrite<1, true>(dest + 2, part<4, 1>(x));
    cwrite<1, true>(dest + 1, part<4, 2>(x));
    cwrite<1, true>(dest + 3, part<4, 3>(x));
}

// cgather_helper signature continues on the next chunk line.
template <size_t N, size_t stride, typename T, size_t...
Indices> +KFR_INLINE cvec<T, N> cgather_helper(const complex<T>* base, csizes_t<Indices...>) +{ + return concat(ref_cast<cvec<T, 1>>(base[Indices * stride])...); +} + +template <size_t N, size_t stride, typename T> +KFR_INLINE cvec<T, N> cgather(const complex<T>* base) +{ + if (stride == 1) + { + return ref_cast<cvec<T, N>>(*base); + } + else + return cgather_helper<N, stride, T>(base, csizeseq<N>); +} + +KFR_INLINE size_t cgather_next(size_t& index, size_t stride, size_t size, size_t) +{ + size_t temp = index; + index += stride; + if (index >= size) + index -= size; + return temp; +} +KFR_INLINE size_t cgather_next(size_t& index, size_t stride, size_t) +{ + size_t temp = index; + index += stride; + return temp; +} + +template <size_t N, typename T, size_t... Indices> +KFR_INLINE cvec<T, N> cgather_helper(const complex<T>* base, size_t& index, size_t stride, + csizes_t<Indices...>) +{ + return concat(ref_cast<cvec<T, 1>>(base[cgather_next(index, stride, Indices)])...); +} + +template <size_t N, typename T> +KFR_INLINE cvec<T, N> cgather(const complex<T>* base, size_t& index, size_t stride) +{ + return cgather_helper<N, T>(base, index, stride, csizeseq<N>); +} +template <size_t N, typename T> +KFR_INLINE cvec<T, N> cgather(const complex<T>* base, size_t stride) +{ + size_t index = 0; + return cgather_helper<N, T>(base, index, stride, csizeseq<N>); +} + +template <size_t N, typename T, size_t... Indices> +KFR_INLINE cvec<T, N> cgather_helper(const complex<T>* base, size_t& index, size_t stride, size_t size, + csizes_t<Indices...>) +{ + return concat(ref_cast<cvec<T, 1>>(base[cgather_next(index, stride, size, Indices)])...); +} + +template <size_t N, typename T> +KFR_INLINE cvec<T, N> cgather(const complex<T>* base, size_t& index, size_t stride, size_t size) +{ + return cgather_helper<N, T>(base, index, stride, size, csizeseq<N>); +} + +template <size_t N, size_t stride, typename T, size_t... 
Indices> +KFR_INLINE void cscatter_helper(complex<T>* base, cvec<T, N> value, csizes_t<Indices...>) +{ + swallow{ (cwrite<1>(base + Indices * stride, slice<Indices * 2, 2>(value)), 0)... }; +} + +template <size_t N, size_t stride, typename T> +KFR_INLINE void cscatter(complex<T>* base, cvec<T, N> value) +{ + if (stride == 1) + { + cwrite<N>(base, value); + } + else + { + return cscatter_helper<N, stride, T>(base, value, csizeseq<N>); + } +} + +template <size_t N, typename T, size_t... Indices> +KFR_INLINE void cscatter_helper(complex<T>* base, size_t stride, cvec<T, N> value, csizes_t<Indices...>) +{ + swallow{ (cwrite<1>(base + Indices * stride, slice<Indices * 2, 2>(value)), 0)... }; +} + +template <size_t N, typename T> +KFR_INLINE void cscatter(complex<T>* base, size_t stride, cvec<T, N> value) +{ + return cscatter_helper<N, T>(base, stride, value, csizeseq<N>); +} + +template <size_t groupsize = 1, typename T, size_t N, typename IT> +KFR_INLINE vec<T, N * 2 * groupsize> cgather(const complex<T>* base, vec<IT, N> offset) +{ + return gather_helper<2 * groupsize>(ptr_cast<T>(base), offset, csizeseq<N>); +} + +template <size_t groupsize = 1, typename T, size_t N, typename IT> +KFR_INLINE void cscatter(complex<T>* base, vec<IT, N> offset, vec<T, N * 2 * groupsize> value) +{ + return scatter_helper<2 * groupsize>(ptr_cast<T>(base), offset, value, csizeseq<N>); +} + +constexpr size_t default_unroll_count = 2; + +template <typename T> +KFR_INTRIN void transpose4x8(cvec<T, 8> z0, cvec<T, 8> z1, cvec<T, 8> z2, cvec<T, 8> z3, cvec<T, 4>& w0, + cvec<T, 4>& w1, cvec<T, 4>& w2, cvec<T, 4>& w3, cvec<T, 4>& w4, cvec<T, 4>& w5, + cvec<T, 4>& w6, cvec<T, 4>& w7) +{ + cvec<T, 16> a = concat(low(z0), low(z1), low(z2), low(z3)); + cvec<T, 16> b = concat(high(z0), high(z1), high(z2), high(z3)); + a = digitreverse4<2>(a); + b = digitreverse4<2>(b); + w0 = part<4, 0>(a); + w1 = part<4, 1>(a); + w2 = part<4, 2>(a); + w3 = part<4, 3>(a); + w4 = part<4, 0>(b); + w5 = part<4, 1>(b); + w6 
= part<4, 2>(b);
    w7 = part<4, 3>(b);
}

// Inverse of the transpose above: eight columns of 4 (w0..w7) back into four
// rows of 8 (z0..z3).
template <typename T>
KFR_INTRIN void transpose4x8(cvec<T, 4> w0, cvec<T, 4> w1, cvec<T, 4> w2, cvec<T, 4> w3, cvec<T, 4> w4,
                             cvec<T, 4> w5, cvec<T, 4> w6, cvec<T, 4> w7, cvec<T, 8>& z0, cvec<T, 8>& z1,
                             cvec<T, 8>& z2, cvec<T, 8>& z3)
{
    cvec<T, 16> a = concat(w0, w1, w2, w3);
    cvec<T, 16> b = concat(w4, w5, w6, w7);
    a = digitreverse4<2>(a);
    b = digitreverse4<2>(b);
    z0 = concat(part<4, 0>(a), part<4, 0>(b));
    z1 = concat(part<4, 1>(a), part<4, 1>(b));
    z2 = concat(part<4, 2>(a), part<4, 2>(b));
    z3 = concat(part<4, 3>(a), part<4, 3>(b));
}

// In-place transpose of a 4x4 matrix whose elements are quads of complex values.
template <typename T>
void transpose4(cvec<T, 16>& a, cvec<T, 16>& b, cvec<T, 16>& c, cvec<T, 16>& d)
{
    cvec<T, 4> a0, a1, a2, a3;
    cvec<T, 4> b0, b1, b2, b3;
    cvec<T, 4> c0, c1, c2, c3;
    cvec<T, 4> d0, d1, d2, d3;

    split(a, a0, a1, a2, a3);
    split(b, b0, b1, b2, b3);
    split(c, c0, c1, c2, c3);
    split(d, d0, d1, d2, d3);

    a = concat(a0, b0, c0, d0);
    b = concat(a1, b1, c1, d1);
    c = concat(a2, b2, c2, d2);
    d = concat(a3, b3, c3, d3);
}
// Out-of-place variant: transposed result goes to aa/bb/cc/dd.
template <typename T>
void transpose4(cvec<T, 16>& a, cvec<T, 16>& b, cvec<T, 16>& c, cvec<T, 16>& d, cvec<T, 16>& aa,
                cvec<T, 16>& bb, cvec<T, 16>& cc, cvec<T, 16>& dd)
{
    cvec<T, 4> a0, a1, a2, a3;
    cvec<T, 4> b0, b1, b2, b3;
    cvec<T, 4> c0, c1, c2, c3;
    cvec<T, 4> d0, d1, d2, d3;

    split(a, a0, a1, a2, a3);
    split(b, b0, b1, b2, b3);
    split(c, c0, c1, c2, c3);
    split(d, d0, d1, d2, d3);

    aa = concat(a0, b0, c0, d0);
    bb = concat(a1, b1, c1, d1);
    cc = concat(a2, b2, c2, d2);
    dd = concat(a3, b3, c3, d3);
}

// Compile-time conditional sign change: returns -x when b, x otherwise.
template <bool b, typename T>
constexpr KFR_INTRIN T chsign(T x)
{
    return b ? -x : x;
}

// Builds an interleaved (cos, -sin, cos, -sin, ...) twiddle vector from the
// compile-time sin/cos tables; the sine sign is flipped for inverse transforms.
template <typename T, size_t N, size_t size, size_t start, size_t step, bool inverse = false,
          size_t... indices>
constexpr KFR_INTRIN cvec<T, N> get_fixed_twiddle_helper(std::integer_sequence<size_t, indices...>)
{
    return make_vector((indices & 1 ? chsign<inverse>(-sin_using_table<T>(size, (indices / 2 * step + start)))
                                    : cos_using_table<T>(size, (indices / 2 * step + start)))...);
}

// Runtime-parameter counterpart (forward transform only).
template <typename T, size_t width, size_t... indices>
constexpr KFR_INTRIN cvec<T, width> get_fixed_twiddle_helper(std::integer_sequence<size_t, indices...>,
                                                             size_t size, size_t start, size_t step)
{
    return make_vector((indices & 1 ? -sin_using_table<T>(size, indices / 2 * step + start)
                                    : cos_using_table<T>(size, indices / 2 * step + start))...);
}

// Twiddle factors w^(start + k*step) for a transform of length `size`,
// computed entirely at compile time.
template <typename T, size_t width, size_t size, size_t start, size_t step, bool inverse = false>
constexpr KFR_INTRIN cvec<T, width> get_fixed_twiddle()
{
    return get_fixed_twiddle_helper<T, width, size, start, step, inverse>(
        std::make_index_sequence<width * 2>());
}

// NOTE(review): the runtime helper above declares its parameters in the order
// (size, start, step), but this call passes (start, step, size) — verify the
// intended argument order against the callers before relying on this overload.
template <typename T, size_t width>
constexpr KFR_INTRIN cvec<T, width> get_fixed_twiddle(size_t size, size_t start, size_t step = 0)
{
    return get_fixed_twiddle_helper<T, width>(std::make_index_sequence<width * 2>(), start, step, size);
}

// Variable template caching compile-time twiddle vectors.
template <typename T, size_t N, size_t size, size_t start, size_t step = 0, bool inverse = false>
constexpr cvec<T, N> fixed_twiddle = get_fixed_twiddle<T, N, size, start, step, inverse>();

// Alternating-sign mask (+1,-1,... for forward, -1,+1,... for inverse) used to
// flip the imaginary components of twiddle constants.
template <typename T, size_t N, bool inverse>
constexpr cvec<T, N> twiddleimagmask()
{
    return inverse ? broadcast<N, T>(-1, +1) : broadcast<N, T>(+1, -1);
}

// NOTE(review): empty diagnostic push/pop region — likely a leftover from
// removed code; consider deleting.
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wconversion"

#pragma clang diagnostic pop

// cos(x) + i*(-sin(x)) pairs: conjugated cossin of the native implementation.
template <typename T, size_t N>
KFR_NOINLINE static vec<T, N> cossin_conj(vec<T, N> x)
{
    return cconj(in_sin_cos<cpu_t::native>::cossin(x));
}

// Multiplies x by the twiddle factor w^k of a length-`size` transform, using
// cheap shuffles/negations whenever k falls on a multiple of size/8; only the
// general case pays for a full complex multiply.
template <size_t k, size_t size, bool inverse = false, typename T, size_t width>
KFR_INTRIN vec<T, width> cmul_by_twiddle(vec<T, width> x)
{
    constexpr size_t kk = (inverse ?
size - k : k) % size;
    constexpr T isqrt2 = static_cast<T>(0.70710678118654752440084436210485);
    if (kk == 0)
    {
        return x;
    }
    else if (kk == size * 1 / 8)
    {
        return swap<2>(subadd(swap<2>(x), x)) * isqrt2;
    }
    else if (kk == size * 2 / 8)
    {
        return negodd(swap<2>(x));
    }
    else if (kk == size * 3 / 8)
    {
        return subadd(x, swap<2>(x)) * -isqrt2;
    }
    else if (kk == size * 4 / 8)
    {
        return -x;
    }
    else if (kk == size * 5 / 8)
    {
        return swap<2>(subadd(swap<2>(x), x)) * -isqrt2;
    }
    else if (kk == size * 6 / 8)
    {
        return swap<2>(negodd(x));
    }
    else if (kk == size * 7 / 8)
    {
        return subadd(x, swap<2>(x)) * isqrt2;
    }
    else
    {
        // General case: full complex multiply by the precomputed twiddle.
        return cmul(x, resize<width>(fixed_twiddle<T, 1, size, kk>));
    }
}

// Radix-2 butterfly: w0 = a0 + a1, w1 = a0 - a1.
template <size_t N, typename T>
KFR_INTRIN void butterfly2(cvec<T, N> a0, cvec<T, N> a1, cvec<T, N>& w0, cvec<T, N>& w1)
{
    w0 = a0 + a1;
    w1 = a0 - a1;
}

// In-place radix-2 butterfly.
template <size_t N, typename T>
KFR_INTRIN void butterfly2(cvec<T, N>& a0, cvec<T, N>& a1)
{
    butterfly2<N>(a0, a1, a0, a1);
}

// Radix-4 butterfly, interleaved format. The +/-i rotation of diff13 is done
// with a pairwise swap plus sign flip, ordered differently for the inverse
// transform.
template <size_t N, bool inverse = false, typename T>
KFR_INTRIN void butterfly4(cfalse_t /*split_format*/, cvec<T, N> a0, cvec<T, N> a1, cvec<T, N> a2,
                           cvec<T, N> a3, cvec<T, N>& w0, cvec<T, N>& w1, cvec<T, N>& w2, cvec<T, N>& w3)
{
    cvec<T, N> sum02, sum13, diff02, diff13;
    cvec<T, N * 2> a01, a23, sum0213, diff0213;

    // Compute both sums and both differences with double-width operations.
    a01 = concat(a0, a1);
    a23 = concat(a2, a3);
    sum0213 = a01 + a23;
    diff0213 = a01 - a23;

    sum02 = low(sum0213);
    sum13 = high(sum0213);
    diff02 = low(diff0213);
    diff13 = high(diff0213);
    w0 = sum02 + sum13;
    w2 = sum02 - sum13;
    if (inverse)
    {
        diff13 = (diff13 ^ broadcast<N, T>(T(), -T()));
        diff13 = swap<2>(diff13);
    }
    else
    {
        diff13 = swap<2>(diff13);
        diff13 = (diff13 ^ broadcast<N, T>(T(), -T()));
    }

    w1 = diff02 + diff13;
    w3 = diff02 - diff13;
}

// Radix-4 butterfly, split (separate real/imag planes) format.
// NOTE(review): re0..im3, wre0..wim3 and the sum??re/im locals are declared
// but never used.
template <size_t N, bool inverse = false, typename T>
KFR_INTRIN void butterfly4(ctrue_t /*split_format*/, cvec<T, N> a0, cvec<T, N> a1, cvec<T, N> a2,
                           cvec<T, N> a3, cvec<T, N>& w0, cvec<T, N>& w1, cvec<T, N>& w2, cvec<T, N>& w3)
{
    vec<T, N> re0, im0, re1, im1, re2, im2, re3, im3;
    vec<T, N> wre0, wim0, wre1, wim1, wre2, wim2, wre3, wim3;

    cvec<T, N> sum02, sum13, diff02, diff13;
    vec<T, N> sum02re, sum13re, diff02re, diff13re;
    vec<T, N> sum02im, sum13im, diff02im, diff13im;

    sum02 = a0 + a2;
    sum13 = a1 + a3;

    w0 = sum02 + sum13;
    w2 = sum02 - sum13;

    diff02 = a0 - a2;
    diff13 = a1 - a3;
    split(diff02, diff02re, diff02im);
    split(diff13, diff13re, diff13im);

    // Inverse transform swaps w1 and w3 (conjugate rotation direction).
    (inverse ? w3 : w1) = concat(diff02re + diff13im, diff02im - diff13re);
    (inverse ? w1 : w3) = concat(diff02re - diff13im, diff02im + diff13re);
}

// Radix-8 butterfly built from two radix-4 passes with intermediate twiddles.
template <size_t N, bool inverse = false, typename T>
KFR_INTRIN void butterfly8(cvec<T, N> a0, cvec<T, N> a1, cvec<T, N> a2, cvec<T, N> a3, cvec<T, N> a4,
                           cvec<T, N> a5, cvec<T, N> a6, cvec<T, N> a7, cvec<T, N>& w0, cvec<T, N>& w1,
                           cvec<T, N>& w2, cvec<T, N>& w3, cvec<T, N>& w4, cvec<T, N>& w5, cvec<T, N>& w6,
                           cvec<T, N>& w7)
{
    cvec<T, N> b0 = a0, b2 = a2, b4 = a4, b6 = a6;
    butterfly4<N, inverse>(cbool<false>, b0, b2, b4, b6, b0, b2, b4, b6);
    cvec<T, N> b1 = a1, b3 = a3, b5 = a5, b7 = a7;
    butterfly4<N, inverse>(cbool<false>, b1, b3, b5, b7, b1, b3, b5, b7);
    w0 = b0 + b1;
    w4 = b0 - b1;

    b3 = cmul_by_twiddle<1, 8, inverse>(b3);
    b5 = cmul_by_twiddle<2, 8, inverse>(b5);
    b7 = cmul_by_twiddle<3, 8, inverse>(b7);

    w1 = b2 + b3;
    w5 = b2 - b3;
    w2 = b4 + b5;
    w6 = b4 - b5;
    w3 = b6 + b7;
    w7 = b6 - b7;
}

// In-place radix-8 butterfly.
template <size_t N, bool inverse = false, typename T>
KFR_INTRIN void butterfly8(cvec<T, N>& a0, cvec<T, N>& a1, cvec<T, N>& a2, cvec<T, N>& a3, cvec<T, N>& a4,
                           cvec<T, N>& a5, cvec<T, N>& a6, cvec<T, N>& a7)
{
    butterfly8<N, inverse>(a0, a1, a2, a3, a4, a5, a6, a7, a0, a1, a2, a3, a4, a5, a6, a7);
}

template <bool inverse = false, typename T>
KFR_INTRIN void butterfly8(cvec<T, 2>& a01, cvec<T, 2>& a23, cvec<T, 2>& a45,
cvec<T, 2>& a67)
{
    // Radix-8 butterfly over four 2-wide vectors: one radix-4 pass, an
    // even/odd regroup, twiddles on the odd halves, then the final radix-2 pass.
    cvec<T, 2> b01 = a01, b23 = a23, b45 = a45, b67 = a67;

    butterfly4<2, inverse>(cbool<false>, b01, b23, b45, b67, b01, b23, b45, b67);

    cvec<T, 2> b02, b13, b46, b57;

    cvec<T, 8> b01234567 = concat(b01, b23, b45, b67);
    cvec<T, 8> b02461357 = concat(even<2>(b01234567), odd<2>(b01234567));
    split(b02461357, b02, b46, b13, b57);

    b13 = cmul(b13, fixed_twiddle<T, 2, 8, 0, 1, inverse>);
    b57 = cmul(b57, fixed_twiddle<T, 2, 8, 2, 1, inverse>);
    a01 = b02 + b13;
    a23 = b46 + b57;
    a45 = b02 - b13;
    a67 = b46 - b57;
}

// In-place radix-8 butterfly over a single 8-wide vector.
template <bool inverse = false, typename T>
KFR_INTRIN void butterfly8(cvec<T, 8>& v8)
{
    cvec<T, 2> w0, w1, w2, w3;
    split(v8, w0, w1, w2, w3);
    butterfly8<inverse>(w0, w1, w2, w3);
    v8 = concat(w0, w1, w2, w3);
}

// In-place size-32 butterfly: radix-8 pass, twiddles, transpose, radix-4 pass.
template <bool inverse = false, typename T>
KFR_INTRIN void butterfly32(cvec<T, 32>& v32)
{
    cvec<T, 4> w0, w1, w2, w3, w4, w5, w6, w7;
    split(v32, w0, w1, w2, w3, w4, w5, w6, w7);
    butterfly8<4, inverse>(w0, w1, w2, w3, w4, w5, w6, w7);

    w1 = cmul(w1, fixed_twiddle<T, 4, 32, 0, 1, inverse>);
    w2 = cmul(w2, fixed_twiddle<T, 4, 32, 0, 2, inverse>);
    w3 = cmul(w3, fixed_twiddle<T, 4, 32, 0, 3, inverse>);
    w4 = cmul(w4, fixed_twiddle<T, 4, 32, 0, 4, inverse>);
    w5 = cmul(w5, fixed_twiddle<T, 4, 32, 0, 5, inverse>);
    w6 = cmul(w6, fixed_twiddle<T, 4, 32, 0, 6, inverse>);
    w7 = cmul(w7, fixed_twiddle<T, 4, 32, 0, 7, inverse>);

    cvec<T, 8> z0, z1, z2, z3;
    transpose4x8(w0, w1, w2, w3, w4, w5, w6, w7, z0, z1, z2, z3);

    butterfly4<8, inverse>(cfalse, z0, z1, z2, z3, z0, z1, z2, z3);
    v32 = concat(z0, z1, z2, z3);
}

// In-place radix-4 butterfly over a packed 4N-wide vector.
template <size_t N, bool inverse = false, typename T>
KFR_INTRIN void butterfly4(cvec<T, N * 4>& a0123)
{
    cvec<T, N> a0;
    cvec<T, N> a1;
    cvec<T, N> a2;
    cvec<T, N> a3;
    split(a0123, a0, a1, a2, a3);
    butterfly4<N, inverse>(cfalse, a0, a1, a2, a3, a0, a1, a2, a3);
    a0123 = concat(a0, a1, a2, a3);
}

// In-place radix-2 butterfly over a packed 2N-wide vector.
template <size_t N, typename T>
KFR_INTRIN void butterfly2(cvec<T, N * 2>& a01)
{
    cvec<T, N> a0;
    cvec<T, N> a1;
    split(a01, a0, a1);
    butterfly2<N>(a0, a1);
    a01 = concat(a0, a1);
}

// Complex multiply w1 = a1 * tw1 (conjugated twiddle when `inverse`),
// in either split or interleaved format.
template <size_t N, bool inverse = false, bool split_format = false, typename T>
KFR_INTRIN void apply_twiddle(cvec<T, N> a1, cvec<T, N> tw1, cvec<T, N>& w1)
{
    if (split_format)
    {
        vec<T, N> re1, im1, tw1re, tw1im;
        split(a1, re1, im1);
        split(tw1, tw1re, tw1im);
        vec<T, N> b1re = re1 * tw1re;
        vec<T, N> b1im = im1 * tw1re;
        if (inverse)
            w1 = concat(b1re + im1 * tw1im, b1im - re1 * tw1im);
        else
            w1 = concat(b1re - im1 * tw1im, b1im + re1 * tw1im);
    }
    else
    {
        cvec<T, N> b1 = a1 * dupeven(tw1);
        a1 = swap<2>(a1);

        if (inverse)
            tw1 = -(tw1);
        w1 = subadd(b1, a1 * dupodd(tw1));
    }
}

// Applies three twiddles (one per non-trivial radix-4 output).
template <size_t N, bool inverse = false, bool split_format = false, typename T>
KFR_INTRIN void apply_twiddles4(cvec<T, N> a1, cvec<T, N> a2, cvec<T, N> a3, cvec<T, N> tw1, cvec<T, N> tw2,
                                cvec<T, N> tw3, cvec<T, N>& w1, cvec<T, N>& w2, cvec<T, N>& w3)
{
    apply_twiddle<N, inverse, split_format>(a1, tw1, w1);
    apply_twiddle<N, inverse, split_format>(a2, tw2, w2);
    apply_twiddle<N, inverse, split_format>(a3, tw3, w3);
}

// In-place variant.
template <size_t N, bool inverse = false, typename T>
KFR_INTRIN void apply_twiddles4(cvec<T, N>& __restrict a1, cvec<T, N>& __restrict a2,
                                cvec<T, N>& __restrict a3, cvec<T, N> tw1, cvec<T, N> tw2, cvec<T, N> tw3)
{
    apply_twiddles4<N, inverse>(a1, a2, a3, tw1, tw2, tw3, a1, a2, a3);
}

// Overload for scalar (1-wide) twiddles, broadcast to N; enabled only for N > 1.
template <size_t N, bool inverse = false, typename T, typename = u8[N - 1]>
KFR_INTRIN void apply_twiddles4(cvec<T, N>& __restrict a1, cvec<T, N>& __restrict a2,
                                cvec<T, N>& __restrict a3, cvec<T, 1> tw1, cvec<T, 1> tw2, cvec<T, 1> tw3)
{
    apply_twiddles4<N, inverse>(a1, a2, a3, resize<N * 2>(tw1), resize<N * 2>(tw2), resize<N * 2>(tw3));
}

// Overload for half-width twiddles, resized to N; enabled only for N > 2.
template <size_t N, bool inverse = false, typename T, typename = u8[N - 2]>
KFR_INTRIN void apply_twiddles4(cvec<T, N>& __restrict a1, cvec<T, N>&
__restrict a2,
                                cvec<T, N>& __restrict a3, cvec<T, N / 2> tw1, cvec<T, N / 2> tw2,
                                cvec<T, N / 2> tw3)
{
    apply_twiddles4<N, inverse>(a1, a2, a3, resize<N * 2>(tw1), resize<N * 2>(tw2), resize<N * 2>(tw3));
}

// Applies the column twiddles of the 64-point transform (all indices are
// expressed relative to size 64) to the three non-trivial rows b, c, d.
template <size_t N, bool inverse = false, typename T>
KFR_INTRIN void apply_vertical_twiddles4(cvec<T, N * 4>& b, cvec<T, N * 4>& c, cvec<T, N * 4>& d)
{
    cvec<T, 4> b0, b1, b2, b3;
    cvec<T, 4> c0, c1, c2, c3;
    cvec<T, 4> d0, d1, d2, d3;

    split(b, b0, b1, b2, b3);
    split(c, c0, c1, c2, c3);
    split(d, d0, d1, d2, d3);

    b1 = cmul_by_twiddle<4, 64, inverse>(b1);
    b2 = cmul_by_twiddle<8, 64, inverse>(b2);
    b3 = cmul_by_twiddle<12, 64, inverse>(b3);

    c1 = cmul_by_twiddle<8, 64, inverse>(c1);
    c2 = cmul_by_twiddle<16, 64, inverse>(c2);
    c3 = cmul_by_twiddle<24, 64, inverse>(c3);

    d1 = cmul_by_twiddle<12, 64, inverse>(d1);
    d2 = cmul_by_twiddle<24, 64, inverse>(d2);
    d3 = cmul_by_twiddle<36, 64, inverse>(d3);

    b = concat(b0, b1, b2, b3);
    c = concat(c0, c1, c2, c3);
    d = concat(d0, d1, d2, d3);
}

// Applies the row twiddles (transform size 64) selected by the compile-time
// row index n2 and stride nnstep to the packed quad a0123.
template <size_t n2, size_t nnstep, size_t N, bool inverse = false, typename T>
KFR_INTRIN void apply_twiddles4(cvec<T, N * 4>& __restrict a0123)
{
    cvec<T, N> a0;
    cvec<T, N> a1;
    cvec<T, N> a2;
    cvec<T, N> a3;
    split(a0123, a0, a1, a2, a3);

    cvec<T, N> tw1 = fixed_twiddle<T, N, 64, n2 * nnstep * 1, nnstep * 1, inverse>,
               tw2 = fixed_twiddle<T, N, 64, n2 * nnstep * 2, nnstep * 2, inverse>,
               tw3 = fixed_twiddle<T, N, 64, n2 * nnstep * 3, nnstep * 3, inverse>;

    apply_twiddles4<N>(a1, a2, a3, tw1, tw2, tw3);

    a0123 = concat(a0, a1, a2, a3);
}

// Complete 64-point transform: three radix-4 passes with twiddles, using
// strided group reads/writes and 4x4 transposes between passes.
template <bool inverse, bool aligned, typename T>
KFR_INTRIN void butterfly64(cbool_t<inverse>, cbool_t<aligned>, complex<T>* out, const complex<T>* in)
{
    cvec<T, 16> w0, w1, w2, w3;

    // pass 1:
    w0 = cread_group<4, 4, 16, aligned>(
        in); // concat(cread<4>(in + 0), cread<4>(in + 16), cread<4>(in + 32), cread<4>(in + 48));
    butterfly4<4, inverse>(w0);
    apply_twiddles4<0, 1, 4, inverse>(w0);

    w1 = cread_group<4, 4, 16, aligned>(
        in + 4); // concat(cread<4>(in + 4), cread<4>(in + 20), cread<4>(in + 36), cread<4>(in + 52));
    butterfly4<4, inverse>(w1);
    apply_twiddles4<4, 1, 4, inverse>(w1);

    w2 = cread_group<4, 4, 16, aligned>(
        in + 8); // concat(cread<4>(in + 8), cread<4>(in + 24), cread<4>(in + 40), cread<4>(in + 56));
    butterfly4<4, inverse>(w2);
    apply_twiddles4<8, 1, 4, inverse>(w2);

    w3 = cread_group<4, 4, 16, aligned>(
        in + 12); // concat(cread<4>(in + 12), cread<4>(in + 28), cread<4>(in + 44), cread<4>(in + 60));
    butterfly4<4, inverse>(w3);
    apply_twiddles4<12, 1, 4, inverse>(w3);

    transpose4(w0, w1, w2, w3);
    // pass 2:

    butterfly4<4, inverse>(w0);
    butterfly4<4, inverse>(w1);
    butterfly4<4, inverse>(w2);
    butterfly4<4, inverse>(w3);

    transpose4(w0, w1, w2, w3);

    w0 = digitreverse4<2>(w0);
    w1 = digitreverse4<2>(w1);
    w2 = digitreverse4<2>(w2);
    w3 = digitreverse4<2>(w3);

    apply_vertical_twiddles4<4, inverse>(w1, w2, w3);

    // pass 3:
    butterfly4<4, inverse>(w3);
    cwrite_group<4, 4, 16, aligned>(out + 12, w3); // split(w3, out[3], out[7], out[11], out[15]);

    butterfly4<4, inverse>(w2);
    cwrite_group<4, 4, 16, aligned>(out + 8, w2); // split(w2, out[2], out[6], out[10], out[14]);

    butterfly4<4, inverse>(w1);
    cwrite_group<4, 4, 16, aligned>(out + 4, w1); // split(w1, out[1], out[5], out[9], out[13]);

    butterfly4<4, inverse>(w0);
    cwrite_group<4, 4, 16, aligned>(out, w0); // split(w0, out[0], out[4], out[8], out[12]);
}

// In-place 16-point transform: two radix-4 passes around a digit reversal.
template <bool inverse = false, typename T>
KFR_INTRIN void butterfly16(cvec<T, 16>& v16)
{
    butterfly4<4, inverse>(v16);
    apply_twiddles4<0, 4, 4, inverse>(v16);
    v16 = digitreverse4<2>(v16);
    butterfly4<4, inverse>(v16);
}

// 16-point butterflies over four interleaved sub-transforms (natural output
// order); `index` selects which group of four columns is processed.
template <size_t index, bool inverse = false, typename T>
KFR_INTRIN void butterfly16_multi_natural(complex<T>* out, const complex<T>* in)
{
    constexpr size_t N = 4;

    cvec<T, 4> a1 = cread<4>(in + index * 4 + 16 * 1);
    cvec<T, 4> a5 = cread<4>(in + index * 4 + 16 * 5);
    cvec<T, 4> a9 = cread<4>(in + index * 4 + 16 * 9);
    cvec<T, 4> a13 = cread<4>(in + index * 4 + 16 * 13);
    butterfly4<N, inverse>(cfalse, a1, a5, a9, a13, a1, a5, a9, a13);
    a5 = cmul_by_twiddle<1, 16, inverse>(a5);
    a9 = cmul_by_twiddle<2, 16, inverse>(a9);
    a13 = cmul_by_twiddle<3, 16, inverse>(a13);

    cvec<T, 4> a2 = cread<4>(in + index * 4 + 16 * 2);
    cvec<T, 4> a6 = cread<4>(in + index * 4 + 16 * 6);
    cvec<T, 4> a10 = cread<4>(in + index * 4 + 16 * 10);
    cvec<T, 4> a14 = cread<4>(in + index * 4 + 16 * 14);
    butterfly4<N, inverse>(cfalse, a2, a6, a10, a14, a2, a6, a10, a14);
    a6 = cmul_by_twiddle<2, 16, inverse>(a6);
    a10 = cmul_by_twiddle<4, 16, inverse>(a10);
    a14 = cmul_by_twiddle<6, 16, inverse>(a14);

    cvec<T, 4> a3 = cread<4>(in + index * 4 + 16 * 3);
    cvec<T, 4> a7 = cread<4>(in + index * 4 + 16 * 7);
    cvec<T, 4> a11 = cread<4>(in + index * 4 + 16 * 11);
    cvec<T, 4> a15 = cread<4>(in + index * 4 + 16 * 15);
    butterfly4<N, inverse>(cfalse, a3, a7, a11, a15, a3, a7, a11, a15);
    a7 = cmul_by_twiddle<3, 16, inverse>(a7);
    a11 = cmul_by_twiddle<6, 16, inverse>(a11);
    a15 = cmul_by_twiddle<9, 16, inverse>(a15);

    cvec<T, 4> a0 = cread<4>(in + index * 4 + 16 * 0);
    cvec<T, 4> a4 = cread<4>(in + index * 4 + 16 * 4);
    cvec<T, 4> a8 = cread<4>(in + index * 4 + 16 * 8);
    cvec<T, 4> a12 = cread<4>(in + index * 4 + 16 * 12);
    butterfly4<N, inverse>(cfalse, a0, a4, a8, a12, a0, a4, a8, a12);
    // Second radix-4 pass; outputs are written in natural (bit-unreversed) order.
    butterfly4<N, inverse>(cfalse, a0, a1, a2, a3, a0, a1, a2, a3);
    cwrite<4>(out + index * 4 + 16 * 0, a0);
    cwrite<4>(out + index * 4 + 16 * 4, a1);
    cwrite<4>(out + index * 4 + 16 * 8, a2);
    cwrite<4>(out + index * 4 + 16 * 12, a3);
    butterfly4<N, inverse>(cfalse, a4, a5, a6, a7, a4, a5, a6, a7);
    cwrite<4>(out + index * 4 + 16 * 1, a4);
    cwrite<4>(out + index * 4 + 16 * 5, a5);
    cwrite<4>(out + index * 4 + 16 * 9, a6);
    cwrite<4>(out + index * 4 + 16 * 13, a7);
    butterfly4<N, inverse>(cfalse, a8, a9, a10, a11, a8, a9, a10, a11);
    cwrite<4>(out + index * 4 + 16 * 2, a8);
    cwrite<4>(out + index * 4 + 16 * 6, a9);
    cwrite<4>(out + index * 4 + 16 * 10, a10);
    cwrite<4>(out + index * 4 + 16 * 14, a11);
    butterfly4<N, inverse>(cfalse, a12, a13, a14, a15, a12, a13, a14, a15);
    cwrite<4>(out + index * 4 + 16 * 3, a12);
    cwrite<4>(out + index * 4 + 16 * 7, a13);
    cwrite<4>(out + index * 4 + 16 * 11, a14);
    cwrite<4>(out + index * 4 + 16 * 15, a15);
}

// 16-point butterflies over four interleaved sub-transforms, producing
// digit-reversed ("flipped") output with size-256 twiddles applied.
template <size_t index, bool inverse = false, typename T>
KFR_INTRIN void butterfly16_multi_flip(complex<T>* out, const complex<T>* in)
{
    constexpr size_t N = 4;

    cvec<T, 4> a1 = cread<4>(in + index * 4 + 16 * 1);
    cvec<T, 4> a5 = cread<4>(in + index * 4 + 16 * 5);
    cvec<T, 4> a9 = cread<4>(in + index * 4 + 16 * 9);
    cvec<T, 4> a13 = cread<4>(in + index * 4 + 16 * 13);
    butterfly4<N, inverse>(cfalse, a1, a5, a9, a13, a1, a5, a9, a13);
    a5 = cmul_by_twiddle<1, 16, inverse>(a5);
    a9 = cmul_by_twiddle<2, 16, inverse>(a9);
    a13 = cmul_by_twiddle<3, 16, inverse>(a13);

    cvec<T, 4> a2 = cread<4>(in + index * 4 + 16 * 2);
    cvec<T, 4> a6 = cread<4>(in + index * 4 + 16 * 6);
    cvec<T, 4> a10 = cread<4>(in + index * 4 + 16 * 10);
    cvec<T, 4> a14 = cread<4>(in + index * 4 + 16 * 14);
    butterfly4<N, inverse>(cfalse, a2, a6, a10, a14, a2, a6, a10, a14);
    a6 = cmul_by_twiddle<2, 16, inverse>(a6);
    a10 = cmul_by_twiddle<4, 16, inverse>(a10);
    a14 = cmul_by_twiddle<6, 16, inverse>(a14);

    cvec<T, 4> a3 = cread<4>(in + index * 4 + 16 * 3);
    cvec<T, 4> a7 = cread<4>(in + index * 4 + 16 * 7);
    cvec<T, 4> a11 = cread<4>(in + index * 4 + 16 * 11);
    cvec<T, 4> a15 = cread<4>(in + index * 4 + 16 * 15);
    butterfly4<N, inverse>(cfalse, a3, a7, a11, a15, a3, a7, a11, a15);
    a7 = cmul_by_twiddle<3, 16, inverse>(a7);
    a11 = cmul_by_twiddle<6, 16, inverse>(a11);
    a15 = cmul_by_twiddle<9, 16, inverse>(a15);

    cvec<T, 16> w1 = concat(a1, a5, a9, a13);
    cvec<T, 16> w2 = concat(a2, a6,
a10, a14);
    cvec<T, 16> w3 = concat(a3, a7, a11, a15);

    cvec<T, 4> a0 = cread<4>(in + index * 4 + 16 * 0);
    cvec<T, 4> a4 = cread<4>(in + index * 4 + 16 * 4);
    cvec<T, 4> a8 = cread<4>(in + index * 4 + 16 * 8);
    cvec<T, 4> a12 = cread<4>(in + index * 4 + 16 * 12);
    butterfly4<N, inverse>(cfalse, a0, a4, a8, a12, a0, a4, a8, a12);
    cvec<T, 16> w0 = concat(a0, a4, a8, a12);

    butterfly4<N * 4, inverse>(cfalse, w0, w1, w2, w3, w0, w1, w2, w3);

    w0 = digitreverse4<2>(w0);
    w1 = digitreverse4<2>(w1);
    w2 = digitreverse4<2>(w2);
    w3 = digitreverse4<2>(w3);

    transpose4(w0, w1, w2, w3);
    // Outputs are multiplied by size-256 twiddles selected by `index`.
    cwrite<16>(out + index * 64 + 16 * 0, cmul(w0, fixed_twiddle<T, 16, 256, 0, index * 4 + 0, inverse>));
    cwrite<16>(out + index * 64 + 16 * 1, cmul(w1, fixed_twiddle<T, 16, 256, 0, index * 4 + 1, inverse>));
    cwrite<16>(out + index * 64 + 16 * 2, cmul(w2, fixed_twiddle<T, 16, 256, 0, index * 4 + 2, inverse>));
    cwrite<16>(out + index * 64 + 16 * 3, cmul(w3, fixed_twiddle<T, 16, 256, 0, index * 4 + 3, inverse>));
}

// Multiplies a1 in place by the size-64 twiddle row selected by n2/nnstep.
template <size_t n2, size_t nnstep, size_t N, typename T>
KFR_INTRIN void apply_twiddles2(cvec<T, N>& a1)
{
    cvec<T, N> tw1 = fixed_twiddle<T, N, 64, n2 * nnstep * 1, nnstep * 1>;

    a1 = cmul(a1, tw1);
}

// Radix-3 butterfly. tw3r1/tw3i1 are the real/imaginary parts of the primitive
// 3rd root of unity (cos(2*pi/3) = -0.5, sin(2*pi/3) ~= 0.8660), with the
// imaginary sign pattern flipped for the inverse transform.
template <size_t N, bool inverse = false, typename T>
KFR_INTRIN void butterfly3(cvec<T, N> a00, cvec<T, N> a01, cvec<T, N> a02, cvec<T, N>& w00, cvec<T, N>& w01,
                           cvec<T, N>& w02)
{
    constexpr cvec<T, N> tw3r1 = static_cast<T>(-0.5);
    constexpr cvec<T, N> tw3i1 =
        static_cast<T>(0.86602540378443864676372317075) * twiddleimagmask<T, N, inverse>();

    const cvec<T, N> sum1 = a01 + a02;
    const cvec<T, N> dif1 = swap<2>(a01 - a02);
    w00 = a00 + sum1;

    const cvec<T, N> s1 = w00 + sum1 * tw3r1;

    const cvec<T, N> d1 = dif1 * tw3i1;

    w01 = s1 + d1;
    w02 = s1 - d1;
}

// In-place radix-3 butterfly.
template <size_t N, bool inverse = false, typename T>
KFR_INTRIN void butterfly3(cvec<T, N>& a0, cvec<T, N>& a1, cvec<T, N>& a2)
{
    butterfly3<N, inverse>(a0, a1, a2, a0, a1, a2);
}

// Radix-6 butterfly decomposed as 3x2 (prime-factor style: two radix-3
// butterflies over reordered inputs, then radix-2 combinations).
template <size_t N, bool inverse = false, typename T>
KFR_INTRIN void butterfly6(cvec<T, N> a0, cvec<T, N> a1, cvec<T, N> a2, cvec<T, N> a3, cvec<T, N> a4,
                           cvec<T, N> a5, cvec<T, N>& w0, cvec<T, N>& w1, cvec<T, N>& w2, cvec<T, N>& w3,
                           cvec<T, N>& w4, cvec<T, N>& w5)
{
    cvec<T, N* 2> a03 = concat(a0, a3);
    cvec<T, N* 2> a25 = concat(a2, a5);
    cvec<T, N* 2> a41 = concat(a4, a1);
    butterfly3<N * 2, inverse>(a03, a25, a41, a03, a25, a41);
    cvec<T, N> t0, t1, t2, t3, t4, t5;
    split(a03, t0, t1);
    split(a25, t2, t3);
    split(a41, t4, t5);
    t3 = -t3;
    cvec<T, N* 2> a04 = concat(t0, t4);
    cvec<T, N* 2> a15 = concat(t1, t5);
    cvec<T, N * 2> w02, w35;
    butterfly2<N * 2>(a04, a15, w02, w35);
    split(w02, w0, w2);
    split(w35, w3, w5);

    butterfly2<N>(t2, t3, w1, w4);
}

// In-place radix-6 butterfly.
template <size_t N, bool inverse = false, typename T>
KFR_INTRIN void butterfly6(cvec<T, N>& a0, cvec<T, N>& a1, cvec<T, N>& a2, cvec<T, N>& a3, cvec<T, N>& a4,
                           cvec<T, N>& a5)
{
    butterfly6<N, inverse>(a0, a1, a2, a3, a4, a5, a0, a1, a2, a3, a4, a5);
}

// Radix-7 butterfly. tw7r/i constants are cos/sin of 2*pi*k/7 for k = 1..3;
// the imaginary parts carry the inverse sign mask.
template <size_t N, bool inverse = false, typename T>
KFR_INTRIN void butterfly7(cvec<T, N> a00, cvec<T, N> a01, cvec<T, N> a02, cvec<T, N> a03, cvec<T, N> a04,
                           cvec<T, N> a05, cvec<T, N> a06, cvec<T, N>& w00, cvec<T, N>& w01, cvec<T, N>& w02,
                           cvec<T, N>& w03, cvec<T, N>& w04, cvec<T, N>& w05, cvec<T, N>& w06)
{
    constexpr cvec<T, N> tw7r1 = static_cast<T>(0.623489801858733530525004884);
    constexpr cvec<T, N> tw7i1 =
        static_cast<T>(0.78183148246802980870844452667) * twiddleimagmask<T, N, inverse>();
    constexpr cvec<T, N> tw7r2 = static_cast<T>(-0.2225209339563144042889025645);
    constexpr cvec<T, N> tw7i2 =
        static_cast<T>(0.97492791218182360701813168299) * twiddleimagmask<T, N, inverse>();
    constexpr cvec<T, N> tw7r3 = static_cast<T>(-0.90096886790241912623610231951);
    constexpr cvec<T, N> tw7i3 =
        static_cast<T>(0.43388373911755812047576833285) * twiddleimagmask<T, N, inverse>();

    const
cvec<T, N> sum1 = a01 + a06;
    const cvec<T, N> dif1 = swap<2>(a01 - a06);
    const cvec<T, N> sum2 = a02 + a05;
    const cvec<T, N> dif2 = swap<2>(a02 - a05);
    const cvec<T, N> sum3 = a03 + a04;
    const cvec<T, N> dif3 = swap<2>(a03 - a04);
    w00 = a00 + sum1 + sum2 + sum3;

    const cvec<T, N> s1 = w00 + sum1 * tw7r1 + sum2 * tw7r2 + sum3 * tw7r3;
    const cvec<T, N> s2 = w00 + sum1 * tw7r2 + sum2 * tw7r3 + sum3 * tw7r1;
    const cvec<T, N> s3 = w00 + sum1 * tw7r3 + sum2 * tw7r1 + sum3 * tw7r2;

    const cvec<T, N> d1 = dif1 * tw7i1 + dif2 * tw7i2 + dif3 * tw7i3;
    const cvec<T, N> d2 = dif1 * tw7i2 - dif2 * tw7i3 - dif3 * tw7i1;
    const cvec<T, N> d3 = dif1 * tw7i3 - dif2 * tw7i1 + dif3 * tw7i2;

    w01 = s1 + d1;
    w06 = s1 - d1;
    w02 = s2 + d2;
    w05 = s2 - d2;
    w03 = s3 + d3;
    w04 = s3 - d3;
}

// In-place radix-7 butterfly.
template <size_t N, bool inverse = false, typename T>
KFR_INTRIN void butterfly7(cvec<T, N>& a0, cvec<T, N>& a1, cvec<T, N>& a2, cvec<T, N>& a3, cvec<T, N>& a4,
                           cvec<T, N>& a5, cvec<T, N>& a6)
{
    butterfly7<N, inverse>(a0, a1, a2, a3, a4, a5, a6, a0, a1, a2, a3, a4, a5, a6);
}

// Radix-5 butterfly. tw5r/i constants are cos/sin of 2*pi*k/5 for k = 1..2;
// the imaginary parts carry the inverse sign mask.
template <size_t N, bool inverse = false, typename T>
KFR_INTRIN void butterfly5(cvec<T, N> a00, cvec<T, N> a01, cvec<T, N> a02, cvec<T, N> a03, cvec<T, N> a04,
                           cvec<T, N>& w00, cvec<T, N>& w01, cvec<T, N>& w02, cvec<T, N>& w03,
                           cvec<T, N>& w04)
{
    constexpr cvec<T, N> tw5r1 = static_cast<T>(0.30901699437494742410229341718);
    constexpr cvec<T, N> tw5i1 =
        static_cast<T>(0.95105651629515357211643933338) * twiddleimagmask<T, N, inverse>();
    constexpr cvec<T, N> tw5r2 = static_cast<T>(-0.80901699437494742410229341718);
    constexpr cvec<T, N> tw5i2 =
        static_cast<T>(0.58778525229247312916870595464) * twiddleimagmask<T, N, inverse>();

    const cvec<T, N> sum1 = a01 + a04;
    const cvec<T, N> dif1 = swap<2>(a01 - a04);
    const cvec<T, N> sum2 = a02 + a03;
    const cvec<T, N> dif2 = swap<2>(a02 - a03);
    w00 = a00 + sum1 + sum2;

    const cvec<T, N> s1 = w00 + sum1 * tw5r1 + sum2 * tw5r2;
    const cvec<T, N> s2 = w00 + sum1 * tw5r2 + sum2 * tw5r1;

    const cvec<T, N> d1 = dif1 * tw5i1 + dif2 * tw5i2;
    const cvec<T, N> d2 = dif1 * tw5i2 - dif2 * tw5i1;

    w01 = s1 + d1;
    w04 = s1 - d1;
    w02 = s2 + d2;
    w03 = s2 - d2;
}

// Radix-10 butterfly decomposed as 5x2 (prime-factor style: a double-width
// radix-5 pass over reordered inputs, then radix-2 combinations).
template <size_t N, bool inverse = false, typename T>
KFR_INTRIN void butterfly10(cvec<T, N> a0, cvec<T, N> a1, cvec<T, N> a2, cvec<T, N> a3, cvec<T, N> a4,
                            cvec<T, N> a5, cvec<T, N> a6, cvec<T, N> a7, cvec<T, N> a8, cvec<T, N> a9,
                            cvec<T, N>& w0, cvec<T, N>& w1, cvec<T, N>& w2, cvec<T, N>& w3, cvec<T, N>& w4,
                            cvec<T, N>& w5, cvec<T, N>& w6, cvec<T, N>& w7, cvec<T, N>& w8, cvec<T, N>& w9)
{
    cvec<T, N* 2> a05 = concat(a0, a5);
    cvec<T, N* 2> a27 = concat(a2, a7);
    cvec<T, N* 2> a49 = concat(a4, a9);
    cvec<T, N* 2> a61 = concat(a6, a1);
    cvec<T, N* 2> a83 = concat(a8, a3);
    butterfly5<N * 2, inverse>(a05, a27, a49, a61, a83, a05, a27, a49, a61, a83);
    cvec<T, N> t0, t1, t2, t3, t4, t5, t6, t7, t8, t9;
    split(a05, t0, t1);
    split(a27, t2, t3);
    split(a49, t4, t5);
    split(a61, t6, t7);
    split(a83, t8, t9);
    t5 = -t5;

    cvec<T, N * 2> t02, t13;
    cvec<T, N * 2> w06, w51;
    t02 = concat(t0, t2);
    t13 = concat(t1, t3);
    butterfly2<N * 2>(t02, t13, w06, w51);
    split(w06, w0, w6);
    split(w51, w5, w1);

    cvec<T, N * 2> t68, t79;
    cvec<T, N * 2> w84, w39;
    t68 = concat(t6, t8);
    t79 = concat(t7, t9);
    butterfly2<N * 2>(t68, t79, w84, w39);
    split(w84, w8, w4);
    split(w39, w3, w9);
    butterfly2<N>(t4, t5, w7, w2);
}

// Uniform radix dispatchers: map an argument count (2..10 in/out pairs) onto
// the corresponding butterflyN implementation. N here counts scalars, so the
// complex width is N / 2.
template <bool inverse, typename T, size_t N>
KFR_INTRIN void butterfly(cbool_t<inverse>, vec<T, N> in0, vec<T, N> in1, vec<T, N>& out0, vec<T, N>& out1)
{
    butterfly2<N / 2>(in0, in1, out0, out1);
}
template <bool inverse, typename T, size_t N>
KFR_INTRIN void butterfly(cbool_t<inverse>, vec<T, N> in0, vec<T, N> in1, vec<T, N> in2, vec<T, N>& out0,
                          vec<T, N>& out1, vec<T, N>& out2)
{
    butterfly3<N / 2, inverse>(in0, in1, in2, out0, out1, out2);
}

template <bool
inverse, typename T, size_t N>
KFR_INTRIN void butterfly(cbool_t<inverse>, vec<T, N> in0, vec<T, N> in1, vec<T, N> in2, vec<T, N> in3,
                          vec<T, N>& out0, vec<T, N>& out1, vec<T, N>& out2, vec<T, N>& out3)
{
    butterfly4<N / 2, inverse>(cfalse, in0, in1, in2, in3, out0, out1, out2, out3);
}
template <bool inverse, typename T, size_t N>
KFR_INTRIN void butterfly(cbool_t<inverse>, vec<T, N> in0, vec<T, N> in1, vec<T, N> in2, vec<T, N> in3,
                          vec<T, N> in4, vec<T, N>& out0, vec<T, N>& out1, vec<T, N>& out2, vec<T, N>& out3,
                          vec<T, N>& out4)
{
    butterfly5<N / 2, inverse>(in0, in1, in2, in3, in4, out0, out1, out2, out3, out4);
}
template <bool inverse, typename T, size_t N>
KFR_INTRIN void butterfly(cbool_t<inverse>, vec<T, N> in0, vec<T, N> in1, vec<T, N> in2, vec<T, N> in3,
                          vec<T, N> in4, vec<T, N> in5, vec<T, N>& out0, vec<T, N>& out1, vec<T, N>& out2,
                          vec<T, N>& out3, vec<T, N>& out4, vec<T, N>& out5)
{
    butterfly6<N / 2, inverse>(in0, in1, in2, in3, in4, in5, out0, out1, out2, out3, out4, out5);
}
template <bool inverse, typename T, size_t N>
KFR_INTRIN void butterfly(cbool_t<inverse>, vec<T, N> in0, vec<T, N> in1, vec<T, N> in2, vec<T, N> in3,
                          vec<T, N> in4, vec<T, N> in5, vec<T, N> in6, vec<T, N>& out0, vec<T, N>& out1,
                          vec<T, N>& out2, vec<T, N>& out3, vec<T, N>& out4, vec<T, N>& out5, vec<T, N>& out6)
{
    butterfly7<N / 2, inverse>(in0, in1, in2, in3, in4, in5, in6, out0, out1, out2, out3, out4, out5, out6);
}
template <bool inverse, typename T, size_t N>
KFR_INTRIN void butterfly(cbool_t<inverse>, vec<T, N> in0, vec<T, N> in1, vec<T, N> in2, vec<T, N> in3,
                          vec<T, N> in4, vec<T, N> in5, vec<T, N> in6, vec<T, N> in7, vec<T, N>& out0,
                          vec<T, N>& out1, vec<T, N>& out2, vec<T, N>& out3, vec<T, N>& out4, vec<T, N>& out5,
                          vec<T, N>& out6, vec<T, N>& out7)
{
    butterfly8<N / 2, inverse>(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3, out4, out5,
                               out6, out7);
}
template <bool inverse, typename T, size_t N>
KFR_INTRIN void butterfly(cbool_t<inverse>, vec<T, N> in0, vec<T, N> in1, vec<T, N> in2, vec<T, N> in3,
                          vec<T, N> in4, vec<T, N> in5, vec<T, N> in6, vec<T, N> in7, vec<T, N> in8,
                          vec<T, N> in9, vec<T, N>& out0, vec<T, N>& out1, vec<T, N>& out2, vec<T, N>& out3,
                          vec<T, N>& out4, vec<T, N>& out5, vec<T, N>& out6, vec<T, N>& out7, vec<T, N>& out8,
                          vec<T, N>& out9)
{
    butterfly10<N / 2, inverse>(in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, out0, out1, out2, out3,
                                out4, out5, out6, out7, out8, out9);
}

// Reads sizeof...(N) rows into w..., optionally transposing the loaded data.
template <bool transposed, typename T, size_t... N, size_t Nout = csum(csizes<N...>)>
KFR_INTRIN void cread_transposed(cbool_t<transposed>, const complex<T>* ptr, vec<T, N>&... w)
{
    vec<T, Nout> temp = read<Nout>(ptr_cast<T>(ptr));
    if (transposed)
        temp = ctranspose<sizeof...(N)>(temp);
    split(temp, w...);
}

// Warning: Reads past the end. Use with care
KFR_INTRIN void cread_transposed(cbool_t<true>, const complex<f32>* ptr, cvec<f32, 4>& w0, cvec<f32, 4>& w1,
                                 cvec<f32, 4>& w2)
{
    // 3x4 transpose via overlapping 4-wide loads (stride 3) and digit reversal;
    // w3 receives the surplus lane and is discarded.
    cvec<f32, 4> w3;
    cvec<f32, 16> v16 = concat(cread<4>(ptr), cread<4>(ptr + 3), cread<4>(ptr + 6), cread<4>(ptr + 9));
    v16 = digitreverse4<2>(v16);
    split(v16, w0, w1, w2, w3);
}

// 5x4 transpose: first four rows via digit reversal, fifth row via a
// stride-5 gather.
KFR_INTRIN void cread_transposed(cbool_t<true>, const complex<f32>* ptr, cvec<f32, 4>& w0, cvec<f32, 4>& w1,
                                 cvec<f32, 4>& w2, cvec<f32, 4>& w3, cvec<f32, 4>& w4)
{
    cvec<f32, 16> v16 = concat(cread<4>(ptr), cread<4>(ptr + 5), cread<4>(ptr + 10), cread<4>(ptr + 15));
    v16 = digitreverse4<2>(v16);
    split(v16, w0, w1, w2, w3);
    w4 = cgather<4, 5>(ptr + 4);
}

// Writes the given rows, optionally applying the inverse transpose first.
template <bool transposed, typename T, size_t... N, size_t Nout = csum(csizes<N...>)>
KFR_INTRIN void cwrite_transposed(cbool_t<transposed>, complex<T>* ptr, vec<T, N>...
args) +{ + auto temp = concat(args...); + if (transposed) + temp = ctransposeinverse<sizeof...(N)>(temp); + write(ptr_cast<T>(ptr), temp); +} + +template <size_t I, size_t radix, typename T, size_t N, size_t width = N / 2> +KFR_INTRIN vec<T, N> mul_tw(cbool_t<false>, vec<T, N> x, const complex<T>* twiddle) +{ + return I == 0 ? x : cmul(x, cread<width>(twiddle + width * (I - 1))); +} +template <size_t I, size_t radix, typename T, size_t N, size_t width = N / 2> +KFR_INTRIN vec<T, N> mul_tw(cbool_t<true>, vec<T, N> x, const complex<T>* twiddle) +{ + return I == 0 ? x : cmul_conj(x, cread<width>(twiddle + width * (I - 1))); +} + +// Non-final +template <typename T, size_t width, size_t radix, bool inverse, size_t... I> +KFR_INTRIN void butterfly_helper(std::index_sequence<I...>, size_t i, csize_t<width>, csize_t<radix>, + cbool_t<inverse>, complex<T>* out, const complex<T>* in, + const complex<T>* tw, size_t stride) +{ + carray<cvec<T, width>, radix> inout; + + swallow{ (inout.get(csize<I>) = cread<width>(in + i + stride * I))... }; + + butterfly(cbool<inverse>, inout.get(csize<I>)..., inout.get(csize<I>)...); + + swallow{ (cwrite<width>(out + i + stride * I, + mul_tw<I, radix>(cbool<inverse>, inout.get(csize<I>), tw + i * (radix - 1))), + 0)... }; +} + +// Final +template <typename T, size_t width, size_t radix, bool inverse, size_t... I> +KFR_INTRIN void butterfly_helper(std::index_sequence<I...>, size_t i, csize_t<width>, csize_t<radix>, + cbool_t<inverse>, complex<T>* out, const complex<T>* in, size_t stride) +{ + carray<cvec<T, width>, radix> inout; + + // swallow{ ( inout.get( csize<I> ) = infn( i, I, cvec<T, width>( ) ) )... }; + cread_transposed(cbool<true>, in + i * radix, inout.get(csize<I>)...); + + butterfly(cbool<inverse>, inout.get(csize<I>)..., inout.get(csize<I>)...); + + swallow{ (cwrite<width>(out + i + stride * I, inout.get(csize<I>)), 0)... }; +} + +template <size_t width, size_t radix, typename... 
Args> +KFR_INTRIN void butterfly(size_t i, csize_t<width>, csize_t<radix>, Args&&... args) +{ + butterfly_helper(std::make_index_sequence<radix>(), i, csize<width>, csize<radix>, + std::forward<Args>(args)...); +} + +template <typename... Args> +KFR_INTRIN void butterfly_cycle(size_t&, size_t, csize_t<0>, Args&&...) +{ +} +template <size_t width, typename... Args> +KFR_INTRIN void butterfly_cycle(size_t& i, size_t count, csize_t<width>, Args&&... args) +{ + KFR_LOOP_NOUNROLL + for (; i < count / width * width; i += width) + butterfly(i, csize<width>, std::forward<Args>(args)...); + butterfly_cycle(i, count, csize<width / 2>, std::forward<Args>(args)...); +} + +template <size_t width, typename... Args> +KFR_INTRIN void butterflies(size_t count, csize_t<width>, Args&&... args) +{ + __builtin_assume(count > 0); + size_t i = 0; + butterfly_cycle(i, count, csize<width>, std::forward<Args>(args)...); +} + +template <typename T, bool inverse, typename Tstride> +KFR_INTRIN void generic_butterfly_cycle(csize_t<0>, size_t, cbool_t<inverse>, complex<T>*, const complex<T>*, + Tstride, size_t, size_t, const complex<T>*, size_t) +{ +} +template <size_t width, bool inverse, typename T, typename Tstride> +KFR_INTRIN void generic_butterfly_cycle(csize_t<width>, size_t radix, cbool_t<inverse>, complex<T>* out, + const complex<T>* in, Tstride ostride, size_t halfradix, + size_t halfradix_sqr, const complex<T>* twiddle, size_t i) +{ + KFR_LOOP_NOUNROLL + for (; i < halfradix / width * width; i += width) + { + const cvec<T, 1> in0 = cread<1>(in); + cvec<T, width> sum0 = resize<2 * width>(in0); + cvec<T, width> sum1 = sum0; + + KFR_LOOP_NOUNROLL + for (size_t j = 0; j < halfradix; j++) + { + const cvec<T, 1> ina = cread<1>(in + (1 + j)); + const cvec<T, 1> inb = cread<1>(in + radix - (j + 1)); + cvec<T, width> tw = cread<width>(twiddle); + if (inverse) + tw = cconj(tw); + + cmul_2conj(sum0, sum1, ina, inb, tw); + twiddle += halfradix; + } + twiddle = twiddle - halfradix_sqr + width; + + 
if (is_constant_val(ostride)) + { + cwrite<width>(out + (1 + i), sum0); + cwrite<width>(out + (radix - (i + 1)) - (width - 1), reverse<2>(sum1)); + } + else + { + cscatter<width>(out + (i + 1) * ostride, ostride, sum0); + cscatter<width>(out + (radix - (i + 1)) * ostride - (width - 1) * ostride, ostride, + reverse<2>(sum1)); + } + } + generic_butterfly_cycle(csize<width / 2>, radix, cbool<inverse>, out, in, ostride, halfradix, + halfradix_sqr, twiddle, i); +} + +template <size_t width, typename T, bool inverse, typename Tstride = csize_t<1>> +KFR_INTRIN void generic_butterfly_w(size_t radix, cbool_t<inverse>, complex<T>* out, const complex<T>* in, + const complex<T>* twiddle, Tstride ostride = Tstride{}) +{ + __builtin_assume(radix > 0); + { + cvec<T, width> sum = T(); + size_t j = 0; + KFR_LOOP_NOUNROLL + for (; j < radix / width * width; j += width) + { + sum += cread<width>(in + j); + } + cvec<T, 1> sums = T(); + KFR_LOOP_NOUNROLL + for (; j < radix; j++) + { + sums += cread<1>(in + j); + } + cwrite<1>(out, hcadd(sum) + sums); + } + const size_t halfradix = radix / 2; + const size_t halfradix_sqr = halfradix * halfradix; + __builtin_assume(halfradix > 0); + size_t i = 0; + + generic_butterfly_cycle(csize<width>, radix, cbool<inverse>, out, in, ostride, halfradix, halfradix_sqr, + twiddle, i); +} + +template <typename T, bool inverse, typename Tstride = csize_t<1>> +KFR_INTRIN void generic_butterfly(size_t radix, cbool_t<inverse>, complex<T>* out, const complex<T>* in, + complex<T>* temp, const complex<T>* twiddle, Tstride ostride = Tstride{}) +{ + if (out == in) + { + builtin_memcpy(temp, in, sizeof(complex<T>) * radix); + in = temp; + } + constexpr size_t width = vector_width<T, cpu_t::native>; + + cswitch(csizes<11>, radix, + [&](auto radix_) KFR_INLINE_LAMBDA { + generic_butterfly_w<width>(val_of(radix_), cbool<inverse>, out, in, twiddle, ostride); + }, + [&]() KFR_INLINE_LAMBDA { + generic_butterfly_w<width>(radix, cbool<inverse>, out, in, twiddle, ostride); 
+ }); +} + +template <typename T, size_t N> +constexpr cvec<T, N> cmask08 = broadcast<N, T>(T(), -T()); + +template <typename T, size_t N> +constexpr cvec<T, N> cmask0088 = broadcast<N, T>(T(), T(), -T(), -T()); + +template <bool A = false, typename T, size_t N> +KFR_INTRIN void cbitreverse_write(complex<T>* dest, vec<T, N> x) +{ + cwrite<N / 2, A>(dest, bitreverse<2>(x)); +} + +template <bool A = false, typename T, size_t N> +KFR_INTRIN void cdigitreverse4_write(complex<T>* dest, vec<T, N> x) +{ + cwrite<N / 2, A>(dest, digitreverse4<2>(x)); +} + +template <size_t N, bool A = false, typename T> +KFR_INTRIN cvec<T, N> cbitreverse_read(const complex<T>* src) +{ + return bitreverse<2>(cread<N, A>(src)); +} + +template <size_t N, bool A = false, typename T> +KFR_INTRIN cvec<T, N> cdigitreverse4_read(const complex<T>* src) +{ + return digitreverse4<2>(cread<N, A>(src)); +} + +#if 1 + +template <> +KFR_INTRIN cvec<f64, 16> cdigitreverse4_read<16, false, f64>(const complex<f64>* src) +{ + return concat(cread<1>(src + 0), cread<1>(src + 4), cread<1>(src + 8), cread<1>(src + 12), + cread<1>(src + 1), cread<1>(src + 5), cread<1>(src + 9), cread<1>(src + 13), + cread<1>(src + 2), cread<1>(src + 6), cread<1>(src + 10), cread<1>(src + 14), + cread<1>(src + 3), cread<1>(src + 7), cread<1>(src + 11), cread<1>(src + 15)); +} +template <> +KFR_INTRIN void cdigitreverse4_write<false, f64, 32>(complex<f64>* dest, vec<f64, 32> x) +{ + cwrite<1>(dest, part<16, 0>(x)); + cwrite<1>(dest + 4, part<16, 1>(x)); + cwrite<1>(dest + 8, part<16, 2>(x)); + cwrite<1>(dest + 12, part<16, 3>(x)); + + cwrite<1>(dest + 1, part<16, 4>(x)); + cwrite<1>(dest + 5, part<16, 5>(x)); + cwrite<1>(dest + 9, part<16, 6>(x)); + cwrite<1>(dest + 13, part<16, 7>(x)); + + cwrite<1>(dest + 2, part<16, 8>(x)); + cwrite<1>(dest + 6, part<16, 9>(x)); + cwrite<1>(dest + 10, part<16, 10>(x)); + cwrite<1>(dest + 14, part<16, 11>(x)); + + cwrite<1>(dest + 3, part<16, 12>(x)); + cwrite<1>(dest + 7, part<16, 13>(x)); + 
cwrite<1>(dest + 11, part<16, 14>(x)); + cwrite<1>(dest + 15, part<16, 15>(x)); +} +#endif +} +} + +#pragma clang diagnostic pop diff --git a/include/kfr/dft/reference_dft.hpp b/include/kfr/dft/reference_dft.hpp @@ -0,0 +1,141 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. 
 */
#pragma once

#include "../base/complex.hpp"
#include "../base/constants.hpp"
#include "../base/memory.hpp"
#include "../base/read_write.hpp"
#include "../base/vec.hpp"
#include "../misc/small_buffer.hpp"
#include <cmath>

namespace kfr
{

/// One recursive pass of the reference radix-2 decimation-in-time FFT.
/// Works on raw [re, im] pairs so any floating type (incl. long double) can be used.
/// @param pi2    precomputed 2*pi in Tnumber precision
/// @param N      length of this sub-transform (power of two)
/// @param offset index of the first element of this sub-transform
/// @param delta  stride between consecutive elements of this sub-transform
/// @param flag   +1 for forward transform, -1 for inverse (negates the sine term)
/// @param x      original input array (only read at the N == 2 leaves)
/// @param X      output array for this pass
/// @param XX     scratch array holding the two half-size sub-results
///               (X and XX swap roles at each recursion level)
template <typename Tnumber = long double>
void reference_fft_pass(Tnumber pi2, size_t N, size_t offset, size_t delta, int flag, Tnumber (*x)[2],
                        Tnumber (*X)[2], Tnumber (*XX)[2])
{
    const size_t N2 = N / 2;
    using std::sin;
    using std::cos;

    if (N != 2)
    {
        // recurse on even- and odd-indexed halves (stride doubles each level)
        reference_fft_pass(pi2, N2, offset, 2 * delta, flag, x, XX, X);
        reference_fft_pass(pi2, N2, offset + delta, 2 * delta, flag, x, XX, X);

        for (size_t k = 0; k < N2; k++)
        {
            // k00/k01: output indices; k10/k11: indices of the sub-results in XX
            const size_t k00 = offset + k * delta;
            const size_t k01 = k00 + N2 * delta;
            const size_t k10 = offset + 2 * k * delta;
            const size_t k11 = k10 + delta;
            const Tnumber m = static_cast<Tnumber>(k) / N;
            // twiddle factor e^(-+i*2*pi*k/N), sign selected by flag
            const Tnumber cs = cos(pi2 * m);
            const Tnumber sn = flag * sin(pi2 * m);
            // complex multiply of XX[k11] by the twiddle
            const Tnumber tmp0 = cs * XX[k11][0] + sn * XX[k11][1];
            const Tnumber tmp1 = cs * XX[k11][1] - sn * XX[k11][0];
            // butterfly: sum and difference
            X[k01][0] = XX[k10][0] - tmp0;
            X[k01][1] = XX[k10][1] - tmp1;
            X[k00][0] = XX[k10][0] + tmp0;
            X[k00][1] = XX[k10][1] + tmp1;
        }
    }
    else
    {
        // leaf: 2-point DFT directly from the input array
        const size_t k00 = offset;
        const size_t k01 = k00 + delta;
        X[k01][0] = x[k00][0] - x[k01][0];
        X[k01][1] = x[k00][1] - x[k01][1];
        X[k00][0] = x[k00][0] + x[k01][0];
        X[k00][1] = x[k00][1] + x[k01][1];
    }
}

/// Reference FFT for power-of-two sizes, computed in Tnumber precision
/// and converted back to T. sizes < 2 are a no-op.
/// NOTE(review): uses std::vector but <vector> is not included directly here —
/// presumably pulled in transitively; confirm.
template <typename Tnumber = long double, typename T>
void reference_fft(complex<T>* out, const complex<T>* in, size_t size, bool inversion = false)
{
    // view complex<Tnumber> storage as raw [re, im] pairs for the pass function
    using Tcmplx = Tnumber(*)[2];
    if (size < 2)
        return;
    std::vector<complex<Tnumber>> datain(size);
    std::vector<complex<Tnumber>> dataout(size);
    std::vector<complex<Tnumber>> temp(size);
    std::copy(in, in + size, datain.begin());
    const Tnumber pi2 = c_pi<Tnumber, 2, 1>;
    reference_fft_pass<Tnumber>(pi2, size, 0, 1, inversion ?
-1 : +1, Tcmplx(datain.data()), + Tcmplx(dataout.data()), Tcmplx(temp.data())); + std::copy(dataout.begin(), dataout.end(), out); +} + +template <typename Tnumber = long double, typename T> +void reference_dft(complex<T>* out, const complex<T>* in, size_t size, bool inversion = false) +{ + using std::sin; + using std::cos; + if (is_poweroftwo(size)) + { + return reference_fft<Tnumber>(out, in, size, inversion); + } + constexpr Tnumber pi2 = c_pi<Tnumber, 2>; + if (size < 2) + return; + std::vector<complex<T>> datain; + if (out == in) + { + datain.resize(size); + std::copy_n(in, size, datain.begin()); + in = datain.data(); + } + { + Tnumber sumr = 0; + Tnumber sumi = 0; + for (size_t j = 0; j < size; j++) + { + sumr += static_cast<Tnumber>(in[j].real()); + sumi += static_cast<Tnumber>(in[j].imag()); + } + out[0] = { static_cast<T>(sumr), static_cast<T>(sumi) }; + } + for (size_t i = 1; i < size; i++) + { + Tnumber sumr = static_cast<Tnumber>(in[0].real()); + Tnumber sumi = static_cast<Tnumber>(in[0].imag()); + + for (size_t j = 1; j < size; j++) + { + const Tnumber x = pi2 * ((i * j) % size) / size; + Tnumber twr = cos(x); + Tnumber twi = sin(x); + if (inversion) + twi = -twi; + + sumr += twr * static_cast<Tnumber>(in[j].real()) + twi * static_cast<Tnumber>(in[j].imag()); + sumi += twr * static_cast<Tnumber>(in[j].imag()) - twi * static_cast<Tnumber>(in[j].real()); + out[i] = { static_cast<T>(sumr), static_cast<T>(sumi) }; + } + } +} +} diff --git a/include/kfr/dispatch/cpuid.hpp b/include/kfr/dispatch/cpuid.hpp @@ -0,0 +1,305 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. + */ +#pragma once + +#include "../base/types.hpp" +#include <cstring> + +namespace kfr +{ + +struct cpu_features +{ + u32 max; + u32 exmax; + u32 isIntel : 1; + u32 isAMD : 1; + u32 has3DNOW : 1; + u32 has3DNOWEXT : 1; + u32 hasABM : 1; + u32 hasADX : 1; + u32 hasAES : 1; + u32 hasAVX : 1; + u32 hasAVX2 : 1; + u32 hasAVXOSSUPPORT : 1; + u32 hasAVX512OSSUPPORT : 1; + u32 hasAVX512CD : 1; + u32 hasAVX512ER : 1; + u32 hasAVX512F : 1; + u32 hasAVX512DQ : 1; + u32 hasAVX512PF : 1; + u32 hasAVX512BW : 1; + u32 hasBMI1 : 1; + u32 hasBMI2 : 1; + u32 hasCLFSH : 1; + u32 hasCMOV : 1; + u32 hasCMPXCHG16B : 1; + u32 hasCX8 : 1; + u32 hasERMS : 1; + u32 hasF16C : 1; + u32 hasFMA : 1; + u32 hasFSGSBASE : 1; + u32 hasFXSR : 1; + u32 hasHLE : 1; + u32 hasINVPCID : 1; + u32 hasLAHF : 1; + u32 hasLZCNT : 1; + u32 hasMMX : 1; + u32 hasMMXEXT : 1; + u32 hasMONITOR : 1; + u32 hasMOVBE : 1; + u32 hasMSR : 1; + u32 hasOSXSAVE : 1; + u32 hasPCLMULQDQ : 1; + u32 hasPOPCNT : 1; + u32 hasPREFETCHWT1 : 1; + u32 hasRDRAND : 1; + u32 hasRDSEED : 1; + u32 hasRDTSCP : 1; + u32 hasRTM : 1; + u32 hasSEP : 1; + u32 hasSHA : 1; + u32 hasSSE : 1; + u32 hasSSE2 : 1; + u32 hasSSE3 : 1; + u32 hasSSE41 : 1; + u32 hasSSE42 : 1; + u32 hasSSE4a : 1; + u32 hasSSSE3 : 1; + u32 hasSYSCALL : 1; + u32 hasTBM : 1; + u32 hasXOP : 1; + u32 hasXSAVE : 1; + u32 padding1 : 6; + char vendor[17]; 
+ char model[49]; + char padding2[2]; +}; + +namespace internal +{ + +struct cpu_data +{ + u32 data[4]; +}; + +#if defined KFR_COMPILER_GNU || defined KFR_COMPILER_CLANG +KFR_INLINE u32 get_cpuid(u32 func, u32 subfunc, u32* eax, u32* ebx, u32* ecx, u32* edx) +{ + __asm__("cpuid" : "=a"(*eax), "=b"(*ebx), "=c"(*ecx), "=d"(*edx) : "0"(func), "2"(subfunc)); + return 1; +} +KFR_INLINE void cpuid(u32* ptr, u32 func, u32 subfunc = 0) +{ + get_cpuid(func, subfunc, &ptr[0], &ptr[1], &ptr[2], &ptr[3]); +} +KFR_INLINE u32 get_xcr0() +{ + u32 xcr0; + __asm__("xgetbv" : "=a"(xcr0) : "c"(0) : "%edx"); + return xcr0; +} +#endif + +template <size_t> +cpu_t detect_cpu() +{ + cpu_features c; + memset(&c, 0, sizeof(c)); + cpu_data data0; + cpu_data exdata0; + + u32 f_1_ECX(0); + u32 f_1_EDX(0); + u32 f_7_EBX(0); + u32 f_7_ECX(0); + u32 f_81_ECX(0); + u32 f_81_EDX(0); + + cpuid(data0.data, 0); + c.max = static_cast<u32>(data0.data[0]); + cpuid(exdata0.data, 0x80000000); + c.exmax = static_cast<u32>(exdata0.data[0]); + + *ptr_cast<u32>(c.vendor) = static_cast<u32>(data0.data[1]); + *ptr_cast<u32>(c.vendor + 4) = static_cast<u32>(data0.data[3]); + *ptr_cast<u32>(c.vendor + 8) = static_cast<u32>(data0.data[2]); + + c.isIntel = strncmp(c.vendor, "GenuineIntel", sizeof(c.vendor)) == 0 ? 1 : 0; + c.isAMD = strncmp(c.vendor, "AuthenticAMD", sizeof(c.vendor)) == 0 ? 
1 : 0; + + if (c.max >= 1) + { + cpu_data data1; + cpuid(data1.data, 1); + f_1_ECX = static_cast<u32>(data1.data[2]); + f_1_EDX = static_cast<u32>(data1.data[3]); + } + + if (c.max >= 7) + { + cpu_data data7; + cpuid(data7.data, 7); + f_7_EBX = static_cast<u32>(data7.data[1]); + f_7_ECX = static_cast<u32>(data7.data[2]); + } + + if (c.exmax >= 0x80000001) + { + cpu_data data81; + cpuid(data81.data, 0x80000001); + f_81_ECX = static_cast<u32>(data81.data[2]); + f_81_EDX = static_cast<u32>(data81.data[3]); + } + + if (c.exmax >= 0x80000004) + { + cpu_data data82; + cpu_data data83; + cpu_data data84; + cpuid(data82.data, 0x80000002); + cpuid(data83.data, 0x80000003); + cpuid(data84.data, 0x80000004); + memcpy(c.model, data82.data, sizeof(cpu_data)); + memcpy(c.model + 16, data83.data, sizeof(cpu_data)); + memcpy(c.model + 32, data84.data, sizeof(cpu_data)); + } + + c.hasSSE3 = f_1_ECX >> 0 & 1; + c.hasPCLMULQDQ = f_1_ECX >> 1 & 1; + c.hasMONITOR = f_1_ECX >> 3 & 1; + c.hasSSSE3 = f_1_ECX >> 9 & 1; + c.hasFMA = f_1_ECX >> 12 & 1; + c.hasCMPXCHG16B = f_1_ECX >> 13 & 1; + c.hasSSE41 = f_1_ECX >> 19 & 1; + c.hasSSE42 = f_1_ECX >> 20 & 1; + c.hasMOVBE = f_1_ECX >> 22 & 1; + c.hasPOPCNT = f_1_ECX >> 23 & 1; + c.hasAES = f_1_ECX >> 25 & 1; + c.hasXSAVE = f_1_ECX >> 26 & 1; + c.hasOSXSAVE = f_1_ECX >> 27 & 1; + c.hasAVX = f_1_ECX >> 28 & 1; + c.hasF16C = f_1_ECX >> 29 & 1; + c.hasRDRAND = f_1_ECX >> 30 & 1; + c.hasMSR = f_1_EDX >> 5 & 1; + c.hasCX8 = f_1_EDX >> 8 & 1; + c.hasSEP = f_1_EDX >> 11 & 1; + c.hasCMOV = f_1_EDX >> 15 & 1; + c.hasCLFSH = f_1_EDX >> 19 & 1; + c.hasMMX = f_1_EDX >> 23 & 1; + c.hasFXSR = f_1_EDX >> 24 & 1; + c.hasSSE = f_1_EDX >> 25 & 1; + c.hasSSE2 = f_1_EDX >> 26 & 1; + c.hasFSGSBASE = f_7_EBX >> 0 & 1; + c.hasBMI1 = f_7_EBX >> 3 & 1; + c.hasHLE = c.isIntel && f_7_EBX >> 4 & 1; + c.hasAVX2 = f_7_EBX >> 5 & 1; + c.hasBMI2 = f_7_EBX >> 8 & 1; + c.hasERMS = f_7_EBX >> 9 & 1; + c.hasINVPCID = f_7_EBX >> 10 & 1; + c.hasRTM = c.isIntel && f_7_EBX >> 11 & 1; 
    // CPUID leaf 7, EBX feature bits (AVX-512 and friends)
    c.hasAVX512F = f_7_EBX >> 16 & 1;
    c.hasAVX512DQ = f_7_EBX >> 17 & 1;
    c.hasRDSEED = f_7_EBX >> 18 & 1;
    c.hasADX = f_7_EBX >> 19 & 1;
    c.hasAVX512PF = f_7_EBX >> 26 & 1;
    c.hasAVX512ER = f_7_EBX >> 27 & 1;
    c.hasAVX512CD = f_7_EBX >> 28 & 1;
    c.hasSHA = f_7_EBX >> 29 & 1;
    c.hasAVX512BW = f_7_EBX >> 30 & 1;
    c.hasPREFETCHWT1 = f_7_ECX >> 0 & 1;
    // extended leaf 0x80000001 bits; several are vendor-gated because the
    // same bit position has different meaning (or validity) per vendor
    c.hasLAHF = f_81_ECX >> 0 & 1;
    c.hasLZCNT = c.isIntel && f_81_ECX >> 5 & 1;
    c.hasABM = c.isAMD && f_81_ECX >> 5 & 1;
    c.hasSSE4a = c.isAMD && f_81_ECX >> 6 & 1;
    c.hasXOP = c.isAMD && f_81_ECX >> 11 & 1;
    c.hasTBM = c.isAMD && f_81_ECX >> 21 & 1;
    c.hasSYSCALL = c.isIntel && f_81_EDX >> 11 & 1;
    c.hasMMXEXT = c.isAMD && f_81_EDX >> 22 & 1;
    c.hasRDTSCP = c.isIntel && f_81_EDX >> 27 & 1;
    c.has3DNOWEXT = c.isAMD && f_81_EDX >> 30 & 1;
    c.has3DNOW = c.isAMD && f_81_EDX >> 31 & 1;

    // XCR0 tells us which register state the OS actually saves/restores
    const u32 xcr0 = get_xcr0();

    // AVX needs XCR0 bits 1-2 (XMM+YMM state); AVX-512 additionally needs
    // bits 5-7 (opmask, ZMM-hi256, hi16-ZMM state)
    c.hasAVXOSSUPPORT = c.hasAVX && c.hasOSXSAVE && (xcr0 & 0x06) == 0x06;
    c.hasAVX512OSSUPPORT = c.hasAVX512F && c.hasOSXSAVE && (xcr0 & 0xE0) == 0xE0;

    // pick the best instruction set that is (a) compiled in, (b) reported by
    // the CPU and (c) supported by the OS; checked from newest to oldest
#ifdef KFR_AVAIL_AVX512
    if (c.hasAVX512F && c.hasAVX512BW && c.hasAVX512DQ && c.hasAVX512OSSUPPORT)
        return cpu_t::avx3;
#endif
#ifdef KFR_AVAIL_AVX2
    if (c.hasAVX2 && c.hasAVXOSSUPPORT)
        return cpu_t::avx2;
#endif
#ifdef KFR_AVAIL_AVX
    if (c.hasAVX && c.hasAVXOSSUPPORT)
        return cpu_t::avx1;
#endif
#ifdef KFR_AVAIL_SSE41
    if (c.hasSSE41)
        return cpu_t::sse41;
#endif
#ifdef KFR_AVAIL_SSSE3
    if (c.hasSSSE3)
        return cpu_t::ssse3;
#endif
#ifdef KFR_AVAIL_SSE3
    if (c.hasSSE3)
        return cpu_t::sse3;
#endif
#ifdef KFR_AVAIL_SSE2
    if (c.hasSSE2)
        return cpu_t::sse2;
#endif
    return cpu_t::lowest;
}
}

namespace internal
{

// Holds the detected cpu as a function-local static so it has a single
// definition across translation units (header-only library)
KFR_INLINE cpu_t& cpu_v()
{
    static cpu_t v1 = cpu_t::native;
    return v1;
}

// Runs detection once and stores the result; return value exists only so
// this can initialize a static below
KFR_INLINE char init_cpu_v()
{
    cpu_v() = detect_cpu<0>();
    return 0;
}

KFR_INLINE char init_dummyvar()
{
    static char dummy = init_cpu_v();
    return dummy;
}

// forces cpu detection to run during static initialization of any TU
// including this header
static char dummyvar = init_dummyvar();
+} +KFR_INLINE cpu_t get_cpu() { return internal::cpu_v(); } +} diff --git a/include/kfr/dispatch/runtimedispatch.hpp b/include/kfr/dispatch/runtimedispatch.hpp @@ -0,0 +1,173 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. + */ +#pragma once + +#include "../base/dispatch.hpp" +#include "../base/types.hpp" +#include "cpuid.hpp" + +namespace kfr +{ + +namespace internal +{ + +template <typename Fn, typename... Args> +KFR_CPU_INTRIN(sse2) +auto with_cpu_impl(ccpu_t<cpu_t::sse2>, Fn&& fn, Args&&... args) +{ + return fn(std::forward<Args>(args)...); +} + +template <typename Fn, typename... Args> +KFR_CPU_INTRIN(sse3) +auto with_cpu_impl(ccpu_t<cpu_t::sse3>, Fn&& fn, Args&&... args) +{ + return fn(std::forward<Args>(args)...); +} + +template <typename Fn, typename... Args> +KFR_CPU_INTRIN(ssse3) +auto with_cpu_impl(ccpu_t<cpu_t::ssse3>, Fn&& fn, Args&&... args) +{ + return fn(std::forward<Args>(args)...); +} + +template <typename Fn, typename... Args> +KFR_CPU_INTRIN(sse41) +auto with_cpu_impl(ccpu_t<cpu_t::sse41>, Fn&& fn, Args&&... 
args) +{ + return fn(std::forward<Args>(args)...); +} + +template <typename Fn, typename... Args> +KFR_CPU_INTRIN(sse42) +auto with_cpu_impl(ccpu_t<cpu_t::sse42>, Fn&& fn, Args&&... args) +{ + return fn(std::forward<Args>(args)...); +} + +template <typename Fn, typename... Args> +KFR_CPU_INTRIN(avx) +auto with_cpu_impl(ccpu_t<cpu_t::avx>, Fn&& fn, Args&&... args) +{ + return fn(std::forward<Args>(args)...); +} + +template <typename Fn, typename... Args> +KFR_CPU_INTRIN(avx2) +auto with_cpu_impl(ccpu_t<cpu_t::avx2>, Fn&& fn, Args&&... args) +{ + return fn(std::forward<Args>(args)...); +} +} + +template <cpu_t cpu, typename Fn, typename... Args> +KFR_INTRIN auto with_cpu(ccpu_t<cpu>, Fn&& fn, Args&&... args) +{ + return internal::with_cpu_impl(ccpu<cpu>, std::forward<Fn>(fn), std::forward<Args>(args)...); +} + +template <cpu_t cpu, typename Fn> +struct fn_with_cpu +{ + template <typename... Args> + KFR_INTRIN auto operator()(Args&&... args) -> decltype(std::declval<Fn>()(std::forward<Args>(args)...)) + { + return internal::with_cpu_impl(ccpu<cpu>, std::forward<Fn>(fn), std::forward<Args>(args)...); + } + Fn fn; +}; + +template <cpu_t cpu, typename Fn> +KFR_INTRIN fn_with_cpu<cpu, Fn> make_with_cpu(ccpu_t<cpu>, Fn&& fn) +{ + return { std::forward<Fn>(fn) }; +} + +namespace internal +{ + +template <typename Fn, cpu_t, cpu_t...> +struct runtime_dispatcher; + +template <typename Fn, cpu_t oldest> +struct runtime_dispatcher<Fn, oldest> +{ + using targetFn = retarget<Fn, oldest>; + + template <typename... Args> + KFR_INLINE static result_of<targetFn(Args&&...)> call(Fn&& fn, cpu_t, Args&&... args) + { + return cpu_caller<oldest>::retarget_call(std::forward<Fn>(fn), std::forward<Args>(args)...); + } +}; + +template <typename Fn, cpu_t newest, cpu_t next, cpu_t... cpus> +struct runtime_dispatcher<Fn, newest, next, cpus...> +{ + using nextdispatcher = runtime_dispatcher<Fn, next, cpus...>; + + using targetFn = retarget<Fn, newest>; + + template <typename... 
Args, + KFR_ENABLE_IF(is_callable<targetFn, Args&&...>::value&& is_enabled<targetFn>::value)> + KFR_SINTRIN auto call(Fn&& fn, cpu_t set, Args&&... args) + -> decltype(nextdispatcher::call(std::forward<Fn>(fn), set, std::forward<Args>(args)...)) + { + return set >= newest + ? cpu_caller<newest>::retarget_call(std::forward<Fn>(fn), std::forward<Args>(args)...) + : nextdispatcher::call(std::forward<Fn>(fn), set, std::forward<Args>(args)...); + } + template <typename... Args, + KFR_ENABLE_IF(!(is_callable<targetFn, Args&&...>::value && is_enabled<targetFn>::value))> + KFR_SINTRIN auto call(Fn&& fn, cpu_t set, Args&&... args) + -> decltype(nextdispatcher::call(std::forward<Fn>(fn), set, std::forward<Args>(args)...)) + { + return nextdispatcher::call(std::forward<Fn>(fn), set, std::forward<Args>(args)...); + } +}; + +template <typename Fn, cpu_t newest, cpu_t... cpus, typename... Args> +KFR_INLINE auto runtimedispatch(cvals_t<cpu_t, newest, cpus...>, Fn&& fn, Args&&... args) + -> decltype(internal::runtime_dispatcher<Fn, newest, cpus...>::call(std::forward<Fn>(fn), get_cpu(), + std::forward<Args>(args)...)) +{ + return internal::runtime_dispatcher<Fn, newest, cpus...>::call(std::forward<Fn>(fn), get_cpu(), + std::forward<Args>(args)...); +} + +template <cpu_t c, typename Fn, typename... Args, KFR_ENABLE_IF(c == cpu_t::runtime)> +KFR_INLINE auto dispatch(Fn&& fn, Args&&... args) -> decltype(fn(std::forward<Args>(args)...)) +{ + return runtimedispatch(std::forward<Fn>(fn), std::forward<Args>(args)...); +} +} + +template <typename Fn, typename cpulist = decltype(cpu_all), typename... Args> +KFR_INLINE auto runtimedispatch(Fn&& fn, Args&&... 
args) + -> decltype(internal::runtimedispatch<Fn>(cpulist(), std::forward<Fn>(fn), std::forward<Args>(args)...)) +{ + return internal::runtimedispatch(cpulist(), std::forward<Fn>(fn), std::forward<Args>(args)...); +} +} diff --git a/include/kfr/dsp/biquad.hpp b/include/kfr/dsp/biquad.hpp @@ -0,0 +1,401 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. 
+ */ +#pragma once + +#include "../base/function.hpp" +#include "../base/operators.hpp" +#include "../base/vec.hpp" +#include <cmath> + +#pragma clang diagnostic push +#if CID_HAS_WARNING("-Winaccessible-base") +#pragma clang diagnostic ignored "-Winaccessible-base" +#endif + +namespace kfr +{ + +enum class biquad_type +{ + lowpass, + highpass, + bandpass, + bandstop, + peak, + notch, + lowshelf, + highshelf +}; + +template <typename T> +struct biquad_params +{ + constexpr static bool is_pod = true; + + static_assert(std::is_floating_point<T>::value, "T must be a floating point type"); + constexpr biquad_params() noexcept : a0(1), a1(0), a2(0), b0(1), b1(0), b2(0) {} + constexpr biquad_params(T a0, T a1, T a2, T b0, T b1, T b2) noexcept : a0(a0), + a1(a1), + a2(a2), + b0(b0), + b1(b1), + b2(b2) + { + } + T a0; + T a1; + T a2; + T b0; + T b1; + T b2; + biquad_params<T> normalized_a0() const + { + vec<T, 5> v{ a1, a2, b0, b1, b2 }; + v = v / a0; + return { T(1.0), v[0], v[1], v[2], v[3], v[4] }; + } + biquad_params<T> normalized_b0() const { return { a0, a1, a2, T(1.0), b1 / b0, b2 / b0 }; } + biquad_params<T> normalized_all() const { return normalized_a0().normalized_b0(); } +}; + +template <typename T> +KFR_INLINE biquad_params<T> biquad_allpass(T frequency, T Q) +{ + const T alpha = std::sin(frequency) / 2.0 * Q; + const T cs = std::cos(frequency); + + const T b0 = 1.0 / (1.0 + alpha); + const T b1 = -2.0 * cs * b0; + const T b2 = (1.0 - alpha) * b0; + const T a0 = (1.0 - alpha) * b0; + const T a1 = -2.0 * cs * b0; + const T a2 = (1.0 + alpha) * b0; + return { b0, b1, b2, a0, a1, a2 }; +} + +template <typename T> +KFR_INLINE biquad_params<T> biquad_lowpass(T frequency, T Q) +{ + const T K = std::tan(c_pi<T, 1> * frequency); + const T K2 = K * K; + const T norm = 1 / (1 + K / Q + K2); + const T a0 = K2 * norm; + const T a1 = 2 * a0; + const T a2 = a0; + const T b1 = 2 * (K2 - 1) * norm; + const T b2 = (1 - K / Q + K2) * norm; + return { 1.0, b1, b2, a0, a1, a2 }; 
}

/// Calculates coefficients for a highpass biquad (bilinear-transform design).
/// @param frequency normalized frequency (f/Fs)
/// @param Q quality factor
template <typename T>
KFR_INLINE biquad_params<T> biquad_highpass(T frequency, T Q)
{
    const T K = std::tan(c_pi<T, 1> * frequency);
    const T K2 = K * K;
    // common denominator of the transfer function
    const T norm = 1 / (1 + K / Q + K2);
    const T a0 = 1 * norm;
    const T a1 = -2 * a0;
    const T a2 = a0;
    const T b1 = 2 * (K2 - 1) * norm;
    const T b2 = (1 - K / Q + K2) * norm;
    // biquad_params ctor order is {a0,a1,a2,b0,b1,b2}: the locals named a*
    // here become the numerator (b) coefficients and vice versa
    return { 1.0, b1, b2, a0, a1, a2 };
}

/// Calculates coefficients for a bandpass biquad.
/// @param frequency normalized center frequency (f/Fs)
/// @param Q quality factor (controls bandwidth)
template <typename T>
KFR_INLINE biquad_params<T> biquad_bandpass(T frequency, T Q)
{
    const T K = std::tan(c_pi<T, 1> * frequency);
    const T K2 = K * K;
    const T norm = 1 / (1 + K / Q + K2);
    const T a0 = K / Q * norm;
    const T a1 = 0;
    const T a2 = -a0;
    const T b1 = 2 * (K2 - 1) * norm;
    const T b2 = (1 - K / Q + K2) * norm;
    return { 1.0, b1, b2, a0, a1, a2 };
}

/// Calculates coefficients for a notch (band-reject) biquad.
/// @param frequency normalized center frequency (f/Fs)
/// @param Q quality factor (controls notch width)
template <typename T>
KFR_INLINE biquad_params<T> biquad_notch(T frequency, T Q)
{
    const T K = std::tan(c_pi<T, 1> * frequency);
    const T K2 = K * K;
    const T norm = 1 / (1 + K / Q + K2);
    const T a0 = (1 + K2) * norm;
    const T a1 = 2 * (K2 - 1) * norm;
    const T a2 = a0;
    const T b1 = a1;
    const T b2 = (1 - K / Q + K2) * norm;
    return { 1.0, b1, b2, a0, a1, a2 };
}

/// Calculates coefficients for a peaking EQ biquad.
/// @param frequency normalized center frequency (f/Fs)
/// @param Q quality factor
/// @param gain peak gain in dB (positive = boost, negative = cut)
template <typename T>
KFR_INLINE biquad_params<T> biquad_peak(T frequency, T Q, T gain)
{
    biquad_params<T> result;
    const T K = std::tan(c_pi<T, 1> * frequency);
    const T K2 = K * K;
    // V = 10^(|gain|/20): linear amplitude from gain in dB
    const T V = std::exp(std::abs(gain) * (1.0 / 20.0) * c_log_10<T>);

    if (gain >= 0)
    { // boost
        const T norm = 1 / (1 + 1 / Q * K + K2);
        const T a0 = (1 + V / Q * K + K2) * norm;
        const T a1 = 2 * (K2 - 1) * norm;
        const T a2 = (1 - V / Q * K + K2) * norm;
        const T b1 = a1;
        const T b2 = (1 - 1 / Q * K + K2) * norm;
        result = { 1.0, b1, b2, a0, a1, a2 };
    }
    else
    { // cut: boost case with V moved to the denominator
        const T norm = 1 / (1 + V / Q * K + K2);
        const T a0 = (1 + 1 / Q * K + K2) * norm;
        const T a1 = 2 * (K2 - 1) * norm;
        const T a2 = (1 - 1 / Q * K + K2) * norm;
        const T b1 = a1;
        const T b2 = (1 - V / Q * K + K2) * norm;
        result = { 1.0, b1, b2,
a0, a1, a2 }; + } + return result; +} + +template <typename T> +KFR_INLINE biquad_params<T> biquad_lowshelf(T frequency, T gain) +{ + biquad_params<T> result; + const T K = std::tan(c_pi<T, 1> * frequency); + const T K2 = K * K; + const T V = std::exp(std::fabs(gain) * (1.0 / 20.0) * c_log_10<T>); + + if (gain >= 0) + { // boost + const T norm = 1 / (1 + c_sqrt_2<T> * K + K2); + const T a0 = (1 + std::sqrt(2 * V) * K + V * K2) * norm; + const T a1 = 2 * (V * K2 - 1) * norm; + const T a2 = (1 - std::sqrt(2 * V) * K + V * K2) * norm; + const T b1 = 2 * (K2 - 1) * norm; + const T b2 = (1 - c_sqrt_2<T> * K + K2) * norm; + result = { 1.0, b1, b2, a0, a1, a2 }; + } + else + { // cut + const T norm = 1 / (1 + std::sqrt(2 * V) * K + V * K2); + const T a0 = (1 + c_sqrt_2<T> * K + K2) * norm; + const T a1 = 2 * (K2 - 1) * norm; + const T a2 = (1 - c_sqrt_2<T> * K + K2) * norm; + const T b1 = 2 * (V * K2 - 1) * norm; + const T b2 = (1 - std::sqrt(2 * V) * K + V * K2) * norm; + result = { 1.0, b1, b2, a0, a1, a2 }; + } + return result; +} + +template <typename T> +KFR_INLINE biquad_params<T> biquad_highshelf(T frequency, T gain) +{ + biquad_params<T> result; + const T K = std::tan(c_pi<T, 1> * frequency); + const T K2 = K * K; + const T V = std::exp(std::fabs(gain) * (1.0 / 20.0) * c_log_10<T>); + + if (gain >= 0) + { // boost + const T norm = 1 / (1 + c_sqrt_2<T> * K + K2); + const T a0 = (V + std::sqrt(2 * V) * K + K2) * norm; + const T a1 = 2 * (K2 - V) * norm; + const T a2 = (V - std::sqrt(2 * V) * K + K2) * norm; + const T b1 = 2 * (K2 - 1) * norm; + const T b2 = (1 - c_sqrt_2<T> * K + K2) * norm; + result = { 1.0, b1, b2, a0, a1, a2 }; + } + else + { // cut + const T norm = 1 / (V + std::sqrt(2 * V) * K + K2); + const T a0 = (1 + c_sqrt_2<T> * K + K2) * norm; + const T a1 = 2 * (K2 - 1) * norm; + const T a2 = (1 - c_sqrt_2<T> * K + K2) * norm; + const T b1 = 2 * (K2 - V) * norm; + const T b2 = (V - std::sqrt(2 * V) * K + K2) * norm; + result = { 1.0, b1, b2, a0, a1, a2 
}; + } + return result; +} + +namespace internal +{ +template <cpu_t cpu = cpu_t::native> +struct in_biquad +{ +private: +public: + template <typename T, size_t filters> + struct biquad_block + { + vec<T, filters> s1; + vec<T, filters> s2; + vec<T, filters> a1; + vec<T, filters> a2; + vec<T, filters> b0; + vec<T, filters> b1; + vec<T, filters> b2; + + vec<T, filters> out; + biquad_block() : s1(), s2(), a1(), a2(), b0(), b1(), b2(), out() {} + biquad_block(const biquad_params<T>* bq, size_t count) : s1(), s2(), out() + { + count = count > filters ? filters : count; + for (size_t i = 0; i < count; i++) + { + a1(i) = bq[i].a1; + a2(i) = bq[i].a2; + b0(i) = bq[i].b0; + b1(i) = bq[i].b1; + b2(i) = bq[i].b2; + } + for (size_t i = count; i < filters; i++) + { + a1(i) = T(0); + a2(i) = T(0); + b0(i) = T(1); + b1(i) = T(0); + b2(i) = T(0); + } + } + + template <size_t count> + biquad_block(const biquad_params<T> (&bq)[count]) : biquad_block(bq, count) + { + static_assert(count <= filters, "count > filters"); + } + }; + + template <typename T, typename E1> + struct expression_biquad : public expression<E1> + { + using value_type = T; + + template <cpu_t newcpu> + using retarget_this = typename in_biquad<newcpu>::template expression_biquad<T, retarget<E1, newcpu>>; + + expression_biquad(const biquad_params<T>& bq, E1&& e1) noexcept + : expression<E1>(std::forward<E1>(e1)), + bq(bq) + { + } + template <typename U, size_t width> + inline vec<U, width> operator()(cinput_t, size_t index, vec_t<U, width> t) + { + const vec<T, width> in = cast<T>(this->argument_first(index, t)); + const vec<T, width> in1 = insertleft(x[0], in); + const vec<T, width> in2 = insertleft(x[1], in1); + vec<T, width> out = bq.b0 * in + bq.b1 * in1 + bq.b2 * in2; + + out(0) = out[0] - bq.a1 * y[0] - bq.a2 * y[1]; + out(1) = out[1] - bq.a1 * out[0] - bq.a2 * y[0]; + + KFR_LOOP_UNROLL + for (size_t i = 2; i < width; i++) + { + out(i) = out[i] - bq.a1 * out[i - 1] - bq.a2 * out[i - 2]; + } + + x(1) = in[width 
- 2]; + x(0) = in[width - 1]; + + y(1) = out[width - 2]; + y(0) = out[width - 1]; + return cast<U>(out); + } + template <typename U> + inline vec<U, 1> operator()(cinput_t, size_t index, vec_t<U, 1> t) + { + T in = cast<T>(this->argument_first(index, t))[0]; + + T out = bq.b0 * in + bq.b1 * x[0] + bq.b2 * x[1] - bq.a1 * y[0] - bq.a2 * y[1]; + x(1) = x[0]; + x(0) = in; + y(1) = y[0]; + y(0) = out; + return cast<U>(out); + } + biquad_params<T> bq; + mutable vec<T, 2> x = T(0); + mutable vec<T, 2> y = T(0); + }; + + template <size_t filters, typename T, typename E1> + struct expression_biquads : public expression<E1> + { + using value_type = T; + + template <cpu_t newcpu> + using retarget_this = + typename in_biquad<newcpu>::template expression_biquads<filters, T, retarget<E1, newcpu>>; + + expression_biquads(const biquad_block<T, filters>& bq, E1&& e1) + : expression<E1>(std::forward<E1>(e1)), bq(bq) + { + } + template <size_t width> + inline vec<T, width> operator()(cinput_t, size_t index, vec_t<T, width> t) const + { + const vec<T, width> in = this->argument_first(index, t); + vec<T, width> out; + + KFR_LOOP_UNROLL + for (size_t i = 0; i < width; i++) + { + bq.out = process(insertleft(in[i], bq.out)); + out(i) = bq.out[filters - 1]; + } + + return out; + } + KFR_INLINE vec<T, filters> process(vec<T, filters> in) const + { + const vec<T, filters> out = bq.b0 * in + bq.s1; + bq.s1 = bq.s2 + bq.b1 * in - bq.a1 * out; + bq.s2 = bq.b2 * in - bq.a2 * out; + return out; + } + mutable biquad_block<T, filters> bq; + }; +}; +} + +template <typename T, typename E1> +KFR_INLINE internal::in_biquad<>::expression_biquad<T, internal::arg<E1>> biquad(const biquad_params<T>& bq, + E1&& e1) +{ + return internal::in_biquad<>::expression_biquad<T, internal::arg<E1>>(bq, std::forward<E1>(e1)); +} +template <size_t filters, typename T, typename E1> +KFR_INLINE internal::in_biquad<>::expression_biquads<filters, T, internal::arg<E1>> biquad( + const biquad_params<T> (&bq)[filters], E1&& 
e1) +{ + return internal::in_biquad<>::expression_biquads<filters, T, internal::arg<E1>>(bq, std::forward<E1>(e1)); +} +} + +#pragma clang diagnostic pop diff --git a/include/kfr/dsp/fir.hpp b/include/kfr/dsp/fir.hpp @@ -0,0 +1,280 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. 
+ */ +#pragma once + +#include "../base/memory.hpp" +#include "../base/sin_cos.hpp" +#include "../base/vec.hpp" +#include "../expressions/basic.hpp" +#include "../expressions/operators.hpp" +#include "../expressions/reduce.hpp" +#include "window.hpp" + +#pragma clang diagnostic push +#if CID_HAS_WARNING("-Winaccessible-base") +#pragma clang diagnostic ignored "-Winaccessible-base" +#endif + +namespace kfr +{ + +template <typename T, size_t Size> +using fir_taps = univector<T, Size>; + +namespace internal +{ +template <cpu_t cpu = cpu_t::native> +struct in_fir : in_sqrt<cpu>, in_abs<cpu>, in_log_exp<cpu>, in_sin_cos<cpu>, in_window<cpu>, in_reduce<cpu> +{ +private: + using in_sqrt<cpu>::sqrt; + using in_abs<cpu>::abs; + using in_log_exp<cpu>::log; + using in_log_exp<cpu>::exp; + using in_log_exp<cpu>::log_fmadd; + using in_log_exp<cpu>::exp_fmadd; + using in_log_exp<cpu>::exp10; + using typename in_sin_cos<cpu>::fn_sinc; + using in_reduce<cpu>::reduce; + using in_reduce<cpu>::dotproduct; + using in_reduce<cpu>::sum; + +public: + template <typename T> + KFR_SINTRIN void fir_lowpass(univector_ref<T> taps, T cutoff, const expression_pointer<T>& window, + bool normalize = true) + { + const T scale = 2.0 * cutoff; + taps = bind_expression(fn_sinc(), symmlinspace<T, true>((taps.size() - 1) * cutoff * c_pi<T>, + taps.size(), true)) * + scale * window; + + if (is_odd(taps.size())) + taps[taps.size() / 2] = scale; + + if (normalize) + { + const T invsum = reciprocal(sum(taps)); + taps = taps * invsum; + } + } + template <typename T> + KFR_SINTRIN void fir_highpass(univector_ref<T> taps, T cutoff, const expression_pointer<T>& window, + bool normalize = true) + { + const T scale = 2.0 * -cutoff; + taps = bind_expression(fn_sinc(), symmlinspace<T, true>((taps.size() - 1) * cutoff * c_pi<T>, + taps.size(), true)) * + scale * window; + + if (is_odd(taps.size())) + taps[taps.size() / 2] = 1 - 2.0 * cutoff; + + if (normalize) + { + const T invsum = reciprocal(sum(taps) + 1); + taps 
= taps * invsum; + } + } + + template <typename T> + KFR_SINTRIN void fir_bandpass(univector_ref<T> taps, T frequency1, T frequency2, + const expression_pointer<T>& window, bool normalize = true) + { + const T scale1 = 2.0 * frequency1; + const T scale2 = 2.0 * frequency2; + const T sc = c_pi<T> * T(taps.size() - 1); + const T start1 = sc * frequency1; + const T start2 = sc * frequency2; + + taps = (bind_expression(fn_sinc(), symmlinspace<T, true>(start2, taps.size(), true)) * scale2 - + bind_expression(fn_sinc(), symmlinspace<T, true>(start1, taps.size(), true)) * scale1) * + window; + + if (is_odd(taps.size())) + taps[taps.size() / 2] = 2 * (frequency2 - frequency1); + + if (normalize) + { + const T invsum = reciprocal(sum(taps) + 1); + taps = taps * invsum; + } + } + + template <typename T> + KFR_SINTRIN void fir_bandstop(univector_ref<T> taps, T frequency1, T frequency2, + const expression_pointer<T>& window, bool normalize = true) + { + const T scale1 = 2.0 * frequency1; + const T scale2 = 2.0 * frequency2; + const T sc = c_pi<T> * T(taps.size() - 1); + const T start1 = sc * frequency1; + const T start2 = sc * frequency2; + + taps = (bind_expression(fn_sinc(), symmlinspace<T, true>(start1, taps.size(), true)) * scale1 - + bind_expression(fn_sinc(), symmlinspace<T, true>(start2, taps.size(), true)) * scale2) * + window; + + if (is_odd(taps.size())) + taps[taps.size() / 2] = 1 - 2 * (frequency2 - frequency1); + + if (normalize) + { + const T invsum = reciprocal(sum(taps)); + taps = taps * invsum; + } + } + + template <size_t index, size_t order, typename T, size_t N> + KFR_SINTRIN void convole_round(vec<T, N>& output, vec<T, order> input, vec<T, order> taps, + vec<T, order> delay) + { + output(index) = dot(taps, rotatetwo<index + 1>(delay, input)); + } + + template <size_t index, size_t order, typename T, size_t N, KFR_ENABLE_IF(index >= N)> + KFR_SINTRIN void convole_rounds(vec<T, N>& /*output*/, vec<T, order> /*input*/, vec<T, order> /*taps*/, + vec<T, order> 
/*delay*/) + { + } + + template <size_t index, size_t order, typename T, size_t N, KFR_ENABLE_IF(index < N)> + KFR_SINTRIN void convole_rounds(vec<T, N>& output, vec<T, order> input, vec<T, order> taps, + vec<T, order> delay) + { + convole_round<index, order, T, N>(output, input, taps, delay); + convole_rounds<index + 1, order, T, N>(output, input, taps, delay); + } + + template <size_t tapcount, typename T, typename E1> + struct expression_short_fir : expression<E1> + { + static_assert(is_poweroftwo(tapcount), "tapcount must be a power of two"); + template <cpu_t newcpu> + using retarget_this = + typename in_fir<newcpu>::template expression_short_fir<tapcount, T, retarget<E1, newcpu>>; + + expression_short_fir(E1&& e1, const array_ref<T>& taps) + : expression<E1>(std::forward<E1>(e1)), taps(taps) + { + } + template <typename U, size_t N> + KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N> x) + { + const vec<T, N> in = cast<T>(this->argument_first(index, x)); + + vec<T, N> out; + vec<T, tapcount> winput = widen<tapcount>(in); + winput = reverse(winput); + convole_rounds<0, tapcount, T, N>(out, winput, taps, delayline); + delayline = rotatetwo<N>(delayline, winput); + + return cast<U>(out); + } + const vec<T, tapcount> taps; + vec<T, tapcount> delayline; + }; + + template <typename T, typename E1> + struct expression_fir : expression<E1> + { + template <cpu_t newcpu> + using retarget_this = typename in_fir<newcpu>::template expression_fir<T, retarget<E1, newcpu>>; + + expression_fir(E1&& e1, const array_ref<const T>& taps) + : expression<E1>(std::forward<E1>(e1)), taps(taps), delayline(taps.size(), T()), + delayline_cursor(0) + { + } + template <typename U, size_t N> + KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N> x) + { + const size_t tapcount = taps.size(); + const vec<T, N> input = cast<T>(this->argument_first(index, x)); + + vec<T, N> output; + size_t cursor = delayline_cursor; + KFR_LOOP_NOUNROLL + for (size_t i = 0; i < 
N; i++) + { + delayline.ringbuf_write(cursor, input[i]); + output(i) = dotproduct(taps, delayline.slice(cursor) /*, tapcount - cursor*/) + + dotproduct(taps.slice(tapcount - cursor), delayline /*, cursor*/); + } + delayline_cursor = cursor; + return cast<U>(output); + } + const univector_dyn<T> taps; + univector_dyn<T> delayline; + size_t delayline_cursor; + }; + KFR_SPEC_FN(in_fir, fir_lowpass) + KFR_SPEC_FN(in_fir, fir_highpass) + KFR_SPEC_FN(in_fir, fir_bandpass) + KFR_SPEC_FN(in_fir, fir_bandstop) +}; +} + +namespace native +{ +template <typename T, size_t Tag> +KFR_INLINE void fir_lowpass(univector<T, Tag>& taps, identity<T> cutoff, const expression_pointer<T>& window, + bool normalize = true) +{ + return internal::in_fir<>::fir_lowpass(taps.slice(), cutoff, window, normalize); +} +template <typename T, size_t Tag> +KFR_INLINE void fir_highpass(univector<T, Tag>& taps, identity<T> cutoff, const expression_pointer<T>& window, + bool normalize = true) +{ + return internal::in_fir<>::fir_highpass(taps.slice(), cutoff, window, normalize); +} +template <typename T, size_t Tag> +KFR_INLINE void fir_bandpass(univector<T, Tag>& taps, identity<T> frequency1, identity<T> frequency2, + const expression_pointer<T>& window, bool normalize = true) +{ + return internal::in_fir<>::fir_bandpass(taps.slice(), frequency1, frequency2, window, normalize); +} +template <typename T, size_t Tag> +KFR_INLINE void fir_bandstop(univector<T, Tag>& taps, identity<T> frequency1, identity<T> frequency2, + const expression_pointer<T>& window, bool normalize = true) +{ + return internal::in_fir<>::fir_bandstop(taps.slice(), frequency1, frequency2, window, normalize); +} + +template <typename T, typename E1, size_t Tag> +KFR_INLINE internal::in_fir<>::expression_fir<T, E1> fir(E1&& e1, const univector<T, Tag>& taps) +{ + return internal::in_fir<>::expression_fir<T, E1>(std::forward<E1>(e1), taps.ref()); +} +template <typename T, size_t TapCount, typename E1> +KFR_INLINE 
internal::in_fir<>::expression_short_fir<TapCount, T, E1> short_fir( + E1&& e1, const univector<T, TapCount>& taps) +{ + static_assert(TapCount >= 1 && TapCount < 16, "Use short_fir only for small FIR filters"); + return internal::in_fir<>::expression_short_fir<TapCount, T, E1>(std::forward<E1>(e1), taps.ref()); +} +} +} + +#pragma clang diagnostic pop diff --git a/include/kfr/dsp/goertzel.hpp b/include/kfr/dsp/goertzel.hpp @@ -0,0 +1,126 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. 
+ */ +#pragma once + +#include "../base/complex.hpp" +#include "../base/sin_cos.hpp" +#include "../base/vec.hpp" +#include "../expressions/basic.hpp" + +namespace kfr +{ +namespace internal +{ + +template <cpu_t c = cpu_t::native, cpu_t cc = c> +struct in_goertzel : in_sin_cos<cc> +{ +private: + using in_sin_cos<cc>::sin; + using in_sin_cos<cc>::cos; + +public: + template <typename T> + struct expression_goertzel : output_expression + { + expression_goertzel(complex<T>& result, identity<T> omega) + : result(result), omega(omega), coeff(2 * cos(omega)), q0(), q1(), q2() + { + } + ~expression_goertzel() + { + result.real(q1 - q2 * cos(omega)); + result.imag(q2 * sin(omega)); + } + template <typename U, size_t N> + KFR_INLINE void operator()(coutput_t, size_t index, vec<U, N> x) + { + vec<T, N> in = cast<T>(x); + KFR_LOOP_UNROLL + for (size_t i = 0; i < N; i++) + { + q0 = coeff * q1 - q2 + in[i]; + q2 = q1; + q1 = q0; + } + } + complex<T>& result; + const T omega; + const T coeff; + T q0; + T q1; + T q2; + }; + + template <typename T, size_t width> + struct expression_parallel_goertzel : output_expression + { + expression_parallel_goertzel(complex<T> result[], vec<T, width> omega) + : result(result), omega(omega), coeff(cos(omega)), q0(), q1(), q2() + { + } + ~expression_parallel_goertzel() + { + const vec<T, width> re = q1 - q2 * cos(omega); + const vec<T, width> im = q2 * sin(omega); + for (size_t i = 0; i < width; i++) + { + result[i].real(re[i]); + result[i].imag(im[i]); + } + } + template <typename U, size_t N> + KFR_INLINE void operator()(coutput_t, size_t index, vec<U, N> x) + { + const vec<T, N> in = cast<T>(x); + KFR_LOOP_UNROLL + for (size_t i = 0; i < N; i++) + { + q0 = coeff * q1 - q2 + in[i]; + q2 = q1; + q1 = q0; + } + } + complex<T> result[]; + const vec<T, width> omega; + const vec<T, width> coeff; + vec<T, width> q0; + vec<T, width> q1; + vec<T, width> q2; + }; + + template <typename T> + KFR_SINTRIN expression_goertzel<T> goertzel(complex<T>& result, 
identity<T> omega) + { + return expression_goertzel<T>(result, omega); + } + + template <typename T, size_t width> + KFR_SINTRIN expression_parallel_goertzel<T, width> goertzel(complex<T> (&result)[width], + const T (&omega)[width]) + { + return expression_parallel_goertzel<T, width>(result, read<width>(omega)); + } +}; +} +} diff --git a/include/kfr/dsp/interpolation.hpp b/include/kfr/dsp/interpolation.hpp @@ -0,0 +1,86 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. 
+ */ +#pragma once + +#include "../base/select.hpp" +#include "../base/sin_cos.hpp" +#include "../base/vec.hpp" + +namespace kfr +{ +namespace internal +{ +template <cpu_t c = cpu_t::native, cpu_t cc = c> +struct in_interpolation : in_sin_cos<cc>, in_select<cc> +{ +private: + using in_sin_cos<cc>::fastcos; + using in_select<cc>::select; + +public: + template <typename T, typename M> + KFR_SINTRIN T nearest(M mu, T x1, T x2) + { + return select(mu < M(0.5), x1, x2); + } + + template <typename T, typename M> + KFR_SINTRIN T linear(M mu, T x1, T x2) + { + return mix(mu, x1, x2); + } + + template <typename T, typename M> + KFR_SINTRIN T cosine(M mu, T x1, T x2) + { + return mix((M(1) - fastcos(mu * c_pi<T>)) * M(0.5), x1, x2); + } + + template <typename T, typename M> + KFR_SINTRIN T cubic(M mu, T x0, T x1, T x2, T x3) + { + const T a0 = x3 - x2 - x0 + x1; + const T a1 = x0 - x1 - a0; + const T a2 = x2 - x0; + const T a3 = x1; + return horner(mu, a0, a1, a2, a3); + } + + template <typename T, typename M> + KFR_SINTRIN T catmullrom(M mu, T x0, T x1, T x2, T x3) + { + const T a0 = T(0.5) * (x3 - x0) - T(1.5) * (x2 - x1); + const T a1 = x0 - T(2.5) * x1 + T(2) * x2 - T(0.5) * x3; + const T a2 = T(0.5) * (x2 - x0); + const T a3 = x1; + return horner(mu, a0, a1, a2, a3); + } + + KFR_SPEC_FN(in_interpolation, nearest) + KFR_SPEC_FN(in_interpolation, linear) + KFR_SPEC_FN(in_interpolation, cosine) + KFR_SPEC_FN(in_interpolation, cubic) + KFR_SPEC_FN(in_interpolation, catmullrom) +}; +} +} diff --git a/include/kfr/dsp/oscillators.hpp b/include/kfr/dsp/oscillators.hpp @@ -0,0 +1,338 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. + */ +#pragma once + +#include "../base/sin_cos.hpp" +#include "../base/vec.hpp" +#include "../expressions/basic.hpp" + +#pragma clang diagnostic push +#if CID_HAS_WARNING("-Winaccessible-base") +#pragma clang diagnostic ignored "-Winaccessible-base" +#endif + +namespace kfr +{ + +inline auto simpleimpulse() +{ + return lambda([](cinput_t, size_t index, auto x) { + if (index == 0) + return onoff(x); + else + return zerovector(x); + }); +} + +template <typename T> +auto jaehne(T magn, size_t size) +{ + using namespace native; + return typed<T>(magn * sin(c_pi<T, 1, 2> * sqr(linspace(T(0), T(size), size, false)) / size), size); +} + +template <typename T> +auto swept(T magn, size_t size) +{ + using namespace native; + return typed<T>( + magn * sin(c_pi<T, 1, 4> * sqr(sqr(linspace(T(0), T(size), size, false)) / sqr(T(size))) * T(size)), + size); +} + +namespace internal +{ +template <cpu_t c = cpu_t::native, cpu_t cc = c> +struct in_oscillators : in_sin_cos<cc>, in_select<cc>, in_round<cc>, in_abs<cc> +{ +private: + using in_sin_cos<cc>::fastsin; + using in_sin_cos<cc>::sin; + using in_select<cc>::select; + using in_round<cc>::fract; + using in_abs<cc>::abs; + +public: + template <typename T> + KFR_SINTRIN T rawsine(T x) + { + return fastsin(x * c_pi<T, 2>); + } + template <typename T> + KFR_SINTRIN T sinenorm(T x) + { + return 
rawsine(fract(x)); + } + template <typename T> + KFR_SINTRIN T sine(T x) + { + return sinenorm(c_recip_pi<T, 1, 2> * x); + } + + template <typename T> + KFR_SINTRIN T rawsquare(T x) + { + return select(x < T(0.5), T(1), -T(1)); + } + template <typename T> + KFR_SINTRIN T squarenorm(T x) + { + return rawsquare(fract(x)); + } + template <typename T> + KFR_SINTRIN T square(T x) + { + return squarenorm(c_recip_pi<T, 1, 2> * x); + } + + template <typename T> + KFR_SINTRIN T rawsawtooth(T x) + { + return T(1) - 2 * x; + } + template <typename T> + KFR_SINTRIN T sawtoothnorm(T x) + { + return rawsawtooth(fract(x)); + } + template <typename T> + KFR_SINTRIN T sawtooth(T x) + { + return sawtoothnorm(c_recip_pi<T, 1, 2> * x); + } + + template <typename T> + KFR_SINTRIN T isawtoothnorm(T x) + { + return T(-1) + 2 * fract(x + 0.5); + } + template <typename T> + KFR_SINTRIN T isawtooth(T x) + { + return isawtoothnorm(c_recip_pi<T, 1, 2> * x); + } + + template <typename T> + KFR_SINTRIN T rawtriangle(T x) + { + return 1 - abs(4 * x - 2); + } + template <typename T> + KFR_SINTRIN T trianglenorm(T x) + { + return rawtriangle(fract(x + 0.25)); + } + template <typename T> + KFR_SINTRIN T triangle(T x) + { + return trianglenorm(c_recip_pi<T, 1, 2> * x); + } + + KFR_SPEC_FN(in_oscillators, rawsine) + KFR_SPEC_FN(in_oscillators, sine) + KFR_SPEC_FN(in_oscillators, sinenorm) + KFR_SPEC_FN(in_oscillators, rawsquare) + KFR_SPEC_FN(in_oscillators, square) + KFR_SPEC_FN(in_oscillators, squarenorm) + KFR_SPEC_FN(in_oscillators, rawtriangle) + KFR_SPEC_FN(in_oscillators, triangle) + KFR_SPEC_FN(in_oscillators, trianglenorm) + KFR_SPEC_FN(in_oscillators, rawsawtooth) + KFR_SPEC_FN(in_oscillators, sawtooth) + KFR_SPEC_FN(in_oscillators, sawtoothnorm) + KFR_SPEC_FN(in_oscillators, isawtooth) + KFR_SPEC_FN(in_oscillators, isawtoothnorm) +}; +} + +using fn_rawsine = internal::in_oscillators<>::fn_rawsine; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> +KFR_INTRIN ftype<T1> 
rawsine(const T1& x) +{ + return internal::in_oscillators<>::rawsine(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> +KFR_INTRIN expr_func<fn_rawsine, E1> rawsine(E1&& x) +{ + return { {}, std::forward<E1>(x) }; +} +using fn_sine = internal::in_oscillators<>::fn_sine; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> +KFR_INTRIN ftype<T1> sine(const T1& x) +{ + return internal::in_oscillators<>::sine(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> +KFR_INTRIN expr_func<fn_sine, E1> sine(E1&& x) +{ + return { {}, std::forward<E1>(x) }; +} +using fn_sinenorm = internal::in_oscillators<>::fn_sinenorm; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> +KFR_INTRIN ftype<T1> sinenorm(const T1& x) +{ + return internal::in_oscillators<>::sinenorm(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> +KFR_INTRIN expr_func<fn_sinenorm, E1> sinenorm(E1&& x) +{ + return { {}, std::forward<E1>(x) }; +} +using fn_rawsquare = internal::in_oscillators<>::fn_rawsquare; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> +KFR_INTRIN ftype<T1> rawsquare(const T1& x) +{ + return internal::in_oscillators<>::rawsquare(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> +KFR_INTRIN expr_func<fn_rawsquare, E1> rawsquare(E1&& x) +{ + return { {}, std::forward<E1>(x) }; +} +using fn_square = internal::in_oscillators<>::fn_square; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> +KFR_INTRIN ftype<T1> square(const T1& x) +{ + return internal::in_oscillators<>::square(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> +KFR_INTRIN expr_func<fn_square, E1> square(E1&& x) +{ + return { {}, std::forward<E1>(x) }; +} +using fn_squarenorm = internal::in_oscillators<>::fn_squarenorm; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> +KFR_INTRIN ftype<T1> squarenorm(const T1& x) +{ + return 
internal::in_oscillators<>::squarenorm(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> +KFR_INTRIN expr_func<fn_squarenorm, E1> squarenorm(E1&& x) +{ + return { {}, std::forward<E1>(x) }; +} +using fn_rawtriangle = internal::in_oscillators<>::fn_rawtriangle; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> +KFR_INTRIN ftype<T1> rawtriangle(const T1& x) +{ + return internal::in_oscillators<>::rawtriangle(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> +KFR_INTRIN expr_func<fn_rawtriangle, E1> rawtriangle(E1&& x) +{ + return { {}, std::forward<E1>(x) }; +} +using fn_triangle = internal::in_oscillators<>::fn_triangle; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> +KFR_INTRIN ftype<T1> triangle(const T1& x) +{ + return internal::in_oscillators<>::triangle(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> +KFR_INTRIN expr_func<fn_triangle, E1> triangle(E1&& x) +{ + return { {}, std::forward<E1>(x) }; +} +using fn_trianglenorm = internal::in_oscillators<>::fn_trianglenorm; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> +KFR_INTRIN ftype<T1> trianglenorm(const T1& x) +{ + return internal::in_oscillators<>::trianglenorm(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> +KFR_INTRIN expr_func<fn_trianglenorm, E1> trianglenorm(E1&& x) +{ + return { {}, std::forward<E1>(x) }; +} +using fn_rawsawtooth = internal::in_oscillators<>::fn_rawsawtooth; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> +KFR_INTRIN ftype<T1> rawsawtooth(const T1& x) +{ + return internal::in_oscillators<>::rawsawtooth(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> +KFR_INTRIN expr_func<fn_rawsawtooth, E1> rawsawtooth(E1&& x) +{ + return { {}, std::forward<E1>(x) }; +} +using fn_sawtooth = internal::in_oscillators<>::fn_sawtooth; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> 
+KFR_INTRIN ftype<T1> sawtooth(const T1& x) +{ + return internal::in_oscillators<>::sawtooth(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> +KFR_INTRIN expr_func<fn_sawtooth, E1> sawtooth(E1&& x) +{ + return { {}, std::forward<E1>(x) }; +} +using fn_sawtoothnorm = internal::in_oscillators<>::fn_sawtoothnorm; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> +KFR_INTRIN ftype<T1> sawtoothnorm(const T1& x) +{ + return internal::in_oscillators<>::sawtoothnorm(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> +KFR_INTRIN expr_func<fn_sawtoothnorm, E1> sawtoothnorm(E1&& x) +{ + return { {}, std::forward<E1>(x) }; +} +using fn_isawtooth = internal::in_oscillators<>::fn_isawtooth; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> +KFR_INTRIN ftype<T1> isawtooth(const T1& x) +{ + return internal::in_oscillators<>::isawtooth(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> +KFR_INTRIN expr_func<fn_isawtooth, E1> isawtooth(E1&& x) +{ + return { {}, std::forward<E1>(x) }; +} +using fn_isawtoothnorm = internal::in_oscillators<>::fn_isawtoothnorm; +template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> +KFR_INTRIN ftype<T1> isawtoothnorm(const T1& x) +{ + return internal::in_oscillators<>::isawtoothnorm(x); +} + +template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> +KFR_INTRIN expr_func<fn_isawtoothnorm, E1> isawtoothnorm(E1&& x) +{ + return { {}, std::forward<E1>(x) }; +} +} + +#pragma clang diagnostic pop diff --git a/include/kfr/dsp/resample.hpp b/include/kfr/dsp/resample.hpp @@ -0,0 +1,244 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
 *
 * KFR is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with KFR.
 *
 * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
 * Buying a commercial license is mandatory as soon as you develop commercial activities without
 * disclosing the source code of your own applications.
 * See http://www.kfrlib.com for details.
 */
#pragma once

#include "../base/function.hpp"
#include "../base/memory.hpp"
#include "../base/vec.hpp"
#include "../expressions/reduce.hpp"
#include "window.hpp"

#pragma clang diagnostic push
#if CID_HAS_WARNING("-Winaccessible-base")
#pragma clang diagnostic ignored "-Winaccessible-base"
#endif

namespace kfr
{
// Preset quality levels for the resampler. The csize value feeds the
// `quality` template parameter below; taps-per-phase = 1 << (quality + 1).
namespace resample_quality
{
constexpr csize_t<4> draft{};
constexpr csize_t<6> low{};
constexpr csize_t<8> normal{};
constexpr csize_t<10> high{};
}

namespace internal
{
template <cpu_t cc = cpu_t::native>
struct in_resampling : in_sqrt<cc>, in_abs<cc>, in_log_exp<cc>, in_sin_cos<cc>, in_window<cc>, in_reduce<cc>
{
private:
    using in_sqrt<cc>::sqrt;
    using in_abs<cc>::abs;
    using in_log_exp<cc>::log;
    using in_log_exp<cc>::exp;
    using in_log_exp<cc>::log_fmadd;
    using in_log_exp<cc>::exp_fmadd;
    using in_log_exp<cc>::exp10;
    using in_sin_cos<cc>::cos;
    using in_sin_cos<cc>::sinc;
    using in_reduce<cc>::dotproduct;
    using in_reduce<cc>::sum;

public:
    // Blackman window evaluated at normalized position n in [0, 1];
    // a is the alpha parameter (a = 0.16 gives the classic Blackman coefficients).
    template <typename T1, typename T2>
    static inline T1 blackman(T1 n, T2 a)
    {
        const T1 a0 = (1 - a) * 0.5;
        const T1 a1 = 0.5;
        const T1 a2 = a * 0.5;
        n = n * c_pi<T1, 2>;
        return a0 - a1 * cos(n) + a2 * cos(2 * n);
    }

    // Polyphase FIR resampler with rational ratio
    // interpolation_factor / decimation_factor. The prototype low-pass is a
    // Blackman-windowed sinc of (depth * interpolation_factor) taps, used as
    // `interpolation_factor` phases of `depth` taps each (see tap_ptr below).
    // Streaming state (delay line, input/output positions) persists across calls.
    template <typename T, size_t quality>
    struct resampler
    {
        template <cpu_t newcpu>
        using retarget_this = typename in_resampling<newcpu>::template resampler<T, quality>;

        using itype = i64;

        // Taps per polyphase branch, fixed by the quality preset.
        constexpr static itype depth = static_cast<itype>(1 << (quality + 1));

        resampler(itype interpolation_factor, itype decimation_factor, T scale = T(1), T cutoff = 0.49)
            : input_position(0), output_position(0)
        {
            // Reduce the ratio to lowest terms before sizing the filter.
            const i64 gcf = gcd(interpolation_factor, decimation_factor);
            interpolation_factor /= gcf;
            decimation_factor /= gcf;

            taps = depth * interpolation_factor;
            order = size_t(depth * interpolation_factor - 1);

            this->interpolation_factor = interpolation_factor;
            this->decimation_factor = decimation_factor;

            const itype halftaps = taps / 2;
            filter = univector<T>(size_t(taps), T());
            delay = univector<T>(size_t(depth), T());

            // Place the anti-alias/anti-image cutoff below the Nyquist of the
            // lower of the two rates.
            cutoff = cutoff / std::max(decimation_factor, interpolation_factor);

            // Build the windowed-sinc prototype; jj walks the taps in
            // polyphase (interleaved) order so each slice of `depth` taps in
            // `filter` is one phase of the prototype.
            for (itype j = 0, jj = 0; j < taps; j++)
            {
                filter[size_t(j)] = scale * 2 * interpolation_factor * cutoff *
                                    sinc((jj - halftaps) * cutoff * c_pi<T, 2>) *
                                    blackman(T(jj) / T(taps - 1), T(0.16));
                jj += size_t(interpolation_factor);
                if (jj >= taps)
                    jj = jj - taps + 1;
            }

            // Normalize so the total tap sum equals interpolation_factor,
            // compensating the gain loss of zero-stuffing.
            const T s = reciprocal(sum(filter)) * interpolation_factor;
            filter = filter * s;
        }
        // Feed `zerosize` zero-valued input samples (e.g. to flush the tail).
        // Writes up to the produced count into dest (skipped when dest is
        // null) and returns the number of output samples produced.
        KFR_INLINE size_t operator()(T* dest, size_t zerosize)
        {
            size_t outputsize = 0;
            const itype srcsize = itype(zerosize);

            for (size_t i = 0;; i++)
            {
                // Map output index ii back to the input stream and select the
                // polyphase branch (`start`) for this output sample.
                const itype ii = itype(i) + output_position;
                const itype workindex = ii * (decimation_factor);
                const itype workindex_rem = workindex % (interpolation_factor);
                const itype start = workindex_rem ? (interpolation_factor)-workindex_rem : 0;
                itype srcindex = workindex / (interpolation_factor);
                srcindex = workindex_rem ? srcindex + 1 : srcindex;
                const univector_ref<T> tap_ptr = filter.slice(static_cast<size_t>(start * depth));
                srcindex = srcindex - (depth - 1);

                if (srcindex + depth >= input_position + srcsize)
                    break;
                outputsize++;

                if (dest)
                {
                    if (srcindex >= input_position)
                    {
                        // Entire window lies in the (zero) input: output is 0.
                        dest[i] = T(0);
                    }
                    else
                    {
                        // Window overlaps the stored delay line.
                        const itype prev_count = input_position - srcindex;
                        dest[i] = dotproduct(delay.slice(size_t(depth - prev_count)), tap_ptr);
                    }
                }
            }
            // Update the delay line with the trailing `depth` (zero) samples.
            if (srcsize >= depth)
            {
                delay = zeros();
            }
            else
            {
                delay.slice(0, size_t(depth - srcsize)) = delay.slice(size_t(srcsize));
                delay.slice(size_t(depth - srcsize)) = zeros();
            }

            input_position += srcsize;
            output_position += outputsize;
            return outputsize;
        }
        // Process a block of real input samples; same contract as above.
        KFR_INLINE size_t operator()(T* dest, univector_ref<const T> src)
        {
            size_t outputsize = 0;
            const itype srcsize = itype(src.size());

            for (size_t i = 0;; i++)
            {
                const itype ii = itype(i) + output_position;
                const itype workindex = ii * (decimation_factor);
                const itype workindex_rem = workindex % (interpolation_factor);
                const itype start = workindex_rem ? (interpolation_factor)-workindex_rem : 0;
                itype srcindex = workindex / (interpolation_factor);
                srcindex = workindex_rem ? srcindex + 1 : srcindex;
                const univector_ref<T> tap_ptr = filter.slice(static_cast<size_t>(start * depth));
                srcindex = srcindex - (depth - 1);

                if (srcindex + depth >= input_position + srcsize)
                    break;
                outputsize++;

                if (dest)
                {
                    if (srcindex >= input_position)
                    {
                        // Window lies entirely inside the current src block.
                        dest[i] = dotproduct(src.slice(size_t(srcindex - input_position), size_t(depth)),
                                             tap_ptr /*, depth*/);
                    }
                    else
                    {
                        // Window straddles the delay line and the src block:
                        // sum both partial dot products.
                        const itype prev_count = input_position - srcindex;
                        dest[i] =
                            dotproduct(delay.slice(size_t(depth - prev_count)),
                                       tap_ptr /*, size_t(prev_count)*/) +
                            dotproduct(src, tap_ptr.slice(
                                                size_t(prev_count),
                                                size_t(depth - prev_count)) /*, size_t(depth - prev_count)*/);
                    }
                }
            }
            // Keep the last `depth` input samples for the next call.
            if (srcsize >= depth)
            {
                delay = src.slice(size_t(srcsize - depth));
            }
            else
            {
                delay.slice(0, size_t(depth - srcsize)) = delay.slice(size_t(srcsize));
                delay.slice(size_t(depth - srcsize)) = src;
            }

            input_position += srcsize;
            output_position += outputsize;
            return outputsize;
        }
        itype taps;                 // total taps in the prototype filter
        size_t order;               // filter order (taps - 1)
        itype interpolation_factor; // ratio numerator (after gcd reduction)
        itype decimation_factor;    // ratio denominator (after gcd reduction)
        univector<T> filter;        // polyphase coefficients, `depth` per phase
        univector<T> delay;         // last `depth` input samples
        itype input_position;       // total input samples consumed so far
        itype output_position;      // total output samples produced so far
    };
};
}

namespace native
{
// Factory: build a resampler for the given quality preset and rational ratio
// interpolation_factor / decimation_factor.
template <typename T, size_t quality>
inline internal::in_resampling<>::resampler<T, quality> resampler(csize_t<quality>,
                                                                  size_t interpolation_factor,
                                                                  size_t decimation_factor, T scale = T(1),
                                                                  T cutoff = 0.49)
{
    using itype = typename internal::in_resampling<>::resampler<T, quality>::itype;
    return internal::in_resampling<>::resampler<T, quality>(itype(interpolation_factor),
                                                            itype(decimation_factor), scale, cutoff);
}
}
}

#pragma clang diagnostic pop
diff --git a/include/kfr/dsp/speaker.hpp b/include/kfr/dsp/speaker.hpp
@@ -0,0 +1,91 @@
/**
 * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
 * This file is part of KFR
 *
 * KFR is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as
published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * KFR is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with KFR.
 *
 * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
 * Buying a commercial license is mandatory as soon as you develop commercial activities without
 * disclosing the source code of your own applications.
 * See http://www.kfrlib.com for details.
 */
#pragma once

namespace kfr
{

// Identifier for a single speaker channel. Short aliases (M, L, R, C, ...)
// map onto the corresponding long names via static_cast so both spellings
// compare equal.
enum class Speaker : int
{
    Mono = 0,
    M = static_cast<int>(Mono),
    Left = 1,
    L = static_cast<int>(Left),
    Right = 2,
    R = static_cast<int>(Right),
    Center = 3,
    C = static_cast<int>(Center),
    Lfe = 4,
    Ls = 5,
    LeftSurround = static_cast<int>(Ls),
    Rs = 6,
    RightSurround = static_cast<int>(Rs),
    Lc = 7,
    Rc = 8,
    S = 9,
    Cs = static_cast<int>(S),
    Sl = 10,
    Sr = 11,
    Tm = 12,
    Tfl = 13,
    Tfc = 14,
    Tfr = 15,
    Trl = 16,
    Trc = 17,
    Trr = 18,
    Lfe2 = 19
};

// Predefined multichannel speaker layouts (mono/stereo variants through
// cinema and music surround configurations).
enum class SpeakerArrangement : int
{
    Mono = 0,
    Stereo = 1,
    StereoSurround = 2,
    StereoCenter = 3,
    StereoSide = 4,
    StereoCLfe = 5,
    Cine30 = 6,
    Music30 = 7,
    Cine31 = 8,
    Music31 = 9,
    Cine40 = 10,
    Music40 = 11,
    Cine41 = 12,
    Music41 = 13,
    Arr50 = 14,
    Arr51 = 15,
    Cine60 = 16,
    Music60 = 17,
    Cine61 = 18,
    Music61 = 19,
    Cine70 = 20,
    Music70 = 21,
    Cine71 = 22,
    Music71 = 23,
    Cine80 = 24,
    Music80 = 25,
    Cine81 = 26,
    Music81 = 27,
    Arr102 = 28
};
}
diff --git a/include/kfr/dsp/units.hpp b/include/kfr/dsp/units.hpp
@@ -0,0 +1,219 @@
/**
 * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
 * This file is part of KFR
 *
 * KFR is free software: you
can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * KFR is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with KFR.
 *
 * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
 * Buying a commercial license is mandatory as soon as you develop commercial activities without
 * disclosing the source code of your own applications.
 * See http://www.kfrlib.com for details.
 */
#pragma once

#include "../base/log_exp.hpp"
#include "../base/vec.hpp"
#include "../expressions/basic.hpp"

#pragma clang diagnostic push
#if CID_HAS_WARNING("-Winaccessible-base")
#pragma clang diagnostic ignored "-Winaccessible-base"
#endif

namespace kfr
{

using sample_rate_t = double;

namespace internal
{
// Unit conversions for audio DSP: amplitude/power <-> decibels and
// note number <-> frequency. Constants below are precomputed:
//   8.68588963806503655...  = 20 / ln(10)   (amp -> dB via natural log)
//   0.11512925464970228...  = ln(10) / 20   (dB -> amp)
//   0.05776226504666210...  = ln(2) / 12    (one semitone, natural log)
//   17.31234049066756088... = 12 / ln(2)
template <cpu_t c = cpu_t::native, cpu_t cc = c>
struct in_dsp_units : in_log_exp<cc>, in_select<cc>, in_round<cc>, in_abs<cc>
{
private:
    using in_log_exp<cc>::log;
    using in_log_exp<cc>::exp;
    using in_log_exp<cc>::log10;
    using in_log_exp<cc>::exp10;
    using in_log_exp<cc>::exp_fmadd;
    using in_log_exp<cc>::log_fmadd;
    using in_select<cc>::select;
    using in_round<cc>::fract;
    using in_abs<cc>::abs;

public:
    // Amplitude ratio -> decibels: 20 * log10(amp).
    template <typename T, typename TF = ftype<T>>
    KFR_SINTRIN TF amp_to_dB(T amp)
    {
        return log(cast<subtype<TF>>(amp)) * subtype<TF>(8.6858896380650365530225783783322);
        // return T( 20.0 ) * log10( level );
    }

    // Decibels -> amplitude ratio: 10^(dB / 20).
    template <typename T, typename TF = ftype<T>>
    KFR_SINTRIN TF dB_to_amp(T dB)
    {
        return exp(dB * subtype<TF>(0.11512925464970228420089957273422));
        // return exp10( dB / 20 );
    }

    // amp_to_dB with an additive offset, fused via log_fmadd.
    template <typename T, typename TF = ftype<T>>
    KFR_SINTRIN TF amp_to_dB(T amp, T offset)
    {
        return log_fmadd(amp, subtype<TF>(8.6858896380650365530225783783322), offset);
        // return T( 20.0 ) * log10( level );
    }

    // dB_to_amp with a dB offset, fused via exp_fmadd.
    template <typename T, typename TF = ftype<T>>
    KFR_SINTRIN TF dB_to_amp(T dB, T offset)
    {
        auto offs = -subtype<TF>(0.11512925464970228420089957273422) * offset;
        return exp_fmadd(dB, subtype<TF>(0.11512925464970228420089957273422), offs);
        // return exp10( dB / 20 );
    }

    // Power ratio -> decibels: 10 * log10(x).
    template <typename T>
    KFR_SINTRIN T power_to_dB(T x)
    {
        return log(x) * (10 * c_recip_log_10<T>);
    }

    // Decibels -> power ratio: 10^(x / 10); -inf dB maps to exactly 0.
    template <typename T>
    KFR_SINTRIN T dB_to_power(T x)
    {
        if (x == -c_infinity<T>)
            return 0.0;
        else
            return exp(x * (c_log_10<T> / 10.0));
    }

    // MIDI note number -> frequency in Hz (note 69 -> 440 Hz):
    // the offset is ln(440) - 69 * ln(2)/12.
    template <typename T, typename TF = ftype<T>>
    KFR_SINTRIN TF note_to_hertz(T note)
    {
        const subtype<TF> offset = 2.1011784386926213177653145771814;

        return exp_fmadd(note, subtype<TF>(0.05776226504666210911810267678818), offset);
    }

    // Frequency in Hz -> MIDI note number (inverse of note_to_hertz).
    template <typename T, typename TF = ftype<T>>
    KFR_SINTRIN TF hertz_to_note(T hertz)
    {
        const subtype<TF> offset = -36.376316562295915248836189714583;

        return log_fmadd(hertz, subtype<TF>(17.312340490667560888319096172023), offset);
    }

    // note_to_hertz with an arbitrary tuning reference (tunenote, tunehertz).
    template <typename T1, typename T2, typename T3, typename Tc = common_type<T1, T2, T3, f32>>
    KFR_SINTRIN Tc note_to_hertz(T1 note, T2 tunenote, T3 tunehertz)
    {
        const Tc offset = log(tunehertz) - tunenote * subtype<Tc>(0.05776226504666210911810267678818);

        return exp_fmadd(note, subtype<Tc>(0.05776226504666210911810267678818), offset);
    }

    // hertz_to_note with an arbitrary tuning reference (tunenote, tunehertz).
    template <typename T1, typename T2, typename T3, typename Tc = common_type<T1, T2, T3, f32>>
    KFR_SINTRIN Tc hertz_to_note(T1 hertz, T2 tunenote, T3 tunehertz)
    {
        const Tc offset = tunenote - log(tunehertz) * subtype<Tc>(17.312340490667560888319096172023);

        return log_fmadd(hertz, subtype<Tc>(17.312340490667560888319096172023), offset);
    }

    KFR_SPEC_FN(in_dsp_units, note_to_hertz)
    KFR_SPEC_FN(in_dsp_units, hertz_to_note)
    KFR_SPEC_FN(in_dsp_units, amp_to_dB)
    KFR_SPEC_FN(in_dsp_units, dB_to_amp)
    KFR_SPEC_FN(in_dsp_units, power_to_dB)
    KFR_SPEC_FN(in_dsp_units, dB_to_power)
};
}

// Public wrappers: each function has a scalar overload (forwards to the
// internal implementation) and a lazy expression overload.
using fn_note_to_hertz = internal::in_dsp_units<>::fn_note_to_hertz;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
KFR_INTRIN ftype<T1> note_to_hertz(const T1& x)
{
    return internal::in_dsp_units<>::note_to_hertz(x);
}

template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
KFR_INTRIN expr_func<fn_note_to_hertz, E1> note_to_hertz(E1&& x)
{
    return { {}, std::forward<E1>(x) };
}
using fn_hertz_to_note = internal::in_dsp_units<>::fn_hertz_to_note;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
KFR_INTRIN ftype<T1> hertz_to_note(const T1& x)
{
    return internal::in_dsp_units<>::hertz_to_note(x);
}
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
KFR_INTRIN expr_func<fn_hertz_to_note, E1> hertz_to_note(E1&& x)
{
    return { {}, std::forward<E1>(x) };
}
using fn_amp_to_dB = internal::in_dsp_units<>::fn_amp_to_dB;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
KFR_INTRIN ftype<T1> amp_to_dB(const T1& x)
{
    return internal::in_dsp_units<>::amp_to_dB(x);
}

template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
KFR_INTRIN expr_func<fn_amp_to_dB, E1> amp_to_dB(E1&& x)
{
    return { {}, std::forward<E1>(x) };
}
using fn_dB_to_amp = internal::in_dsp_units<>::fn_dB_to_amp;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
KFR_INTRIN ftype<T1> dB_to_amp(const T1& x)
{
    return internal::in_dsp_units<>::dB_to_amp(x);
}

template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
KFR_INTRIN expr_func<fn_dB_to_amp, E1> dB_to_amp(E1&& x)
{
    return { {}, std::forward<E1>(x) };
}
using fn_power_to_dB = internal::in_dsp_units<>::fn_power_to_dB;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
KFR_INTRIN ftype<T1> power_to_dB(const T1& x)
{
    return internal::in_dsp_units<>::power_to_dB(x);
}

template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
KFR_INTRIN expr_func<fn_power_to_dB, E1> power_to_dB(E1&& x)
{
    return { {}, std::forward<E1>(x) };
}
using fn_dB_to_power = internal::in_dsp_units<>::fn_dB_to_power;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
KFR_INTRIN ftype<T1> dB_to_power(const T1& x)
{
    return internal::in_dsp_units<>::dB_to_power(x);
}

template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
KFR_INTRIN expr_func<fn_dB_to_power, E1> dB_to_power(E1&& x)
{
    return { {}, std::forward<E1>(x) };
}
}

#pragma clang diagnostic pop
diff --git a/include/kfr/dsp/weighting.hpp b/include/kfr/dsp/weighting.hpp
@@
-0,0 +1,122 @@
/**
 * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
 * This file is part of KFR
 *
 * KFR is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * KFR is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with KFR.
 *
 * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
 * Buying a commercial license is mandatory as soon as you develop commercial activities without
 * disclosing the source code of your own applications.
 * See http://www.kfrlib.com for details.
 */
#pragma once

#include "../base/sqrt.hpp"
#include "../base/vec.hpp"
#include "units.hpp"

namespace kfr
{
namespace internal
{

// A/B/C frequency-weighting curves (sound-level metering). Each curve is an
// unnormalized rational magnitude response in f, then scaled by weight_x_gain
// so that the response is exactly 1 at 1000 Hz.
// NOTE(review): the pole constants used here are 20.6, 107.7, 737.9, 158.5
// and 12200 Hz; IEC 61672-1 specifies 12194 Hz for the upper pole — confirm
// whether 12200 is intentional (difference is negligible after 1 kHz
// normalization, but worth verifying).
template <cpu_t c = cpu_t::native, cpu_t cc = c>
struct in_weight : in_sqrt<cc>, in_dsp_units<cc>
{
private:
    using in_dsp_units<cc>::amp_to_dB;

public:
    // Unnormalized A-weighting magnitude at frequency f (Hz).
    template <typename T>
    KFR_SINTRIN T weight_a_unnorm(T f)
    {
        const T f2 = pow2(f);
        const T nom = pow2(12200) * pow4(f);
        const T den =
            (f2 + pow2(20.6)) * (sqrt((f2 + pow2(107.7)) * (f2 + pow2(737.9)))) * (f2 + pow2(12200));
        return nom / den;
    }

    // Normalization so aweighting(1000 Hz) == 1.
    template <typename T>
    constexpr static T weight_a_gain = reciprocal(weight_a_unnorm(T(1000.0)));

    template <typename T>
    KFR_SINTRIN T aweighting(T f)
    {
        return weight_a_unnorm(f) * weight_a_gain<subtype<T>>;
    }

    // Unnormalized B-weighting magnitude at frequency f (Hz).
    template <typename T>
    KFR_SINTRIN T weight_b_unnorm(T f)
    {
        const T f2 = pow2(f);
        const T nom = pow2(12200) * pow3(f);
        const T den = (f2 + pow2(20.6)) * (sqrt((f2 + pow2(158.5)))) * (f2 + pow2(12200));

        return nom / den;
    }

    // Normalization so bweighting(1000 Hz) == 1.
    template <typename T>
    constexpr static T weight_b_gain = reciprocal(weight_b_unnorm(T(1000.0)));

    template <typename T>
    KFR_SINTRIN T bweighting(T f)
    {
        return weight_b_unnorm(f) * weight_b_gain<subtype<T>>;
    }

    // Unnormalized C-weighting magnitude at frequency f (Hz).
    template <typename T>
    KFR_SINTRIN T weight_c_unnorm(T f)
    {
        const T f2 = pow2(f);
        const T nom = pow2(12200) * f2;
        const T den = (f2 + pow2(20.6)) * (f2 + pow2(12200));

        return nom / den;
    }

    // Normalization so cweighting(1000 Hz) == 1.
    template <typename T>
    constexpr static T weight_c_gain = reciprocal(weight_c_unnorm(T(1000.0)));

    template <typename T>
    KFR_SINTRIN T cweighting(T f)
    {
        return weight_c_unnorm(f) * weight_c_gain<subtype<T>>;
    }

    // Same curves expressed in decibels (0 dB at 1000 Hz).
    template <typename T>
    KFR_SINTRIN T aweightingdB(T f)
    {
        return amp_to_dB(aweighting(f));
    }
    template <typename T>
    KFR_SINTRIN T bweightingdB(T f)
    {
        return amp_to_dB(bweighting(f));
    }
    template <typename T>
    KFR_SINTRIN T cweightingdB(T f)
    {
        return amp_to_dB(cweighting(f));
    }

    KFR_SPEC_FN(in_weight, aweighting)
    KFR_SPEC_FN(in_weight, bweighting)
    KFR_SPEC_FN(in_weight, cweighting)
    KFR_SPEC_FN(in_weight, aweightingdB)
    KFR_SPEC_FN(in_weight, bweightingdB)
    KFR_SPEC_FN(in_weight, cweightingdB)
};
}
}
diff --git a/include/kfr/dsp/window.hpp b/include/kfr/dsp/window.hpp
@@ -0,0 +1,685 @@
/**
 * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
 * This file is part of KFR
 *
 * KFR is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * KFR is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with KFR.
 *
 * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
 * Buying a commercial license is mandatory as soon as you develop commercial activities without
 * disclosing the source code of your own applications.
 * See http://www.kfrlib.com for details.
 */
#pragma once

#include "../base/log_exp.hpp"
#include "../base/sin_cos.hpp"
#include "../base/sqrt.hpp"
#include "../base/vec.hpp"
#include "../expressions/pointer.hpp"

#pragma clang diagnostic push
#if CID_HAS_WARNING("-Winaccessible-base")
#pragma clang diagnostic ignored "-Winaccessible-base"
#endif

namespace kfr
{

// Supported window functions (values are stable identifiers, starting at 1).
enum class window_type
{
    rectangular = 1,
    triangular = 2,
    bartlett = 3,
    cosine = 4,
    hann = 5,
    bartlett_hann = 6,
    hamming = 7,
    bohman = 8,
    blackman = 9,
    blackman_harris = 10,
    kaiser = 11,
    flattop = 12,
    gaussian = 13,
    lanczos = 14,
};

template <window_type type>
using cwindow_type_t = cval_t<window_type, type>;

template <window_type type>
constexpr cwindow_type_t<type> cwindow_type{};

// symmetric: endpoints included (filter design); periodic: for spectral analysis.
enum class window_symmetry
{
    periodic,
    symmetric
};

namespace internal
{

// Taylor-series coefficients for the modified Bessel function I0,
// used by modzerobessel below (term i is 1 / (i! * (i+1)!) style factors).
template <typename T>
constexpr T bessel_coef[] = { T(0.25),
                              T(0.027777777777777776236),
                              T(0.0017361111111111110147),
                              T(6.9444444444444444384e-005),
                              T(1.9290123456790123911e-006),
                              T(3.9367598891408417495e-008),
                              T(6.1511873267825652335e-010),
                              T(7.5940584281266239246e-012),
                              T(7.5940584281266233693e-014),
                              T(6.2760813455591932909e-016),
                              T(4.3583898233049949985e-018),
                              T(2.5789288895295827557e-020),
                              T(1.3157800456783586208e-022),
                              T(5.8479113141260384983e-025),
                              T(2.2843403570804837884e-027),
                              T(7.904291893012054025e-030),
                              T(2.4395962632753252792e-032),
                              T(6.75788438580422547e-035),
                              T(1.689471096451056426e-037),
                              T(3.8310002187098784929e-040),
                              T(7.9152897080782616517e-043),
                              T(1.4962740468957016443e-045),
                              T(2.5976979980828152196e-048),
                              T(4.1563167969325041577e-051),
                              T(6.1483976285983795968e-054),
                              T(8.434015951438105991e-057),
                              T(1.0757673407446563809e-059),
                              T(1.2791526049282476926e-062),
                              T(1.4212806721424974034e-065),
                              T(1.4789601166935457918e-068),
                              T(1.4442969889585408123e-071),
                              T(1.3262598613026086927e-074),
                              T(1.1472836170437790782e-077),
                              T(9.3655805472961564331e-081),
                              T(7.2265282000741942594e-084),
                              T(5.2786911614858977913e-087),
                              T(3.6556032974279072401e-090),
                              T(2.4034209713529963119e-093),
                              T(1.5021381070956226783e-096) };

// Modified Bessel function of the first kind, order zero, I0(x), via series
// expansion in (x/2)^2. 20 terms for f32, 39 for f64. Used by the Kaiser window.
template <typename T, size_t N>
KFR_INLINE vec<T, N> modzerobessel(vec<T, N> x)
{
    const vec<T, N> x_2 = x * 0.5;
    const vec<T, N> x_2_sqr = x_2 * x_2;
    vec<T, N> num = x_2_sqr;
    vec<T, N> result;
    result = 1 + x_2_sqr;

    KFR_LOOP_UNROLL
    for (size_t i = 0; i < (sizeof(T) == 4 ? 20 : 39); i++)
    {
        result = fmadd((num *= x_2_sqr), bessel_coef<T>[i], result);
    }
    return result;
}

// Window generators. Each expression_* struct is a lazy input expression of
// length `size`; operator()(cinput, index, vec_t) yields window values at
// [index, index + N). The window_linspace_* helpers produce the normalized
// argument grid each formula is evaluated on.
template <cpu_t cpu = cpu_t::native>
struct in_window : in_sin_cos<cpu>, in_log_exp<cpu>, in_select<cpu>, in_sqrt<cpu>, in_abs<cpu>
{
private:
    using in_sin_cos<cpu>::sin;
    using in_sin_cos<cpu>::cos;
    using in_sin_cos<cpu>::sinc;
    using in_log_exp<cpu>::exp;
    using in_select<cpu>::select;
    using in_sqrt<cpu>::sqrt;
    using in_abs<cpu>::abs;

public:
    // Grid over [0, 1].
    template <typename T>
    struct window_linspace_0_1 : expression_linspace<T>
    {
        window_linspace_0_1(size_t size, window_symmetry symmetry)
            : expression_linspace<T>(0, 1, size, symmetry == window_symmetry::symmetric)
        {
        }
    };

    // Grid over [-1, 1].
    template <typename T>
    struct window_linspace_m1_1 : expression_linspace<T>
    {
        window_linspace_m1_1(size_t size, window_symmetry symmetry)
            : expression_linspace<T>(-1, 1, size, symmetry == window_symmetry::symmetric)
        {
        }
    };

    // Grid over [-pi, pi].
    template <typename T>
    struct window_linspace_mpi_pi : expression_linspace<T>
    {
        window_linspace_mpi_pi(size_t size, window_symmetry symmetry)
            : expression_linspace<T>(-c_pi<T>, +c_pi<T>, size, symmetry == window_symmetry::symmetric)
        {
        }
    };

    // Grid over [-(size-1)/size, (size-1)/size] (endpoints pulled in).
    template <typename T>
    struct window_linspace_m1_1_trunc : expression_linspace<T>
    {
        window_linspace_m1_1_trunc(size_t size, window_symmetry symmetry)
            : expression_linspace<T>(-T(size - 1) / size, T(size - 1) / size, size,
                                     symmetry == window_symmetry::symmetric)
        {
        }
    };

    // Symmetric truncated grid; endpoint depends on parity of size.
    template <typename T>
    struct window_linspace_m1_1_trunc2 : expression_linspace<T>
    {
        window_linspace_m1_1_trunc2(size_t size, window_symmetry symmetry)
            : expression_linspace<T>(symmetric_linspace,
                                     (size & 1) ? T(size - 1) / T(size + 1) : T(size - 1) / (size), size,
                                     symmetry == window_symmetry::symmetric)
        {
        }
    };

    // Rectangular: 1 inside [0, size), 0 outside.
    template <typename T>
    struct expression_rectangular : input_expression
    {
        using value_type = T;

        template <cpu_t newcpu>
        using retarget_this = typename in_window<newcpu>::template expression_rectangular<T>;
        expression_rectangular(size_t size, T = T(), window_symmetry = window_symmetry::symmetric)
            : m_size(size)
        {
        }
        template <typename U, size_t N>
        KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
        {
            using UI = utype<U>;
            const vec<UI, N> i = enumerate(vec<UI, N>()) + cast<UI>(index);
            return select(i < cast<UI>(m_size), U(1), U(0));
        }
        size_t size() const { return m_size; }

    private:
        size_t m_size;
    };

    // Triangular: 1 - |x| on the truncated grid.
    template <typename T>
    struct expression_triangular : input_expression
    {
        using value_type = T;

        template <cpu_t newcpu>
        using retarget_this = typename in_window<newcpu>::template expression_triangular<T>;
        expression_triangular(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric)
            : linspace(size, symmetry), m_size(size)
        {
        }
        template <typename U, size_t N>
        KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
        {
            constexpr vec_t<T, N> y{};
            return cast<U>(1 - abs(linspace(cinput, index, y)));
        }
        size_t size() const { return m_size; }

    private:
        window_linspace_m1_1_trunc2<T> linspace;
        size_t m_size;
    };

    // Bartlett: 1 - |x| on [-1, 1] (zero endpoints).
    template <typename T>
    struct expression_bartlett : input_expression
    {
        using value_type = T;

        template <cpu_t newcpu>
        using retarget_this = typename in_window<newcpu>::template expression_bartlett<T>;
        expression_bartlett(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric)
            : linspace(size, symmetry), m_size(size)
        {
        }
        template <typename U, size_t N>
        KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
        {
            constexpr vec_t<T, N> y{};
            return cast<U>(1 - abs(linspace(cinput, index, y)));
        }
        size_t size() const { return m_size; }

    private:
        window_linspace_m1_1<T> linspace;
        size_t m_size;
    };

    // Cosine (sine window): sin(pi * x), x in [0, 1].
    template <typename T>
    struct expression_cosine : input_expression
    {
        using value_type = T;

        template <cpu_t newcpu>
        using retarget_this = typename in_window<newcpu>::template expression_cosine<T>;
        expression_cosine(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric)
            : linspace(size, symmetry), m_size(size)
        {
        }
        template <typename U, size_t N>
        KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
        {
            constexpr vec_t<T, N> y{};
            return cast<U>(sin(c_pi<T> * linspace(cinput, index, y)));
        }
        size_t size() const { return m_size; }

    private:
        window_linspace_0_1<T> linspace;
        size_t m_size;
    };

    // Hann: 0.5 * (1 - cos(2*pi*x)).
    template <typename T>
    struct expression_hann : input_expression
    {
        using value_type = T;

        template <cpu_t newcpu>
        using retarget_this = typename in_window<newcpu>::template expression_hann<T>;
        expression_hann(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric)
            : linspace(size, symmetry), m_size(size)
        {
        }
        template <typename U, size_t N>
        KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
        {
            constexpr vec_t<T, N> y{};
            return cast<U>(T(0.5) * (T(1) - cos(c_pi<T, 2> * linspace(cinput, index, y))));
        }
        size_t size() const { return m_size; }

    private:
        window_linspace_0_1<T> linspace;
        size_t m_size;
    };

    // Bartlett-Hann: combination of triangular and Hann terms.
    template <typename T>
    struct expression_bartlett_hann : input_expression
    {
        using value_type = T;

        template <cpu_t newcpu>
        using retarget_this = typename in_window<newcpu>::template expression_bartlett_hann<T>;

        expression_bartlett_hann(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric)
            : linspace(size, symmetry), m_size(size)
        {
        }
        template <typename U, size_t N>
        KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
        {
            constexpr vec_t<T, N> y{};
            const vec<T, N> xx = linspace(cinput, index, y);
            return cast<U>(T(0.62) - T(0.48) * abs(xx - T(0.5)) + T(0.38) * cos(c_pi<T, 2> * (xx - T(0.5))));
        }
        size_t size() const { return m_size; }

    private:
        window_linspace_0_1<T> linspace;
        size_t m_size;
    };

    // Hamming: alpha - (1 - alpha) * cos(2*pi*x); default alpha = 0.54.
    template <typename T>
    struct expression_hamming : input_expression
    {
        using value_type = T;

        template <cpu_t newcpu>
        using retarget_this = typename in_window<newcpu>::template expression_hamming<T>;
        expression_hamming(size_t size, T alpha = 0.54, window_symmetry symmetry = window_symmetry::symmetric)
            : linspace(size, symmetry), alpha(alpha), m_size(size)
        {
        }
        template <typename U, size_t N>
        KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
        {
            constexpr vec_t<T, N> y{};
            return cast<U>(alpha - (1.0 - alpha) * (cos(c_pi<T, 2> * linspace(cinput, index, y))));
        }
        size_t size() const { return m_size; }

    private:
        window_linspace_0_1<T> linspace;
        T alpha;
        size_t m_size;
    };

    // Bohman: (1-|x|)cos(pi|x|) + sin(pi|x|)/pi on [-1, 1].
    // NOTE(review): n is declared vec<U, N> but mixed with T-typed constants
    // below — confirm this is intended when U != T.
    template <typename T>
    struct expression_bohman : input_expression
    {
        using value_type = T;

        template <cpu_t newcpu>
        using retarget_this = typename in_window<newcpu>::template expression_bohman<T>;
        expression_bohman(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric)
            : linspace(size, symmetry), m_size(size)
        {
        }
        template <typename U, size_t N>
        KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
        {
            constexpr vec_t<T, N> y{};
            const vec<U, N> n = abs(linspace(cinput, index, y));
            return cast<U>((T(1) - n) * cos(c_pi<T> * n) + (T(1) / c_pi<T>)*sin(c_pi<T> * n));
        }
        size_t size() const { return m_size; }

    private:
        window_linspace_m1_1<T> linspace;
        size_t m_size;
    };

    // Blackman: a0 - a1 cos(2*pi*x) + a2 cos(4*pi*x); default alpha = 0.16.
    template <typename T>
    struct expression_blackman : input_expression
    {
        using value_type = T;

        template <cpu_t newcpu>
        using retarget_this = typename in_window<newcpu>::template expression_blackman<T>;
        expression_blackman(size_t size, T alpha = 0.16,
                            window_symmetry symmetry = window_symmetry::symmetric)
            : linspace(size, symmetry), a0((1 - alpha) * 0.5), a1(0.5), a2(alpha * 0.5), m_size(size)
        {
        }
        template <typename U, size_t N>
        KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
        {
            constexpr vec_t<T, N> y{};
            const vec<T, N> n = linspace(cinput, index, y);
            return cast<U>(a0 - a1 * cos(c_pi<T, 2> * n) + a2 * cos(c_pi<T, 4> * n));
        }
        size_t size() const { return m_size; }

    private:
        window_linspace_0_1<T> linspace;
        T a0, a1, a2;
        size_t m_size;
    };

    // Blackman-Harris: 4-term cosine sum with fixed coefficients.
    template <typename T>
    struct expression_blackman_harris : input_expression
    {
        using value_type = T;

        template <cpu_t newcpu>
        using retarget_this = typename in_window<newcpu>::template expression_blackman_harris<T>;
        expression_blackman_harris(size_t size, T = T(),
                                   window_symmetry symmetry = window_symmetry::symmetric)
            : linspace(size, symmetry), m_size(size)
        {
        }
        template <typename U, size_t N>
        KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
        {
            constexpr vec_t<T, N> y{};
            const vec<T, N> n = linspace(cinput, index, y) * c_pi<T, 2>;

            return cast<U>(T(0.35875) - T(0.48829) * cos(n) + T(0.14128) * cos(2 * n) -
                           T(0.01168) * cos(3 * n));
        }
        size_t size() const { return m_size; }

    private:
        window_linspace_0_1<T> linspace;
        size_t m_size;
    };

    // Kaiser: I0(beta * sqrt(1 - x^2)) / I0(beta); m caches 1/I0(beta).
    template <typename T>
    struct expression_kaiser : input_expression
    {
        using value_type = T;

        template <cpu_t newcpu>
        using retarget_this = typename in_window<newcpu>::template expression_kaiser<T>;
        expression_kaiser(size_t size, T beta = 0.5, window_symmetry symmetry = window_symmetry::symmetric)
            : linspace(size, symmetry), beta(beta), m(reciprocal(modzerobessel(make_vector(beta))[0])),
              m_size(size)
        {
        }
        template <typename U, size_t N>
        KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
        {
            constexpr vec_t<T, N> y{};
            return cast<U>(modzerobessel(beta * sqrt(1 - sqr(linspace(cinput, index, y)))) * m);
        }
        size_t size() const { return m_size; }

    private:
        window_linspace_m1_1<T> linspace;
        T beta;
        T m;
        size_t m_size;
    };

    // Flat-top: 5-term cosine sum (coefficients are unnormalized; peak != 1).
    template <typename T>
    struct expression_flattop : input_expression
    {
        using value_type = T;

        template <cpu_t newcpu>
        using retarget_this = typename in_window<newcpu>::template expression_flattop<T>;
        expression_flattop(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric)
            : linspace(size, symmetry), m_size(size)
        {
        }
        template <typename U, size_t N>
        KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
        {
            constexpr vec_t<T, N> y{};
            const vec<T, N> n = linspace(cinput, index, y) * c_pi<T, 2>;
            constexpr T a0 = 1;
            constexpr T a1 = 1.93;
            constexpr T a2 = 1.29;
            constexpr T a3 = 0.388;
            constexpr T a4 = 0.028;
            return cast<U>(a0 - a1 * cos(n) + a2 * cos(2 * n) - a3 * cos(3 * n) + a4 * cos(4 * n));
        }
        size_t size() const { return m_size; }

    private:
        window_linspace_0_1<T> linspace;
        size_t m_size;
    };

    // Gaussian: exp(-0.5 * (alpha * x)^2) on the truncated [-1, 1] grid.
    template <typename T>
    struct expression_gaussian : input_expression
    {
        using value_type = T;

        template <cpu_t newcpu>
        using retarget_this = typename in_window<newcpu>::template expression_gaussian<T>;

        expression_gaussian(size_t size, T alpha = 2.5, window_symmetry symmetry = window_symmetry::symmetric)
            : linspace(size, symmetry), alpha(alpha), m_size(size)
        {
        }
        template <typename U, size_t N>
        KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
        {
            constexpr vec_t<T, N> y{};
            return cast<U>(exp(-0.5 * sqr(alpha * linspace(cinput, index, y))));
        }

        size_t size() const { return m_size; }
    private:
        window_linspace_m1_1_trunc<T> linspace;
        T alpha;
        size_t m_size;
    };

    // Lanczos: sinc(x) on [-pi, pi]. (alpha is stored but unused here.)
    template <typename T>
    struct expression_lanczos : input_expression
    {
        using value_type = T;

        template <cpu_t newcpu>
        using retarget_this = typename in_window<newcpu>::template expression_lanczos<T>;
        expression_lanczos(size_t size, T alpha = 2.5, window_symmetry symmetry = window_symmetry::symmetric)
            : linspace(size, symmetry), alpha(alpha), m_size(size)
        {
        }
        template <typename U, size_t N>
        KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
        {
            constexpr vec_t<T, N> y{};
            return cast<U>(sinc(linspace(cinput, index, y)));
        }
        size_t size() const { return m_size; }

    private:
        window_linspace_mpi_pi<T> linspace;
        T alpha;
        size_t m_size;
    };
};

// Compile-time map: window_type enum value -> generator expression type.
template <window_type>
struct window_by_type;

#define KFR_WINDOW_BY_TYPE(win)                                                                              \
    template <>                                                                                              \
    struct window_by_type<window_type::win>                                                                  \
    {                                                                                                        \
        template <typename T>                                                                                \
        using type = in_window<>::expression_##win<T>;                                                       \
    };
KFR_WINDOW_BY_TYPE(rectangular)
KFR_WINDOW_BY_TYPE(triangular)
KFR_WINDOW_BY_TYPE(bartlett)
KFR_WINDOW_BY_TYPE(cosine)
KFR_WINDOW_BY_TYPE(hann)
KFR_WINDOW_BY_TYPE(bartlett_hann)
KFR_WINDOW_BY_TYPE(hamming)
KFR_WINDOW_BY_TYPE(bohman)
KFR_WINDOW_BY_TYPE(blackman)
KFR_WINDOW_BY_TYPE(blackman_harris)
KFR_WINDOW_BY_TYPE(kaiser)
KFR_WINDOW_BY_TYPE(flattop)
KFR_WINDOW_BY_TYPE(gaussian)
KFR_WINDOW_BY_TYPE(lanczos)
}

// Convenience factory: rectangular window of the given length (fbase precision).
KFR_INLINE internal::in_window<>::expression_rectangular<fbase> window_rectangular(size_t size)
{
    return internal::in_window<>::expression_rectangular<fbase>(size, fbase());
}
+template <typename T = fbase> +KFR_INLINE internal::in_window<>::expression_triangular<T> window_triangular(size_t size, + ctype_t<T> = ctype_t<T>()) +{ + return internal::in_window<>::expression_triangular<T>(size); +} +template <typename T = fbase> +KFR_INLINE internal::in_window<>::expression_bartlett<T> window_bartlett(size_t size, + ctype_t<T> = ctype_t<T>()) +{ + return internal::in_window<>::expression_bartlett<T>(size); +} +template <typename T = fbase> +KFR_INLINE internal::in_window<>::expression_cosine<T> window_cosine(size_t size, ctype_t<T> = ctype_t<T>()) +{ + return internal::in_window<>::expression_cosine<T>(size); +} +template <typename T = fbase> +KFR_INLINE internal::in_window<>::expression_hann<T> window_hann(size_t size, ctype_t<T> = ctype_t<T>()) +{ + return internal::in_window<>::expression_hann<T>(size); +} +template <typename T = fbase> +KFR_INLINE internal::in_window<>::expression_bartlett_hann<T> window_bartlett_hann(size_t size, + ctype_t<T> = ctype_t<T>()) +{ + return internal::in_window<>::expression_bartlett_hann<T>(size); +} +template <typename T = fbase> +KFR_INLINE internal::in_window<>::expression_hamming<T> window_hamming(size_t size, T alpha = 0.54, + ctype_t<T> = ctype_t<T>()) +{ + return internal::in_window<>::expression_hamming<T>(size, alpha); +} +template <typename T = fbase> +KFR_INLINE internal::in_window<>::expression_bohman<T> window_bohman(size_t size, ctype_t<T> = ctype_t<T>()) +{ + return internal::in_window<>::expression_bohman<T>(size); +} +template <typename T = fbase> +KFR_INLINE internal::in_window<>::expression_blackman<T> window_blackman( + size_t size, T alpha = 0.16, window_symmetry symmetry = window_symmetry::symmetric, + ctype_t<T> = ctype_t<T>()) +{ + return internal::in_window<>::expression_blackman<T>(size, alpha, symmetry); +} +template <typename T = fbase> +KFR_INLINE internal::in_window<>::expression_blackman_harris<T> window_blackman_harris( + size_t size, window_symmetry symmetry = 
window_symmetry::symmetric, ctype_t<T> = ctype_t<T>()) +{ + return internal::in_window<>::expression_blackman_harris<T>(size, T(), symmetry); +} +template <typename T = fbase> +KFR_INLINE internal::in_window<>::expression_kaiser<T> window_kaiser(size_t size, T beta = T(0.5), + ctype_t<T> = ctype_t<T>()) +{ + return internal::in_window<>::expression_kaiser<T>(size, beta); +} +template <typename T = fbase> +KFR_INLINE internal::in_window<>::expression_flattop<T> window_flattop(size_t size, ctype_t<T> = ctype_t<T>()) +{ + return internal::in_window<>::expression_flattop<T>(size); +} +template <typename T = fbase> +KFR_INLINE internal::in_window<>::expression_gaussian<T> window_gaussian(size_t size, T alpha = 2.5, + ctype_t<T> = ctype_t<T>()) +{ + return internal::in_window<>::expression_gaussian<T>(size, alpha); +} +template <typename T = fbase> +KFR_INLINE internal::in_window<>::expression_lanczos<T> window_lanczos(size_t size, ctype_t<T> = ctype_t<T>()) +{ + return internal::in_window<>::expression_lanczos<T>(size); +} + +template <typename T = fbase, window_type type, + typename window_expr = typename internal::window_by_type<type>::template type<T>> +KFR_NOINLINE window_expr window(size_t size, cval_t<window_type, type>, T win_param = T(), + window_symmetry symmetry = window_symmetry::symmetric, + ctype_t<T> = ctype_t<T>()) +{ + return window_expr(size, win_param, symmetry); +} + +template <typename T = fbase> +KFR_NOINLINE expression_pointer<T> window(size_t size, window_type type, T win_param, + window_symmetry symmetry = window_symmetry::symmetric, + ctype_t<T> = ctype_t<T>()) +{ + return cswitch( + cvals<window_type, window_type::rectangular, window_type::triangular, window_type::bartlett, + window_type::cosine, window_type::hann, window_type::bartlett_hann, window_type::hamming, + window_type::bohman, window_type::blackman, window_type::blackman_harris, window_type::kaiser, + window_type::flattop, window_type::gaussian, window_type::lanczos>, + type, + 
[=](auto win) { + constexpr window_type window = val_of(win); + return to_pointer<T>( + typename internal::window_by_type<window>::template type<T>(size, win_param, symmetry)); + }, + fn_returns<expression_pointer<T>>()); +} +} + +#pragma clang diagnostic pop diff --git a/include/kfr/expressions/basic.hpp b/include/kfr/expressions/basic.hpp @@ -0,0 +1,360 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. 
+ */ +#pragma once + +#include "../base/univector.hpp" +#include "../base/vec.hpp" + +namespace kfr +{ + +namespace internal +{ +template <typename T, typename E1> +struct expression_iterator +{ + constexpr expression_iterator(E1&& e1) : e1(std::forward<E1>(e1)) {} + struct iterator + { + T operator*() { return get(); } + T get() { return expr.e1(cinput, position, vec_t<T, 1>())[0]; } + iterator& operator++() + { + ++position; + return *this; + } + iterator operator++(int) + { + iterator copy = *this; + ++(*this); + return copy; + } + bool operator!=(const iterator& other) const { return position != other.position; } + expression_iterator& expr; + size_t position; + }; + iterator begin() { return { *this, 0 }; } + iterator end() { return { *this, e1.size() }; } + E1 e1; +}; +} + +template <typename E1, typename T = value_type_of<E1>> +KFR_INLINE internal::expression_iterator<T, E1> to_iterator(E1&& e1) +{ + return internal::expression_iterator<T, E1>(std::forward<E1>(e1)); +} + +template <typename T, typename... Ts> +KFR_INLINE auto sequence(T x, Ts... rest) +{ + const T seq[] = { x, static_cast<T>(rest)... 
}; + constexpr size_t N = arraysize(seq); + return lambda([=](size_t index) { return seq[index % N]; }); +} +KFR_INLINE auto zeros() +{ + return lambda([](cinput_t, size_t, auto x) { return zerovector(x); }); +} +KFR_INLINE auto ones() +{ + return lambda([](cinput_t, size_t, auto x) { + using U = subtype<decltype(x)>; + return U(1); + }); +} +KFR_INLINE auto counter() +{ + return lambda([](cinput_t, size_t index, auto x) { + using T = subtype<decltype(x)>; + using Tsub = subtype<T>; + using TI = subtype<itype<T>>; + return cast<T>(enumerate<Tsub, x.size()>() + cast<Tsub>(cast<TI>(index))); + }); +} +template <typename T1> +KFR_INLINE auto counter(T1 start) +{ + return lambda([start](cinput_t, size_t index, auto x) { + using T = subtype<decltype(x)>; + using Tsub = subtype<T>; + using TI = subtype<itype<T>>; + return cast<T>(enumerate<Tsub, x.size()>() + cast<Tsub>(start) + cast<Tsub>(cast<TI>(index))); + }); +} +template <typename T1, typename T2> +KFR_INLINE auto counter(T1 start, T2 step) +{ + return lambda([start, step](cinput_t, size_t index, auto x) { + using T = subtype<decltype(x)>; + using Tsub = subtype<T>; + using TI = subtype<itype<T>>; + return cast<T>(enumerate<Tsub, x.size()>() * step + cast<Tsub>(start) + cast<Tsub>(cast<TI>(index))); + }); +} + +template <typename Gen> +struct segment +{ + template <typename Gen_> + constexpr segment(size_t start, Gen_&& gen) : start(start), gen(std::forward<Gen_>(gen)) + { + } + size_t start; + Gen gen; +}; + +enum symmetric_linspace_t +{ + symmetric_linspace +}; + +namespace internal +{ +template <typename T, typename E1> +struct expression_reader +{ + constexpr expression_reader(E1&& e1) noexcept : e1(std::forward<E1>(e1)) {} + T read() + { + const T result = e1(cinput, m_position, vec_t<T, 1>()); + m_position++; + return result; + } + size_t m_position = 0; + E1 e1; +}; +template <typename T, typename E1> +struct expression_writer +{ + constexpr expression_writer(E1&& e1) noexcept : e1(std::forward<E1>(e1)) {} + 
template <typename U> + void write(U value) + { + e1(coutput, m_position, vec<U, 1>(value)); + m_position++; + } + size_t m_position = 0; + E1 e1; +}; +} + +template <typename T, typename E1> +internal::expression_reader<T, E1> reader(E1&& e1) +{ + static_assert(is_input_expression<E1>::value, "E1 must be an expression"); + return internal::expression_reader<T, E1>(std::forward<E1>(e1)); +} + +template <typename T, typename E1> +internal::expression_writer<T, E1> writer(E1&& e1) +{ + static_assert(is_output_expression<E1>::value, "E1 must be an output expression"); + return internal::expression_writer<T, E1>(std::forward<E1>(e1)); +} + +namespace internal +{ + +template <typename E1, typename = void> +struct inherit_value_type +{ +}; + +template <typename E1> +struct inherit_value_type<E1, void_t<typename decay<E1>::value_type>> +{ + using value_type = typename decay<E1>::value_type; +}; + +template <typename E1> +struct expression_skip : expression<E1>, inherit_value_type<E1> +{ + expression_skip(E1&& e1, size_t count) : expression<E1>(std::forward<E1>(e1)), count(count) {} + template <typename T, size_t N> + KFR_INLINE vec<T, N> operator()(cinput_t, size_t index, vec_t<T, N> y) + { + return this->argument_first(index + count, y); + } + size_t count; +}; + +template <typename T, bool precise = false> +struct expression_linspace; + +template <typename T> +struct expression_linspace<T, false> : input_expression +{ + using value_type = T; + + expression_linspace(T start, T stop, size_t size, bool endpoint = false) + : start(start), offset((stop - start) / T(endpoint ? 
size - 1 : size)) + { + } + + expression_linspace(symmetric_linspace_t, T symsize, size_t size, bool endpoint = false) + : expression_linspace(-symsize, +symsize, size, endpoint) + { + } + + template <typename U, size_t N> + KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N> x) const + { + using UI = itype<U>; + return U(start) + (enumerate(x) + cast<U>(cast<UI>(index))) * U(offset); + } + + T start; + T offset; +}; + +template <typename T> +struct expression_linspace<T, true> : input_expression +{ + expression_linspace(T start, T stop, size_t size, bool endpoint = false) + : start(start), stop(stop), invsize(1.0 / T(endpoint ? size - 1 : size)) + { + } + + expression_linspace(symmetric_linspace_t, T symsize, size_t size, bool endpoint = false) + : expression_linspace(-symsize, +symsize, size, endpoint) + { + } + + template <typename U, size_t N> + KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N> x) const + { + using UI = itype<U>; + return mix((enumerate(x) + cast<U>(cast<UI>(index))) * invsize, cast<U>(start), cast<U>(stop)); + } + template <typename U, size_t N> + KFR_INLINE static vec<U, N> mix(vec<U, N> t, U x, U y) + { + return (U(1.0) - t) * x + t * y; + } + + T start; + T stop; + T invsize; +}; + +template <typename... E> +struct expression_sequence : expression<E...> +{ +public: + using base = expression<E...>; + template <cpu_t newcpu> + using retarget_this = expression_sequence<retarget<E, newcpu>...>; + + template <typename... Expr_> + KFR_INLINE expression_sequence(const size_t (&segments)[base::size], Expr_&&... expr) noexcept + : base(std::forward<Expr_>(expr)...) 
+ { + std::copy(std::begin(segments), std::end(segments), this->segments.begin() + 1); + this->segments[0] = 0; + this->segments[base::size + 1] = size_t(-1); + } + + template <typename T, size_t N> + KFR_NOINLINE vec<T, N> operator()(cinput_t, size_t index, vec_t<T, N> y) + { + std::size_t sindex = size_t(std::upper_bound(std::begin(segments), std::end(segments), index) - 1 - + std::begin(segments)); + if (segments[sindex + 1] - index >= N) + return get(index, sindex - 1, y); + else + { + vec<T, N> result; +#pragma clang loop unroll_count(4) + for (size_t i = 0; i < N; i++) + { + sindex = segments[sindex + 1] == index ? sindex + 1 : sindex; + result.data()[i] = get(index, sindex - 1, vec_t<T, 1>())[0]; + index++; + } + return result; + } + } + +protected: + template <typename T, size_t N> + KFR_NOINLINE vec<T, N> get(size_t index, size_t expr_index, vec_t<T, N> y) + { + return cswitch(indicesfor<E...>, expr_index, [&](auto val) { return this->argument(val, index, y); }, + [&]() { return zerovector(y); }); + } + + std::array<size_t, base::size + 2> segments; +}; +} + +template <typename E1> +KFR_INLINE internal::expression_skip<E1> skip(E1&& e1, size_t count = 1) +{ + return internal::expression_skip<E1>(std::forward<E1>(e1), count); +} + +template <typename T1, typename T2, bool precise = false, typename TF = ftype<common_type<T1, T2>>> +KFR_INLINE internal::expression_linspace<TF, precise> linspace(T1 start, T2 stop, size_t size, + bool endpoint = false) +{ + return internal::expression_linspace<TF, precise>(start, stop, size, endpoint); +} +KFR_FN(linspace) + +template <typename T, bool precise = false, typename TF = ftype<T>> +KFR_INLINE internal::expression_linspace<TF, precise> symmlinspace(T symsize, size_t size, + bool endpoint = false) +{ + return internal::expression_linspace<TF, precise>(symmetric_linspace, symsize, size, endpoint); +} +KFR_FN(symmlinspace) + +template <size_t size, typename... 
E> +KFR_INLINE internal::expression_sequence<decay<E>...> gen_sequence(const size_t (&list)[size], E&&... gens) +{ + static_assert(size == sizeof...(E), "Lists must be of equal length"); + return internal::expression_sequence<decay<E>...>(list, std::forward<E>(gens)...); +} +KFR_FN(gen_sequence) + +namespace internal +{ +template <typename... E> +struct multioutput : output_expression +{ + template <typename... E_> + multioutput(E_&&... e) : outputs(std::forward<E_>(e)...) + { + } + template <typename T, size_t N> + void operator()(coutput_t, size_t index, vec<T, N> x) + { + cfor(csize<0>, csize<sizeof...(E)>, [&](auto n) { std::get<val_of(n)>(outputs)(coutput, index, x); }); + } + std::tuple<E...> outputs; + +private: +}; +} +} diff --git a/include/kfr/expressions/conversion.hpp b/include/kfr/expressions/conversion.hpp @@ -0,0 +1,57 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. 
+ */ + +#pragma once + +#include "../base/function.hpp" +#include "../base/operators.hpp" +#include "../base/vec.hpp" +#include "../expressions/basic.hpp" + +namespace kfr +{ +namespace internal +{ +template <typename From, typename E> +struct expression_convert : expression<E> +{ + template <cpu_t newcpu> + using retarget_this = expression_convert<From, retarget<E, newcpu>>; + + KFR_INLINE expression_convert(E&& expr) noexcept : expression<E>(std::forward<E>(expr)) {} + + template <typename T, size_t N> + KFR_INLINE vec<T, N> operator()(cinput_t, size_t index, vec_t<T, N>) + { + return this->argument_first(index, vec_t<From, N>()); + } +}; +} + +template <typename From, typename E> +KFR_INLINE internal::expression_convert<From, decay<E>> convert(E&& expr) +{ + return internal::expression_convert<From, decay<E>>(std::forward<E>(expr)); +} +KFR_FN(convert) +} diff --git a/include/kfr/expressions/generators.hpp b/include/kfr/expressions/generators.hpp @@ -0,0 +1,279 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. 
+ */ +#pragma once + +#include "../base/function.hpp" +#include "../base/log_exp.hpp" +#include "../base/select.hpp" +#include "../base/sin_cos.hpp" +#include "../base/vec.hpp" + +#pragma clang diagnostic push +#if CID_HAS_WARNING("-Winaccessible-base") +#pragma clang diagnostic ignored "-Winaccessible-base" +#endif + +namespace kfr +{ + +namespace internal +{ + +template <cpu_t cpu = cpu_t::native> +struct in_generators : in_log_exp<cpu>, in_select<cpu>, in_sin_cos<cpu> +{ +private: + using in_log_exp<cpu>::exp; + using in_log_exp<cpu>::exp2; + using in_select<cpu>::select; + using in_sin_cos<cpu>::cossin; + +public: + template <typename T, size_t width_, typename Class> + struct generator + { + constexpr static size_t width = width_; + using type = T; + + template <typename U, size_t N> + KFR_INLINE vec<U, N> operator()(cinput_t, size_t, vec_t<U, N> t) const + { + return cast<U>(generate(t)); + } + + void resync(T start) const { ptr_cast<Class>(this)->sync(start); } + + protected: + void call_next() const { ptr_cast<Class>(this)->next(); } + template <size_t N> + void call_shift(csize_t<N>) const + { + ptr_cast<Class>(this)->shift(csize<N>); + } + + template <size_t N> + void shift(csize_t<N>) const + { + const vec<T, width> oldvalue = value; + call_next(); + value = slice<N, width>(oldvalue, value); + } + + template <size_t N, KFR_ENABLE_IF(N == width)> + KFR_INLINE vec<T, N> generate(vec_t<T, N>) const + { + const vec<T, N> result = value; + call_next(); + return result; + } + + template <size_t N, KFR_ENABLE_IF(N < width)> + KFR_INLINE vec<T, N> generate(vec_t<T, N>) const + { + const vec<T, N> result = narrow<N>(value); + shift(csize<N>); + return result; + } + + template <size_t N, KFR_ENABLE_IF(N > width)> + KFR_INLINE vec<T, N> generate(vec_t<T, N> x) const + { + const auto lo = generate(low(x)); + const auto hi = generate(high(x)); + return concat(lo, hi); + } + + mutable vec<T, width> value; + }; + + template <typename T, size_t width = 
get_vector_width<T, cpu>(1, 2)> + struct generator_linear : generator<T, width, generator_linear<T, width>> + { + template <cpu_t newcpu> + using retarget_this = typename in_generators<newcpu>::template generator_linear<T>; + + constexpr generator_linear(T start, T step) noexcept : step(step), vstep(step* width) + { + this->resync(start); + } + + KFR_INLINE void sync(T start) const noexcept { this->value = start + enumerate<T, width>() * step; } + + KFR_INLINE void next() const noexcept { this->value += vstep; } + + protected: + T step; + T vstep; + }; + + template <typename T, size_t width = get_vector_width<T, cpu>(1, 2)> + struct generator_exp : generator<T, width, generator_exp<T, width>> + { + template <cpu_t newcpu> + using retarget_this = typename in_generators<newcpu>::template generator_exp<T>; + + generator_exp(T start, T step) noexcept : step(step), vstep(exp(make_vector(step* width))[0] - 1) + { + this->resync(start); + } + + KFR_INLINE void sync(T start) const noexcept + { + this->value = exp(start + enumerate<T, width>() * step); + } + + KFR_INLINE void next() const noexcept { this->value += this->value * vstep; } + + protected: + T step; + T vstep; + }; + + template <typename T, size_t width = get_vector_width<T, cpu>(1, 2)> + struct generator_exp2 : generator<T, width, generator_exp2<T, width>> + { + template <cpu_t newcpu> + using retarget_this = typename in_generators<newcpu>::template generator_exp2<T>; + + generator_exp2(T start, T step) noexcept : step(step), vstep(exp2(make_vector(step* width))[0] - 1) + { + this->resync(start); + } + + KFR_INLINE void sync(T start) const noexcept + { + this->value = exp2(start + enumerate<T, width>() * step); + } + + KFR_INLINE void next() const noexcept { this->value += this->value * vstep; } + + protected: + T step; + T vstep; + }; + + template <typename T, size_t width = get_vector_width<T, cpu>(1, 2)> + struct generator_cossin : generator<T, width, generator_cossin<T, width>> + { + template <cpu_t newcpu> 
+ using retarget_this = typename in_generators<newcpu>::template generator_cossin<T>; + + generator_cossin(T start, T step) + : step(step), alpha(2 * sqr(sin(width / 2 * step / 2))), beta(-sin(width / 2 * step)) + { + this->resync(start); + } + KFR_INLINE void sync(T start) const noexcept { this->value = init_cossin(step, start); } + + KFR_INLINE void next() const noexcept + { + this->value = this->value - subadd(alpha * this->value, beta * swap<2>(this->value)); + } + + protected: + T step; + T alpha; + T beta; + KFR_NOINLINE static vec<T, width> init_cossin(T w, T phase) + { + return cossin(dup(phase + enumerate<T, width / 2>() * w)); + } + }; + + template <typename T, size_t width = get_vector_width<T, cpu>(2, 4)> + struct generator_sin : generator<T, width, generator_sin<T, width>> + { + template <cpu_t newcpu> + using retarget_this = typename in_generators<newcpu>::template generator_sin<T>; + + generator_sin(T start, T step) + : step(step), alpha(2 * sqr(sin(width * step / 2))), beta(sin(width * step)) + { + this->resync(start); + } + KFR_INLINE void sync(T start) const noexcept + { + const vec<T, width* 2> cs = splitpairs(cossin(dup(start + enumerate<T, width>() * step))); + this->cos_value = low(cs); + this->value = high(cs); + } + + KFR_INLINE void next() const noexcept + { + const vec<T, width> c = this->cos_value; + const vec<T, width> s = this->value; + + const vec<T, width> cc = alpha * c + beta * s; + const vec<T, width> ss = alpha * s - beta * c; + + this->cos_value = c - cc; + this->value = s - ss; + } + + template <size_t N> + void shift(csize_t<N>) const noexcept + { + const vec<T, width> oldvalue = this->value; + const vec<T, width> oldcosvalue = this->cos_value; + next(); + this->value = slice<N, width>(oldvalue, this->value); + this->cos_value = slice<N, width>(oldcosvalue, this->cos_value); + } + + protected: + T step; + T alpha; + T beta; + mutable vec<T, width> cos_value; + }; +}; +} + +template <typename T1, typename T2, typename TF = 
ftype<common_type<T1, T2>>> +KFR_SINTRIN internal::in_generators<>::generator_linear<TF> gen_linear(T1 start, T2 step) +{ + return internal::in_generators<>::generator_linear<TF>(start, step); +} +template <typename T1, typename T2, typename TF = ftype<common_type<T1, T2>>> +KFR_SINTRIN internal::in_generators<>::generator_exp<TF> gen_exp(T1 start, T2 step) +{ + return internal::in_generators<>::generator_exp<TF>(start, step); +} +template <typename T1, typename T2, typename TF = ftype<common_type<T1, T2>>> +KFR_SINTRIN internal::in_generators<>::generator_exp2<TF> gen_exp2(T1 start, T2 step) +{ + return internal::in_generators<>::generator_exp2<TF>(start, step); +} +template <typename T1, typename T2, typename TF = ftype<common_type<T1, T2>>> +KFR_SINTRIN internal::in_generators<>::generator_sin<TF> gen_cossin(T1 start, T2 step) +{ + return internal::in_generators<>::generator_cossin<TF>(start, step); +} +template <typename T1, typename T2, typename TF = ftype<common_type<T1, T2>>> +KFR_SINTRIN internal::in_generators<>::generator_sin<TF> gen_sin(T1 start, T2 step) +{ + return internal::in_generators<>::generator_sin<TF>(start, step); +} +} + +#pragma clang diagnostic pop diff --git a/include/kfr/expressions/operators.hpp b/include/kfr/expressions/operators.hpp @@ -0,0 +1,66 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. 
+ * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. + */ +#pragma once + +#include "../base/function.hpp" +#include "../base/operators.hpp" +#include "../base/vec.hpp" + +namespace kfr +{ + +#define KFR_EXPR_UNARY(fn, op) \ + template <typename A1, KFR_ENABLE_IF(is_input_expression<A1>::value)> \ + KFR_INLINE auto operator op(A1&& a1)->decltype(bind_expression(fn(), std::forward<A1>(a1))) \ + { \ + return bind_expression(fn(), std::forward<A1>(a1)); \ + } + +#define KFR_EXPR_BINARY(fn, op) \ + template <typename A1, typename A2, KFR_ENABLE_IF(is_input_expressions<A1, A2>::value)> \ + KFR_INLINE auto operator op(A1&& a1, A2&& a2) \ + ->decltype(bind_expression(fn(), std::forward<A1>(a1), std::forward<A2>(a2))) \ + { \ + return bind_expression(fn(), std::forward<A1>(a1), std::forward<A2>(a2)); \ + } + +KFR_EXPR_UNARY(fn_neg, -) +KFR_EXPR_UNARY(fn_bitwisenot, ~) + +KFR_EXPR_BINARY(fn_add, +) +KFR_EXPR_BINARY(fn_sub, -) +KFR_EXPR_BINARY(fn_mul, *) +KFR_EXPR_BINARY(fn_div, /) +KFR_EXPR_BINARY(fn_bitwiseand, &) +KFR_EXPR_BINARY(fn_bitwiseor, |) +KFR_EXPR_BINARY(fn_bitwisexor, ^) +KFR_EXPR_BINARY(fn_shl, <<) +KFR_EXPR_BINARY(fn_shr, >>) + +KFR_EXPR_BINARY(fn_equal, ==) +KFR_EXPR_BINARY(fn_notequal, !=) +KFR_EXPR_BINARY(fn_less, <) +KFR_EXPR_BINARY(fn_greater, >) +KFR_EXPR_BINARY(fn_lessorequal, <=) +KFR_EXPR_BINARY(fn_greaterorequal, >=) +} diff --git a/include/kfr/expressions/pointer.hpp b/include/kfr/expressions/pointer.hpp @@ -0,0 +1,168 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the 
License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. + */ +#pragma once + +#include "../base/vec.hpp" +#include "basic.hpp" +#include <memory> + +namespace kfr +{ + +constexpr size_t maximum_expression_width() { return bitness_const(16, 32); } + +template <typename T, size_t maxwidth = maximum_expression_width()> +using expression_vtable = carray<void*, 2 + ilog2(maxwidth) + 1>; + +struct dummy_content +{ +}; + +struct expression_resource +{ + virtual ~expression_resource() {} + virtual void* instance() { return nullptr; } +}; +template <typename E> +struct expression_resource_impl : expression_resource +{ + expression_resource_impl(E&& e) noexcept : e(std::move(e)) {} + virtual ~expression_resource_impl() {} + virtual void* instance() override final { return &e; } +private: + E e; +}; + +template <typename E> +std::shared_ptr<expression_resource> make_resource(E&& e) +{ + return std::static_pointer_cast<expression_resource>( + std::make_shared<expression_resource_impl<decay<E>>>(std::move(e))); +} + +template <typename T, size_t maxwidth = maximum_expression_width()> +struct expression_pointer : input_expression +{ + using value_type = T; + + static_assert(is_poweroftwo(maxwidth), "N must be a power of two"); + expression_pointer() noexcept : instance(nullptr), vtable(nullptr) {} + expression_pointer(void* instance, const 
expression_vtable<T, maxwidth>* vtable, + std::shared_ptr<expression_resource> resource = nullptr) + : instance(instance), vtable(vtable), resource(std::move(resource)) + { + } + template <typename U, size_t N> + KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const + { + using func_t = simd<T, N> (*)(void*, size_t); + + static_assert(is_poweroftwo(N), "N must be a power of two"); + constexpr size_t findex = ilog2(N); + static_assert(N <= maxwidth, "N is greater than maxwidth"); + func_t func = reinterpret_cast<func_t>(vtable->get(csize<2 + findex>)); + vec<U, N> result = cast<U>(func(instance, index)); + return result; + } + KFR_INLINE void begin_block(size_t size) const + { + using func_t = void (*)(void*, size_t); + func_t func = reinterpret_cast<func_t>(vtable->get(csize<0>)); + func(instance, size); + } + KFR_INLINE void end_block(size_t size) const + { + using func_t = void (*)(void*, size_t); + func_t func = reinterpret_cast<func_t>(vtable->get(csize<1>)); + func(instance, size); + } + +private: + void* instance; + const expression_vtable<T, maxwidth>* vtable; + std::shared_ptr<expression_resource> resource; +}; + +namespace internal +{ +template <typename T, size_t N, typename Fn, typename Ret = simd<T, N>, + typename NonMemFn = Ret (*)(Fn*, size_t, vec_t<T, N>)> +KFR_INLINE NonMemFn make_expression_func() +{ + return [](Fn* fn, size_t index, vec_t<T, N> x) { return *(fn->operator()(cinput, index, x)); }; +} + +template <typename Fn, typename NonMemFn = void (*)(Fn*, size_t)> +KFR_INLINE NonMemFn make_expression_begin_block() +{ + return [](Fn* fn, size_t size) { return fn->begin_block(size); }; +} +template <typename Fn, typename NonMemFn = void (*)(Fn*, size_t)> +KFR_INLINE NonMemFn make_expression_end_block() +{ + return [](Fn* fn, size_t size) { return fn->end_block(size); }; +} + +template <typename T, size_t maxwidth, typename E> +expression_vtable<T, maxwidth> make_expression_vtable_impl() +{ + expression_vtable<T, maxwidth> 
result; + constexpr size_t size = result.size() - 2; + + result.get(csize<0>) = reinterpret_cast<void*>(&internal::make_expression_begin_block<decay<E>>); + result.get(csize<1>) = reinterpret_cast<void*>(&internal::make_expression_end_block<decay<E>>); + + cforeach(csizeseq<size>, [&](auto u) { + constexpr size_t N = 1 << val_of(u); + result.get(csize<2 + val_of(u)>) = + reinterpret_cast<void*>(internal::make_expression_func<T, N, decay<E>>()); + }); + return result; +} + +template <typename T, size_t maxwidth, typename E> +KFR_INLINE expression_vtable<T, maxwidth>* make_expression_vtable() +{ + static_assert(is_input_expression<E>::value, "E must be an expression"); + static expression_vtable<T, maxwidth> vtable = internal::make_expression_vtable_impl<T, maxwidth, E>(); + return &vtable; +} +} + +template <typename E, typename T = value_type_of<E>, size_t maxwidth = maximum_expression_width()> +KFR_INLINE expression_pointer<T, maxwidth> to_pointer(E& expr) +{ + static_assert(is_input_expression<E>::value, "E must be an expression"); + return expression_pointer<T, maxwidth>(std::addressof(expr), + internal::make_expression_vtable<T, maxwidth, E>()); +} + +template <typename E, typename T = value_type_of<E>, size_t maxwidth = maximum_expression_width()> +KFR_INLINE expression_pointer<T, maxwidth> to_pointer(E&& expr) +{ + static_assert(is_input_expression<E>::value, "E must be an expression"); + std::shared_ptr<expression_resource> ptr = make_resource(std::move(expr)); + return expression_pointer<T, maxwidth>( + ptr->instance(), internal::make_expression_vtable<T, maxwidth, E>(), std::move(ptr)); +} +} diff --git a/include/kfr/expressions/reduce.hpp b/include/kfr/expressions/reduce.hpp @@ -0,0 +1,265 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, 
either version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. + */ +#pragma once + +#include "../base/function.hpp" +#include "../base/min_max.hpp" +#include "../base/operators.hpp" +#include "../base/vec.hpp" +#include "basic.hpp" + +namespace kfr +{ + +template <typename T> +KFR_INLINE T final_mean(T value, size_t size) +{ + return value / size; +} +KFR_FN(final_mean) + +template <typename T> +KFR_INLINE T final_rootmean(T value, size_t size) +{ + return internal::builtin_sqrt(value / size); +} +KFR_FN(final_rootmean) + +namespace internal +{ +template <typename FinalFn, typename T, KFR_ENABLE_IF(is_callable<FinalFn, size_t, T>::value)> +KFR_INLINE auto reduce_call_final(FinalFn&& finalfn, size_t size, T value) +{ + return finalfn(value, size); +} +template <typename FinalFn, typename T, KFR_ENABLE_IF(!is_callable<FinalFn, size_t, T>::value)> +KFR_INLINE auto reduce_call_final(FinalFn&& finalfn, size_t, T value) +{ + return finalfn(value); +} + +template <cpu_t cpu = cpu_t::native> +struct in_reduce +{ + + template <typename T, typename ReduceFn, typename TransformFn, typename FinalFn> + struct expression_reduce : output_expression + { + using Tsubtype = subtype<T>; + constexpr static size_t width = vector_width<Tsubtype, cpu> * bitness_const(1, 2); + + expression_reduce(ReduceFn&& reducefn, TransformFn&& transformfn, 
FinalFn&& finalfn) + : counter(0), reducefn(std::move(reducefn)), transformfn(std::move(transformfn)), + finalfn(std::move(finalfn)), value(resize<width>(make_vector(reducefn(initialvalue<T>{})))) + { + } + + template <typename U, size_t N> + KFR_INLINE void operator()(coutput_t, size_t, vec<U, N> x) const + { + counter += N; + process(x); + } + + KFR_INLINE T get() + { + return internal::reduce_call_final(finalfn, counter, horizontal(value, reducefn)); + } + + protected: + void reset() { counter = 0; } + template <size_t N, KFR_ENABLE_IF(N == width)> + KFR_INLINE void process(vec<Tsubtype, N> x) const + { + value = reducefn(transformfn(x), value); + } + + template <size_t N, KFR_ENABLE_IF(N < width)> + KFR_INLINE void process(vec<Tsubtype, N> x) const + { + value = combine(value, reducefn(transformfn(x), narrow<N>(value))); + } + + template <size_t N, KFR_ENABLE_IF(N > width)> + KFR_INLINE void process(vec<Tsubtype, N> x) const + { + process(low(x)); + process(high(x)); + } + + mutable size_t counter; + retarget<ReduceFn, cpu> reducefn; + retarget<TransformFn, cpu> transformfn; + retarget<FinalFn, cpu> finalfn; + mutable vec<Tsubtype, width> value; + }; + + template <typename ReduceFn, typename TransformFn = fn_pass_through, typename FinalFn = fn_pass_through, + typename E1, typename T = value_type_of<E1>> + KFR_SINTRIN T reduce(E1&& e1, ReduceFn&& reducefn, TransformFn&& transformfn = fn_pass_through(), + FinalFn&& finalfn = fn_pass_through()) + { + static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())"); + static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())"); + const size_t size = e1.size(); + using reducer_t = expression_reduce<T, decay<ReduceFn>, decay<TransformFn>, decay<FinalFn>>; + reducer_t red(std::forward<ReduceFn>(reducefn), std::forward<TransformFn>(transformfn), + std::forward<FinalFn>(finalfn)); + process<T, cpu>(red, std::forward<E1>(e1), size); + + return red.get(); + } + + 
template <typename E1, typename T = value_type_of<E1>> + KFR_SINTRIN T sum(E1&& x) + { + static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())"); + static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())"); + return reduce(std::forward<E1>(x), fn_add()); + } + + template <typename E1, typename T = value_type_of<E1>> + KFR_SINTRIN T mean(E1&& x) + { + static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())"); + static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())"); + return reduce(std::forward<E1>(x), fn_add(), fn_pass_through(), fn_final_mean()); + } + + template <typename E1, typename T = value_type_of<E1>> + KFR_SINTRIN T min(E1&& x) + { + using fn_min = typename in_min_max<cpu>::fn_min; + static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())"); + static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())"); + return reduce(std::forward<E1>(x), fn_min()); + } + + template <typename E1, typename T = value_type_of<E1>> + KFR_SINTRIN T max(E1&& x) + { + using fn_max = typename in_min_max<cpu>::fn_max; + static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())"); + static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())"); + return reduce(std::forward<E1>(x), fn_max()); + } + + template <typename E1, typename E2, + typename T = value_type_of<decltype(std::declval<E1>() * std::declval<E2>())>> + KFR_SINTRIN T dotproduct(E1&& x, E2&& y) + { + auto m = std::forward<E1>(x) * std::forward<E2>(y); + using E12 = decltype(m); + static_assert(!is_generic<E12>::value, "e1 * e2 must be a typed expression (use typed<T>())"); + static_assert(!is_infinite<E12>::value, "e1 * e2 must be a sized expression (use typed<T>())"); + return reduce(std::move(m), fn_add()); + } + + template <typename E1, typename T = value_type_of<E1>> + 
KFR_SINTRIN T rms(E1&& x) + { + static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())"); + static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())"); + return reduce(std::forward<E1>(x), fn_add(), fn_sqr(), fn_final_rootmean()); + } + + template <typename E1, typename T = value_type_of<E1>> + KFR_SINTRIN T sumsqr(E1&& x) + { + static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())"); + static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())"); + return reduce(std::forward<E1>(x), fn_add(), fn_sqr()); + } + + KFR_SPEC_FN(in_reduce, reduce) + KFR_SPEC_FN(in_reduce, sum) + KFR_SPEC_FN(in_reduce, dotproduct) + KFR_SPEC_FN(in_reduce, rms) + KFR_SPEC_FN(in_reduce, sumsqr) + KFR_SPEC_FN(in_reduce, mean) + KFR_SPEC_FN(in_reduce, min) + KFR_SPEC_FN(in_reduce, max) +}; +} + +namespace native +{ + +template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>::value)> +KFR_SINTRIN T sum(E1&& x) +{ + static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())"); + static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())"); + return internal::in_reduce<>::sum(std::forward<E1>(x)); +} + +template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>::value)> +KFR_SINTRIN T mean(E1&& x) +{ + static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())"); + static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())"); + return internal::in_reduce<>::mean(std::forward<E1>(x)); +} + +template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>::value)> +KFR_SINTRIN T max(E1&& x) +{ + static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())"); + static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())"); 
+ return internal::in_reduce<>::max(std::forward<E1>(x)); +} + +template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>::value)> +KFR_SINTRIN T min(E1&& x) +{ + static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())"); + static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())"); + return internal::in_reduce<>::min(std::forward<E1>(x)); +} + +template <typename E1, typename E2, typename T = value_type_of<E1>, + KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)> +KFR_SINTRIN T dotproduct(E1&& x, E2&& y) +{ + static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())"); + static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())"); + return internal::in_reduce<>::dotproduct(std::forward<E1>(x), std::forward<E2>(y)); +} + +template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>::value)> +KFR_SINTRIN T rms(E1&& x) +{ + static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())"); + static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())"); + return internal::in_reduce<>::rms(std::forward<E1>(x)); +} + +template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>::value)> +KFR_SINTRIN T sumsqr(E1&& x) +{ + static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())"); + static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())"); + return internal::in_reduce<>::sumsqr(std::forward<E1>(x)); +} +} +} diff --git a/include/kfr/io/audiofile.hpp b/include/kfr/io/audiofile.hpp @@ -0,0 +1,370 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software 
Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. + */ +#pragma once + +#include "../base/univector.hpp" +#include "../base/vec.hpp" +#include "../expressions/basic.hpp" +#include "file.hpp" + +namespace kfr +{ + +template <typename Tout, typename Tin, size_t Tag1, size_t Tag2, typename E1> +void write_interleaved(E1&& dest, const univector2d<Tin, Tag1, Tag2>& src) +{ + const size_t channels = src.size(); + const size_t size = src[0].size(); + if (channels == 1) + { + process<Tout>(std::forward<E1>(dest), src[0], size); + } + else if (channels == 2) + { + process<Tout>(std::forward<E1>(dest), bind_expression(fn_interleave(), src[0], src[1]), size); + } + else + { + internal::expression_writer<Tout, E1> wr = writer<Tout>(std::forward<E1>(dest)); + for (size_t i = 0; i < size; i++) + for (size_t ch = 0; ch < channels; ch++) + wr.write(src[ch][i]); + } +} + +enum class audiodatatype +{ + unknown, + i16, + i24, + i24a32, + i32, + f32, + f64 +}; + +namespace internal +{ +template <typename T> +constexpr range<fmax> audio_range() +{ + return { -std::numeric_limits<T>::max(), std::numeric_limits<T>::max() }; +} + +template <> +constexpr range<fmax> audio_range<f32>() +{ + return { -1.0, +1.0 }; +} + +template <> +constexpr range<fmax> audio_range<f64>() +{ + return { -1.0, +1.0 }; +} + +inline size_t 
get_audiobitdepth(audiodatatype type) +{ + return (size_t[]){ 0, 16, 24, 24, 32, 32, 64 }[static_cast<int>(type)]; +} + +template <typename T> +inline audiodatatype get_audiodatatype() +{ + if (ctypeid<T>() == ctypeid<i16>()) + return audiodatatype::i16; + else if (ctypeid<T>() == ctypeid<i32>()) + return audiodatatype::i32; + else if (ctypeid<T>() == ctypeid<f32>()) + return audiodatatype::f32; + else if (ctypeid<T>() == ctypeid<f64>()) + return audiodatatype::f64; + else + return audiodatatype::unknown; +} +} + +struct audioformat +{ + size_t channels; + size_t samples; + audiodatatype type; + fmax samplerate; + + template <typename T, size_t Tag1, size_t Tag2> + constexpr audioformat(const univector2d<T, Tag1, Tag2>& data, fmax sample_rate) + : channels(data.size()), samples(data[0].size()), type(internal::get_audiodatatype<T>()), + samplerate(sample_rate) + { + } +}; + +namespace internal +{ +static constexpr u32 FourCC(const char (&ch)[5]) +{ + return u32(u8(ch[0])) | u32(u8(ch[1])) << 8 | u32(u8(ch[2])) << 16 | u32(u8(ch[3])) << 24; +} + +struct WAV_FMT +{ + i32 fId; // 'fmt ' + i32 pcmHeaderLength; + i16 wFormatTag; + i16 numChannels; + i32 nSamplesPerSec; + i32 nAvgBytesPerSec; + i16 numBlockAlingn; + i16 numBitsPerSample; +} __attribute__((packed)); + +struct WAV_DATA +{ + i32 dId; // 'data' or 'fact' + i32 dLen; + u8 data[1]; +} __attribute__((packed)); + +struct WAV_DATA_HDR +{ + i32 dId; // 'data' or 'fact' + i32 dLen; +} __attribute__((packed)); + +struct AIFF_FMT +{ + i32 chunkID; + i32 chunkLen; + i16 channels; + u32 frames; + i16 bitsPerSample; + f80 sampleRate; + i32 compression; +} __attribute__((packed)); + +struct AIFF_DATA +{ + i32 chunkID; + i32 chunkLen; + u32 offset; +} __attribute__((packed)); + +constexpr u32 cWAVE_FORMAT_PCM = 1; +constexpr u32 cWAVE_FORMAT_IEEE = 3; + +constexpr u32 ccRIFF = FourCC("RIFF"); +constexpr u32 ccWAVE = FourCC("WAVE"); +constexpr u32 ccfmt = FourCC("fmt "); +constexpr u32 ccdata = FourCC("data"); + +constexpr 
u32 ccFORM = FourCC("FORM"); +constexpr u32 ccAIFF = FourCC("AIFF"); +constexpr u32 ccAIFC = FourCC("AIFC"); +constexpr u32 ccCOMM = FourCC("COMM"); +constexpr u32 ccSSND = FourCC("SSND"); +constexpr u32 ccNONE = FourCC("NONE"); +constexpr u32 ccsowt = FourCC("sowt"); + +struct RIFF_HDR +{ + i32 riffID; // 'RIFF' or 'COMM' + i32 fileLen; + i32 formatID; // 'WAVE' or 'AIFF' +} __attribute__((packed)); + +struct WAV_HEADER +{ + RIFF_HDR riff; + WAV_FMT fmt; + WAV_DATA_HDR data; + +} __attribute__((packed)); + +struct CHUNK_HDR +{ + i32 chunkID; + i32 chunkLen; +} __attribute__((packed)); + +static bool audio_test_wav(const array_ref<u8>& rawbytes) +{ + if (rawbytes.size() < sizeof(RIFF_HDR)) + { + return false; + } + const RIFF_HDR* hdr = reinterpret_cast<const RIFF_HDR*>(rawbytes.data()); + if (hdr->riffID != ccRIFF) + { + return false; + } + if (hdr->formatID != ccWAVE) + { + return false; + } + return true; +} + +static bool audio_test_aiff(const array_ref<u8>& rawbytes) +{ + if (rawbytes.size() < sizeof(RIFF_HDR)) + { + return false; + } + const RIFF_HDR* hdr = reinterpret_cast<const RIFF_HDR*>(rawbytes.data()); + if (hdr->riffID != ccFORM) + { + return false; + } + if (hdr->formatID != ccAIFF && hdr->formatID != ccAIFC) + { + return false; + } + return true; +} + +enum class file_status +{ + ok, + unknown_format, + bad_format, + unsupported_compression, + unsupported_bit_format +}; + +static file_status audio_info_wav(audioformat& info, const array_ref<u8>& rawbytes) +{ + const CHUNK_HDR* chunk = ptr_cast<CHUNK_HDR>(rawbytes.data() + 12); + const void* end = ptr_cast<char>(rawbytes.end()); + const WAV_FMT* fmt = nullptr; + const WAV_DATA* rawdata = nullptr; + while (chunk < end) + { + switch (chunk->chunkID) + { + case ccfmt: + fmt = ptr_cast<WAV_FMT>(chunk); + break; + case ccdata: + rawdata = ptr_cast<WAV_DATA>(chunk); + break; + } + chunk = ptr_cast<CHUNK_HDR>(ptr_cast<u8>(chunk) + chunk->chunkLen + 8); + } + if (!fmt || !rawdata) + { + return 
file_status::bad_format; + } + + if (fmt->wFormatTag != cWAVE_FORMAT_PCM && fmt->wFormatTag != cWAVE_FORMAT_IEEE) + { + return file_status::unsupported_compression; + } + + int storedbits = fmt->numBlockAlingn * 8 / fmt->numChannels; + if (fmt->wFormatTag == cWAVE_FORMAT_PCM && fmt->numBitsPerSample == 16 && storedbits == 16) + { + info.type = audiodatatype::i16; + } + else if (fmt->wFormatTag == cWAVE_FORMAT_PCM && fmt->numBitsPerSample == 24 && storedbits == 24) + { + info.type = audiodatatype::i24; + } + else if (fmt->wFormatTag == cWAVE_FORMAT_PCM && fmt->numBitsPerSample == 24 && storedbits == 32) + { + info.type = audiodatatype::i24a32; + } + else if (fmt->wFormatTag == cWAVE_FORMAT_PCM && fmt->numBitsPerSample == 32 && storedbits == 32) + { + info.type = audiodatatype::i32; + } + else if (fmt->wFormatTag == cWAVE_FORMAT_IEEE && fmt->numBitsPerSample == 32 && storedbits == 32) + { + info.type = audiodatatype::f32; + } + else if (fmt->wFormatTag == cWAVE_FORMAT_IEEE && fmt->numBitsPerSample == 64 && storedbits == 64) + { + info.type = audiodatatype::f64; + } + else + { + return file_status::unsupported_bit_format; + } + + if (fmt->numChannels < 1 || fmt->numChannels > 16) + return file_status::unsupported_bit_format; + + info.channels = size_t(fmt->numChannels); + info.samplerate = size_t(fmt->nSamplesPerSec); + info.samples = size_t(rawdata->dLen) / info.channels / (get_audiobitdepth(info.type) / 8); + + return file_status::ok; +} + +static file_status audio_info(audioformat& info, const array_ref<u8>& file_bytes) +{ + if (audio_test_wav(file_bytes)) + return audio_info_wav(info, file_bytes); + else + return file_status::unknown_format; +} +} + +template <size_t = 0> +void audio_encode_header(internal::expression_sequential_file_writer& dest, const audioformat& info) +{ + using namespace internal; + WAV_HEADER hdr; + zeroize(hdr); + const size_t framesize = info.channels * get_audiobitdepth(info.type) / 8; + hdr.riff.riffID = ccRIFF; + hdr.riff.formatID = 
ccWAVE; + hdr.riff.fileLen = autocast(info.samples * framesize + sizeof(hdr) - 8); + hdr.fmt.fId = ccfmt; + hdr.fmt.pcmHeaderLength = autocast(sizeof(hdr.fmt) - sizeof(CHUNK_HDR)); + hdr.fmt.numBlockAlingn = autocast(framesize); + hdr.fmt.nAvgBytesPerSec = autocast(info.samplerate * framesize); + hdr.fmt.nSamplesPerSec = autocast(info.samplerate); + hdr.fmt.numChannels = autocast(info.channels); + hdr.fmt.wFormatTag = info.type >= audiodatatype::f32 ? cWAVE_FORMAT_IEEE : cWAVE_FORMAT_PCM; + hdr.fmt.numBitsPerSample = autocast(get_audiobitdepth(info.type)); + hdr.data.dId = ccdata; + hdr.data.dLen = autocast(info.samples * framesize); + + dest.write(hdr); +} + +template <typename T, size_t Tag1, size_t Tag2> +void audio_encode_audio(internal::expression_sequential_file_writer& dest, + const univector2d<T, Tag1, Tag2>& audio) +{ + write_interleaved<T>(dest, audio); +} + +template <typename T, size_t Tag1, size_t Tag2> +void audio_encode(internal::expression_sequential_file_writer& dest, const univector2d<T, Tag1, Tag2>& audio, + const audioformat& info) +{ + audio_encode_header(dest, info); + audio_encode_audio(dest, audio); +} +} diff --git a/include/kfr/io/file.hpp b/include/kfr/io/file.hpp @@ -0,0 +1,132 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. 
// RAII owner of a C stdio stream shared by the file reader/writer expressions.
// Non-copyable; movable with ownership transfer.
struct expression_file_base
{
    expression_file_base()                            = delete;
    expression_file_base(const expression_file_base&) = delete;
    // FIX: the previously defaulted move constructor copied `file`, so both the
    // moved-from and moved-to objects closed the same handle (double fclose).
    expression_file_base(expression_file_base&& other) noexcept : file(other.file)
    {
        other.file = nullptr;
    }
    expression_file_base(FILE* file) : file(file) {}
    // FIX: fclose(nullptr) is undefined behaviour; `file` is null both after a
    // failed fopen (the factory functions pass fopen's result straight in) and
    // after being moved from.
    ~expression_file_base()
    {
        if (file)
            fclose(file);
    }
    bool ok() const { return file != nullptr; }
    FILE* file;
};
cast<T>(value); + fwrite(output.data(), sizeof(T), output.size(), file); + position = index + N; + } + size_t position = 0; +}; + +template <typename T> +struct expression_file_reader : expression_file_base, input_expression +{ + using expression_file_base::expression_file_base; + template <typename U, size_t N> + vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const + { + if (position != index) + fseeko(file, static_cast<off_t>(index * sizeof(T)), SEEK_SET); + vec<T, N> input = qnan; + fread(input.data(), sizeof(T), input.size(), file); + position = index + N; + return cast<U>(input); + } + size_t position = 0; +}; +} + +inline internal::expression_sequential_file_reader sequential_file_reader(const std::string& file_name) +{ + return internal::expression_sequential_file_reader(fopen(file_name.c_str(), "rb")); +} +inline internal::expression_sequential_file_writer sequential_file_writer(const std::string& file_name) +{ + return internal::expression_sequential_file_writer(fopen(file_name.c_str(), "wb")); +} + +template <typename T = u8> +internal::expression_file_reader<T> file_reader(const std::string& file_name) +{ + return internal::expression_file_reader<T>(fopen(file_name.c_str(), "rb")); +} +template <typename T = u8> +internal::expression_file_writer<T> file_writer(const std::string& file_name) +{ + return internal::expression_file_writer<T>(fopen(file_name.c_str(), "wb")); +} +} diff --git a/include/kfr/io/python_plot.hpp b/include/kfr/io/python_plot.hpp @@ -0,0 +1,155 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. + */ +#pragma once +#include "../base/vec.hpp" +#include "../cometa/string.hpp" +#include <cstdlib> + +#ifdef KFR_OS_WIN +#include <direct.h> +#define cross_getcwd _getcwd +#else +#include <unistd.h> +#define cross_getcwd getcwd +#endif + +namespace kfr +{ +namespace internal +{ + +void python(const std::string& name, const std::string& code) +{ + std::string filename; + { + char curdir[1024]; + cross_getcwd(curdir, arraysize(curdir)); + filename = curdir; + } +#ifdef KFR_OS_WIN + const char* slash = "\\"; +#else + const char* slash = "/"; +#endif + filename = filename + slash + name + ".py"; + + FILE* f = fopen(filename.c_str(), "w"); + fwrite(code.c_str(), 1, code.size(), f); + fclose(f); + std::system(("python \"" + filename + "\"").c_str()); +} +} + +static std::string concat_args() { return {}; } + +template <typename... Ts> +static std::string concat_args(const std::string& left, const Ts&... rest) +{ + const std::string right = concat_args(rest...); + return left.empty() ? right : right.empty() ? 
left : left + ", " + right; +} + +static void plot_show(const std::string& name, const std::string& wavfile, const std::string& options = "") +{ + print(name, "..."); + std::string ss; + ss += "#!/usr/bin/env python\n" + "import dspplot\n\n" + "dspplot.plot(" + + concat_args("r'" + wavfile + "'", options) + ")\n"; + + internal::python(name, ss); + print("done\n"); +} + +static void plot_show(const std::string& name, const char* x, const std::string& options = "") +{ + plot_show(name, std::string(x), options); +} + +template <typename T> +void plot_show(const std::string& name, T&& x, const std::string& options = "") +{ + print(name, "..."); + auto array = make_array_ref(std::forward<T>(x)); + std::string ss; + ss += "#!/usr/bin/env python\n" + "import dspplot\n\n" + "data = [\n"; + for (size_t i = 0; i < array.size(); i++) + ss += as_string(fmt<'g', 20, 17>(array[i]), ",\n"); + ss += "]\n"; + + ss += "dspplot.plot(" + concat_args("data", options) + ")\n"; + + internal::python(name, ss); + print("done\n"); +} + +template <typename T> +void plot_save(const std::string& name, T&& x, const std::string& options = "") +{ + plot_show(name, std::forward<T>(x), concat_args(options, "file='../svg/" + name + ".svg'")); +} + +template <typename T1, typename T2> +void perfplot_show(const std::string& name, T1&& data, T2&& labels, const std::string& options = "") +{ + print(name, "..."); + auto array = make_array_ref(std::forward<T1>(data)); + auto labels_array = make_array_ref(std::forward<T2>(labels)); + std::string ss; + ss += "#!/usr/bin/env python\n"; + ss += "import dspplot\n\n"; + ss += "data = [\n"; + for (size_t i = 0; i < array.size(); i++) + { + auto subarray = make_array_ref(array[i]); + ss += "[\n"; + for (size_t i = 0; i < subarray.size(); i++) + ss += as_string(" ", fmt<'g', 20, 17>(subarray[i]), ",\n"); + ss += "],"; + } + ss += "]\n"; + + ss += "labels = [\n"; + for (size_t i = 0; i < labels_array.size(); i++) + { + const std::string label = labels_array[i]; + 
ss += " '" + label + "',"; + } + ss += "]\n"; + + ss += "dspplot.perfplot(" + concat_args("data, labels", options) + ")\n"; + + internal::python(name, ss); + print("done\n"); +} + +template <typename T1, typename T2> +void perfplot_save(const std::string& name, T1&& data, T2&& labels, const std::string& options = "") +{ + perfplot_show(name, std::forward<T1>(data), std::forward<T2>(labels), + concat_args(options, "file='../perf/" + name + ".svg'")); +} +} diff --git a/include/kfr/io/tostring.hpp b/include/kfr/io/tostring.hpp @@ -0,0 +1,131 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. 
+ */ +#pragma once + +#include "../base/complex.hpp" +#include "../base/univector.hpp" +#include "../base/vec.hpp" +#include "../cometa/string.hpp" +#include <cmath> + +namespace kfr +{ + +namespace internal +{ + +constexpr size_t number_width = 9; +constexpr size_t number_precision = 6; +constexpr size_t number_precision_short = 2; +constexpr size_t number_columns = 8; + +template <typename T> +std::string fmtvalue(std::true_type, const T& x) +{ + std::string str = as_string(fmt<'g', number_width, number_precision>(x)); + if (str.size() > number_width) + str = as_string(fmt<'g', number_width, number_precision_short>(x)); + return str; +} + +template <typename T> +std::string fmtvalue(std::true_type, const complex<T>& x) +{ + std::string restr = as_string(fmt<'g', number_width, number_precision>(x.real())); + if (restr.size() > number_width) + restr = as_string(fmt<'g', number_width, number_precision_short>(x.real())); + + std::string imstr = as_string(fmt<'g', -1, number_precision>(std::abs(x.imag()))); + if (imstr.size() > number_width) + imstr = as_string(fmt<'g', -1, number_precision_short>(std::abs(x.imag()))); + + return restr + (x.imag() < T(0) ? 
"-" : "+") + padleft(number_width, imstr + "j"); +} + +template <typename T> +std::string fmtvalue(std::false_type, const T& x) +{ + return as_string(fmtwidth<number_width>(repr(x))); +} +} + +template <typename T> +inline std::string repr(const kfr::complex<T>& v) +{ + return as_string(v.real()) + " + " + as_string(v.imag()) + "j"; +} + +template <typename T> +inline std::string repr(const T* source, size_t N) +{ + std::string str; + for (size_t i = 0; i < N; i++) + { + if (i > 0) + { + if (i % internal::number_columns == 0) + str += "\n"; + else + str += " "; + } + str += as_string(internal::fmtvalue(std::is_floating_point<T>(), source[i])); + } + return str; +} + +template <typename T> +inline std::string repr(const complex<T>* source, size_t N) +{ + std::string str; + for (size_t i = 0; i < N; i++) + { + if (i > 0) + { + if (i % (internal::number_columns / 2) == 0) + str += "\n"; + else + str += " "; + } + str += as_string(internal::fmtvalue(std::true_type{}, source[i])); + } + return str; +} + +template <typename T, int N> +inline std::string repr(kfr::simd<T, N> v) +{ + return repr(tovec(v)); +} + +template <typename T, size_t N> +inline std::string repr(vec<T, N> v) +{ + return repr(v.data(), v.size()); +} + +template <typename T, size_t Tag> +inline std::string repr(const univector<T, Tag>& v) +{ + return repr(v.data(), v.size()); +} +} diff --git a/include/kfr/math.hpp b/include/kfr/math.hpp @@ -0,0 +1,51 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. + */ +#pragma once + +#include "base/vec.hpp" + +#include "base/abs.hpp" +#include "base/asin_acos.hpp" +#include "base/atan.hpp" +#include "base/complex.hpp" +#include "base/constants.hpp" +#include "base/digitreverse.hpp" +#include "base/gamma.hpp" +#include "base/log_exp.hpp" +#include "base/logical.hpp" +#include "base/min_max.hpp" +#include "base/operators.hpp" +#include "base/read_write.hpp" +#include "base/round.hpp" +#include "base/saturation.hpp" +#include "base/select.hpp" +#include "base/shuffle.hpp" +#include "base/sin_cos.hpp" +#include "base/sinh_cosh.hpp" +#include "base/sqrt.hpp" +#include "base/tan.hpp" + +namespace kfr +{ +using namespace native; +} diff --git a/include/kfr/misc/compiletime.hpp b/include/kfr/misc/compiletime.hpp @@ -0,0 +1,81 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. 
+ * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. + */ +#pragma once +#include "../base/constants.hpp" +#include "../base/operators.hpp" +#include "../base/types.hpp" + +namespace kfr +{ + +namespace compiletime +{ + +template <typename T> +constexpr inline T select(bool c, T x, T y) +{ + return c ? x : y; +} +template <typename T> +constexpr inline T trunc(T x) +{ + return static_cast<T>(static_cast<long long>(x)); +} +template <typename T> +constexpr inline T abs(T x) +{ + return x < T() ? -x : x; +} +template <typename T> +constexpr inline T mulsign(T x, T y) +{ + return y < T() ? -x : x; +} +template <typename T> +constexpr inline T sin(T x) +{ + x = x - trunc(x / c_pi<T, 2>) * c_pi<T, 2>; + constexpr T c2 = -0.16665853559970855712890625; + constexpr T c4 = +8.31427983939647674560546875e-3; + constexpr T c6 = -1.85423981747590005397796630859375e-4; + + x -= c_pi<T>; + T y = abs(x); + y = select(y > c_pi<T, 1, 2>, c_pi<T> - y, y); + y = mulsign(y, -x); + + const T y2 = y * y; + T formula = c6; + const T y3 = y2 * y; + formula = fmadd(formula, y2, c4); + formula = fmadd(formula, y2, c2); + formula = formula * y3 + y; + return formula; +} +template <typename T> +constexpr inline T cos(T x) +{ + return sin(x + c_pi<T, 1, 2>); +} +} +} diff --git a/include/kfr/misc/random.hpp b/include/kfr/misc/random.hpp @@ -0,0 +1,180 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. + */ +#pragma once +#include "../base/function.hpp" +#include "../base/operators.hpp" +#include "../base/shuffle.hpp" +#include "../base/vec.hpp" + +namespace kfr +{ + +using random_state = u32x4; + +struct seed_from_rdtsc_t +{ +}; + +constexpr seed_from_rdtsc_t seed_from_rdtsc{}; + +struct random_bit_generator +{ + random_bit_generator(seed_from_rdtsc_t) noexcept + : state(bitcast<u32>(make_vector(__builtin_readcyclecounter(), + (__builtin_readcyclecounter() << 11) ^ 0x710686d615e2257bull))) + { + (void)operator()(); + } + constexpr random_bit_generator(u32 x0, u32 x1, u32 x2, u32 x3) noexcept : state(x0, x1, x2, x3) + { + (void)operator()(); + } + constexpr random_bit_generator(u64 x0, u64 x1) noexcept : state(bitcast<u32>(make_vector(x0, x1))) + { + (void)operator()(); + } + + inline random_state operator()() + { + constexpr static random_state mul{ 214013u, 17405u, 214013u, 69069u }; + constexpr static random_state add{ 2531011u, 10395331u, 13737667u, 1u }; + state = bitcast<u32>(rotateright<3>(bitcast<u8>(fmadd(state, mul, add)))); + return state; + } + +protected: + random_state state; +}; + +template <size_t N, KFR_ENABLE_IF(N <= sizeof(random_state))> +inline vec<u8, N> random_bits(random_bit_generator& gen) +{ + return narrow<N>(bitcast<u8>(gen())); +} +template <size_t N, KFR_ENABLE_IF(N > sizeof(random_state))> +inline vec<u8, N> 
random_bits(random_bit_generator& gen) +{ + constexpr size_t N2 = prev_poweroftwo(N - 1); + return concat(random_bits<N2>(gen), random_bits<N - N2>(gen)); +} + +template <typename T, size_t N, KFR_ENABLE_IF(std::is_integral<T>::value)> +inline vec<T, N> random_uniform(random_bit_generator& gen) +{ + return bitcast<T>(random_bits<N * sizeof(T)>(gen)); +} + +template <typename T, size_t N, KFR_ENABLE_IF(std::is_same<T, f32>::value)> +inline vec<f32, N> randommantissa(random_bit_generator& gen) +{ + return bitcast<f32>((random_uniform<u32, N>(gen) & 0x7FFFFFu) | 0x3f800000u) + 0.0f; +} + +template <typename T, size_t N, KFR_ENABLE_IF(std::is_same<T, f64>::value)> +inline vec<f64, N> randommantissa(random_bit_generator& gen) +{ + return bitcast<f64>((random_uniform<u64, N>(gen) & 0x000FFFFFFFFFFFFFull) | 0x3FF0000000000000ull) + 0.0; +} + +template <typename T, size_t N> +inline enable_if_f<vec<T, N>> random_uniform(random_bit_generator& gen) +{ + return randommantissa<T, N>(gen) - 1.f; +} + +template <size_t N, typename T> +inline enable_if_f<vec<T, N>> random_range(random_bit_generator& gen, T min, T max) +{ + return mix(random_uniform<T, N>(gen), min, max); +} + +template <size_t N, typename T> +inline enable_if_not_f<vec<T, N>> random_range(random_bit_generator& gen, T min, T max) +{ + using big_type = findinttype<sqr(std::numeric_limits<T>::min()), sqr(std::numeric_limits<T>::max())>; + + vec<T, N> u = random_uniform<T, N>(gen); + const vec<big_type, N> tmp = cast<big_type>(u); + return cast<T>((tmp * (max - min) + min) >> typebits<T>::bits); +} + +namespace internal +{ +template <typename T> +struct expression_random_uniform : input_expression +{ + using value_type = T; + constexpr expression_random_uniform(const random_bit_generator& gen) noexcept : gen(gen) {} + template <typename U, size_t N> + vec<U, N> operator()(cinput_t, size_t, vec_t<U, N>) const + { + return cast<U>(random_uniform<T, N>(gen)); + } + mutable random_bit_generator gen; +}; + +template 
<typename T> +struct expression_random_range : input_expression +{ + using value_type = T; + constexpr expression_random_range(const random_bit_generator& gen, T min, T max) noexcept : gen(gen), + min(min), + max(max) + { + } + + template <typename U, size_t N> + vec<U, N> operator()(cinput_t, size_t, vec_t<U, N>) const + { + return cast<U>(random_range<N, T>(gen, min, max)); + } + mutable random_bit_generator gen; + const T min; + const T max; +}; +} + +template <typename T> +inline internal::expression_random_uniform<T> gen_random_uniform(const random_bit_generator& gen) +{ + return internal::expression_random_uniform<T>(gen); +} + +template <typename T> +inline internal::expression_random_range<T> gen_random_range(const random_bit_generator& gen, T min, T max) +{ + return internal::expression_random_range<T>(gen, min, max); +} + +template <typename T> +inline internal::expression_random_uniform<T> gen_random_uniform() +{ + return internal::expression_random_uniform<T>(random_bit_generator(seed_from_rdtsc)); +} + +template <typename T> +inline internal::expression_random_range<T> gen_random_range(T min, T max) +{ + return internal::expression_random_range<T>(random_bit_generator(seed_from_rdtsc), min, max); +} +} diff --git a/include/kfr/misc/small_buffer.hpp b/include/kfr/misc/small_buffer.hpp @@ -0,0 +1,113 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. + */ +#pragma once + +#include "../base/memory.hpp" +#include <algorithm> +#include <cstdint> + +namespace kfr +{ + +template <typename T, std::size_t Capacity = 16> +struct small_buffer +{ +public: + small_buffer() noexcept : m_size(0), m_data(m_preallocated) {} + + small_buffer(std::size_t size) : small_buffer() { resize(size); } + + friend void swap(small_buffer<T, Capacity>& first, small_buffer<T, Capacity>& second) noexcept + { + using std::swap; + + swap(first.m_size, second.m_size); + swap(first.m_data, second.m_data); + swap(first.m_preallocated, second.m_preallocated); + first.m_data = first.m_size <= Capacity ? first.m_preallocated : first.m_data; + second.m_data = second.m_size <= Capacity ? 
second.m_preallocated : second.m_data; + } + small_buffer(small_buffer<T, Capacity>&& other) : small_buffer() { swap(other, *this); } + + small_buffer(const small_buffer<T, Capacity>& other) : small_buffer() { assign(other); } + small_buffer<T, Capacity>& operator=(small_buffer<T, Capacity> other) + { + swap(other, *this); + return *this; + } + + ~small_buffer() { clear(); } + + void assign(const small_buffer<T, Capacity>& other) + { + resize(other.m_size); + std::copy_n(other.m_data, m_size, m_data); + } + + void resize(std::size_t newsize) + { + T* m_newdata; + if (newsize <= Capacity) + { + m_newdata = m_preallocated; + } + else + { + m_newdata = aligned_allocate<T>(newsize); + } + std::copy_n(std::make_move_iterator(m_data), std::min(newsize, m_size), m_newdata); + if (m_data != m_preallocated) + aligned_deallocate(m_data); + m_data = m_newdata; + m_size = newsize; + } + bool empty() const { return !size(); } + std::size_t size() const { return m_size; } + const T* begin() const { return m_data; } + const T* end() const { return m_data + m_size; } + const T* cbegin() const { return m_data; } + const T* cend() const { return m_data + m_size; } + T* begin() { return m_data; } + T* end() { return m_data + m_size; } + void clear() { resize(0); } + const T& front() const { return m_data[0]; } + const T& back() const { return m_data[m_size - 1]; } + T& front() { return m_data[0]; } + T& back() { return m_data[m_size - 1]; } + void pop_back() { resize(m_size - 1); } + T* data() { return m_data; } + const T* data() const { return m_data; } + T& operator[](std::size_t i) { return m_data[i]; } + const T& operator[](std::size_t i) const { return m_data[i]; } + void push_back(const T& value) + { + resize(m_size + 1); + m_data[m_size - 1] = value; + } + +protected: + T m_preallocated[Capacity]; + std::size_t m_size; + T* m_data; +}; +} diff --git a/include/kfr/misc/sort.hpp b/include/kfr/misc/sort.hpp @@ -0,0 +1,98 @@ +/** + * Copyright (C) 2016 D Levin 
(http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. + */ +#pragma once + +#include "../base/min_max.hpp" +#include "../base/shuffle.hpp" +#include "../base/vec.hpp" + +namespace kfr +{ +/** + * Sort the elements in the vector in ascending order + * @param x input vector + * @return sorted vector + * @code + * CHECK(sort(make_vector(1000, 1, 2, -10)) == make_vector(-10, 1, 2, 1000)); + * @endcode + */ +template <typename T, size_t N> +KFR_INLINE vec<T, N> sort(vec<T, N> x) +{ + using namespace kfr::native; + constexpr size_t Nhalf = N / 2; + vec<T, Nhalf> e = low(x); + vec<T, Nhalf> o = high(x); + constexpr auto blend0 = cconcat(csizes<1>, csizeseq<Nhalf - 1, 0, 0>); + for (size_t i = 0; i < Nhalf; i++) + { + vec<T, Nhalf> t; + t = min(e, o); + o = max(e, o); + o = rotateright<1>(o); + e = t; + t = max(e, o); + o = min(e, o); + e = t; + t = blend(e, o, blend0); + o = blend(o, e, blend0); + o = rotateleft<1>(o); + e = t; + } + return interleavehalfs(concat(e, o)); +} + +/** + * Sort the elements in the vector in descending order + * @param x input vector + * @return sorted vector + * @code + * 
CHECK(sort(make_vector(1000, 1, 2, -10)) == make_vector(1000, 2, 1, -10)); + * @endcode + */ +template <typename T, size_t N> +KFR_INLINE vec<T, N> sortdesc(vec<T, N> x) +{ + using namespace kfr::native; + constexpr size_t Nhalf = N / 2; + vec<T, Nhalf> e = low(x); + vec<T, Nhalf> o = high(x); + constexpr auto blend0 = cconcat(csizes<1>, csizeseq<Nhalf - 1, 0, 0>); + for (size_t i = 0; i < Nhalf; i++) + { + vec<T, Nhalf> t; + t = max(e, o); + o = min(e, o); + o = rotateright<1>(o); + e = t; + t = min(e, o); + o = max(e, o); + e = t; + t = blend(e, o, blend0); + o = blend(o, e, blend0); + o = rotateleft<1>(o); + e = t; + } + return interleavehalfs(concat(e, o)); +} +} diff --git a/include/kfr/vec.hpp b/include/kfr/vec.hpp @@ -0,0 +1,25 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. 
+ */ +#pragma once + +#include "base/vec.hpp" diff --git a/include/kfr/version.hpp b/include/kfr/version.hpp @@ -0,0 +1,35 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. + */ +#pragma once + +#include "base/types.hpp" +#include <string> + +namespace kfr +{ +static std::string library_version() +{ + return "KFR " + std::string(version_string) + bitness_const(" x86 ", " x86-64 ") + + CID_STRINGIFY(KFR_ARCH_NAME); +} +} diff --git a/readme.md b/readme.md @@ -0,0 +1,101 @@ +# KFR + +KFR is an open source C++ math framework with focus on DSP. + +KFR is a header-only and has no external dependencies. + +## Features + +* All code in the library is optimized for SSE2, SSE3, SSE4.x, AVX and AVX2 processors +* Mathematical and statistical functions +* Template expressions (See examples) +* All data types are supported including complex numbers +* All vector lengths are also supported. 
`vec<float,1>`, `vec<unsigned,3>`, `vec<complex<float>, 11>` all are valid vector types in KFR +* Most of the standard library functions are re-implemented to support vector of any length and data type +* Runtime CPU dispatching +* Multi-versioning. Code for various architectures (SSE2, AVX2, etc) can co-exist in one translation unit. No need to compile for all cpus + +Included DSP/audio algorithms: + +* FFT +* FIR filtering +* FIR filter design using the window method +* Resampling with configurable quality (See resampling.cpp from Examples directory) +* Goertzel algorithm +* Biquad filtering +* Biquad design functions +* Oscillators: Sine, Square, Sawtooth, Triangle +* Window functions: Triangular, Bartlett, Cosine, Hann, Bartlett-Hann, Hamming, Bohman, Blackman, Blackman-Harris, Kaiser, Flattop, Gaussian, Lanczos, Rectangular +* Audio file reading/writing +* Pseudorandom number generator +* Sorting +* Ring (Circular) buffer +* Fast incremental sine/cosine generation + +## Performance + +FFT (double precision, sizes from 1024 to 16777216) + +![FFT Performance](img/fft_performance.png) + +## Prerequisites + +* XCode 6.3, 6.4, 7.x, 8.x, or C++14-compliant compiler (currently only Clang 3.7 or newer is supported) +* CoMeta metaprogramming library (already included) + +KFR is header-only, so just `#include <kfr/math.hpp>` to start using it + +The following tools are required to build the examples: + +* CMake 3.x + +To build the tests: + +* Testo - C++14 testing micro framework (included) +* Python 2.7 with the following modules: + + * dspplot (included, see Installation) + * matplotlib + * numpy + * scipy + +## Installation + +To obtain the full code, including examples and tests, you can clone the git repository: + +``` +git clone https://github.com/kfrlib/kfr.git +``` +To be able to run the tests and examples install the following python modules: + +``` +pip install matplotlib +pip install numpy # or download prebuilt package for windows +pip install scipy # or 
download prebuilt package for windows +``` +Install dspplot using `python setup.py install` inside dspplot directory + +## Tests + +Execute `build.py` to run the tests or run tests manually from the `tests` directory + +Tested on the following systems: + +* OS X 10.11.4 / AppleClang 7.3.0.7030031 +* Windows 8.1 / clang version 3.8.0 (branches/release_38) + + +## Planned for future versions + +* DFT for any lengths (not only powers of two) +* Parallel execution of algorithms +* Serialization/Deserialization of any expression +* More formats for audio file reading/writing +* Reduce STL dependency + +## License + +KFR is dual-licensed, available under both commercial and open-source GPL license. + +If you want to use KFR in commercial product or a closed-source project, you need to [purchase a Commercial License](http://kfrlib.com/purchase-license) diff --git a/sources.cmake b/sources.cmake @@ -0,0 +1,89 @@ +# Copyright (C) 2016 D Levin (http://www.kfrlib.com) +# This file is part of KFR +# +# KFR is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# KFR is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with KFR. 
+ + +set( + KFR_SRC + ${PROJECT_SOURCE_DIR}/include/kfr/base/abs.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/asin_acos.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/atan.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/complex.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/constants.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/digitreverse.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/dispatch.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/expression.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/function.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/gamma.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/log_exp.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/logical.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/memory.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/min_max.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/operators.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/read_write.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/round.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/saturation.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/select.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/shuffle.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/sin_cos.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/sinh_cosh.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/sqrt.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/tan.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/types.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/univector.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/vec.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/data/bitrev.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/data/sincos.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dft/bitrev.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dft/fft.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dft/ft.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dft/reference_dft.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dispatch/cpuid.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dispatch/runtimedispatch.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dsp/biquad.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dsp/oscillators.hpp + 
${PROJECT_SOURCE_DIR}/include/kfr/dsp/units.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dsp/fir.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dsp/goertzel.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dsp/interpolation.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dsp/resample.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dsp/speaker.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dsp/weighting.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dsp/window.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/expressions/basic.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/expressions/conversion.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/expressions/generators.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/expressions/operators.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/expressions/pointer.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/expressions/reduce.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/io/audiofile.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/io/file.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/io/python_plot.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/io/tostring.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/math.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/misc/compiletime.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/misc/random.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/misc/small_buffer.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/misc/sort.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/vec.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/version.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/kfr.h + ${PROJECT_SOURCE_DIR}/include/kfr/base/intrinsics.h + ${PROJECT_SOURCE_DIR}/include/kfr/cometa.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/cometa/string.hpp + + ${PROJECT_SOURCE_DIR}/tests/testo/testo.hpp + ${PROJECT_SOURCE_DIR}/tests/testo/print_colored.hpp +) diff --git a/syntax-check.py b/syntax-check.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python +from __future__ import print_function + +import fnmatch +import subprocess +import os +import sys + +path = os.path.dirname(os.path.realpath(__file__)) + +filenames = [] +for root, dirnames, files in os.walk(os.path.join(path, 'include')): + for filename in 
fnmatch.filter(files, '*.hpp'): + filenames.append(os.path.join(root, filename)) + + +target = "" +if sys.platform.startswith('win32'): + target = "--target=x86_64-w64-windows-gnu" + +fails = 0 +for filename in filenames: + print(filename, '...') + c = subprocess.call(["clang", "-fsyntax-only", filename, "-std=c++14", "-I"+os.path.join(path, "include"), "-Wno-pragma-once-outside-header", target]) + if c != 0: + fails+=1 + +exit(fails) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt @@ -0,0 +1,47 @@ +# Copyright (C) 2016 D Levin (http://www.kfrlib.com) +# This file is part of KFR +# +# KFR is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# KFR is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with KFR. 
+ + +cmake_minimum_required(VERSION 3.0) + +add_compile_options(-fno-exceptions -fno-rtti -ftemplate-backtrace-limit=0) + +link_libraries(stdc++ pthread) + +include_directories(../include) + +add_executable(basic_vector_test basic_vector_test.cpp ${KFR_SRC}) +add_executable(dft_test dft_test.cpp ${KFR_SRC}) +add_executable(empty_test empty_test.cpp ${KFR_SRC}) +add_executable(complex_test complex_test.cpp ${KFR_SRC}) + +find_package(PythonInterp 2.7) + +if (PYTHONINTERP_FOUND) + enable_testing() + + add_test(NAME test_basic_vector + COMMAND ${PYTHON_EXECUTABLE} ${PROJECT_SOURCE_DIR}/tests/test_output.py + ${PROJECT_BINARY_DIR}/tests/basic_vector_test + ${PROJECT_SOURCE_DIR}/tests/basic_vector_test.cpp) + + add_test(NAME test_dft + COMMAND ${PROJECT_BINARY_DIR}/tests/dft_test) + add_test(NAME complex_test + COMMAND ${PROJECT_BINARY_DIR}/tests/complex_test) +else () + message(WARNING "Install Python to run tests") +endif () diff --git a/tests/basic_vector_test.cpp b/tests/basic_vector_test.cpp @@ -0,0 +1,152 @@ +/** + * KFR (http://kfrlib.com) + * Copyright (C) 2016 D Levin + * See LICENSE.txt for details + */ + +#include <kfr/io/tostring.hpp> + +#include <kfr/cometa/string.hpp> +#include <kfr/math.hpp> +#include <kfr/vec.hpp> +#include <kfr/version.hpp> + +using namespace kfr; +using namespace kfr::native; + +template <typename T> +void print_type(const T& value) +{ + println(type_name(value), ":"); + println(value); +} + +int main(int /*argc*/, char** /*argv*/) +{ + println(library_version()); + // >>> KFR ... 
+ + // How to make a vector: + + // * Use constructor + const vec<double, 4> first{ 1, 2.5, -infinity, 3.1415926 }; + print_type(first); + // >>> kfr::vec<double, 4>: + // >>> 1 2.5 -inf 3.14159 + + // * Use make_vector function + const auto second = make_vector(-1, +1); + print_type(second); + // >>> kfr::vec<int, 2>: + // >>> -1 1 + + // * Convert from vector of other type: + const vec<int, 4> int_vector{ 10, 20, 30, 40 }; + const vec<double, 4> double_vector = cast<double>(int_vector); + print_type(double_vector); + // >>> kfr::vec<double, 4>: + // >>> 10 20 30 40 + + // * Concat two vectors: + const vec<int, 1> left_part{ 1 }; + const vec<int, 1> right_part{ 2 }; + const vec<int, 2> pair{ left_part, right_part }; + print_type(pair); + // >>> kfr::vec<int, 2>: + // >>> 1 2 + + // * Same, but using make_vector and concat: + const vec<int, 2> pair2 = concat(make_vector(10), make_vector(20)); + print_type(pair2); + // >>> kfr::vec<int, 2>: + // >>> 10 20 + + // * Repeat vector multiple times: + const vec<short, 8> repeated = repeat<4>(make_vector<short>(0, -1)); + print_type(repeated); + // >>> kfr::vec<short, 8>: + // >>> 0 -1 0 -1 0 -1 0 -1 + + // * Use enumerate to generate sequence of numbers: + const vec<int, 8> eight = enumerate<int, 8>(); + print_type(eight); + // >>> kfr::vec<int, 8>: + // >>> 0 1 2 3 4 5 6 7 + + // * Vectors can be of any length... 
+ const vec<int, 1> one{ 42 }; + const vec<int, 2> two = concat(one, make_vector(42)); + print_type(two); + // >>> kfr::vec<int, 2>: + // >>> 42 42 + + const vec<u8, 256> very_long_vector = repeat<64>(make_vector<u8>(1, 2, 4, 8)); + print_type(slice<0, 17>(very_long_vector)); + // >>> kfr::vec<unsigned char, 17>: + // >>> 1 2 4 8 1 2 4 8 + // >>> 1 2 4 8 1 2 4 8 + // >>> 1 + + // * ...really any: + using big_vector = vec<i16, 107>; + big_vector v107 = enumerate<i16, 107>(); + print_type(hadd(v107)); + // >>> short: + // >>> 5671 + + using color = vec<u8, 3>; + const color green = cast<u8>(make_vector(0.0, 1.0, 0.0) * 255); + print_type(green); + // >>> kfr::vec<unsigned char, 3>: + // >>> 0 255 0 + + // Vectors support all standard operators: + const auto op1 = make_vector(0, 1, 10, 100); + const auto op2 = make_vector(20, 2, -2, 200); + const auto result = op1 * op2 - 4; + print_type(result); + // >>> kfr::vec<int, 4>: + // >>> -4 -2 -24 19996 + + // * Transform vector: + const vec<int, 8> numbers1 = enumerate<int, 8>(); + const vec<int, 8> numbers2 = enumerate<int, 8>() + 100; + print_type(odd(numbers1)); + print_type(even(numbers2)); + // >>> kfr::vec<int, 4>: + // >>> 1 3 5 7 + // >>> kfr::vec<int, 4>: + // >>> 100 102 104 106 + + // * The following command pairs are equivalent: + print_type(permute<0, 2, 1, 3, 4, 6, 5, 7>(numbers1)); + print_type(permute<0, 2, 1, 3>(numbers1)); + // >>> kfr::vec<int, 8>: + // >>> 0 2 1 3 4 6 5 7 + // >>> kfr::vec<int, 8>: + // >>> 0 2 1 3 4 6 5 7 + + print_type(shuffle<0, 8, 2, 10, 4, 12, 6, 14>(numbers1, numbers2)); + print_type(shuffle<0, 8>(numbers1, numbers2)); + // >>> kfr::vec<int, 8>: + // >>> 0 100 2 102 4 104 6 106 + // >>> kfr::vec<int, 8>: + // >>> 0 100 2 102 4 104 6 106 + + print_type(blend<0, 1, 1, 0, 1, 1, 0, 1>(numbers1, numbers2)); + print_type(blend<0, 1, 1>(numbers1, numbers2)); + // >>> kfr::vec<int, 8>: + // >>> 0 101 102 3 104 105 6 107 + // >>> kfr::vec<int, 8>: + // >>> 0 101 102 3 104 105 6 107 + + // 
* Transpose matrix: + const auto sixteen = enumerate<float, 16>(); + print_type(transpose<4>(sixteen)); + // >>> kfr::vec<float, 16>: + // >>> 0 4 8 12 1 5 9 13 + // >>> 2 6 10 14 3 7 11 15 + // >>> + + return 0; +} diff --git a/tests/complex_test.cpp b/tests/complex_test.cpp @@ -0,0 +1,200 @@ +/** + * KFR (http://kfrlib.com) + * Copyright (C) 2016 D Levin + * See LICENSE.txt for details + */ + +#include <kfr/io/tostring.hpp> + +#include "testo/testo.hpp" +#include <kfr/base/complex.hpp> +#include <kfr/cometa/string.hpp> +#include <kfr/expressions/basic.hpp> +#include <kfr/expressions/operators.hpp> +#include <kfr/expressions/reduce.hpp> +#include <kfr/math.hpp> +#include <kfr/version.hpp> + +using namespace kfr; + +template <typename T1, typename T2> +void assert_is_same() +{ + static_assert(std::is_same<T1, T2>::value, ""); +} + +TEST(complex_vector) +{ + const vec<c32, 1> c32x1{ c32{ 0, 1 } }; + CHECK(c32x1(0) == 0.0f); + CHECK(c32x1(1) == 1.0f); + + const vec<c32, 2> c32x2{ c32{ 0, 1 }, c32{ 2, 3 } }; + CHECK(c32x2(0) == 0.0f); + CHECK(c32x2(1) == 1.0f); + CHECK(c32x2(2) == 2.0f); + CHECK(c32x2(3) == 3.0f); + + const vec<c32, 3> c32x3{ c32{ 0, 1 }, c32{ 2, 3 }, c32{ 4, 5 } }; + CHECK(c32x3(0) == 0.0f); + CHECK(c32x3(1) == 1.0f); + CHECK(c32x3(2) == 2.0f); + CHECK(c32x3(3) == 3.0f); + CHECK(c32x3(4) == 4.0f); + CHECK(c32x3(5) == 5.0f); + + const vec<c32, 1> c32s = 2; + CHECK(c32s(0) == 2.f); + CHECK(c32s(1) == 0.f); +} + +TEST(complex_cast) +{ + const vec<f32, 4> v1 = subcast<f32>(make_vector(c32{ 0, 1 }, c32{ 2, 3 })); + CHECK(v1(0) == 0.f); + CHECK(v1(1) == 1.f); + CHECK(v1(2) == 2.f); + CHECK(v1(3) == 3.f); + + const vec<c32, 1> v2 = subcast<c32>(make_vector(1.f, 2.f)); + CHECK(v2(0) == 1.f); + CHECK(v2(1) == 2.f); + + const vec<c32, 2> v3 = cast<c32>(make_vector(1.f, 2.f)); + CHECK(v3(0) == 1.f); + CHECK(v3(1) == 0.f); + CHECK(v3(2) == 2.f); + CHECK(v3(3) == 0.f); + + CHECK(zerovector<c32, 4>() == make_vector(c32{ 0, 0 }, c32{ 0, 0 }, c32{ 0, 0 }, c32{ 0, 0 
})); + CHECK(enumerate<c32, 4>() == make_vector(c32{ 0, 0 }, c32{ 1, 0 }, c32{ 2, 0 }, c32{ 3, 0 })); +} + +TEST(complex_math) +{ + const vec<c32, 1> a{ c32{ 1, 2 } }; + const vec<c32, 1> b{ c32{ 3, 4 } }; + const vec<c32, 1> c = a + b; + CHECK(a + b == make_vector(c32{ 4, 6 })); + CHECK(a - b == make_vector(c32{ -2, -2 })); + CHECK(a * b == make_vector(c32{ -5, 10 })); + CHECK(a * 2 == make_vector(c32{ 2, 4 })); + CHECK(a / b == make_vector(c32{ 0.44, 0.08 })); + CHECK(-a == make_vector(c32{ -1, -2 })); + + CHECK(real(a) == make_vector(1.f)); + CHECK(imag(a) == make_vector(2.f)); + + CHECK(make_complex(5.f, 7) == c32{ 5.f, 7.f }); + CHECK(make_complex(make_vector(5.f, 8.f), make_vector(7.f, 9.f)) == + make_vector(c32{ 5.f, 7.f }, c32{ 8.f, 9.f })); + + CHECK(cabs(c32{ 3.f, 4.f }) == 5.f); + CHECK(cabs(make_vector(c32{ 3.f, 4.f })) == make_vector(5.f)); + + testo::epsilon<f32>() *= 5; + testo::epsilon<f64>() *= 5; + + CHECK(csin(c32{ 1.f, 1.f }) == c32{ 1.2984575814159773, 0.634963914784736 }); + CHECK(ccos(c32{ 1.f, 1.f }) == c32{ 0.8337300251311489, -0.9888977057628651 }); + CHECK(csinh(c32{ 1.f, 1.f }) == c32{ 0.634963914784736, 1.2984575814159773 }); + CHECK(ccosh(c32{ 1.f, 1.f }) == c32{ 0.8337300251311489, 0.9888977057628651 }); + + CHECK(clog(c32{ 1.f, 1.f }) == c32{ 0.34657359027997264, 0.7853981633974483 }); + CHECK(clog2(c32{ 1.f, 1.f }) == c32{ 0.5, 1.1330900354567983 }); + CHECK(clog10(c32{ 1.f, 1.f }) == c32{ 0.15051499783199057, 0.3410940884604603 }); + + CHECK(cexp(c32{ 1.f, 1.f }) == c32{ 1.4686939399158849, 2.2873552871788423 }); + CHECK(cexp2(c32{ 1.f, 1.f }) == c32{ 1.5384778027279442, 1.2779225526272695 }); + CHECK(cexp10(c32{ 1.f, 1.f }) == c32{ -6.682015101903131, 7.439803369574931 }); +} + +TEST(complex_read_write) +{ + c32 buffer[8] = { c32{ 1, 2 }, c32{ 3, 4 }, c32{ 5, 6 }, c32{ 7, 8 }, + c32{ 9, 10 }, c32{ 11, 12 }, c32{ 13, 14 }, c32{ 15, 16 } }; + + CHECK(read<4>(buffer) == make_vector(c32{ 1, 2 }, c32{ 3, 4 }, c32{ 5, 6 }, c32{ 7, 8 
})); + CHECK(read<3>(buffer + 1) == make_vector(c32{ 3, 4 }, c32{ 5, 6 }, c32{ 7, 8 })); + write(buffer + 2, make_vector(c32{ 10, 11 }, c32{ 12, 13 })); + CHECK(read<4>(buffer) == make_vector(c32{ 1, 2 }, c32{ 3, 4 }, c32{ 10, 11 }, c32{ 12, 13 })); +} + +TEST(complex_shuffle) +{ + const vec<c32, 2> a{ c32{ 0, 1 }, c32{ 2, 3 } }; + CHECK(reverse(a) == make_vector(c32{ 2, 3 }, c32{ 0, 1 })); +} + +TEST(complex_basic_expressions) +{ + const univector<c32, 3> uv1 = zeros(); + CHECK(uv1[0] == c32{ 0, 0 }); + CHECK(uv1[1] == c32{ 0, 0 }); + CHECK(uv1[2] == c32{ 0, 0 }); + const univector<c32, 3> uv2 = ones(); + CHECK(uv2[0] == c32{ 1, 0 }); + CHECK(uv2[1] == c32{ 1, 0 }); + CHECK(uv2[2] == c32{ 1, 0 }); + const univector<c32, 3> uv3 = counter(); + CHECK(uv3[0] == c32{ 0, 0 }); + CHECK(uv3[1] == c32{ 1, 0 }); + CHECK(uv3[2] == c32{ 2, 0 }); +} + +TEST(complex_function_expressions) +{ + static_assert(is_generic<decltype(counter())>::value, ""); + static_assert(is_generic<decltype(sqr(counter()))>::value, ""); + + const univector<c32, 4> uv1 = sqr(counter()); + CHECK(uv1[0] == c32{ 0, 0 }); + CHECK(uv1[1] == c32{ 1, 0 }); + CHECK(uv1[2] == c32{ 4, 0 }); + CHECK(uv1[3] == c32{ 9, 0 }); + + const univector<c32, 4> uv2 = uv1 * 2.f; + CHECK(uv2[0] == c32{ 0, 0 }); + CHECK(uv2[1] == c32{ 2, 0 }); + CHECK(uv2[2] == c32{ 8, 0 }); + CHECK(uv2[3] == c32{ 18, 0 }); + + const univector<f32, 4> uv3 = real(uv2); + CHECK(uv3[0] == 0.f); + CHECK(uv3[1] == 2.f); + CHECK(uv3[2] == 8.f); + CHECK(uv3[3] == 18.f); + + assert_is_same<c32, value_type_of<decltype(uv2)>>(); + assert_is_same<f32, value_type_of<decltype(uv3)>>(); + assert_is_same<f32, value_type_of<decltype(real(uv2))>>(); +} + +int main(int argc, char** argv) +{ + println(library_version()); + + static_assert(vector_width<f32, cpu_t::sse2> == 4, ""); + static_assert(vector_width<c32, cpu_t::sse2> == 2, ""); + static_assert(vector_width<i32, cpu_t::sse2> == 4, ""); + static_assert(vector_width<complex<i32>, cpu_t::sse2> == 2, ""); 
+ + static_assert(sizeof(vec<c32, 4>) == sizeof(vec<f32, 8>), ""); + static_assert(vec<f32, 4>::size() == 4, ""); + static_assert(vec<c32, 4>::size() == 4, ""); + static_assert(vec<f32, 4>::scalar_size() == 4, ""); + static_assert(vec<c32, 4>::scalar_size() == 8, ""); + assert_is_same<subtype<complex<i32>>, i32>(); + assert_is_same<vec<c32, 4>::value_type, c32>(); + assert_is_same<vec<c32, 4>::scalar_type, f32>(); + assert_is_same<vec<f32, 4>::value_type, f32>(); + assert_is_same<vec<f32, 4>::scalar_type, f32>(); + assert_is_same<vec<c32, 1>, decltype(make_vector(c32{ 0, 0 }))>(); + assert_is_same<vec<c32, 2>, decltype(make_vector(c32{ 0, 0 }, 4))>(); + assert_is_same<ftype<complex<i32>>, complex<f32>>(); + assert_is_same<ftype<complex<i64>>, complex<f64>>(); + assert_is_same<ftype<vec<complex<i32>, 4>>, vec<complex<f32>, 4>>(); + assert_is_same<ftype<vec<complex<i64>, 8>>, vec<complex<f64>, 8>>(); + + return testo::run_all("", true); +} diff --git a/tests/dft_test.cpp b/tests/dft_test.cpp @@ -0,0 +1,56 @@ +/** + * KFR (http://kfrlib.com) + * Copyright (C) 2016 D Levin + * See LICENSE.txt for details + */ +#include <tuple> + +#include "testo/testo.hpp" +#include <kfr/cometa/string.hpp> +#include <kfr/dft/fft.hpp> +#include <kfr/dft/reference_dft.hpp> +#include <kfr/expressions/basic.hpp> +#include <kfr/expressions/operators.hpp> +#include <kfr/expressions/reduce.hpp> +#include <kfr/io/tostring.hpp> +#include <kfr/math.hpp> +#include <kfr/misc/random.hpp> +#include <kfr/version.hpp> + +using namespace kfr; + +TEST(fft_accuracy) +{ + testo::active_test()->show_progress = true; + random_bit_generator gen(2247448713, 915890490, 864203735, 2982561); + + testo::matrix(named("type") = ctypes<float, double>, // + named("inverse") = std::make_tuple(false, true), // + named("log2(size)") = make_range(1, 21), // + [&gen](auto type, bool inverse, size_t log2size) { + using float_type = type_of<decltype(type)>; + const size_t size = 1 << log2size; + + 
univector<complex<float_type>> in = + typed<float_type>(gen_random_range(gen, -1.0, +1.0), size * 2); + univector<complex<float_type>> out = in; + univector<complex<float_type>> refout = out; + const dft_plan<float_type> dft(size); + univector<u8> temp(dft.temp_size); + + reference_dft(refout.data(), in.data(), size, inverse); + dft.execute(out, out, temp, inverse); + + const float_type rms_diff = rms(cabs(refout - out)); + const double ops = log2size * 50; + const double epsilon = std::numeric_limits<float_type>::epsilon(); + CHECK(rms_diff < epsilon * ops); + }); +} + +int main(int argc, char** argv) +{ + println(library_version()); + + return testo::run_all("", true); +} diff --git a/tests/empty_test.cpp b/tests/empty_test.cpp @@ -0,0 +1,7 @@ +#include <kfr/math.hpp> +#include <kfr/vec.hpp> + +using namespace kfr; +using namespace kfr::native; + +int main(int argc, char** argv) { return 0; } diff --git a/tests/test_output.py b/tests/test_output.py @@ -0,0 +1,34 @@ +#!/usr/bin/env python +from __future__ import print_function + +import os +import subprocess +import sys +import re + +binary_filename = sys.argv[1] +source_filename = sys.argv[2] + +with open(source_filename) as src: + test_source = enumerate(src.readlines()) + +parsed_output = [(re.sub(r'^\s*// >>>', '', line).strip(), linenum) for linenum, line in test_source if '// >>>' in line] + +output = subprocess.check_output([binary_filename], stderr=subprocess.STDOUT).decode("utf-8").splitlines() + +output = [o.strip() for o in output] + +fails = 0 +for expected, actual in zip(parsed_output, output): + reg = re.escape(expected[0]).replace(r'\.\.\.', '.*') + match = re.match(reg, actual) + if not match: + fails+=1 + print('Expected output string ({file}.cpp, #{line}): \n"{expected}"\n got: \n"{actual}"'.format(expected=expected[0], file=filename, actual=actual, line=expected[1])) + +if fails == 0: + print('All tests passed successfully ({} lines)'.format(len(parsed_output))) +else: + print('Number of failed 
tests: {fails})'.format(fails=fails)) + +exit(fails) diff --git a/tests/testo/print_colored.hpp b/tests/testo/print_colored.hpp @@ -0,0 +1,150 @@ +#pragma once +#include <cstdint> + +#if defined(_WIN32) +#include <windows.h> +#endif + +namespace print_colored +{ + +enum text_color : uint32_t +{ + Black = 0x00, + DarkBlue = 0x01, + DarkGreen = 0x02, + DarkCyan = 0x03, + DarkRed = 0x04, + DarkMagenta = 0x05, + DarkYellow = 0x06, + LightGrey = 0x07, + Gray = 0x08, + Blue = 0x09, + Green = 0x0A, + Cyan = 0x0B, + Red = 0x0C, + Magenta = 0x0D, + Yellow = 0x0E, + White = 0x0F, + BgBlack = 0x00, + BgDarkBlue = 0x10, + BgDarkGreen = 0x20, + BgDarkCyan = 0x30, + BgDarkRed = 0x40, + BgDarkMagenta = 0x50, + BgDarkYellow = 0x60, + BgLightGrey = 0x70, + BgGray = 0x80, + BgBlue = 0x90, + BgGreen = 0xA0, + BgCyan = 0xB0, + BgRed = 0xC0, + BgMagenta = 0xD0, + BgYellow = 0xE0, + BgWhite = 0xF0, + + Normal = BgBlack | LightGrey +}; + +enum console_buffer +{ + ConsoleStdOutput, + ConsoleStdError +}; + +#if defined(_WIN32) +typedef HANDLE console_handle_t; + +inline console_handle_t console_handle(console_buffer console = ConsoleStdOutput) +{ + static HANDLE con_out = ::GetStdHandle(STD_OUTPUT_HANDLE); + static HANDLE con_err = ::GetStdHandle(STD_ERROR_HANDLE); + return console == ConsoleStdOutput ? 
con_out : con_err; +} + +#endif + +struct console_color +{ +public: + console_color(text_color c, console_buffer console = ConsoleStdOutput) + : m_old(get(console)), m_console(console) + { + set(c, m_console); + } + + ~console_color() { set(m_old, m_console); } + +private: + text_color get(console_buffer console = ConsoleStdOutput) + { +#ifdef _WIN32 + CONSOLE_SCREEN_BUFFER_INFO info; + ::GetConsoleScreenBufferInfo(console_handle(console), &info); + return static_cast<text_color>(info.wAttributes & 0xFF); +#else + return static_color(); +#endif + } + + void set(text_color new_color, console_buffer console = ConsoleStdOutput) + { +#ifdef _WIN32 + ::SetConsoleTextAttribute(console_handle(console), static_cast<WORD>(new_color)); +#else + if (new_color != Normal) + { + uint8_t t = new_color & 0xF; + uint8_t b = (new_color & 0xF0) >> 4; + uint8_t tnum = 30 + ((t & 1) << 2 | (t & 2) | (t & 4) >> 2); + uint8_t bnum = 40 + ((b & 1) << 2 | (b & 2) | (b & 4) >> 2); + if (t & 8) + tnum += 60; + if (b & 8) + bnum += 60; + printf("\x1B[%d;%dm", tnum, bnum); + } + else + { + printf("\x1B[0m"); + } + static_color() = new_color; +#endif + } + + text_color m_old; + console_buffer m_console; +#ifndef _WIN32 + static text_color& static_color() + { + static text_color color = Normal; + return color; + } +#endif +}; + +template <text_color color, console_buffer console = ConsoleStdOutput> +struct colored_text_tpl : public console_color +{ +public: + colored_text_tpl() : console_color(color, console) {} + +private: +}; + +typedef colored_text_tpl<DarkBlue> darkblue_text; +typedef colored_text_tpl<DarkGreen> darkgreen_text; +typedef colored_text_tpl<DarkCyan> darkcyan_text; +typedef colored_text_tpl<DarkRed> darkred_text; +typedef colored_text_tpl<DarkMagenta> darkmagenta_text; +typedef colored_text_tpl<DarkYellow> darkyellow_text; +typedef colored_text_tpl<LightGrey> lightgrey_text; +typedef colored_text_tpl<Gray> gray_text; +typedef colored_text_tpl<Blue> blue_text; +typedef 
colored_text_tpl<Green> green_text; +typedef colored_text_tpl<Cyan> cyan_text; +typedef colored_text_tpl<Red> red_text; +typedef colored_text_tpl<Magenta> magenta_text; +typedef colored_text_tpl<Yellow> yellow_text; +typedef colored_text_tpl<White> white_text; +} diff --git a/tests/testo/testo.hpp b/tests/testo/testo.hpp @@ -0,0 +1,549 @@ +#pragma once + +#include <kfr/cometa.hpp> +#include <kfr/cometa/string.hpp> + +#include <ctime> +#include <functional> +#include <sstream> +#include <utility> +#include <vector> +#ifdef TESTO_MPFR +#include <mpfr/mpfr.hpp> +#include <mpfr/mpfr_tostring.hpp> +#endif +#include "print_colored.hpp" +#include <chrono> +#include <cmath> + +#if !defined CLANG_DIAGNOSTIC_PRAGMA +#if defined __clang__ +#define TESTO_STRING(str) #str +#define CLANG_DIAGNOSTIC_PRAGMA(pragma) _Pragma(TESTO_STRING(clang diagnostic pragma)) +#else +#define CLANG_DIAGNOSTIC_PRAGMA(pragma) +#endif +#endif + +CLANG_DIAGNOSTIC_PRAGMA(push) +CLANG_DIAGNOSTIC_PRAGMA(ignored "-Wexit-time-destructors") +CLANG_DIAGNOSTIC_PRAGMA(ignored "-Wpadded") +CLANG_DIAGNOSTIC_PRAGMA(ignored "-Wshadow") + +namespace testo +{ + +using namespace cometa; + +#ifdef TESTO_MPFR +using reference_number = mpfr::number; +#else +using reference_number = long double; +#endif + +#ifdef TESTO_MPFR +template <typename T> +inline double ulp_distance(const mpfr::number& reference, T test) +{ + if (std::isnan(test) && reference.isnan()) + return 0.0; + if (std::isinf(test) && (reference.isinfinity() || mpfr::abs(reference) > std::numeric_limits<T>::max())) + { + if ((reference < 0 && test < 0) || (reference > 0 && test > 0)) + return 0.0; + else + return std::numeric_limits<double>::infinity(); + } + mpfr::number testreal = test; + T next = std::nexttoward(test, std::numeric_limits<long double>::infinity()); + mpfr::number ulp = testreal - mpfr::number(next); + return std::abs(static_cast<double>((reference - testreal) / ulp)); +} +inline std::string number_to_string(const mpfr::number& reference, 
int precision) +{ + return mpfr::to_string(reference, precision, 'g'); +} +#else +template <typename T> +inline double ulp_distance(long double reference, T test) +{ + if (__builtin_isnan(test) && __builtin_isnan(reference)) + return 0.0; + if (__builtin_isinf(test) && + (__builtin_isinf(reference) || std::fabs(reference) > std::numeric_limits<T>::max())) + { + if ((reference < 0 && test < 0) || (reference > 0 && test > 0)) + return 0.0; + else + return std::numeric_limits<double>::infinity(); + } + long double test80 = test; + T next = std::nexttoward(test, std::numeric_limits<long double>::infinity()); + long double ulp = test80 - static_cast<long double>(next); + return std::abs(static_cast<double>((reference - test80) / ulp)); +} +#endif + +using namespace print_colored; + +template <typename Fn, typename L, typename R> +struct comparison +{ + L left; + R right; + Fn cmp; + + comparison(L&& left, R&& right) : left(std::forward<L>(left)), right(std::forward<R>(right)) {} + + bool operator()() { return cmp(left, right); } +}; + +template <typename Left, typename Right> +struct static_assert_type_eq +{ + static_assert(std::is_same<Left, Right>::value, "std::is_same<Left, Right>::value"); +}; + +template <typename T, T left, T right> +struct static_assert_eq +{ + static_assert(left == right, "left == right"); +}; + +template <typename L, typename R, typename = void> +struct equality_comparer +{ + bool operator()(const L& l, const R& r) const { return l == r; } +}; + +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wfloat-equal" + +template <typename T> +inline T& epsilon() +{ + static T value = std::numeric_limits<T>::epsilon(); + return value; +} + +template <> +struct equality_comparer<float, float> +{ + bool operator()(const float& l, const float& r) const { return !(std::abs(l - r) > epsilon<float>()); } +}; +template <> +struct equality_comparer<double, double> +{ + bool operator()(const double& l, const double& r) const { return !(std::abs(l 
- r) > epsilon<double>()); } +}; +template <> +struct equality_comparer<long double, long double> +{ + bool operator()(const long double& l, const long double& r) const + { + return !(std::abs(l - r) > epsilon<long double>()); + } +}; + +#pragma clang diagnostic pop + +template <typename L, typename R> +struct equality_comparer<L, R, void_t<enable_if<!compound_type_traits<L>::is_scalar>>> +{ + using Tsubtype = subtype<L>; + constexpr static static_assert_type_eq<subtype<L>, subtype<R>> assert{}; + + bool operator()(const L& l, const R& r) const + { + if (compound_type_traits<L>::width != compound_type_traits<R>::width) + return false; + + compound_type_traits<L> itl; + compound_type_traits<R> itr; + for (size_t i = 0; i < compound_type_traits<L>::width; i++) + { + equality_comparer<Tsubtype, Tsubtype> cmp; + if (!cmp(itl.at(l, i), itr.at(r, i))) + return false; + } + return true; + } +}; + +struct cmp_eq +{ + static const char* op() { return "=="; } + + template <typename L, typename R> + bool operator()(L&& left, R&& right) + { + equality_comparer<std::decay_t<L>, std::decay_t<R>> eq; + return eq(left, right); + } +}; + +struct cmp_ne +{ + static const char* op() { return "!="; } + + template <typename L, typename R> + bool operator()(L&& left, R&& right) + { + return !cmp_eq()(left, right); + } +}; + +struct cmp_lt +{ + static const char* op() { return "<"; } + + template <typename L, typename R> + bool operator()(L&& left, R&& right) + { + return left < right; + } +}; + +struct cmp_gt +{ + static const char* op() { return ">"; } + + template <typename L, typename R> + bool operator()(L&& left, R&& right) + { + return left > right; + } +}; + +struct cmp_le +{ + static const char* op() { return "<="; } + + template <typename L, typename R> + bool operator()(L&& left, R&& right) + { + return left <= right; + } +}; + +struct cmp_ge +{ + static const char* op() { return ">="; } + + template <typename L, typename R> + bool operator()(L&& left, R&& right) + { + return 
/// Splits `text` at every occurrence of `delimeter` and returns the pieces.
/// Consecutive delimiters and delimiters at either end produce empty strings,
/// so the result always contains (number of delimiters + 1) elements.
inline std::vector<std::string> split(const std::string& text, char delimeter)
{
    // Search and extract directly from `text`; the original made a full
    // copy of the input (`std::string r = text;`) only to call find() on it.
    std::vector<std::string> list;
    size_t prev_pos = 0;
    size_t pos;
    while ((pos = text.find(delimeter, prev_pos)) != std::string::npos)
    {
        list.push_back(text.substr(prev_pos, pos - prev_pos));
        prev_pos = pos + 1;
    }
    list.push_back(text.substr(prev_pos));
    return list;
}
} + + test_case(test_func func, const char* name) + : func(func), name(name), success(0), failed(0), time(0), show_progress(false) + { + tests().push_back(this); + } + + bool run(bool show_successful) + { + using namespace std::chrono; + using time_point = high_resolution_clock::time_point; + { + console_color cc(Cyan); + printfmt("[{}]", padcenter(11, std::string("RUN"), '-')); + } + printfmt(" {}...\n", name); + time_point start = high_resolution_clock::now(); + active_test() = this; + func(); + active_test() = nullptr; + time_point stop = high_resolution_clock::now(); + time = duration_cast<duration<double>>(stop - start).count(); + + { + console_color cc(failed ? Red : Green); + printfmt("[{}] {} subtests of {}\n", padcenter(11, failed ? "ERROR" : "SUCCESS", '-'), + failed ? failed : success, success + failed); + } + if (failed) + { + for (const subtest& s : subtests) + { + if ((s.success && show_successful) || !s.success) + { + if (!s.comment.empty()) + printfmt(" {}:\n", s.comment); + { + console_color cc(s.success ? Green : Red); + printfmt(" {} ", s.success ? "[success]" : "[fail] "); + } + printfmt("{}\n", s.text); + } + } + console_color cc(White); + } + return !failed; + } + + void check(bool result, const std::string& value, const char* expr) + { + subtests.push_back(subtest{ result, format("{} | {}", padleft(22, expr), value), comment }); + result ? success++ : failed++; + if (show_progress) + { + if (result) + { + console_color cc(Green); + print("."); + } + else + { + console_color cc(Red); + print("E"); + } + } + } + + template <typename Op, typename L, typename R> + void check(comparison<Op, L, R> comparison, const char* expr) + { + bool result = comparison(); + check(result, format("{} {} {}", as_string(comparison.left), Op::op(), as_string(comparison.right)), + expr); + } + + template <typename L> + void check(half_comparison<L> comparison, const char* expr) + { + bool result = comparison.left ? 
true : false; + check(result, as_string(comparison.left), expr); + } + + void set_comment(const std::string& text) + { + comment = text; + if (show_progress) + { + printfmt("\n{}:\n", comment); + } + } + + struct subtest + { + bool success; + std::string text; + std::string comment; + }; + + test_func func; + const char* name; + std::vector<subtest> subtests; + std::string comment; + int success; + int failed; + double time; + bool show_progress; +}; + +template <typename Number> +struct statistics +{ + Number minimum; + Number maximum; + double sum; + unsigned long long count; + std::vector<Number> values; + void reset() { *this = statistics<Number>(); } + std::string str() + { + return format("{} ... {} (avg={}, median={})\n", minimum, maximum, cometa::fmt<'f', 2>(average()), + median()); + } + double average() const { return sum / count; } + Number median() + { + std::sort(values.begin(), values.end()); + return values.empty() ? Number() : values[values.size() / 2]; + } + statistics() + : sum(), count(), minimum(std::numeric_limits<Number>::max()), + maximum(std::numeric_limits<Number>::min()) + { + } + void operator()(Number x) + { + minimum = std::min(minimum, x); + maximum = std::max(maximum, x); + sum += x; + count++; + values.push_back(x); + } +}; + +template <typename Arg0, typename Fn> +void matrix(named_arg<Arg0>&& arg0, Fn&& fn) +{ + cforeach(std::forward<Arg0>(arg0.value), [&](auto v0) { + active_test()->set_comment(format("{} = {}", arg0.name, v0)); + fn(v0); + }); + if (active_test()->show_progress) + printfmt("\n"); +} + +template <typename Arg0, typename Arg1, typename Fn> +void matrix(named_arg<Arg0>&& arg0, named_arg<Arg1>&& arg1, Fn&& fn) +{ + cforeach(std::forward<Arg0>(arg0.value), std::forward<Arg1>(arg1.value), [&](auto v0, auto v1) { + active_test()->set_comment(format("{} = {}, {} = {}", arg0.name, v0, arg1.name, v1)); + fn(v0, v1); + }); + if (active_test()->show_progress) + printfmt("\n"); +} + +template <typename Arg0, typename Arg1, 
typename Arg2, typename Fn> +void matrix(named_arg<Arg0>&& arg0, named_arg<Arg1>&& arg1, named_arg<Arg2>&& arg2, Fn&& fn) +{ + cforeach(std::forward<Arg0>(arg0.value), std::forward<Arg1>(arg1.value), std::forward<Arg2>(arg2.value), + [&](auto v0, auto v1, auto v2) { + active_test()->set_comment( + format("{} = {}, {} = {}, {} = {}", arg0.name, v0, arg1.name, v1, arg2.name, v2)); + fn(v0, v1, v2); + }); + if (active_test()->show_progress) + printfmt("\n"); +} + +static int run_all(const std::string& name = std::string(), bool show_successful = false) +{ + std::vector<test_case*> success; + std::vector<test_case*> failed; + for (test_case* t : test_case::tests()) + { + if (name.empty() || t->name == name) + t->run(show_successful) ? success.push_back(t) : failed.push_back(t); + } + printfmt("{}\n", std::string(79, '=')); + if (!success.empty()) + { + console_color cc(Green); + printfmt("[{}]", padcenter(11, "SUCCESS", '-')); + printfmt(" {} tests\n", success.size()); + } + if (!failed.empty()) + { + console_color cc(Red); + printfmt("[{}]", padcenter(11, "ERROR", '-')); + printfmt(" {} tests\n", failed.size()); + } + return static_cast<int>(failed.size()); +} + +#define TESTO_CHECK(...) \ + { \ + ::testo::active_test()->check(::testo::make_comparison() <= __VA_ARGS__, #__VA_ARGS__); \ + } + +#define TESTO_TEST(name) \ + void test_function_##name(); \ + ::testo::test_case test_case_##name(&test_function_##name, #name); \ + void CID_NOINLINE test_function_##name() + +#define TESTO_DTEST(name) \ + template <typename> \ + void disabled_test_function_##name() + +#ifndef TESTO_NO_SHORT_MACROS +#define CHECK TESTO_CHECK +#define TEST TESTO_TEST +#define DTEST TESTO_DTEST +#endif +} + +CLANG_DIAGNOSTIC_PRAGMA(pop)