commit 7866df07aa0d7c13b940fa81386086ca2d4dba45
Author: [email protected] <[email protected]>
Date: Wed, 29 Jun 2016 20:59:54 +0300
Initial commit
Diffstat:
96 files changed, 25231 insertions(+), 0 deletions(-)
diff --git a/.clang-format b/.clang-format
@@ -0,0 +1,26 @@
+UseTab: Never
+IndentWidth: 4
+Language: Cpp
+BreakBeforeBraces: Allman
+MaxEmptyLinesToKeep: 1
+IndentCaseLabels: false
+NamespaceIndentation: None
+AccessModifierOffset: -4
+SpacesInParentheses: false
+SpaceInEmptyParentheses: false
+SpacesInCStyleCastParentheses: false
+PointerAlignment: Left
+Cpp11BracedListStyle: false
+AllowShortIfStatementsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: true
+AlignOperands: true
+Standard: Cpp11
+AlignTrailingComments: false
+ConstructorInitializerAllOnOneLineOrOnePerLine: false
+ColumnLimit: 110
+BinPackParameters: true
+BinPackArguments: true
+AlwaysBreakTemplateDeclarations: true
+AlignConsecutiveAssignments: true
+PenaltyReturnTypeOnItsOwnLine: 50000
+CommentPragmas: '^ >>>'
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,72 @@
+# Compiled Object files
+*.slo
+*.lo
+*.o
+*.obj
+
+# Precompiled Headers
+*.gch
+*.pch
+
+# Compiled Dynamic libraries
+*.so
+*.dylib
+*.dll
+
+# Fortran module files
+*.mod
+*.smod
+
+# Compiled Static libraries
+*.lai
+*.la
+*.a
+*.lib
+
+# Executables
+*.exe
+*.out
+*.app
+
+# CMake files
+CMakeCache.txt
+CMakeFiles
+CMakeScripts
+Makefile
+cmake_install.cmake
+install_manifest.txt
+CTestTestfile.cmake
+
+# build directory
+build/
+
+# test directory
+svg/
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# Distribution / packaging
+.Python
+env/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Sphinx documentation
+docs/_build/
+
+# CLion
+.idea/
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -0,0 +1,48 @@
+# Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+# This file is part of KFR
+#
+# KFR is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# KFR is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with KFR.
+
+
+cmake_minimum_required(VERSION 3.0)
+
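+# Use clang with a MinGW target triple when building with the MinGW/MSYS makefile generators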
+if (${CMAKE_GENERATOR} STREQUAL "MinGW Makefiles" OR ${CMAKE_GENERATOR} STREQUAL "MSYS Makefiles")
+    if (NOT CMAKE_BUILD_TYPE)
+ set(CMAKE_BUILD_TYPE Release)
+ endif ()
+ set(CMAKE_CXX_COMPILER clang++)
+ set(CMAKE_C_COMPILER clang)
+ set(CMAKE_CXX_FLAGS --target=x86_64-w64-windows-gnu CACHE STRING "compile flags" FORCE)
+ set(CMAKE_C_FLAGS --target=x86_64-w64-windows-gnu CACHE STRING "compile flags" FORCE)
+ set(CMAKE_EXE_LINKER_FLAGS --target=x86_64-w64-windows-gnu)
+ set(CMAKE_SHARED_LINKER_FLAGS --target=x86_64-w64-windows-gnu)
+ set(CMAKE_STATIC_LINKER_FLAGS --target=x86_64-w64-windows-gnu)
+endif ()
+
+project(kfr)
+
+include(sources.cmake)
+
+add_compile_options(-std=c++1y)
+
+set(ALL_WARNINGS -Weverything -Wno-c++98-compat -Wno-c++98-compat-pedantic -Wno-c99-extensions -Wno-padded)
+
+add_compile_options(-march=native)
+
+add_subdirectory(examples)
+add_subdirectory(tests)
+
+file(MAKE_DIRECTORY ${PROJECT_BINARY_DIR}/svg)
+
diff --git a/LICENSE.txt b/LICENSE.txt
@@ -0,0 +1,674 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
+
+ The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works. By contrast,
+the GNU General Public License is intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users. We, the Free Software Foundation, use the
+GNU General Public License for most of our software; it applies also to
+any other work released this way by its authors. You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+ To protect your rights, we need to prevent others from denying you
+these rights or asking you to surrender the rights. Therefore, you have
+certain responsibilities if you distribute copies of the software, or if
+you modify it: responsibilities to respect the freedom of others.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must pass on to the recipients the same
+freedoms that you received. You must make sure that they, too, receive
+or can get the source code. And you must show them these terms so they
+know their rights.
+
+ Developers that use the GNU GPL protect your rights with two steps:
+(1) assert copyright on the software, and (2) offer you this License
+giving you legal permission to copy, distribute and/or modify it.
+
+ For the developers' and authors' protection, the GPL clearly explains
+that there is no warranty for this free software. For both users' and
+authors' sake, the GPL requires that modified versions be marked as
+changed, so that their problems will not be attributed erroneously to
+authors of previous versions.
+
+ Some devices are designed to deny users access to install or run
+modified versions of the software inside them, although the manufacturer
+can do so. This is fundamentally incompatible with the aim of
+protecting users' freedom to change the software. The systematic
+pattern of such abuse occurs in the area of products for individuals to
+use, which is precisely where it is most unacceptable. Therefore, we
+have designed this version of the GPL to prohibit the practice for those
+products. If such problems arise substantially in other domains, we
+stand ready to extend this provision to those domains in future versions
+of the GPL, as needed to protect the freedom of users.
+
+ Finally, every program is threatened constantly by software patents.
+States should not allow patents to restrict development and use of
+software on general-purpose computers, but in those that do, we wish to
+avoid the special danger that patents applied to a free program could
+make it effectively proprietary. To prevent this, the GPL assures that
+patents cannot be used to render the program non-free.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ TERMS AND CONDITIONS
+
+ 0. Definitions.
+
+ "This License" refers to version 3 of the GNU General Public License.
+
+ "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+ "The Program" refers to any copyrightable work licensed under this
+License. Each licensee is addressed as "you". "Licensees" and
+"recipients" may be individuals or organizations.
+
+ To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy. The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+ A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+ To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy. Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+ To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies. Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+ An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License. If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+ 1. Source Code.
+
+ The "source code" for a work means the preferred form of the work
+for making modifications to it. "Object code" means any non-source
+form of a work.
+
+ A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+ The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form. A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+ The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities. However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work. For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+ The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+ The Corresponding Source for a work in source code form is that
+same work.
+
+ 2. Basic Permissions.
+
+ All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met. This License explicitly affirms your unlimited
+permission to run the unmodified Program. The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work. This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+ You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force. You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright. Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+ Conveying under any other circumstances is permitted solely under
+the conditions stated below. Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+ 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+ No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+ When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+ 4. Conveying Verbatim Copies.
+
+ You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+ You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+ 5. Conveying Modified Source Versions.
+
+ You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+ a) The work must carry prominent notices stating that you modified
+ it, and giving a relevant date.
+
+ b) The work must carry prominent notices stating that it is
+ released under this License and any conditions added under section
+ 7. This requirement modifies the requirement in section 4 to
+ "keep intact all notices".
+
+ c) You must license the entire work, as a whole, under this
+ License to anyone who comes into possession of a copy. This
+ License will therefore apply, along with any applicable section 7
+ additional terms, to the whole of the work, and all its parts,
+ regardless of how they are packaged. This License gives no
+ permission to license the work in any other way, but it does not
+ invalidate such permission if you have separately received it.
+
+ d) If the work has interactive user interfaces, each must display
+ Appropriate Legal Notices; however, if the Program has interactive
+ interfaces that do not display Appropriate Legal Notices, your
+ work need not make them do so.
+
+ A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit. Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+ 6. Conveying Non-Source Forms.
+
+ You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+ a) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by the
+ Corresponding Source fixed on a durable physical medium
+ customarily used for software interchange.
+
+ b) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by a
+ written offer, valid for at least three years and valid for as
+ long as you offer spare parts or customer support for that product
+ model, to give anyone who possesses the object code either (1) a
+ copy of the Corresponding Source for all the software in the
+ product that is covered by this License, on a durable physical
+ medium customarily used for software interchange, for a price no
+ more than your reasonable cost of physically performing this
+ conveying of source, or (2) access to copy the
+ Corresponding Source from a network server at no charge.
+
+ c) Convey individual copies of the object code with a copy of the
+ written offer to provide the Corresponding Source. This
+ alternative is allowed only occasionally and noncommercially, and
+ only if you received the object code with such an offer, in accord
+ with subsection 6b.
+
+ d) Convey the object code by offering access from a designated
+ place (gratis or for a charge), and offer equivalent access to the
+ Corresponding Source in the same way through the same place at no
+ further charge. You need not require recipients to copy the
+ Corresponding Source along with the object code. If the place to
+ copy the object code is a network server, the Corresponding Source
+ may be on a different server (operated by you or a third party)
+ that supports equivalent copying facilities, provided you maintain
+ clear directions next to the object code saying where to find the
+ Corresponding Source. Regardless of what server hosts the
+ Corresponding Source, you remain obligated to ensure that it is
+ available for as long as needed to satisfy these requirements.
+
+ e) Convey the object code using peer-to-peer transmission, provided
+ you inform other peers where the object code and Corresponding
+ Source of the work are being offered to the general public at no
+ charge under subsection 6d.
+
+ A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+ A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling. In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage. For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product. A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+ "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source. The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+ If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information. But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+ The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed. Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+ Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+ 7. Additional Terms.
+
+ "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law. If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+ When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it. (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.) You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+ Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+ a) Disclaiming warranty or limiting liability differently from the
+ terms of sections 15 and 16 of this License; or
+
+ b) Requiring preservation of specified reasonable legal notices or
+ author attributions in that material or in the Appropriate Legal
+ Notices displayed by works containing it; or
+
+ c) Prohibiting misrepresentation of the origin of that material, or
+ requiring that modified versions of such material be marked in
+ reasonable ways as different from the original version; or
+
+ d) Limiting the use for publicity purposes of names of licensors or
+ authors of the material; or
+
+ e) Declining to grant rights under trademark law for use of some
+ trade names, trademarks, or service marks; or
+
+ f) Requiring indemnification of licensors and authors of that
+ material by anyone who conveys the material (or modified versions of
+ it) with contractual assumptions of liability to the recipient, for
+ any liability that these contractual assumptions directly impose on
+ those licensors and authors.
+
+ All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10. If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term. If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+ If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+ Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+ 8. Termination.
+
+ You may not propagate or modify a covered work except as expressly
+provided under this License. Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+ However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+ Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+ Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License. If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+ 9. Acceptance Not Required for Having Copies.
+
+ You are not required to accept this License in order to receive or
+run a copy of the Program. Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance. However,
+nothing other than this License grants you permission to propagate or
+modify any covered work. These actions infringe copyright if you do
+not accept this License. Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+ 10. Automatic Licensing of Downstream Recipients.
+
+ Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License. You are not responsible
+for enforcing compliance by third parties with this License.
+
+ An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations. If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+ You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License. For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+ 11. Patents.
+
+ A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based. The
+work thus licensed is called the contributor's "contributor version".
+
+ A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version. For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+ Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+ In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement). To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+ If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients. "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+ If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+ A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License. You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+ Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+ 12. No Surrender of Others' Freedom.
+
+ If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all. For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+ 13. Use with the GNU Affero General Public License.
+
+ Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU Affero General Public License into a single
+combined work, and to convey the resulting work. The terms of this
+License will continue to apply to the part which is the covered work,
+but the special requirements of the GNU Affero General Public License,
+section 13, concerning interaction through a network will apply to the
+combination as such.
+
+ 14. Revised Versions of this License.
+
+ The Free Software Foundation may publish revised and/or new versions of
+the GNU General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+ Each version is given a distinguishing version number. If the
+Program specifies that a certain numbered version of the GNU General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation. If the Program does not specify a version number of the
+GNU General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+ If the Program specifies that a proxy can decide which future
+versions of the GNU General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+ Later license versions may give you additional or different
+permissions. However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+ 15. Disclaimer of Warranty.
+
+ THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. Limitation of Liability.
+
+ IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+ 17. Interpretation of Sections 15 and 16.
+
+ If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+ If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+ <program> Copyright (C) <year> <name of author>
+ This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+ You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<http://www.gnu.org/licenses/>.
+
+ The GNU General Public License does not permit incorporating your program
+into proprietary programs. If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License. But first, please read
+<http://www.gnu.org/philosophy/why-not-lgpl.html>.
diff --git a/build.py b/build.py
@@ -0,0 +1,64 @@
+#!/usr/bin/env python
+
+# Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+# This file is part of KFR
+#
+# KFR is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# KFR is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with KFR.
+
+
+from __future__ import print_function
+
+import os
+import subprocess
+import sys
+
+path = os.path.dirname(os.path.realpath(__file__))
+build_dir = os.path.join(path, 'build')
+
+try:
+    os.makedirs(build_dir)
+except OSError:
+    # the build directory already exists
+    pass
+
+def check_compiler(name):
+    # subprocess.call raises OSError when the executable cannot be found at all,
+    # so catch it and report the missing tool instead of printing a traceback
+    try:
+        if subprocess.call([name, '--version'], stdout=subprocess.PIPE):
+            raise Exception(name + ' is not on your PATH')
+    except OSError:
+        raise Exception(name + ' is not on your PATH')
+
+print('Checking clang...', end=' ')
+check_compiler('clang')
+print('ok')
+print('Checking clang++...', end=' ')
+check_compiler('clang++')
+print('ok')
+
+if sys.platform.startswith('win32'):
+    generator = 'MinGW Makefiles'
+elif sys.platform.startswith('darwin'):
+    generator = 'Unix Makefiles'
+else:
+    # Linux and other Unix-like platforms
+    generator = 'Unix Makefiles'
+
+options = [
+ '-DCMAKE_BUILD_TYPE=Release',
+ ]
+
+subprocess.call(['cmake', '-G', generator, '..'] + options, cwd=build_dir)
+subprocess.call(['cmake', '--build', '.'], cwd=build_dir)
+subprocess.call(['ctest'], cwd=os.path.join(build_dir, 'tests'))
diff --git a/dspplot/dspplot/__init__.py b/dspplot/dspplot/__init__.py
@@ -0,0 +1,2 @@
+from .dspplotting import plot
+from .dspplotting import perfplot
diff --git a/dspplot/dspplot/dspplotting.py b/dspplot/dspplot/dspplotting.py
@@ -0,0 +1,236 @@
+from __future__ import division
+
+import wave
+
+import matplotlib.colors as clr
+import matplotlib.pyplot as plt
+import numpy as np
+from scipy import signal
+
+def gen_ticks(stop, start=10):
+    # Logarithmic ticks: each decade start, its multiples 2..9, then the final stop value.
+    yield start
+    for s in range(2, 10):
+        if start * s > stop:
+            yield stop
+            return  # end the generator; raising StopIteration is an error under PEP 479
+        yield start * s
+    for t in gen_ticks(stop, start * 10):
+        yield t
+
+def gen_tick_labels(stop, start=10):
+    # Label only each decade start and the final stop; intermediate ticks get empty labels.
+    yield (str(start) + 'Hz').replace('000Hz', 'kHz')
+    for s in range(2, 10):
+        if start * s > stop:
+            yield (str(int(stop)) + 'Hz').replace('000Hz', 'kHz')
+            return
+        yield ''
+    for t in gen_tick_labels(stop, start * 10):
+        yield t
+
+def smooth_colormap(colors, name='cmap1'):
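+    # Build a LinearSegmentedColormap from (position, color) stops and register it with matplotlib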
+ to_rgb = clr.ColorConverter().to_rgb
+ colors = [(p, to_rgb(c)) for p, c in colors]
+ result = {'red': [], 'green': [], 'blue': []}
+ for index, item in enumerate(colors):
+ pos, color = item
+ if pos is not None:
+ r, g, b = color
+ result['red'].append([pos, r, r])
+ result['green'].append([pos, g, g])
+ result['blue'].append([pos, b, b])
+ cmap = clr.LinearSegmentedColormap(name, result)
+ plt.register_cmap(name=name, cmap=cmap)
+ return cmap
+
+def wavplot(wavfile, title='Title', file=None, segmentsize=512, overlap=8):
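+    # Draw a spectrogram of a 32-bit PCM .wav file using overlapped, Hann-windowed FFT segments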
+ cmap = smooth_colormap([
+ (0 , '#000000'),
+ (1/9, '#010325'),
+ (2/9, '#130246'),
+ (3/9, '#51026e'),
+ (4/9, '#9e0379'),
+ (5/9, '#d6033e'),
+ (6/9, '#fc4d21'),
+ (7/9, '#fdc967'),
+ (8/9, '#f3fab8'),
+ (1 , '#ffffff')
+ ])
+
+ w = wave.open(wavfile, 'rb')
+
+ sr = w.getframerate()
+    # np.frombuffer replaces the deprecated np.fromstring; samples are assumed to be 32-bit PCM
+    data = np.frombuffer(w.readframes(w.getnframes()), dtype=np.int32) / 2147483647.0
+ datalen = len(data)
+
+ def fast_resample(data, newlen):
+        oldlen = len(data)
+        result = []
+ for i in range(newlen):
+ result.append(data[i*oldlen//newlen])
+ return np.array(result)
+
+
+    segments = datalen // segmentsize - 1
+
+    im = []
+
+ window = signal.hann(segmentsize * overlap)
+
+ np.seterr(all='ignore')
+
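+    # Hann-window each overlapped segment, FFT it, and keep the magnitude spectrum in dB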
+ for segm in range(segments-overlap):
+ r = range(segm*datalen//segments, segm*datalen//segments+segmentsize*overlap)
+ subdata = data[r]
+ subdata = subdata * window
+ n = len(subdata)
+ Y = np.fft.fft(subdata)/n
+        Y = Y[:len(Y) // 2]  # keep the positive-frequency half
+ Yfreq = 20 * np.log10(np.absolute(Y))
+ Yfreq = signal.resample(Yfreq, 512)
+ Yfreq = np.fmax(-300, Yfreq)
+ im.append(Yfreq)
+
+ im = np.transpose(im)
+
+    plt.imshow(im, cmap=cmap, aspect='auto', vmin=-160, vmax=0, origin='lower', extent=[0, datalen / sr, 0, sr / 2], interpolation='bicubic')
+ plt.colorbar()
+
+ if not file:
+ plt.show()
+ else:
+ plt.savefig(file)
+
+
+def plot(data,
+ title='Title',
+ horizontal=True,
+ normalized_freq=False,
+ Fs=48000,
+ padwidth=1024,
+ log_freq=False,
+ file=None,
+ freqresp=True,
+ phaseresp=False,
+ dots=False,
+ segmentsize=512,
+ overlap=8):
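+    # Plot a sampled signal plus optional frequency/phase response. If data is not
+    # array-like, it is treated as a .wav filename and forwarded to wavplot().
+    # Example (hypothetical values): plot(taps, title='FIR taps', Fs=44100, file='taps.svg')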
+ if isinstance(data, (list, tuple, np.ndarray)):
+ n = len(data)
+ num = 1 + freqresp + phaseresp
+ figsize = (10 if horizontal else 6 * num, 5 * num if horizontal else 6)
+ fig, a = plt.subplots(num, 1, figsize=figsize) if horizontal else plt.subplots(1, num, figsize=figsize)
+ fig.suptitle(title, fontsize=16)
+ fig.subplots_adjust(top=0.85)
+ rect = fig.patch
+ rect.set_facecolor('#f0f0f0')
+ style = {'linewidth': 1.4, 'color': '#0072bd'}
+ grid_style = {'color': '#777777'}
+
+ dataplot = a[0] if freqresp or phaseresp else a
+
+ dataplot.plot(np.linspace(0, n, n, False), data, marker='.' if dots else None, **style)
+ dataplot.set_xlabel('Samples')
+ dataplot.set_ylabel('Amplitude')
+ dataplot.grid(True, **grid_style)
+ dataplot.set_autoscalex_on(False)
+ dataplot.set_xlim([0, n - 1])
+ dataplot.set_ylim(bottom=np.min(data))
+
+ np.seterr(all='ignore')
+
+ if freqresp or phaseresp:
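+        # zero-pad the data to at least padwidth samples so the FFT yields a smooth response curve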
+ padwidth = max(padwidth, n)
+ Y = np.fft.fft(np.pad(data, (0, padwidth - n), 'constant', constant_values=(0, 0)))
+        Y = Y[:padwidth // 2]  # keep the positive-frequency half
+ Yfreq = 20 * np.log10(np.abs(Y))
+ Yfreq = np.fmax(-300, Yfreq)
+
+ freq_label = [r'Normalized Frequency ($\times \pi$ rad/sample)', 'Frequency (Hz)']
+
+ def set_freq(a):
+ if normalized_freq:
+ a.set_xlabel(freq_label[0])
+ X = np.linspace(0, 1, len(Y), False)
+ a.set_xlim([0, 1])
+ else:
+ a.set_xlabel(freq_label[1])
+ if log_freq:
+ a.set_xscale('log')
+ a.set_xticks(list(gen_ticks(Fs / 2)))
+ a.set_xticklabels(list(gen_tick_labels(Fs / 2)))
+ X = np.linspace(0, Fs / 2, len(Y), False)
+ a.set_xlim([10, Fs / 2])
+ return X
+
+ if freqresp:
+ freqplot = a[1]
+ X = set_freq(freqplot)
+ freqplot.set_ylabel('Gain (dB)')
+ freqplot.grid(True, **grid_style)
+ freqplot.set_autoscalex_on(False)
+ freqplot.plot(X, Yfreq, **style)
+
+ if phaseresp:
+ phaseplot = a[1 + freqresp]
+            Yphase = np.angle(Y, deg=True)
+ X = set_freq(phaseplot)
+ phaseplot.grid(True, **grid_style)
+ phaseplot.set_ylabel(r'Phase (${\circ}$)')
+ phaseplot.set_autoscaley_on(False)
+ phaseplot.set_ylim([-180, +180])
+ phaseplot.plot(X, Yphase, **style)
+
+ plt.tight_layout(rect=[0, 0.0, 1, 0.94])
+
+ if not file:
+ plt.show()
+ else:
+ plt.savefig(file)
+ else:
+ wavplot(data, title=title, file=file, segmentsize=segmentsize, overlap=overlap)
+
+
+def perfplot(data, labels, title='Speed', xlabel='X', units='ms', file=None):
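+    # Each row of data is (tick_label, series-1 value, series-2 value, ...).
+    # Example (hypothetical values): perfplot([['64', 1.2, 1.5], ['128', 2.3, 2.9]], ['kfr', 'other'])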
+
+ styles = [
+ {'color': '#F6511D', 'linestyle': '-', 'marker': 'o', 'markersize': 10.0, 'markeredgecolor': '#FFFFFF'},
+ {'color': '#00A6ED', 'linestyle': '-', 'marker': 'o', 'markersize': 10.0, 'markeredgecolor': '#FFFFFF'},
+ {'color': '#FFB400', 'linestyle': '-', 'marker': 'o', 'markersize': 10.0, 'markeredgecolor': '#FFFFFF'},
+ {'color': '#7FB800', 'linestyle': '-', 'marker': 'o', 'markersize': 10.0, 'markeredgecolor': '#FFFFFF'},
+ {'color': '#0D2C54', 'linestyle': '-', 'marker': 'o', 'markersize': 10.0, 'markeredgecolor': '#FFFFFF'},
+ ]
+ grid_style = {'color': '#777777'}
+    fig, ax = plt.subplots()
+    fig.suptitle(title, fontsize=16)  # the title parameter was previously ignored
+    ax.grid(True, **grid_style)
+    data = list(map(list, zip(*data)))  # transpose; list() is required so indexing works under Python 3
+    ticks = data[0]
+    data = data[1:]
+ for d, s, l in zip(data, styles, labels):
+ ax.set_xlabel(xlabel)
+ ax.set_ylabel(units)
+        x = np.linspace(0, len(d), len(d), False)
+ ax.plot(x, d, linewidth=1.6, label=l, **s)
+
+ ax.set_ylim(bottom=0.0)
+    ax.legend(loc='lower center', shadow=True)
+
+ plt.xticks(x, ticks, rotation='vertical')
+ plt.tight_layout(rect=[0, 0.0, 1, 0.94])
+
+ if not file:
+ plt.show()
+ else:
+ plt.savefig(file)
diff --git a/dspplot/setup.py b/dspplot/setup.py
@@ -0,0 +1,26 @@
+from setuptools import setup
+
+setup(name='dspplot',
+ version='0.0.1',
+ use_2to3=False,
+ author='KFRLIB.COM',
+ author_email='[email protected]',
+ maintainer='KFRLIB.COM',
+ maintainer_email='[email protected]',
+ url='https://kfrlib.com/dspplot/',
+ description="Small python plotting library for DSP purposes",
+ long_description="Small python plotting library for DSP purposes",
+ classifiers=[
+ 'Development Status :: 4 - Beta',
+ 'Programming Language :: Python :: 2',
+ 'Programming Language :: Python :: 2.7',
+ 'Programming Language :: Python :: 3',
+ 'Programming Language :: Python :: 3.4',
+ 'Programming Language :: Python :: 3.5',
+ 'Topic :: Scientific/Engineering :: Visualization',
+ ],
+ license='MIT',
+ packages=['dspplot'],
+ package_data={'dspplot': []},
+ install_requires=['matplotlib', 'numpy', 'scipy'],
+ zip_safe=False)
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
@@ -0,0 +1,34 @@
+# Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+# This file is part of KFR
+#
+# KFR is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# KFR is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with KFR.
+
+
+cmake_minimum_required(VERSION 3.0)
+
+add_compile_options(-fno-exceptions -fno-rtti)
+
+set(ALL_WARNINGS -Weverything -Wno-c++98-compat -Wno-c++98-compat-pedantic -Wno-c99-extensions -Wno-padded)
+
+add_compile_options(-march=native)
+
+link_libraries(stdc++ pthread)
+
+include_directories(../include)
+
+add_executable(biquads biquads.cpp ${KFR_SRC})
+add_executable(window window.cpp ${KFR_SRC})
+add_executable(fir fir.cpp ${KFR_SRC})
+add_executable(resampling resampling.cpp ${KFR_SRC})
+add_executable(dft dft.cpp ${KFR_SRC} ${DFT_SRC})
diff --git a/examples/biquads.cpp b/examples/biquads.cpp
@@ -0,0 +1,85 @@
+/**
+ * KFR (http://kfrlib.com)
+ * Copyright (C) 2016 D Levin
+ * See LICENSE.txt for details
+ */
+
+// library_version()
+#include <kfr/version.hpp>
+
+// print(), format()
+#include <kfr/cometa/string.hpp>
+
+#include <kfr/math.hpp>
+
+// simpleimpulse()
+#include <kfr/dsp/oscillators.hpp>
+
+// biquad*
+#include <kfr/dsp/biquad.hpp>
+
+// plot_save()
+#include <kfr/io/python_plot.hpp>
+
+using namespace kfr;
+using namespace kfr::native;
+
+int main(int argc, char** argv)
+{
+ println(library_version());
+
+ using namespace native;
+ const std::string options = "phaseresp=True";
+
+ univector<double, 128> output;
+ {
+ biquad_params<double> bq[] = { biquad_notch(0.1, 0.5), biquad_notch(0.2, 0.5), biquad_notch(0.3, 0.5),
+ biquad_notch(0.4, 0.5) };
+ output = biquad(bq, simpleimpulse());
+ }
+ plot_save("biquad_notch", output, options);
+
+ {
+ biquad_params<double> bq[] = { biquad_lowpass(0.2, 0.9) };
+ output = biquad(bq, simpleimpulse());
+ }
+ plot_save("biquad_lowpass", output, options);
+
+ {
+ biquad_params<double> bq[] = { biquad_highpass(0.3, 0.1) };
+ output = biquad(bq, simpleimpulse());
+ }
+ plot_save("biquad_highpass", output, options);
+
+ {
+ biquad_params<double> bq[] = { biquad_peak(0.3, 0.5, +9.0) };
+ output = biquad(bq, simpleimpulse());
+ }
+ plot_save("biquad_peak", output, options);
+
+ {
+ biquad_params<double> bq[] = { biquad_peak(0.3, 3.0, -2.0) };
+ output = biquad(bq, simpleimpulse());
+ }
+ plot_save("biquad_peak2", output, options);
+
+ {
+ biquad_params<double> bq[] = { biquad_lowshelf(0.3, -1.0) };
+ output = biquad(bq, simpleimpulse());
+ }
+ plot_save("biquad_lowshelf", output, options);
+
+ {
+ biquad_params<double> bq[] = { biquad_highshelf(0.3, +9.0) };
+ output = biquad(bq, simpleimpulse());
+ }
+ plot_save("biquad_highshelf", output, options);
+
+ {
+ biquad_params<double> bq[] = { biquad_bandpass(0.25, 0.2) };
+ output = biquad(bq, simpleimpulse());
+ }
+ plot_save("biquad_bandpass", output, options);
+
+ return 0;
+}
diff --git a/examples/dft.cpp b/examples/dft.cpp
@@ -0,0 +1,66 @@
+/**
+ * KFR (http://kfrlib.com)
+ * Copyright (C) 2016 D Levin
+ * See LICENSE.txt for details
+ */
+
+// library_version()
+#include <kfr/version.hpp>
+
+#include <kfr/io/tostring.hpp>
+
+// print(), format()
+#include <kfr/cometa/string.hpp>
+
+#include <kfr/dft/fft.hpp>
+#include <kfr/dft/reference_dft.hpp>
+#include <kfr/dsp/oscillators.hpp>
+#include <kfr/dsp/units.hpp>
+#include <kfr/expressions/basic.hpp>
+#include <kfr/expressions/operators.hpp>
+#include <kfr/expressions/reduce.hpp>
+#include <kfr/math.hpp>
+#include <kfr/misc/random.hpp>
+#include <kfr/vec.hpp>
+
+using namespace kfr;
+
+int main(int argc, char** argv)
+{
+ println(library_version());
+
+ // fft size
+ const size_t size = 128;
+ using float_type = double;
+
+ // initialize input & output buffers
+ univector<complex<float_type>, size> in = sin(linspace(0.0, c_pi<float_type, 2> * 4.0, size));
+ univector<complex<float_type>, size> out = scalar(qnan);
+
+ // initialize fft
+ const dft_plan<float_type> dft(size);
+
+ // allocate work buffer for fft (if needed)
+ univector<u8> temp(dft.temp_size);
+
+ // perform forward fft
+ dft.execute(out, in, temp);
+
+ // scale output
+ out = out / size;
+
+ // get magnitude and convert to decibels
+ univector<float_type, size> dB = amp_to_dB(cabs(out));
+
+ println("max = ", max(dB));
+ println("min = ", min(dB));
+ println("mean = ", mean(dB));
+ println("rms = ", rms(dB));
+
+ println(in);
+ println();
+ println(dB);
+ (void)argc;
+ (void)argv;
+ return 0;
+}
diff --git a/examples/fir.cpp b/examples/fir.cpp
@@ -0,0 +1,73 @@
+/**
+ * KFR (http://kfrlib.com)
+ * Copyright (C) 2016 D Levin
+ * See LICENSE.txt for details
+ */
+
+// library_version()
+#include <kfr/version.hpp>
+
+// print(), format()
+#include <kfr/cometa/string.hpp>
+
+#include <kfr/math.hpp>
+
+// expression_pointer<>, to_pointer()
+#include <kfr/expressions/pointer.hpp>
+
+// simpleimpulse()
+#include <kfr/dsp/oscillators.hpp>
+
+// fir*
+#include <kfr/dsp/fir.hpp>
+
+// plot_save()
+#include <kfr/io/python_plot.hpp>
+
+#include <iostream>
+
+using namespace kfr;
+using namespace kfr::native;
+
+int main(int argc, char** argv)
+{
+ println(library_version());
+
+ using namespace native;
+ const std::string options = "phaseresp=False";
+
+ univector<double, 15> taps15;
+ univector<double, 127> taps127;
+ univector<double, 8191> taps8191;
+
+ expression_pointer<double> hann = to_pointer(window_hann(taps15.size()));
+
+ expression_pointer<double> kaiser = to_pointer(window_kaiser(taps127.size(), 3.0));
+
+ expression_pointer<double> blackman_harris = to_pointer(window_blackman_harris(taps8191.size()));
+
+ fir_lowpass(taps15, 0.15, hann, true);
+ plot_save("fir_lowpass_hann", taps15, options + ", title='15-point lowpass FIR, Hann window'");
+
+ fir_lowpass(taps127, 0.2, kaiser, true);
+ plot_save("fir_lowpass_kaiser", taps127,
+ options + ", title=r'127-point lowpass FIR, Kaiser window ($\\alpha=3.0$)'");
+
+ fir_highpass(taps127, 0.2, kaiser, true);
+ plot_save("fir_highpass_kaiser", taps127,
+ options + ", title=r'127-point highpass FIR, Kaiser window ($\\alpha=3.0$)'");
+
+ fir_bandpass(taps127, 0.2, 0.4, kaiser, true);
+ plot_save("fir_bandpass_kaiser", taps127,
+ options + ", title=r'127-point bandpass FIR, Kaiser window ($\\alpha=3.0$)'");
+
+ fir_bandstop(taps127, 0.2, 0.4, kaiser, true);
+ plot_save("fir_bandstop_kaiser", taps127,
+ options + ", title=r'127-point bandstop FIR, Kaiser window ($\\alpha=3.0$)'");
+
+ fir_lowpass(taps8191, 0.15, blackman_harris, true);
+ plot_save("fir_lowpass_blackman", taps8191,
+ options + ", title='8191-point lowpass FIR, Blackman-Harris window'");
+
+ return 0;
+}
diff --git a/examples/resampling.cpp b/examples/resampling.cpp
@@ -0,0 +1,111 @@
+/**
+ * KFR (http://kfrlib.com)
+ * Copyright (C) 2016 D Levin
+ * See LICENSE.txt for details
+ */
+
+// library_version()
+#include <kfr/version.hpp>
+
+// print(), format()
+#include <kfr/cometa/string.hpp>
+
+#include <kfr/math.hpp>
+
+// resample*
+#include <kfr/dsp/resample.hpp>
+
+// file*
+#include <kfr/io/audiofile.hpp>
+
+// swept
+#include <kfr/dsp/oscillators.hpp>
+
+// operator overloading for expressions
+#include <kfr/expressions/operators.hpp>
+
+// plot_save()
+#include <kfr/io/python_plot.hpp>
+
+#include <iostream>
+
+using namespace kfr;
+using namespace kfr::native;
+
+constexpr size_t input_sr = 96000;
+constexpr size_t output_sr = 44100;
+constexpr size_t len = 96000 * 6;
+constexpr f64 i32max = 2147483647.0;
+
+int main(int argc, char** argv)
+{
+ println(library_version());
+
+ using namespace native;
+ const std::string options = "phaseresp=False";
+
+ univector<f64> swept_sine = swept(0.5, len);
+
+ {
+ auto r = resampler(resample_quality::high, output_sr, input_sr, 1.0, 0.496);
+ univector<f64> resampled(len * output_sr / input_sr);
+
+ const size_t destsize = r(resampled.data(), swept_sine);
+
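+        // scale to the full 32-bit integer range and clamp to avoid overflow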
+ univector<i32> i32data = clamp(resampled.slice(0, destsize) * i32max, -i32max, +i32max);
+ univector2d<i32> data = { i32data };
+
+ auto wr = sequential_file_writer("audio_high_quality.wav");
+ audio_encode(wr, data, audioformat(data, output_sr));
+
+ plot_save("audio_high_quality", "audio_high_quality.wav", "");
+ }
+
+ {
+ auto r = resampler(resample_quality::normal, output_sr, input_sr, 1.0, 0.496);
+ univector<f64> resampled(len * output_sr / input_sr);
+
+ const size_t destsize = r(resampled.data(), swept_sine);
+
+ univector<i32> i32data = clamp(resampled.slice(0, destsize) * i32max, -i32max, +i32max);
+ univector2d<i32> data = { i32data };
+
+ auto wr = sequential_file_writer("audio_normal_quality.wav");
+ audio_encode(wr, data, audioformat(data, output_sr));
+
+ plot_save("audio_normal_quality", "audio_normal_quality.wav", "");
+ }
+
+ {
+ auto r = resampler(resample_quality::low, output_sr, input_sr, 1.0, 0.496);
+ univector<f64> resampled(len * output_sr / input_sr);
+
+ const size_t destsize = r(resampled.data(), swept_sine);
+
+ univector<i32> i32data = clamp(resampled.slice(0, destsize) * i32max, -i32max, +i32max);
+ univector2d<i32> data = { i32data };
+
+ auto wr = sequential_file_writer("audio_low_quality.wav");
+ audio_encode(wr, data, audioformat(data, output_sr));
+
+ plot_save("audio_low_quality", "audio_low_quality.wav", "");
+ }
+
+ {
+ auto r = resampler(resample_quality::draft, output_sr, input_sr, 1.0, 0.496);
+ univector<f64> resampled(len * output_sr / input_sr);
+
+ const size_t destsize = r(resampled.data(), swept_sine);
+
+ univector<i32> i32data = clamp(resampled.slice(0, destsize) * i32max, -i32max, +i32max);
+ univector2d<i32> data = { i32data };
+
+ auto wr = sequential_file_writer("audio_draft_quality.wav");
+ audio_encode(wr, data, audioformat(data, output_sr));
+
+ plot_save("audio_draft_quality", "audio_draft_quality.wav", "");
+ }
+
+ return 0;
+}
diff --git a/examples/window.cpp b/examples/window.cpp
@@ -0,0 +1,76 @@
+/**
+ * KFR (http://kfrlib.com)
+ * Copyright (C) 2016 D Levin
+ * See LICENSE.txt for details
+ */
+
+// library_version()
+#include <kfr/version.hpp>
+
+// print(), format()
+#include <kfr/cometa/string.hpp>
+
+#include <kfr/math.hpp>
+
+// window*
+#include <kfr/dsp/window.hpp>
+
+// plot_save()
+#include <kfr/io/python_plot.hpp>
+
+using namespace kfr;
+using namespace kfr::native;
+
+int main(int argc, char** argv)
+{
+ println(library_version());
+
+ using namespace native;
+ const std::string options = "freqresp=True, dots=True, padwidth=1024, "
+ "log_freq=False, horizontal=False, normalized_freq=True";
+
+ univector<double, 64> output;
+ output = window_hann(output.size());
+ plot_save("window_hann", output, options + ", title='Hann window'");
+
+ output = window_hamming(output.size());
+ plot_save("window_hamming", output, options + ", title='Hamming window'");
+
+ output = window_blackman(output.size());
+ plot_save("window_blackman", output, options + ", title='Blackman window'");
+
+ output = window_blackman_harris(output.size());
+ plot_save("window_blackman_harris", output, options + ", title='Blackman-Harris window'");
+
+ output = window_gaussian(output.size());
+ plot_save("window_gaussian", output, options + ", title='Gaussian window'");
+
+ output = window_triangular(output.size());
+ plot_save("window_triangular", output, options + ", title='Triangular window'");
+
+ output = window_bartlett(output.size());
+ plot_save("window_bartlett", output, options + ", title='Bartlett window'");
+
+ output = window_cosine(output.size());
+ plot_save("window_cosine", output, options + ", title='Cosine window'");
+
+ output = window_bartlett_hann(output.size());
+ plot_save("window_bartlett_hann", output, options + ", title='Bartlett-Hann window'");
+
+ output = window_bohman(output.size());
+ plot_save("window_bohman", output, options + ", title='Bohman window'");
+
+ output = window_lanczos(output.size());
+ plot_save("window_lanczos", output, options + ", title='Lanczos window'");
+
+ output = window_flattop(output.size());
+ plot_save("window_flattop", output, options + ", title='Flat top window'");
+
+ output = window_kaiser(output.size(), 2.5);
+ plot_save("window_kaiser", output, options + ", title='Kaiser window'");
+
+ return 0;
+}
diff --git a/format-all.py b/format-all.py
@@ -0,0 +1,26 @@
+#!/usr/bin/env python
+from __future__ import print_function
+
+import fnmatch
+import os
+import subprocess
+import sys
+import glob
+
+path = os.path.dirname(os.path.realpath(__file__))
+
+filenames = []
+for root, dirnames, files in os.walk(path):
+ for filename in fnmatch.filter(files, '*.hpp'):
+ filenames.append(os.path.join(root, filename))
+ for filename in fnmatch.filter(files, '*.h'):
+ filenames.append(os.path.join(root, filename))
+ for filename in fnmatch.filter(files, '*.cpp'):
+ filenames.append(os.path.join(root, filename))
+
+for filename in filenames:
+    print(filename, '...')
+ subprocess.call(['clang-format', '-i', filename])
+ # Fix clang-format bug: https://llvm.org/bugs/show_bug.cgi?id=26125
+ for tmp_file in glob.glob(filename+'*.tmp'):
+ os.remove(tmp_file)
diff --git a/img/fft_performance.png b/img/fft_performance.png
Binary files differ.
diff --git a/include/kfr/all.hpp b/include/kfr/all.hpp
@@ -0,0 +1,84 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+
+#include "base/abs.hpp"
+#include "base/asin_acos.hpp"
+#include "base/atan.hpp"
+#include "base/complex.hpp"
+#include "base/constants.hpp"
+#include "base/digitreverse.hpp"
+#include "base/dispatch.hpp"
+#include "base/function.hpp"
+#include "base/gamma.hpp"
+#include "base/log_exp.hpp"
+#include "base/logical.hpp"
+#include "base/memory.hpp"
+#include "base/min_max.hpp"
+#include "base/operators.hpp"
+#include "base/read_write.hpp"
+#include "base/round.hpp"
+#include "base/saturation.hpp"
+#include "base/select.hpp"
+#include "base/shuffle.hpp"
+#include "base/sin_cos.hpp"
+#include "base/sinh_cosh.hpp"
+#include "base/sqrt.hpp"
+#include "base/tan.hpp"
+#include "base/types.hpp"
+#include "base/univector.hpp"
+#include "base/vec.hpp"
+#include "data/bitrev.hpp"
+#include "data/sincos.hpp"
+#include "dft/bitrev.hpp"
+#include "dft/fft.hpp"
+#include "dft/ft.hpp"
+#include "dft/reference_dft.hpp"
+#include "dispatch/cpuid.hpp"
+#include "dispatch/runtimedispatch.hpp"
+#include "dsp/biquad.hpp"
+#include "dsp/fir.hpp"
+#include "dsp/goertzel.hpp"
+#include "dsp/interpolation.hpp"
+#include "dsp/oscillators.hpp"
+#include "dsp/resample.hpp"
+#include "dsp/speaker.hpp"
+#include "dsp/units.hpp"
+#include "dsp/weighting.hpp"
+#include "dsp/window.hpp"
+#include "expressions/basic.hpp"
+#include "expressions/conversion.hpp"
+#include "expressions/generators.hpp"
+#include "expressions/operators.hpp"
+#include "expressions/pointer.hpp"
+#include "expressions/reduce.hpp"
+#include "io/audiofile.hpp"
+#include "io/file.hpp"
+#include "io/python_plot.hpp"
+#include "io/tostring.hpp"
+#include "math.hpp"
+#include "misc/compiletime.hpp"
+#include "misc/random.hpp"
+#include "misc/small_buffer.hpp"
+#include "misc/sort.hpp"
+#include "vec.hpp"
+#include "version.hpp"
diff --git a/include/kfr/base/abs.hpp b/include/kfr/base/abs.hpp
@@ -0,0 +1,138 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+
+#include "function.hpp"
+#include "operators.hpp"
+#include "select.hpp"
+
+#pragma clang diagnostic push
+#if CID_HAS_WARNING("-Winaccessible-base")
+#pragma clang diagnostic ignored "-Winaccessible-base"
+#endif
+
+namespace kfr
+{
+
+namespace internal
+{
+
+template <cpu_t cpu = cpu_t::native>
+struct in_abs : in_abs<older(cpu)>
+{
+ struct fn_abs : in_abs<older(cpu)>::fn_abs, fn_disabled
+ {
+ };
+};
+
+template <>
+struct in_abs<cpu_t::sse2> : in_select<cpu_t::sse2>
+{
+ constexpr static cpu_t cpu = cpu_t::sse2;
+
+private:
+ using in_select<cpu_t::sse2>::select;
+
+public:
+ template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)>
+ KFR_SINTRIN vec<T, N> abs(vec<T, N> value)
+ {
+ return select(value >= T(), value, -value);
+ }
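+    // For floating-point types abs only needs to clear the sign bit:
+    // invhighbitmask<T> is all-ones except the most significant bit.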
+ template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)>
+ KFR_SINTRIN vec<T, N> abs(vec<T, N> value)
+ {
+ return value & invhighbitmask<T>;
+ }
+
+ KFR_HANDLE_ALL(abs)
+ KFR_HANDLE_SCALAR(abs)
+ KFR_SPEC_FN(in_abs, abs)
+};
+
+template <>
+struct in_abs<cpu_t::ssse3> : in_abs<cpu_t::sse2>, in_select<cpu_t::sse2>
+{
+ constexpr static cpu_t cpu = cpu_t::ssse3;
+
+private:
+ using in_select<cpu_t::sse2>::select;
+
+public:
+ template <size_t N>
+ KFR_SINTRIN vec<i64, N> abs(vec<i64, N> value)
+ {
+ return select(value >= 0, value, -value);
+ }
+
+ KFR_AINTRIN i32sse abs(i32sse value) { return _mm_abs_epi32(*value); }
+ KFR_AINTRIN i16sse abs(i16sse value) { return _mm_abs_epi16(*value); }
+ KFR_AINTRIN i8sse abs(i8sse value) { return _mm_abs_epi8(*value); }
+
+ template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)>
+ KFR_SINTRIN vec<T, N> abs(vec<T, N> value)
+ {
+ return value & invhighbitmask<T>;
+ }
+
+ KFR_HANDLE_ALL(abs)
+ KFR_HANDLE_SCALAR(abs)
+ KFR_SPEC_FN(in_abs, abs)
+};
+
+template <>
+struct in_abs<cpu_t::avx2> : in_abs<cpu_t::ssse3>
+{
+ constexpr static cpu_t cpu = cpu_t::avx2;
+ using in_abs<cpu_t::ssse3>::abs;
+
+ KFR_AINTRIN i32avx abs(i32avx value) { return _mm256_abs_epi32(*value); }
+ KFR_AINTRIN i16avx abs(i16avx value) { return _mm256_abs_epi16(*value); }
+ KFR_AINTRIN i8avx abs(i8avx value) { return _mm256_abs_epi8(*value); }
+
+ KFR_HANDLE_ALL(abs)
+ KFR_HANDLE_SCALAR(abs)
+ KFR_SPEC_FN(in_abs, abs)
+};
+}
+
+namespace native
+{
+using fn_abs = internal::in_abs<>::fn_abs;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> abs(const T1& x)
+{
+ return internal::in_abs<>::abs(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_abs, E1> abs(E1&& x)
+{
+ return { fn_abs(), std::forward<E1>(x) };
+}
+}
+}
+
+#pragma clang diagnostic pop
diff --git a/include/kfr/base/asin_acos.hpp b/include/kfr/base/asin_acos.hpp
@@ -0,0 +1,104 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+
+#include "abs.hpp"
+#include "atan.hpp"
+#include "constants.hpp"
+#include "function.hpp"
+#include "min_max.hpp"
+#include "operators.hpp"
+#include "select.hpp"
+#include "shuffle.hpp"
+#include "sqrt.hpp"
+
+#pragma clang diagnostic push
+#if CID_HAS_WARNING("-Winaccessible-base")
+#pragma clang diagnostic ignored "-Winaccessible-base"
+#endif
+
+namespace kfr
+{
+
+namespace internal
+{
+
+template <cpu_t cpu = cpu_t::native>
+struct in_asin_acos : private in_select<cpu>, private in_atan<cpu>, private in_sqrt<cpu>
+{
+private:
+ using in_atan<cpu>::atan2;
+ using in_sqrt<cpu>::sqrt;
+
+public:
+ template <typename T, size_t N>
+ KFR_SINTRIN vec<T, N> asin(vec<T, N> x)
+ {
+ return atan2(x, sqrt(T(1) - x * x));
+ }
+
+ template <typename T, size_t N>
+ KFR_SINTRIN vec<T, N> acos(vec<T, N> x)
+ {
+ return atan2(sqrt(T(1) - x * x), x);
+ }
+ KFR_SPEC_FN(in_asin_acos, asin)
+ KFR_SPEC_FN(in_asin_acos, acos)
+};
+}
+
+namespace native
+{
+using fn_asin = internal::in_asin_acos<>::fn_asin;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> asin(const T1& x)
+{
+ return internal::in_asin_acos<>::asin(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_asin, E1> asin(E1&& x)
+{
+ return { fn_asin(), std::forward<E1>(x) };
+}
+
+using fn_acos = internal::in_asin_acos<>::fn_acos;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> acos(const T1& x)
+{
+ return internal::in_asin_acos<>::acos(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_acos, E1> acos(E1&& x)
+{
+ return { fn_acos(), std::forward<E1>(x) };
+}
+}
+}
+
+#pragma clang diagnostic pop
diff --git a/include/kfr/base/atan.hpp b/include/kfr/base/atan.hpp
@@ -0,0 +1,267 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+#include "abs.hpp"
+#include "constants.hpp"
+#include "function.hpp"
+#include "operators.hpp"
+#include "select.hpp"
+#include "sin_cos.hpp"
+
+#pragma clang diagnostic push
+#if CID_HAS_WARNING("-Winaccessible-base")
+#pragma clang diagnostic ignored "-Winaccessible-base"
+#endif
+
+namespace kfr
+{
+namespace internal
+{
+template <cpu_t c = cpu_t::native, cpu_t cc = c>
+struct in_atan : in_trig<cc>, in_select<cc>, in_round<cc>, in_abs<cc>
+{
+private:
+ using in_abs<cc>::abs;
+ using in_round<cc>::floor;
+ using in_select<cc>::select;
+ using in_trig<cc>::mask_horner;
+ using in_select<cc>::sign;
+
+public:
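+    // atan2k is the core kernel: it reduces (y, x) to a ratio s in [0, 1]
+    // while tracking the octant in q, evaluates a minimax polynomial in s²,
+    // and finally adds q·π/2 (the coefficients match the SLEEF library).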
+ template <size_t N>
+ KFR_SINTRIN vec<f32, N> atan2k(vec<f32, N> y, vec<f32, N> x)
+ {
+ vec<f32, N> s, t, u;
+ vec<i32, N> q;
+ q = select(x < 0, -2, 0);
+ x = select(x < 0, -x, x);
+ mask<i32, N> m;
+ m = y > x;
+ t = x;
+ x = select(m, y, x);
+ y = select(m, -t, y);
+ q = select(m, q + 1, q);
+ s = y / x;
+ t = s * s;
+ u = 0.00282363896258175373077393f;
+ u = fmadd(u, t, -0.0159569028764963150024414f);
+ u = fmadd(u, t, 0.0425049886107444763183594f);
+ u = fmadd(u, t, -0.0748900920152664184570312f);
+ u = fmadd(u, t, 0.106347933411598205566406f);
+ u = fmadd(u, t, -0.142027363181114196777344f);
+ u = fmadd(u, t, 0.199926957488059997558594f);
+ u = fmadd(u, t, -0.333331018686294555664062f);
+ t = u * t * s + s;
+ t = cast<f32>(q) * 1.5707963267948966192313216916398f + t;
+ return t;
+ }
+ template <size_t N>
+ KFR_SINTRIN vec<f64, N> atan2k(vec<f64, N> y, vec<f64, N> x)
+ {
+ vec<f64, N> s, t, u;
+ vec<i64, N> q;
+ q = select(x < 0, -2ll, 0ll);
+ x = select(x < 0, -x, x);
+ vec<i64, N> m;
+ m = y > x;
+ t = x;
+ x = select(m, y, x);
+ y = select(m, -t, y);
+ q = select(m, q + 1ll, q);
+ s = y / x;
+ t = s * s;
+ u = -1.88796008463073496563746e-05;
+ u = fmadd(u, t, 0.000209850076645816976906797);
+ u = fmadd(u, t, -0.00110611831486672482563471);
+ u = fmadd(u, t, 0.00370026744188713119232403);
+ u = fmadd(u, t, -0.00889896195887655491740809);
+ u = fmadd(u, t, 0.016599329773529201970117);
+ u = fmadd(u, t, -0.0254517624932312641616861);
+ u = fmadd(u, t, 0.0337852580001353069993897);
+ u = fmadd(u, t, -0.0407629191276836500001934);
+ u = fmadd(u, t, 0.0466667150077840625632675);
+ u = fmadd(u, t, -0.0523674852303482457616113);
+ u = fmadd(u, t, 0.0587666392926673580854313);
+ u = fmadd(u, t, -0.0666573579361080525984562);
+ u = fmadd(u, t, 0.0769219538311769618355029);
+ u = fmadd(u, t, -0.090908995008245008229153);
+ u = fmadd(u, t, 0.111111105648261418443745);
+ u = fmadd(u, t, -0.14285714266771329383765);
+ u = fmadd(u, t, 0.199999999996591265594148);
+ u = fmadd(u, t, -0.333333333333311110369124);
+ t = u * t * s + s;
+ t = cast<f64>(q) * 1.5707963267948966192313216916398 + t;
+ return t;
+ }
+ template <size_t N>
+ KFR_SINTRIN vec<f32, N> atan2(vec<f32, N> y, vec<f32, N> x)
+ {
+ vec<f32, N> r = atan2k(abs(y), x);
+ constexpr f32 pi = 3.1415926535897932384626433832795f;
+ constexpr f32 pi_over_2 = 1.5707963267948966192313216916398f;
+ constexpr f32 pi_over_4 = 0.78539816339744830961566084581988f;
+ r = mulsign(r, x);
+        r = select(isinf(x) || x == 0.0f, pi_over_2 - select(isinf(x), mulsign(pi_over_2, x), 0.0f), r);
+        r = select(isinf(y), pi_over_2 - select(isinf(x), mulsign(pi_over_4, x), 0.0f), r);
+ r = select(y == 0.0f, fbitcast(ibitcast(sign(x) == -1.0f) & ibitcast(pi)), r);
+ r = fbitcast(ibitcast(isnan(x) || isnan(y)) | ibitcast(mulsign(r, y)));
+ return r;
+ }
+ template <size_t N>
+ KFR_SINTRIN vec<f64, N> atan2(vec<f64, N> y, vec<f64, N> x)
+ {
+ vec<f64, N> r = atan2k(abs(y), x);
+ constexpr f64 pi = 3.1415926535897932384626433832795;
+ constexpr f64 pi_over_2 = 1.5707963267948966192313216916398;
+ constexpr f64 pi_over_4 = 0.78539816339744830961566084581988;
+ r = mulsign(r, x);
+        r = select(isinf(x) || x == 0.0, pi_over_2 - select(isinf(x), mulsign(pi_over_2, x), 0.0), r);
+        r = select(isinf(y), pi_over_2 - select(isinf(x), mulsign(pi_over_4, x), 0.0), r);
+ r = select(y == 0.0, fbitcast(ibitcast(sign(x) == -1.0) & ibitcast(pi)), r);
+ r = fbitcast(ibitcast(isnan(x) || isnan(y)) | ibitcast(mulsign(r, y)));
+ return r;
+ }
+ template <size_t N>
+ KFR_SINTRIN vec<f32, N> atan(vec<f32, N> s)
+ {
+ vec<f32, N> t, u;
+ vec<i32, N> q;
+ q = select(s < 0.f, 2, 0);
+ s = select(s < 0.f, -s, s);
+ q = select(s > 1.f, q | 1, q);
+ s = select(s > 1.f, 1.0f / s, s);
+ t = s * s;
+ u = 0.00282363896258175373077393f;
+ u = fmadd(u, t, -0.0159569028764963150024414f);
+ u = fmadd(u, t, 0.0425049886107444763183594f);
+ u = fmadd(u, t, -0.0748900920152664184570312f);
+ u = fmadd(u, t, 0.106347933411598205566406f);
+ u = fmadd(u, t, -0.142027363181114196777344f);
+ u = fmadd(u, t, 0.199926957488059997558594f);
+ u = fmadd(u, t, -0.333331018686294555664062f);
+ t = s + s * (t * u);
+ t = select((q & 1) != 0, 1.570796326794896557998982f - t, t);
+ t = select((q & 2) != 0, -t, t);
+ return t;
+ }
+ template <size_t N>
+ KFR_SINTRIN vec<f64, N> atan(vec<f64, N> s)
+ {
+ vec<f64, N> t, u;
+ vec<i64, N> q;
+ q = select(s < 0.0, 2ll, 0ll);
+ s = select(s < 0.0, -s, s);
+ q = select(s > 1.0, q | 1, q);
+ s = select(s > 1.0, 1.0 / s, s);
+ t = s * s;
+ u = -1.88796008463073496563746e-05;
+ u = fmadd(u, t, 0.000209850076645816976906797);
+ u = fmadd(u, t, -0.00110611831486672482563471);
+ u = fmadd(u, t, 0.00370026744188713119232403);
+ u = fmadd(u, t, -0.00889896195887655491740809);
+ u = fmadd(u, t, 0.016599329773529201970117);
+ u = fmadd(u, t, -0.0254517624932312641616861);
+ u = fmadd(u, t, 0.0337852580001353069993897);
+ u = fmadd(u, t, -0.0407629191276836500001934);
+ u = fmadd(u, t, 0.0466667150077840625632675);
+ u = fmadd(u, t, -0.0523674852303482457616113);
+ u = fmadd(u, t, 0.0587666392926673580854313);
+ u = fmadd(u, t, -0.0666573579361080525984562);
+ u = fmadd(u, t, 0.0769219538311769618355029);
+ u = fmadd(u, t, -0.090908995008245008229153);
+ u = fmadd(u, t, 0.111111105648261418443745);
+ u = fmadd(u, t, -0.14285714266771329383765);
+ u = fmadd(u, t, 0.199999999996591265594148);
+ u = fmadd(u, t, -0.333333333333311110369124);
+ t = s + s * (t * u);
+ t = select((q & 1) != 0, 1.570796326794896557998982 - t, t);
+ t = select((q & 2) != 0, -t, t);
+ return t;
+ }
+ template <typename T>
+ KFR_SINTRIN T atandeg(const T& x)
+ {
+ return atan(x) * c_radtodeg<T>;
+ }
+ template <typename T1, typename T2>
+ KFR_SINTRIN common_type<T1, T2> atan2deg(const T1& y, const T2& x)
+ {
+ return atan2(y, x) * c_radtodeg<common_type<T1, T2>>;
+ }
+ KFR_HANDLE_SCALAR(atan)
+ KFR_HANDLE_SCALAR(atan2)
+ KFR_SPEC_FN(in_atan, atan)
+ KFR_SPEC_FN(in_atan, atandeg)
+ KFR_SPEC_FN(in_atan, atan2)
+ KFR_SPEC_FN(in_atan, atan2deg)
+};
+}
+namespace native
+{
+using fn_atan = internal::in_atan<>::fn_atan;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INLINE ftype<T1> atan(const T1& x)
+{
+    return internal::in_atan<>::atan(x);
+}
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INLINE expr_func<fn_atan, E1> atan(E1&& x)
+{
+    return { fn_atan(), std::forward<E1>(x) };
+}
+using fn_atan2 = internal::in_atan<>::fn_atan2;
+template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)>
+KFR_INLINE ftype<common_type<T1, T2>> atan2(const T1& y, const T2& x)
+{
+ return internal::in_atan<>::atan2(y, x);
+}
+template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)>
+KFR_INLINE expr_func<fn_atan2, E1, E2> atan2(E1&& y, E2&& x)
+{
+ return { fn_atan2(), std::forward<E1>(y), std::forward<E2>(x) };
+}
+using fn_atandeg = internal::in_atan<>::fn_atandeg;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INLINE ftype<T1> atandeg(const T1& x)
+{
+    return internal::in_atan<>::atandeg(x);
+}
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INLINE expr_func<fn_atandeg, E1> atandeg(E1&& x)
+{
+    return { fn_atandeg(), std::forward<E1>(x) };
+}
+using fn_atan2deg = internal::in_atan<>::fn_atan2deg;
+template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)>
+KFR_INLINE ftype<common_type<T1, T2>> atan2deg(const T1& y, const T2& x)
+{
+ return internal::in_atan<>::atan2deg(y, x);
+}
+template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)>
+KFR_INLINE expr_func<fn_atan2deg, E1, E2> atan2deg(E1&& y, E2&& x)
+{
+ return { fn_atan2deg(), std::forward<E1>(y), std::forward<E2>(x) };
+}
+}
+}
+#pragma clang diagnostic pop
diff --git a/include/kfr/base/complex.hpp b/include/kfr/base/complex.hpp
@@ -0,0 +1,610 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+#include "abs.hpp"
+#include "atan.hpp"
+#include "constants.hpp"
+#include "function.hpp"
+#include "log_exp.hpp"
+#include "min_max.hpp"
+#include "operators.hpp"
+#include "select.hpp"
+#include "sin_cos.hpp"
+#include "sinh_cosh.hpp"
+#include "sqrt.hpp"
+
+#pragma clang diagnostic push
+#if CID_HAS_WARNING("-Winaccessible-base")
+#pragma clang diagnostic ignored "-Winaccessible-base"
+#endif
+
+namespace kfr
+{
+
+template <typename T>
+struct complex
+{
+ constexpr static bool is_pod = true;
+ constexpr complex() noexcept = default;
+ constexpr complex(T re) noexcept : re(re), im(0) {}
+ constexpr complex(T re, T im) noexcept : re(re), im(im) {}
+ constexpr complex(const complex&) noexcept = default;
+ constexpr complex(complex&&) noexcept = default;
+ template <typename U>
+ constexpr complex(const complex<U>& other) noexcept : re(static_cast<T>(other.re)),
+ im(static_cast<T>(other.im))
+ {
+ }
+ template <typename U>
+ constexpr complex(complex<U>&& other) noexcept : re(std::move(other.re)), im(std::move(other.im))
+ {
+ }
+ constexpr complex& operator=(const complex&) noexcept = default;
+ constexpr complex& operator=(complex&&) noexcept = default;
+ constexpr const T& real() const noexcept { return re; }
+ constexpr const T& imag() const noexcept { return im; }
+ constexpr void real(T value) noexcept { re = value; }
+ constexpr void imag(T value) noexcept { im = value; }
+ T re;
+ T im;
+};
+
+using c32 = complex<f32>;
+using c64 = complex<f64>;
+using cbase = complex<fbase>;
+
+template <typename T>
+struct vec_op<complex<T>> : private vec_op<T>
+{
+ using scalar_type = T;
+ using vec_op<scalar_type>::add;
+ using vec_op<scalar_type>::sub;
+ using vec_op<scalar_type>::eq;
+ using vec_op<scalar_type>::ne;
+ using vec_op<scalar_type>::band;
+ using vec_op<scalar_type>::bor;
+ using vec_op<scalar_type>::bxor;
+ using vec_op<scalar_type>::bnot;
+ using vec_op<scalar_type>::neg;
+
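+    // Complex multiply without deinterleaving: for x = (a, b), y = (c, d),
+    // xx * dupeven(yy) = (ac, bc) and swap<2>(xx) * dupodd(yy) = (bd, ad),
+    // so subadd yields (ac - bd, bc + ad) = x * y.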
+ template <simdindex N>
+ constexpr static simd<scalar_type, N> mul(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept
+ {
+ const vec<scalar_type, N> xx = x;
+ const vec<scalar_type, N> yy = y;
+ return *subadd(xx * dupeven(yy), swap<2>(xx) * dupodd(yy));
+ }
+
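+    // Complex divide: x/y = ((ac + bd) + (bc - ad)i) / (c² + d²). Here m
+    // holds |y|² duplicated across each re/im pair; the final swap<2>
+    // restores the (re, im) order.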
+ template <simdindex N>
+ constexpr static simd<scalar_type, N> div(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept
+ {
+ const vec<scalar_type, N> xx = x;
+ const vec<scalar_type, N> yy = y;
+ const vec<scalar_type, N> m = (sqr(dupeven(yy)) + sqr(dupodd(yy)));
+ return *swap<2>(subadd(swap<2>(xx) * dupeven(yy), xx * dupodd(yy)) / m);
+ }
+};
+
+template <typename T, size_t N>
+KFR_INLINE vec<complex<T>, N> cdupreal(const vec<complex<T>, N>& x)
+{
+ return subcast<complex<T>>(dupeven(subcast<T>(x)));
+}
+KFR_FN(cdupreal)
+
+template <typename T, size_t N>
+KFR_INLINE vec<complex<T>, N> cdupimag(const vec<complex<T>, N>& x)
+{
+ return subcast<complex<T>>(dupodd(subcast<T>(x)));
+}
+KFR_FN(cdupimag)
+
+template <typename T, size_t N>
+KFR_INLINE vec<complex<T>, N> cswapreim(const vec<complex<T>, N>& x)
+{
+ return subcast<complex<T>>(swap<2>(subcast<T>(x)));
+}
+KFR_FN(cswapreim)
+
+template <typename T, size_t N>
+KFR_INLINE vec<complex<T>, N> cnegreal(const vec<complex<T>, N>& x)
+{
+ return x ^ complex<T>(-T(), T());
+}
+KFR_FN(cnegreal)
+template <typename T, size_t N>
+KFR_INLINE vec<complex<T>, N> cnegimag(const vec<complex<T>, N>& x)
+{
+ return x ^ complex<T>(T(), -T());
+}
+KFR_FN(cnegimag)
+
+template <typename T, size_t N>
+KFR_INLINE vec<complex<T>, N> cconj(const vec<complex<T>, N>& x)
+{
+ return cnegimag(x);
+}
+KFR_FN(cconj)
+
+namespace internal
+{
+template <typename T>
+struct is_complex_impl : std::false_type
+{
+};
+template <typename T>
+struct is_complex_impl<complex<T>> : std::true_type
+{
+};
+}
+
+// real to complex
+template <typename To, typename From, size_t N, KFR_ENABLE_IF(internal::is_complex_impl<To>::value)>
+constexpr KFR_INLINE vec<To, N> cast(vec<From, N> value) noexcept
+{
+ const vec<subtype<To>, N> casted = cast<subtype<To>>(value);
+ return subcast<To>(interleave(casted, zerovector(casted)));
+}
+
+// complex to complex
+template <typename To, typename From, size_t N, KFR_ENABLE_IF(internal::is_complex_impl<To>::value)>
+constexpr KFR_INLINE vec<To, N> cast(vec<complex<From>, N> value) noexcept
+{
+ return subcast<To>(cast<subtype<To>>(subcast<From>(value)));
+}
+
+// complex to real
+template <typename To, typename From, size_t N, KFR_ENABLE_IF(!internal::is_complex_impl<To>::value)>
+constexpr KFR_INLINE vec<To, N> cast(vec<complex<From>, N> value) noexcept
+{
+ static_assert(sizeof(To) == 0, "Can't cast complex to real");
+ return {};
+}
+
+template <typename T, size_t N>
+constexpr KFR_INLINE vec<complex<T>, N / 2> ccomp(const vec<T, N>& x)
+{
+ return subcast<complex<T>>(x);
+}
+
+template <typename T, size_t N>
+constexpr KFR_INLINE vec<T, N * 2> cdecom(const vec<complex<T>, N>& x)
+{
+ return subcast<T>(x);
+}
+
+template <typename T>
+constexpr KFR_INLINE T real(const complex<T>& value)
+{
+ return value.real();
+}
+template <typename T, size_t N>
+constexpr KFR_INLINE vec<T, N> real(const vec<complex<T>, N>& value)
+{
+ return even(subcast<T>(value));
+}
+
+template <typename T>
+using realtype = decltype(real(std::declval<T>()));
+template <typename T>
+using realftype = ftype<decltype(real(std::declval<T>()))>;
+
+KFR_FN(real)
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INLINE expr_func<fn_real, E1> real(E1&& x)
+{
+ return { {}, std::forward<E1>(x) };
+}
+
+template <typename T>
+constexpr KFR_INLINE T imag(const complex<T>& value)
+{
+ return value.imag();
+}
+template <typename T, size_t N>
+constexpr KFR_INLINE vec<T, N> imag(const vec<complex<T>, N>& value)
+{
+ return odd(subcast<T>(value));
+}
+KFR_FN(imag)
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INLINE expr_func<fn_imag, E1> imag(E1&& x)
+{
+ return { {}, std::forward<E1>(x) };
+}
+
+template <typename T1, typename T2 = T1, size_t N, typename T = common_type<T1, T2>>
+constexpr KFR_INLINE vec<complex<T>, N> make_complex(const vec<T1, N>& real, const vec<T2, N>& imag = T2(0))
+{
+ return subcast<complex<T>>(interleave(cast<T>(real), cast<T>(imag)));
+}
+
+template <typename T1, typename T2 = T1, typename T = common_type<T1, T2>>
+constexpr KFR_INLINE complex<T> make_complex(T1 real, T2 imag = T2(0))
+{
+ return complex<T>(cast<T>(real), cast<T>(imag));
+}
+
+namespace internal
+{
+
+template <cpu_t c = cpu_t::native>
+struct in_complex : in_select<c>, in_sin_cos<c>, in_sinh_cosh<c>, in_sqrt<c>, in_atan<c>, in_log_exp<c>
+{
+ constexpr static cpu_t cur = c;
+ using in_sqrt<c>::sqrt;
+ using in_sin_cos<c>::sincos;
+ using in_sin_cos<c>::cossin;
+ using in_sinh_cosh<c>::sinhcosh;
+ using in_sinh_cosh<c>::coshsinh;
+ using in_atan<c>::atan2;
+ using in_log_exp<c>::log;
+ using in_log_exp<c>::log2;
+ using in_log_exp<c>::log10;
+ using in_log_exp<c>::exp;
+ using in_log_exp<c>::exp2;
+ using in_log_exp<c>::exp10;
+
+ template <typename T, size_t N>
+ KFR_SINTRIN vec<complex<T>, N> csin(const vec<complex<T>, N>& x)
+ {
+ return ccomp(sincos(cdecom(cdupreal(x))) * coshsinh(cdecom(cdupimag(x))));
+ }
+ template <typename T, size_t N>
+ KFR_SINTRIN vec<complex<T>, N> csinh(const vec<complex<T>, N>& x)
+ {
+ return ccomp(sinhcosh(cdecom(cdupreal(x))) * cossin(cdecom(cdupimag(x))));
+ }
+ template <typename T, size_t N>
+ KFR_SINTRIN vec<complex<T>, N> ccos(const vec<complex<T>, N>& x)
+ {
+ return ccomp(negodd(cossin(cdecom(cdupreal(x))) * coshsinh(cdecom(cdupimag(x)))));
+ }
+ template <typename T, size_t N>
+ KFR_SINTRIN vec<complex<T>, N> ccosh(const vec<complex<T>, N>& x)
+ {
+ return ccomp(coshsinh(cdecom(cdupreal(x))) * cossin(cdecom(cdupimag(x))));
+ }
+
+ template <typename T, size_t N>
+ KFR_SINTRIN vec<T, N> cabs(const vec<complex<T>, N>& x)
+ {
+        const vec<T, N * 2> xx = sqr(cdecom(x));
+ return sqrt(even(xx) + odd(xx));
+ }
+ template <typename T, size_t N>
+ KFR_SINTRIN vec<T, N> carg(const vec<complex<T>, N>& x)
+ {
+        const vec<T, N * 2> xx = cdecom(x);
+ return atan2(even(xx), odd(xx));
+ }
+
+ template <typename T, size_t N>
+ KFR_SINTRIN vec<complex<T>, N> clog(const vec<complex<T>, N>& x)
+ {
+ return make_complex(log(cabs(x)), carg(x));
+ }
+ template <typename T, size_t N>
+ KFR_SINTRIN vec<complex<T>, N> clog2(const vec<complex<T>, N>& x)
+ {
+ return clog(x) * c_recip_log_2<T>;
+ }
+ template <typename T, size_t N>
+ KFR_SINTRIN vec<complex<T>, N> clog10(const vec<complex<T>, N>& x)
+ {
+ return clog(x) * c_recip_log_10<T>;
+ }
+
+ template <typename T, size_t N>
+ KFR_SINTRIN vec<complex<T>, N> cexp(const vec<complex<T>, N>& x)
+ {
+ return ccomp(exp(cdecom(cdupreal(x))) * cossin(cdecom(cdupimag(x))));
+ }
+ template <typename T, size_t N>
+ KFR_SINTRIN vec<complex<T>, N> cexp2(const vec<complex<T>, N>& x)
+ {
+ return cexp(x * c_log_2<T>);
+ }
+ template <typename T, size_t N>
+ KFR_SINTRIN vec<complex<T>, N> cexp10(const vec<complex<T>, N>& x)
+ {
+ return cexp(x * c_log_10<T>);
+ }
+
+ template <typename T, size_t N>
+ KFR_SINTRIN vec<complex<T>, N> polar(const vec<complex<T>, N>& x)
+ {
+ return make_complex(cabs(x), carg(x));
+ }
+ template <typename T, size_t N>
+ KFR_SINTRIN vec<complex<T>, N> cartesian(const vec<complex<T>, N>& x)
+ {
+ return cdupreal(x) * ccomp(cossin(cdecom(cdupimag(x))));
+ }
+
+ template <typename T, size_t N>
+ KFR_SINTRIN vec<T, N> cabsdup(vec<T, N> x)
+ {
+ x = sqr(x);
+ return sqrt(x + swap<2>(x));
+ }
+
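+    // csqrt uses the half-angle identity:
+    //   sqrt(a + bi) = sqrt((|z| + a)/2) + i·sign(b)·sqrt((|z| - a)/2)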
+ template <typename T, size_t N>
+ KFR_SINTRIN vec<complex<T>, N> csqrt(const vec<complex<T>, N>& x)
+ {
+        const vec<T, N> t = sqrt((cabsdup(cdecom(x)) + cdecom(cnegimag(cdupreal(x)))) * T(0.5));
+ return ccomp(select(dupodd(x) < T(), cdecom(cnegimag(ccomp(t))), t));
+ }
+
+ KFR_HANDLE_SCALAR(csin)
+ KFR_HANDLE_SCALAR(csinh)
+ KFR_HANDLE_SCALAR(ccos)
+ KFR_HANDLE_SCALAR(ccosh)
+ KFR_HANDLE_SCALAR(cabs)
+ KFR_HANDLE_SCALAR(carg)
+ KFR_HANDLE_SCALAR(clog)
+ KFR_HANDLE_SCALAR(clog2)
+ KFR_HANDLE_SCALAR(clog10)
+ KFR_HANDLE_SCALAR(cexp)
+ KFR_HANDLE_SCALAR(cexp2)
+ KFR_HANDLE_SCALAR(cexp10)
+ KFR_HANDLE_SCALAR(polar)
+ KFR_HANDLE_SCALAR(cartesian)
+ KFR_HANDLE_SCALAR(csqrt)
+
+ KFR_SPEC_FN(in_complex, csin)
+ KFR_SPEC_FN(in_complex, csinh)
+ KFR_SPEC_FN(in_complex, ccos)
+ KFR_SPEC_FN(in_complex, ccosh)
+ KFR_SPEC_FN(in_complex, cabs)
+ KFR_SPEC_FN(in_complex, carg)
+ KFR_SPEC_FN(in_complex, clog)
+ KFR_SPEC_FN(in_complex, clog2)
+ KFR_SPEC_FN(in_complex, clog10)
+ KFR_SPEC_FN(in_complex, cexp)
+ KFR_SPEC_FN(in_complex, cexp2)
+ KFR_SPEC_FN(in_complex, cexp10)
+ KFR_SPEC_FN(in_complex, polar)
+ KFR_SPEC_FN(in_complex, cartesian)
+ KFR_SPEC_FN(in_complex, csqrt)
+};
+}
+
+namespace native
+{
+using fn_csin = internal::in_complex<>::fn_csin;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> csin(const T1& x)
+{
+ return internal::in_complex<>::csin(x);
+}
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_csin, E1> csin(E1&& x)
+{
+ return { {}, std::forward<E1>(x) };
+}
+
+using fn_csinh = internal::in_complex<>::fn_csinh;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> csinh(const T1& x)
+{
+ return internal::in_complex<>::csinh(x);
+}
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_csinh, E1> csinh(E1&& x)
+{
+ return { {}, std::forward<E1>(x) };
+}
+
+using fn_ccos = internal::in_complex<>::fn_ccos;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> ccos(const T1& x)
+{
+ return internal::in_complex<>::ccos(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_ccos, E1> ccos(E1&& x)
+{
+ return { fn_ccos(), std::forward<E1>(x) };
+}
+
+using fn_ccosh = internal::in_complex<>::fn_ccosh;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> ccosh(const T1& x)
+{
+ return internal::in_complex<>::ccosh(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_ccosh, E1> ccosh(E1&& x)
+{
+ return { fn_ccosh(), std::forward<E1>(x) };
+}
+
+using fn_cabs = internal::in_complex<>::fn_cabs;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN realftype<T1> cabs(const T1& x)
+{
+ return internal::in_complex<>::cabs(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_cabs, E1> cabs(E1&& x)
+{
+ return { fn_cabs(), std::forward<E1>(x) };
+}
+
+using fn_carg = internal::in_complex<>::fn_carg;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN realftype<T1> carg(const T1& x)
+{
+ return internal::in_complex<>::carg(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_carg, E1> carg(E1&& x)
+{
+ return { fn_carg(), std::forward<E1>(x) };
+}
+
+using fn_clog = internal::in_complex<>::fn_clog;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> clog(const T1& x)
+{
+ return internal::in_complex<>::clog(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_clog, E1> clog(E1&& x)
+{
+ return { fn_clog(), std::forward<E1>(x) };
+}
+
+using fn_clog2 = internal::in_complex<>::fn_clog2;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> clog2(const T1& x)
+{
+ return internal::in_complex<>::clog2(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_clog2, E1> clog2(E1&& x)
+{
+ return { fn_clog2(), std::forward<E1>(x) };
+}
+
+using fn_clog10 = internal::in_complex<>::fn_clog10;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> clog10(const T1& x)
+{
+ return internal::in_complex<>::clog10(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_clog10, E1> clog10(E1&& x)
+{
+ return { fn_clog10(), std::forward<E1>(x) };
+}
+
+using fn_cexp = internal::in_complex<>::fn_cexp;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> cexp(const T1& x)
+{
+ return internal::in_complex<>::cexp(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_cexp, E1> cexp(E1&& x)
+{
+ return { fn_cexp(), std::forward<E1>(x) };
+}
+
+using fn_cexp2 = internal::in_complex<>::fn_cexp2;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> cexp2(const T1& x)
+{
+ return internal::in_complex<>::cexp2(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_cexp2, E1> cexp2(E1&& x)
+{
+ return { fn_cexp2(), std::forward<E1>(x) };
+}
+
+using fn_cexp10 = internal::in_complex<>::fn_cexp10;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> cexp10(const T1& x)
+{
+ return internal::in_complex<>::cexp10(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_cexp10, E1> cexp10(E1&& x)
+{
+ return { fn_cexp10(), std::forward<E1>(x) };
+}
+
+using fn_polar = internal::in_complex<>::fn_polar;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> polar(const T1& x)
+{
+ return internal::in_complex<>::polar(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_polar, E1> polar(E1&& x)
+{
+ return { fn_polar(), std::forward<E1>(x) };
+}
+
+using fn_cartesian = internal::in_complex<>::fn_cartesian;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> cartesian(const T1& x)
+{
+ return internal::in_complex<>::cartesian(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_cartesian, E1> cartesian(E1&& x)
+{
+ return { fn_cartesian(), std::forward<E1>(x) };
+}
+
+using fn_csqrt = internal::in_complex<>::fn_csqrt;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> csqrt(const T1& x)
+{
+ return internal::in_complex<>::csqrt(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_csqrt, E1> csqrt(E1&& x)
+{
+ return { fn_csqrt(), std::forward<E1>(x) };
+}
+}
+}
+namespace cometa
+{
+template <typename T>
+struct compound_type_traits<kfr::complex<T>>
+{
+ constexpr static size_t width = 2;
+ using subtype = T;
+ using deep_subtype = cometa::deep_subtype<T>;
+ constexpr static bool is_scalar = false;
+ template <typename U>
+ using rebind = kfr::complex<U>;
+ template <typename U>
+ using deep_rebind = kfr::complex<cometa::deep_rebind<subtype, U>>;
+
+ static constexpr const subtype& at(const kfr::complex<T>& value, size_t index)
+ {
+ return index == 0 ? value.real() : value.imag();
+ }
+};
+}
+
+#pragma clang diagnostic pop
diff --git a/include/kfr/base/constants.hpp b/include/kfr/base/constants.hpp
@@ -0,0 +1,93 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+
+#include "types.hpp"
+#include <limits>
+
+namespace kfr
+{
+
+// π (pi)
+// c_pi<f64, 4>    = 4π
+// c_pi<f64, 3, 4> = 3π/4
+template <typename T, int m = 1, int d = 1, typename Tsub = subtype<T>>
+constexpr Tsub c_pi = Tsub(3.1415926535897932384626433832795 * m / d);
+
+// π² (pi squared)
+// c_sqr_pi<f64, 4>    = 4π²
+// c_sqr_pi<f64, 3, 4> = 3π²/4
+template <typename T, int m = 1, int d = 1, typename Tsub = subtype<T>>
+constexpr Tsub c_sqr_pi = Tsub(9.8696044010893586188344909998762 * m / d);
+
+// 1/π (reciprocal of π)
+// c_recip_pi<f64>    = 1/π
+// c_recip_pi<f64, 4> = 4/π
+template <typename T, int m = 1, int d = 1, typename Tsub = subtype<T>>
+constexpr Tsub c_recip_pi = Tsub(0.31830988618379067153776752674503 * m / d);
+
+// degree to radian conversion factor
+template <typename T, typename Tsub = subtype<T>>
+constexpr Tsub c_degtorad = c_pi<T, 1, 180>;
+
+// radian to degree conversion factor
+template <typename T, typename Tsub = subtype<T>>
+constexpr Tsub c_radtodeg = c_recip_pi<T, 180>;
+
+// e, Euler's number
+template <typename T, int m = 1, int d = 1, typename Tsub = subtype<T>>
+constexpr Tsub c_e = Tsub(2.718281828459045235360287471352662 * m / d);
+
+template <typename T, typename Tsub = subtype<T>>
+constexpr unsigned c_mantissa_bits = sizeof(Tsub) == sizeof(f32) ? 23 : 52;
+
+template <typename T, typename Tsub = usubtype<T>>
+constexpr Tsub c_mantissa_mask = (Tsub(1) << c_mantissa_bits<T>) - 1;
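+// e.g. for f32: (1u << 23) - 1 = 0x007fffff; for f64: (1ull << 52) - 1.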
+
+template <typename T, typename Tsub = subtype<T>>
+constexpr Tsub c_epsilon = (std::numeric_limits<Tsub>::epsilon());
+
+template <typename T, typename Tsub = subtype<T>>
+constexpr Tsub c_infinity = std::numeric_limits<Tsub>::infinity();
+
+template <typename T, typename Tsub = subtype<T>>
+constexpr Tsub c_neginfinity = -std::numeric_limits<Tsub>::infinity();
+
+template <typename T, typename Tsub = subtype<T>>
+constexpr Tsub c_qnan = std::numeric_limits<Tsub>::quiet_NaN();
+
+template <typename T, typename Tsub = subtype<T>>
+constexpr Tsub c_recip_log_2 = Tsub(1.442695040888963407359924681001892137426645954);
+
+template <typename T, typename Tsub = subtype<T>>
+constexpr Tsub c_recip_log_10 = Tsub(0.43429448190325182765112891891661);
+
+template <typename T, typename Tsub = subtype<T>>
+constexpr Tsub c_log_2 = Tsub(0.69314718055994530941723212145818);
+
+template <typename T, typename Tsub = subtype<T>>
+constexpr Tsub c_log_10 = Tsub(2.3025850929940456840179914546844);
+
+template <typename T, int m = 1, int d = 1, typename Tsub = subtype<T>>
+constexpr Tsub c_sqrt_2 = Tsub(1.4142135623730950488016887242097 * m / d);
+}
diff --git a/include/kfr/base/digitreverse.hpp b/include/kfr/base/digitreverse.hpp
@@ -0,0 +1,121 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+#include "shuffle.hpp"
+#include "types.hpp"
+
+namespace kfr
+{
+
+namespace internal
+{
+
+template <size_t radix, size_t bits>
+constexpr enable_if<radix == 2, u32> digitreverse(u32 x)
+{
+ x = (((x & 0xaaaaaaaa) >> 1) | ((x & 0x55555555) << 1));
+ x = (((x & 0xcccccccc) >> 2) | ((x & 0x33333333) << 2));
+ x = (((x & 0xf0f0f0f0) >> 4) | ((x & 0x0f0f0f0f) << 4));
+ x = (((x & 0xff00ff00) >> 8) | ((x & 0x00ff00ff) << 8));
+ return ((x >> 16) | (x << 16)) >> (32 - bits);
+}
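+// e.g. digitreverse<2, 4>(0b0001) == 0b1000 (the low 4 bits are mirrored).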
+
+constexpr inline u32 bit_permute_step_simple(u32 x, u32 m, u32 shift)
+{
+ return ((x & m) << shift) | ((x >> shift) & m);
+}
+
+template <size_t radix, size_t bits>
+constexpr enable_if<radix == 4, u32> digitreverse(u32 x)
+{
+ if (bits <= 2)
+ return x;
+ if (bits <= 4)
+ {
+ x = bit_permute_step_simple(x, 0x33333333, 2); // Bit index complement 1 regroups 4 bits
+ return x >> (4 - bits);
+ }
+ if (bits <= 8)
+ {
+ x = bit_permute_step_simple(x, 0x33333333, 2); // Bit index complement 1 regroups 4 bits
+ x = bit_permute_step_simple(x, 0x0f0f0f0f, 4); // Bit index complement 2 regroups 8 bits
+ return x >> (8 - bits);
+ }
+ if (bits <= 16)
+ {
+ x = bit_permute_step_simple(x, 0x33333333, 2); // Bit index complement 1 regroups 4 bits
+ x = bit_permute_step_simple(x, 0x0f0f0f0f, 4); // Bit index complement 2 regroups 8 bits
+ x = bit_permute_step_simple(x, 0x00ff00ff, 8); // Bit index complement 3 regroups 16 bits
+ return x >> (16 - bits);
+ }
+ if (bits <= 32)
+ {
+ x = bit_permute_step_simple(x, 0x33333333, 2); // Bit index complement 1 regroups 4 bits
+ x = bit_permute_step_simple(x, 0x0f0f0f0f, 4); // Bit index complement 2 regroups 8 bits
+ x = bit_permute_step_simple(x, 0x00ff00ff, 8); // Bit index complement 3 regroups 16 bits
+ x = bit_permute_step_simple(x, 0x0000ffff, 16); // Bit index complement 4 regroups 32 bits
+ return x >> (32 - bits);
+ }
+ return x;
+}
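+// e.g. digitreverse<4, 4>(0b0001) == 0b0100 (base-4 digits, i.e. bit pairs,
+// are mirrored).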
+
+template <size_t radix, size_t bits>
+struct shuffle_index_digitreverse
+{
+ constexpr inline size_t operator()(size_t index) const
+ {
+ return digitreverse<radix, bits>(static_cast<u32>(index));
+ }
+};
+}
+
+template <size_t radix, size_t groupsize = 1, typename T, size_t N>
+KFR_INLINE vec<T, N> digitreverse(vec<T, N> x)
+{
+ return shufflevector<N, internal::shuffle_index_digitreverse<radix, ilog2(N / groupsize)>, groupsize>(x);
+}
+
+template <size_t groupsize = 1, typename T, size_t N>
+KFR_INLINE vec<T, N> bitreverse(vec<T, N> x)
+{
+ return digitreverse<2, groupsize>(x);
+}
+
+template <size_t groupsize = 1, typename T, size_t N>
+KFR_INLINE vec<T, N> digitreverse4(vec<T, N> x)
+{
+ return digitreverse<4, groupsize>(x);
+}
+
+template <size_t bits>
+constexpr inline u32 bitreverse(u32 x)
+{
+ return internal::digitreverse<2, bits>(x);
+}
+
+template <size_t bits>
+constexpr inline u32 digitreverse4(u32 x)
+{
+ return internal::digitreverse<4, bits>(x);
+}
+}
diff --git a/include/kfr/base/dispatch.hpp b/include/kfr/base/dispatch.hpp
@@ -0,0 +1,200 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+
+#include "kfr.h"
+
+#include "types.hpp"
+
+namespace kfr
+{
+
+namespace internal
+{
+
+template <typename Fn, cpu_t newcpu, typename = void>
+struct retarget_impl
+{
+ using type = Fn;
+};
+
+template <typename Fn, cpu_t newcpu>
+struct retarget_impl<Fn, newcpu, void_t<typename Fn::template retarget_this<newcpu>>>
+{
+ using type = typename Fn::template retarget_this<newcpu>;
+};
+}
+
+template <typename Fn, cpu_t newcpu>
+using retarget = typename internal::retarget_impl<Fn, newcpu>::type;
+
+template <cpu_t newcpu, typename Fn, typename NewFn = retarget<Fn, newcpu>,
+ KFR_ENABLE_IF(std::is_constructible<NewFn, Fn&&>::value)>
+KFR_INLINE NewFn retarget_func(Fn&& fn)
+{
+ return NewFn(std::move(fn));
+}
+
+template <cpu_t newcpu, typename Fn, typename NewEmptyFn = retarget<Fn, newcpu>,
+ KFR_ENABLE_IF(!std::is_constructible<NewEmptyFn, Fn&&>::value && std::is_empty<NewEmptyFn>::value &&
+ std::is_constructible<NewEmptyFn>::value)>
+KFR_INLINE NewEmptyFn retarget_func(Fn&&)
+{
+ return NewEmptyFn();
+}
+
+namespace internal
+{
+
+template <cpu_t a>
+struct cpu_caller;
+
+template <>
+struct cpu_caller<cpu_t::avx2>
+{
+ constexpr static cpu_t a = cpu_t::avx2;
+
+ template <typename Fn, typename... Args>
+ KFR_NOINLINE static KFR_USE_CPU(avx2) result_of<Fn(Args...)> call(Fn&& fn, Args&&... args)
+ {
+ return fn(std::forward<Args>(args)...);
+ }
+
+ template <typename Fn, typename... Args>
+ KFR_NOINLINE static KFR_USE_CPU(avx2) result_of<Fn(Args...)> retarget_call(Fn&& fn, Args&&... args)
+ {
+ return (retarget_func<a>(std::forward<Fn>(fn)))(std::forward<Args>(args)...);
+ }
+};
+
+template <>
+struct cpu_caller<cpu_t::avx1>
+{
+ constexpr static cpu_t a = cpu_t::avx1;
+
+ template <typename Fn, typename... Args>
+ KFR_NOINLINE static KFR_USE_CPU(avx) result_of<Fn(Args...)> call(Fn&& fn, Args&&... args)
+ {
+ return fn(std::forward<Args>(args)...);
+ }
+
+ template <typename Fn, typename... Args>
+ KFR_NOINLINE static KFR_USE_CPU(avx) result_of<Fn(Args...)> retarget_call(Fn&& fn, Args&&... args)
+ {
+ return (retarget_func<a>(std::forward<Fn>(fn)))(std::forward<Args>(args)...);
+ }
+};
+
+template <>
+struct cpu_caller<cpu_t::sse41>
+{
+ constexpr static cpu_t a = cpu_t::sse41;
+
+ template <typename Fn, typename... Args>
+ KFR_NOINLINE static KFR_USE_CPU(sse41) result_of<Fn(Args...)> call(Fn&& fn, Args&&... args)
+ {
+ return fn(std::forward<Args>(args)...);
+ }
+
+ template <typename Fn, typename... Args>
+ KFR_NOINLINE static KFR_USE_CPU(sse41) result_of<Fn(Args...)> retarget_call(Fn&& fn, Args&&... args)
+ {
+ return (retarget_func<a>(std::forward<Fn>(fn)))(std::forward<Args>(args)...);
+ }
+};
+
+template <>
+struct cpu_caller<cpu_t::ssse3>
+{
+ constexpr static cpu_t a = cpu_t::ssse3;
+
+ template <typename Fn, typename... Args>
+ KFR_NOINLINE static KFR_USE_CPU(ssse3) result_of<Fn(Args...)> call(Fn&& fn, Args&&... args)
+ {
+ return fn(std::forward<Args>(args)...);
+ }
+
+ template <typename Fn, typename... Args>
+ KFR_NOINLINE static KFR_USE_CPU(ssse3) result_of<Fn(Args...)> retarget_call(Fn&& fn, Args&&... args)
+ {
+ return (retarget_func<a>(std::forward<Fn>(fn)))(std::forward<Args>(args)...);
+ }
+};
+
+template <>
+struct cpu_caller<cpu_t::sse3>
+{
+ constexpr static cpu_t a = cpu_t::sse3;
+
+ template <typename Fn, typename... Args>
+ KFR_NOINLINE static KFR_USE_CPU(sse3) result_of<Fn(Args...)> call(Fn&& fn, Args&&... args)
+ {
+ return fn(std::forward<Args>(args)...);
+ }
+
+ template <typename Fn, typename... Args>
+ KFR_NOINLINE static KFR_USE_CPU(sse3) result_of<Fn(Args...)> retarget_call(Fn&& fn, Args&&... args)
+ {
+ return (retarget_func<a>(std::forward<Fn>(fn)))(std::forward<Args>(args)...);
+ }
+};
+
+template <>
+struct cpu_caller<cpu_t::sse2>
+{
+ constexpr static cpu_t a = cpu_t::sse2;
+
+ template <typename Fn, typename... Args>
+ KFR_NOINLINE static KFR_USE_CPU(sse2) result_of<Fn(Args...)> call(Fn&& fn, Args&&... args)
+ {
+ return fn(std::forward<Args>(args)...);
+ }
+
+ template <typename Fn, typename... Args>
+ KFR_NOINLINE static KFR_USE_CPU(sse2) result_of<Fn(Args...)> retarget_call(Fn&& fn, Args&&... args)
+ {
+ return (retarget_func<a>(std::forward<Fn>(fn)))(std::forward<Args>(args)...);
+ }
+};
+
+template <cpu_t c, typename Fn, typename... Args, KFR_ENABLE_IF(c == cpu_t::native)>
+KFR_INLINE auto dispatch_impl(Fn&& fn, Args&&... args) -> decltype(fn(std::forward<Args>(args)...))
+{
+ using targetFn = retarget<Fn, cpu_t::native>;
+ targetFn newfn = retarget_func<c>(std::forward<Fn>(fn));
+ return newfn(std::forward<Args>(args)...);
+}
+
+template <cpu_t c, typename Fn, typename... Args, KFR_ENABLE_IF(c != cpu_t::native && c != cpu_t::runtime)>
+KFR_INLINE auto dispatch_impl(Fn&& fn, Args&&... args) -> decltype(fn(std::forward<Args>(args)...))
+{
+ return internal::cpu_caller<c>::retarget_call(std::forward<Fn>(fn), std::forward<Args>(args)...);
+}
+}
+
+template <cpu_t c, typename Fn, typename... Args>
+KFR_INLINE auto dispatch(Fn&& fn, Args&&... args) -> decltype(fn(std::forward<Args>(args)...))
+{
+ return internal::dispatch_impl<c>(std::forward<Fn>(fn), std::forward<Args>(args)...);
+}
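+
+// Usage sketch (illustrative; fn_sum is a hypothetical functor, not part of
+// the library). dispatch<c> forwards the call to a copy of the functor
+// compiled with the target attribute of the selected CPU:
+//   struct fn_sum
+//   {
+//       template <typename T>
+//       T operator()(T x, T y) { return x + y; }
+//   };
+//   int r = dispatch<cpu_t::sse41>(fn_sum(), 1, 2);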
+}
diff --git a/include/kfr/base/expression.hpp b/include/kfr/base/expression.hpp
@@ -0,0 +1,315 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+
+#include "dispatch.hpp"
+#include "types.hpp"
+#include "vec.hpp"
+
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wshadow"
+
+namespace kfr
+{
+
+template <typename T>
+using is_generic = is_same<generic, typename decay<T>::value_type>;
+
+template <typename T>
+using is_infinite = not_t<is_same<size_t, typename decay<T>::size_type>>;
+
+namespace internal
+{
+
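+// Variadic minimum: minsize(a, b, c, ...) returns the smallest argument.
+// expression::size() below uses it to take the smallest of its operands' sizes.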
+template <typename T1>
+constexpr inline T1 minsize(T1 x) noexcept
+{
+ return x;
+}
+
+template <typename T1, typename T2, typename... Ts>
+constexpr inline common_type<T1, T2, Ts...> minsize(T1 x, T2 y, Ts... rest) noexcept
+{
+ return x < y ? minsize(x, rest...) : minsize(y, rest...);
+}
+
+template <typename... Args>
+struct expression : input_expression
+{
+ using value_type = common_type<typename decay<Args>::value_type...>;
+
+ using size_type = common_type<typename decay<Args>::size_type...>;
+
+ constexpr size_type size() const noexcept { return size_impl(indicesfor_t<Args...>()); }
+
+ constexpr static size_t count = sizeof...(Args);
+ expression() = delete;
+ constexpr expression(Args&&... args) noexcept : args(std::forward<Args>(args)...) {}
+
+ KFR_INLINE void begin_block(size_t size) { begin_block_impl(size, indicesfor_t<Args...>()); }
+ KFR_INLINE void end_block(size_t size) { end_block_impl(size, indicesfor_t<Args...>()); }
+
+ KFR_INLINE void begin_block(size_t size) const { begin_block_impl(size, indicesfor_t<Args...>()); }
+ KFR_INLINE void end_block(size_t size) const { end_block_impl(size, indicesfor_t<Args...>()); }
+
+protected:
+ std::tuple<Args...> args;
+
+ template <size_t... indices>
+ constexpr size_type size_impl(csizes_t<indices...>) const noexcept
+ {
+ return minsize(std::get<indices>(this->args).size()...);
+ }
+
+ template <typename Fn, typename T, size_t N>
+ KFR_INLINE vec<T, N> call(Fn&& fn, size_t index, vec_t<T, N> x) const
+ {
+ return call_impl(std::forward<Fn>(fn), indicesfor_t<Args...>(), index, x);
+ }
+ template <size_t ArgIndex, typename T, size_t N>
+ KFR_INLINE vec<T, N> argument(csize_t<ArgIndex>, size_t index, vec_t<T, N> x) const
+ {
+ static_assert(ArgIndex < count, "Incorrect ArgIndex");
+ return std::get<ArgIndex>(this->args)(cinput, index, x);
+ }
+ template <typename T, size_t N>
+ KFR_INLINE vec<T, N> argument_first(size_t index, vec_t<T, N> x) const
+ {
+ return std::get<0>(this->args)(cinput, index, x);
+ }
+
+private:
+ template <typename Arg, size_t N, typename Tin,
+ typename Tout1 = conditional<is_generic<Arg>::value, Tin, typename decay<Arg>::value_type>,
+ typename Tout = Tout1>
+ KFR_INLINE vec_t<Tout, N> vec_t_for() const
+ {
+ return {};
+ }
+ template <typename Fn, typename T, size_t N, size_t... indices>
+ KFR_INLINE vec<T, N> call_impl(Fn&& fn, csizes_t<indices...>, size_t index, vec_t<T, N>) const
+ {
+ using ratio = func_ratio<Fn>;
+ constexpr size_t Nin = N * ratio::input / ratio::output;
+ using Tout = conditional<is_same<generic, value_type>::value, T, value_type>;
+
+ return cast<T>(fn(cast<Tout>(std::get<indices>(this->args)(
+ cinput, index * ratio::input / ratio::output, vec_t_for<Args, Nin, Tout>()))...));
+ }
+ template <size_t... indices>
+ KFR_INLINE void begin_block_impl(size_t size, csizes_t<indices...>)
+ {
+ swallow{ (std::get<indices>(args).begin_block(size), 0)... };
+ }
+ template <size_t... indices>
+ KFR_INLINE void end_block_impl(size_t size, csizes_t<indices...>)
+ {
+ swallow{ (std::get<indices>(args).end_block(size), 0)... };
+ }
+ template <size_t... indices>
+ KFR_INLINE void begin_block_impl(size_t size, csizes_t<indices...>) const
+ {
+ swallow{ (std::get<indices>(args).begin_block(size), 0)... };
+ }
+ template <size_t... indices>
+ KFR_INLINE void end_block_impl(size_t size, csizes_t<indices...>) const
+ {
+ swallow{ (std::get<indices>(args).end_block(size), 0)... };
+ }
+};
+
+template <typename T, size_t width = 1>
+struct expression_scalar : input_expression
+{
+ using value_type = T;
+ expression_scalar() = delete;
+ constexpr expression_scalar(const T& val) noexcept : val(val) {}
+ constexpr expression_scalar(vec<T, width> val) noexcept : val(val) {}
+ const vec<T, width> val;
+
+ template <typename U, size_t N>
+ KFR_INLINE vec<U, N> operator()(cinput_t, size_t, vec_t<U, N>) const
+ {
+ return resize<N>(cast<U>(val));
+ }
+};
+
+template <typename Fn, typename Args, typename Enable = void>
+struct generic_result
+{
+ using type = generic;
+};
+
+template <typename Fn, typename... Args>
+struct generic_result<Fn, ctypes_t<Args...>, void_t<enable_if<!or_t<is_same<generic, Args>...>::value>>>
+{
+ using type = subtype<decltype(std::declval<Fn>()(std::declval<vec<decay<Args>, 1>>()...))>;
+};
+
+template <typename Fn, typename... Args>
+struct expression_function : expression<Args...>
+{
+ using ratio = func_ratio<Fn>;
+
+ using value_type = typename generic_result<Fn, ctypes_t<value_type_of<Args>...>>::type;
+
+ template <cpu_t newcpu>
+ using retarget_this = expression_function<retarget<Fn, newcpu>, retarget<Args, newcpu>...>;
+
+ expression_function(Fn&& fn, Args&&... args) noexcept : expression<Args...>(std::forward<Args>(args)...),
+ fn(std::forward<Fn>(fn))
+ {
+ }
+ template <typename T, size_t N>
+ KFR_INLINE vec<T, N> operator()(cinput_t, size_t index, vec_t<T, N> x) const
+ {
+ static_assert(is_same<T, value_type_of<expression_function>>::value ||
+ is_generic<expression_function>::value,
+ "Can't cast from value_type to T");
+ return this->call(fn, index, x);
+ }
+
+protected:
+ Fn fn;
+};
+
+template <typename T>
+using arg_impl = conditional<is_number<T>::value || is_vec<T>::value,
+ expression_scalar<subtype<decay<T>>, compound_type_traits<decay<T>>::width>, T>;
+
+template <typename T>
+using arg = internal::arg_impl<T>;
+
+template <typename Tout, typename Tin, size_t width, typename OutFn, typename Fn>
+KFR_INLINE void process_cycle(OutFn&& outfn, const Fn& fn, size_t& i, size_t size)
+{
+ const size_t count = size / width * width;
+ KFR_LOOP_NOUNROLL
+ for (; i < count; i += width)
+ {
+ outfn(coutput, i, cast<Tout>(fn(cinput, i, vec_t<Tin, width>())));
+ }
+}
+}
+
+template <typename A>
+KFR_INLINE internal::arg<A> e(A&& a)
+{
+ return internal::arg<A>(std::forward<A>(a));
+}
+
+template <typename T>
+KFR_INLINE internal::expression_scalar<T> scalar(const T& val)
+{
+ return internal::expression_scalar<T>(val);
+}
+
+template <typename T, size_t N>
+KFR_INLINE internal::expression_scalar<T, N> scalar(vec<T, N> val)
+{
+ return internal::expression_scalar<T, N>(val);
+}
+
+template <typename Fn, typename... Args>
+KFR_INLINE internal::expression_function<decay<Fn>, internal::arg<Args>...> bind_expression(Fn&& fn,
+ Args&&... args)
+{
+ return internal::expression_function<decay<Fn>, internal::arg<Args>...>(std::forward<Fn>(fn),
+ std::forward<Args>(args)...);
+}
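+
+// Usage sketch (illustrative; fn_sqr is a hypothetical functor, not a KFR name):
+// plain numbers and vectors are wrapped automatically through internal::arg,
+// or explicitly with e()/scalar():
+//   auto ex = bind_expression(fn_sqr(), 1.0f);       // wraps 1.0f as expression_scalar
+//   auto ex2 = bind_expression(fn_sqr(), e(value));  // explicit wrapping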
+
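+// process() drives an input expression into an output expression in two passes:
+// a wide main loop over `width` elements at a time, then a narrow tail loop in
+// steps of `comp` elements (comp = lcm of the expressions' I/O ratios), which
+// is expected to cover the remainder exactly.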
+template <typename Tout, cpu_t c = cpu_t::native, size_t width = internal::get_vector_width<Tout, c>(2, 4),
+ typename OutFn, typename Fn>
+KFR_INLINE void process(OutFn&& outfn, const Fn& fn, size_t size)
+{
+ static_assert(is_output_expression<OutFn>::value, "OutFn must be an expression");
+ static_assert(is_input_expression<Fn>::value, "Fn must be an expression");
+ constexpr size_t comp = lcm(func_ratio<OutFn>::input, func_ratio<Fn>::output);
+ size *= comp;
+ outfn.output_begin_block(size);
+ fn.begin_block(size);
+
+ using Tin = conditional<is_generic<Fn>::value, Tout, value_type_of<Fn>>;
+
+ size_t i = 0;
+ internal::process_cycle<Tout, Tin, width>(std::forward<OutFn>(outfn), fn, i, size);
+ internal::process_cycle<Tout, Tin, comp>(std::forward<OutFn>(outfn), fn, i, size);
+
+ fn.end_block(size);
+ outfn.output_end_block(size);
+}
+
+namespace internal
+{
+
+template <typename T, typename E1>
+struct expression_typed : input_expression
+{
+    using value_type = T;
+
+    expression_typed(E1&& e1) : e1(std::forward<E1>(e1)) {}
+
+ template <typename U, size_t N>
+ KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>)
+ {
+ return cast<U>(e1(cinput, index, vec_t<T, N>()));
+ }
+ E1 e1;
+};
+
+template <typename T, typename E1>
+struct expression_sized : input_expression
+{
+    using value_type = T;
+    using size_type = size_t;
+
+    expression_sized(E1&& e1, size_t size) : e1(std::forward<E1>(e1)), m_size(size) {}
+
+ template <typename U, size_t N>
+ KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
+ {
+ auto val = e1(cinput, index, vec_t<T, N>());
+ return cast<U>(val);
+ }
+
+ constexpr size_t size() const noexcept { return m_size; }
+ E1 e1;
+ size_t m_size;
+};
+}
+
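+// typed<T>(expr) pins the element type of an expression whose own value_type is
+// generic; the two-argument form typed<T>(expr, n) additionally gives it a
+// finite size, e.g. for use as the input of process() over exactly n elements.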
+template <typename T, typename E1>
+inline internal::expression_typed<T, E1> typed(E1&& e1)
+{
+    return internal::expression_typed<T, E1>(std::forward<E1>(e1));
+}
+template <typename T, typename E1>
+inline internal::expression_sized<T, E1> typed(E1&& e1, size_t size)
+{
+    return internal::expression_sized<T, E1>(std::forward<E1>(e1), size);
+}
+
+template <typename Fn, typename... Args>
+using expr_func = internal::expression_function<Fn, internal::arg<Args>...>;
+}
+#pragma clang diagnostic pop
diff --git a/include/kfr/base/function.hpp b/include/kfr/base/function.hpp
@@ -0,0 +1,124 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+
+#include "dispatch.hpp"
+#include "expression.hpp"
+#include "shuffle.hpp"
+#include "types.hpp"
+#include "vec.hpp"
+
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wshadow"
+
+namespace kfr
+{
+
+#define KFR_HANDLE_ALL(fn) \
+ template <typename T, size_t N, typename... Args> \
+ KFR_SINTRIN vec<T, N> fn(vec<T, N> x, Args&&... args) \
+ { \
+ return handle_all<cpu, fn_##fn>(x, std::forward<Args>(args)...); \
+ }
+#define KFR_HANDLE_ALL_REDUCE(redfn, fn) \
+ template <typename T, size_t N, typename... Args> \
+ KFR_SINTRIN auto fn(vec<T, N> x, Args&&... args) \
+ { \
+ return handle_all_reduce<cpu, redfn, fn_##fn>(x, std::forward<Args>(args)...); \
+ }
+
+#define KFR_HANDLE_SCALAR(fn) \
+ template <typename T, typename... Ts, KFR_ENABLE_IF(!is_vec<T>::value)> \
+ KFR_SINTRIN auto fn(const T& x, const Ts&... rest) \
+ { \
+ return fn(make_vector(x), make_vector(rest)...)[0]; \
+ }
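+
+// KFR_HANDLE_ALL forwards a vector of unsupported width to handle_all below,
+// which either widens it to the next fast register width or splits it in
+// halves; KFR_HANDLE_ALL_REDUCE does the same for reductions, recombining the
+// halves with the given reducing functor; KFR_HANDLE_SCALAR lifts a scalar
+// call through a 1-element vector and extracts element 0 of the result.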
+
+namespace internal
+{
+
+struct fn_disabled
+{
+ constexpr static bool disabled = true;
+};
+
+template <cpu_t c, typename T>
+constexpr inline size_t next_fast_width(size_t n)
+{
+ return n > vector_width<T, cpu_t::sse2> ? vector_width<T, c> : vector_width<T, cpu_t::sse2>;
+}
+
+template <cpu_t c, typename T, size_t N, size_t Nout = next_fast_width<c, T>(N)>
+KFR_INLINE vec<T, Nout> extend_reg(vec<T, N> x)
+{
+ return extend<Nout>(x);
+}
+template <cpu_t c, typename T, size_t N, size_t Nout = next_fast_width<c, T>(N)>
+KFR_INLINE vec<T, Nout> extend_reg(vec<T, N> x, T value)
+{
+ return widen<Nout>(x, value);
+}
+
+template <cpu_t cur, typename Fn, typename T, size_t N, typename... Args,
+ KFR_ENABLE_IF(N < vector_width<T, cur>)>
+KFR_INLINE auto handle_all_f(Fn&& fn, vec<T, N> x, Args&&... args)
+{
+ return narrow<N>(fn(extend_reg<cur>(x), extend_reg<cur>(args)...));
+}
+template <cpu_t cur, typename Fn, typename T, size_t N, typename... Args,
+ KFR_ENABLE_IF(N > vector_width<T, cur>)>
+KFR_INLINE auto handle_all_f(Fn&& fn, vec<T, N> x, Args&&... args)
+{
+ return concat(fn(low(x), low(args)...), fn(high(x), high(args)...));
+}
+
+template <cpu_t cur, typename Fn, typename T, size_t N, typename... Args>
+KFR_INLINE auto handle_all(vec<T, N> x, Args&&... args)
+{
+ Fn fn{};
+ return handle_all_f<cur>(fn, x, std::forward<Args>(args)...);
+}
+
+template <cpu_t cur, typename RedFn, typename Fn, typename T, size_t N, typename... Args,
+ typename = u8[N < vector_width<T, cur>]>
+KFR_INLINE auto handle_all_reduce_f(RedFn&& redfn, Fn&& fn, vec<T, N> x, Args&&... args)
+{
+ return fn(extend_reg<cur>(x, redfn(initialvalue<T>())),
+ extend_reg<cur>(args, redfn(initialvalue<T>()))...);
+}
+template <cpu_t cur, typename RedFn, typename Fn, typename T, size_t N, typename... Args,
+ typename = u8[N > vector_width<T, cur>], typename = void>
+KFR_INLINE auto handle_all_reduce_f(RedFn&& redfn, Fn&& fn, vec<T, N> x, Args&&... args)
+{
+ return redfn(fn(low(x), low(args)...), fn(high(x), high(args)...));
+}
+template <cpu_t cur, typename RedFn, typename Fn, typename T, size_t N, typename... Args>
+KFR_INLINE auto handle_all_reduce(vec<T, N> x, Args&&... args)
+{
+ RedFn redfn{};
+ Fn fn{};
+ return handle_all_reduce_f<cur>(redfn, fn, x, std::forward<Args>(args)...);
+}
+}
+}
+#pragma clang diagnostic pop
diff --git a/include/kfr/base/gamma.hpp b/include/kfr/base/gamma.hpp
@@ -0,0 +1,108 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+#include "function.hpp"
+#include "log_exp.hpp"
+
+#pragma clang diagnostic push
+#if CID_HAS_WARNING("-Wc99-extensions")
+#pragma clang diagnostic ignored "-Wc99-extensions"
+#endif
+
+namespace kfr
+{
+
+namespace internal
+{
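+// Hex-float coefficients of a Lanczos-style series for gamma(): in_gamma::gamma
+// below evaluates (c[0] + sum(c[k] / (z + k), k = 1..11)) * exp(-(z + 12)) *
+// pow(z + 12, z + 0.5) / z, where 12 = arraysize(gamma_precalc).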
+template <typename T>
+constexpr T gamma_precalc[] = {
+ 0x2.81b263fec4e08p+0, 0x3.07b4100e04448p+16, -0xa.a0da01d4d4e2p+16, 0xf.05ccb27bb9dbp+16,
+ -0xa.fa79616b7c6ep+16, 0x4.6dd6c10d4df5p+16, -0xf.a2304199eb4ap+12, 0x1.c21dd4aade3dp+12,
+ -0x1.62f981f01cf84p+8, 0x5.a937aa5c48d98p+0, -0x3.c640bf82e2104p-8, 0xc.914c540f959cp-24,
+};
+
+template <cpu_t c = cpu_t::native, cpu_t cc = c>
+struct in_gamma : in_log_exp<cc>
+{
+private:
+ using in_log_exp<cc>::exp;
+ using in_log_exp<cc>::pow;
+
+public:
+ template <typename T, size_t N>
+ KFR_SINTRIN vec<T, N> gamma(vec<T, N> z)
+ {
+ constexpr size_t Count = arraysize(internal::gamma_precalc<T>);
+ vec<T, N> accm = gamma_precalc<T>[0];
+ KFR_LOOP_UNROLL
+ for (size_t k = 1; k < Count; k++)
+ accm += gamma_precalc<T>[k] / (z + cast<utype<T>>(k));
+ accm *= exp(-(z + Count)) * pow(z + Count, z + 0.5);
+ return accm / z;
+ }
+
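+    // factorial_approx(x) approximates x! as gamma(x + 1).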
+ template <typename T, size_t N>
+ KFR_SINTRIN vec<T, N> factorial_approx(vec<T, N> x)
+ {
+ return gamma(x + T(1));
+ }
+ KFR_SPEC_FN(in_gamma, gamma)
+ KFR_SPEC_FN(in_gamma, factorial_approx)
+};
+}
+
+namespace native
+{
+using fn_gamma = internal::in_gamma<>::fn_gamma;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> gamma(const T1& x)
+{
+ return internal::in_gamma<>::gamma(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_gamma, E1> gamma(E1&& x)
+{
+ return { fn_gamma(), std::forward<E1>(x) };
+}
+
+using fn_factorial_approx = internal::in_gamma<>::fn_factorial_approx;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> factorial_approx(const T1& x)
+{
+ return internal::in_gamma<>::factorial_approx(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_factorial_approx, E1> factorial_approx(E1&& x)
+{
+ return { fn_factorial_approx(), std::forward<E1>(x) };
+}
+}
+}
+
+#pragma clang diagnostic pop
diff --git a/include/kfr/base/intrinsics.h b/include/kfr/base/intrinsics.h
@@ -0,0 +1,145 @@
+#pragma once
+
+#include "kfr.h"
+
+#if KFR_COMPILER_CLANG
+
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wreserved-id-macro"
+
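+// Clang only exposes intrinsics for features enabled on the command line.
+// To allow per-function targeting (KFR_USE_CPU), temporarily define every
+// feature macro, include <immintrin.h> once with all intrinsics visible,
+// then undefine whatever was not originally defined by the compiler.
+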
+#ifdef __AVX2__
+#define KFR_AVX2_DEFINED
+#endif
+#ifdef __AVX__
+#define KFR_AVX1_DEFINED
+#endif
+#ifdef __SSE4_2__
+#define KFR_SSE42_DEFINED
+#endif
+#ifdef __SSE4_1__
+#define KFR_SSE41_DEFINED
+#endif
+#ifdef __SSSE3__
+#define KFR_SSSE3_DEFINED
+#endif
+#ifdef __SSE3__
+#define KFR_SSE3_DEFINED
+#endif
+#ifdef __SSE2__
+#define KFR_SSE2_DEFINED
+#endif
+#ifdef __SSE__
+#define KFR_SSE1_DEFINED
+#endif
+#ifdef __MMX__
+#define KFR_MMX_DEFINED
+#endif
+
+#ifndef KFR_AVX2_DEFINED
+#define __AVX2__
+#endif
+#ifndef KFR_AVX1_DEFINED
+#define __AVX__
+#endif
+#ifndef KFR_SSE42_DEFINED
+#define __SSE4_2__
+#endif
+#ifndef KFR_SSE41_DEFINED
+#define __SSE4_1__
+#endif
+#ifndef KFR_SSSE3_DEFINED
+#define __SSSE3__
+#endif
+#ifndef KFR_SSE3_DEFINED
+#define __SSE3__
+#endif
+#ifndef KFR_SSE2_DEFINED
+#define __SSE2__
+#endif
+#ifndef KFR_SSE1_DEFINED
+#define __SSE__
+#endif
+#ifndef KFR_MMX_DEFINED
+#define __MMX__
+#endif
+
+#ifdef KFR_SKIP_AVX512
+#ifndef __AVX512FINTRIN_H
+#define __AVX512FINTRIN_H
+#endif
+#ifndef __AVX512VLINTRIN_H
+#define __AVX512VLINTRIN_H
+#endif
+#ifndef __AVX512BWINTRIN_H
+#define __AVX512BWINTRIN_H
+#endif
+#ifndef __AVX512CDINTRIN_H
+#define __AVX512CDINTRIN_H
+#endif
+#ifndef __AVX512DQINTRIN_H
+#define __AVX512DQINTRIN_H
+#endif
+#ifndef __AVX512VLBWINTRIN_H
+#define __AVX512VLBWINTRIN_H
+#endif
+#ifndef __AVX512VLDQINTRIN_H
+#define __AVX512VLDQINTRIN_H
+#endif
+#ifndef __AVX512ERINTRIN_H
+#define __AVX512ERINTRIN_H
+#endif
+#ifndef __IFMAINTRIN_H
+#define __IFMAINTRIN_H
+#endif
+#ifndef __IFMAVLINTRIN_H
+#define __IFMAVLINTRIN_H
+#endif
+#ifndef __VBMIINTRIN_H
+#define __VBMIINTRIN_H
+#endif
+#ifndef __VBMIVLINTRIN_H
+#define __VBMIVLINTRIN_H
+#endif
+
+#endif
+
+#include <immintrin.h>
+#ifdef KFR_OS_WIN
+#include <intrin.h>
+#endif
+
+#ifndef KFR_AVX2_DEFINED
+#undef __AVX2__
+#endif
+#ifndef KFR_AVX1_DEFINED
+#undef __AVX__
+#endif
+#ifndef KFR_SSE42_DEFINED
+#undef __SSE4_2__
+#endif
+#ifndef KFR_SSE41_DEFINED
+#undef __SSE4_1__
+#endif
+#ifndef KFR_SSSE3_DEFINED
+#undef __SSSE3__
+#endif
+#ifndef KFR_SSE3_DEFINED
+#undef __SSE3__
+#endif
+#ifndef KFR_SSE2_DEFINED
+#undef __SSE2__
+#endif
+#ifndef KFR_SSE1_DEFINED
+#undef __SSE__
+#endif
+#ifndef KFR_MMX_DEFINED
+#undef __MMX__
+#endif
+
+#pragma clang diagnostic pop
+
+#else
+
+#include <intrin.h>
+
+#endif
diff --git a/include/kfr/base/kfr.h b/include/kfr/base/kfr.h
@@ -0,0 +1,134 @@
+#pragma once
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "../cident.h"
+
+#define KFR_INLINE CID_INLINE
+#define KFR_INLINE_MEMBER CID_INLINE_MEMBER
+#define KFR_INLINE_LAMBDA CID_INLINE_LAMBDA
+#define KFR_NOINLINE CID_NOINLINE
+#define KFR_FLATTEN CID_FLATTEN
+#define KFR_RESTRICT CID_RESTRICT
+
+#ifdef CID_COMPILER_CLANG
+#define KFR_COMPILER_CLANG CID_COMPILER_CLANG
+#endif
+
+#ifdef CID_OS_WIN
+#define KFR_OS_WIN CID_OS_WIN
+#endif
+
+#ifdef CID_OS_OSX
+#define KFR_OS_OSX CID_OS_OSX
+#endif
+
+#ifdef CID_OS_LINUX
+#define KFR_OS_LINUX CID_OS_LINUX
+#endif
+
+#ifdef CID_GNU_ATTRIBUTES
+#define KFR_GNU_ATTRIBUTES CID_GNU_ATTRIBUTES
+#endif
+
+#ifdef CID_MSVC_ATTRIBUTES
+#define KFR_MSVC_ATTRIBUTES CID_MSVC_ATTRIBUTES
+#endif
+
+#ifdef CID_ARCH_X64
+#define KFR_ARCH_X64 CID_ARCH_X64
+#endif
+
+#ifdef CID_ARCH_X32
+#define KFR_ARCH_X32 CID_ARCH_X32
+#endif
+
+#define KFR_ARCH_NAME CID_ARCH_NAME
+
+#define KFR_CDECL CID_CDECL
+
+#define KFR_PUBLIC_C CID_PUBLIC_C
+
+#ifdef __cplusplus
+namespace kfr
+{
+using ::cid::arraysize;
+}
+#endif
+
+#define KFR_VERSION_STRING "0.9.0"
+#define KFR_VERSION_MAJOR 0
+#define KFR_VERSION_MINOR 9
+#define KFR_VERSION_BUILD 0
+#define KFR_VERSION 900
+
+#ifdef __cplusplus
+namespace kfr
+{
+constexpr const char version_string[] = KFR_VERSION_STRING;
+constexpr int version_major = KFR_VERSION_MAJOR;
+constexpr int version_minor = KFR_VERSION_MINOR;
+constexpr int version_build = KFR_VERSION_BUILD;
+constexpr int version = KFR_VERSION;
+}
+#endif
+
+//#define KFR_MEMORY_ALIGNMENT 64
+
+#if KFR_COMPILER_CLANG
+#define KFR_LOOP_NOUNROLL \
+ _Pragma("clang loop vectorize( disable )") _Pragma("clang loop interleave( disable )") \
+ _Pragma("clang loop unroll( disable )")
+
+#define KFR_LOOP_UNROLL _Pragma("clang loop unroll( full )")
+
+#define KFR_VEC_CC __attribute__((vectorcall))
+#else
+#define KFR_LOOP_NOUNROLL
+#define KFR_LOOP_UNROLL
+#ifdef KFR_COMPILER_MSVC
+#define KFR_VEC_CC __vectorcall
+#endif
+
+#endif
+
+#define KFR_AVAIL_AVX2 1
+#define KFR_AVAIL_AVX 1
+#define KFR_AVAIL_SSE42 1
+#define KFR_AVAIL_SSE41 1
+#define KFR_AVAIL_SSSE3 1
+#define KFR_AVAIL_SSE3 1
+#define KFR_AVAIL_SSE2 1
+#define KFR_AVAIL_SSE 1
+
+#if defined(KFR_GNU_ATTRIBUTES)
+
+#define KFR_CPU_NAME_avx2 "avx2"
+#define KFR_CPU_NAME_avx "avx"
+#define KFR_CPU_NAME_sse42 "sse4.2"
+#define KFR_CPU_NAME_sse41 "sse4.1"
+#define KFR_CPU_NAME_ssse3 "ssse3"
+#define KFR_CPU_NAME_sse3 "sse3"
+#define KFR_CPU_NAME_sse2 "sse2"
+
+#if __has_attribute(target)
+#define KFR_USE_CPU(arch) __attribute__((target(KFR_CPU_NAME_##arch)))
+#else
+#define KFR_USE_CPU(arch)
+#endif
+
+#endif
+
+#if defined(KFR_GNU_ATTRIBUTES)
+#define KFR_FAST_CC __attribute__((fastcall))
+#else
+#define KFR_FAST_CC __fastcall
+#endif
+
+#define KFR_INTRIN CID_INTRIN
+#define KFR_SINTRIN CID_INTRIN CID_NODEBUG static
+#define KFR_AINTRIN inline CID_NODEBUG static
+#define KFR_FAST_NOINLINE CID_NOINLINE
+
+#define KFR_CPU_INTRIN(c) KFR_AINTRIN KFR_USE_CPU(c)
diff --git a/include/kfr/base/log_exp.hpp b/include/kfr/base/log_exp.hpp
@@ -0,0 +1,575 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+
+#include "abs.hpp"
+#include "constants.hpp"
+#include "function.hpp"
+#include "min_max.hpp"
+#include "operators.hpp"
+#include "round.hpp"
+#include "select.hpp"
+#include "shuffle.hpp"
+
+#pragma clang diagnostic push
+#if CID_HAS_WARNING("-Winaccessible-base")
+#pragma clang diagnostic ignored "-Winaccessible-base"
+#endif
+
+namespace kfr
+{
+
+namespace internal
+{
+
+template <cpu_t c = cpu_t::native>
+struct in_log_exp : in_select<c>, in_min_max<c>, in_clamp<c>, in_round<c>, in_abs<c>
+{
+private:
+ constexpr static cpu_t cur = c;
+ using in_select<c>::select;
+ using in_round<c>::floor;
+ using in_clamp<c>::clamp;
+ using in_abs<c>::abs;
+
+public:
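+    // vilogbp1(d) returns ilogb(d) + 1 (the unbiased binary exponent plus one);
+    // denormals are pre-scaled by a power of two (2^64 for f32) so the bitfield
+    // extraction below stays valid.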
+ template <size_t N>
+ KFR_SINTRIN vec<i32, N> vilogbp1(vec<f32, N> d)
+ {
+ mask<i32, N> m = d < 5.421010862427522E-20f;
+ d = select(m, 1.8446744073709552E19f * d, d);
+ vec<i32, N> q = (ibitcast(d) >> 23) & 0xff;
+ q = select(m, q - (64 + 0x7e), q - 0x7e);
+ return q;
+ }
+
+ template <size_t N>
+ KFR_SINTRIN vec<i64, N> vilogbp1(vec<f64, N> d)
+ {
+ mask<i64, N> m = d < 4.9090934652977266E-91;
+ d = select(m, 2.037035976334486E90 * d, d);
+ vec<i64, N> q = (ibitcast(d) >> 52) & 0x7ff;
+ q = select(m, q - (300 + 0x03fe), q - 0x03fe);
+ return q;
+ }
+
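+    // vldexpk(x, q) computes x * 2^q, splitting q into pieces so that each
+    // constructed power-of-two factor stays within the exponent range.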
+ template <size_t N>
+ KFR_SINTRIN vec<f32, N> vldexpk(vec<f32, N> x, vec<i32, N> q)
+ {
+ vec<i32, N> m = q >> 31;
+ m = (((m + q) >> 6) - m) << 4;
+ q = q - (m << 2);
+ m = clamp(m + 0x7f, vec<i32, N>(0xff));
+ vec<f32, N> u = pow4(bitcast<f32>(cast<i32>(m) << 23));
+ return x * u * bitcast<f32>((cast<i32>(q + 0x7f)) << 23);
+ }
+
+ template <size_t N>
+ KFR_SINTRIN vec<f64, N> vldexpk(vec<f64, N> x, vec<i64, N> q)
+ {
+ vec<i64, N> m = q >> 31;
+ m = (((m + q) >> 9) - m) << 7;
+ q = q - (m << 2);
+ m = clamp(m + 0x3ff, i64(0x7ff));
+ vec<f64, N> u = pow4(bitcast<f64>(cast<i64>(m) << 52));
+ return x * u * bitcast<f64>((cast<i64>(q + 0x3ff)) << 52);
+ }
+
+ template <typename T, size_t N>
+ KFR_SINTRIN vec<T, N> logb(vec<T, N> x)
+ {
+ return select(x == T(), -c_infinity<T>, cast<T>(vilogbp1(x) - 1));
+ }
+
+ template <size_t N>
+ KFR_SINTRIN vec<f32, N> log(vec<f32, N> d)
+ {
+        vec<i32, N> e = vilogbp1(d * 0.7071); // truncated from 0.70710678118654752440084436210485f (1/sqrt(2))
+ vec<f32, N> m = vldexpk(d, -e);
+
+ vec<f32, N> x = (m - 1.0f) / (m + 1.0f);
+ vec<f32, N> x2 = x * x;
+
+ vec<f32, N> sp = select(d < 0, c_qnan<f32>, c_neginfinity<f32>);
+
+ vec<f32, N> t = 0.2371599674224853515625f;
+ t = fmadd(t, x2, 0.285279005765914916992188f);
+ t = fmadd(t, x2, 0.400005519390106201171875f);
+ t = fmadd(t, x2, 0.666666567325592041015625f);
+ t = fmadd(t, x2, 2.0f);
+
+ x = x * t + c_log_2<f32> * cast<f32>(e);
+ x = select(d > 0, x, sp);
+
+ return x;
+ }
+
+ template <size_t N>
+ KFR_SINTRIN vec<f64, N> log(vec<f64, N> d)
+ {
+        vec<i64, N> e = vilogbp1(d * 0.7071); // truncated from 0.70710678118654752440084436210485 (1/sqrt(2))
+ vec<f64, N> m = vldexpk(d, -e);
+
+ vec<f64, N> x = (m - 1.0) / (m + 1.0);
+ vec<f64, N> x2 = x * x;
+
+ vec<f64, N> sp = select(d < 0, c_qnan<f64>, c_neginfinity<f64>);
+
+ vec<f64, N> t = 0.148197055177935105296783;
+ t = fmadd(t, x2, 0.153108178020442575739679);
+ t = fmadd(t, x2, 0.181837339521549679055568);
+ t = fmadd(t, x2, 0.22222194152736701733275);
+ t = fmadd(t, x2, 0.285714288030134544449368);
+ t = fmadd(t, x2, 0.399999999989941956712869);
+ t = fmadd(t, x2, 0.666666666666685503450651);
+ t = fmadd(t, x2, 2);
+
+ x = x * t + c_log_2<f64> * cast<f64>(e);
+ x = select(d > 0, x, sp);
+
+ return x;
+ }
+
+ template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)>
+ KFR_SINTRIN vec<T, N> log2(vec<T, N> x)
+ {
+ return log(x) * c_recip_log_2<T>;
+ }
+ template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)>
+ KFR_SINTRIN vec<T, N> log10(vec<T, N> x)
+ {
+ return log(x) * c_recip_log_10<T>;
+ }
+
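+    // exp uses Cody-Waite range reduction: d = q*ln2 + s with ln2 split into
+    // two parts so the subtraction stays exact, a polynomial approximation of
+    // e^s on the small remainder, then vldexpk scales the result by 2^q.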
+ template <size_t N>
+ KFR_SINTRIN vec<f32, N> exp(vec<f32, N> d)
+ {
+ const f32 ln2_part1 = 0.6931457519f;
+ const f32 ln2_part2 = 1.4286067653e-6f;
+
+ vec<i32, N> q = cast<i32>(floor(d * c_recip_log_2<f32>));
+ vec<f32, N> s, u;
+
+ s = fmadd(cast<f32>(q), -ln2_part1, d);
+ s = fmadd(cast<f32>(q), -ln2_part2, s);
+
+ const f32 c2 = 0.4999999105930328369140625f;
+ const f32 c3 = 0.166668415069580078125f;
+ const f32 c4 = 4.16539050638675689697265625e-2f;
+ const f32 c5 = 8.378830738365650177001953125e-3f;
+ const f32 c6 = 1.304379315115511417388916015625e-3f;
+ const f32 c7 = 2.7555381529964506626129150390625e-4f;
+
+ u = c7;
+ u = fmadd(u, s, c6);
+ u = fmadd(u, s, c5);
+ u = fmadd(u, s, c4);
+ u = fmadd(u, s, c3);
+ u = fmadd(u, s, c2);
+
+ u = s * s * u + s + 1.0f;
+ u = vldexpk(u, q);
+
+ u = select(d == c_neginfinity<f32>, 0.f, u);
+
+ return u;
+ }
+
+ template <size_t N>
+ KFR_SINTRIN vec<f64, N> exp(vec<f64, N> d)
+ {
+ const f64 ln2_part1 = 0.69314717501401901245;
+ const f64 ln2_part2 = 5.545926273775592108e-009;
+
+ vec<i64, N> q = cast<i64>(floor(d * c_recip_log_2<f64>));
+ vec<f64, N> s, u;
+
+ s = fmadd(cast<f64>(q), -ln2_part1, d);
+ s = fmadd(cast<f64>(q), -ln2_part2, s);
+
+ const f64 c2 = 0.499999999999994948485237955537741072475910186767578;
+ const f64 c3 = 0.166666666667024204739888659787538927048444747924805;
+ const f64 c4 = 4.16666666578945840693215529881854308769106864929199e-2;
+ const f64 c5 = 8.3333334397461874404333670440792047884315252304077e-3;
+ const f64 c6 = 1.3888881489747750223179290074426717183087021112442e-3;
+ const f64 c7 = 1.9841587032493949419205414574918222569976933300495e-4;
+ const f64 c8 = 2.47929324077393282239802768662784160369483288377523e-5;
+ const f64 c9 = 2.77076037925831049422552981864598109496000688523054e-6;
+ const f64 c10 = 2.59589616274586264243611237120812340606335055781528e-7;
+ const f64 c11 = 3.43801438838789632454461529017381016259946591162588e-8;
+
+ u = c11;
+ u = fmadd(u, s, c10);
+ u = fmadd(u, s, c9);
+ u = fmadd(u, s, c8);
+ u = fmadd(u, s, c7);
+ u = fmadd(u, s, c6);
+ u = fmadd(u, s, c5);
+ u = fmadd(u, s, c4);
+ u = fmadd(u, s, c3);
+ u = fmadd(u, s, c2);
+
+ u = s * s * u + s + 1.0;
+ u = vldexpk(u, q);
+
+ u = select(d == c_neginfinity<f64>, 0.0, u);
+
+ return u;
+ }
+ template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)>
+ KFR_SINTRIN vec<T, N> exp2(vec<T, N> x)
+ {
+ return exp(x * c_log_2<T>);
+ }
+ template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)>
+ KFR_SINTRIN vec<T, N> exp10(vec<T, N> x)
+ {
+ return exp(x * c_log_10<T>);
+ }
+
+ template <typename T1, typename T2>
+ KFR_SINTRIN common_type<T1, T2> logn(const T1& a, const T2& b)
+ {
+ return log(a) / log(b);
+ }
+
+ template <typename T1, typename T2>
+ KFR_SINTRIN common_type<T1, T2> logm(const T1& a, const T2& b)
+ {
+ return log(a) * b;
+ }
+
+ template <typename T1, typename T2, typename T3>
+ KFR_SINTRIN common_type<T1, T2, T3> exp_fmadd(const T1& x, const T2& m, const T3& a)
+ {
+ return exp(fmadd(x, m, a));
+ }
+
+ template <typename T1, typename T2, typename T3>
+ KFR_SINTRIN common_type<T1, T2, T3> log_fmadd(const T1& x, const T2& m, const T3& a)
+ {
+ return fmadd(log(x), m, a);
+ }
+
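+    // pow(a, b) = exp(b * log(|a|)) with a sign fix-up: for negative a the
+    // result is defined only for integer b (negated when b is odd), NaN otherwise.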
+ template <typename T, size_t N>
+ KFR_SINTRIN vec<T, N> pow(vec<T, N> a, vec<T, N> b)
+ {
+ const vec<T, N> t = exp(b * log(abs(a)));
+ const mask<T, N> isint = floor(b) == b;
+ const mask<T, N> iseven = (cast<itype<T>>(b) & 1) == 0;
+ return select(a > T(), t,
+ select(a == T(), T(1), select(isint, select(iseven, t, -t), broadcast<N>(c_qnan<T>))));
+ }
+
+ template <typename T, size_t N>
+ KFR_SINTRIN vec<T, N> root(vec<T, N> x, vec<T, N> b)
+ {
+ return exp(reciprocal(b) * log(x));
+ }
+
+ template <typename T, size_t N>
+ KFR_SINTRIN vec<T, N> cbrt(vec<T, N> x)
+ {
+ return pow<T, N>(x, T(0.333333333333333333333333333333333));
+ }
+
+ template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>>
+ KFR_SINTRIN vec<Tout, N> exp(vec<T, N> x)
+ {
+ return exp(cast<Tout>(x));
+ }
+ template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>>
+ KFR_SINTRIN vec<Tout, N> exp2(vec<T, N> x)
+ {
+ return exp2(cast<Tout>(x));
+ }
+ template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>>
+ KFR_SINTRIN vec<Tout, N> exp10(vec<T, N> x)
+ {
+ return exp10(cast<Tout>(x));
+ }
+ template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>>
+ KFR_SINTRIN vec<Tout, N> log(vec<T, N> x)
+ {
+ return log(cast<Tout>(x));
+ }
+ template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>>
+ KFR_SINTRIN vec<Tout, N> log2(vec<T, N> x)
+ {
+ return log2(cast<Tout>(x));
+ }
+ template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>>
+ KFR_SINTRIN vec<Tout, N> log10(vec<T, N> x)
+ {
+ return log10(cast<Tout>(x));
+ }
+ template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>>
+ KFR_SINTRIN vec<Tout, N> cbrt(vec<T, N> x)
+ {
+ return cbrt(cast<Tout>(x));
+ }
+
+ KFR_HANDLE_SCALAR(exp)
+ KFR_HANDLE_SCALAR(exp2)
+ KFR_HANDLE_SCALAR(exp10)
+ KFR_HANDLE_SCALAR(log)
+ KFR_HANDLE_SCALAR(log2)
+ KFR_HANDLE_SCALAR(log10)
+ KFR_HANDLE_SCALAR(logb)
+ KFR_HANDLE_SCALAR(pow)
+ KFR_HANDLE_SCALAR(root)
+ KFR_HANDLE_SCALAR(cbrt)
+
+ KFR_SPEC_FN(in_log_exp, exp)
+ KFR_SPEC_FN(in_log_exp, exp2)
+ KFR_SPEC_FN(in_log_exp, exp10)
+ KFR_SPEC_FN(in_log_exp, log)
+ KFR_SPEC_FN(in_log_exp, log2)
+ KFR_SPEC_FN(in_log_exp, log10)
+ KFR_SPEC_FN(in_log_exp, logb)
+ KFR_SPEC_FN(in_log_exp, logn)
+ KFR_SPEC_FN(in_log_exp, logm)
+ KFR_SPEC_FN(in_log_exp, exp_fmadd)
+ KFR_SPEC_FN(in_log_exp, log_fmadd)
+ KFR_SPEC_FN(in_log_exp, pow)
+ KFR_SPEC_FN(in_log_exp, root)
+ KFR_SPEC_FN(in_log_exp, cbrt)
+};
+}
+namespace native
+{
+using fn_exp = internal::in_log_exp<>::fn_exp;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> exp(const T1& x)
+{
+ return internal::in_log_exp<>::exp(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_exp, E1> exp(E1&& x)
+{
+ return { fn_exp(), std::forward<E1>(x) };
+}
+
+using fn_exp2 = internal::in_log_exp<>::fn_exp2;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> exp2(const T1& x)
+{
+ return internal::in_log_exp<>::exp2(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_exp2, E1> exp2(E1&& x)
+{
+ return { fn_exp2(), std::forward<E1>(x) };
+}
+
+using fn_exp10 = internal::in_log_exp<>::fn_exp10;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> exp10(const T1& x)
+{
+ return internal::in_log_exp<>::exp10(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_exp10, E1> exp10(E1&& x)
+{
+ return { fn_exp10(), std::forward<E1>(x) };
+}
+
+using fn_log = internal::in_log_exp<>::fn_log;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> log(const T1& x)
+{
+ return internal::in_log_exp<>::log(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_log, E1> log(E1&& x)
+{
+ return { fn_log(), std::forward<E1>(x) };
+}
+
+using fn_log2 = internal::in_log_exp<>::fn_log2;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> log2(const T1& x)
+{
+ return internal::in_log_exp<>::log2(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_log2, E1> log2(E1&& x)
+{
+ return { fn_log2(), std::forward<E1>(x) };
+}
+
+using fn_log10 = internal::in_log_exp<>::fn_log10;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> log10(const T1& x)
+{
+ return internal::in_log_exp<>::log10(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_log10, E1> log10(E1&& x)
+{
+ return { fn_log10(), std::forward<E1>(x) };
+}
+
+using fn_logb = internal::in_log_exp<>::fn_logb;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> logb(const T1& x)
+{
+ return internal::in_log_exp<>::logb(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_logb, E1> logb(E1&& x)
+{
+ return { fn_logb(), std::forward<E1>(x) };
+}
+
+using fn_logn = internal::in_log_exp<>::fn_logn;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> logn(const T1& x)
+{
+ return internal::in_log_exp<>::logn(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_logn, E1> logn(E1&& x)
+{
+ return { fn_logn(), std::forward<E1>(x) };
+}
+
+using fn_logm = internal::in_log_exp<>::fn_logm;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> logm(const T1& x)
+{
+ return internal::in_log_exp<>::logm(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_logm, E1> logm(E1&& x)
+{
+ return { fn_logm(), std::forward<E1>(x) };
+}
+
+using fn_exp_fmadd = internal::in_log_exp<>::fn_exp_fmadd;
+template <typename T1, typename T2, typename T3, KFR_ENABLE_IF(is_numeric_args<T1, T2, T3>::value)>
+KFR_INLINE ftype<common_type<T1, T2, T3>> exp_fmadd(const T1& x, const T2& m, const T3& a)
+{
+ return internal::in_log_exp<>::exp_fmadd(x, m, a);
+}
+
+template <typename E1, typename E2, typename E3, KFR_ENABLE_IF(is_input_expressions<E1, E2, E3>::value)>
+KFR_INLINE expr_func<fn_exp_fmadd, E1, E2, E3> exp_fmadd(E1&& x, E2&& m, E3&& a)
+{
+ return { fn_exp_fmadd(), std::forward<E1>(x), std::forward<E2>(m), std::forward<E3>(a) };
+}
+using fn_log_fmadd = internal::in_log_exp<>::fn_log_fmadd;
+template <typename T1, typename T2, typename T3, KFR_ENABLE_IF(is_numeric_args<T1, T2, T3>::value)>
+KFR_INLINE ftype<common_type<T1, T2, T3>> log_fmadd(const T1& x, const T2& m, const T3& a)
+{
+ return internal::in_log_exp<>::log_fmadd(x, m, a);
+}
+
+template <typename E1, typename E2, typename E3, KFR_ENABLE_IF(is_input_expressions<E1, E2, E3>::value)>
+KFR_INLINE expr_func<fn_log_fmadd, E1, E2, E3> log_fmadd(E1&& x, E2&& m, E3&& a)
+{
+    return { fn_log_fmadd(), std::forward<E1>(x), std::forward<E2>(m), std::forward<E3>(a) };
+}
+
+using fn_pow = internal::in_log_exp<>::fn_pow;
+template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)>
+KFR_INLINE ftype<common_type<T1, T2>> pow(const T1& x, const T2& b)
+{
+ return internal::in_log_exp<>::pow(x, b);
+}
+
+template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)>
+KFR_INLINE expr_func<fn_pow, E1, E2> pow(E1&& x, E2&& b)
+{
+ return { fn_pow(), std::forward<E1>(x), std::forward<E2>(b) };
+}
+using fn_root = internal::in_log_exp<>::fn_root;
+template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)>
+KFR_INLINE ftype<common_type<T1, T2>> root(const T1& x, const T2& b)
+{
+ return internal::in_log_exp<>::root(x, b);
+}
+
+template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)>
+KFR_INLINE expr_func<fn_root, E1, E2> root(E1&& x, E2&& b)
+{
+    return { fn_root(), std::forward<E1>(x), std::forward<E2>(b) };
+}
+
+using fn_cbrt = internal::in_log_exp<>::fn_cbrt;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> cbrt(const T1& x)
+{
+ return internal::in_log_exp<>::cbrt(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_cbrt, E1> cbrt(E1&& x)
+{
+ return { fn_cbrt(), std::forward<E1>(x) };
+}
+}
+}
+
+#pragma clang diagnostic pop
diff --git a/include/kfr/base/logical.hpp b/include/kfr/base/logical.hpp
@@ -0,0 +1,339 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+#include "abs.hpp"
+#include "function.hpp"
+#include "operators.hpp"
+
+namespace kfr
+{
+
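+// bitmask<bits> stores a movemask result in the smallest unsigned integer type
+// able to hold `bits` bits (chosen via findinttype).
+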
+template <size_t bits>
+struct bitmask
+{
+ using type = findinttype<0, (1ull << bits) - 1>;
+ bitmask(type val) : value(val) {}
+ template <typename Itype>
+ bitmask(Itype val) : value(static_cast<type>(val))
+ {
+ }
+ type value;
+};
+
+namespace internal
+{
+
+template <cpu_t c = cpu_t::native>
+struct in_bittest : in_bittest<older(c)>
+{
+ struct fn_bittestnone : fn_disabled
+ {
+ };
+ struct fn_bittestall : fn_disabled
+ {
+ };
+};
+
+struct logical_and
+{
+ template <typename T1, typename T2>
+ auto operator()(T1 x, T2 y) -> decltype(x && y)
+ {
+ return x && y;
+ }
+ template <typename T>
+ T operator()(initialvalue<T>)
+ {
+ return T();
+ }
+};
+
+template <>
+struct in_bittest<cpu_t::sse2>
+{
+ constexpr static cpu_t cpu = cpu_t::sse2;
+
+    KFR_SINTRIN bitmask<4> getmask(f32sse x) { return bitmask<4>(_mm_movemask_ps(*x)); }
+ KFR_SINTRIN bitmask<4> getmask(f64sse x) { return bitmask<4>(_mm_movemask_pd(*x)); }
+ KFR_SINTRIN bitmask<16> getmask(u8sse x) { return bitmask<16>(_mm_movemask_epi8(*x)); }
+ KFR_SINTRIN bitmask<16> getmask(u16sse x) { return bitmask<16>(_mm_movemask_epi8(*x)); }
+ KFR_SINTRIN bitmask<16> getmask(u32sse x) { return bitmask<16>(_mm_movemask_epi8(*x)); }
+ KFR_SINTRIN bitmask<16> getmask(u64sse x) { return bitmask<16>(_mm_movemask_epi8(*x)); }
+ KFR_SINTRIN bitmask<16> getmask(i8sse x) { return bitmask<16>(_mm_movemask_epi8(*x)); }
+ KFR_SINTRIN bitmask<16> getmask(i16sse x) { return bitmask<16>(_mm_movemask_epi8(*x)); }
+ KFR_SINTRIN bitmask<16> getmask(i32sse x) { return bitmask<16>(_mm_movemask_epi8(*x)); }
+ KFR_SINTRIN bitmask<16> getmask(i64sse x) { return bitmask<16>(_mm_movemask_epi8(*x)); }
+
+ KFR_SINTRIN bool bittestnone(f32sse x) { return !_mm_movemask_ps(*x); }
+ KFR_SINTRIN bool bittestnone(f64sse x) { return !_mm_movemask_pd(*x); }
+ KFR_SINTRIN bool bittestnone(u8sse x) { return !_mm_movemask_epi8(*x); }
+ KFR_SINTRIN bool bittestnone(u16sse x) { return !_mm_movemask_epi8(*x); }
+ KFR_SINTRIN bool bittestnone(u32sse x) { return !_mm_movemask_epi8(*x); }
+ KFR_SINTRIN bool bittestnone(u64sse x) { return !_mm_movemask_epi8(*x); }
+ KFR_SINTRIN bool bittestnone(i8sse x) { return !_mm_movemask_epi8(*x); }
+ KFR_SINTRIN bool bittestnone(i16sse x) { return !_mm_movemask_epi8(*x); }
+ KFR_SINTRIN bool bittestnone(i32sse x) { return !_mm_movemask_epi8(*x); }
+ KFR_SINTRIN bool bittestnone(i64sse x) { return !_mm_movemask_epi8(*x); }
+
+ KFR_SINTRIN bool bittestnone(f32sse x, f32sse y) { return bittestnone(x & y); }
+ KFR_SINTRIN bool bittestnone(f64sse x, f64sse y) { return bittestnone(x & y); }
+ KFR_SINTRIN bool bittestnone(u8sse x, u8sse y) { return bittestnone(x & y); }
+ KFR_SINTRIN bool bittestnone(u16sse x, u16sse y) { return bittestnone(x & y); }
+ KFR_SINTRIN bool bittestnone(u32sse x, u32sse y) { return bittestnone(x & y); }
+ KFR_SINTRIN bool bittestnone(u64sse x, u64sse y) { return bittestnone(x & y); }
+ KFR_SINTRIN bool bittestnone(i8sse x, i8sse y) { return bittestnone(x & y); }
+ KFR_SINTRIN bool bittestnone(i16sse x, i16sse y) { return bittestnone(x & y); }
+ KFR_SINTRIN bool bittestnone(i32sse x, i32sse y) { return bittestnone(x & y); }
+ KFR_SINTRIN bool bittestnone(i64sse x, i64sse y) { return bittestnone(x & y); }
+
+ KFR_SINTRIN bool bittestall(f32sse x) { return !_mm_movemask_ps(*~x); }
+ KFR_SINTRIN bool bittestall(f64sse x) { return !_mm_movemask_pd(*~x); }
+ KFR_SINTRIN bool bittestall(u8sse x) { return !_mm_movemask_epi8(*~x); }
+ KFR_SINTRIN bool bittestall(u16sse x) { return !_mm_movemask_epi8(*~x); }
+ KFR_SINTRIN bool bittestall(u32sse x) { return !_mm_movemask_epi8(*~x); }
+ KFR_SINTRIN bool bittestall(u64sse x) { return !_mm_movemask_epi8(*~x); }
+ KFR_SINTRIN bool bittestall(i8sse x) { return !_mm_movemask_epi8(*~x); }
+ KFR_SINTRIN bool bittestall(i16sse x) { return !_mm_movemask_epi8(*~x); }
+ KFR_SINTRIN bool bittestall(i32sse x) { return !_mm_movemask_epi8(*~x); }
+ KFR_SINTRIN bool bittestall(i64sse x) { return !_mm_movemask_epi8(*~x); }
+
+ KFR_SINTRIN bool bittestall(f32sse x, f32sse y) { return bittestnone(~x & y); }
+ KFR_SINTRIN bool bittestall(f64sse x, f64sse y) { return bittestnone(~x & y); }
+ KFR_SINTRIN bool bittestall(u8sse x, u8sse y) { return bittestnone(~x & y); }
+ KFR_SINTRIN bool bittestall(u16sse x, u16sse y) { return bittestnone(~x & y); }
+ KFR_SINTRIN bool bittestall(u32sse x, u32sse y) { return bittestnone(~x & y); }
+ KFR_SINTRIN bool bittestall(u64sse x, u64sse y) { return bittestnone(~x & y); }
+ KFR_SINTRIN bool bittestall(i8sse x, i8sse y) { return bittestnone(~x & y); }
+ KFR_SINTRIN bool bittestall(i16sse x, i16sse y) { return bittestnone(~x & y); }
+ KFR_SINTRIN bool bittestall(i32sse x, i32sse y) { return bittestnone(~x & y); }
+ KFR_SINTRIN bool bittestall(i64sse x, i64sse y) { return bittestnone(~x & y); }
+
+ KFR_HANDLE_ALL_REDUCE(logical_and, bittestnone)
+ KFR_HANDLE_ALL_REDUCE(logical_and, bittestall)
+ KFR_SPEC_FN(in_bittest, bittestnone)
+ KFR_SPEC_FN(in_bittest, bittestall)
+};
+
+template <>
+struct in_bittest<cpu_t::sse41> : in_bittest<cpu_t::sse2>
+{
+ constexpr static cpu_t cpu = cpu_t::sse41;
+
+ KFR_SINTRIN bool bittestnone(f32sse x, f32sse y) { return _mm_testz_ps(*x, *y); }
+ KFR_SINTRIN bool bittestnone(f64sse x, f64sse y) { return _mm_testz_pd(*x, *y); }
+ KFR_SINTRIN bool bittestnone(u8sse x, u8sse y) { return _mm_testz_si128(*x, *y); }
+ KFR_SINTRIN bool bittestnone(u16sse x, u16sse y) { return _mm_testz_si128(*x, *y); }
+ KFR_SINTRIN bool bittestnone(u32sse x, u32sse y) { return _mm_testz_si128(*x, *y); }
+ KFR_SINTRIN bool bittestnone(u64sse x, u64sse y) { return _mm_testz_si128(*x, *y); }
+ KFR_SINTRIN bool bittestnone(i8sse x, i8sse y) { return _mm_testz_si128(*x, *y); }
+ KFR_SINTRIN bool bittestnone(i16sse x, i16sse y) { return _mm_testz_si128(*x, *y); }
+ KFR_SINTRIN bool bittestnone(i32sse x, i32sse y) { return _mm_testz_si128(*x, *y); }
+ KFR_SINTRIN bool bittestnone(i64sse x, i64sse y) { return _mm_testz_si128(*x, *y); }
+
+ KFR_SINTRIN bool bittestnone(f32sse x) { return _mm_testz_ps(*x, *x); }
+ KFR_SINTRIN bool bittestnone(f64sse x) { return _mm_testz_pd(*x, *x); }
+ KFR_SINTRIN bool bittestnone(u8sse x) { return _mm_testz_si128(*x, *x); }
+ KFR_SINTRIN bool bittestnone(u16sse x) { return _mm_testz_si128(*x, *x); }
+ KFR_SINTRIN bool bittestnone(u32sse x) { return _mm_testz_si128(*x, *x); }
+ KFR_SINTRIN bool bittestnone(u64sse x) { return _mm_testz_si128(*x, *x); }
+ KFR_SINTRIN bool bittestnone(i8sse x) { return _mm_testz_si128(*x, *x); }
+ KFR_SINTRIN bool bittestnone(i16sse x) { return _mm_testz_si128(*x, *x); }
+ KFR_SINTRIN bool bittestnone(i32sse x) { return _mm_testz_si128(*x, *x); }
+ KFR_SINTRIN bool bittestnone(i64sse x) { return _mm_testz_si128(*x, *x); }
+
+ KFR_SINTRIN bool bittestall(f32sse x, f32sse y) { return _mm_testc_ps(*x, *y); }
+ KFR_SINTRIN bool bittestall(f64sse x, f64sse y) { return _mm_testc_pd(*x, *y); }
+ KFR_SINTRIN bool bittestall(u8sse x, u8sse y) { return _mm_testc_si128(*x, *y); }
+ KFR_SINTRIN bool bittestall(u16sse x, u16sse y) { return _mm_testc_si128(*x, *y); }
+ KFR_SINTRIN bool bittestall(u32sse x, u32sse y) { return _mm_testc_si128(*x, *y); }
+ KFR_SINTRIN bool bittestall(u64sse x, u64sse y) { return _mm_testc_si128(*x, *y); }
+ KFR_SINTRIN bool bittestall(i8sse x, i8sse y) { return _mm_testc_si128(*x, *y); }
+ KFR_SINTRIN bool bittestall(i16sse x, i16sse y) { return _mm_testc_si128(*x, *y); }
+ KFR_SINTRIN bool bittestall(i32sse x, i32sse y) { return _mm_testc_si128(*x, *y); }
+ KFR_SINTRIN bool bittestall(i64sse x, i64sse y) { return _mm_testc_si128(*x, *y); }
+
+ KFR_SINTRIN bool bittestall(f32sse x) { return _mm_testc_ps(*x, *allonesvector(x)); }
+ KFR_SINTRIN bool bittestall(f64sse x) { return _mm_testc_pd(*x, *allonesvector(x)); }
+ KFR_SINTRIN bool bittestall(u8sse x) { return _mm_testc_si128(*x, *allonesvector(x)); }
+ KFR_SINTRIN bool bittestall(u16sse x) { return _mm_testc_si128(*x, *allonesvector(x)); }
+ KFR_SINTRIN bool bittestall(u32sse x) { return _mm_testc_si128(*x, *allonesvector(x)); }
+ KFR_SINTRIN bool bittestall(u64sse x) { return _mm_testc_si128(*x, *allonesvector(x)); }
+ KFR_SINTRIN bool bittestall(i8sse x) { return _mm_testc_si128(*x, *allonesvector(x)); }
+ KFR_SINTRIN bool bittestall(i16sse x) { return _mm_testc_si128(*x, *allonesvector(x)); }
+ KFR_SINTRIN bool bittestall(i32sse x) { return _mm_testc_si128(*x, *allonesvector(x)); }
+ KFR_SINTRIN bool bittestall(i64sse x) { return _mm_testc_si128(*x, *allonesvector(x)); }
+
+ KFR_HANDLE_ALL_REDUCE(logical_and, bittestnone)
+ KFR_HANDLE_ALL_REDUCE(logical_and, bittestall)
+ KFR_SPEC_FN(in_bittest, bittestnone)
+ KFR_SPEC_FN(in_bittest, bittestall)
+};
+
+template <>
+struct in_bittest<cpu_t::avx1> : in_bittest<cpu_t::sse41>
+{
+ constexpr static cpu_t cpu = cpu_t::avx1;
+ using in_bittest<cpu_t::sse41>::bittestnone;
+ using in_bittest<cpu_t::sse41>::bittestall;
+
+    KFR_SINTRIN bitmask<8> getmask(f32avx x) { return bitmask<8>(_mm256_movemask_ps(*x)); }
+ KFR_SINTRIN bitmask<8> getmask(f64avx x) { return bitmask<8>(_mm256_movemask_pd(*x)); }
+
+ KFR_SINTRIN bool bittestnone(f32avx x, f32avx y) { return _mm256_testz_ps(*x, *y); }
+ KFR_SINTRIN bool bittestnone(f64avx x, f64avx y) { return _mm256_testz_pd(*x, *y); }
+ KFR_SINTRIN bool bittestnone(f32avx x) { return _mm256_testz_ps(*x, *x); }
+ KFR_SINTRIN bool bittestnone(f64avx x) { return _mm256_testz_pd(*x, *x); }
+    KFR_SINTRIN bool bittestall(f32avx x, f32avx y) { return _mm256_testc_ps(*x, *y); }
+    KFR_SINTRIN bool bittestall(f64avx x, f64avx y) { return _mm256_testc_pd(*x, *y); }
+    KFR_SINTRIN bool bittestall(f32avx x) { return _mm256_testc_ps(*x, *allonesvector(x)); }
+    KFR_SINTRIN bool bittestall(f64avx x) { return _mm256_testc_pd(*x, *allonesvector(x)); }
+
+ KFR_HANDLE_ALL_REDUCE(logical_and, bittestnone)
+ KFR_HANDLE_ALL_REDUCE(logical_and, bittestall)
+ KFR_SPEC_FN(in_bittest, bittestnone)
+ KFR_SPEC_FN(in_bittest, bittestall)
+};
+
+template <>
+struct in_bittest<cpu_t::avx2> : in_bittest<cpu_t::avx1>
+{
+ constexpr static cpu_t cpu = cpu_t::avx2;
+ using in_bittest<cpu_t::avx1>::bittestnone;
+ using in_bittest<cpu_t::avx1>::bittestall;
+
+ KFR_SINTRIN bitmask<32> getmask(u8avx x) { return bitmask<32>(_mm256_movemask_epi8(*x)); }
+ KFR_SINTRIN bitmask<32> getmask(u16avx x) { return bitmask<32>(_mm256_movemask_epi8(*x)); }
+ KFR_SINTRIN bitmask<32> getmask(u32avx x) { return bitmask<32>(_mm256_movemask_epi8(*x)); }
+ KFR_SINTRIN bitmask<32> getmask(u64avx x) { return bitmask<32>(_mm256_movemask_epi8(*x)); }
+ KFR_SINTRIN bitmask<32> getmask(i8avx x) { return bitmask<32>(_mm256_movemask_epi8(*x)); }
+ KFR_SINTRIN bitmask<32> getmask(i16avx x) { return bitmask<32>(_mm256_movemask_epi8(*x)); }
+ KFR_SINTRIN bitmask<32> getmask(i32avx x) { return bitmask<32>(_mm256_movemask_epi8(*x)); }
+ KFR_SINTRIN bitmask<32> getmask(i64avx x) { return bitmask<32>(_mm256_movemask_epi8(*x)); }
+
+ KFR_SINTRIN bool bittestnone(u8avx x, u8avx y) { return _mm256_testz_si256(*x, *y); }
+ KFR_SINTRIN bool bittestnone(u16avx x, u16avx y) { return _mm256_testz_si256(*x, *y); }
+ KFR_SINTRIN bool bittestnone(u32avx x, u32avx y) { return _mm256_testz_si256(*x, *y); }
+ KFR_SINTRIN bool bittestnone(u64avx x, u64avx y) { return _mm256_testz_si256(*x, *y); }
+ KFR_SINTRIN bool bittestnone(i8avx x, i8avx y) { return _mm256_testz_si256(*x, *y); }
+ KFR_SINTRIN bool bittestnone(i16avx x, i16avx y) { return _mm256_testz_si256(*x, *y); }
+ KFR_SINTRIN bool bittestnone(i32avx x, i32avx y) { return _mm256_testz_si256(*x, *y); }
+ KFR_SINTRIN bool bittestnone(i64avx x, i64avx y) { return _mm256_testz_si256(*x, *y); }
+
+ KFR_SINTRIN bool bittestnone(u8avx x) { return _mm256_testz_si256(*x, *x); }
+ KFR_SINTRIN bool bittestnone(u16avx x) { return _mm256_testz_si256(*x, *x); }
+ KFR_SINTRIN bool bittestnone(u32avx x) { return _mm256_testz_si256(*x, *x); }
+ KFR_SINTRIN bool bittestnone(u64avx x) { return _mm256_testz_si256(*x, *x); }
+ KFR_SINTRIN bool bittestnone(i8avx x) { return _mm256_testz_si256(*x, *x); }
+ KFR_SINTRIN bool bittestnone(i16avx x) { return _mm256_testz_si256(*x, *x); }
+ KFR_SINTRIN bool bittestnone(i32avx x) { return _mm256_testz_si256(*x, *x); }
+ KFR_SINTRIN bool bittestnone(i64avx x) { return _mm256_testz_si256(*x, *x); }
+
+ KFR_SINTRIN bool bittestall(u8avx x, u8avx y) { return _mm256_testc_si256(*x, *y); }
+ KFR_SINTRIN bool bittestall(u16avx x, u16avx y) { return _mm256_testc_si256(*x, *y); }
+ KFR_SINTRIN bool bittestall(u32avx x, u32avx y) { return _mm256_testc_si256(*x, *y); }
+ KFR_SINTRIN bool bittestall(u64avx x, u64avx y) { return _mm256_testc_si256(*x, *y); }
+ KFR_SINTRIN bool bittestall(i8avx x, i8avx y) { return _mm256_testc_si256(*x, *y); }
+ KFR_SINTRIN bool bittestall(i16avx x, i16avx y) { return _mm256_testc_si256(*x, *y); }
+ KFR_SINTRIN bool bittestall(i32avx x, i32avx y) { return _mm256_testc_si256(*x, *y); }
+ KFR_SINTRIN bool bittestall(i64avx x, i64avx y) { return _mm256_testc_si256(*x, *y); }
+
+ KFR_SINTRIN bool bittestall(u8avx x) { return _mm256_testc_si256(*x, *allonesvector(x)); }
+ KFR_SINTRIN bool bittestall(u16avx x) { return _mm256_testc_si256(*x, *allonesvector(x)); }
+ KFR_SINTRIN bool bittestall(u32avx x) { return _mm256_testc_si256(*x, *allonesvector(x)); }
+ KFR_SINTRIN bool bittestall(u64avx x) { return _mm256_testc_si256(*x, *allonesvector(x)); }
+ KFR_SINTRIN bool bittestall(i8avx x) { return _mm256_testc_si256(*x, *allonesvector(x)); }
+ KFR_SINTRIN bool bittestall(i16avx x) { return _mm256_testc_si256(*x, *allonesvector(x)); }
+ KFR_SINTRIN bool bittestall(i32avx x) { return _mm256_testc_si256(*x, *allonesvector(x)); }
+ KFR_SINTRIN bool bittestall(i64avx x) { return _mm256_testc_si256(*x, *allonesvector(x)); }
+
+ KFR_HANDLE_ALL_REDUCE(logical_and, bittestnone)
+ KFR_HANDLE_ALL_REDUCE(logical_and, bittestall)
+ KFR_SPEC_FN(in_bittest, bittestnone)
+ KFR_SPEC_FN(in_bittest, bittestall)
+};
+}
+
+namespace native
+{
+using fn_bittestnone = internal::in_bittest<>::fn_bittestnone;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> bittestnone(const T1& x)
+{
+ return internal::in_bittest<>::bittestnone(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_bittestnone, E1> bittestnone(E1&& x)
+{
+ return { fn_bittestnone(), std::forward<E1>(x) };
+}
+
+using fn_bittestall = internal::in_bittest<>::fn_bittestall;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> bittestall(const T1& x)
+{
+ return internal::in_bittest<>::bittestall(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_bittestall, E1> bittestall(E1&& x)
+{
+ return { fn_bittestall(), std::forward<E1>(x) };
+}
+
+using fn_bittestnone = internal::in_bittest<>::fn_bittestnone;
+template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)>
+KFR_INLINE ftype<common_type<T1, T2>> bittestnone(const T1& x, const T2& y)
+{
+ return internal::in_bittest<>::bittestnone(x, y);
+}
+
+template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)>
+KFR_INLINE expr_func<fn_bittestnone, E1, E2> bittestnone(E1&& x, E2&& y)
+{
+ return { fn_bittestnone(), std::forward<E1>(x), std::forward<E2>(y) };
+}
+using fn_bittestall = internal::in_bittest<>::fn_bittestall;
+template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)>
+KFR_INLINE ftype<common_type<T1, T2>> bittestall(const T1& x, const T2& y)
+{
+ return internal::in_bittest<>::bittestall(x, y);
+}
+
+template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)>
+KFR_INLINE expr_func<fn_bittestall, E1, E2> bittestall(E1&& x, E2&& y)
+{
+    return { fn_bittestall(), std::forward<E1>(x), std::forward<E2>(y) };
+}
+}
+}
diff --git a/include/kfr/base/memory.hpp b/include/kfr/base/memory.hpp
@@ -0,0 +1,209 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+
+#include "../base/read_write.hpp"
+#include "../base/types.hpp"
+#include <atomic>
+#include <memory>
+
+namespace kfr
+{
+
+namespace internal
+{
+
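+// Process-wide allocation counters, updated atomically by aligned_malloc and
+// aligned_free below.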
+struct memory_statistics
+{
+ std::atomic_uintptr_t allocation_count = ATOMIC_VAR_INIT(0);
+ std::atomic_uintptr_t allocation_size = ATOMIC_VAR_INIT(0);
+ std::atomic_uintptr_t deallocation_count = ATOMIC_VAR_INIT(0);
+ std::atomic_uintptr_t deallocation_size = ATOMIC_VAR_INIT(0);
+};
+
+inline memory_statistics& get_memory_statistics()
+{
+ static memory_statistics ms;
+ return ms;
+}
+
+struct mem_header
+{
+ u8 offset;
+ u8 alignment;
+ u8 reserved1;
+ u8 reserved2;
+ size_t size;
+} __attribute__((__packed__));
+
+inline mem_header* aligned_header(void* ptr) { return ptr_cast<mem_header>(ptr) - 1; }
+
+inline size_t aligned_size(void* ptr) { return aligned_header(ptr)->size; }
+
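+// Over-allocates by (alignment - 1) + sizeof(mem_header) bytes and places a
+// small header just below the returned pointer; aligned_free recovers the
+// original malloc pointer from the recorded offset.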
+inline void* aligned_malloc(size_t size, size_t alignment)
+{
+ get_memory_statistics().allocation_count++;
+ get_memory_statistics().allocation_size += size;
+ void* ptr = malloc(size + (alignment - 1) + sizeof(mem_header));
+ if (ptr == nullptr)
+ return nullptr;
+ void* aligned_ptr = advance(ptr, sizeof(mem_header));
+ aligned_ptr = align_up(aligned_ptr, alignment);
+ aligned_header(aligned_ptr)->alignment = static_cast<u8>(alignment > 255 ? 255 : alignment);
+ aligned_header(aligned_ptr)->offset = static_cast<u8>(distance(aligned_ptr, ptr));
+ aligned_header(aligned_ptr)->size = size;
+ return aligned_ptr;
+}
+inline void aligned_free(void* ptr)
+{
+ get_memory_statistics().deallocation_count++;
+ get_memory_statistics().deallocation_size += aligned_size(ptr);
+ free(advance(ptr, -static_cast<ptrdiff_t>(aligned_header(ptr)->offset)));
+}
+}
+
+template <typename T = void, size_t alignment = native_cache_alignment>
+KFR_INLINE T* aligned_allocate(size_t size = 1)
+{
+ T* ptr = static_cast<T*>(__builtin_assume_aligned(
+ internal::aligned_malloc(std::max(alignment, size * details::elementsize<T>), alignment), alignment));
+ return ptr;
+}
+
+template <typename T = void>
+KFR_INLINE void aligned_deallocate(T* ptr)
+{
+ return internal::aligned_free(ptr);
+}
+
+namespace internal
+{
+template <typename T>
+struct aligned_deleter
+{
+ KFR_INLINE void operator()(T* ptr) const { aligned_deallocate(ptr); }
+};
+}
+
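+/// Move-only RAII owner for an aligned allocation; memory is released through
+/// aligned_deallocate via aligned_deleter.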
+template <typename T>
+struct autofree
+{
+ KFR_INLINE autofree() {}
+ explicit KFR_INLINE autofree(size_t size) : ptr(aligned_allocate<T>(size)) {}
+ autofree(const autofree&) = delete;
+ autofree& operator=(const autofree&) = delete;
+ autofree(autofree&&) noexcept = default;
+ autofree& operator=(autofree&&) noexcept = default;
+ KFR_INLINE T& operator[](size_t index) noexcept { return ptr[index]; }
+ KFR_INLINE const T& operator[](size_t index) const noexcept { return ptr[index]; }
+
+ template <typename U = T>
+ KFR_INLINE U* data() noexcept
+ {
+ return ptr_cast<U>(ptr.get());
+ }
+ template <typename U = T>
+ KFR_INLINE const U* data() const noexcept
+ {
+ return ptr_cast<U>(ptr.get());
+ }
+
+ std::unique_ptr<T[], internal::aligned_deleter<T>> ptr;
+};
+
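+/// STL-compatible allocator returning cache-aligned memory, so standard
+/// containers can hold SIMD-friendly data, e.g.
+///     std::vector<float, kfr::allocator<float>> v(1024);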
+template <typename T>
+struct allocator
+{
+ using value_type = T;
+ using pointer = T*;
+ using const_pointer = const T*;
+ using reference = T&;
+ using const_reference = const T&;
+ using size_type = std::size_t;
+ using difference_type = std::ptrdiff_t;
+
+ template <typename U>
+ struct rebind
+ {
+ using other = allocator<U>;
+ };
+ constexpr allocator() noexcept = default;
+ constexpr allocator(const allocator&) noexcept = default;
+ template <typename U>
+ constexpr allocator(const allocator<U>&) noexcept
+ {
+ }
+ pointer address(reference x) const noexcept { return std::addressof(x); }
+ const_pointer address(const_reference x) const noexcept { return std::addressof(x); }
+ pointer allocate(size_type n, std::allocator<void>::const_pointer = 0) const
+ {
+ pointer result = aligned_allocate<value_type>(n);
+ if (!result)
+ CID_THROW(std::bad_alloc());
+ return result;
+ }
+ void deallocate(pointer p, size_type) { aligned_deallocate(p); }
+ size_type max_size() const { return std::numeric_limits<size_type>::max() / sizeof(value_type); }
+ template <typename U, typename... Args>
+ void construct(U* p, Args&&... args)
+ {
+ ::new (pvoid(p)) U(std::forward<Args>(args)...);
+ }
+ template <typename U>
+ void destroy(U* p)
+ {
+ p->~U();
+ }
+};
+
+template <typename T1, typename T2>
+constexpr inline bool operator==(const allocator<T1>&, const allocator<T2>&) noexcept
+{
+ return true;
+}
+template <typename T1, typename T2>
+constexpr inline bool operator!=(const allocator<T1>&, const allocator<T2>&) noexcept
+{
+ return false;
+}
+
+struct aligned_new
+{
+ inline static void* operator new(size_t size) { return aligned_allocate(size); }
+ inline static void operator delete(void* ptr) { return aligned_deallocate(ptr); }
+};
+
+#define KFR_CLASS_REFCOUNT(cl) \
+public: \
+ void addref() const { m_refcount++; } \
+ void release() const \
+ { \
+ if (--m_refcount == 0) \
+ { \
+ delete this; \
+ } \
+ } \
+ \
+private: \
+ mutable std::atomic_uintptr_t m_refcount = ATOMIC_VAR_INIT(0);
+}
diff --git a/include/kfr/base/min_max.hpp b/include/kfr/base/min_max.hpp
@@ -0,0 +1,377 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+
+#include "abs.hpp"
+#include "function.hpp"
+#include "operators.hpp"
+#include "select.hpp"
+
+#pragma clang diagnostic push
+#if CID_HAS_WARNING("-Winaccessible-base")
+#pragma clang diagnostic ignored "-Winaccessible-base"
+#endif
+
+namespace kfr
+{
+
+namespace internal
+{
+
+template <cpu_t cpu = cpu_t::native>
+struct in_min_max : in_min_max<older(cpu)>
+{
+ struct fn_min : in_min_max<older(cpu)>::fn_min, fn_disabled
+ {
+ };
+ struct fn_max : in_min_max<older(cpu)>::fn_max, fn_disabled
+ {
+ };
+};
+
+template <>
+struct in_min_max<cpu_t::sse2> : in_select<cpu_t::sse2>
+{
+ constexpr static cpu_t cpu = cpu_t::sse2;
+
+private:
+ using in_select<cpu>::select;
+
+public:
+ template <typename T>
+ KFR_SINTRIN T min(initialvalue<T>)
+ {
+ return std::numeric_limits<T>::max();
+ }
+ template <typename T>
+ KFR_SINTRIN T max(initialvalue<T>)
+ {
+        // identity for max-reduction; lowest(), not min(), since for
+        // floating-point types min() is the smallest positive value
+        return std::numeric_limits<T>::lowest();
+ }
+
+ KFR_CPU_INTRIN(sse2) f32sse min(f32sse x, f32sse y) { return _mm_min_ps(*x, *y); }
+ KFR_CPU_INTRIN(sse2) f64sse min(f64sse x, f64sse y) { return _mm_min_pd(*x, *y); }
+ KFR_CPU_INTRIN(sse2) i8sse min(i8sse x, i8sse y) { return select(x < y, x, y); }
+ KFR_CPU_INTRIN(sse2) u16sse min(u16sse x, u16sse y) { return select(x < y, x, y); }
+ KFR_CPU_INTRIN(sse2) i32sse min(i32sse x, i32sse y) { return select(x < y, x, y); }
+ KFR_CPU_INTRIN(sse2) u32sse min(u32sse x, u32sse y) { return select(x < y, x, y); }
+ KFR_CPU_INTRIN(sse2) u8sse min(u8sse x, u8sse y) { return _mm_min_epu8(*x, *y); }
+ KFR_CPU_INTRIN(sse2) i16sse min(i16sse x, i16sse y) { return _mm_min_epi16(*x, *y); }
+ KFR_CPU_INTRIN(sse2) i64sse min(i64sse x, i64sse y) { return select(x < y, x, y); }
+ KFR_CPU_INTRIN(sse2) u64sse min(u64sse x, u64sse y) { return select(x < y, x, y); }
+
+ KFR_CPU_INTRIN(sse2) f32sse max(f32sse x, f32sse y) { return _mm_max_ps(*x, *y); }
+ KFR_CPU_INTRIN(sse2) f64sse max(f64sse x, f64sse y) { return _mm_max_pd(*x, *y); }
+ KFR_CPU_INTRIN(sse2) i8sse max(i8sse x, i8sse y) { return select(x > y, x, y); }
+ KFR_CPU_INTRIN(sse2) u16sse max(u16sse x, u16sse y) { return select(x > y, x, y); }
+ KFR_CPU_INTRIN(sse2) i32sse max(i32sse x, i32sse y) { return select(x > y, x, y); }
+ KFR_CPU_INTRIN(sse2) u32sse max(u32sse x, u32sse y) { return select(x > y, x, y); }
+ KFR_CPU_INTRIN(sse2) u8sse max(u8sse x, u8sse y) { return _mm_max_epu8(*x, *y); }
+ KFR_CPU_INTRIN(sse2) i16sse max(i16sse x, i16sse y) { return _mm_max_epi16(*x, *y); }
+ KFR_CPU_INTRIN(sse2) i64sse max(i64sse x, i64sse y) { return select(x > y, x, y); }
+ KFR_CPU_INTRIN(sse2) u64sse max(u64sse x, u64sse y) { return select(x > y, x, y); }
+
+ KFR_HANDLE_ALL(min)
+ KFR_HANDLE_ALL(max)
+
+ KFR_SPEC_FN(in_min_max, min)
+ KFR_SPEC_FN(in_min_max, max)
+};
+
+template <>
+struct in_min_max<cpu_t::sse41> : in_min_max<cpu_t::sse2>
+{
+ constexpr static cpu_t cpu = cpu_t::sse41;
+ using in_min_max<cpu_t::sse2>::min;
+ using in_min_max<cpu_t::sse2>::max;
+
+ KFR_CPU_INTRIN(sse41) i8sse min(i8sse x, i8sse y) { return _mm_min_epi8(*x, *y); }
+ KFR_CPU_INTRIN(sse41) u16sse min(u16sse x, u16sse y) { return _mm_min_epu16(*x, *y); }
+ KFR_CPU_INTRIN(sse41) i32sse min(i32sse x, i32sse y) { return _mm_min_epi32(*x, *y); }
+ KFR_CPU_INTRIN(sse41) u32sse min(u32sse x, u32sse y) { return _mm_min_epu32(*x, *y); }
+
+ KFR_CPU_INTRIN(sse41) i8sse max(i8sse x, i8sse y) { return _mm_max_epi8(*x, *y); }
+ KFR_CPU_INTRIN(sse41) u16sse max(u16sse x, u16sse y) { return _mm_max_epu16(*x, *y); }
+ KFR_CPU_INTRIN(sse41) i32sse max(i32sse x, i32sse y) { return _mm_max_epi32(*x, *y); }
+ KFR_CPU_INTRIN(sse41) u32sse max(u32sse x, u32sse y) { return _mm_max_epu32(*x, *y); }
+
+ KFR_HANDLE_ALL(min)
+ KFR_HANDLE_ALL(max)
+ KFR_SPEC_FN(in_min_max, min)
+ KFR_SPEC_FN(in_min_max, max)
+};
+
+template <>
+struct in_min_max<cpu_t::avx1> : in_min_max<cpu_t::sse41>
+{
+ constexpr static cpu_t cpu = cpu_t::avx1;
+ using in_min_max<cpu_t::sse41>::min;
+ using in_min_max<cpu_t::sse41>::max;
+
+ KFR_CPU_INTRIN(avx) f32avx min(f32avx x, f32avx y) { return _mm256_min_ps(*x, *y); }
+ KFR_CPU_INTRIN(avx) f64avx min(f64avx x, f64avx y) { return _mm256_min_pd(*x, *y); }
+ KFR_CPU_INTRIN(avx) f32avx max(f32avx x, f32avx y) { return _mm256_max_ps(*x, *y); }
+ KFR_CPU_INTRIN(avx) f64avx max(f64avx x, f64avx y) { return _mm256_max_pd(*x, *y); }
+
+ KFR_HANDLE_ALL(min)
+ KFR_HANDLE_ALL(max)
+ KFR_SPEC_FN(in_min_max, min)
+ KFR_SPEC_FN(in_min_max, max)
+};
+
+template <>
+struct in_min_max<cpu_t::avx2> : in_min_max<cpu_t::avx1>
+{
+ constexpr static cpu_t cpu = cpu_t::avx2;
+ using in_min_max<cpu_t::avx1>::min;
+ using in_min_max<cpu_t::avx1>::max;
+
+ KFR_CPU_INTRIN(avx2) u8avx min(u8avx x, u8avx y) { return _mm256_min_epu8(*x, *y); }
+ KFR_CPU_INTRIN(avx2) i16avx min(i16avx x, i16avx y) { return _mm256_min_epi16(*x, *y); }
+ KFR_CPU_INTRIN(avx2) i8avx min(i8avx x, i8avx y) { return _mm256_min_epi8(*x, *y); }
+ KFR_CPU_INTRIN(avx2) u16avx min(u16avx x, u16avx y) { return _mm256_min_epu16(*x, *y); }
+ KFR_CPU_INTRIN(avx2) i32avx min(i32avx x, i32avx y) { return _mm256_min_epi32(*x, *y); }
+ KFR_CPU_INTRIN(avx2) u32avx min(u32avx x, u32avx y) { return _mm256_min_epu32(*x, *y); }
+
+ KFR_CPU_INTRIN(avx2) u8avx max(u8avx x, u8avx y) { return _mm256_max_epu8(*x, *y); }
+ KFR_CPU_INTRIN(avx2) i16avx max(i16avx x, i16avx y) { return _mm256_max_epi16(*x, *y); }
+ KFR_CPU_INTRIN(avx2) i8avx max(i8avx x, i8avx y) { return _mm256_max_epi8(*x, *y); }
+ KFR_CPU_INTRIN(avx2) u16avx max(u16avx x, u16avx y) { return _mm256_max_epu16(*x, *y); }
+ KFR_CPU_INTRIN(avx2) i32avx max(i32avx x, i32avx y) { return _mm256_max_epi32(*x, *y); }
+ KFR_CPU_INTRIN(avx2) u32avx max(u32avx x, u32avx y) { return _mm256_max_epu32(*x, *y); }
+
+ KFR_HANDLE_ALL(min)
+ KFR_HANDLE_ALL(max)
+ KFR_SPEC_FN(in_min_max, min)
+ KFR_SPEC_FN(in_min_max, max)
+};
+
+template <cpu_t cpu = cpu_t::native>
+struct in_minabs_maxabs
+{
+public:
+ template <typename T, size_t N>
+ KFR_SINTRIN vec<T, N> minabs(vec<T, N> x, vec<T, N> y)
+ {
+ return in_min_max<cpu>::min(in_abs<cpu>::abs(x), in_abs<cpu>::abs(y));
+ }
+ template <typename T, size_t N>
+ KFR_SINTRIN vec<T, N> maxabs(vec<T, N> x, vec<T, N> y)
+ {
+ return in_min_max<cpu>::max(in_abs<cpu>::abs(x), in_abs<cpu>::abs(y));
+ }
+
+ KFR_HANDLE_ALL(minabs)
+ KFR_HANDLE_ALL(maxabs)
+ KFR_SPEC_FN(in_minabs_maxabs, minabs)
+ KFR_SPEC_FN(in_minabs_maxabs, maxabs)
+};
+
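+// clamp(x, lo, hi) computes max(lo, min(x, hi)); the two-argument forms use
+// zero as the lower bound, and the clampm1 variants clamp to [lo, hi - 1].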
+template <cpu_t cpu = cpu_t::native>
+struct in_clamp : in_min_max<cpu>
+{
+ using in_min_max<cpu>::min;
+ using in_min_max<cpu>::max;
+
+ template <typename T, size_t N>
+ KFR_SINTRIN vec<T, N> clamp(vec<T, N> x, T minimum, T maximum)
+ {
+ return clamp(x, broadcast<N>(minimum), broadcast<N>(maximum));
+ }
+ template <typename T, size_t N>
+ KFR_SINTRIN vec<T, N> clamp(vec<T, N> x, T minimum, vec<T, N> maximum)
+ {
+ return clamp(x, broadcast<N>(minimum), maximum);
+ }
+ template <typename T, size_t N>
+ KFR_SINTRIN vec<T, N> clamp(vec<T, N> x, vec<T, N> minimum, T maximum)
+ {
+ return clamp(x, minimum, broadcast<N>(maximum));
+ }
+ template <typename T, size_t N>
+ KFR_SINTRIN vec<T, N> clamp(vec<T, N> x, T maximum)
+ {
+ return clamp(x, broadcast<N>(maximum));
+ }
+
+ template <typename T, size_t N>
+ KFR_SINTRIN vec<T, N> clamp(vec<T, N> x, vec<T, N> minimum, vec<T, N> maximum)
+ {
+ return max(minimum, min(x, maximum));
+ }
+ template <typename T, size_t N>
+ KFR_SINTRIN vec<T, N> clamp(vec<T, N> x, vec<T, N> maximum)
+ {
+ return max(zerovector<T, N>(), min(x, maximum));
+ }
+
+ template <typename T, size_t N>
+ KFR_SINTRIN vec<T, N> clampm1(vec<T, N> x, vec<T, N> minimum, vec<T, N> maximum)
+ {
+ return max(minimum, min(x, maximum - T(1)));
+ }
+ template <typename T, size_t N>
+ KFR_SINTRIN vec<T, N> clampm1(vec<T, N> x, vec<T, N> maximum)
+ {
+ return max(zerovector<T, N>(), min(x, maximum - T(1)));
+ }
+ KFR_HANDLE_ALL(clamp)
+ KFR_HANDLE_ALL(clampm1)
+ KFR_SPEC_FN(in_clamp, clamp)
+ KFR_SPEC_FN(in_clamp, clampm1)
+};
+}
+
+namespace native
+{
+using fn_min = internal::in_min_max<>::fn_min;
+template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)>
+KFR_INLINE ftype<common_type<T1, T2>> min(const T1& x, const T2& y)
+{
+ return internal::in_min_max<>::min(x, y);
+}
+
+template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)>
+KFR_INLINE expr_func<fn_min, E1, E2> min(E1&& x, E2&& y)
+{
+ return { fn_min(), std::forward<E1>(x), std::forward<E2>(y) };
+}
+using fn_max = internal::in_min_max<>::fn_max;
+template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)>
+KFR_INLINE ftype<common_type<T1, T2>> max(const T1& x, const T2& y)
+{
+ return internal::in_min_max<>::max(x, y);
+}
+
+template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)>
+KFR_INLINE expr_func<fn_max, E1, E2> max(E1&& x, E2&& y)
+{
+    return { fn_max(), std::forward<E1>(x), std::forward<E2>(y) };
+}
+using fn_minabs = internal::in_minabs_maxabs<>::fn_minabs;
+template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)>
+KFR_INLINE ftype<common_type<T1, T2>> minabs(const T1& x, const T2& y)
+{
+ return internal::in_minabs_maxabs<>::minabs(x, y);
+}
+
+template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)>
+KFR_INLINE expr_func<fn_minabs, E1, E2> minabs(E1&& x, E2&& y)
+{
+    return { fn_minabs(), std::forward<E1>(x), std::forward<E2>(y) };
+}
+using fn_maxabs = internal::in_minabs_maxabs<>::fn_maxabs;
+template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)>
+KFR_INLINE ftype<common_type<T1, T2>> maxabs(const T1& x, const T2& y)
+{
+ return internal::in_minabs_maxabs<>::maxabs(x, y);
+}
+
+template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)>
+KFR_INLINE expr_func<fn_maxabs, E1, E2> maxabs(E1&& x, E2&& y)
+{
+    return { fn_maxabs(), std::forward<E1>(x), std::forward<E2>(y) };
+}
+using fn_clamp = internal::in_clamp<>::fn_clamp;
+template <typename T1, typename T2, typename T3, KFR_ENABLE_IF(is_numeric_args<T1, T2, T3>::value)>
+KFR_INLINE ftype<common_type<T1, T2, T3>> clamp(const T1& x, const T2& l, const T3& h)
+{
+ return internal::in_clamp<>::clamp(x, l, h);
+}
+
+template <typename E1, typename E2, typename E3, KFR_ENABLE_IF(is_input_expressions<E1, E2, E3>::value)>
+KFR_INLINE expr_func<fn_clamp, E1, E2, E3> clamp(E1&& x, E2&& l, E3&& h)
+{
+    return { fn_clamp(), std::forward<E1>(x), std::forward<E2>(l), std::forward<E3>(h) };
+}
+using fn_clampm1 = internal::in_clamp<>::fn_clampm1;
+template <typename T1, typename T2, typename T3, KFR_ENABLE_IF(is_numeric_args<T1, T2, T3>::value)>
+KFR_INLINE ftype<common_type<T1, T2, T3>> clampm1(const T1& x, const T2& l, const T3& h)
+{
+ return internal::in_clamp<>::clampm1(x, l, h);
+}
+
+template <typename E1, typename E2, typename E3, KFR_ENABLE_IF(is_input_expressions<E1, E2, E3>::value)>
+KFR_INLINE expr_func<fn_clampm1, E1, E2, E3> clampm1(E1&& x, E2&& l, E3&& h)
+{
+    return { fn_clampm1(), std::forward<E1>(x), std::forward<E2>(l), std::forward<E3>(h) };
+}
+
+using fn_clamp = internal::in_clamp<>::fn_clamp;
+template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)>
+KFR_INLINE ftype<common_type<T1, T2>> clamp(const T1& x, const T2& h)
+{
+ return internal::in_clamp<>::clamp(x, h);
+}
+
+template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)>
+KFR_INLINE expr_func<fn_clamp, E1, E2> clamp(E1&& x, E2&& h)
+{
+    return { fn_clamp(), std::forward<E1>(x), std::forward<E2>(h) };
+}
+using fn_clampm1 = internal::in_clamp<>::fn_clampm1;
+template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)>
+KFR_INLINE ftype<common_type<T1, T2>> clampm1(const T1& x, const T2& h)
+{
+ return internal::in_clamp<>::clampm1(x, h);
+}
+
+template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)>
+KFR_INLINE expr_func<fn_clampm1, E1, E2> clampm1(E1&& x, E2&& h)
+{
+    return { fn_clampm1(), std::forward<E1>(x), std::forward<E2>(h) };
+}
+}
+}
+
+#pragma clang diagnostic pop
diff --git a/include/kfr/base/operators.hpp b/include/kfr/base/operators.hpp
@@ -0,0 +1,663 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+
+#include "function.hpp"
+#include <algorithm>
+#include <utility>
+
+namespace kfr
+{
+namespace internal
+{
+
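+// Horizontal reduction: split the vector into halves and combine them with
+// the reduction function until one lane remains. Non-power-of-two widths are
+// first widened to the next power of two, padded with the reduction's
+// identity element (reduce(initialvalue<T>())).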
+template <typename T, typename ReduceFn>
+KFR_INLINE T horizontal_impl(vec<T, 1> value, ReduceFn&&)
+{
+ return T(value[0]);
+}
+
+template <typename T, size_t N, typename ReduceFn, KFR_ENABLE_IF(N > 1 && is_poweroftwo(N))>
+KFR_INLINE T horizontal_impl(vec<T, N> value, ReduceFn&& reduce)
+{
+ return horizontal_impl(reduce(low(value), high(value)), std::forward<ReduceFn>(reduce));
+}
+template <typename T, size_t N, typename ReduceFn, KFR_ENABLE_IF(N > 1 && !is_poweroftwo(N))>
+KFR_INLINE T horizontal_impl(vec<T, N> value, ReduceFn&& reduce)
+{
+ const T initial = reduce(initialvalue<T>());
+ return horizontal_impl(widen<next_poweroftwo(N)>(value, initial), std::forward<ReduceFn>(reduce));
+}
+}
+
+template <typename T, size_t N, typename ReduceFn>
+KFR_INLINE T horizontal(vec<T, N> value, ReduceFn&& reduce)
+{
+ return internal::horizontal_impl(value, std::forward<ReduceFn>(reduce));
+}
+
+template <typename T>
+constexpr inline T add(T x)
+{
+ return x;
+}
+template <typename T1, typename T2, typename... Ts>
+constexpr inline common_type<T1, T2, Ts...> add(T1 x, T2 y, Ts... rest)
+{
+ return x + add(std::forward<T2>(y), std::forward<Ts>(rest)...);
+}
+template <typename T>
+constexpr inline T add(initialvalue<T>)
+{
+ return T(0);
+}
+KFR_FN(add)
+
+template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)>
+KFR_INLINE expr_func<fn_add, E1, E2> add(E1&& x, E2&& y)
+{
+ return { fn_add(), std::forward<E1>(x), std::forward<E2>(y) };
+}
+template <typename E1, typename E2, typename E3, KFR_ENABLE_IF(is_input_expressions<E1, E2, E3>::value)>
+KFR_INLINE expr_func<fn_add, E1, E2, E3> add(E1&& x, E2&& y, E3&& z)
+{
+    return { fn_add(), std::forward<E1>(x), std::forward<E2>(y), std::forward<E3>(z) };
+}
+
+template <typename T1, typename T2>
+constexpr inline common_type<T1, T2> sub(T1 x, T2 y)
+{
+ return x - y;
+}
+template <typename T>
+constexpr inline T sub(initialvalue<T>)
+{
+ return T(0);
+}
+KFR_FN(sub)
+
+template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)>
+KFR_INLINE expr_func<fn_sub, E1, E2> sub(E1&& x, E2&& y)
+{
+    return { fn_sub(), std::forward<E1>(x), std::forward<E2>(y) };
+}
+
+template <typename T1>
+constexpr inline T1 mul(T1 x)
+{
+ return x;
+}
+template <typename T1, typename T2, typename... Ts>
+constexpr inline common_type<T1, T2, Ts...> mul(T1 x, T2 y, Ts... rest)
+{
+ return x * mul(std::forward<T2>(y), std::forward<Ts>(rest)...);
+}
+
+template <typename T>
+constexpr inline T mul(initialvalue<T>)
+{
+ return T(1);
+}
+KFR_FN(mul)
+template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)>
+KFR_INLINE expr_func<fn_mul, E1, E2> mul(E1&& x, E2&& y)
+{
+ return { fn_mul(), std::forward<E1>(x), std::forward<E2>(y) };
+}
+template <typename E1, typename E2, typename E3, KFR_ENABLE_IF(is_input_expressions<E1, E2, E3>::value)>
+KFR_INLINE expr_func<fn_mul, E1, E2, E3> mul(E1&& x, E2&& y, E3&& z)
+{
+ return { fn_mul(), std::forward<E1>(x), std::forward<E2>(y), std::forward<E3>(z) };
+}
+
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+constexpr inline T1 sqr(T1 x)
+{
+ return x * x;
+}
+KFR_FN(sqr)
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INLINE expr_func<fn_sqr, E1> sqr(E1&& x)
+{
+ return { fn_sqr(), std::forward<E1>(x) };
+}
+
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+constexpr inline T1 cub(T1 x)
+{
+ return sqr(x) * x;
+}
+KFR_FN(cub)
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INLINE expr_func<fn_cub, E1> cub(E1&& x)
+{
+    return { fn_cub(), std::forward<E1>(x) };
+}
+
+template <typename T>
+constexpr inline T pow2(T x)
+{
+ return sqr(x);
+}
+
+template <typename T>
+constexpr inline T pow3(T x)
+{
+ return cub(x);
+}
+
+template <typename T>
+constexpr inline T pow4(T x)
+{
+ return sqr(sqr(x));
+}
+
+template <typename T>
+constexpr inline T pow5(T x)
+{
+ return pow4(x) * x;
+}
+KFR_FN(pow2)
+KFR_FN(pow3)
+KFR_FN(pow4)
+KFR_FN(pow5)
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INLINE expr_func<fn_pow2, E1> pow2(E1&& x)
+{
+    return { fn_pow2(), std::forward<E1>(x) };
+}
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INLINE expr_func<fn_pow3, E1> pow3(E1&& x)
+{
+    return { fn_pow3(), std::forward<E1>(x) };
+}
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INLINE expr_func<fn_pow4, E1> pow4(E1&& x)
+{
+    return { fn_pow4(), std::forward<E1>(x) };
+}
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INLINE expr_func<fn_pow5, E1> pow5(E1&& x)
+{
+    return { fn_pow5(), std::forward<E1>(x) };
+}
+
+/// Raise x to the integer power `base` ($x^{base}$)
+/// @code
+/// CHECK( ipow( 10, 3 ) == 1000 );
+/// CHECK( ipow( 0.5, 2 ) == 0.25 );
+/// @endcode
+template <typename T>
+constexpr inline T ipow(T x, int base)
+{
+ T result = T(1);
+ while (base)
+ {
+ if (base & 1)
+ result *= x;
+ base >>= 1;
+ x *= x;
+ }
+ return result;
+}
+KFR_FN(ipow)
+
+template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)>
+KFR_INLINE expr_func<fn_ipow, E1, E2> ipow(E1&& x, E2&& b)
+{
+    return { fn_ipow(), std::forward<E1>(x), std::forward<E2>(b) };
+}
+
+/// Return square of the sum of all arguments
+/// *Example*::
+///
+/// CHECK(sqrsum(1,2,3) == 36);
+template <typename T1, typename... Ts>
+constexpr inline common_type<T1, Ts...> sqrsum(T1 x, Ts... rest)
+{
+ return sqr(add(x, std::forward<Ts>(rest)...));
+}
+
+template <typename T1, typename T2>
+constexpr inline common_type<T1, T2> sqrdiff(T1 x, T2 y)
+{
+ return sqr(x - y);
+}
+KFR_FN(sqrsum)
+KFR_FN(sqrdiff)
+
+/// Division
+template <typename T1, typename T2>
+inline common_type<T1, T2> div(T1 x, T2 y)
+{
+ return x / y;
+}
+KFR_FN(div)
+
+/// Remainder
+template <typename T1, typename T2>
+inline common_type<T1, T2> rem(T1 x, T2 y)
+{
+ return x % y;
+}
+KFR_FN(rem)
+
+/// Negation
+template <typename T1>
+inline T1 neg(T1 x)
+{
+ return -x;
+}
+KFR_FN(neg)
+
+/// Bitwise Not
+template <typename T1>
+inline T1 bitwisenot(T1 x)
+{
+ return ~x;
+}
+KFR_FN(bitwisenot)
+
+/// Bitwise And
+template <typename T1, typename T2>
+inline common_type<T1, T2> bitwiseand(T1 x, T2 y)
+{
+ return x & y;
+}
+template <typename T>
+constexpr inline T bitwiseand(initialvalue<T>)
+{
+ return internal::allones<subtype<T>>;
+}
+KFR_FN(bitwiseand)
+
+/// Bitwise And-Not
+template <typename T1, typename T2>
+inline common_type<T1, T2> bitwiseandnot(T1 x, T2 y)
+{
+ return x & ~y;
+}
+template <typename T>
+constexpr inline T bitwiseandnot(initialvalue<T>)
+{
+ return internal::allones<subtype<T>>;
+}
+KFR_FN(bitwiseandnot)
+
+/// Bitwise Or
+template <typename T1, typename T2>
+inline common_type<T1, T2> bitwiseor(T1 x, T2 y)
+{
+ return x | y;
+}
+template <typename T>
+constexpr inline T bitwiseor(initialvalue<T>)
+{
+ return subtype<T>();
+}
+KFR_FN(bitwiseor)
+
+/// Bitwise Xor (Exclusive Or)
+template <typename T1, typename T2>
+inline common_type<T1, T2> bitwisexor(T1 x, T2 y)
+{
+ return x ^ y;
+}
+template <typename T>
+constexpr inline T bitwisexor(initialvalue<T>)
+{
+ return subtype<T>();
+}
+KFR_FN(bitwisexor)
+
+/// Bitwise Left shift
+template <typename T1, typename T2>
+inline common_type<T1, T2> shl(T1 left, T2 right)
+{
+ return left << right;
+}
+KFR_FN(shl)
+
+/// Bitwise Right shift
+template <typename T1, typename T2>
+inline common_type<T1, T2> shr(T1 left, T2 right)
+{
+ return left >> right;
+}
+KFR_FN(shr)
+
+/// Bitwise Left Rotate
+template <typename T1, typename T2>
+inline common_type<T1, T2> rol(T1 left, T2 right)
+{
+ return shl(left, right) | shr(left, (static_cast<subtype<T1>>(typebits<T1>::bits) - right));
+}
+KFR_FN(rol)
+
+/// Bitwise Right Rotate
+template <typename T1, typename T2>
+inline common_type<T1, T2> ror(T1 left, T2 right)
+{
+ return shr(left, right) | shl(left, (static_cast<subtype<T1>>(typebits<T1>::bits) - right));
+}
+KFR_FN(ror)
+
+template <typename T1, typename T2>
+inline common_type<T1, T2> equal(T1 x, T2 y)
+{
+ return bitcast<subtype<common_type<T1, T2>>>(x == y);
+}
+template <typename T1, typename T2>
+inline common_type<T1, T2> notequal(T1 x, T2 y)
+{
+ return bitcast<subtype<common_type<T1, T2>>>(x != y);
+}
+template <typename T1, typename T2>
+inline common_type<T1, T2> less(T1 x, T2 y)
+{
+ return bitcast<subtype<common_type<T1, T2>>>(x < y);
+}
+template <typename T1, typename T2>
+inline common_type<T1, T2> greater(T1 x, T2 y)
+{
+ return bitcast<subtype<common_type<T1, T2>>>(x > y);
+}
+template <typename T1, typename T2>
+inline common_type<T1, T2> lessorequal(T1 x, T2 y)
+{
+ return bitcast<subtype<common_type<T1, T2>>>(x <= y);
+}
+template <typename T1, typename T2>
+inline common_type<T1, T2> greaterorequal(T1 x, T2 y)
+{
+ return bitcast<subtype<common_type<T1, T2>>>(x >= y);
+}
+KFR_FN(equal)
+KFR_FN(notequal)
+KFR_FN(less)
+KFR_FN(greater)
+KFR_FN(lessorequal)
+KFR_FN(greaterorequal)
+
+/// Fused Multiply-Add
+template <typename T1, typename T2, typename T3>
+constexpr inline common_type<T1, T2, T3> fmadd(T1 x, T2 y, T3 z)
+{
+ return x * y + z;
+}
+/// Fused Multiply-Sub
+template <typename T1, typename T2, typename T3>
+constexpr inline common_type<T1, T2, T3> fmsub(T1 x, T2 y, T3 z)
+{
+ return x * y - z;
+}
+KFR_FN(fmadd)
+KFR_FN(fmsub)
+
+/// Linear blend of `x` and `y` (`c` must be in the range 0...+1)
+/// Returns `x + ( y - x ) * c`
+template <typename T1, typename T2, typename T3>
+constexpr inline common_type<T1, T2, T3> mix(T1 c, T2 x, T3 y)
+{
+ return fmadd(c, y - x, x);
+}
+
+/// Linear blend of `x` and `y` (`c` must be in the range -1...+1)
+template <typename T1, typename T2, typename T3>
+constexpr inline common_type<T1, T2, T3> mixs(T1 c, T2 x, T3 y)
+{
+ return mix(fmadd(c, 0.5, 0.5), x, y);
+}
+KFR_FN(mix)
+KFR_FN(mixs)
+
+namespace internal
+{
+
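+// Horner recursion: horner(x, c0, c1, ..., cn) == fmadd(horner(x, c1, ..., cn), x, c0),
+// i.e. one fused multiply-add per coefficient, innermost term first.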
+template <typename T1, typename T2>
+constexpr KFR_INLINE T1 horner(T1, T2 c0)
+{
+ return c0;
+}
+
+template <typename T1, typename T2, typename T3, typename... Ts>
+constexpr KFR_INLINE T1 horner(T1 x, T2 c0, T3 c1, Ts... values)
+{
+ return fmadd(horner(x, c1, values...), x, c0);
+}
+}
+
+/// Calculate polynomial using Horner's method
+///
+/// ``horner(x, 1, 2, 3)`` is equivalent to \(3x^2 + 2x + 1\)
+template <typename T1, typename... Ts>
+constexpr KFR_INLINE T1 horner(T1 x, Ts... c)
+{
+ return internal::horner(x, c...);
+}
+KFR_FN(horner)
+
+/// Calculate Multiplicative Inverse of `x`
+/// Returns `1/x`
+template <typename T>
+constexpr KFR_INLINE T reciprocal(T x)
+{
+ static_assert(std::is_floating_point<subtype<T>>::value, "T must be floating point type");
+ return subtype<T>(1) / x;
+}
+KFR_FN(reciprocal)
+
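+/// Multiply x by the sign of y: flips the sign of x wherever the sign bit of
+/// y is set.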
+template <typename T, size_t N>
+KFR_INLINE vec<T, N> mulsign(vec<T, N> x, vec<T, N> y)
+{
+ return x ^ (y & internal::highbitmask<T>);
+}
+KFR_FN_S(mulsign)
+KFR_FN(mulsign)
+
+template <typename T, size_t N>
+constexpr KFR_INLINE vec<T, N> copysign(vec<T, N> x, vec<T, N> y)
+{
+    // keep the magnitude bits of x and take the sign bit from y
+    return (x & internal::invhighbitmask<T>) | (y & internal::highbitmask<T>);
+}
+
+template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)>
+KFR_INLINE vec<T, N> fmod(vec<T, N> x, vec<T, N> y)
+{
+ return x - cast<itype<T>>(x / y) * y;
+}
+
+KFR_FN_S(fmod)
+KFR_FN(fmod)
+
+template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)>
+constexpr KFR_INLINE vec<T, N> rem(vec<T, N> x, vec<T, N> y)
+{
+ return x % y;
+}
+template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)>
+KFR_INLINE vec<T, N> rem(vec<T, N> x, vec<T, N> y)
+{
+ return fmod(x, y);
+}
+
+template <typename T, size_t N>
+KFR_INLINE mask<T, N> isnan(vec<T, N> x)
+{
+ return x != x;
+}
+
+template <typename T, size_t N>
+KFR_INLINE mask<T, N> isinf(vec<T, N> x)
+{
+ return x == c_infinity<T> || x == -c_infinity<T>;
+}
+
+template <typename T, size_t N>
+KFR_INLINE mask<T, N> isfinite(vec<T, N> x)
+{
+ return !isnan(x) && !isinf(x);
+}
+
+template <typename T, size_t N>
+KFR_INLINE mask<T, N> isnegative(vec<T, N> x)
+{
+ return (x & internal::highbitmask<T>) != 0;
+}
+
+template <typename T, size_t N>
+KFR_INLINE mask<T, N> ispositive(vec<T, N> x)
+{
+ return !isnegative(x);
+}
+
+template <typename T, size_t N>
+KFR_INLINE mask<T, N> iszero(vec<T, N> x)
+{
+ return x == T();
+}
+
+/// Swap byte order
+template <typename T, size_t N, KFR_ENABLE_IF(sizeof(vec<T, N>) > 8)>
+KFR_INLINE vec<T, N> swapbyteorder(vec<T, N> x)
+{
+ return bitcast<T>(swap<sizeof(T)>(bitcast<u8>(x)));
+}
+template <typename T, KFR_ENABLE_IF(sizeof(T) == 8)>
+KFR_INLINE T swapbyteorder(T x)
+{
+ return reinterpret_cast<const T&>(__builtin_bswap64(reinterpret_cast<const u64&>(x)));
+}
+template <typename T, KFR_ENABLE_IF(sizeof(T) == 4)>
+KFR_INLINE T swapbyteorder(T x)
+{
+ return reinterpret_cast<const T&>(__builtin_bswap32(reinterpret_cast<const u32&>(x)));
+}
+template <typename T, KFR_ENABLE_IF(sizeof(T) == 2)>
+KFR_INLINE T swapbyteorder(T x)
+{
+ return reinterpret_cast<const T&>(__builtin_bswap16(reinterpret_cast<const u16&>(x)));
+}
+KFR_FN(swapbyteorder)
+
+/// Sum all elements of the vector
+template <typename T, size_t N>
+KFR_INLINE T hadd(vec<T, N> value)
+{
+ return horizontal(value, fn_add());
+}
+KFR_FN(hadd)
+
+/// Multiply all elements of the vector
+template <typename T, size_t N>
+KFR_INLINE T hmul(vec<T, N> value)
+{
+ return horizontal(value, fn_mul());
+}
+KFR_FN(hmul)
+
+template <typename T, size_t N>
+KFR_INLINE T hbitwiseand(vec<T, N> value)
+{
+ return horizontal(value, fn_bitwiseand());
+}
+KFR_FN(hbitwiseand)
+template <typename T, size_t N>
+KFR_INLINE T hbitwiseor(vec<T, N> value)
+{
+ return horizontal(value, fn_bitwiseor());
+}
+KFR_FN(hbitwiseor)
+template <typename T, size_t N>
+KFR_INLINE T hbitwisexor(vec<T, N> value)
+{
+ return horizontal(value, fn_bitwisexor());
+}
+KFR_FN(hbitwisexor)
+
+/// Calculate the Dot-Product of two vectors
+template <typename T, size_t N>
+KFR_INLINE T dot(vec<T, N> x, vec<T, N> y)
+{
+ return hadd(x * y);
+}
+KFR_FN(dot)
+
+/// Calculate the Arithmetic mean of all elements in the vector
+template <typename T, size_t N>
+KFR_INLINE T avg(vec<T, N> value)
+{
+ return hadd(value) / N;
+}
+KFR_FN(avg)
+
+/// Calculate the RMS of all elements in the vector
+template <typename T, size_t N>
+KFR_INLINE T rms(vec<T, N> value)
+{
+ return internal::builtin_sqrt(hadd(value * value) / N);
+}
+KFR_FN(rms)
+
+template <typename T, size_t N, KFR_ENABLE_IF(N >= 2)>
+KFR_INLINE vec<T, N> subadd(vec<T, N> a, vec<T, N> b)
+{
+ return blend<1, 0>(a + b, a - b);
+}
+template <typename T, size_t N, KFR_ENABLE_IF(N >= 2)>
+KFR_INLINE vec<T, N> addsub(vec<T, N> a, vec<T, N> b)
+{
+ return blend<0, 1>(a + b, a - b);
+}
+KFR_FN(subadd)
+KFR_FN(addsub)
+
+template <typename T, size_t N>
+KFR_INLINE vec<T, N> negeven(const vec<T, N>& x)
+{
+ return x ^ broadcast<N / 2>(-T(), T());
+}
+template <typename T, size_t N>
+KFR_INLINE vec<T, N> negodd(const vec<T, N>& x)
+{
+ return x ^ broadcast<N / 2>(T(), -T());
+}
+}
diff --git a/include/kfr/base/read_write.hpp b/include/kfr/base/read_write.hpp
@@ -0,0 +1,201 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+
+#include "shuffle.hpp"
+#include "types.hpp"
+#include "vec.hpp"
+
+namespace kfr
+{
+
+template <size_t N, bool A = false, typename T>
+KFR_INLINE vec<T, N> read(const T* src)
+{
+ return internal_read_write::read<N, A, T>(src);
+}
+
+template <bool A = false, size_t N, typename T>
+KFR_INLINE void write(T* dest, vec<T, N> value)
+{
+ internal_read_write::write<A, N, T>(dest, value);
+}
+
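+/// Gather/scatter: load or store scalars at arbitrary indices one element at
+/// a time; no hardware gather instruction is assumed.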
+template <typename... Indices, typename T, size_t Nout = 1 + sizeof...(Indices)>
+KFR_INLINE vec<T, Nout> gather(const T* base, size_t index, Indices... indices)
+{
+ return make_vector(base[index], base[indices]...);
+}
+
+template <size_t Index, size_t... Indices, typename T, size_t Nout = 1 + sizeof...(Indices)>
+KFR_INLINE vec<T, Nout> gather(const T* base)
+{
+ return make_vector(base[Index], base[Indices]...);
+}
+
+template <size_t Index, size_t... Indices, typename T, size_t N>
+KFR_INLINE void scatter(T* base, vec<T, N> value)
+{
+    // Write lanes to the compile-time indices in order; base must be
+    // non-const for the stores, and swallow expands the index pack.
+    size_t lane = 0;
+    base[Index] = value[lane++];
+    swallow{ (base[Indices] = value[lane++], 0)... };
+}
+
+namespace internal
+{
+template <typename T, size_t N, size_t... Indices>
+KFR_INLINE vec<T, N> gather(const T* base, vec<u32, N> indices, csizes_t<Indices...>)
+{
+ return make_vector(base[indices[Indices]]...);
+}
+template <size_t Nout, size_t Stride, typename T, size_t... Indices>
+KFR_INLINE vec<T, Nout> gather_stride(const T* base, csizes_t<Indices...>)
+{
+ return make_vector(base[Indices * Stride]...);
+}
+template <size_t Nout, typename T, size_t... Indices>
+KFR_INLINE vec<T, Nout> gather_stride_s(const T* base, size_t stride, csizes_t<Indices...>)
+{
+ return make_vector(base[Indices * stride]...);
+}
+}
+
+template <typename T, size_t N>
+KFR_INLINE vec<T, N> gather(const T* base, vec<u32, N> indices)
+{
+ return internal::gather(base, indices, csizeseq<N>);
+}
+
+template <size_t Nout, typename T>
+KFR_INLINE vec<T, Nout> gather_stride(const T* base, size_t stride)
+{
+ return internal::gather_stride_s<Nout>(base, stride, csizeseq<Nout>);
+}
+
+template <size_t Nout, size_t Stride, typename T>
+KFR_INLINE vec<T, Nout> gather_stride(const T* base)
+{
+ return internal::gather_stride<Nout, Stride>(base, csizeseq<Nout>);
+}
+
+template <size_t groupsize, typename T, size_t N, typename IT, size_t... Indices>
+KFR_INLINE vec<T, N * groupsize> gather_helper(const T* base, vec<IT, N> offset, csizes_t<Indices...>)
+{
+ return concat(read<groupsize>(base + groupsize * (*offset)[Indices])...);
+}
+template <size_t groupsize = 1, typename T, size_t N, typename IT>
+KFR_INLINE vec<T, N * groupsize> gather(const T* base, vec<IT, N> offset)
+{
+ return gather_helper<groupsize>(base, offset, csizeseq<N>);
+}
+
+template <size_t groupsize, typename T, size_t N, size_t Nout = N* groupsize, typename IT, size_t... Indices>
+KFR_INLINE void scatter_helper(T* base, vec<IT, N> offset, vec<T, Nout> value, csizes_t<Indices...>)
+{
+ swallow{ (write(base + groupsize * (*offset)[Indices], slice<Indices * groupsize, groupsize>(value)),
+ 0)... };
+}
+template <size_t groupsize = 1, typename T, size_t N, size_t Nout = N* groupsize, typename IT>
+KFR_INLINE void scatter(T* base, vec<IT, N> offset, vec<T, Nout> value)
+{
+ return scatter_helper<groupsize>(base, offset, value, csizeseq<N>);
+}
+
+template <typename T>
+constexpr T partial_masks[] = { internal::allones<T>,
+ internal::allones<T>,
+ internal::allones<T>,
+ internal::allones<T>,
+ internal::allones<T>,
+ internal::allones<T>,
+ internal::allones<T>,
+ internal::allones<T>,
+ internal::allones<T>,
+ internal::allones<T>,
+ internal::allones<T>,
+ internal::allones<T>,
+ internal::allones<T>,
+ internal::allones<T>,
+ internal::allones<T>,
+ internal::allones<T>,
+ internal::allones<T>,
+ internal::allones<T>,
+ internal::allones<T>,
+ internal::allones<T>,
+ internal::allones<T>,
+ internal::allones<T>,
+ internal::allones<T>,
+ internal::allones<T>,
+ internal::allones<T>,
+ internal::allones<T>,
+ internal::allones<T>,
+ internal::allones<T>,
+ internal::allones<T>,
+ internal::allones<T>,
+ internal::allones<T>,
+ internal::allones<T>,
+ T(),
+ T(),
+ T(),
+ T(),
+ T(),
+ T(),
+ T(),
+ T(),
+ T(),
+ T(),
+ T(),
+ T(),
+ T(),
+ T(),
+ T(),
+ T(),
+ T(),
+ T(),
+ T(),
+ T(),
+ T(),
+ T(),
+ T(),
+ T(),
+ T(),
+ T(),
+ T(),
+ T(),
+ T(),
+ T(),
+ T(),
+ T() };
+
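+// Reads an N-element window from the table above: the result has all-ones in
+// the first `index` lanes and zero in the rest, which is useful for masking
+// the tail of a partially filled vector.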
+template <typename T, size_t N>
+KFR_INLINE vec<T, N> partial_mask(size_t index)
+{
+    static_assert(N <= arraysize(partial_masks<T>) / 2,
+                  "N must not be greater than half of the partial_masks array");
+ return read<N>(&partial_masks<T>[0] + arraysize(partial_masks<T>) / 2 - index);
+}
+template <typename T, size_t N>
+KFR_INLINE vec<T, N> partial_mask(size_t index, vec_t<T, N>)
+{
+ return partial_mask<T, N>(index);
+}
+}
diff --git a/include/kfr/base/round.hpp b/include/kfr/base/round.hpp
@@ -0,0 +1,298 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+
+#include "function.hpp"
+#include "operators.hpp"
+
+namespace kfr
+{
+
+#define KFR_mm_trunc_ps(V) _mm_round_ps((V), _MM_FROUND_TRUNC)
+#define KFR_mm_roundnearest_ps(V) _mm_round_ps((V), _MM_FROUND_NINT)
+#define KFR_mm_trunc_pd(V) _mm_round_pd((V), _MM_FROUND_TRUNC)
+#define KFR_mm_roundnearest_pd(V) _mm_round_pd((V), _MM_FROUND_NINT)
+
+#define KFR_mm_trunc_ss(V) _mm_round_ss(_mm_setzero_ps(), (V), _MM_FROUND_TRUNC)
+#define KFR_mm_roundnearest_ss(V) _mm_round_ss(_mm_setzero_ps(), (V), _MM_FROUND_NINT)
+#define KFR_mm_trunc_sd(V) _mm_round_sd(_mm_setzero_pd(), (V), _MM_FROUND_TRUNC)
+#define KFR_mm_roundnearest_sd(V) _mm_round_sd(_mm_setzero_pd(), (V), _MM_FROUND_NINT)
+
+#define KFR_mm_floor_ss(V) _mm_floor_ss(_mm_setzero_ps(), (V))
+#define KFR_mm_floor_sd(V) _mm_floor_sd(_mm_setzero_pd(), (V))
+#define KFR_mm_ceil_ss(V) _mm_ceil_ss(_mm_setzero_ps(), (V))
+#define KFR_mm_ceil_sd(V) _mm_ceil_sd(_mm_setzero_pd(), (V))
+
+#define KFR_mm256_trunc_ps(V) _mm256_round_ps((V), _MM_FROUND_TRUNC)
+#define KFR_mm256_roundnearest_ps(V) _mm256_round_ps((V), _MM_FROUND_NINT)
+#define KFR_mm256_trunc_pd(V) _mm256_round_pd((V), _MM_FROUND_TRUNC)
+#define KFR_mm256_roundnearest_pd(V) _mm256_round_pd((V), _MM_FROUND_NINT)
+
+namespace internal
+{
+
+template <cpu_t c = cpu_t::native>
+struct in_round : in_round<older(c)>
+{
+ struct fn_floor : in_round<older(c)>::fn_floor, fn_disabled
+ {
+ };
+ struct fn_ceil : in_round<older(c)>::fn_ceil, fn_disabled
+ {
+ };
+ struct fn_round : in_round<older(c)>::fn_round, fn_disabled
+ {
+ };
+ struct fn_trunc : in_round<older(c)>::fn_trunc, fn_disabled
+ {
+ };
+ struct fn_fract : in_round<older(c)>::fn_fract, fn_disabled
+ {
+ };
+};
+
+template <>
+struct in_round<cpu_t::sse2>
+{
+ constexpr static cpu_t cpu = cpu_t::sse2;
+
+ template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)>
+ KFR_SINTRIN vec<T, N> floor(vec<T, N> value)
+ {
+ return value;
+ }
+ template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)>
+ KFR_SINTRIN vec<T, N> ceil(vec<T, N> value)
+ {
+ return value;
+ }
+ template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)>
+ KFR_SINTRIN vec<T, N> trunc(vec<T, N> value)
+ {
+ return value;
+ }
+ template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)>
+ KFR_SINTRIN vec<T, N> round(vec<T, N> value)
+ {
+ return value;
+ }
+ template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)>
+ KFR_SINTRIN vec<T, N> fract(vec<T, N>)
+ {
+ return T();
+ }
+
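+    // SSE2 has no floor/ceil instructions: truncate through an integer
+    // round-trip, then subtract (floor) or add (ceil) one where the
+    // truncation moved the value toward zero past the argument.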
+ KFR_SINTRIN f32sse floor(f32sse x)
+ {
+ f32sse t = cast<f32>(cast<i32>(x));
+ return t - (bitcast<f32>(x < t) & 1.f);
+ }
+ KFR_SINTRIN f64sse floor(f64sse x)
+ {
+ f64sse t = cast<f64>(cast<i64>(x));
+ return t - (bitcast<f64>(x < t) & 1.0);
+ }
+ KFR_SINTRIN f32sse ceil(f32sse x)
+ {
+ f32sse t = cast<f32>(cast<i32>(x));
+ return t + (bitcast<f32>(x > t) & 1.f);
+ }
+ KFR_SINTRIN f64sse ceil(f64sse x)
+ {
+ f64sse t = cast<f64>(cast<i64>(x));
+ return t + (bitcast<f64>(x > t) & 1.0);
+ }
+ KFR_SINTRIN f32sse round(f32sse x) { return cast<f32>(cast<i32>(x + mulsign(f32x4(0.5f), x))); }
+ KFR_SINTRIN f64sse round(f64sse x) { return cast<f64>(cast<i64>(x + mulsign(f64x2(0.5), x))); }
+ KFR_SINTRIN f32sse trunc(f32sse x) { return cast<f32>(cast<i32>(x)); }
+ KFR_SINTRIN f64sse trunc(f64sse x) { return cast<f64>(cast<i64>(x)); }
+ KFR_SINTRIN f32sse fract(f32sse x) { return x - floor(x); }
+ KFR_SINTRIN f64sse fract(f64sse x) { return x - floor(x); }
+
+ KFR_HANDLE_ALL(floor)
+ KFR_HANDLE_ALL(ceil)
+ KFR_HANDLE_ALL(round)
+ KFR_HANDLE_ALL(trunc)
+ KFR_HANDLE_ALL(fract)
+ KFR_SPEC_FN(in_round, floor)
+ KFR_SPEC_FN(in_round, ceil)
+ KFR_SPEC_FN(in_round, round)
+ KFR_SPEC_FN(in_round, trunc)
+ KFR_SPEC_FN(in_round, fract)
+};
+
+template <>
+struct in_round<cpu_t::sse41> : in_round<cpu_t::sse2>
+{
+ constexpr static cpu_t cpu = cpu_t::sse41;
+
+ KFR_SINTRIN f32sse floor(f32sse value) { return _mm_floor_ps(*value); }
+ KFR_SINTRIN f32sse ceil(f32sse value) { return _mm_ceil_ps(*value); }
+ KFR_SINTRIN f32sse trunc(f32sse value) { return KFR_mm_trunc_ps(*value); }
+ KFR_SINTRIN f32sse round(f32sse value) { return KFR_mm_roundnearest_ps(*value); }
+ KFR_SINTRIN f64sse floor(f64sse value) { return _mm_floor_pd(*value); }
+ KFR_SINTRIN f64sse ceil(f64sse value) { return _mm_ceil_pd(*value); }
+ KFR_SINTRIN f64sse trunc(f64sse value) { return KFR_mm_trunc_pd(*value); }
+ KFR_SINTRIN f64sse round(f64sse value) { return KFR_mm_roundnearest_pd(*value); }
+ KFR_SINTRIN f32sse fract(f32sse x) { return x - floor(x); }
+ KFR_SINTRIN f64sse fract(f64sse x) { return x - floor(x); }
+
+ KFR_HANDLE_ALL(floor)
+ KFR_HANDLE_ALL(ceil)
+ KFR_HANDLE_ALL(round)
+ KFR_HANDLE_ALL(trunc)
+ KFR_HANDLE_ALL(fract)
+ KFR_SPEC_FN(in_round, floor)
+ KFR_SPEC_FN(in_round, ceil)
+ KFR_SPEC_FN(in_round, round)
+ KFR_SPEC_FN(in_round, trunc)
+ KFR_SPEC_FN(in_round, fract)
+};
+
+template <>
+struct in_round<cpu_t::avx1> : in_round<cpu_t::sse41>
+{
+ constexpr static cpu_t cpu = cpu_t::avx1;
+ using in_round<cpu_t::sse41>::floor;
+ using in_round<cpu_t::sse41>::ceil;
+ using in_round<cpu_t::sse41>::trunc;
+ using in_round<cpu_t::sse41>::round;
+ using in_round<cpu_t::sse41>::fract;
+
+ KFR_SINTRIN f32avx floor(f32avx value) { return _mm256_floor_ps(*value); }
+ KFR_SINTRIN f32avx ceil(f32avx value) { return _mm256_ceil_ps(*value); }
+ KFR_SINTRIN f32avx trunc(f32avx value) { return KFR_mm256_trunc_ps(*value); }
+ KFR_SINTRIN f32avx round(f32avx value) { return KFR_mm256_roundnearest_ps(*value); }
+ KFR_SINTRIN f64avx floor(f64avx value) { return _mm256_floor_pd(*value); }
+ KFR_SINTRIN f64avx ceil(f64avx value) { return _mm256_ceil_pd(*value); }
+ KFR_SINTRIN f64avx trunc(f64avx value) { return KFR_mm256_trunc_pd(*value); }
+ KFR_SINTRIN f64avx round(f64avx value) { return KFR_mm256_roundnearest_pd(*value); }
+ KFR_SINTRIN f32avx fract(f32avx x) { return x - floor(x); }
+ KFR_SINTRIN f64avx fract(f64avx x) { return x - floor(x); }
+
+ KFR_HANDLE_ALL(floor)
+ KFR_HANDLE_ALL(ceil)
+ KFR_HANDLE_ALL(round)
+ KFR_HANDLE_ALL(trunc)
+ KFR_HANDLE_ALL(fract)
+ KFR_SPEC_FN(in_round, floor)
+ KFR_SPEC_FN(in_round, ceil)
+ KFR_SPEC_FN(in_round, round)
+ KFR_SPEC_FN(in_round, trunc)
+ KFR_SPEC_FN(in_round, fract)
+};
+
+#undef KFR_mm_trunc_ps
+#undef KFR_mm_roundnearest_ps
+#undef KFR_mm_trunc_pd
+#undef KFR_mm_roundnearest_pd
+#undef KFR_mm_trunc_ss
+#undef KFR_mm_roundnearest_ss
+#undef KFR_mm_trunc_sd
+#undef KFR_mm_roundnearest_sd
+#undef KFR_mm_floor_ss
+#undef KFR_mm_floor_sd
+#undef KFR_mm_ceil_ss
+#undef KFR_mm_ceil_sd
+#undef KFR_mm256_trunc_ps
+#undef KFR_mm256_roundnearest_ps
+#undef KFR_mm256_trunc_pd
+#undef KFR_mm256_roundnearest_pd
+}
+
+namespace native
+{
+using fn_floor = internal::in_round<>::fn_floor;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> floor(const T1& x)
+{
+ return internal::in_round<>::floor(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_floor, E1> floor(E1&& x)
+{
+ return { fn_floor(), std::forward<E1>(x) };
+}
+
+using fn_ceil = internal::in_round<>::fn_ceil;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> ceil(const T1& x)
+{
+ return internal::in_round<>::ceil(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_ceil, E1> ceil(E1&& x)
+{
+ return { fn_ceil(), std::forward<E1>(x) };
+}
+
+using fn_round = internal::in_round<>::fn_round;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> round(const T1& x)
+{
+ return internal::in_round<>::round(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_round, E1> round(E1&& x)
+{
+ return { fn_round(), std::forward<E1>(x) };
+}
+
+using fn_trunc = internal::in_round<>::fn_trunc;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> trunc(const T1& x)
+{
+ return internal::in_round<>::trunc(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_trunc, E1> trunc(E1&& x)
+{
+ return { fn_trunc(), std::forward<E1>(x) };
+}
+
+using fn_fract = internal::in_round<>::fn_fract;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> fract(const T1& x)
+{
+ return internal::in_round<>::fract(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_fract, E1> fract(E1&& x)
+{
+ return { fn_fract(), std::forward<E1>(x) };
+}
+}
+}
diff --git a/include/kfr/base/saturation.hpp b/include/kfr/base/saturation.hpp
@@ -0,0 +1,172 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+
+#include "function.hpp"
+#include "select.hpp"
+
+#pragma clang diagnostic push
+#if CID_HAS_WARNING("-Winaccessible-base")
+#pragma clang diagnostic ignored "-Winaccessible-base"
+#endif
+
+namespace kfr
+{
+
+namespace internal
+{
+
+template <cpu_t c = cpu_t::native, cpu_t cc = c>
+struct in_saturated : in_saturated<older(c), cc>
+{
+    struct fn_satadd : in_saturated<older(c), cc>::fn_satadd, fn_disabled
+    {
+    };
+    struct fn_satsub : in_saturated<older(c), cc>::fn_satsub, fn_disabled
+    {
+    };
+};
+
+template <cpu_t cc>
+struct in_saturated<cpu_t::sse2, cc> : in_select<cc>
+{
+ constexpr static cpu_t cpu = cpu_t::sse2;
+
+private:
+ using in_select<cc>::select;
+
+public:
+ KFR_SINTRIN u8sse satadd(u8sse x, u8sse y) { return _mm_adds_epu8(*x, *y); }
+ KFR_SINTRIN i8sse satadd(i8sse x, i8sse y) { return _mm_adds_epi8(*x, *y); }
+ KFR_SINTRIN u16sse satadd(u16sse x, u16sse y) { return _mm_adds_epu16(*x, *y); }
+ KFR_SINTRIN i16sse satadd(i16sse x, i16sse y) { return _mm_adds_epi16(*x, *y); }
+
+ KFR_SINTRIN u8sse satsub(u8sse x, u8sse y) { return _mm_subs_epu8(*x, *y); }
+ KFR_SINTRIN i8sse satsub(i8sse x, i8sse y) { return _mm_subs_epi8(*x, *y); }
+ KFR_SINTRIN u16sse satsub(u16sse x, u16sse y) { return _mm_subs_epu16(*x, *y); }
+ KFR_SINTRIN i16sse satsub(i16sse x, i16sse y) { return _mm_subs_epi16(*x, *y); }
+
+ KFR_SINTRIN i32sse satadd(i32sse a, i32sse b) { return saturated_signed_add(a, b); }
+ KFR_SINTRIN i64sse satadd(i64sse a, i64sse b) { return saturated_signed_add(a, b); }
+ KFR_SINTRIN u32sse satadd(u32sse a, u32sse b) { return saturated_unsigned_add(a, b); }
+ KFR_SINTRIN u64sse satadd(u64sse a, u64sse b) { return saturated_unsigned_add(a, b); }
+
+ KFR_SINTRIN i32sse satsub(i32sse a, i32sse b) { return saturated_signed_sub(a, b); }
+ KFR_SINTRIN i64sse satsub(i64sse a, i64sse b) { return saturated_signed_sub(a, b); }
+ KFR_SINTRIN u32sse satsub(u32sse a, u32sse b) { return saturated_unsigned_sub(a, b); }
+ KFR_SINTRIN u64sse satsub(u64sse a, u64sse b) { return saturated_unsigned_sub(a, b); }
+
+private:
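+    // Branch-free saturation helpers: the saturation constant is derived from
+    // the sign of the first operand, and overflow is detected by comparing
+    // the operand and result sign bits.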
+ template <typename T, size_t N>
+ KFR_SINTRIN vec<T, N> saturated_signed_add(vec<T, N> a, vec<T, N> b)
+ {
+        constexpr size_t shift = typebits<T>::bits - 1;
+ const vec<T, N> sum = a + b;
+ a = (a >> shift) + allonesvector(a);
+
+ return select(((a ^ b) | ~(b ^ sum)) >= 0, a, sum);
+ }
+ template <typename T, size_t N>
+ KFR_SINTRIN vec<T, N> saturated_signed_sub(vec<T, N> a, vec<T, N> b)
+ {
+        constexpr size_t shift = typebits<T>::bits - 1;
+ const vec<T, N> diff = a - b;
+ a = (a >> shift) + allonesvector(a);
+
+ return select(((a ^ b) & (a ^ diff)) < 0, a, diff);
+ }
+ template <typename T, size_t N>
+ KFR_SINTRIN vec<T, N> saturated_unsigned_add(vec<T, N> a, vec<T, N> b)
+ {
+        const vec<T, N> t = allonesvector(a);
+ return select(a > t - b, t, a + b);
+ }
+ template <typename T, size_t N>
+ KFR_SINTRIN vec<T, N> saturated_unsigned_sub(vec<T, N> a, vec<T, N> b)
+ {
+ return select(a < b, zerovector(a), a - b);
+ }
+
+public:
+ KFR_HANDLE_ALL(satadd)
+ KFR_HANDLE_ALL(satsub)
+ KFR_SPEC_FN(in_saturated, satadd)
+ KFR_SPEC_FN(in_saturated, satsub)
+};
+
+template <cpu_t cc>
+struct in_saturated<cpu_t::avx2, cc> : in_saturated<cpu_t::sse2, cc>
+{
+ constexpr static cpu_t cpu = cpu_t::avx2;
+    using in_saturated<cpu_t::sse2, cc>::satadd;
+    using in_saturated<cpu_t::sse2, cc>::satsub;
+
+ KFR_SINTRIN u8avx satadd(u8avx x, u8avx y) { return _mm256_adds_epu8(*x, *y); }
+ KFR_SINTRIN i8avx satadd(i8avx x, i8avx y) { return _mm256_adds_epi8(*x, *y); }
+ KFR_SINTRIN u16avx satadd(u16avx x, u16avx y) { return _mm256_adds_epu16(*x, *y); }
+ KFR_SINTRIN i16avx satadd(i16avx x, i16avx y) { return _mm256_adds_epi16(*x, *y); }
+
+ KFR_SINTRIN u8avx satsub(u8avx x, u8avx y) { return _mm256_subs_epu8(*x, *y); }
+ KFR_SINTRIN i8avx satsub(i8avx x, i8avx y) { return _mm256_subs_epi8(*x, *y); }
+ KFR_SINTRIN u16avx satsub(u16avx x, u16avx y) { return _mm256_subs_epu16(*x, *y); }
+ KFR_SINTRIN i16avx satsub(i16avx x, i16avx y) { return _mm256_subs_epi16(*x, *y); }
+
+ KFR_HANDLE_ALL(satadd)
+ KFR_HANDLE_ALL(satsub)
+ KFR_SPEC_FN(in_saturated, satadd)
+ KFR_SPEC_FN(in_saturated, satsub)
+};
+}
+namespace native
+{
+using fn_satadd = internal::in_saturated<>::fn_satadd;
+template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)>
+KFR_INLINE ftype<common_type<T1, T2>> satadd(const T1& x, const T2& y)
+{
+ return internal::in_saturated<>::satadd(x, y);
+}
+
+template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)>
+KFR_INLINE expr_func<fn_satadd, E1, E2> satadd(E1&& x, E2&& y)
+{
+ return { fn_satadd(), std::forward<E1>(x), std::forward<E2>(y) };
+}
+using fn_satsub = internal::in_saturated<>::fn_satsub;
+template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)>
+KFR_INLINE ftype<common_type<T1, T2>> satsub(const T1& x, const T2& y)
+{
+ return internal::in_saturated<>::satsub(x, y);
+}
+
+template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)>
+KFR_INLINE expr_func<fn_satsub, E1, E2> satsub(E1&& x, E2&& y)
+{
+    return { fn_satsub(), std::forward<E1>(x), std::forward<E2>(y) };
+}
+}
+}
+
+#pragma clang diagnostic pop
diff --git a/include/kfr/base/select.hpp b/include/kfr/base/select.hpp
@@ -0,0 +1,204 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+
+#include "function.hpp"
+
+namespace kfr
+{
+namespace internal
+{
+
+template <cpu_t c>
+struct in_select_impl : in_select_impl<older(c)>
+{
+ struct fn_select : fn_disabled
+ {
+ };
+};
+
+template <>
+struct in_select_impl<cpu_t::sse2>
+{
+ constexpr static cpu_t cpu = cpu_t::sse2;
+
+ template <typename T, size_t N>
+ KFR_SINTRIN vec<T, N> select(vec<T, N> m, vec<T, N> x, vec<T, N> y)
+ {
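+ // Bitwise select: where a mask bit is set the result takes that bit
+ // from x, otherwise from y (equivalent to (x & m) | (y & ~m)).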
+ return y ^ ((x ^ y) & m);
+ }
+ KFR_SPEC_FN(in_select_impl, select)
+};
+
+template <>
+struct in_select_impl<cpu_t::sse41> : in_select_impl<cpu_t::sse2>
+{
+ constexpr static cpu_t cpu = cpu_t::sse41;
+
+ KFR_SINTRIN u8sse select(u8sse m, u8sse x, u8sse y) { return _mm_blendv_epi8(*y, *x, *m); }
+ KFR_SINTRIN u16sse select(u16sse m, u16sse x, u16sse y) { return _mm_blendv_epi8(*y, *x, *m); }
+ KFR_SINTRIN u32sse select(u32sse m, u32sse x, u32sse y) { return _mm_blendv_epi8(*y, *x, *m); }
+ KFR_SINTRIN u64sse select(u64sse m, u64sse x, u64sse y) { return _mm_blendv_epi8(*y, *x, *m); }
+ KFR_SINTRIN i8sse select(i8sse m, i8sse x, i8sse y) { return _mm_blendv_epi8(*y, *x, *m); }
+ KFR_SINTRIN i16sse select(i16sse m, i16sse x, i16sse y) { return _mm_blendv_epi8(*y, *x, *m); }
+ KFR_SINTRIN i32sse select(i32sse m, i32sse x, i32sse y) { return _mm_blendv_epi8(*y, *x, *m); }
+ KFR_SINTRIN i64sse select(i64sse m, i64sse x, i64sse y) { return _mm_blendv_epi8(*y, *x, *m); }
+ KFR_SINTRIN f32sse select(f32sse m, f32sse x, f32sse y) { return _mm_blendv_ps(*y, *x, *m); }
+ KFR_SINTRIN f64sse select(f64sse m, f64sse x, f64sse y) { return _mm_blendv_pd(*y, *x, *m); }
+
+ KFR_HANDLE_ALL(select)
+ KFR_SPEC_FN(in_select_impl, select)
+};
+
+template <>
+struct in_select_impl<cpu_t::avx1> : in_select_impl<cpu_t::sse41>
+{
+ constexpr static cpu_t cpu = cpu_t::avx1;
+ using in_select_impl<cpu_t::sse41>::select;
+
+ KFR_SINTRIN f64avx select(f64avx m, f64avx x, f64avx y) { return _mm256_blendv_pd(*y, *x, *m); }
+ KFR_SINTRIN f32avx select(f32avx m, f32avx x, f32avx y) { return _mm256_blendv_ps(*y, *x, *m); }
+
+ KFR_HANDLE_ALL(select)
+ KFR_SPEC_FN(in_select_impl, select)
+};
+
+template <>
+struct in_select_impl<cpu_t::avx2> : in_select_impl<cpu_t::avx1>
+{
+ constexpr static cpu_t cpu = cpu_t::avx2;
+ using in_select_impl<cpu_t::avx1>::select;
+
+ KFR_SINTRIN KFR_USE_CPU(avx2) u8avx select(u8avx m, u8avx x, u8avx y)
+ {
+ return _mm256_blendv_epi8(*y, *x, *m);
+ }
+ KFR_SINTRIN KFR_USE_CPU(avx2) u16avx select(u16avx m, u16avx x, u16avx y)
+ {
+ return _mm256_blendv_epi8(*y, *x, *m);
+ }
+ KFR_SINTRIN KFR_USE_CPU(avx2) u32avx select(u32avx m, u32avx x, u32avx y)
+ {
+ return _mm256_blendv_epi8(*y, *x, *m);
+ }
+ KFR_SINTRIN KFR_USE_CPU(avx2) u64avx select(u64avx m, u64avx x, u64avx y)
+ {
+ return _mm256_blendv_epi8(*y, *x, *m);
+ }
+ KFR_SINTRIN KFR_USE_CPU(avx2) i8avx select(i8avx m, i8avx x, i8avx y)
+ {
+ return _mm256_blendv_epi8(*y, *x, *m);
+ }
+ KFR_SINTRIN KFR_USE_CPU(avx2) i16avx select(i16avx m, i16avx x, i16avx y)
+ {
+ return _mm256_blendv_epi8(*y, *x, *m);
+ }
+ KFR_SINTRIN KFR_USE_CPU(avx2) i32avx select(i32avx m, i32avx x, i32avx y)
+ {
+ return _mm256_blendv_epi8(*y, *x, *m);
+ }
+ KFR_SINTRIN KFR_USE_CPU(avx2) i64avx select(i64avx m, i64avx x, i64avx y)
+ {
+ return _mm256_blendv_epi8(*y, *x, *m);
+ }
+
+ KFR_HANDLE_ALL(select)
+ KFR_SPEC_FN(in_select_impl, select)
+};
+
+template <cpu_t c = cpu_t::native>
+struct in_select : in_select_impl<c>
+{
+ using in_select_impl<c>::select;
+
+ template <typename T, size_t N, typename M>
+ KFR_SINTRIN vec<T, N> select(mask<M, N> m, vec<T, N> x, vec<T, N> y)
+ {
+ static_assert(sizeof(M) == sizeof(T), "select: Incompatible types");
+ return in_select_impl<c>::select(bitcast<T>(m), x, y);
+ }
+ template <typename T, size_t N, typename M>
+ KFR_SINTRIN vec<T, N> select(mask<M, N> m, mask<T, N> x, mask<T, N> y)
+ {
+ static_assert(sizeof(M) == sizeof(T), "select: Incompatible types");
+ return in_select_impl<c>::select(bitcast<T>(m), ref_cast<vec<T, N>>(x), ref_cast<vec<T, N>>(y));
+ }
+
+ template <typename T, size_t N, typename M>
+ KFR_SINTRIN vec<T, N> select(mask<M, N> m, T x, T y)
+ {
+ static_assert(sizeof(M) == sizeof(T), "select: Incompatible types");
+ return in_select_impl<c>::select(bitcast<T>(m), broadcast<N>(x), broadcast<N>(y));
+ }
+
+ template <typename T, size_t N, typename M>
+ KFR_SINTRIN vec<T, N> select(mask<M, N> m, vec<T, N> x, T y)
+ {
+ static_assert(sizeof(M) == sizeof(T), "select: Incompatible types");
+ return in_select_impl<c>::select(bitcast<T>(m), x, broadcast<N>(y));
+ }
+
+ template <typename T, size_t N, typename M>
+ KFR_SINTRIN vec<T, N> select(mask<M, N> m, T x, vec<T, N> y)
+ {
+ static_assert(sizeof(M) == sizeof(T), "select: Incompatible types");
+ return in_select_impl<c>::select(bitcast<T>(m), broadcast<N>(x), y);
+ }
+ template <typename T, size_t N, typename M>
+ KFR_SINTRIN vec<T, N> select(mask<M, N> m, mask<T, N> x, T y)
+ {
+ static_assert(sizeof(M) == sizeof(T), "select: Incompatible types");
+ return in_select_impl<c>::select(bitcast<T>(m), ref_cast<vec<T, N>>(x), broadcast<N>(y));
+ }
+
+ template <typename T, size_t N, typename M>
+ KFR_SINTRIN vec<T, N> select(mask<M, N> m, T x, mask<T, N> y)
+ {
+ static_assert(sizeof(M) == sizeof(T), "select: Incompatible types");
+ return in_select_impl<c>::select(bitcast<T>(m), broadcast<N>(x), ref_cast<vec<T, N>>(y));
+ }
+ KFR_SPEC_FN(in_select, select)
+
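+ // Returns +1, -1 or 0 per element according to its sign.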
+ template <typename T, size_t N>
+ KFR_SINTRIN vec<T, N> sign(vec<T, N> x)
+ {
+ return select(x > T(), T(1), select(x < T(), T(-1), T(0)));
+ }
+};
+}
+
+namespace native
+{
+using fn_select = internal::in_select<>::fn_select;
+template <typename T1, typename T2, typename T3, KFR_ENABLE_IF(is_numeric_args<T1, T2, T3>::value)>
+KFR_INLINE ftype<common_type<T2, T3>> select(const T1& arg1, const T2& arg2, const T3& arg3)
+{
+ return internal::in_select<>::select(arg1, arg2, arg3);
+}
+template <typename E1, typename E2, typename E3, KFR_ENABLE_IF(is_input_expressions<E1, E2, E3>::value)>
+KFR_INLINE expr_func<fn_select, E1, E2, E3> select(E1&& arg1, E2&& arg2, E3&& arg3)
+{
+ return { fn_select(), std::forward<E1>(arg1), std::forward<E2>(arg2), std::forward<E3>(arg3) };
+}
+}
+}
diff --git a/include/kfr/base/shuffle.hpp b/include/kfr/base/shuffle.hpp
@@ -0,0 +1,582 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+#include "constants.hpp"
+#include "expression.hpp"
+#include "types.hpp"
+#include "vec.hpp"
+
+#include <utility>
+
+namespace kfr
+{
+
+namespace internal
+{
+
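+// broadcast_get_nth returns the index-th argument; broadcast_helper cycles
+// through the argument list via index % Nin. The zero-argument overload only
+// terminates the recursion and is never selected for a valid index, hence
+// the NaN placeholder.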
+template <size_t index, typename T>
+constexpr KFR_INLINE T broadcast_get_nth()
+{
+ return c_qnan<T>;
+}
+
+template <size_t index, typename T, typename... Ts>
+constexpr KFR_INLINE T broadcast_get_nth(T x, Ts... rest)
+{
+ return index == 0 ? x : broadcast_get_nth<index - 1, T>(rest...);
+}
+
+template <typename T, typename... Ts, size_t... indices, size_t Nin = 1 + sizeof...(Ts),
+ size_t Nout = sizeof...(indices)>
+KFR_INLINE constexpr vec<T, Nout> broadcast_helper(csizes_t<indices...>, T x, Ts... rest)
+{
+ simd<T, Nout> result{ broadcast_get_nth<indices % Nin>(x, rest...)... };
+ return result;
+}
+}
+
+template <size_t N, typename T, typename... Ts, size_t Nout = N*(2 + sizeof...(Ts))>
+constexpr KFR_INLINE vec<T, Nout> broadcast(T x, T y, Ts... rest)
+{
+ return internal::broadcast_helper(csizeseq<Nout>, x, y, rest...);
+}
+KFR_FN(broadcast)
+
+template <size_t Ncount, typename T, size_t N>
+KFR_INLINE vec<T, N + Ncount> padhigh(vec<T, N> x)
+{
+ return shufflevector<N + Ncount, internal::shuffle_index_extend<0, N>>(x);
+}
+KFR_FN(padhigh)
+
+template <size_t Ncount, typename T, size_t N>
+KFR_INLINE vec<T, N + Ncount> padlow(vec<T, N> x)
+{
+ return shufflevector<N + Ncount, internal::shuffle_index_extend<Ncount, N>>(x);
+}
+KFR_FN(padlow)
+
+template <size_t Nout, typename T, size_t N, KFR_ENABLE_IF(N != Nout)>
+KFR_INLINE vec<T, Nout> extend(vec<T, N> x)
+{
+ return shufflevector<Nout, internal::shuffle_index_extend<0, N>>(x);
+}
+template <size_t Nout, typename T, size_t N, KFR_ENABLE_IF(N == Nout)>
+constexpr KFR_INLINE vec<T, Nout> extend(vec<T, N> x)
+{
+ return x;
+}
+KFR_FN(extend)
+
+template <size_t start, size_t count, typename T, size_t N>
+KFR_INLINE vec<T, count> slice(vec<T, N> x)
+{
+ static_assert(start + count <= N, "start + count <= N");
+ return shufflevector<count, internal::shuffle_index<start>>(x);
+}
+template <size_t start, size_t count, typename T, size_t N>
+KFR_INLINE vec<T, count> slice(vec<T, N> x, vec<T, N> y)
+{
+ static_assert(start + count <= N * 2, "start + count <= N * 2");
+ return shufflevector<count, internal::shuffle_index<start>>(x, y);
+}
+KFR_FN(slice)
+
+template <size_t, typename T, size_t N>
+KFR_INLINE void split(vec<T, N>)
+{
+}
+template <size_t start = 0, typename T, size_t N, size_t Nout, typename... Args>
+KFR_INLINE void split(vec<T, N> x, vec<T, Nout>& out, Args&&... args)
+{
+ out = slice<start, Nout>(x);
+ split<start + Nout>(x, std::forward<Args>(args)...);
+}
+KFR_FN(split)
+
+template <size_t total, size_t number, typename T, size_t N, size_t Nout = N / total>
+KFR_INLINE vec<T, Nout> part(vec<T, N> x)
+{
+ static_assert(N % total == 0, "N % total == 0");
+ return shufflevector<Nout, internal::shuffle_index<number * Nout>>(x);
+}
+KFR_FN(part)
+
+template <size_t start, size_t count, typename T, size_t N1, size_t N2>
+KFR_INLINE vec<T, count> concat_and_slice(vec<T, N1> x, vec<T, N2> y)
+{
+ return internal::concattwo<start, count>(x, y);
+}
+KFR_FN(concat_and_slice)
+
+template <size_t Nout, typename T, size_t N>
+KFR_INLINE vec<T, Nout> widen(vec<T, N> x, identity<T> newvalue = T())
+{
+ static_assert(Nout > N, "Nout > N");
+ return concat(x, broadcast<Nout - N>(newvalue));
+}
+template <size_t Nout, typename T, typename TS>
+constexpr KFR_INLINE vec<T, Nout> widen(vec<T, Nout> x, TS)
+{
+ return x;
+}
+KFR_FN(widen)
+
+template <size_t Nout, typename T, size_t N>
+KFR_INLINE vec<T, Nout> narrow(vec<T, N> x)
+{
+ static_assert(Nout <= N, "Nout <= N");
+ return slice<0, Nout>(x);
+}
+KFR_FN(narrow)
+
+template <size_t groupsize = 1, typename T, size_t N, size_t Nout = N / 2,
+ KFR_ENABLE_IF(N >= 2 && (N & 1) == 0)>
+KFR_INLINE vec<T, Nout> even(vec<T, N> x)
+{
+ return shufflevector<Nout, internal::shuffle_index<0, 2>, groupsize>(x);
+}
+KFR_FNR(even, 2, 1)
+
+template <size_t groupsize = 1, typename T, size_t N, size_t Nout = N / 2,
+ KFR_ENABLE_IF(N >= 2 && (N & 1) == 0)>
+KFR_INLINE vec<T, Nout> odd(vec<T, N> x)
+{
+ return shufflevector<Nout, internal::shuffle_index<1, 2>, groupsize>(x);
+}
+KFR_FNR(odd, 2, 1)
+
+namespace internal
+{
+template <size_t groupsize = 2>
+struct shuffle_index_dup1
+{
+ constexpr inline size_t operator()(size_t index) const { return index / groupsize; }
+};
+
+template <size_t groupsize = 2, size_t start = 0>
+struct shuffle_index_dup
+{
+ constexpr inline size_t operator()(size_t index) const { return start + index / groupsize * groupsize; }
+};
+}
+
+template <typename T, size_t N>
+KFR_INLINE vec<T, N> dupeven(vec<T, N> x)
+{
+ static_assert(N % 2 == 0, "N must be even");
+ return shufflevector<N, internal::shuffle_index_dup<2, 0>>(x);
+}
+KFR_FN(dupeven)
+
+template <typename T, size_t N>
+KFR_INLINE vec<T, N> dupodd(vec<T, N> x)
+{
+ static_assert(N % 2 == 0, "N must be even");
+ return shufflevector<N, internal::shuffle_index_dup<2, 1>>(x);
+}
+KFR_FN(dupodd)
+
+template <typename T, size_t N>
+KFR_INLINE vec<T, N * 2> duphalfs(vec<T, N> x)
+{
+ return concat(x, x);
+}
+KFR_FN(duphalfs)
+
+namespace internal
+{
+template <size_t size, size_t... Indices>
+struct shuffle_index_shuffle
+{
+ constexpr static size_t indexcount = sizeof...(Indices);
+
+ template <size_t index>
+ constexpr inline size_t operator()() const
+ {
+ constexpr int result = csizes_t<Indices...>::get(csize<index % indexcount>);
+ return result + index / indexcount * indexcount;
+ }
+};
+}
+
+template <size_t... Indices, typename T, size_t N>
+KFR_INLINE vec<T, N> shuffle(vec<T, N> x, vec<T, N> y, elements_t<Indices...> = elements_t<Indices...>())
+{
+ return shufflevector<N, internal::shuffle_index_shuffle<N, Indices...>>(x, y);
+}
+KFR_FN(shuffle)
+
+template <size_t groupsize, size_t... Indices, typename T, size_t N>
+KFR_INLINE vec<T, N> shufflegroups(vec<T, N> x, vec<T, N> y,
+ elements_t<Indices...> = elements_t<Indices...>())
+{
+ return shufflevector<N, internal::shuffle_index_shuffle<N, Indices...>, groupsize>(x, y);
+}
+KFR_FN(shufflegroups)
+
+namespace internal
+{
+template <size_t size, size_t... Indices>
+struct shuffle_index_permute
+{
+ constexpr static size_t indexcount = sizeof...(Indices);
+
+ template <size_t index>
+ constexpr inline size_t operator()() const
+ {
+ constexpr size_t result = csizes_t<Indices...>::get(csize<index % indexcount>);
+ static_assert(result < size, "result < size");
+ return result + index / indexcount * indexcount;
+ }
+};
+}
+
+template <size_t... Indices, typename T, size_t N>
+KFR_INLINE vec<T, N> permute(vec<T, N> x, elements_t<Indices...> = elements_t<Indices...>())
+{
+ return shufflevector<N, internal::shuffle_index_permute<N, Indices...>>(x);
+}
+KFR_FN(permute)
+
+template <size_t groupsize, size_t... Indices, typename T, size_t N>
+KFR_INLINE vec<T, N> permutegroups(vec<T, N> x, elements_t<Indices...> = elements_t<Indices...>())
+{
+ return shufflevector<N, internal::shuffle_index_permute<N, Indices...>, groupsize>(x);
+}
+KFR_FN(permutegroups)
+
+namespace internal
+{
+
+template <typename T, size_t Nout, typename Fn, size_t... Indices>
+constexpr KFR_INLINE vec<T, Nout> generate_vector(csizes_t<Indices...>)
+{
+ constexpr Fn fn{};
+ return make_vector(static_cast<T>(fn(Indices))...);
+}
+}
+
+template <typename T, size_t Nout, typename Fn>
+constexpr KFR_INLINE vec<T, Nout> generate_vector()
+{
+ return internal::generate_vector<T, Nout, Fn>(csizeseq<Nout>);
+}
+KFR_FN(generate_vector)
+
+namespace internal
+{
+template <typename T, size_t N, typename = u8[N > 1]>
+constexpr KFR_INLINE mask<T, N> evenmask()
+{
+ return broadcast<N / 2, T>(maskbits<T>(true), maskbits<T>(false));
+}
+template <typename T, size_t N, typename = u8[N > 1]>
+constexpr KFR_INLINE mask<T, N> oddmask()
+{
+ return broadcast<N / 2, T>(maskbits<T>(false), maskbits<T>(true));
+}
+}
+
+template <typename T, size_t N, size_t Nout = N * 2>
+KFR_INLINE vec<T, Nout> dup(vec<T, N> x)
+{
+ return shufflevector<Nout, internal::shuffle_index_dup1<2>>(x, x);
+}
+KFR_FNR(dup, 1, 2)
+
+namespace internal
+{
+template <size_t count, size_t start = 0>
+struct shuffle_index_duphalf
+{
+ constexpr inline size_t operator()(size_t index) const { return start + (index) % count; }
+};
+}
+
+template <typename T, size_t N>
+KFR_INLINE vec<T, N> duplow(vec<T, N> x)
+{
+ static_assert(N % 2 == 0, "N must be even");
+ return shufflevector<N, internal::shuffle_index_duphalf<N / 2, 0>>(x);
+}
+KFR_FN(duplow)
+
+template <typename T, size_t N>
+KFR_INLINE vec<T, N> duphigh(vec<T, N> x)
+{
+ static_assert(N % 2 == 0, "N must be even");
+ return shufflevector<N, internal::shuffle_index_duphalf<N / 2, N / 2>>(x);
+}
+KFR_FN(duphigh)
+
+namespace internal
+{
+template <size_t size, size_t... Indices>
+struct shuffle_index_blend
+{
+ constexpr static size_t indexcount = sizeof...(Indices);
+
+ template <size_t index>
+ constexpr inline size_t operator()() const
+ {
+ return (elements_t<Indices...>::get(csize<index % indexcount>) ? size : 0) + index % size;
+ }
+};
+}
+
+template <size_t... Indices, typename T, size_t N>
+KFR_INLINE vec<T, N> blend(vec<T, N> x, vec<T, N> y, elements_t<Indices...> = elements_t<Indices...>())
+{
+ return shufflevector<N, internal::shuffle_index_blend<N, Indices...>, 1>(x, y);
+}
+KFR_FN(blend)
+
+namespace internal
+{
+template <size_t elements>
+struct shuffle_index_swap
+{
+ constexpr inline size_t operator()(size_t index) const
+ {
+ static_assert(is_poweroftwo(elements), "is_poweroftwo( elements )");
+ return index ^ (elements - 1);
+ }
+};
+template <size_t amount, size_t N>
+struct shuffle_index_outputright
+{
+ constexpr inline size_t operator()(size_t index) const
+ {
+ return index < N - amount ? index : index + amount;
+ }
+};
+}
+
+template <size_t elements, typename T, size_t N>
+KFR_INLINE vec<T, N> swap(vec<T, N> x)
+{
+ return shufflevector<N, internal::shuffle_index_swap<elements>>(x);
+}
+KFR_FN(swap)
+
+template <size_t shift, typename T, size_t N>
+KFR_INLINE vec<T, N> rotatetwo(vec<T, N> lo, vec<T, N> hi)
+{
+ return shift == 0 ? lo : (shift == N ? hi : shufflevector<N, internal::shuffle_index<N - shift>>(hi, lo));
+}
+
+template <size_t amount, typename T, size_t N>
+KFR_INLINE vec<T, N> rotateright(vec<T, N> x, csize_t<amount> = csize_t<amount>())
+{
+ static_assert(amount >= 0 && amount < N, "amount >= 0 && amount < N");
+ return shufflevector<N, internal::shuffle_index_wrap<N, N - amount>>(x);
+}
+KFR_FN(rotateright)
+
+template <size_t amount, typename T, size_t N>
+KFR_INLINE vec<T, N> rotateleft(vec<T, N> x, csize_t<amount> = csize_t<amount>())
+{
+ static_assert(amount >= 0 && amount < N, "amount >= 0 && amount < N");
+ return shufflevector<N, internal::shuffle_index_wrap<N, amount>>(x);
+}
+KFR_FN(rotateleft)
+
+template <typename T, size_t N>
+KFR_INLINE vec<T, N> insertright(T x, vec<T, N> y)
+{
+ return concat_and_slice<1, N>(y, vec<T, 1>(x));
+}
+KFR_FN(insertright)
+
+template <typename T, size_t N>
+KFR_INLINE vec<T, N> insertleft(T x, vec<T, N> y)
+{
+ return concat_and_slice<0, N>(vec<T, 1>(x), y);
+}
+KFR_FN(insertleft)
+
+template <typename T, size_t N, size_t N2>
+KFR_INLINE vec<T, N> outputright(vec<T, N> x, vec<T, N2> y)
+{
+ return shufflevector<N, internal::shuffle_index_outputright<N2, N>>(x, extend<N>(y));
+}
+KFR_FN(outputright)
+
+namespace internal
+{
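+// Maps destination index i to source index (i % side2) * side1 + i / side2,
+// i.e. transposes a vector viewed as a row-major matrix with side1 columns;
+// used by transpose(), interleave() and splitpairs().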
+template <size_t size, size_t side1>
+struct shuffle_index_transpose
+{
+ constexpr inline size_t operator()(size_t index) const
+ {
+ constexpr size_t side2 = size / side1;
+ return index % side2 * side1 + index / side2;
+ }
+};
+}
+
+template <size_t side, size_t groupsize = 1, typename T, size_t N, KFR_ENABLE_IF(N / groupsize > 3)>
+KFR_INLINE vec<T, N> transpose(vec<T, N> x)
+{
+ return shufflevector<N, internal::shuffle_index_transpose<N / groupsize, side>, groupsize>(x);
+}
+template <size_t side, size_t groupsize = 1, typename T, size_t N, KFR_ENABLE_IF(N / groupsize <= 3)>
+KFR_INLINE vec<T, N> transpose(vec<T, N> x)
+{
+ return x;
+}
+KFR_FN(transpose)
+
+template <size_t side, size_t groupsize = 1, typename T, size_t N, KFR_ENABLE_IF(N / groupsize > 3)>
+KFR_INLINE vec<T, N> transposeinverse(vec<T, N> x)
+{
+ return shufflevector<N, internal::shuffle_index_transpose<N / groupsize, N / groupsize / side>,
+ groupsize>(x);
+}
+template <size_t side, size_t groupsize = 1, typename T, size_t N, KFR_ENABLE_IF(N / groupsize <= 3)>
+KFR_INLINE vec<T, N> transposeinverse(vec<T, N> x)
+{
+ return x;
+}
+KFR_FN(transposeinverse)
+
+template <size_t side, typename T, size_t N>
+KFR_INLINE vec<T, N> ctranspose(vec<T, N> x)
+{
+ return transpose<side, 2>(x);
+}
+KFR_FN(ctranspose)
+
+template <size_t side, typename T, size_t N>
+KFR_INLINE vec<T, N> ctransposeinverse(vec<T, N> x)
+{
+ return transposeinverse<side, 2>(x);
+}
+KFR_FN(ctransposeinverse)
+
+template <size_t groupsize = 1, typename T, size_t N, size_t Nout = N * 2>
+KFR_INLINE vec<T, Nout> interleave(vec<T, N> x, vec<T, N> y)
+{
+ return shufflevector<Nout, internal::shuffle_index_transpose<Nout / groupsize, Nout / groupsize / 2>,
+ groupsize>(x, y);
+}
+KFR_FNR(interleave, 1, 2)
+
+template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)>
+KFR_INLINE expr_func<fn_interleave, E1, E2> interleave(E1&& x, E2&& y)
+{
+ return { fn_interleave(), std::forward<E1>(x), std::forward<E2>(y) };
+}
+
+template <size_t groupsize = 1, typename T, size_t N>
+KFR_INLINE vec<T, N> interleavehalfs(vec<T, N> x)
+{
+ return shufflevector<N, internal::shuffle_index_transpose<N / groupsize, N / groupsize / 2>, groupsize>(
+ x);
+}
+KFR_FN(interleavehalfs)
+
+template <size_t groupsize = 1, typename T, size_t N>
+KFR_INLINE vec<T, N> splitpairs(vec<T, N> x)
+{
+ return shufflevector<N, internal::shuffle_index_transpose<N / groupsize, 2>, groupsize>(x);
+}
+KFR_FN(splitpairs)
+
+namespace internal
+{
+template <size_t size>
+struct shuffle_index_reverse
+{
+ constexpr inline size_t operator()(size_t index) const { return size - 1 - index; }
+};
+}
+
+template <size_t groupsize = 1, typename T, size_t N>
+KFR_INLINE vec<T, N> reverse(vec<T, N> x)
+{
+ return shufflevector<N, internal::shuffle_index_reverse<N / groupsize>, groupsize>(x);
+}
+KFR_FN(reverse)
+
+namespace internal
+{
+template <size_t N1, size_t N2>
+struct shuffle_index_combine
+{
+ constexpr inline size_t operator()(size_t index) const { return index >= N2 ? index : N1 + index; }
+};
+}
+
+template <typename T, size_t N1, size_t N2>
+KFR_INLINE vec<T, N1> combine(vec<T, N1> x, vec<T, N2> y)
+{
+ static_assert(N2 <= N1, "N2 <= N1");
+ return shufflevector<N1, internal::shuffle_index_combine<N1, N2>>(x, extend<N1>(y));
+}
+KFR_FN(combine)
+
+namespace internal
+{
+template <size_t start, size_t stride>
+struct generate_index
+{
+ constexpr size_t operator()(size_t index) const { return start + index * stride; }
+};
+template <size_t start, size_t size, int on, int off>
+struct generate_onoff
+{
+ constexpr size_t operator()(size_t index) const
+ {
+ return index >= start && index < start + size ? on : off;
+ }
+};
+}
+
+template <typename T, size_t N, size_t start = 0, size_t stride = 1>
+constexpr KFR_INLINE vec<T, N> enumerate()
+{
+ return generate_vector<T, N, internal::generate_index<start, stride>>();
+}
+template <size_t start = 0, size_t stride = 1, typename T, size_t N>
+constexpr KFR_INLINE vec<T, N> enumerate(vec_t<T, N>)
+{
+ return generate_vector<T, N, internal::generate_index<start, stride>>();
+}
+KFR_FN(enumerate)
+
+template <typename T, size_t N, size_t start = 0, size_t size = 1, int on = 1, int off = 0>
+constexpr KFR_INLINE vec<T, N> onoff(cint_t<on> = cint_t<on>(), cint_t<off> = cint_t<off>())
+{
+ return generate_vector<T, N, internal::generate_onoff<start, size, on, off>>();
+}
+template <size_t start = 0, size_t size = 1, int on = 1, int off = 0, typename T, size_t N>
+constexpr KFR_INLINE vec<T, N> onoff(vec_t<T, N>, cint_t<on> = cint_t<on>(), cint_t<off> = cint_t<off>())
+{
+ return generate_vector<T, N, internal::generate_onoff<start, size, on, off>>();
+}
+KFR_FN(onoff)
+}
+#define KFR_SHUFFLE_SPECIALIZATIONS
+#include "specializations.i"
diff --git a/include/kfr/base/sin_cos.hpp b/include/kfr/base/sin_cos.hpp
@@ -0,0 +1,586 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+
+#include "abs.hpp"
+#include "constants.hpp"
+#include "function.hpp"
+#include "min_max.hpp"
+#include "operators.hpp"
+#include "round.hpp"
+#include "select.hpp"
+#include "shuffle.hpp"
+
+#pragma clang diagnostic push
+#if CID_HAS_WARNING("-Winaccessible-base")
+#pragma clang diagnostic ignored "-Winaccessible-base"
+#endif
+#if CID_HAS_WARNING("-Wc99-extensions")
+#pragma clang diagnostic ignored "-Wc99-extensions"
+#endif
+
+namespace kfr
+{
+
+namespace internal
+{
+
+template <cpu_t c = cpu_t::native, cpu_t cc = c>
+struct in_trig : in_select<cc>
+{
+private:
+ using in_select<cc>::select;
+
+protected:
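+ // Evaluates two polynomials at once with Horner's scheme: per lane, msk
+ // selects between the a- and b-coefficient sets while sharing a single
+ // chain of fmadds (used below to merge the sine and cosine series).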
+ template <typename T, size_t N>
+ KFR_SINTRIN vec<T, N> mask_horner(vec<T, N>, mask<T, N> msk, T a0, T b0)
+ {
+ return select(msk, a0, b0);
+ }
+
+ template <typename T, size_t N, typename... Ts>
+ KFR_SINTRIN vec<T, N> mask_horner(vec<T, N> x, mask<T, N> msk, T a0, T b0, T a1, T b1, Ts... values)
+ {
+ return fmadd(mask_horner(x, msk, a1, b1, values...), x, select(msk, a0, b0));
+ }
+};
+
+template <cpu_t c = cpu_t::native, cpu_t cc = c>
+struct in_sin_cos : private in_trig<cc>, private in_select<cc>, private in_round<cc>, private in_abs<cc>
+{
+
+private:
+ using in_abs<cc>::abs;
+ using in_round<cc>::floor;
+ using in_select<cc>::select;
+ using in_trig<cc>::mask_horner;
+
+ template <typename T, size_t N, typename Tprecise = f64>
+ KFR_SINTRIN vec<T, N> trig_fold(vec<T, N> x, vec<itype<T>, N>& quadrant)
+ {
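+ // Range reduction: express |x| as y * (pi/4) + r, take the octant from
+ // y mod 16 (odd octants are rounded up so r stays within [-pi/4, pi/4]),
+ // then subtract y * pi/4 in three parts (Cody-Waite style) with the
+ // leading part in higher precision, keeping r accurate for large x.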
+ const vec<T, N> xabs = abs(x);
+ constexpr vec<T, N> div = fold_constant_div<T>;
+ vec<T, N> y = floor(xabs / div);
+ quadrant = cast<itype<T>>(y - floor(y * T(1.0 / 16.0)) * T(16.0));
+
+ const mask<T, N> msk = bitcast<T>((quadrant & 1) != 0);
+ quadrant = select(msk, quadrant + 1, quadrant);
+ y = select(msk, y + T(1.0), y);
+ quadrant = quadrant & 7;
+
+ constexpr vec<Tprecise, N> hi = cast<Tprecise>(fold_constant_hi<T>);
+ constexpr vec<T, N> rem1 = fold_constant_rem1<T>;
+ constexpr vec<T, N> rem2 = fold_constant_rem2<T>;
+ return cast<T>(cast<Tprecise>(xabs) - cast<Tprecise>(y) * hi) - y * rem1 - y * rem2;
+ }
+
+ template <size_t N>
+ KFR_SINTRIN vec<f32, N> trig_sincos(vec<f32, N> folded, mask<f32, N> cosmask)
+ {
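+ // Shared minimax polynomials in x^2: lanes with cosmask set evaluate the
+ // even cosine series directly, the remaining lanes evaluate the sine
+ // series and receive the extra factor of `folded` afterwards.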
+ constexpr f32 sin_c2 = -0x2.aaaaacp-4f;
+ constexpr f32 sin_c4 = 0x2.222334p-8f;
+ constexpr f32 sin_c6 = -0xd.0566ep-16f;
+ constexpr f32 sin_c8 = 0x3.64cc1cp-20f;
+ constexpr f32 sin_c10 = -0x5.6c4a4p-24f;
+ constexpr f32 cos_c2 = -0x8.p-4f;
+ constexpr f32 cos_c4 = 0xa.aaaabp-8f;
+ constexpr f32 cos_c6 = -0x5.b05d48p-12f;
+ constexpr f32 cos_c8 = 0x1.a065f8p-16f;
+ constexpr f32 cos_c10 = -0x4.cd156p-24f;
+
+ const vec<f32, N> x2 = folded * folded;
+
+ vec<f32, N> formula = mask_horner(x2, cosmask, 1.0f, 1.0f, cos_c2, sin_c2, cos_c4, sin_c4, cos_c6,
+ sin_c6, cos_c8, sin_c8, cos_c10, sin_c10);
+
+ formula = select(cosmask, formula, formula * folded);
+ return formula;
+ }
+
+ template <size_t N>
+ KFR_SINTRIN vec<f64, N> trig_sincos(vec<f64, N> folded, mask<f64, N> cosmask)
+ {
+ constexpr f64 sin_c2 = -0x2.aaaaaaaaaaaaap-4;
+ constexpr f64 sin_c4 = 0x2.22222222220cep-8;
+ constexpr f64 sin_c6 = -0xd.00d00cffd6618p-16;
+ constexpr f64 sin_c8 = 0x2.e3bc744fb879ep-20;
+ constexpr f64 sin_c10 = -0x6.b99034c1467a4p-28;
+ constexpr f64 sin_c12 = 0xb.0711ea8fe8ee8p-36;
+ constexpr f64 sin_c14 = -0xb.7e010897e55dp-44;
+ constexpr f64 sin_c16 = -0xb.64eac07f1d6bp-48;
+ constexpr f64 cos_c2 = -0x8.p-4;
+ constexpr f64 cos_c4 = 0xa.aaaaaaaaaaaa8p-8;
+ constexpr f64 cos_c6 = -0x5.b05b05b05ad28p-12;
+ constexpr f64 cos_c8 = 0x1.a01a01a0022e6p-16;
+ constexpr f64 cos_c10 = -0x4.9f93ed845de2cp-24;
+ constexpr f64 cos_c12 = 0x8.f76bc015abe48p-32;
+ constexpr f64 cos_c14 = -0xc.9bf2dbe00379p-40;
+ constexpr f64 cos_c16 = 0xd.1232ac32f7258p-48;
+
+ vec<f64, N> x2 = folded * folded;
+ vec<f64, N> formula =
+ mask_horner(x2, cosmask, 1.0, 1.0, cos_c2, sin_c2, cos_c4, sin_c4, cos_c6, sin_c6, cos_c8, sin_c8,
+ cos_c10, sin_c10, cos_c12, sin_c12, cos_c14, sin_c14, cos_c16, sin_c16);
+
+ formula = select(cosmask, formula, formula * folded);
+ return formula;
+ }
+
+ template <typename T, size_t N, typename = u8[N > 1]>
+ KFR_SINTRIN vec<T, N> sincos_mask(vec<T, N> x_full, mask<T, N> cosmask)
+ {
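+ // Computes cos(x) in lanes where cosmask is set and sin(x) elsewhere;
+ // sincos() and cossin() below call this with the odd/even lane masks.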
+ vec<itype<T>, N> quadrant;
+ vec<T, N> folded = trig_fold(x_full, quadrant);
+
+ mask<T, N> flip_sign = select(cosmask, (quadrant == 2) || (quadrant == 4), quadrant >= 4);
+
+ mask<T, N> usecos = (quadrant == 2) || (quadrant == 6);
+ usecos = usecos ^ cosmask;
+
+ vec<T, N> formula = trig_sincos(folded, usecos);
+
+ mask<T, N> negmask = x_full < 0;
+
+ flip_sign = flip_sign ^ (negmask & ~cosmask);
+
+ formula = select(flip_sign, -formula, formula);
+ return formula;
+ }
+
+ template <typename T>
+ constexpr static T fold_constant_div = choose_const<T>(0x1.921fb6p-1f, 0x1.921fb54442d18p-1);
+
+ template <typename T>
+ constexpr static T fold_constant_hi = choose_const<T>(0x1.922000p-1f, 0x1.921fb40000000p-1);
+ template <typename T>
+ constexpr static T fold_constant_rem1 = choose_const<T>(-0x1.2ae000p-19f, 0x1.4442d00000000p-25);
+ template <typename T>
+ constexpr static T fold_constant_rem2 = choose_const<T>(-0x1.de973ep-32f, 0x1.8469898cc5170p-49);
+ constexpr static cpu_t cur = c;
+
+public:
+ template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)>
+ KFR_SINTRIN vec<T, N> sin(vec<T, N> x)
+ {
+ vec<itype<T>, N> quadrant;
+ vec<T, N> folded = trig_fold(x, quadrant);
+
+ mask<T, N> flip_sign = quadrant >= 4;
+ mask<T, N> usecos = (quadrant == 2) || (quadrant == 6);
+
+ vec<T, N> formula = trig_sincos(folded, usecos);
+
+ formula = select(flip_sign ^ x.asmask(), -formula, formula);
+ return formula;
+ }
+ template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)>
+ KFR_SINTRIN vec<T, N> cos(vec<T, N> x)
+ {
+ vec<itype<T>, N> quadrant;
+ vec<T, N> folded = trig_fold(x, quadrant);
+
+ mask<T, N> eq4 = (quadrant == 4);
+ mask<T, N> flip_sign = (quadrant == 2) || eq4;
+ mask<T, N> usecos = (quadrant == 0) || eq4;
+
+ vec<T, N> formula = trig_sincos(folded, usecos);
+
+ formula = select(flip_sign, -formula, formula);
+ return formula;
+ }
+
+ template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)>
+ KFR_SINTRIN vec<T, N> fastsin(vec<T, N> x)
+ {
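+ // Fast, lower-accuracy sine: shift the argument by pi, fold it into
+ // [-pi/2, pi/2] using sin(pi - t) == sin(t), restore the sign with the
+ // high-bit mask, then evaluate a short odd polynomial; it assumes the
+ // argument has already been reduced to roughly one period.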
+ constexpr vec<T, N> msk = broadcast<N>(highbitmask<T>);
+
+ constexpr static T c2 = -0.16665853559970855712890625;
+ constexpr static T c4 = +8.31427983939647674560546875e-3;
+ constexpr static T c6 = -1.85423981747590005397796630859375e-4;
+
+ const vec<T, N> pi = c_pi<T>;
+
+ x -= pi;
+ vec<T, N> y = abs(x);
+ y = select(y > c_pi<T, 1, 2>, pi - y, y);
+ y = y ^ (msk & ~x);
+
+ vec<T, N> y2 = y * y;
+ vec<T, N> formula = c6;
+ vec<T, N> y3 = y2 * y;
+ formula = fmadd(formula, y2, c4);
+ formula = fmadd(formula, y2, c2);
+ formula = formula * y3 + y;
+ return formula;
+ }
+
+ template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)>
+ KFR_SINTRIN vec<T, N> fastcos(vec<T, N> x)
+ {
+ x += c_pi<T, 1, 2>;
+ x = select(x >= c_pi<T, 2>, x - c_pi<T, 2>, x);
+ return fastsin(x);
+ }
+ template <typename T, size_t N, KFR_ENABLE_IF(N > 1 && is_f_class<T>::value)>
+ KFR_SINTRIN vec<T, N> sincos(vec<T, N> x)
+ {
+ return sincos_mask(x, internal::oddmask<T, N>());
+ }
+
+ template <typename T, size_t N, KFR_ENABLE_IF(N > 1 && is_f_class<T>::value)>
+ KFR_SINTRIN vec<T, N> cossin(vec<T, N> x)
+ {
+ return sincos_mask(x, internal::evenmask<T, N>());
+ }
+
+ template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)>
+ KFR_SINTRIN vec<T, N> sinc(vec<T, N> x)
+ {
+ return select(abs(x) <= c_epsilon<T>, T(1), sin(x) / x);
+ }
+ template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>>
+ KFR_SINTRIN vec<Tout, N> sin(vec<T, N> x)
+ {
+ return sin(cast<Tout>(x));
+ }
+ template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>>
+ KFR_SINTRIN vec<Tout, N> cos(vec<T, N> x)
+ {
+ return cos(cast<Tout>(x));
+ }
+ template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>>
+ KFR_SINTRIN vec<Tout, N> fastsin(vec<T, N> x)
+ {
+ return fastsin(cast<Tout>(x));
+ }
+ template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>>
+ KFR_SINTRIN vec<Tout, N> fastcos(vec<T, N> x)
+ {
+ return fastcos(cast<Tout>(x));
+ }
+ template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>>
+ KFR_SINTRIN vec<Tout, N> sincos(vec<T, N> x)
+ {
+ return sincos(cast<Tout>(x));
+ }
+ template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>>
+ KFR_SINTRIN vec<Tout, N> cossin(vec<T, N> x)
+ {
+ return cossin(cast<Tout>(x));
+ }
+ template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value), typename Tout = ftype<T>>
+ KFR_SINTRIN vec<Tout, N> sinc(vec<T, N> x)
+ {
+ return sinc(cast<Tout>(x));
+ }
+
+ template <typename T>
+ KFR_SINTRIN T sindeg(const T& x)
+ {
+ return sin(x * c_degtorad<T>);
+ }
+ template <typename T>
+ KFR_SINTRIN T cosdeg(const T& x)
+ {
+ return cos(x * c_degtorad<T>);
+ }
+
+ template <typename T>
+ KFR_SINTRIN T fastsindeg(const T& x)
+ {
+ return fastsin(x * c_degtorad<T>);
+ }
+ template <typename T>
+ KFR_SINTRIN T fastcosdeg(const T& x)
+ {
+ return fastcos(x * c_degtorad<T>);
+ }
+
+ template <typename T>
+ KFR_SINTRIN T sincosdeg(const T& x)
+ {
+ return sincos(x * c_degtorad<T>);
+ }
+ template <typename T>
+ KFR_SINTRIN T cossindeg(const T& x)
+ {
+ return cossin(x * c_degtorad<T>);
+ }
+
+ KFR_HANDLE_SCALAR(sin)
+ KFR_HANDLE_SCALAR(cos)
+ KFR_HANDLE_SCALAR(fastsin)
+ KFR_HANDLE_SCALAR(fastcos)
+ KFR_HANDLE_SCALAR(sincos)
+ KFR_HANDLE_SCALAR(cossin)
+ KFR_HANDLE_SCALAR(sinc)
+
+ KFR_SPEC_FN(in_sin_cos, sin)
+ KFR_SPEC_FN(in_sin_cos, cos)
+ KFR_SPEC_FN(in_sin_cos, fastsin)
+ KFR_SPEC_FN(in_sin_cos, fastcos)
+ KFR_SPEC_FN(in_sin_cos, sincos_mask)
+ KFR_SPEC_FN(in_sin_cos, sincos)
+ KFR_SPEC_FN(in_sin_cos, cossin)
+ KFR_SPEC_FN(in_sin_cos, sinc)
+ KFR_SPEC_FN(in_sin_cos, sindeg)
+ KFR_SPEC_FN(in_sin_cos, cosdeg)
+ KFR_SPEC_FN(in_sin_cos, fastsindeg)
+ KFR_SPEC_FN(in_sin_cos, fastcosdeg)
+ KFR_SPEC_FN(in_sin_cos, sincosdeg)
+ KFR_SPEC_FN(in_sin_cos, cossindeg)
+};
+}
+
+namespace native
+{
+using fn_sin = internal::in_sin_cos<>::fn_sin;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> sin(const T1& x)
+{
+ return internal::in_sin_cos<>::sin(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_sin, E1> sin(E1&& x)
+{
+ return { fn_sin(), std::forward<E1>(x) };
+}
+
+using fn_cos = internal::in_sin_cos<>::fn_cos;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> cos(const T1& x)
+{
+ return internal::in_sin_cos<>::cos(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_cos, E1> cos(E1&& x)
+{
+ return { fn_cos(), std::forward<E1>(x) };
+}
+using fn_fastsin = internal::in_sin_cos<>::fn_fastsin;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> fastsin(const T1& x)
+{
+ return internal::in_sin_cos<>::fastsin(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_fastsin, E1> fastsin(E1&& x)
+{
+ return { fn_fastsin(), std::forward<E1>(x) };
+}
+
+using fn_fastcos = internal::in_sin_cos<>::fn_fastcos;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> fastcos(const T1& x)
+{
+ return internal::in_sin_cos<>::fastcos(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_fastcos, E1> fastcos(E1&& x)
+{
+ return { fn_fastcos(), std::forward<E1>(x) };
+}
+
+using fn_sincos_mask = internal::in_sin_cos<>::fn_sincos_mask;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> sincos_mask(const T1& x)
+{
+ return internal::in_sin_cos<>::sincos_mask(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_sincos_mask, E1> sincos_mask(E1&& x)
+{
+ return { fn_sincos_mask(), std::forward<E1>(x) };
+}
+
+using fn_sincos = internal::in_sin_cos<>::fn_sincos;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> sincos(const T1& x)
+{
+ return internal::in_sin_cos<>::sincos(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_sincos, E1> sincos(E1&& x)
+{
+ return { fn_sincos(), std::forward<E1>(x) };
+}
+
+using fn_cossin = internal::in_sin_cos<>::fn_cossin;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> cossin(const T1& x)
+{
+ return internal::in_sin_cos<>::cossin(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_cossin, E1> cossin(E1&& x)
+{
+ return { fn_cossin(), std::forward<E1>(x) };
+}
+
+using fn_sindeg = internal::in_sin_cos<>::fn_sindeg;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> sindeg(const T1& x)
+{
+ return internal::in_sin_cos<>::sindeg(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_sindeg, E1> sindeg(E1&& x)
+{
+ return { fn_sindeg(), std::forward<E1>(x) };
+}
+
+using fn_cosdeg = internal::in_sin_cos<>::fn_cosdeg;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> cosdeg(const T1& x)
+{
+ return internal::in_sin_cos<>::cosdeg(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_cosdeg, E1> cosdeg(E1&& x)
+{
+ return { fn_cosdeg(), std::forward<E1>(x) };
+}
+
+using fn_fastsindeg = internal::in_sin_cos<>::fn_fastsindeg;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> fastsindeg(const T1& x)
+{
+ return internal::in_sin_cos<>::fastsindeg(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_fastsindeg, E1> fastsindeg(E1&& x)
+{
+ return { fn_fastsindeg(), std::forward<E1>(x) };
+}
+
+using fn_fastcosdeg = internal::in_sin_cos<>::fn_fastcosdeg;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> fastcosdeg(const T1& x)
+{
+ return internal::in_sin_cos<>::fastcosdeg(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_fastcosdeg, E1> fastcosdeg(E1&& x)
+{
+ return { fn_fastcosdeg(), std::forward<E1>(x) };
+}
+
+using fn_sincosdeg = internal::in_sin_cos<>::fn_sincosdeg;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> sincosdeg(const T1& x)
+{
+ return internal::in_sin_cos<>::sincosdeg(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_sincosdeg, E1> sincosdeg(E1&& x)
+{
+ return { fn_sincosdeg(), std::forward<E1>(x) };
+}
+
+using fn_cossindeg = internal::in_sin_cos<>::fn_cossindeg;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> cossindeg(const T1& x)
+{
+ return internal::in_sin_cos<>::cossindeg(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_cossindeg, E1> cossindeg(E1&& x)
+{
+ return { fn_cossindeg(), std::forward<E1>(x) };
+}
+
+using fn_sinc = internal::in_sin_cos<>::fn_sinc;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> sinc(const T1& x)
+{
+ return internal::in_sin_cos<>::sinc(x);
+}
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_sinc, E1> sinc(E1&& x)
+{
+ return { fn_sinc(), std::forward<E1>(x) };
+}
+
+template <typename T>
+inline T sin2x(const T& sinx, const T& cosx)
+{
+ return 2 * sinx * cosx;
+}
+template <typename T>
+inline T sin3x(const T& sinx, const T& cosx)
+{
+ return sinx * (-1 + 4 * sqr(cosx));
+}
+
+template <typename T>
+inline T cos2x(const T& sinx, const T& cosx)
+{
+ return sqr(cosx) - sqr(sinx);
+}
+template <typename T>
+inline T cos3x(const T& sinx, const T& cosx)
+{
+ return cosx * (1 - 4 * sqr(sinx));
+}
+}
+}
+
+#pragma clang diagnostic pop
diff --git a/include/kfr/base/sinh_cosh.hpp b/include/kfr/base/sinh_cosh.hpp
@@ -0,0 +1,143 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+#include "abs.hpp"
+#include "constants.hpp"
+#include "function.hpp"
+#include "log_exp.hpp"
+#include "min_max.hpp"
+#include "operators.hpp"
+#include "select.hpp"
+
+namespace kfr
+{
+
+namespace internal
+{
+
+template <cpu_t c = cpu_t::native>
+struct in_sinh_cosh : in_log_exp<c>
+{
+ constexpr static cpu_t cur = c;
+
+private:
+ using in_log_exp<c>::exp;
+
+public:
+ template <typename T, size_t N>
+ KFR_SINTRIN vec<T, N> sinh(vec<T, N> x)
+ {
+ return (exp(x) - exp(-x)) * T(0.5);
+ }
+
+ template <typename T, size_t N>
+ KFR_SINTRIN vec<T, N> cosh(vec<T, N> x)
+ {
+ return (exp(x) + exp(-x)) * T(0.5);
+ }
+
+ template <typename T, size_t N, KFR_ENABLE_IF(N > 1)>
+ KFR_SINTRIN vec<T, N> sinhcosh(vec<T, N> x)
+ {
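+ // subadd subtracts in even lanes and adds in odd lanes, so one pair of
+ // exponentials yields interleaved sinh (even) / cosh (odd) results.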
+ const vec<T, N> a = exp(x);
+ const vec<T, N> b = exp(-x);
+ return subadd(a, b) * T(0.5);
+ }
+
+ template <typename T, size_t N, KFR_ENABLE_IF(N > 1)>
+ KFR_SINTRIN vec<T, N> coshsinh(vec<T, N> x)
+ {
+ const vec<T, N> a = exp(x);
+ const vec<T, N> b = exp(-x);
+ return addsub(a, b) * T(0.5);
+ }
+ KFR_SPEC_FN(in_sinh_cosh, sinh)
+ KFR_SPEC_FN(in_sinh_cosh, cosh)
+ KFR_SPEC_FN(in_sinh_cosh, sinhcosh)
+ KFR_SPEC_FN(in_sinh_cosh, coshsinh)
+};
+}
+
+namespace native
+{
+using fn_sinh = internal::in_sinh_cosh<>::fn_sinh;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> sinh(const T1& x)
+{
+ return internal::in_sinh_cosh<>::sinh(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_sinh, E1> sinh(E1&& x)
+{
+ return { fn_sinh(), std::forward<E1>(x) };
+}
+
+using fn_cosh = internal::in_sinh_cosh<>::fn_cosh;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> cosh(const T1& x)
+{
+ return internal::in_sinh_cosh<>::cosh(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_cosh, E1> cosh(E1&& x)
+{
+ return { fn_cosh(), std::forward<E1>(x) };
+}
+
+using fn_sinhcosh = internal::in_sinh_cosh<>::fn_sinhcosh;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> sinhcosh(const T1& x)
+{
+ return internal::in_sinh_cosh<>::sinhcosh(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_sinhcosh, E1> sinhcosh(E1&& x)
+{
+ return { fn_sinhcosh(), std::forward<E1>(x) };
+}
+
+using fn_coshsinh = internal::in_sinh_cosh<>::fn_coshsinh;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> coshsinh(const T1& x)
+{
+ return internal::in_sinh_cosh<>::coshsinh(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_coshsinh, E1> coshsinh(E1&& x)
+{
+ return { fn_coshsinh(), std::forward<E1>(x) };
+}
+}
+}
diff --git a/include/kfr/base/specializations.i b/include/kfr/base/specializations.i
@@ -0,0 +1,113 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ */
+#pragma once
+
+#include "vec.hpp"
+#ifndef KFR_SHUFFLE_SPECIALIZATIONS
+#include "shuffle.hpp"
+#endif
+
+namespace kfr
+{
+namespace internal
+{
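+// Hand-written shufflevector specializations for large permutations
+// (bit-reversal and interleaving patterns) that are decomposed here into
+// cheaper half-width shuffles and lane permutes.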
+template <>
+inline vec<f32, 32> shufflevector<f32, 32>(
+ csizes_t<0, 1, 8, 9, 16, 17, 24, 25, 2, 3, 10, 11, 18, 19, 26, 27, 4, 5, 12, 13, 20, 21, 28, 29, 6, 7, 14,
+ 15, 22, 23, 30, 31>,
+ vec<f32, 32> x, vec<f32, 32>)
+{
+ f32x32 w = x;
+
+ w = concat(permute<0, 1, 8, 9, 4, 5, 12, 13, 2, 3, 10, 11, 6, 7, 14, 15>(low(w)),
+ permute<0, 1, 8, 9, 4, 5, 12, 13, 2, 3, 10, 11, 6, 7, 14, 15>(high(w)));
+
+ w = permutegroups<(4), 0, 4, 2, 6, 1, 5, 3, 7>(w); // avx: vperm2f128 & vinsertf128, sse: no-op
+ return w;
+}
+
+template <>
+inline vec<f32, 32> shufflevector<f32, 32>(
+ csizes_t<0, 1, 16, 17, 8, 9, 24, 25, 4, 5, 20, 21, 12, 13, 28, 29, 2, 3, 18, 19, 10, 11, 26, 27, 6, 7, 22,
+ 23, 14, 15, 30, 31>,
+ vec<f32, 32> x, vec<f32, 32>)
+{
+ f32x32 w = x;
+
+ w = concat(permute<0, 1, 8, 9, 4, 5, 12, 13, /**/ 2, 3, 10, 11, 6, 7, 14, 15>(even<8>(w)),
+ permute<0, 1, 8, 9, 4, 5, 12, 13, /**/ 2, 3, 10, 11, 6, 7, 14, 15>(odd<8>(w)));
+
+ w = permutegroups<(4), 0, 4, 1, 5, 2, 6, 3, 7>(w); // avx: vperm2f128 & vinsertf128, sse: no-op
+ return w;
+}
+
+inline vec<f32, 32> bitreverse_2(vec<f32, 32> x)
+{
+ return shufflevector<f32, 32>(csizes<0, 1, 16, 17, 8, 9, 24, 25, 4, 5, 20, 21, 12, 13, 28, 29, 2, 3, 18,
+ 19, 10, 11, 26, 27, 6, 7, 22, 23, 14, 15, 30, 31>,
+ x, x);
+}
+
+template <>
+inline vec<f32, 64> shufflevector<f32, 64>(
+ csizes_t<0, 1, 32, 33, 16, 17, 48, 49, 8, 9, 40, 41, 24, 25, 56, 57, 4, 5, 36, 37, 20, 21, 52, 53, 12, 13,
+ 44, 45, 28, 29, 60, 61, 2, 3, 34, 35, 18, 19, 50, 51, 10, 11, 42, 43, 26, 27, 58, 59, 6, 7, 38,
+ 39, 22, 23, 54, 55, 14, 15, 46, 47, 30, 31, 62, 63>,
+ vec<f32, 64> x, vec<f32, 64>)
+{
+ x = concat(bitreverse_2(even<8>(x)), bitreverse_2(odd<8>(x)));
+ return permutegroups<(8), 0, 4, 1, 5, 2, 6, 3, 7>(x);
+}
+
+template <>
+inline vec<f32, 16> shufflevector<f32, 16>(csizes_t<0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15>,
+ vec<f32, 16> x, vec<f32, 16>)
+{
+ x = permutegroups<(4), 0, 2, 1, 3>(x);
+
+ x = concat(shuffle<0, 2, 8 + 0, 8 + 2>(low(x), high(x)), shuffle<1, 3, 8 + 1, 8 + 3>(low(x), high(x)));
+
+ return x;
+}
+
+template <>
+inline vec<f32, 16> shufflevector<f32, 16>(csizes_t<0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15>,
+ vec<f32, 16> x, vec<f32, 16>)
+{
+ x = concat(shuffle<0, 8 + 0, 1, 8 + 1>(low(x), high(x)), shuffle<2, 8 + 2, 3, 8 + 3>(low(x), high(x)));
+
+ x = permutegroups<(4), 0, 2, 1, 3>(x);
+
+ return x;
+}
+
+template <>
+inline vec<f32, 32> shufflevector<f32, 32>(
+ csizes_t<0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13,
+ 29, 14, 30, 15, 31>,
+ vec<f32, 32> x, vec<f32, 32>)
+{
+ x = permutegroups<(8), 0, 2, 1, 3>(x);
+
+ x = concat(interleavehalfs(low(x)), interleavehalfs(high(x)));
+
+ return x;
+}
+}
+}
diff --git a/include/kfr/base/sqrt.hpp b/include/kfr/base/sqrt.hpp
@@ -0,0 +1,85 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+
+#include "function.hpp"
+
+namespace kfr
+{
+
+namespace internal
+{
+
+template <cpu_t c = cpu_t::native>
+struct in_sqrt : in_sqrt<older(c)>
+{
+ struct fn_sqrt : fn_disabled
+ {
+ };
+};
+
+template <>
+struct in_sqrt<cpu_t::sse2>
+{
+ constexpr static cpu_t cpu = cpu_t::sse2;
+
+ KFR_SINTRIN f32sse sqrt(f32sse x) { return _mm_sqrt_ps(*x); }
+ KFR_SINTRIN f64sse sqrt(f64sse x) { return _mm_sqrt_pd(*x); }
+
+ KFR_HANDLE_ALL(sqrt)
+ KFR_HANDLE_SCALAR(sqrt)
+ KFR_SPEC_FN(in_sqrt, sqrt)
+};
+
+template <>
+struct in_sqrt<cpu_t::avx1> : in_sqrt<cpu_t::sse2>
+{
+ constexpr static cpu_t cpu = cpu_t::avx1;
+ using in_sqrt<cpu_t::sse2>::sqrt;
+
+ KFR_SINTRIN f32avx KFR_USE_CPU(avx) sqrt(f32avx x) { return _mm256_sqrt_ps(*x); }
+ KFR_SINTRIN f64avx KFR_USE_CPU(avx) sqrt(f64avx x) { return _mm256_sqrt_pd(*x); }
+
+ KFR_HANDLE_ALL(sqrt)
+ KFR_HANDLE_SCALAR(sqrt)
+ KFR_SPEC_FN(in_sqrt, sqrt)
+};
+}
+namespace native
+{
+using fn_sqrt = internal::in_sqrt<>::fn_sqrt;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> sqrt(const T1& x)
+{
+ return internal::in_sqrt<>::sqrt(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_sqrt, E1> sqrt(E1&& x)
+{
+ return { fn_sqrt(), std::forward<E1>(x) };
+}
+}
+}
diff --git a/include/kfr/base/tan.hpp b/include/kfr/base/tan.hpp
@@ -0,0 +1,187 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+#include "abs.hpp"
+#include "constants.hpp"
+#include "function.hpp"
+#include "operators.hpp"
+#include "select.hpp"
+#include "sin_cos.hpp"
+
+#pragma clang diagnostic push
+#if CID_HAS_WARNING("-Winaccessible-base")
+#pragma clang diagnostic ignored "-Winaccessible-base"
+#endif
+#if CID_HAS_WARNING("-Wc99-extensions")
+#pragma clang diagnostic ignored "-Wc99-extensions"
+#endif
+
+namespace kfr
+{
+
+namespace internal
+{
+
+template <cpu_t c = cpu_t::native, cpu_t cc = c>
+struct in_tan : in_trig<cc>, in_select<cc>, in_round<cc>, in_abs<cc>
+{
+private:
+ using in_abs<cc>::abs;
+ using in_round<cc>::floor;
+ using in_select<cc>::select;
+ using in_trig<cc>::mask_horner;
+
+ template <typename T, size_t N, typename IT = itype<T>>
+ KFR_SINTRIN vec<T, N> trig_fold(vec<T, N> x_full, mask<T, N>& inverse)
+ {
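+ // Reduce |x| to [-pi/4, pi/4] by subtracting multiples of pi/4; lanes
+ // flagged in `inverse` fall in octants where tan is evaluated via the
+ // cotangent series (the caller divides by -x there instead of
+ // multiplying by x).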
+ constexpr T pi_14 = c_pi<T, 1, 4>;
+
+ vec<T, N> y = abs(x_full);
+ vec<T, N> scaled = y / pi_14;
+
+ vec<T, N> k_real = floor(scaled);
+ vec<IT, N> k = cast<IT>(k_real);
+
+ vec<T, N> x = y - k_real * pi_14;
+
+ mask<T, N> need_offset = (k & 1) != 0;
+ x = select(need_offset, x - pi_14, x);
+
+ vec<IT, N> k_mod4 = k & 3;
+ inverse = (k_mod4 == 1) || (k_mod4 == 2);
+ return x;
+ }
+
+public:
+ template <size_t N>
+ KFR_SINTRIN vec<f32, N> tan(vec<f32, N> x_full)
+ {
+ mask<f32, N> inverse;
+ const vec<f32, N> x = trig_fold(x_full, inverse);
+
+ constexpr f32 tan_c2 = 0x5.555378p-4;
+ constexpr f32 tan_c4 = 0x2.225bb8p-4;
+ constexpr f32 tan_c6 = 0xd.ac3fep-8;
+ constexpr f32 tan_c8 = 0x6.41644p-8;
+ constexpr f32 tan_c10 = 0xc.bfe7ep-12;
+ constexpr f32 tan_c12 = 0x2.6754dp-8;
+
+ constexpr f32 cot_c2 = -0x5.555558p-4;
+ constexpr f32 cot_c4 = -0x5.b0581p-8;
+ constexpr f32 cot_c6 = -0x8.ac5ccp-12;
+ constexpr f32 cot_c8 = -0xd.aaa01p-16;
+ constexpr f32 cot_c10 = -0x1.a9a9b4p-16;
+ constexpr f32 cot_c12 = -0x6.f7d4dp-24;
+
+ const vec<f32, N> x2 = x * x;
+ const vec<f32, N> val = mask_horner(x2, inverse, 1.0f, 1.0f, cot_c2, tan_c2, cot_c4, tan_c4, cot_c6,
+ tan_c6, cot_c8, tan_c8, cot_c10, tan_c10, cot_c12, tan_c12);
+
+ const vec<f32, N> z = select(inverse, val / -x, val * x);
+ return mulsign(z, x_full);
+ }
+
+ template <size_t N>
+ KFR_SINTRIN vec<f64, N> tan(vec<f64, N> x_full)
+ {
+ mask<f64, N> inverse;
+ const vec<f64, N> x = trig_fold(x_full, inverse);
+
+ constexpr f64 tan_c2 = 0x5.5555554d8e5b8p-4;
+ constexpr f64 tan_c4 = 0x2.222224820264p-4;
+ constexpr f64 tan_c6 = 0xd.d0d90de32b3e8p-8;
+ constexpr f64 tan_c8 = 0x5.99723bdcf5cacp-8;
+ constexpr f64 tan_c10 = 0x2.434a142e413ap-8;
+ constexpr f64 tan_c12 = 0xf.2b59061305efp-12;
+ constexpr f64 tan_c14 = 0x4.a12565071a664p-12;
+ constexpr f64 tan_c16 = 0x4.dada3797ac1bcp-12;
+ constexpr f64 tan_c18 = -0x1.a74976b6ea3f3p-12;
+ constexpr f64 tan_c20 = 0x1.d06a5ae5e4a74p-12;
+
+ constexpr f64 cot_c2 = -0x5.5555555555554p-4;
+ constexpr f64 cot_c4 = -0x5.b05b05b05b758p-8;
+ constexpr f64 cot_c6 = -0x8.ab355dffc79a8p-12;
+ constexpr f64 cot_c8 = -0xd.debbca405c9f8p-16;
+ constexpr f64 cot_c10 = -0x1.66a8edb99b15p-16;
+ constexpr f64 cot_c12 = -0x2.450239be0ee92p-20;
+ constexpr f64 cot_c14 = -0x3.ad6ddb4719438p-24;
+ constexpr f64 cot_c16 = -0x5.ff4c42741356p-28;
+ constexpr f64 cot_c18 = -0x9.06881bcdf3108p-32;
+ constexpr f64 cot_c20 = -0x1.644abedc113cap-32;
+
+ const vec<f64, N> x2 = x * x;
+ const vec<f64, N> val =
+ mask_horner(x2, inverse, 1.0, 1.0, cot_c2, tan_c2, cot_c4, tan_c4, cot_c6, tan_c6, cot_c8, tan_c8,
+ cot_c10, tan_c10, cot_c12, tan_c12, cot_c14, tan_c14, cot_c16, tan_c16, cot_c18,
+ tan_c18, cot_c20, tan_c20);
+
+ const vec<f64, N> z = select(inverse, val / -x, val * x);
+ return mulsign(z, x_full);
+ }
+ template <typename T>
+ KFR_SINTRIN T tandeg(const T& x)
+ {
+ return tan(x * c_degtorad<T>);
+ }
+
+ KFR_HANDLE_SCALAR(tan)
+ KFR_SPEC_FN(in_tan, tan)
+ KFR_SPEC_FN(in_tan, tandeg)
+};
+}
+
+namespace native
+{
+using fn_tan = internal::in_tan<>::fn_tan;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> tan(const T1& x)
+{
+ return internal::in_tan<>::tan(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_tan, E1> tan(E1&& x)
+{
+ return { fn_tan(), std::forward<E1>(x) };
+}
+
+using fn_tandeg = internal::in_tan<>::fn_tandeg;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> tandeg(const T1& x)
+{
+ return internal::in_tan<>::tandeg(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_tandeg, E1> tandeg(E1&& x)
+{
+ return { fn_tandeg(), std::forward<E1>(x) };
+}
+}
+}
+
+#pragma clang diagnostic pop
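+
+// Usage sketch (illustrative, not shipped code): tan and tandeg accept
+// scalars, vectors and expressions.
+//   const f32x4 v = make_vector(0.1f, 0.2f, 0.3f, 0.4f);
+//   const f32x4 t = native::tan(v);        // elementwise tangent
+//   const f64 d   = native::tandeg(45.0);  // argument in degrees, ~1.0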
diff --git a/include/kfr/base/types.hpp b/include/kfr/base/types.hpp
@@ -0,0 +1,728 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+#include "kfr.h"
+
+#include "intrinsics.h"
+
+#include <algorithm>
+#include <tuple>
+#include <type_traits>
+
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wshadow"
+
+#include "../cometa.hpp"
+
+#define KFR_ENABLE_IF CMT_ENABLE_IF
+
+#define KFR_FN(fn) \
+ struct fn_##fn \
+ { \
+ template <typename... Args> \
+ CID_INLINE_MEMBER decltype(fn(std::declval<Args>()...)) operator()(Args&&... args) const \
+ { \
+ return fn(std::forward<Args>(args)...); \
+ } \
+ };
+
+#define KFR_FNR(fn, in, out) \
+ struct fn_##fn \
+ { \
+ using ratio = ioratio<in, out>; \
+ template <typename... Args> \
+ CID_INLINE_MEMBER decltype(fn(std::declval<Args>()...)) operator()(Args&&... args) const \
+ { \
+ return fn(std::forward<Args>(args)...); \
+ } \
+ };
+
+#define KFR_SPEC_FN(tpl, fn) \
+ struct fn_##fn \
+ { \
+ constexpr fn_##fn() noexcept = default; \
+ template <cpu_t newcpu> \
+ using retarget_this = typename tpl<newcpu>::fn_##fn; \
+ template <typename... Args> \
+ KFR_INLINE decltype(fn(std::declval<Args>()...)) operator()(Args&&... args) const \
+ { \
+ return fn(std::forward<Args>(args)...); \
+ } \
+ };
+
+namespace kfr
+{
+using namespace cometa;
+
+using f32 = float;
+using f64 = double;
+using i8 = int8_t;
+using i16 = int16_t;
+using i32 = int32_t;
+using i64 = int64_t;
+using u8 = uint8_t;
+using u16 = uint16_t;
+using u32 = uint32_t;
+using u64 = uint64_t;
+using umax = uint64_t;
+using imax = int64_t;
+using fmax = double;
+using f80 = long double;
+
+#ifdef KFR_BASETYPE_F32
+using fbase = f32;
+#else
+using fbase = f64;
+#endif
+
+constexpr ctype_t<f32> ctype_f32{};
+constexpr ctype_t<f64> ctype_f64{};
+constexpr ctype_t<i8> ctype_i8{};
+constexpr ctype_t<i16> ctype_i16{};
+constexpr ctype_t<i32> ctype_i32{};
+constexpr ctype_t<i64> ctype_i64{};
+constexpr ctype_t<u8> ctype_u8{};
+constexpr ctype_t<u16> ctype_u16{};
+constexpr ctype_t<u32> ctype_u32{};
+constexpr ctype_t<u64> ctype_u64{};
+constexpr ctype_t<umax> ctype_umax{};
+constexpr ctype_t<imax> ctype_imax{};
+constexpr ctype_t<fmax> ctype_fmax{};
+constexpr ctype_t<f80> ctype_f80{};
+constexpr ctype_t<fbase> ctype_base{};
+
+struct u24
+{
+ u8 raw[3];
+};
+
+struct i24
+{
+ u8 raw[3];
+};
+
+struct f16
+{
+ u16 raw;
+};
+
+template <typename T1>
+struct range
+{
+ T1 min;
+ T1 max;
+ T1 distance() const { return max - min; }
+};
+
+template <size_t in, size_t out>
+struct ioratio
+{
+ constexpr static size_t input = in;
+ constexpr static size_t output = out;
+};
+
+enum class datatype : int
+{
+ typebits_mask = 0xFF,
+ f = 0x100,
+ i = 0x200,
+ u = 0x300,
+ c = 0x400,
+ typeclass_mask = 0xF00,
+ x1 = 0x1000,
+ x2 = 0x2000,
+ x3 = 0x3000,
+ x4 = 0x4000,
+ typecomponents_mask = 0xF000,
+ f16 = static_cast<int>(f) | static_cast<int>(x1) | 16,
+ f32 = static_cast<int>(f) | static_cast<int>(x1) | 32,
+ f64 = static_cast<int>(f) | static_cast<int>(x1) | 64,
+ f80 = static_cast<int>(f) | static_cast<int>(x1) | 80,
+ i8 = static_cast<int>(i) | static_cast<int>(x1) | 8,
+ i16 = static_cast<int>(i) | static_cast<int>(x1) | 16,
+ i24 = static_cast<int>(i) | static_cast<int>(x1) | 24,
+ i32 = static_cast<int>(i) | static_cast<int>(x1) | 32,
+ i64 = static_cast<int>(i) | static_cast<int>(x1) | 64,
+ u8 = static_cast<int>(u) | static_cast<int>(x1) | 8,
+ u16 = static_cast<int>(u) | static_cast<int>(x1) | 16,
+ u24 = static_cast<int>(u) | static_cast<int>(x1) | 24,
+ u32 = static_cast<int>(u) | static_cast<int>(x1) | 32,
+ u64 = static_cast<int>(u) | static_cast<int>(x1) | 64,
+ c32 = static_cast<int>(c) | static_cast<int>(x2) | 32,
+ c64 = static_cast<int>(c) | static_cast<int>(x2) | 64
+};
+
+inline datatype operator|(datatype x, datatype y)
+{
+ using type = underlying_type<datatype>;
+ return static_cast<datatype>(static_cast<type>(x) | static_cast<type>(y));
+}
+
+inline datatype operator&(datatype x, datatype y)
+{
+ using type = underlying_type<datatype>;
+    return static_cast<datatype>(static_cast<type>(x) & static_cast<type>(y));
+}
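+
+// Example (sketch): datatype values compose and test with these operators:
+//   const datatype t = datatype::f | datatype::x1;
+//   const bool is_float = (t & datatype::typeclass_mask) == datatype::f;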
+
+struct generic
+{
+ template <typename T>
+ KFR_INLINE constexpr operator T() const noexcept
+ {
+ return T();
+ }
+};
+
+struct infinite
+{
+ template <typename T>
+ KFR_INLINE constexpr operator T() const noexcept
+ {
+ return T();
+ }
+ constexpr friend bool operator<(infinite, size_t) noexcept { return false; }
+ constexpr friend bool operator<(size_t, infinite) noexcept { return true; }
+ constexpr friend bool operator<(infinite, infinite) noexcept { return false; }
+};
+
+enum class accuracy : int
+{
+ accuracy = 1,
+ speed = 2,
+ _accuracy_min = static_cast<int>(accuracy),
+ _accuracy_max = static_cast<int>(speed)
+};
+
+enum class archendianness : int
+{
+ littleendian = 1,
+ bigendian = 2,
+ _archendianness_min = static_cast<int>(littleendian),
+ _archendianness_max = static_cast<int>(bigendian)
+};
+
+typedef void*(KFR_CDECL* func_allocate)(size_t);
+
+typedef void(KFR_CDECL* func_deallocate)(void*);
+
+struct mem_allocator
+{
+ func_allocate allocate;
+ func_deallocate deallocate;
+ size_t granularity;
+ size_t alignment;
+};
+
+struct mem_header
+{
+ size_t size;
+ mem_allocator* allocator;
+ uintptr_t refcount;
+ uintptr_t reserved;
+};
+
+enum class outputinput_t
+{
+ output,
+ input
+};
+template <outputinput_t p>
+using coutputinput_t = cval_t<outputinput_t, p>;
+
+template <outputinput_t p>
+constexpr coutputinput_t<p> coutputinput{};
+
+using coutput_t = coutputinput_t<outputinput_t::output>;
+using cinput_t = coutputinput_t<outputinput_t::input>;
+
+constexpr coutput_t coutput{};
+constexpr cinput_t cinput{};
+
+namespace internal
+{
+template <typename Fn, typename enable = void_t<>>
+struct func_ratio_impl
+{
+ using type = ioratio<1, 1>;
+};
+template <typename Fn>
+struct func_ratio_impl<Fn, void_t<typename Fn::ratio>>
+{
+ using type = typename Fn::ratio;
+};
+}
+
+template <typename Fn>
+using func_ratio = typename internal::func_ratio_impl<remove_reference<Fn>>::type;
+
+template <typename T>
+constexpr inline T align_down(T x, identity<T> alignment)
+{
+ return (x) & ~(alignment - 1);
+}
+template <typename T>
+constexpr inline T* align_down(T* x, size_t alignment)
+{
+ return reinterpret_cast<T*>(align_down(reinterpret_cast<size_t>(x), alignment));
+}
+
+template <typename T>
+constexpr inline T align_up(T x, identity<T> alignment)
+{
+ return (x + alignment - 1) & ~(alignment - 1);
+}
+template <typename T>
+constexpr inline T* align_up(T* x, size_t alignment)
+{
+ return reinterpret_cast<T*>(align_up(reinterpret_cast<size_t>(x), alignment));
+}
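+
+// Example: for power-of-two alignments these round to the nearest multiple,
+// e.g. align_down(13, 8) == 8 and align_up(13, 8) == 16.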
+
+template <typename T>
+constexpr inline T* advance(T* x, ptrdiff_t offset)
+{
+ return x + offset;
+}
+constexpr inline void* advance(void* x, ptrdiff_t offset)
+{
+ return advance(static_cast<unsigned char*>(x), offset);
+}
+
+constexpr inline ptrdiff_t distance(const void* x, const void* y)
+{
+ return static_cast<const unsigned char*>(x) - static_cast<const unsigned char*>(y);
+}
+
+enum class cpu_t : int
+{
+ sse2 = 0,
+ sse3 = 1,
+ ssse3 = 2,
+ sse41 = 3,
+ sse42 = 4,
+ avx1 = 5,
+ avx2 = 6,
+ avx = static_cast<int>(avx1),
+ native = static_cast<int>(KFR_ARCH_NAME),
+ lowest = static_cast<int>(sse2),
+ highest = static_cast<int>(avx2),
+ runtime = -1,
+};
+
+template <cpu_t cpu>
+using ccpu_t = cval_t<cpu_t, cpu>;
+
+template <cpu_t cpu>
+constexpr ccpu_t<cpu> ccpu{};
+
+namespace internal
+{
+constexpr cpu_t older(cpu_t x) { return static_cast<cpu_t>(static_cast<int>(x) - 1); }
+constexpr cpu_t newer(cpu_t x) { return static_cast<cpu_t>(static_cast<int>(x) + 1); }
+
+constexpr auto cpu_list =
+ cvals<cpu_t, cpu_t::avx2, cpu_t::avx1, cpu_t::sse41, cpu_t::ssse3, cpu_t::sse3, cpu_t::sse2>;
+}
+
+template <cpu_t cpu>
+using cpuval_t = cval_t<cpu_t, cpu>;
+template <cpu_t cpu>
+constexpr auto cpuval = cpuval_t<cpu>{};
+
+constexpr auto cpu_all = cfilter(internal::cpu_list, internal::cpu_list >= cpuval<cpu_t::native>);
+constexpr auto cpu_shuffle =
+ cfilter(cpu_all, cpu_all != cpuval<cpu_t::sse3> && cpu_all != cpuval<cpu_t::ssse3>);
+
+template <typename T>
+constexpr datatype typeclass = std::is_floating_point<typename compound_type_traits<T>::subtype>::value
+ ? datatype::f
+ : std::is_integral<typename compound_type_traits<T>::subtype>::value
+ ? (std::is_unsigned<typename compound_type_traits<T>::subtype>::value
+ ? datatype::u
+ : datatype::i)
+ : datatype();
+
+template <typename T>
+using is_f_class = std::integral_constant<bool, typeclass<T> == datatype::f>;
+template <typename T>
+using is_u_class = std::integral_constant<bool, typeclass<T> == datatype::u>;
+template <typename T>
+using is_i_class = std::integral_constant<bool, typeclass<T> == datatype::i>;
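+
+// Example (sketch): the classification applies per scalar subtype:
+//   typeclass<f32> == datatype::f
+//   is_u_class<u64>::value == true
+//   is_i_class<i16>::value == true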
+
+template <typename T>
+struct typebits
+{
+ constexpr static size_t bits = sizeof(typename compound_type_traits<T>::subtype) * 8;
+ constexpr static size_t width = compound_type_traits<T>::is_scalar ? 0 : compound_type_traits<T>::width;
+ using subtype = typename compound_type_traits<T>::subtype;
+};
+
+namespace internal
+{
+template <size_t bits>
+struct float_type_impl;
+template <size_t bits>
+struct int_type_impl;
+template <size_t bits>
+struct unsigned_type_impl;
+
+template <>
+struct float_type_impl<32>
+{
+ using type = f32;
+};
+template <>
+struct float_type_impl<64>
+{
+ using type = f64;
+};
+
+template <>
+struct int_type_impl<8>
+{
+ using type = i8;
+};
+template <>
+struct int_type_impl<16>
+{
+ using type = i16;
+};
+template <>
+struct int_type_impl<32>
+{
+ using type = i32;
+};
+template <>
+struct int_type_impl<64>
+{
+ using type = i64;
+};
+
+template <>
+struct unsigned_type_impl<8>
+{
+ using type = u8;
+};
+template <>
+struct unsigned_type_impl<16>
+{
+ using type = u16;
+};
+template <>
+struct unsigned_type_impl<32>
+{
+ using type = u32;
+};
+template <>
+struct unsigned_type_impl<64>
+{
+ using type = u64;
+};
+}
+
+template <size_t bits>
+using float_type = typename internal::float_type_impl<bits>::type;
+template <size_t bits>
+using int_type = typename internal::int_type_impl<bits>::type;
+template <size_t bits>
+using unsigned_type = typename internal::unsigned_type_impl<bits>::type;
+
+template <typename T>
+using ftype = deep_rebind<T, float_type<typebits<deep_subtype<T>>::bits>>;
+template <typename T>
+using itype = deep_rebind<T, int_type<typebits<deep_subtype<T>>::bits>>;
+template <typename T>
+using utype = deep_rebind<T, unsigned_type<typebits<deep_subtype<T>>::bits>>;
+
+template <typename T>
+using fsubtype = ftype<subtype<T>>;
+template <typename T>
+using isubtype = itype<subtype<T>>;
+template <typename T>
+using usubtype = utype<subtype<T>>;
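+
+// Example: rebind to the float/int/unsigned type of the same bit width:
+//   ftype<i32> -> f32     (32-bit)
+//   itype<f64> -> i64     (64-bit)
+//   utype<i16> -> u16     (16-bit)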
+
+template <typename T, size_t N>
+struct vec_t
+{
+ using value_type = T;
+ constexpr static size_t size() noexcept { return N; }
+ constexpr vec_t() noexcept = default;
+
+ using scalar_type = subtype<T>;
+ constexpr static size_t scalar_size() noexcept { return N * compound_type_traits<T>::width; }
+};
+
+template <typename T, typename R = T>
+using enable_if_vec = enable_if<(typebits<T>::width > 0), R>;
+template <typename T, typename R = T>
+using enable_if_not_vec = enable_if<(typebits<T>::width == 0), R>;
+
+template <typename T, typename R = T>
+using enable_if_i = enable_if<typeclass<T> == datatype::i, R>;
+template <typename T, typename R = T>
+using enable_if_u = enable_if<typeclass<T> == datatype::u, R>;
+template <typename T, typename R = T>
+using enable_if_f = enable_if<typeclass<T> == datatype::f, R>;
+
+template <typename T, typename R = T>
+using enable_if_not_i = enable_if<typeclass<T> != datatype::i, R>;
+template <typename T, typename R = T>
+using enable_if_not_u = enable_if<typeclass<T> != datatype::u, R>;
+template <typename T, typename R = T>
+using enable_if_not_f = enable_if<typeclass<T> != datatype::f, R>;
+
+namespace internal
+{
+KFR_INLINE f32 builtin_sqrt(f32 x) { return __builtin_sqrtf(x); }
+KFR_INLINE f64 builtin_sqrt(f64 x) { return __builtin_sqrt(x); }
+KFR_INLINE f80 builtin_sqrt(f80 x) { return __builtin_sqrtl(x); }
+KFR_INLINE void builtin_memcpy(void* dest, const void* src, size_t size)
+{
+ __builtin_memcpy(dest, src, size);
+}
+KFR_INLINE void builtin_memset(void* dest, int val, size_t size) { __builtin_memset(dest, val, size); }
+template <typename T1>
+KFR_INLINE void zeroize(T1& value)
+{
+ builtin_memset(static_cast<void*>(std::addressof(value)), 0, sizeof(T1));
+}
+}
+
+#pragma clang diagnostic push
+#if CID_HAS_WARNING("-Wundefined-reinterpret-cast")
+#pragma clang diagnostic ignored "-Wundefined-reinterpret-cast"
+#endif
+
+template <typename T, typename U>
+constexpr inline static T& ref_cast(U& ptr)
+{
+ return reinterpret_cast<T&>(ptr);
+}
+
+template <typename T, typename U>
+constexpr inline static const T& ref_cast(const U& ptr)
+{
+ return reinterpret_cast<const T&>(ptr);
+}
+
+template <typename T, typename U>
+constexpr inline static T* ptr_cast(U* ptr)
+{
+ return reinterpret_cast<T*>(ptr);
+}
+
+template <typename T, typename U>
+constexpr inline static const T* ptr_cast(const U* ptr)
+{
+ return reinterpret_cast<const T*>(ptr);
+}
+
+template <typename T, typename U>
+constexpr inline static T* ptr_cast(U* ptr, ptrdiff_t offset)
+{
+ return ptr_cast<T>(ptr_cast<u8>(ptr) + offset);
+}
+
+#pragma clang diagnostic pop
+
+__attribute__((unused)) static const char* cpu_name(cpu_t set)
+{
+ static const char* names[] = { "sse2", "sse3", "ssse3", "sse41", "sse42", "avx1", "avx2" };
+ if (set >= cpu_t::lowest && set <= cpu_t::highest)
+ return names[static_cast<size_t>(set)];
+ return "-";
+}
+
+#define KFR_FN_S(fn) \
+ template <typename Arg, typename... Args> \
+ KFR_INLINE enable_if_not_vec<Arg> fn(Arg arg, Args... args) \
+ { \
+ return fn(make_vector(arg), make_vector(args)...)[0]; \
+ }
+#define KFR_FN_S_S(fn) \
+ template <typename Arg, typename... Args, KFR_ENABLE_IF(is_number<Arg>::value)> \
+ KFR_SINTRIN enable_if_not_vec<Arg> fn(Arg arg, Args... args) \
+ { \
+ return fn(make_vector(arg), make_vector(args)...)[0]; \
+ }
+
+template <typename T>
+struct initialvalue
+{
+};
+
+constexpr double infinity = __builtin_inf();
+constexpr double qnan = __builtin_nan("");
+
+namespace internal
+{
+constexpr f32 allones_f32 = -__builtin_nanf("0xFFFFFFFF");
+constexpr f64 allones_f64 = -__builtin_nan("0xFFFFFFFFFFFFFFFF");
+
+template <typename T, typename Tsub = subtype<T>>
+constexpr Tsub allones = choose_const<Tsub>(allones_f32, allones_f64, static_cast<Tsub>(-1));
+
+template <typename T, typename Tsub = subtype<T>>
+constexpr Tsub allzeros = Tsub();
+
+template <typename T, typename Tsub = subtype<T>>
+constexpr Tsub highbitmask = choose_const<Tsub>(-0.f, -0.0, 1ull << (typebits<T>::bits - 1));
+
+template <typename T, typename Tsub = subtype<T>>
+constexpr Tsub invhighbitmask = choose_const<Tsub>(__builtin_nanf("0xFFFFFFFF"),
+ __builtin_nan("0xFFFFFFFFFFFFFFFF"),
+ ~(1ull << (typebits<T>::bits - 1)));
+
+template <typename T>
+constexpr inline T maskbits(bool value)
+{
+ return value ? internal::allones<T> : T();
+}
+}
+
+template <typename T>
+constexpr size_t widthof(T)
+{
+ return compound_type_traits<T>::width;
+}
+template <typename T>
+constexpr size_t widthof()
+{
+ return compound_type_traits<T>::width;
+}
+
+template <typename T>
+constexpr inline T bitness_const(T x32, T x64)
+{
+#ifdef KFR_ARCH_X64
+ (void)x32;
+ return x64;
+#else
+ (void)x64;
+ return x32;
+#endif
+}
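+
+// Example: bitness_const(8, 16) yields 16 when compiling for a 64-bit target
+// and 8 otherwise; native_register_count below is defined this way.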
+
+constexpr size_t native_cache_alignment = 64;
+constexpr size_t native_cache_alignment_mask = native_cache_alignment - 1;
+constexpr size_t maximum_vector_alignment = 32;
+constexpr size_t maximum_vector_alignment_mask = maximum_vector_alignment - 1;
+constexpr size_t native_register_count = bitness_const(8, 16);
+template <cpu_t c>
+constexpr size_t native_float_vector_size = c >= cpu_t::avx1 ? 32 : c >= cpu_t::sse2 ? 16 : 0;
+template <cpu_t c>
+constexpr size_t native_int_vector_size = c >= cpu_t::avx2 ? 32 : c >= cpu_t::sse2 ? 16 : 0;
+
+struct input_expression
+{
+ using value_type = generic;
+ using size_type = infinite;
+ constexpr size_type size() const noexcept { return {}; }
+
+ KFR_INLINE void begin_block(size_t) const {}
+ KFR_INLINE void end_block(size_t) const {}
+};
+
+struct output_expression
+{
+ using value_type = generic;
+ using size_type = infinite;
+ constexpr size_type size() const noexcept { return {}; }
+
+ KFR_INLINE void output_begin_block(size_t) const {}
+ KFR_INLINE void output_end_block(size_t) const {}
+};
+
+template <typename E>
+using is_input_expression = std::is_base_of<input_expression, decay<E>>;
+
+template <typename... Es>
+using is_input_expressions = or_t<std::is_base_of<input_expression, decay<Es>>...>;
+
+template <typename E>
+using is_output_expression = std::is_base_of<output_expression, decay<E>>;
+
+template <typename T>
+using is_numeric = is_number<deep_subtype<T>>;
+
+template <typename... Ts>
+using is_numeric_args = and_t<is_numeric<Ts>...>;
+
+template <typename T, cpu_t c = cpu_t::native>
+constexpr size_t vector_width = typeclass<T> == datatype::f ? native_float_vector_size<c> / sizeof(T)
+ : native_int_vector_size<c> / sizeof(T);
+
+template <cpu_t c>
+constexpr size_t vector_width<void, c> = 0;
+
+namespace internal
+{
+
+template <cpu_t c>
+constexpr size_t native_vector_alignment = std::max(native_float_vector_size<c>, native_int_vector_size<c>);
+
+template <cpu_t c>
+constexpr bool fast_unaligned = c >= cpu_t::avx1;
+
+template <cpu_t c>
+constexpr size_t native_vector_alignment_mask = native_vector_alignment<c> - 1;
+
+template <typename T, cpu_t c>
+constexpr inline size_t get_vector_width(size_t scale = 1)
+{
+ return scale * vector_width<T, c>;
+}
+template <typename T, cpu_t c>
+constexpr inline size_t get_vector_width(size_t x32scale, size_t x64scale)
+{
+ return bitness_const(x32scale, x64scale) * vector_width<T, c>;
+}
+
+template <typename T, cpu_t c>
+constexpr auto vector_width_range = csize<1> << csizeseq<ilog2(vector_width<T, c>) + 1>;
+
+template <typename T, cpu_t c>
+constexpr size_t vector_capacity = native_register_count* vector_width<T, c>;
+
+template <typename T, cpu_t c>
+constexpr size_t maximum_vector_size = std::min(static_cast<size_t>(32), vector_capacity<T, c> / 4);
+}
+}
+namespace cometa
+{
+
+template <typename T, size_t N>
+struct compound_type_traits<kfr::vec_t<T, N>>
+{
+ constexpr static size_t width = N;
+ using subtype = T;
+ using deep_subtype = cometa::deep_subtype<T>;
+ constexpr static bool is_scalar = false;
+
+ template <typename U>
+ using rebind = kfr::vec_t<U, N>;
+ template <typename U>
+ using deep_rebind = kfr::vec_t<cometa::deep_rebind<subtype, U>, N>;
+};
+}
+
+#pragma clang diagnostic pop
diff --git a/include/kfr/base/univector.hpp b/include/kfr/base/univector.hpp
@@ -0,0 +1,300 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+
+#include "../base/function.hpp"
+#include "../base/memory.hpp"
+#include "../base/read_write.hpp"
+#include "../base/types.hpp"
+
+namespace kfr
+{
+
+constexpr size_t tag_array_ref = 0;
+constexpr size_t tag_dynamic_vector = max_size_t;
+
+template <typename T, size_t Size = tag_dynamic_vector>
+struct univector;
+
+template <typename T, typename Class>
+struct univector_base : input_expression, output_expression
+{
+ template <typename U, size_t N>
+ KFR_INLINE void operator()(coutput_t, size_t index, vec<U, N> value)
+ {
+ T* data = ptr_cast<Class>(this)->data();
+ write(ptr_cast<T>(data) + index, cast<T>(value));
+ }
+ template <typename U, size_t N>
+ KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
+ {
+ const T* data = ptr_cast<Class>(this)->data();
+ return cast<U>(read<N>(ptr_cast<T>(data) + index));
+ }
+
+ template <typename Input, KFR_ENABLE_IF(is_input_expression<Input>::value)>
+ KFR_INLINE Class& operator=(Input&& input)
+ {
+ assign_expr(std::forward<Input>(input));
+ return *ptr_cast<Class>(this);
+ }
+ univector<T, 0> slice(size_t start = 0, size_t size = max_size_t)
+ {
+ T* data = ptr_cast<Class>(this)->data();
+ const size_t this_size = ptr_cast<Class>(this)->size();
+ return array_ref<T>(data + start, std::min(size, this_size - start));
+ }
+ univector<const T, 0> slice(size_t start = 0, size_t size = max_size_t) const
+ {
+ const T* data = ptr_cast<Class>(this)->data();
+ const size_t this_size = ptr_cast<Class>(this)->size();
+ return array_ref<const T>(data + start, std::min(size, this_size - start));
+ }
+
+ array_ref<T> ref()
+ {
+ T* data = get_data();
+ const size_t size = get_size();
+ return array_ref<T>(data, size);
+ }
+ array_ref<const T> ref() const
+ {
+ const T* data = get_data();
+ const size_t size = get_size();
+ return array_ref<const T>(data, size);
+ }
+
+ void ringbuf_write(size_t& cursor, const T* src, size_t srcsize)
+ {
+ if (srcsize == 0)
+ return;
+ // skip redundant data
+ const size_t size = get_size();
+ T* data = get_data();
+ if (srcsize > size)
+ {
+            // keep only the last (srcsize % size) elements; earlier ones
+            // would be overwritten anyway
+            src = src + (srcsize - srcsize % size);
+ srcsize = srcsize % size;
+ }
+ const size_t fsize = size - cursor;
+ // one fragment
+ if (srcsize <= fsize)
+ {
+ std::copy_n(src, srcsize, data + cursor);
+ }
+ else // two fragments
+ {
+ std::copy_n(src, fsize, data + cursor);
+ std::copy_n(src + fsize, srcsize - fsize, data);
+ }
+ ringbuf_step(cursor, srcsize);
+ }
+
+ void ringbuf_write(size_t& cursor, const T value)
+ {
+ T* data = get_data();
+ data[cursor] = value;
+
+ ringbuf_step(cursor, 1);
+ }
+ void ringbuf_step(size_t& cursor, size_t step)
+ {
+ const size_t size = get_size();
+ cursor = cursor + step;
+ cursor = cursor >= size ? cursor - size : cursor;
+ }
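+
+    // Usage sketch (illustrative; `block` stands for any contiguous buffer):
+    //   univector<float, 1024> buf;
+    //   size_t cursor = 0;
+    //   buf.ringbuf_write(cursor, block.data(), block.size()); // wraps around
+    //   buf.ringbuf_write(cursor, 0.0f);                       // single value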
+
+protected:
+ template <typename Input>
+ KFR_INLINE void assign_expr(Input&& input)
+ {
+ process<T>(*this, std::forward<Input>(input), get_size());
+ }
+
+private:
+ constexpr infinite size() const noexcept = delete;
+ KFR_INLINE size_t get_size() const { return ptr_cast<Class>(this)->size(); }
+ KFR_INLINE const T* get_data() const { return ptr_cast<Class>(this)->data(); }
+ KFR_INLINE T* get_data() { return ptr_cast<Class>(this)->data(); }
+};
+
+template <typename T, size_t Size>
+struct alignas(maximum_vector_alignment) univector : std::array<T, Size>,
+ univector_base<T, univector<T, Size>>
+{
+ using std::array<T, Size>::size;
+ using size_type = size_t;
+ template <typename Input, KFR_ENABLE_IF(is_input_expression<Input>::value)>
+ univector(Input&& input)
+ {
+ this->assign_expr(std::forward<Input>(input));
+ }
+ template <typename... Args>
+ constexpr univector(T x, Args... args) noexcept : std::array<T, Size>{ { x, static_cast<T>(args)... } }
+ {
+ }
+
+ constexpr univector() noexcept(noexcept(std::array<T, Size>())) = default;
+ constexpr univector(size_t, const T& value) { std::fill(this->begin(), this->end(), value); }
+ constexpr static bool size_known = true;
+ constexpr static bool is_array = true;
+ constexpr static bool is_array_ref = false;
+ constexpr static bool is_vector = false;
+ constexpr static bool is_aligned = true;
+ constexpr static bool is_pod = kfr::is_pod<T>::value;
+ using value_type = T;
+
+ using univector_base<T, univector>::operator=;
+};
+
+template <typename T>
+struct univector<T, tag_array_ref> : array_ref<T>, univector_base<T, univector<T, tag_array_ref>>
+{
+ using array_ref<T>::size;
+ using array_ref<T>::array_ref;
+ using size_type = size_t;
+ constexpr univector(const array_ref<T>& other) : array_ref<T>(other) {}
+ constexpr univector(array_ref<T>&& other) : array_ref<T>(std::move(other)) {}
+
+ template <size_t Tag>
+ constexpr univector(const univector<T, Tag>& other) : array_ref<T>(other.data(), other.size())
+ {
+ }
+ template <size_t Tag>
+ constexpr univector(univector<T, Tag>& other) : array_ref<T>(other.data(), other.size())
+ {
+ }
+ template <typename U, size_t Tag, KFR_ENABLE_IF(is_same<remove_const<T>, U>::value&& is_const<T>::value)>
+ constexpr univector(const univector<U, Tag>& other) : array_ref<T>(other.data(), other.size())
+ {
+ }
+ template <typename U, size_t Tag, KFR_ENABLE_IF(is_same<remove_const<T>, U>::value&& is_const<T>::value)>
+ constexpr univector(univector<U, Tag>& other) : array_ref<T>(other.data(), other.size())
+ {
+ }
+ constexpr static bool size_known = false;
+ constexpr static bool is_array = false;
+ constexpr static bool is_array_ref = true;
+ constexpr static bool is_vector = false;
+ constexpr static bool is_aligned = false;
+ using value_type = T;
+
+ using univector_base<T, univector>::operator=;
+};
+
+template <typename T>
+struct univector<T, tag_dynamic_vector> : std::vector<T, allocator<T>>,
+ univector_base<T, univector<T, tag_dynamic_vector>>
+{
+ using std::vector<T, allocator<T>>::size;
+ using std::vector<T, allocator<T>>::vector;
+ using size_type = size_t;
+ template <typename Input, KFR_ENABLE_IF(is_input_expression<Input>::value && !is_infinite<Input>::value)>
+ univector(Input&& input)
+ {
+ this->resize(input.size());
+ this->assign_expr(std::forward<Input>(input));
+ }
+ constexpr univector() noexcept = default;
+    // note: kfr::allocator differs from std::allocator, so copy element-wise
+    univector(const std::vector<T>& other) : std::vector<T, allocator<T>>(other.begin(), other.end()) {}
+    univector(std::vector<T>&& other)
+        : std::vector<T, allocator<T>>(std::make_move_iterator(other.begin()),
+                                       std::make_move_iterator(other.end()))
+    {
+    }
+ constexpr univector(const array_ref<T>& other) : std::vector<T, allocator<T>>(other.begin(), other.end())
+ {
+ }
+ constexpr univector(const array_ref<const T>& other)
+ : std::vector<T, allocator<T>>(other.begin(), other.end())
+ {
+ }
+ constexpr static bool size_known = false;
+ constexpr static bool is_array = false;
+ constexpr static bool is_array_ref = false;
+ constexpr static bool is_vector = true;
+ constexpr static bool is_aligned = true;
+ using value_type = T;
+
+ using univector_base<T, univector>::operator=;
+};
+
+template <typename T>
+using univector_ref = univector<T, tag_array_ref>;
+
+template <typename T>
+using univector_dyn = univector<T, tag_dynamic_vector>;
+
+template <typename T, size_t Size1 = tag_dynamic_vector, size_t Size2 = tag_dynamic_vector>
+using univector2d = univector<univector<T, Size2>, Size1>;
+
+template <typename T, size_t Size1 = tag_dynamic_vector, size_t Size2 = tag_dynamic_vector,
+ size_t Size3 = tag_dynamic_vector>
+using univector3d = univector<univector<univector<T, Size3>, Size2>, Size1>;
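+
+// Example (sketch): one alias per storage tag:
+//   univector<float, 256> fixed;         // std::array-backed, aligned
+//   univector<float> dynamic(1024);      // std::vector-backed, resizable
+//   univector_ref<float> view = fixed.slice(16, 64); // non-owning window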
+
+template <cpu_t c = cpu_t::native, size_t Tag, typename T, typename Fn>
+KFR_INLINE void process(univector<T, Tag>& vector, Fn&& fn)
+{
+ static_assert(is_input_expression<Fn>::value, "Fn must be an expression");
+ return process<T, c>(vector, std::forward<Fn>(fn), vector.size());
+}
+
+template <cpu_t c = cpu_t::native, typename T, size_t Nsize, typename Fn>
+KFR_INLINE void process(T (&dest)[Nsize], Fn&& fn)
+{
+ static_assert(is_input_expression<Fn>::value, "Fn must be an expression");
+ return process<T, c>(univector<T, tag_array_ref>(dest), std::forward<Fn>(fn), Nsize);
+}
+template <cpu_t c = cpu_t::native, typename T, typename Fn>
+KFR_INLINE void process(const array_ref<T>& vector, Fn&& fn)
+{
+ static_assert(is_input_expression<Fn>::value, "Fn must be an expression");
+ return process<T, c>(univector<T, tag_array_ref>(vector), std::forward<Fn>(fn), vector.size());
+}
+
+template <typename T>
+KFR_INLINE univector_ref<T> make_univector(T* data, size_t size)
+{
+ return univector_ref<T>(data, size);
+}
+
+template <typename T>
+KFR_INLINE univector_ref<const T> make_univector(const T* data, size_t size)
+{
+ return univector_ref<const T>(data, size);
+}
+
+template <typename Expr, typename T = value_type_of<Expr>>
+KFR_INLINE univector<T> render(Expr&& expr)
+{
+ univector<T> result;
+ result.resize(expr.size());
+ result = expr;
+ return result;
+}
+
+template <typename Expr, typename T = value_type_of<Expr>>
+KFR_INLINE univector<T> render(Expr&& expr, size_t size)
+{
+ univector<T> result;
+ result.resize(size);
+ result = expr;
+ return result;
+}
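+
+// Usage sketch (illustrative; `expr` stands for any finite input expression):
+//   univector<float> out = render(expr);     // size taken from the expression
+//   univector<float> ten = render(expr, 10); // evaluate the first 10 elements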
+}
diff --git a/include/kfr/base/vec.hpp b/include/kfr/base/vec.hpp
@@ -0,0 +1,1324 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+
+#include "kfr.h"
+
+#include "types.hpp"
+
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wfloat-equal"
+#pragma clang diagnostic ignored "-Wc++98-compat-local-type-template-args"
+#pragma clang diagnostic ignored "-Wshadow"
+#pragma clang diagnostic ignored "-Wpacked"
+
+namespace kfr
+{
+
+template <typename T, size_t N>
+struct vec;
+template <typename T, size_t N>
+struct mask;
+
+using simdindex = int;
+
+template <typename T, simdindex N>
+using simd = T __attribute__((ext_vector_type(N)));
+
+namespace internal
+{
+template <typename T>
+struct is_vec_impl : std::false_type
+{
+};
+
+template <typename T, size_t N>
+struct is_vec_impl<vec<T, N>> : std::true_type
+{
+};
+
+template <typename T, size_t N>
+struct is_vec_impl<mask<T, N>> : std::true_type
+{
+};
+
+template <typename T, bool A>
+struct struct_with_alignment
+{
+ T value;
+ KFR_INTRIN void operator=(T value) { this->value = value; }
+};
+
+template <typename T>
+struct struct_with_alignment<T, false>
+{
+ T value;
+ KFR_INTRIN void operator=(T value) { this->value = value; }
+} __attribute__((__packed__, __may_alias__)); //
+}
+
+template <typename T>
+using is_vec = internal::is_vec_impl<T>;
+
+template <typename T, size_t N, bool A>
+using vec_algn = internal::struct_with_alignment<simd<T, N>, A>;
+
+template <typename T, size_t N, bool A>
+struct vec_ptr
+{
+ constexpr KFR_INLINE vec_ptr(T* data) noexcept : data(data) {}
+ constexpr KFR_INLINE vec_ptr(const T* data) noexcept : data(const_cast<T*>(data)) {}
+    KFR_INLINE const vec_algn<T, N, A>& operator[](size_t i) const
+    {
+        return *reinterpret_cast<const vec_algn<T, N, A>*>(data + i);
+    }
+    KFR_INLINE vec_algn<T, N, A>& operator[](size_t i)
+    {
+        return *reinterpret_cast<vec_algn<T, N, A>*>(data + i);
+    }
+ T* data;
+};
+
+template <typename To, typename From, size_t N,
+ KFR_ENABLE_IF(std::is_same<subtype<From>, subtype<To>>::value),
+ size_t Nout = N* compound_type_traits<From>::width / compound_type_traits<To>::width>
+constexpr KFR_INLINE vec<To, Nout> subcast(vec<From, N> value) noexcept
+{
+ return *value;
+}
+
+namespace internal
+{
+
+template <typename Fn, size_t index>
+constexpr enable_if<std::is_same<size_t, decltype(std::declval<Fn>().operator()(size_t()))>::value, size_t>
+get_vec_index()
+{
+ constexpr Fn fn{};
+ return fn(index);
+}
+
+template <typename Fn, size_t index>
+constexpr enable_if<
+ std::is_same<size_t, decltype(std::declval<Fn>().template operator() < index > ())>::value, size_t>
+get_vec_index(int = 0)
+{
+ constexpr Fn fn{};
+ return fn.template operator()<index>();
+}
+
+constexpr size_t index_undefined = static_cast<size_t>(-1);
+
+template <typename T, size_t N, size_t... Indices, KFR_ENABLE_IF(!is_compound<T>::value)>
+KFR_INLINE vec<T, sizeof...(Indices)> shufflevector(csizes_t<Indices...>, vec<T, N> x, vec<T, N> y)
+{
+ vec<T, sizeof...(Indices)> result = __builtin_shufflevector(
+ *x, *y, static_cast<intptr_t>(Indices == index_undefined ? -1 : static_cast<intptr_t>(Indices))...);
+ return result;
+}
+
+template <size_t... indices, size_t... counter, size_t groupsize = sizeof...(counter) / sizeof...(indices)>
+constexpr auto inflate_impl(csizes_t<indices...> ind, csizes_t<counter...> cnt)
+ -> csizes_t<(ind.get(csize<counter / groupsize>) == index_undefined
+ ? index_undefined
+ : (counter % groupsize + groupsize * ind.get(csize<counter / groupsize>)))...>
+{
+ return {};
+}
+
+template <size_t groupsize, size_t... indices>
+constexpr auto inflate(csize_t<groupsize>, csizes_t<indices...>)
+{
+ return inflate_impl(csizes<indices...>, csizeseq<sizeof...(indices)*groupsize>);
+}
+
+template <typename T, size_t N, size_t... Indices, KFR_ENABLE_IF(is_compound<T>::value)>
+KFR_INLINE vec<T, sizeof...(Indices)> shufflevector(csizes_t<Indices...> indices, vec<T, N> x, vec<T, N> y)
+{
+ return subcast<T>(
+ shufflevector(inflate(csize<widthof<T>()>, indices), subcast<subtype<T>>(x), subcast<subtype<T>>(y)));
+}
+
+template <size_t... Indices, size_t Nout = sizeof...(Indices), typename T, size_t N>
+KFR_INLINE vec<T, Nout> shufflevector(csizes_t<Indices...>, vec<T, N> x)
+{
+ return internal::shufflevector<T, N>(csizes<Indices...>, x, x);
+}
+
+template <typename Fn, size_t groupsize, typename T, size_t N, size_t... Indices,
+ size_t Nout = sizeof...(Indices)>
+KFR_INLINE vec<T, Nout> shufflevector(vec<T, N> x, vec<T, N> y, cvals_t<size_t, Indices...>)
+{
+ static_assert(N % groupsize == 0, "N % groupsize == 0");
+ return internal::shufflevector<T, N>(
+ csizes<(get_vec_index<Fn, Indices / groupsize>() * groupsize + Indices % groupsize)...>, x, y);
+}
+}
+
+template <size_t Nout, typename Fn, size_t groupsize = 1, typename T, size_t N>
+KFR_INLINE vec<T, Nout> shufflevector(vec<T, N> x, vec<T, N> y)
+{
+ return internal::shufflevector<Fn, groupsize>(x, y, csizeseq<Nout>);
+}
+
+template <size_t Nout, typename Fn, size_t groupsize = 1, typename T, size_t N>
+KFR_INLINE vec<T, Nout> shufflevector(vec<T, N> x)
+{
+ return internal::shufflevector<Fn, groupsize>(x, x, csizeseq<Nout>);
+}
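+
+// Example (sketch): Fn maps each output lane to a source lane within the
+// concatenation of x and y. Reversing a 4-element vector:
+//   struct reverse4
+//   {
+//       constexpr size_t operator()(size_t index) const { return 3 - index; }
+//   };
+//   shufflevector<4, reverse4>(v); // turns {1, 2, 3, 4} into {4, 3, 2, 1}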
+
+namespace swizzle
+{
+template <size_t>
+struct swiz
+{
+ constexpr swiz() {}
+};
+
+constexpr swiz<0> x{};
+constexpr swiz<1> y{};
+constexpr swiz<2> z{};
+constexpr swiz<3> w{};
+constexpr swiz<0> r{};
+constexpr swiz<1> g{};
+constexpr swiz<2> b{};
+constexpr swiz<3> a{};
+constexpr swiz<0> s{};
+constexpr swiz<1> t{};
+constexpr swiz<2> p{};
+constexpr swiz<3> q{};
+
+constexpr swiz<0> s0{};
+constexpr swiz<1> s1{};
+constexpr swiz<2> s2{};
+constexpr swiz<3> s3{};
+constexpr swiz<4> s4{};
+constexpr swiz<5> s5{};
+constexpr swiz<6> s6{};
+constexpr swiz<7> s7{};
+constexpr swiz<8> s8{};
+constexpr swiz<9> s9{};
+constexpr swiz<10> s10{};
+constexpr swiz<11> s11{};
+constexpr swiz<12> s12{};
+constexpr swiz<13> s13{};
+constexpr swiz<14> s14{};
+constexpr swiz<15> s15{};
+}
+
+template <typename To, typename From, KFR_ENABLE_IF(!is_compound<From>::value)>
+constexpr KFR_INLINE To cast(From value) noexcept
+{
+ return static_cast<To>(value);
+}
+template <typename To, typename From, KFR_ENABLE_IF(!is_compound<From>::value)>
+constexpr KFR_INLINE To bitcast(From value) noexcept
+{
+ union {
+ From from;
+ To to;
+ } u{ value };
+ return u.to;
+}
+
+template <typename From, typename To = utype<From>, KFR_ENABLE_IF(!is_compound<From>::value)>
+constexpr KFR_INLINE To ubitcast(From value) noexcept
+{
+ return bitcast<To>(value);
+}
+
+template <typename From, typename To = itype<From>, KFR_ENABLE_IF(!is_compound<From>::value)>
+constexpr KFR_INLINE To ibitcast(From value) noexcept
+{
+ return bitcast<To>(value);
+}
+
+template <typename From, typename To = ftype<From>, KFR_ENABLE_IF(!is_compound<From>::value)>
+constexpr KFR_INLINE To fbitcast(From value) noexcept
+{
+ return bitcast<To>(value);
+}
+
+template <typename To, typename From, size_t N, KFR_ENABLE_IF(!is_compound<To>::value)>
+constexpr KFR_INLINE vec<To, N> cast(vec<From, N> value) noexcept
+{
+ return __builtin_convertvector(*value, simd<To, N>);
+}
+template <typename To, typename From, simdindex N>
+constexpr KFR_INLINE simd<To, N> cast(simd<From, N> value) noexcept
+{
+ return __builtin_convertvector(value, simd<To, N>);
+}
+template <typename To, typename From, size_t N, size_t Nout = sizeof(From) * N / sizeof(To)>
+constexpr KFR_INLINE vec<To, Nout> bitcast(vec<From, N> value) noexcept
+{
+ return reinterpret_cast<simd<To, Nout>>(*value);
+}
+template <typename To, typename From, simdindex N, simdindex Nout = sizeof(From) * N / sizeof(To)>
+constexpr KFR_INLINE simd<To, Nout> bitcast(simd<From, N> value) noexcept
+{
+ return reinterpret_cast<simd<To, Nout>>(value);
+}
+
+template <typename From, size_t N, typename To = utype<From>, size_t Nout = sizeof(From) * N / sizeof(To)>
+constexpr KFR_INLINE vec<To, Nout> ubitcast(vec<From, N> value) noexcept
+{
+ return reinterpret_cast<simd<To, Nout>>(*value);
+}
+
+template <typename From, size_t N, typename To = itype<From>, size_t Nout = sizeof(From) * N / sizeof(To)>
+constexpr KFR_INLINE vec<To, Nout> ibitcast(vec<From, N> value) noexcept
+{
+ return reinterpret_cast<simd<To, Nout>>(*value);
+}
+
+template <typename From, size_t N, typename To = ftype<From>, size_t Nout = sizeof(From) * N / sizeof(To)>
+constexpr KFR_INLINE vec<To, Nout> fbitcast(vec<From, N> value) noexcept
+{
+ return reinterpret_cast<simd<To, Nout>>(*value);
+}
+
+template <typename From, simdindex N, typename To = utype<From>,
+ simdindex Nout = sizeof(From) * N / sizeof(To)>
+constexpr KFR_INLINE simd<To, Nout> ubitcast(simd<From, N> value) noexcept
+{
+ return reinterpret_cast<simd<To, Nout>>(value);
+}
+
+template <typename From, simdindex N, typename To = itype<From>,
+ simdindex Nout = sizeof(From) * N / sizeof(To)>
+constexpr KFR_INLINE simd<To, Nout> ibitcast(simd<From, N> value) noexcept
+{
+ return reinterpret_cast<simd<To, Nout>>(value);
+}
+
+template <typename From, simdindex N, typename To = ftype<From>,
+ simdindex Nout = sizeof(From) * N / sizeof(To)>
+constexpr KFR_INLINE simd<To, Nout> fbitcast(simd<From, N> value) noexcept
+{
+ return reinterpret_cast<simd<To, Nout>>(value);
+}
+
+constexpr KFR_INLINE size_t vector_alignment(size_t size) { return next_poweroftwo(size); }
+
+template <typename T, size_t N, size_t... Sizes, size_t Nout = N + csum(csizes<Sizes...>)>
+KFR_INLINE vec<T, Nout> concat(vec<T, N> x, vec<T, Sizes>... rest);
+
+namespace internal
+{
+template <size_t start = 0, size_t stride = 1>
+struct shuffle_index
+{
+ constexpr KFR_INLINE size_t operator()(size_t index) const { return start + index * stride; }
+};
+
+template <size_t count, size_t start = 0, size_t stride = 1>
+struct shuffle_index_wrap
+{
+ constexpr inline size_t operator()(size_t index) const { return (start + index * stride) % count; }
+};
+}
+
+template <size_t count, typename T, size_t N, size_t Nout = N* count>
+KFR_INLINE vec<T, Nout> repeat(vec<T, N> x)
+{
+ return shufflevector<Nout, internal::shuffle_index_wrap<N, 0, 1>>(x);
+}
+KFR_FN(repeat)
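+
+// Example: repeat<2>(make_vector(1, 2)) yields {1, 2, 1, 2}.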
+
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wold-style-cast"
+
+template <size_t N, typename T>
+constexpr KFR_INLINE vec<T, N> broadcast(T x)
+{
+ return (simd<T, N>)(x);
+}
+
+#pragma clang diagnostic pop
+
+template <size_t Nout, typename T, size_t N, KFR_ENABLE_IF(Nout != N)>
+KFR_INLINE vec<T, Nout> resize(vec<T, N> x)
+{
+ return shufflevector<Nout, internal::shuffle_index_wrap<N, 0, 1>>(x);
+}
+template <size_t Nout, typename T, size_t N, KFR_ENABLE_IF(Nout == N)>
+constexpr KFR_INLINE vec<T, Nout> resize(vec<T, N> x)
+{
+ return x;
+}
+KFR_FN(resize)
+
+namespace internal_read_write
+{
+
+template <size_t N, bool A = false, typename T, KFR_ENABLE_IF(is_poweroftwo(N))>
+KFR_INLINE vec<T, N> read(const T* src)
+{
+ return ptr_cast<vec_algn<subtype<T>, vec<T, N>::scalar_size(), A>>(src)->value;
+}
+
+template <size_t N, bool A = false, typename T, KFR_ENABLE_IF(!is_poweroftwo(N))>
+KFR_INLINE vec<T, N> read(const T* src)
+{
+ constexpr size_t first = prev_poweroftwo(N);
+ constexpr size_t rest = N - first;
+ return concat(internal_read_write::read<first, A>(src),
+ internal_read_write::read<rest, false>(src + first));
+}
+
+template <bool A = false, size_t N, typename T, KFR_ENABLE_IF(is_poweroftwo(N))>
+KFR_INLINE void write(T* dest, vec<T, N> value)
+{
+ ptr_cast<vec_algn<subtype<T>, value.scalar_size(), A>>(dest)->value = *value;
+}
+
+template <bool A = false, size_t N, typename T, KFR_ENABLE_IF(!is_poweroftwo(N))>
+KFR_INLINE void write(T* dest, vec<T, N> value)
+{
+ constexpr size_t first = prev_poweroftwo(N);
+ constexpr size_t rest = N - first;
+ internal_read_write::write<A, first>(dest, shufflevector<first, internal::shuffle_index<0>>(value));
+ internal_read_write::write<false, rest>(dest + first,
+ shufflevector<rest, internal::shuffle_index<first>>(value));
+}
+}
+
+template <typename T, size_t N>
+struct pkd_vec
+{
+ constexpr pkd_vec() noexcept {}
+ pkd_vec(const vec<T, N>& value) noexcept { internal_read_write::write(v, value); }
+ template <typename... Ts>
+ constexpr pkd_vec(Ts... init) noexcept : v{ static_cast<T>(init)... }
+ {
+ static_assert(N <= sizeof...(Ts), "Too few initializers for pkd_vec");
+ }
+
+private:
+ T v[N];
+ friend struct vec<T, N>;
+} __attribute__((packed));
+
+template <typename T>
+struct vec_op
+{
+ using scalar_type = subtype<T>;
+
+ template <simdindex N>
+ constexpr static simd<scalar_type, N> add(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept
+ {
+ return x + y;
+ }
+ template <simdindex N>
+ constexpr static simd<scalar_type, N> sub(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept
+ {
+ return x - y;
+ }
+ template <simdindex N>
+ constexpr static simd<scalar_type, N> mul(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept
+ {
+ return x * y;
+ }
+ template <simdindex N>
+ constexpr static simd<scalar_type, N> div(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept
+ {
+ return x / y;
+ }
+ template <simdindex N>
+ constexpr static simd<scalar_type, N> rem(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept
+ {
+ return x % y;
+ }
+ template <simdindex N>
+ constexpr static simd<scalar_type, N> shl(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept
+ {
+ return x << y;
+ }
+ template <simdindex N>
+ constexpr static simd<scalar_type, N> shr(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept
+ {
+ return x >> y;
+ }
+ template <simdindex N>
+ constexpr static simd<scalar_type, N> neg(simd<scalar_type, N> x) noexcept
+ {
+ return -x;
+ }
+ template <simdindex N>
+ constexpr static simd<scalar_type, N> band(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept
+ {
+ return bitcast<scalar_type>(ubitcast(x) & ubitcast(y));
+ }
+ template <simdindex N>
+ constexpr static simd<scalar_type, N> bor(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept
+ {
+ return bitcast<scalar_type>(ubitcast(x) | ubitcast(y));
+ }
+ template <simdindex N>
+ constexpr static simd<scalar_type, N> bxor(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept
+ {
+ return bitcast<scalar_type>(ubitcast(x) ^ ubitcast(y));
+ }
+ template <simdindex N>
+ constexpr static simd<scalar_type, N> bnot(simd<scalar_type, N> x) noexcept
+ {
+ return bitcast<scalar_type>(~ubitcast(x));
+ }
+
+ template <simdindex N>
+ constexpr static simd<scalar_type, N> eq(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept
+ {
+ return bitcast<scalar_type>(x == y);
+ }
+ template <simdindex N>
+ constexpr static simd<scalar_type, N> ne(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept
+ {
+ return bitcast<scalar_type>(x != y);
+ }
+ template <simdindex N>
+ constexpr static simd<scalar_type, N> lt(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept
+ {
+ return bitcast<scalar_type>(x < y);
+ }
+ template <simdindex N>
+ constexpr static simd<scalar_type, N> gt(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept
+ {
+ return bitcast<scalar_type>(x > y);
+ }
+ template <simdindex N>
+ constexpr static simd<scalar_type, N> le(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept
+ {
+ return bitcast<scalar_type>(x <= y);
+ }
+ template <simdindex N>
+ constexpr static simd<scalar_type, N> ge(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept
+ {
+ return bitcast<scalar_type>(x >= y);
+ }
+};
+
+namespace internal
+{
+template <typename T, typename... Args, size_t... indices, size_t N = 1 + sizeof...(Args)>
+constexpr KFR_INLINE vec<T, N> make_vector_impl(csizes_t<indices...>, const T& x, const Args&... rest)
+{
+ constexpr size_t width = compound_type_traits<T>::width;
+ const std::tuple<const T&, const Args&...> list(x, rest...);
+ typename vec<T, N>::simd_t result{ compound_type_traits<T>::at(std::get<indices / width>(list),
+ indices % width)... };
+ return result;
+}
+}
+
+/// Create vector from scalar values
+/// @code
+/// CHECK( make_vector( 1, 2, 3, 4 ) == i32x4{1, 2, 3, 4} );
+/// @endcode
+template <typename Type = void, typename Arg, typename... Args, size_t N = (sizeof...(Args) + 1),
+ typename SubType = conditional<is_void<Type>::value, common_type<Arg, Args...>, Type>>
+constexpr KFR_INLINE vec<SubType, N> make_vector(const Arg& x, const Args&... rest)
+{
+ return internal::make_vector_impl<SubType>(csizeseq<N * widthof<SubType>()>, static_cast<SubType>(x),
+ static_cast<SubType>(rest)...);
+}
+template <typename T, size_t N>
+constexpr KFR_INLINE vec<T, N> make_vector(vec<T, N> x)
+{
+ return x;
+}
+template <typename T, T... Values, size_t N = sizeof...(Values)>
+constexpr KFR_INLINE vec<T, N> make_vector(cvals_t<T, Values...>)
+{
+ return make_vector<T>(Values...);
+}
+KFR_FN(make_vector)
+
+template <typename T, size_t N>
+struct vec : vec_t<T, N>
+{
+ static_assert(N > 0 && N <= 256, "Invalid vector size");
+
+ using value_type = T;
+ using scalar_type = subtype<T>;
+ constexpr static size_t scalar_size() noexcept { return N * compound_type_traits<T>::width; }
+ using simd_t = simd<scalar_type, scalar_size()>;
+ using ref = vec&;
+ using cref = const vec&;
+
+ constexpr static bool is_pod = true;
+
+ constexpr KFR_INLINE vec() noexcept {}
+ constexpr KFR_INLINE vec(simd_t value) noexcept : v(value) {}
+ constexpr KFR_INLINE vec(const array_ref<T>& value) noexcept
+ : v(*internal_read_write::read<N, false>(value.data()))
+ {
+ }
+ template <typename U,
+ KFR_ENABLE_IF(std::is_convertible<U, T>::value&& compound_type_traits<T>::width > 1)>
+ constexpr KFR_INLINE vec(const U& value) noexcept
+ : v(*resize<scalar_size()>(bitcast<scalar_type>(make_vector(static_cast<T>(value)))))
+ {
+ }
+ template <typename U,
+ KFR_ENABLE_IF(std::is_convertible<U, T>::value&& compound_type_traits<T>::width == 1)>
+ constexpr KFR_INLINE vec(const U& value) noexcept : v(static_cast<T>(value))
+ {
+ }
+ template <typename... Ts>
+ constexpr KFR_INLINE vec(const T& x, const T& y, const Ts&... rest) noexcept
+ : v(*make_vector<T>(x, y, rest...))
+ {
+ static_assert(N <= 2 + sizeof...(Ts), "Too few initializers for vec");
+ }
+ template <size_t N1, size_t N2, size_t... Ns>
+ constexpr KFR_INLINE vec(const vec<T, N1>& v1, const vec<T, N2>& v2,
+ const vec<T, Ns>&... vectors) noexcept : v(*concat(v1, v2, vectors...))
+ {
+ static_assert(csum(csizes<N1, N2, Ns...>) == N, "Can't concat vectors: invalid csizes");
+ }
+ constexpr KFR_INLINE vec(const vec&) noexcept = default;
+ constexpr KFR_INLINE vec(vec&&) noexcept = default;
+ constexpr KFR_INLINE vec& operator=(const vec&) noexcept = default;
+ constexpr KFR_INLINE vec& operator=(vec&&) noexcept = default;
+
+ friend constexpr KFR_INLINE vec operator+(vec x, vec y) { return vec_op<T>::add(x.v, y.v); }
+ friend constexpr KFR_INLINE vec operator-(vec x, vec y) { return vec_op<T>::sub(x.v, y.v); }
+ friend constexpr KFR_INLINE vec operator*(vec x, vec y) { return vec_op<T>::mul(x.v, y.v); }
+ friend constexpr KFR_INLINE vec operator/(vec x, vec y) { return vec_op<T>::div(x.v, y.v); }
+ friend constexpr KFR_INLINE vec operator%(vec x, vec y) { return vec_op<T>::rem(x.v, y.v); }
+ friend constexpr KFR_INLINE vec operator-(vec x) { return vec_op<T>::neg(x.v); }
+
+ friend constexpr KFR_INLINE vec operator&(vec x, vec y) { return vec_op<T>::band(x.v, y.v); }
+ friend constexpr KFR_INLINE vec operator|(vec x, vec y) { return vec_op<T>::bor(x.v, y.v); }
+ friend constexpr KFR_INLINE vec operator^(vec x, vec y) { return vec_op<T>::bxor(x.v, y.v); }
+ friend constexpr KFR_INLINE vec operator~(vec x) { return vec_op<T>::bnot(x.v); }
+
+ friend constexpr KFR_INLINE vec operator<<(vec x, vec y) { return vec_op<T>::shl(x.v, y.v); }
+ friend constexpr KFR_INLINE vec operator>>(vec x, vec y) { return vec_op<T>::shr(x.v, y.v); }
+
+ friend constexpr KFR_INLINE mask<T, N> operator==(vec x, vec y) { return vec_op<T>::eq(x.v, y.v); }
+ friend constexpr KFR_INLINE mask<T, N> operator!=(vec x, vec y) { return vec_op<T>::ne(x.v, y.v); }
+ friend constexpr KFR_INLINE mask<T, N> operator<(vec x, vec y) { return vec_op<T>::lt(x.v, y.v); }
+ friend constexpr KFR_INLINE mask<T, N> operator>(vec x, vec y) { return vec_op<T>::gt(x.v, y.v); }
+ friend constexpr KFR_INLINE mask<T, N> operator<=(vec x, vec y) { return vec_op<T>::le(x.v, y.v); }
+ friend constexpr KFR_INLINE mask<T, N> operator>=(vec x, vec y) { return vec_op<T>::ge(x.v, y.v); }
+
+#define KFR_ASGN_OP(aop, op) \
+ friend KFR_INLINE vec& operator aop(vec& x, vec y) \
+ { \
+ x = x op y; \
+ return x; \
+ }
+ KFR_ASGN_OP(+=, +)
+ KFR_ASGN_OP(-=, -)
+ KFR_ASGN_OP(*=, *)
+ KFR_ASGN_OP(/=, /)
+ KFR_ASGN_OP(%=, %)
+ KFR_ASGN_OP(&=, &)
+ KFR_ASGN_OP(|=, |)
+ KFR_ASGN_OP(^=, ^)
+ KFR_ASGN_OP(<<=, <<)
+ KFR_ASGN_OP(>>=, >>)
+
+ constexpr KFR_INLINE simd_t operator*() const { return v; }
+ constexpr KFR_INLINE simd_t& operator*() { return v; }
+ KFR_INLINE mask<T, N>& asmask() { return ref_cast<mask<T, N>>(*this); }
+ KFR_INLINE const mask<T, N>& asmask() const { return ref_cast<mask<T, N>>(*this); }
+ KFR_INLINE value_type operator[](size_t index) const { return data()[index]; }
+
+ KFR_INLINE value_type* data() { return ptr_cast<T>(&v); }
+ KFR_INLINE const T* data() const { return ptr_cast<T>(&v); }
+ using array_t = T (&)[N];
+ KFR_INLINE array_t arr() { return ref_cast<array_t>(v); }
+
+ template <typename U, KFR_ENABLE_IF(std::is_convertible<T, U>::value)>
+ constexpr operator vec<U, N>() noexcept
+ {
+ return cast<U>(*this);
+ }
+
+private:
+ struct getter_setter;
+
+public:
+ getter_setter operator()(size_t index) { return { v, index }; }
+ scalar_type operator()(size_t index) const { return v[index]; }
+
+protected:
+ template <typename U, size_t M>
+ friend struct vec;
+ template <typename U, size_t M>
+ friend struct mask;
+ simd_t v;
+
+private:
+ struct getter_setter
+ {
+ constexpr getter_setter(simd_t& v, size_t index) noexcept : v(v), index(index) {}
+ KFR_INLINE getter_setter& operator=(scalar_type value) noexcept
+ {
+ v[index] = value;
+ return *this;
+ }
+ KFR_INLINE operator scalar_type() const { return v[index]; }
+ private:
+ friend struct vec;
+ simd_t& v;
+ const size_t index;
+ };
+};
+
+template <typename T, size_t N>
+struct mask : public vec<T, N>
+{
+ using type = T;
+ constexpr static size_t width = N;
+
+ using base = vec<T, N>;
+
+ constexpr KFR_INLINE mask() noexcept : base() {}
+
+ constexpr KFR_INLINE mask(simd<T, N> value) noexcept : base(value) {}
+ template <size_t N1, size_t... Ns>
+ constexpr KFR_INLINE mask(const mask<T, N1>& mask1, const mask<T, Ns>&... masks) noexcept
+ : base(*concat(mask1, masks...))
+ {
+ }
+ template <typename... Ts, typename = enable_if<sizeof...(Ts) + 2 == N>>
+ constexpr KFR_INLINE mask(bool x, bool y, Ts... rest) noexcept
+ : base{ internal::maskbits<T>(x), internal::maskbits<T>(y), internal::maskbits<T>(rest)... }
+ {
+ }
+ constexpr KFR_INLINE mask(const mask&) noexcept = default;
+ constexpr KFR_INLINE mask(mask&&) noexcept = default;
+ KFR_INLINE mask& operator=(const mask&) noexcept = default;
+ KFR_INLINE mask& operator=(mask&&) noexcept = default;
+
+ template <typename M, typename = u8[sizeof(T) == sizeof(M)]>
+ constexpr KFR_INLINE mask(vec<M, N> value) : base(reinterpret_cast<const vec<T, N>&>(value))
+ {
+ }
+
+ template <typename M, typename = u8[sizeof(T) == sizeof(M)]>
+ constexpr KFR_INLINE mask(mask<M, N> value) : base(reinterpret_cast<const vec<T, N>&>(value))
+ {
+ }
+ constexpr KFR_INLINE mask operator~() const { return bitcast<T>(~ubitcast(this->v)); }
+ constexpr KFR_INLINE mask operator&(vec<T, N> x) const
+ {
+ return bitcast<T>(ubitcast(this->v) & ubitcast(x.v));
+ }
+ constexpr KFR_INLINE mask operator|(vec<T, N> x) const
+ {
+ return bitcast<T>(ubitcast(this->v) | ubitcast(x.v));
+ }
+ constexpr KFR_INLINE mask operator^(vec<T, N> x) const
+ {
+ return bitcast<T>(ubitcast(this->v) ^ ubitcast(x.v));
+ }
+
+ constexpr KFR_INLINE mask operator&&(mask x) const { return *this & x; }
+ constexpr KFR_INLINE mask operator||(mask x) const { return *this | x; }
+ constexpr KFR_INLINE mask operator!() const { return ~*this; }
+
+ constexpr KFR_INLINE simd<T, N> operator*() const { return this->v; }
+
+ KFR_INLINE vec<T, N>& asvec() { return ref_cast<mask>(*this); }
+ KFR_INLINE const vec<T, N>& asvec() const { return ref_cast<mask>(*this); }
+
+ KFR_INLINE bool operator[](size_t index) const { return ibitcast(this->v[index]) < 0; }
+};
+
+template <typename T, size_t N>
+using cvec = vec<T, N * 2>;
+
+namespace internal
+{
+
+template <size_t start, size_t count>
+struct shuffle_index_extend
+{
+ constexpr KFR_INLINE size_t operator()(size_t index) const
+ {
+ return index >= start && index < start + count ? index - start : index_undefined;
+ }
+};
+
+template <size_t start, size_t count, typename T, size_t N>
+KFR_INLINE vec<T, count> concatexact(vec<T, N> x, vec<T, N> y)
+{
+ return kfr::shufflevector<count, internal::shuffle_index<start>>(x, y);
+}
+
+template <size_t start, size_t count, typename T, size_t N1, size_t N2>
+KFR_INLINE enable_if<(N1 == N2), vec<T, count>> concattwo(vec<T, N1> x, vec<T, N2> y)
+{
+ return concatexact<start, count>(x, y);
+}
+
+template <size_t start, size_t count, typename T, size_t N1, size_t N2>
+KFR_INLINE enable_if<(N1 > N2), vec<T, count>> concattwo(vec<T, N1> x, vec<T, N2> y)
+{
+ return concatexact<start, count>(x, shufflevector<N1, internal::shuffle_index_extend<0, N2>>(y));
+}
+template <size_t start, size_t count, typename T, size_t N1, size_t N2>
+KFR_INLINE enable_if<(N1 < N2), vec<T, count>> concattwo(vec<T, N1> x, vec<T, N2> y)
+{
+ return concatexact<N2 - N1 + start, count>(
+ shufflevector<N2, internal::shuffle_index_extend<N2 - N1, N1>>(x), y);
+}
+
+template <typename T, size_t Nout, size_t N1, size_t... indices>
+constexpr mask<T, Nout> partial_mask_helper(csizes_t<indices...>)
+{
+ return make_vector(maskbits<T>(indices < N1)...);
+}
+template <typename T, size_t Nout, size_t N1>
+constexpr mask<T, Nout> partial_mask()
+{
+ return internal::partial_mask_helper<T, Nout, N1>(csizeseq<Nout>);
+}
+
+template <typename T, size_t N>
+KFR_INLINE vec<T, N> concat(vec<T, N> x)
+{
+ return x;
+}
+
+template <typename T, size_t N1, size_t N2>
+KFR_INLINE vec<T, N1 + N2> concat(vec<T, N1> x, vec<T, N2> y)
+{
+ return concattwo<0, N1 + N2>(x, y);
+}
+
+template <typename T, size_t N1, size_t N2, size_t... Sizes>
+KFR_INLINE auto concat(vec<T, N1> x, vec<T, N2> y, vec<T, Sizes>... args)
+{
+ return concat(x, concat(y, args...));
+}
+}
+
+template <typename T, size_t N, size_t... Sizes, size_t Nout = N + csum(csizes_t<Sizes...>())>
+KFR_INLINE vec<T, Nout> concat(vec<T, N> x, vec<T, Sizes>... rest)
+{
+ return internal::concat(x, rest...);
+}
+KFR_FN(concat)
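+// Example: concat joins vectors of possibly different widths, assuming
+// make_vector deduces vec<f32, 2> from two float arguments:
+//   f32x2 a  = make_vector(1.f, 2.f);
+//   f32x2 b  = make_vector(3.f, 4.f);
+//   f32x4 ab = concat(a, b); // { 1, 2, 3, 4 }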
+
+using f32x1 = vec<f32, 1>;
+using f32x2 = vec<f32, 2>;
+using f32x3 = vec<f32, 3>;
+using f32x4 = vec<f32, 4>;
+using f32x8 = vec<f32, 8>;
+using f32x16 = vec<f32, 16>;
+using f32x32 = vec<f32, 32>;
+using f64x1 = vec<f64, 1>;
+using f64x2 = vec<f64, 2>;
+using f64x3 = vec<f64, 3>;
+using f64x4 = vec<f64, 4>;
+using f64x8 = vec<f64, 8>;
+using f64x16 = vec<f64, 16>;
+using f64x32 = vec<f64, 32>;
+using i8x1 = vec<i8, 1>;
+using i8x2 = vec<i8, 2>;
+using i8x3 = vec<i8, 3>;
+using i8x4 = vec<i8, 4>;
+using i8x8 = vec<i8, 8>;
+using i8x16 = vec<i8, 16>;
+using i8x32 = vec<i8, 32>;
+using i16x1 = vec<i16, 1>;
+using i16x2 = vec<i16, 2>;
+using i16x3 = vec<i16, 3>;
+using i16x4 = vec<i16, 4>;
+using i16x8 = vec<i16, 8>;
+using i16x16 = vec<i16, 16>;
+using i16x32 = vec<i16, 32>;
+using i32x1 = vec<i32, 1>;
+using i32x2 = vec<i32, 2>;
+using i32x3 = vec<i32, 3>;
+using i32x4 = vec<i32, 4>;
+using i32x8 = vec<i32, 8>;
+using i32x16 = vec<i32, 16>;
+using i32x32 = vec<i32, 32>;
+using i64x1 = vec<i64, 1>;
+using i64x2 = vec<i64, 2>;
+using i64x3 = vec<i64, 3>;
+using i64x4 = vec<i64, 4>;
+using i64x8 = vec<i64, 8>;
+using i64x16 = vec<i64, 16>;
+using i64x32 = vec<i64, 32>;
+using u8x1 = vec<u8, 1>;
+using u8x2 = vec<u8, 2>;
+using u8x3 = vec<u8, 3>;
+using u8x4 = vec<u8, 4>;
+using u8x8 = vec<u8, 8>;
+using u8x16 = vec<u8, 16>;
+using u8x32 = vec<u8, 32>;
+using u16x1 = vec<u16, 1>;
+using u16x2 = vec<u16, 2>;
+using u16x3 = vec<u16, 3>;
+using u16x4 = vec<u16, 4>;
+using u16x8 = vec<u16, 8>;
+using u16x16 = vec<u16, 16>;
+using u16x32 = vec<u16, 32>;
+using u32x1 = vec<u32, 1>;
+using u32x2 = vec<u32, 2>;
+using u32x3 = vec<u32, 3>;
+using u32x4 = vec<u32, 4>;
+using u32x8 = vec<u32, 8>;
+using u32x16 = vec<u32, 16>;
+using u32x32 = vec<u32, 32>;
+using u64x1 = vec<u64, 1>;
+using u64x2 = vec<u64, 2>;
+using u64x3 = vec<u64, 3>;
+using u64x4 = vec<u64, 4>;
+using u64x8 = vec<u64, 8>;
+using u64x16 = vec<u64, 16>;
+using u64x32 = vec<u64, 32>;
+
+using mf32x1 = mask<f32, 1>;
+using mf32x2 = mask<f32, 2>;
+using mf32x3 = mask<f32, 3>;
+using mf32x4 = mask<f32, 4>;
+using mf32x8 = mask<f32, 8>;
+using mf32x16 = mask<f32, 16>;
+using mf32x32 = mask<f32, 32>;
+using mf64x1 = mask<f64, 1>;
+using mf64x2 = mask<f64, 2>;
+using mf64x3 = mask<f64, 3>;
+using mf64x4 = mask<f64, 4>;
+using mf64x8 = mask<f64, 8>;
+using mf64x16 = mask<f64, 16>;
+using mf64x32 = mask<f64, 32>;
+using mi8x1 = mask<i8, 1>;
+using mi8x2 = mask<i8, 2>;
+using mi8x3 = mask<i8, 3>;
+using mi8x4 = mask<i8, 4>;
+using mi8x8 = mask<i8, 8>;
+using mi8x16 = mask<i8, 16>;
+using mi8x32 = mask<i8, 32>;
+using mi16x1 = mask<i16, 1>;
+using mi16x2 = mask<i16, 2>;
+using mi16x3 = mask<i16, 3>;
+using mi16x4 = mask<i16, 4>;
+using mi16x8 = mask<i16, 8>;
+using mi16x16 = mask<i16, 16>;
+using mi16x32 = mask<i16, 32>;
+using mi32x1 = mask<i32, 1>;
+using mi32x2 = mask<i32, 2>;
+using mi32x3 = mask<i32, 3>;
+using mi32x4 = mask<i32, 4>;
+using mi32x8 = mask<i32, 8>;
+using mi32x16 = mask<i32, 16>;
+using mi32x32 = mask<i32, 32>;
+using mi64x1 = mask<i64, 1>;
+using mi64x2 = mask<i64, 2>;
+using mi64x3 = mask<i64, 3>;
+using mi64x4 = mask<i64, 4>;
+using mi64x8 = mask<i64, 8>;
+using mi64x16 = mask<i64, 16>;
+using mi64x32 = mask<i64, 32>;
+using mu8x1 = mask<u8, 1>;
+using mu8x2 = mask<u8, 2>;
+using mu8x3 = mask<u8, 3>;
+using mu8x4 = mask<u8, 4>;
+using mu8x8 = mask<u8, 8>;
+using mu8x16 = mask<u8, 16>;
+using mu8x32 = mask<u8, 32>;
+using mu16x1 = mask<u16, 1>;
+using mu16x2 = mask<u16, 2>;
+using mu16x3 = mask<u16, 3>;
+using mu16x4 = mask<u16, 4>;
+using mu16x8 = mask<u16, 8>;
+using mu16x16 = mask<u16, 16>;
+using mu16x32 = mask<u16, 32>;
+using mu32x1 = mask<u32, 1>;
+using mu32x2 = mask<u32, 2>;
+using mu32x3 = mask<u32, 3>;
+using mu32x4 = mask<u32, 4>;
+using mu32x8 = mask<u32, 8>;
+using mu32x16 = mask<u32, 16>;
+using mu32x32 = mask<u32, 32>;
+using mu64x1 = mask<u64, 1>;
+using mu64x2 = mask<u64, 2>;
+using mu64x3 = mask<u64, 3>;
+using mu64x4 = mask<u64, 4>;
+using mu64x8 = mask<u64, 8>;
+using mu64x16 = mask<u64, 16>;
+using mu64x32 = mask<u64, 32>;
+
+namespace glsl_names
+{
+using vec2 = f32x2;
+using vec3 = f32x3;
+using vec4 = f32x4;
+using dvec2 = f64x2;
+using dvec3 = f64x3;
+using dvec4 = f64x4;
+using ivec2 = i32x2;
+using ivec3 = i32x3;
+using ivec4 = i32x4;
+using uvec2 = u32x2;
+using uvec3 = u32x3;
+using uvec4 = u32x4;
+}
+namespace opencl_names
+{
+using char2 = i8x2;
+using char3 = i8x3;
+using char4 = i8x4;
+using char8 = i8x8;
+using char16 = i8x16;
+using uchar2 = u8x2;
+using uchar3 = u8x3;
+using uchar4 = u8x4;
+using uchar8 = u8x8;
+using uchar16 = u8x16;
+
+using short2 = i16x2;
+using short3 = i16x3;
+using short4 = i16x4;
+using short8 = i16x8;
+using short16 = i16x16;
+using ushort2 = u16x2;
+using ushort3 = u16x3;
+using ushort4 = u16x4;
+using ushort8 = u16x8;
+using ushort16 = u16x16;
+
+using int2 = i32x2;
+using int3 = i32x3;
+using int4 = i32x4;
+using int8 = i32x8;
+using int16 = i32x16;
+using uint2 = u32x2;
+using uint3 = u32x3;
+using uint4 = u32x4;
+using uint8 = u32x8;
+using uint16 = u32x16;
+
+using long2 = i64x2;
+using long3 = i64x3;
+using long4 = i64x4;
+using long8 = i64x8;
+using long16 = i64x16;
+using ulong2 = u64x2;
+using ulong3 = u64x3;
+using ulong4 = u64x4;
+using ulong8 = u64x8;
+using ulong16 = u64x16;
+
+using float2 = f32x2;
+using float3 = f32x3;
+using float4 = f32x4;
+using float8 = f32x8;
+using float16 = f32x16;
+
+using double2 = f64x2;
+using double3 = f64x3;
+using double4 = f64x4;
+using double8 = f64x8;
+using double16 = f64x16;
+}
+
+namespace internal
+{
+using f32sse = vec<f32, vector_width<f32, cpu_t::sse2>>;
+using f64sse = vec<f64, vector_width<f64, cpu_t::sse2>>;
+using i8sse = vec<i8, vector_width<i8, cpu_t::sse2>>;
+using i16sse = vec<i16, vector_width<i16, cpu_t::sse2>>;
+using i32sse = vec<i32, vector_width<i32, cpu_t::sse2>>;
+using i64sse = vec<i64, vector_width<i64, cpu_t::sse2>>;
+using u8sse = vec<u8, vector_width<u8, cpu_t::sse2>>;
+using u16sse = vec<u16, vector_width<u16, cpu_t::sse2>>;
+using u32sse = vec<u32, vector_width<u32, cpu_t::sse2>>;
+using u64sse = vec<u64, vector_width<u64, cpu_t::sse2>>;
+
+using mf32sse = mask<f32, vector_width<f32, cpu_t::sse2>>;
+using mf64sse = mask<f64, vector_width<f64, cpu_t::sse2>>;
+using mi8sse = mask<i8, vector_width<i8, cpu_t::sse2>>;
+using mi16sse = mask<i16, vector_width<i16, cpu_t::sse2>>;
+using mi32sse = mask<i32, vector_width<i32, cpu_t::sse2>>;
+using mi64sse = mask<i64, vector_width<i64, cpu_t::sse2>>;
+using mu8sse = mask<u8, vector_width<u8, cpu_t::sse2>>;
+using mu16sse = mask<u16, vector_width<u16, cpu_t::sse2>>;
+using mu32sse = mask<u32, vector_width<u32, cpu_t::sse2>>;
+using mu64sse = mask<u64, vector_width<u64, cpu_t::sse2>>;
+
+using f32avx = vec<f32, vector_width<f32, cpu_t::avx1>>;
+using f64avx = vec<f64, vector_width<f64, cpu_t::avx1>>;
+using i8avx = vec<i8, vector_width<i8, cpu_t::avx2>>;
+using i16avx = vec<i16, vector_width<i16, cpu_t::avx2>>;
+using i32avx = vec<i32, vector_width<i32, cpu_t::avx2>>;
+using i64avx = vec<i64, vector_width<i64, cpu_t::avx2>>;
+using u8avx = vec<u8, vector_width<u8, cpu_t::avx2>>;
+using u16avx = vec<u16, vector_width<u16, cpu_t::avx2>>;
+using u32avx = vec<u32, vector_width<u32, cpu_t::avx2>>;
+using u64avx = vec<u64, vector_width<u64, cpu_t::avx2>>;
+
+using mf32avx = mask<f32, vector_width<f32, cpu_t::avx1>>;
+using mf64avx = mask<f64, vector_width<f64, cpu_t::avx1>>;
+using mi8avx = mask<i8, vector_width<i8, cpu_t::avx2>>;
+using mi16avx = mask<i16, vector_width<i16, cpu_t::avx2>>;
+using mi32avx = mask<i32, vector_width<i32, cpu_t::avx2>>;
+using mi64avx = mask<i64, vector_width<i64, cpu_t::avx2>>;
+using mu8avx = mask<u8, vector_width<u8, cpu_t::avx2>>;
+using mu16avx = mask<u16, vector_width<u16, cpu_t::avx2>>;
+using mu32avx = mask<u32, vector_width<u32, cpu_t::avx2>>;
+using mu64avx = mask<u64, vector_width<u64, cpu_t::avx2>>;
+
+template <typename T, size_t N>
+struct vec_type
+{
+ using type = vec<T, N>;
+};
+
+template <typename T, size_t Nmax>
+struct maxvec
+{
+ constexpr static size_t size = Nmax;
+ vec<T, size> vmax;
+ maxvec(T initial) : vmax(initial) {}
+ template <int N>
+ vec<T, N>& v()
+ {
+ static_assert(N <= size, "N <= size");
+ return reinterpret_cast<vec<T, N>&>(*this);
+ }
+ template <int N>
+ const vec<T, N>& v() const
+ {
+ static_assert(N <= size, "N <= size");
+ return reinterpret_cast<const vec<T, N>&>(*this);
+ }
+};
+
+template <size_t Index, typename T, size_t N, typename Fn, typename... Args,
+ typename Tout = result_of<Fn(subtype<remove_reference<Args>>...)>>
+constexpr KFR_INLINE Tout applyfn_helper(Fn&& fn, Args&&... args)
+{
+ return fn(args[Index]...);
+}
+
+template <typename T, size_t N, typename Fn, typename... Args,
+ typename Tout = result_of<Fn(subtype<remove_reference<Args>>...)>, size_t... Indices>
+constexpr KFR_INLINE vec<Tout, N> apply_helper(Fn&& fn, csizes_t<Indices...>, Args&&... args)
+{
+ return make_vector(applyfn_helper<Indices, T, N>(std::forward<Fn>(fn), std::forward<Args>(args)...)...);
+}
+template <typename T, size_t N, typename Fn, size_t... Indices>
+constexpr KFR_INLINE vec<T, N> apply0_helper(Fn&& fn, csizes_t<Indices...>)
+{
+ return make_vector(((void)Indices, void(), fn())...);
+}
+}
+
+template <typename T, size_t N, typename Fn, typename... Args,
+ typename Tout = result_of<Fn(T, subtype<remove_reference<Args>>...)>>
+constexpr KFR_INLINE vec<Tout, N> apply(Fn&& fn, vec<T, N> arg, Args&&... args)
+{
+ return internal::apply_helper<T, N>(std::forward<Fn>(fn), csizeseq<N>, arg, std::forward<Args>(args)...);
+}
+
+template <size_t N, typename Fn, typename T = result_of<Fn()>>
+constexpr KFR_INLINE vec<T, N> apply(Fn&& fn)
+{
+ return internal::apply0_helper<T, N>(std::forward<Fn>(fn), csizeseq<N>);
+}
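+// Example: apply maps a scalar functor over each element; the result type
+// is deduced from the functor:
+//   f32x4 x = make_vector(1.f, 2.f, 3.f, 4.f);
+//   f32x4 y = apply([](f32 v) { return v + 1; }, x); // { 2, 3, 4, 5 }
+// The zero-argument overload broadcasts a generator:
+//   f32x4 z = apply<4>([] { return 0.f; }); // { 0, 0, 0, 0 }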
+
+template <typename T, int N>
+KFR_INLINE vec<T, N> tovec(simd<T, N> x)
+{
+ return x;
+}
+KFR_INLINE f32x4 tovec(__m128 x) { return f32x4(x); }
+KFR_INLINE f64x2 tovec(__m128d x) { return f64x2(x); }
+KFR_INLINE f32x8 tovec(__m256 x) { return f32x8(x); }
+KFR_INLINE f64x4 tovec(__m256d x) { return f64x4(x); }
+
+template <typename T, typename... Args, size_t Nout = (sizeof...(Args) + 1)>
+constexpr KFR_INLINE mask<T, Nout> make_mask(bool arg, Args... args)
+{
+ simd<T, Nout> temp{ internal::maskbits<T>(arg), internal::maskbits<T>(static_cast<bool>(args))... };
+ return temp;
+}
+KFR_FN(make_mask)
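+// Example:
+//   mask<i32, 4> m = make_mask<i32>(true, false, false, true);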
+
+template <typename T, size_t N>
+constexpr KFR_INLINE vec<T, N> zerovector()
+{
+ constexpr size_t width = N * compound_type_traits<T>::width;
+ return subcast<T>(vec<subtype<T>, width>(simd<subtype<T>, width>()));
+}
+
+template <typename T, size_t N>
+constexpr KFR_INLINE vec<T, N> zerovector(vec_t<T, N>)
+{
+ return zerovector<T, N>();
+}
+KFR_FN(zerovector)
+
+template <typename T, size_t N>
+constexpr KFR_INLINE vec<T, N> allonesvector()
+{
+ return zerovector<T, N>() == zerovector<T, N>();
+}
+template <typename T, size_t N>
+constexpr KFR_INLINE vec<T, N> allonesvector(vec_t<T, N>)
+{
+ return allonesvector<T, N>();
+}
+KFR_FN(allonesvector)
+
+template <typename T, size_t N>
+constexpr KFR_INLINE vec<T, N> undefinedvector()
+{
+ return vec<T, N>{};
+}
+template <typename T, size_t N>
+constexpr KFR_INLINE vec<T, N> undefinedvector(vec_t<T, N>)
+{
+ return undefinedvector<T, N>();
+}
+KFR_FN(undefinedvector)
+
+template <typename T, size_t N, size_t Nout = prev_poweroftwo(N - 1)>
+KFR_INLINE vec<T, Nout> low(vec<T, N> x)
+{
+ return shufflevector<Nout, internal::shuffle_index<>>(x);
+}
+
+template <typename T, size_t N, size_t Nout = prev_poweroftwo(N - 1)>
+KFR_INLINE vec_t<T, Nout> low(vec_t<T, N>)
+{
+ return {};
+}
+
+template <typename T, size_t N, size_t Nout = N - prev_poweroftwo(N - 1)>
+KFR_INLINE vec<T, Nout> high(vec<T, N> x)
+{
+ return shufflevector<Nout, internal::shuffle_index<prev_poweroftwo(N - 1)>>(x);
+}
+
+template <typename T, size_t N, size_t Nout = N - prev_poweroftwo(N - 1)>
+KFR_INLINE vec_t<T, Nout> high(vec_t<T, N>)
+{
+ return {};
+}
+KFR_FN(low)
+KFR_FN(high)
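+// Example: low/high split a vector at the largest power of two below N:
+//   f32x4 v  = make_vector(1.f, 2.f, 3.f, 4.f);
+//   f32x2 lo = low(v);  // { 1, 2 }
+//   f32x2 hi = high(v); // { 3, 4 }
+// For N = 3 the split is 2 + 1: low yields f32x2, high yields f32x1.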
+
+namespace internal
+{
+
+template <typename Fn>
+struct expression_lambda : input_expression
+{
+ KFR_INLINE expression_lambda(Fn&& fn) : fn(std::move(fn)) {}
+
+ template <typename T, size_t N, KFR_ENABLE_IF(is_callable<Fn, cinput_t, size_t, vec_t<T, N>>::value)>
+ KFR_INLINE vec<T, N> operator()(cinput_t, size_t index, vec_t<T, N> y) const
+ {
+ return fn(cinput, index, y);
+ }
+
+ template <typename T, size_t N, KFR_ENABLE_IF(N&& is_callable<Fn, size_t>::value)>
+ KFR_INLINE vec<T, N> operator()(cinput_t, size_t index, vec_t<T, N>) const
+ {
+ vec<T, N> result;
+ for (size_t i = 0; i < N; i++)
+ {
+ result(i) = fn(index + i);
+ }
+ return result;
+ }
+ template <typename T, size_t N, KFR_ENABLE_IF(N&& is_callable<Fn>::value)>
+ KFR_INLINE vec<T, N> operator()(cinput_t, size_t, vec_t<T, N>) const
+ {
+ vec<T, N> result;
+ for (size_t i = 0; i < N; i++)
+ {
+ result(i) = fn();
+ }
+ return result;
+ }
+
+ Fn fn;
+};
+}
+
+template <typename Fn>
+internal::expression_lambda<decay<Fn>> lambda(Fn&& fn)
+{
+    return internal::expression_lambda<decay<Fn>>(std::forward<Fn>(fn));
+}
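+// Example sketch: adapt a plain callable into an input expression; which
+// operator() is selected depends on the callable's signature:
+//   auto gen = lambda([](size_t index) { return static_cast<f32>(index); });
+//   // reading N elements at position i yields { i, i+1, ..., i+N-1 }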
+}
+
+#pragma clang diagnostic pop
+
+namespace cometa
+{
+
+template <typename T, size_t N>
+struct compound_type_traits<kfr::simd<T, N>>
+{
+ using subtype = T;
+ using deep_subtype = cometa::deep_subtype<T>;
+ constexpr static size_t width = N;
+ constexpr static bool is_scalar = false;
+ template <typename U>
+ using rebind = kfr::simd<U, N>;
+ template <typename U>
+ using deep_rebind = kfr::simd<cometa::deep_rebind<subtype, U>, N>;
+
+ static constexpr const subtype& at(const kfr::simd<T, N>& value, size_t index) { return value[index]; }
+};
+
+template <typename T, size_t N>
+struct compound_type_traits<kfr::vec<T, N>>
+{
+ using subtype = T;
+ using deep_subtype = cometa::deep_subtype<T>;
+ constexpr static size_t width = N;
+ constexpr static bool is_scalar = false;
+ template <typename U>
+ using rebind = kfr::vec<U, N>;
+ template <typename U>
+ using deep_rebind = kfr::vec<cometa::deep_rebind<subtype, U>, N>;
+
+ static constexpr subtype at(const kfr::vec<T, N>& value, size_t index) { return value[index]; }
+};
+
+template <typename T, size_t N>
+struct compound_type_traits<kfr::mask<T, N>>
+{
+ using subtype = T;
+ using deep_subtype = cometa::deep_subtype<T>;
+ constexpr static size_t width = N;
+ constexpr static bool is_scalar = false;
+ template <typename U>
+ using rebind = kfr::mask<U, N>;
+ template <typename U>
+ using deep_rebind = kfr::mask<cometa::deep_rebind<subtype, U>, N>;
+
+ static constexpr subtype at(const kfr::mask<T, N>& value, size_t index) { return value[index]; }
+};
+}
diff --git a/include/kfr/cident.h b/include/kfr/cident.h
@@ -0,0 +1,357 @@
+#pragma once
+
+#if defined(_M_IX86) || defined(__i386__) || defined(_M_X64) || defined(__x86_64__)
+#define CID_ARCH_X86 1
+#endif
+
+#ifdef CID_ARCH_X86
+#if defined(_M_X64) || defined(__x86_64__)
+#define CID_ARCH_X64 1
+#else
+#define CID_ARCH_X32 1
+#endif
+
+#if defined __AVX512F__ && !defined CID_ARCH_AVX512
+#define CID_ARCH_AVX512 1
+#define CID_ARCH_AVX2 1
+#define CID_ARCH_AVX 1
+#define CID_ARCH_SSE42 1
+#define CID_ARCH_SSE41 1
+#define CID_ARCH_SSSE3 1
+#define CID_ARCH_SSE3 1
+#define CID_ARCH_SSE2 1
+#define CID_ARCH_SSE 1
+#endif
+#if defined __AVX2__ && !defined CID_ARCH_AVX2
+#define CID_ARCH_AVX2 1
+#define CID_ARCH_AVX 1
+#define CID_ARCH_SSE42 1
+#define CID_ARCH_SSE41 1
+#define CID_ARCH_SSSE3 1
+#define CID_ARCH_SSE3 1
+#define CID_ARCH_SSE2 1
+#define CID_ARCH_SSE 1
+#endif
+#if defined __AVX__ && !defined CID_ARCH_AVX
+#define CID_ARCH_AVX 1
+#define CID_ARCH_SSE42 1
+#define CID_ARCH_SSE41 1
+#define CID_ARCH_SSSE3 1
+#define CID_ARCH_SSE3 1
+#define CID_ARCH_SSE2 1
+#define CID_ARCH_SSE 1
+#endif
+#if defined __SSE4_2__ && !defined CID_ARCH_SSE42
+#define CID_ARCH_SSE42 1
+#define CID_ARCH_SSE41 1
+#define CID_ARCH_SSSE3 1
+#define CID_ARCH_SSE3 1
+#define CID_ARCH_SSE2 1
+#define CID_ARCH_SSE 1
+#endif
+#if defined __SSE4_1__ && !defined CID_ARCH_SSE41
+#define CID_ARCH_SSE41 1
+#define CID_ARCH_SSSE3 1
+#define CID_ARCH_SSE3 1
+#define CID_ARCH_SSE2 1
+#define CID_ARCH_SSE 1
+#endif
+#if defined __SSSE3__ && !defined CID_ARCH_SSSE3
+#define CID_ARCH_SSSE3 1
+#define CID_ARCH_SSE3 1
+#define CID_ARCH_SSE2 1
+#define CID_ARCH_SSE 1
+#endif
+#if defined __SSE3__ && !defined CID_ARCH_SSE3
+#define CID_ARCH_SSE3 1
+#define CID_ARCH_SSE2 1
+#define CID_ARCH_SSE 1
+#endif
+#if (defined CID_ARCH_X64 || defined __SSE2__) && !defined CID_ARCH_SSE2
+#define CID_ARCH_SSE2 1
+#define CID_ARCH_SSE 1
+#endif
+
+#if (defined CID_ARCH_X64 || defined __SSE__) && !defined CID_ARCH_SSE
+#define CID_ARCH_SSE 1
+#endif
+
+#if defined __FMA__ && !defined CID_ARCH_FMA
+#define CID_ARCH_FMA 1
+#endif
+
+#if defined __AES__ && !defined CID_ARCH_AES
+#define CID_ARCH_AES 1
+#endif
+
+#if defined __BMI__ && !defined CID_ARCH_BMI
+#define CID_ARCH_BMI 1
+#endif
+
+#if defined __BMI2__ && !defined CID_ARCH_BMI2
+#define CID_ARCH_BMI2 1
+#endif
+
+#if defined __LZCNT__ && !defined CID_ARCH_LZCNT
+#define CID_ARCH_LZCNT 1
+#endif
+
+#if defined CID_ARCH_AVX512
+#define CID_ARCH_NAME avx512
+#elif defined CID_ARCH_AVX2
+#define CID_ARCH_NAME avx2
+#elif defined CID_ARCH_AVX
+#define CID_ARCH_NAME avx
+#elif defined CID_ARCH_SSE42
+#define CID_ARCH_NAME sse42
+#elif defined CID_ARCH_SSE41
+#define CID_ARCH_NAME sse41
+#elif defined CID_ARCH_SSSE3
+#define CID_ARCH_NAME ssse3
+#elif defined CID_ARCH_SSE3
+#define CID_ARCH_NAME sse3
+#elif defined CID_ARCH_SSE2
+#define CID_ARCH_NAME sse2
+#elif defined CID_ARCH_SSE
+#define CID_ARCH_NAME sse
+#else
+#define CID_ARCH_NAME legacy
+#endif
+
+#endif
+
+#define CID_STRINGIFY2(x) #x
+#define CID_STRINGIFY(x) CID_STRINGIFY2(x)
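+// Example: CID_STRINGIFY(CID_ARCH_NAME) expands to the detected ISA as a
+// string literal, e.g. "avx2" when compiling with -mavx2.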
+
+#if defined(_WIN32) // Windows
+#define CID_OS_WIN 1
+#endif
+
+#if defined(__APPLE__)
+#include "TargetConditionals.h"
+#if TARGET_IPHONE_SIMULATOR
+#define CID_OS_IOS 1
+#define CID_OS_IOS_SIMULATOR 1
+#define CID_OS_MOBILE 1
+#elif TARGET_OS_IPHONE
+#define CID_OS_IOS 1
+#define CID_OS_MOBILE 1
+#elif TARGET_OS_MAC
+#define CID_OS_MAC 1
+#define CID_OS_OSX 1
+#endif
+#define CID_OS_POSIX 1
+#endif
+
+#if defined(__ANDROID__)
+#define CID_OS_ANDROID 1
+#define CID_OS_MOBILE 1
+#define CID_OS_POSIX 1
+#endif
+
+#if defined(__linux__)
+#define CID_OS_LINUX 1
+#define CID_OS_POSIX 1
+#endif
+
+#if defined(_MSC_VER) // Visual C/C++
+#define CID_COMPILER_MSVC 1
+#define CID_MSVC_ATTRIBUTES 1
+#define CID_MSC_VER _MSC_VER
+#else
+#define CID_MSC_VER 0
+#endif
+
+#if defined(__GNUC__) // GCC, Clang
+#define CID_COMPILER_GNU 1
+#define CID_GNU_ATTRIBUTES 1
+#define CID_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+#if __cplusplus >= 201103L || defined __GXX_EXPERIMENTAL_CXX0X__
+#define CID_HAS_GXX_CXX11 1
+#endif
+#else
+#define CID_GCC_VERSION 0
+#endif
+
+#if defined(__INTEL_COMPILER) // Intel Compiler
+#define CID_COMPILER_INTEL 1
+#define CID_ICC_VERSION __INTEL_COMPILER
+#elif defined(__ICL)
+#define CID_COMPILER_INTEL 1
+#define CID_ICC_VERSION __ICL
+#else
+#define CID_ICC_VERSION 0
+#endif
+
+#if defined(__clang__) // Clang
+#define CID_COMPILER_CLANG 1
+#ifndef CID_GNU_ATTRIBUTES
+#define CID_GNU_ATTRIBUTES 1
+#endif
+#endif
+
+#if defined(CID_GNU_ATTRIBUTES)
+
+#define CID_NODEBUG
+// __attribute__((__nodebug__))
+#define CID_INLINE __inline__ __attribute__((__always_inline__))
+#define CID_INTRIN CID_INLINE CID_NODEBUG
+#define CID_INLINE_MEMBER __attribute__((__always_inline__))
+#define CID_INLINE_LAMBDA CID_INLINE_MEMBER
+#define CID_NOINLINE __attribute__((__noinline__))
+#define CID_FLATTEN __attribute__((__flatten__))
+#define CID_RESTRICT __restrict__
+
+#elif defined(CID_MSVC_ATTRIBUTES)
+
+#define CID_NODEBUG
+#define CID_INLINE inline __forceinline
+#define CID_INTRIN CID_INLINE CID_NODEBUG
+#define CID_INLINE_MEMBER __forceinline
+#define CID_INLINE_LAMBDA
+#define CID_NOINLINE __declspec(noinline)
+#define CID_FLATTEN
+#define CID_RESTRICT __restrict
+
+#endif
+
+#define CID_INLINE_STATIC CID_INLINE static
+
+#define CID_EXTERN_C extern "C"
+
+#define CID_PUBLIC_C CID_EXTERN_C CID_NOINLINE
+
+#define CID_ALWAYS_INLINE_STATIC CID_INLINE static
+
+#ifdef CID_OS_WIN
+#define CID_CDECL __cdecl
+#else
+#define CID_CDECL __attribute__((cdecl))
+#endif
+
+#ifdef CID_OS_WIN
+#if defined(CID_MSVC_ATTRIBUTES)
+#define CID_DLL_EXPORT __declspec(dllexport)
+#define CID_DLL_IMPORT __declspec(dllimport)
+#else
+#define CID_DLL_EXPORT __attribute__((dllexport))
+#define CID_DLL_IMPORT __attribute__((dllimport))
+#endif
+#else
+#define CID_DLL_EXPORT
+#define CID_DLL_IMPORT
+#endif
+
+#ifdef __has_builtin
+#define CID_HAS_BUILTIN(builtin) __has_builtin(builtin)
+#else
+#define CID_HAS_BUILTIN(builtin) 0
+#endif
+
+#ifdef __has_feature
+#define CID_HAS_FEATURE(feature) __has_feature(feature)
+#else
+#define CID_HAS_FEATURE(feature) 0
+#endif
+
+#ifdef __has_extension
+#define CID_HAS_EXTENSION(extension) __has_extension(extension)
+#else
+#define CID_HAS_EXTENSION(extension) 0
+#endif
+
+#ifdef __has_attribute
+#define CID_HAS_ATTRIBUTE(attribute) __has_attribute(attribute)
+#else
+#define CID_HAS_ATTRIBUTE(attribute) 0
+#endif
+
+#ifdef __has_warning
+#define CID_HAS_WARNING(warning) __has_warning(warning)
+#else
+#define CID_HAS_WARNING(warning) 0
+#endif
+
+#define CID_HAS_VARIADIC_TEMPLATES \
+ (CID_HAS_FEATURE(cxx_variadic_templates) || (CID_GCC_VERSION >= 404 && CID_HAS_GXX_CXX11) || \
+ CID_MSC_VER >= 1800)
+
+#ifdef CID_BUILDING_DLL
+#define CID_C_API CID_DLL_EXPORT
+#else
+#define CID_C_API CID_DLL_IMPORT
+#endif
+
+#if __cplusplus >= 201103L || CID_MSC_VER >= 1900 || CID_HAS_FEATURE(cxx_constexpr)
+#define CID_HAS_CONSTEXPR 1
+#endif
+
+#if __cpp_constexpr >= 201304 || CID_HAS_FEATURE(cxx_constexpr)
+#define CID_HAS_FULL_CONSTEXPR 1
+#endif
+
+#if CID_HAS_CONSTEXPR
+#define CID_CONSTEXPR constexpr
+#else
+#define CID_CONSTEXPR
+#endif
+
+#if CID_HAS_FEATURE(cxx_noexcept) || (CID_GCC_VERSION >= 408 && CID_HAS_GXX_CXX11) || CID_MSC_VER >= 1900
+#define CID_HAS_NOEXCEPT 1
+#endif
+
+#if CID_HAS_NOEXCEPT
+#define CID_NOEXCEPT noexcept
+#else
+#define CID_NOEXCEPT
+#endif
+
+#if CID_COMPILER_GNU && !defined(__EXCEPTIONS)
+#define CID_HAS_EXCEPTIONS 0
+#endif
+#if CID_MSC_VER && !_HAS_EXCEPTIONS
+#define CID_HAS_EXCEPTIONS 0
+#endif
+
+#ifndef CID_HAS_EXCEPTIONS
+#define CID_HAS_EXCEPTIONS 1
+#endif
+
+#include <assert.h>
+
+#ifndef CID_THROW
+#if CID_HAS_EXCEPTIONS
+#define CID_THROW(x) throw x
+#else
+#define CID_THROW(x) assert(false)
+#endif
+#endif
+
+#if __cplusplus >= 201103L || CID_MSC_VER >= 1900 || CID_HAS_FEATURE(cxx_constexpr)
+
+#include <cstdint>
+namespace cid
+{
+template <typename T, size_t N>
+constexpr inline static size_t arraysize(const T (&)[N]) noexcept
+{
+ return N;
+}
+}
+
+#define CID_ARRAYSIZE(arr) ::cid::arraysize(arr)
+#elif CID_COMPILER_MSVC
+#define CID_ARRAYSIZE(arr) _countof(arr)
+#elif __cplusplus >= 199711L && \
+ (defined(__INTEL_COMPILER) || defined(__clang__) || \
+ (defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4))))
+template <typename T, size_t N>
+char (&COUNTOF_REQUIRES_ARRAY_ARGUMENT(T (&)[N]))[N];
+#define CID_ARRAYSIZE(x) sizeof(COUNTOF_REQUIRES_ARRAY_ARGUMENT(x))
+#else
+#define CID_ARRAYSIZE(arr) (sizeof(arr) / sizeof(arr[0]))
+#endif
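+// Example:
+//   static const int table[16] = {};
+//   static_assert(CID_ARRAYSIZE(table) == 16, "unexpected table size");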
+
+#ifdef CID_COMPILER_MSVC
+#define CID_FUNC_SIGNATURE __FUNCSIG__
+#else
+#define CID_FUNC_SIGNATURE __PRETTY_FUNCTION__
+#endif
diff --git a/include/kfr/cometa.hpp b/include/kfr/cometa.hpp
@@ -0,0 +1,1819 @@
+#pragma once
+
+#include "cident.h"
+
+#include <algorithm>
+#include <array>
+#include <tuple>
+#include <type_traits>
+#include <vector>
+
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wshadow"
+
+namespace cometa
+{
+
+using std::size_t;
+
+using pvoid = void*;
+
+template <typename...>
+using void_t = void;
+
+namespace details
+{
+constexpr inline bool args_or() { return false; }
+template <typename... Ts>
+constexpr inline bool args_or(bool x, Ts... rest)
+{
+ return x || args_or(rest...);
+}
+
+constexpr inline bool args_and() { return true; }
+template <typename... Ts>
+constexpr inline bool args_and(bool x, Ts... rest)
+{
+    return x && args_and(rest...);
+}
+
+template <typename T, typename Enable = void>
+struct is_pod_impl : std::false_type
+{
+};
+
+template <typename T>
+struct is_pod_impl<T, void_t<decltype(T::is_pod)>> : std::integral_constant<bool, T::is_pod>
+{
+};
+}
+
+template <typename... Ts>
+struct or_t : std::integral_constant<bool, details::args_or(Ts::value...)>
+{
+};
+
+template <typename... Ts>
+struct and_t : std::integral_constant<bool, details::args_and(Ts::value...)>
+{
+};
+
+template <typename T>
+struct not_t : std::integral_constant<bool, !T::value>
+{
+};
+
+constexpr size_t max_size_t = size_t(-1);
+
+template <typename... T>
+using common_type = typename std::common_type<T...>::type;
+
+template <typename T>
+using result_of = typename std::result_of<T>::type;
+
+template <bool Condition, typename Type = void>
+using enable_if = typename std::enable_if<Condition, Type>::type;
+
+template <bool Condition, typename T, typename F>
+using conditional = typename std::conditional<Condition, T, F>::type;
+
+template <typename T>
+using remove_reference = typename std::remove_reference<T>::type;
+
+template <typename T>
+using remove_cv = typename std::remove_cv<T>::type;
+
+template <typename T>
+using remove_pointer = typename std::remove_pointer<T>::type;
+
+template <typename T>
+using remove_extent = typename std::remove_extent<T>::type;
+
+template <typename T>
+using remove_const = typename std::remove_const<T>::type;
+
+template <typename T>
+using underlying_type = typename std::underlying_type<T>::type;
+
+template <typename T>
+using is_pod = or_t<std::is_pod<T>, details::is_pod_impl<T>>;
+
+template <typename T>
+using is_class = std::is_class<T>;
+
+template <typename T>
+using is_const = std::is_const<T>;
+
+template <typename T>
+using is_pointer = std::is_pointer<T>;
+
+template <typename T>
+using is_array = std::is_array<T>;
+
+template <typename T>
+using is_void = std::is_void<T>;
+
+template <typename T1, typename T2>
+using is_same = std::is_same<T1, T2>;
+
+template <typename T>
+using is_template_arg = std::integral_constant<bool, std::is_integral<T>::value || std::is_enum<T>::value>;
+
+template <typename T>
+using decay = typename std::decay<T>::type;
+
+template <typename... T>
+using decay_common = decay<common_type<T...>>;
+
+template <typename T1, typename T2 = void, typename... Ts>
+constexpr size_t typeindex()
+{
+ return is_same<T1, T2>() ? 0 : 1 + typeindex<T1, Ts...>();
+}
+
+template <typename T>
+struct compound_type_traits
+{
+ constexpr static size_t width = 1;
+ using subtype = T;
+ using deep_subtype = T;
+ constexpr static bool is_scalar = true;
+
+ template <typename U>
+ using rebind = U;
+ template <typename U>
+ using deep_rebind = U;
+
+ static constexpr const subtype& at(const T& value, size_t /*index*/) { return value; }
+};
+
+template <typename T>
+using is_compound = std::integral_constant<bool, !compound_type_traits<decay<T>>::is_scalar>;
+
+template <typename T>
+using subtype = typename compound_type_traits<T>::subtype;
+
+template <typename T>
+using deep_subtype = typename compound_type_traits<T>::deep_subtype;
+
+template <typename T, typename SubType>
+using rebind = typename compound_type_traits<T>::template rebind<SubType>;
+
+template <typename T, typename SubType>
+using deep_rebind = typename compound_type_traits<T>::template deep_rebind<SubType>;
+
+template <typename T>
+struct compound_type_traits<std::pair<T, T>>
+{
+ constexpr static size_t width = 2;
+ using subtype = T;
+ using deep_subtype = cometa::deep_subtype<T>;
+ constexpr static bool is_scalar = false;
+
+ template <typename U>
+ using rebind = std::pair<U, U>;
+ template <typename U>
+ using deep_rebind = std::pair<cometa::deep_rebind<subtype, U>, cometa::deep_rebind<subtype, U>>;
+
+ static constexpr const subtype& at(const std::pair<subtype, subtype>& value, size_t index)
+ {
+ return index == 0 ? value.first : value.second;
+ }
+};
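+// Example: for std::pair<float, float>, subtype is float, width is 2 and
+// at(p, 1) returns p.second; scalar types use the primary template with
+// width 1.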
+
+template <typename T, T val>
+struct cval_t
+{
+ constexpr static T value = val;
+ constexpr cval_t() noexcept = default;
+ constexpr cval_t(const cval_t&) noexcept = default;
+ constexpr cval_t(cval_t&&) noexcept = default;
+ typedef T value_type;
+ typedef cval_t type;
+ constexpr operator value_type() const { return value; }
+ constexpr value_type operator()() const { return value; }
+};
+
+template <typename T, T value>
+constexpr inline T val_of(cval_t<T, value>)
+{
+ return value;
+}
+
+template <typename T>
+constexpr inline T val_of(T value)
+{
+ return value;
+}
+
+template <typename T>
+constexpr inline bool is_constant_val(T)
+{
+ return false;
+}
+
+template <typename T, T value>
+constexpr inline bool is_constant_val(cval_t<T, value>)
+{
+ return true;
+}
+
+namespace details
+{
+
+template <typename T>
+struct inherit : T
+{
+};
+
+template <typename T, typename Enable = void>
+struct is_inheritable_impl : std::false_type
+{
+};
+
+template <typename T>
+struct is_inheritable_impl<T, void_t<inherit<T>>> : std::true_type
+{
+};
+
+template <typename T>
+struct is_val_impl : std::false_type
+{
+};
+
+template <typename T, T val>
+struct is_val_impl<cval_t<T, val>> : std::true_type
+{
+};
+}
+
+template <typename T>
+using is_inheritable = typename details::is_inheritable_impl<T>::type;
+
+template <typename T>
+using is_val_t = typename details::is_val_impl<T>::type;
+
+template <bool val>
+using cbool_t = cval_t<bool, val>;
+
+template <int val>
+using cint_t = cval_t<int, val>;
+
+template <unsigned val>
+using cuint_t = cval_t<unsigned, val>;
+
+template <size_t val>
+using csize_t = cval_t<size_t, val>;
+
+template <typename T, T val>
+constexpr cval_t<T, val> cval{};
+
+template <bool val>
+constexpr cbool_t<val> cbool{};
+
+using cfalse_t = cbool_t<false>;
+using ctrue_t = cbool_t<true>;
+
+constexpr ctrue_t ctrue{};
+constexpr cfalse_t cfalse{};
+
+template <int val>
+constexpr cint_t<val> cint{};
+
+template <unsigned val>
+constexpr cuint_t<val> cuint{};
+
+template <size_t val>
+constexpr csize_t<val> csize{};
+
+namespace details
+{
+template <size_t index, typename T, T first, T... rest>
+struct get_nth : get_nth<index - 1, T, rest...>
+{
+};
+
+template <typename T, T first, T... rest>
+struct get_nth<0, T, first, rest...>
+{
+ constexpr static T value = first;
+};
+
+template <size_t index, typename... Types>
+struct get_nth_type;
+
+template <size_t index, typename first, typename... rest>
+struct get_nth_type<index, first, rest...> : get_nth_type<index - 1, rest...>
+{
+};
+
+template <typename first, typename... rest>
+struct get_nth_type<0, first, rest...>
+{
+ using type = first;
+};
+
+template <size_t index>
+struct get_nth_type<index>
+{
+};
+}
+
+template <typename T, T... values>
+struct cvals_t
+{
+ using type = cvals_t<T, values...>;
+ constexpr static size_t size() { return sizeof...(values); }
+ template <size_t index>
+ constexpr T operator[](csize_t<index>)
+ {
+ return get(csize<index>);
+ }
+ template <size_t index>
+ constexpr static T get(csize_t<index> = csize_t<index>())
+ {
+ return details::get_nth<index, T, values...>::value;
+ }
+ constexpr static T front() { return get(csize<0>); }
+ constexpr static T back() { return get(csize<size() - 1>); }
+
+ static const T* begin() { return array(); }
+ static const T* end() { return array() + size(); }
+
+ static const T* array()
+ {
+ static const T arr[] = { values... };
+ return &arr[0];
+ }
+ template <size_t... indices>
+ constexpr cvals_t<T, details::get_nth<indices, T, values...>::value...> operator[](
+ cvals_t<size_t, indices...>) const
+ {
+ return {};
+ }
+};
+
+template <typename T>
+struct cvals_t<T>
+{
+ using type = cvals_t<T>;
+ constexpr static size_t size() { return 0; }
+};
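+// Example: a cvals_t carries its values in the type itself:
+//   using list = cvals_t<int, 10, 20, 30>;
+//   static_assert(list::get(csize<1>) == 20, "");
+//   static_assert(list::size() == 3, "");
+// list::array() exposes the same values as a runtime array.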
+
+namespace details
+{
+template <typename T1, typename T2>
+struct concat_impl;
+
+template <typename T, T... values1, T... values2>
+struct concat_impl<cvals_t<T, values1...>, cvals_t<T, values2...>>
+{
+ using type = cvals_t<T, values1..., values2...>;
+};
+}
+template <typename T1, typename T2>
+using concat_lists = typename details::concat_impl<T1, T2>::type;
+
+template <typename T1, typename T2>
+constexpr inline concat_lists<T1, T2> cconcat(T1, T2)
+{
+ return {};
+}
+
+template <bool... values>
+using cbools_t = cvals_t<bool, values...>;
+
+template <int... values>
+using cints_t = cvals_t<int, values...>;
+
+template <char... values>
+using cchars_t = cvals_t<char, values...>;
+
+template <unsigned... values>
+using cuints_t = cvals_t<unsigned, values...>;
+
+template <size_t... values>
+using csizes_t = cvals_t<size_t, values...>;
+
+template <size_t... values>
+using elements_t = cvals_t<size_t, values...>;
+
+template <typename T, T... values>
+constexpr cvals_t<T, values...> cvals{};
+
+template <bool... vals>
+constexpr cbools_t<vals...> cbools{};
+
+constexpr cbools_t<false, true> cfalse_true{};
+
+template <int... vals>
+constexpr cints_t<vals...> cints{};
+
+template <char... vals>
+constexpr cchars_t<vals...> cchars{};
+
+template <unsigned... vals>
+constexpr cuints_t<vals...> cuints{};
+
+template <size_t... vals>
+constexpr csizes_t<vals...> csizes{};
+
+template <size_t... vals>
+constexpr elements_t<vals...> elements{};
+
+template <typename T>
+constexpr inline T csum(cvals_t<T>)
+{
+ return 0;
+}
+
+template <typename T, T first, T... rest>
+constexpr inline T csum(cvals_t<T, first, rest...>)
+{
+ return first + csum(cvals<T, rest...>);
+}
+
+template <typename T>
+constexpr inline T cprod(cvals_t<T>)
+{
+ return 1;
+}
+
+template <typename T, T first, T... rest>
+constexpr inline T cprod(cvals_t<T, first, rest...>)
+{
+ return first * cprod(cvals<T, rest...>);
+}
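+// Example: compile-time reductions over value lists:
+//   static_assert(csum(csizes<1, 2, 3>) == 6, "");
+//   static_assert(cprod(cints<2, 3, 4>) == 24, "");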
+
+template <typename T>
+struct ctype_t
+{
+ using type = T;
+};
+
+template <typename T>
+using type_of = typename T::type;
+
+template <typename T>
+constexpr ctype_t<T> ctype{};
+
+template <typename... Types>
+struct ctypes_t
+{
+ constexpr static size_t size() { return sizeof...(Types); }
+
+ template <size_t index>
+ using nth = typename details::get_nth_type<index, Types...>::type;
+
+ template <size_t index>
+ constexpr static auto get(csize_t<index>) -> ctype_t<nth<index>>
+ {
+ return {};
+ }
+};
+
+template <typename... Ts>
+constexpr ctypes_t<Ts...> ctypes{};
+
+namespace details
+{
+
+template <typename>
+struct function_arguments_impl;
+
+template <typename Ret, typename... Args>
+struct function_arguments_impl<Ret (*)(Args...)>
+{
+ using result = Ret;
+ using args = ctypes_t<Args...>;
+};
+
+template <typename Class, typename Ret, typename... Args>
+struct function_arguments_impl<Ret (Class::*)(Args...)>
+{
+ using result = Ret;
+ using args = ctypes_t<Args...>;
+};
+
+template <typename Class, typename Ret, typename... Args>
+struct function_arguments_impl<Ret (Class::*)(Args...) const>
+{
+ using result = Ret;
+ using args = ctypes_t<Args...>;
+};
+
+template <typename T1, typename T2>
+struct filter_impl;
+
+template <typename T>
+struct filter_impl<cvals_t<T>, cvals_t<bool>>
+{
+ using type = cvals_t<T>;
+};
+
+template <typename T, T value, T... values, bool flag, bool... flags>
+struct filter_impl<cvals_t<T, value, values...>, cvals_t<bool, flag, flags...>>
+{
+ using filtered = typename filter_impl<cvals_t<T, values...>, cvals_t<bool, flags...>>::type;
+ using type = conditional<flag, concat_lists<cvals_t<T, value>, filtered>, filtered>;
+};
+}
+
+template <typename Fn>
+using function_arguments = typename details::function_arguments_impl<decltype(&Fn::operator())>::args;
+
+template <typename Fn>
+using function_result = typename details::function_arguments_impl<decltype(&Fn::operator())>::result;
+
+template <typename T1, typename T2>
+using cfilter_t = typename details::filter_impl<T1, T2>::type;
+
+template <typename T, T... vals, bool... flags,
+ typename Ret = cfilter_t<cvals_t<T, vals...>, cvals_t<bool, flags...>>>
+constexpr inline Ret cfilter(cvals_t<T, vals...>, cvals_t<bool, flags...>)
+{
+ return Ret{};
+}
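+// Example: cfilter keeps the values whose flag is true;
+// cfilter(cvals<int, 10, 20, 30>, cbools<true, false, true>) yields
+// cvals_t<int, 10, 30>.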
+
+#define CMT_UN_OP(op) \
+ template <typename T1, T1... vals1, \
+ typename Ret = cvals_t<decltype(op std::declval<T1>()), (op vals1)...>> \
+ constexpr inline Ret operator op(cvals_t<T1, vals1...>) \
+ { \
+ return Ret{}; \
+ } \
+ template <typename T1, T1 val1, typename Ret = cval_t<decltype(op std::declval<T1>()), (op val1)>> \
+ constexpr inline Ret operator op(cval_t<T1, val1>) \
+ { \
+ return Ret{}; \
+ }
+
+#define CMT_BIN_OP(op) \
+ template <typename T1, T1... vals1, typename T2, T2... vals2, \
+ typename Ret = \
+ cvals_t<decltype(std::declval<T1>() op std::declval<T2>()), (vals1 op vals2)...>> \
+ constexpr inline Ret operator op(cvals_t<T1, vals1...>, cvals_t<T2, vals2...>) \
+ { \
+ return Ret{}; \
+ } \
+ template <typename T1, T1... vals1, typename T2, T2 val2, \
+ typename Ret = \
+ cvals_t<decltype(std::declval<T1>() op std::declval<T2>()), (vals1 op val2)...>> \
+ constexpr inline Ret operator op(cvals_t<T1, vals1...>, cval_t<T2, val2>) \
+ { \
+ return Ret{}; \
+ } \
+ template <typename T1, T1 val1, typename T2, T2... vals2, \
+ typename Ret = \
+ cvals_t<decltype(std::declval<T1>() op std::declval<T2>()), (val1 op vals2)...>> \
+ constexpr inline Ret operator op(cval_t<T1, val1>, cvals_t<T2, vals2...>) \
+ { \
+ return Ret{}; \
+ }
+
+// clang-format off
+CMT_UN_OP(-)
+CMT_UN_OP(+)
+CMT_UN_OP(~)
+CMT_UN_OP(!)
+
+CMT_BIN_OP(&&)
+CMT_BIN_OP(||)
+CMT_BIN_OP(==)
+CMT_BIN_OP(!=)
+CMT_BIN_OP(<)
+CMT_BIN_OP(>)
+CMT_BIN_OP(<=)
+CMT_BIN_OP(>=)
+CMT_BIN_OP(+)
+CMT_BIN_OP(-)
+CMT_BIN_OP(*)
+CMT_BIN_OP(/)
+CMT_BIN_OP(%)
+CMT_BIN_OP(<<)
+CMT_BIN_OP(>>)
+CMT_BIN_OP(&)
+CMT_BIN_OP(|)
+CMT_BIN_OP(^)
+// clang-format on
+
+namespace details
+{
+template <typename T, size_t Nsize, T Nstart, ptrdiff_t Nstep>
+struct cvalseq_impl;
+
+template <typename T, size_t Nsize, T Nstart, ptrdiff_t Nstep>
+using cgen_seq = typename cvalseq_impl<T, Nsize, Nstart, Nstep>::type;
+
+template <typename T, size_t Nsize, T Nstart, ptrdiff_t Nstep>
+struct cvalseq_impl : concat_impl<cgen_seq<T, Nsize / 2, Nstart, Nstep>,
+ cgen_seq<T, Nsize - Nsize / 2, Nstart + (Nsize / 2) * Nstep, Nstep>>
+{
+};
+
+template <typename T, T Nstart, ptrdiff_t Nstep>
+struct cvalseq_impl<T, 0, Nstart, Nstep> : cvals_t<T>
+{
+};
+template <typename T, T Nstart, ptrdiff_t Nstep>
+struct cvalseq_impl<T, 1, Nstart, Nstep> : cvals_t<T, static_cast<T>(Nstart)>
+{
+};
+}
+
+template <typename T, size_t size, T start = T(), ptrdiff_t step = 1>
+using cvalseq_t = typename details::cvalseq_impl<T, size, start, step>::type;
+
+template <typename T, T begin, T end>
+constexpr cvalseq_t<T, end - begin, begin> cvalrange{};
+
+template <size_t begin, size_t end>
+constexpr cvalseq_t<size_t, end - begin, begin> csizerange{};
+
+template <int begin, int end>
+constexpr cvalseq_t<int, end - begin, begin> cintrange{};
+
+template <unsigned begin, unsigned end>
+constexpr cvalseq_t<unsigned, end - begin, begin> cuintrange{};
+
+template <typename T, size_t size, T start = T(), ptrdiff_t step = 1>
+constexpr cvalseq_t<T, size, start, step> cvalseq{};
+
+template <size_t size, size_t start = 0, ptrdiff_t step = 1>
+constexpr cvalseq_t<size_t, size, start, step> csizeseq{};
+
+template <size_t size, int start = 0, ptrdiff_t step = 1>
+constexpr cvalseq_t<int, size, start, step> cintseq{};
+
+template <size_t size, unsigned start = 0, ptrdiff_t step = 1>
+constexpr cvalseq_t<unsigned, size, start, step> cuintseq{};
+
+template <typename... List>
+using indicesfor_t = cvalseq_t<size_t, sizeof...(List), 0>;
+
+template <typename... List>
+constexpr indicesfor_t<List...> indicesfor{};
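+// Example: csizeseq<4> is csizes_t<0, 1, 2, 3>; start and step are
+// optional, so cvalseq<int, 3, 10, -2> is cints_t<10, 8, 6>.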
+
+namespace details
+{
+
+template <typename Ret, typename T, typename enable = void_t<>>
+struct is_returning_type_impl : std::false_type
+{
+};
+
+template <typename Ret, typename Fn, typename... Args>
+struct is_returning_type_impl<Ret, Fn(Args...), void_t<result_of<Fn(Args...)>>>
+ : std::is_same<Ret, result_of<Fn(Args...)>>
+{
+};
+
+template <typename Fn, typename Args, typename enable = void_t<>>
+struct is_callable_impl : std::false_type
+{
+};
+
+template <typename Fn, typename... Args>
+struct is_callable_impl<Fn, ctypes_t<Args...>, void_t<result_of<Fn(Args...)>>> : std::true_type
+{
+};
+
+template <typename T, typename enable = void_t<>>
+struct is_enabled_impl : std::true_type
+{
+};
+
+template <typename Fn>
+struct is_enabled_impl<Fn, void_t<decltype(Fn::disabled)>> : std::integral_constant<bool, !Fn::disabled>
+{
+};
+
+template <size_t N>
+struct unique_enum_impl
+{
+ enum class type : size_t
+ {
+ value = N
+ };
+};
+template <size_t N>
+using unique_enum = typename unique_enum_impl<N>::type;
+
+#define CMT_ENABLE_IF_IMPL(N, ...) \
+ typename ::std::enable_if<(__VA_ARGS__), ::cometa::details::unique_enum<N>>::type = \
+ ::cometa::details::unique_enum<N>::value
+
+#define CMT_ENABLE_IF(...) CMT_ENABLE_IF_IMPL(__LINE__, __VA_ARGS__)
+}
+
+template <typename T>
+struct is_enabled : details::is_enabled_impl<T>
+{
+};
+
+template <typename Fn, typename... Args>
+struct is_callable : details::is_callable_impl<Fn, ctypes_t<Args...>>
+{
+};
+
+template <typename Ret, typename T>
+struct is_returning_type : details::is_returning_type_impl<Ret, T>
+{
+};
+
+namespace details
+{
+template <typename Fn, CMT_ENABLE_IF(is_callable<Fn()>())>
+inline auto call_if_callable(Fn&& fn)
+{
+ return fn();
+}
+
+template <typename Fn, CMT_ENABLE_IF(!is_callable<Fn()>())>
+inline auto call_if_callable(Fn&& fn)
+{
+ return std::forward<Fn>(fn);
+}
+}
+
+template <typename Fn, typename... Args>
+inline auto bind_func(Fn&& fn, Args&&... args)
+{
+ return [=]() CID_INLINE_LAMBDA { return fn(details::call_if_callable(std::forward<Args>(args))...); };
+}
+
+template <typename T>
+constexpr inline bool is_even(T x)
+{
+ return (x % 2) == 0;
+}
+
+template <typename T>
+constexpr inline bool is_odd(T x)
+{
+ return !is_even(x);
+}
+
+template <typename T>
+constexpr inline bool is_poweroftwo(T x)
+{
+ return ((x != 0) && !(x & (x - 1)));
+}
+
+template <typename T>
+constexpr inline unsigned ilog2(T n, unsigned p = 0)
+{
+ return (n <= 1) ? p : ilog2(n / 2, p + 1);
+}
+
+template <typename T>
+constexpr inline T next_poweroftwo(T n)
+{
+ return n > 2 ? T(1) << (ilog2(n - 1) + 1) : n;
+}
+
+template <typename T>
+constexpr inline T prev_poweroftwo(T n)
+{
+ return n > 2 ? T(1) << (ilog2(n)) : n;
+}
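+// Example values: ilog2(8) == 3, next_poweroftwo(5) == 8 and
+// prev_poweroftwo(5) == 4; powers of two are returned unchanged by both.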
+
+template <typename T>
+constexpr inline bool is_divisible(T x, T divisor)
+{
+ return x % divisor == 0;
+}
+
+template <typename T>
+constexpr inline T gcd(T a)
+{
+ return a;
+}
+
+template <typename T>
+constexpr inline T gcd(T a, T b)
+{
+ return a < b ? gcd(b, a) : ((a % b == 0) ? b : gcd(b, a % b));
+}
+
+template <typename T, typename... Ts>
+constexpr inline T gcd(T a, T b, T c, Ts... rest)
+{
+ return gcd(a, gcd(b, c, rest...));
+}
+
+template <typename T>
+constexpr inline T lcm(T a)
+{
+ return a;
+}
+
+template <typename T>
+constexpr inline T lcm(T a, T b)
+{
+ return a * b / gcd(a, b);
+}
+
+template <typename T, typename... Ts>
+constexpr inline T lcm(T a, T b, T c, Ts... rest)
+{
+ return lcm(a, lcm(b, c, rest...));
+}
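+// Example: gcd(12, 18) == 6 and lcm(4, 6) == 12; both accept any number
+// of arguments, e.g. gcd(12, 18, 30) == 6.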
+
+namespace details
+{
+template <int64_t min, int64_t max, typename... Types>
+struct findinttype_impl
+{
+};
+template <int64_t min, int64_t max, typename T, typename... Types>
+struct findinttype_impl<min, max, T, Types...>
+{
+ using type = conditional<(std::numeric_limits<T>::min() <= min && std::numeric_limits<T>::max() >= max),
+ T, typename findinttype_impl<min, max, Types...>::type>;
+};
+template <int64_t min, int64_t max>
+struct findinttype_impl<min, max>
+{
+ using type = void;
+};
+
+template <typename T>
+using is_number_impl =
+ std::integral_constant<bool, ((std::is_integral<T>::value) || (std::is_floating_point<T>::value)) &&
+ !std::is_same<T, bool>::value>;
+}
+
+template <int64_t min, int64_t max>
+using findinttype = typename details::findinttype_impl<min, max, uint8_t, int8_t, uint16_t, int16_t, uint32_t,
+ int32_t, uint64_t, int64_t>::type;
+
+template <typename T>
+using is_number = details::is_number_impl<decay<T>>;
+
+template <typename... Ts>
+using is_numbers = and_t<details::is_number_impl<decay<Ts>>...>;
+
+namespace details
+{
+template <typename T>
+struct identity_impl
+{
+ using type = T;
+};
+
+template <typename T>
+constexpr size_t elementsize = sizeof(T);
+
+template <>
+constexpr size_t elementsize<void> = 1;
+}
+
+template <typename T>
+using identity = typename details::identity_impl<T>::type;
+
+struct swallow
+{
+ template <typename... T>
+ CID_INTRIN constexpr swallow(T&&...) noexcept
+ {
+ }
+};
+
+template <typename T, size_t N>
+struct carray;
+
+template <typename T>
+struct carray<T, 1>
+{
+ constexpr carray() noexcept = default;
+ constexpr carray(T val) noexcept : val(val) {}
+
+ template <typename Fn, size_t index = 0>
+ constexpr carray(Fn&& fn, csize_t<index> = csize_t<index>{}) noexcept
+ : val(static_cast<T>(fn(csize<index>)))
+ {
+ }
+
+ constexpr carray(const carray&) noexcept = default;
+ constexpr carray(carray&&) noexcept = default;
+ static constexpr size_t size() noexcept { return 1; }
+
+ template <size_t index>
+ CID_INTRIN constexpr T& get(csize_t<index>) noexcept
+ {
+ static_assert(index == 0, "carray: Array index is out of range");
+ return val;
+ }
+ template <size_t index>
+ CID_INTRIN constexpr const T& get(csize_t<index>) const noexcept
+ {
+ static_assert(index == 0, "carray: Array index is out of range");
+ return val;
+ }
+ template <size_t index>
+ CID_INTRIN constexpr T& get() noexcept
+ {
+ return get(csize<index>);
+ }
+ template <size_t index>
+ CID_INTRIN constexpr const T& get() const noexcept
+ {
+ return get(csize<index>);
+ }
+    constexpr const T* front() const noexcept { return &val; }
+    constexpr T* front() noexcept { return &val; }
+    constexpr const T* back() const noexcept { return &val; }
+    constexpr T* back() noexcept { return &val; }
+ constexpr const T* begin() const noexcept { return &val; }
+ constexpr const T* end() const noexcept { return &val + 1; }
+ constexpr T* begin() noexcept { return &val; }
+ constexpr T* end() noexcept { return &val + 1; }
+ constexpr const T* data() const noexcept { return begin(); }
+ constexpr T* data() noexcept { return begin(); }
+ constexpr bool empty() const noexcept { return false; }
+ T val;
+};
+
+template <typename T, size_t N>
+struct carray : carray<T, N - 1>
+{
+ template <typename... Ts>
+ constexpr carray(T first, Ts... list) noexcept : carray<T, N - 1>(list...), val(first)
+ {
+ static_assert(sizeof...(list) + 1 == N, "carray: Argument count is invalid");
+ }
+
+ template <typename Fn, size_t index = N - 1>
+ constexpr carray(Fn&& fn, csize_t<index> = csize_t<index>{}) noexcept
+ : carray<T, N - 1>(std::forward<Fn>(fn), csize<index - 1>),
+ val(static_cast<T>(fn(csize<index>)))
+ {
+ }
+
+ constexpr carray() noexcept = default;
+ constexpr carray(const carray&) noexcept = default;
+ constexpr carray(carray&&) noexcept = default;
+ static constexpr size_t size() noexcept { return N; }
+ CID_INTRIN constexpr T& get(csize_t<N - 1>) noexcept { return val; }
+ template <size_t index>
+ CID_INTRIN constexpr T& get(csize_t<index>) noexcept
+ {
+ return carray<T, N - 1>::get(csize<index>);
+ }
+ template <size_t index>
+ CID_INTRIN constexpr T& get() noexcept
+ {
+ return get(csize<index>);
+ }
+ CID_INTRIN constexpr const T& get(csize_t<N - 1>) const noexcept { return val; }
+ template <size_t index>
+ CID_INTRIN constexpr const T& get(csize_t<index>) const noexcept
+ {
+ return carray<T, N - 1>::get(csize<index>);
+ }
+ template <size_t index>
+ CID_INTRIN constexpr const T& get() const noexcept
+ {
+ return get(csize<index>);
+ }
+ CID_INTRIN constexpr const T* front() const noexcept { return carray<T, N - 1>::front(); }
+ CID_INTRIN constexpr T* front() noexcept { return carray<T, N - 1>::front(); }
+    CID_INTRIN constexpr const T* back() const noexcept { return &val; }
+    CID_INTRIN constexpr T* back() noexcept { return &val; }
+ CID_INTRIN constexpr const T* begin() const noexcept { return carray<T, N - 1>::begin(); }
+ CID_INTRIN constexpr const T* end() const noexcept { return &val + 1; }
+ CID_INTRIN constexpr T* begin() noexcept { return carray<T, N - 1>::begin(); }
+ CID_INTRIN constexpr T* end() noexcept { return &val + 1; }
+ CID_INTRIN constexpr const T* data() const noexcept { return begin(); }
+ CID_INTRIN constexpr T* data() noexcept { return begin(); }
+ CID_INTRIN constexpr bool empty() const noexcept { return false; }
+private:
+ T val;
+};
+
+#define CMT_FN(fn) \
+ struct fn_##fn \
+ { \
+ template <typename... Args> \
+ CID_INLINE_MEMBER decltype(fn(std::declval<Args>()...)) operator()(Args&&... args) const \
+ { \
+ return fn(std::forward<Args>(args)...); \
+ } \
+ };
+
+#define CMT_ESC(...) __VA_ARGS__
+
+#define CMT_FN_TPL(tpl_list, tpl_args, fn) \
+ template <CMT_ESC tpl_list> \
+ struct fn_##fn \
+ { \
+ template <typename... Args> \
+ CID_INLINE_MEMBER decltype(fn<CMT_ESC tpl_args>(std::declval<Args>()...)) operator()( \
+ Args&&... args) const \
+ { \
+ return fn<CMT_ESC tpl_args>(std::forward<Args>(args)...); \
+ } \
+ };
+
+template <typename T>
+inline auto pass_through(T&& x) noexcept
+{
+ return x;
+}
+
+template <typename... Ts>
+inline void noop(Ts...) noexcept
+{
+}
+
+template <typename T1, typename... Ts>
+constexpr inline T1&& get_first(T1&& x, Ts...) noexcept
+{
+ return std::forward<T1>(x);
+}
+
+template <typename T1, typename T2, typename... Ts>
+constexpr inline T2&& get_second(T1, T2&& x, Ts...) noexcept
+{
+ return std::forward<T2>(x);
+}
+
+template <typename T1, typename T2, typename T3, typename... Ts>
+constexpr inline T3&& get_third(T1, T2, T3&& x, Ts...) noexcept
+{
+ return std::forward<T3>(x);
+}
+template <typename T, typename... Ts>
+constexpr inline T returns(Ts...)
+{
+ return T();
+}
+
+CMT_FN(pass_through)
+CMT_FN(noop)
+CMT_FN(get_first)
+CMT_FN(get_second)
+CMT_FN(get_third)
+CMT_FN_TPL((typename T), (T), returns)
+
+template <typename T1, typename T2>
+inline bool is_equal(const T1& x, const T2& y)
+{
+ return x == y;
+}
+template <typename T1, typename T2>
+inline bool is_notequal(const T1& x, const T2& y)
+{
+ return x != y;
+}
+template <typename T1, typename T2>
+inline bool is_less(const T1& x, const T2& y)
+{
+ return x < y;
+}
+template <typename T1, typename T2>
+inline bool is_greater(const T1& x, const T2& y)
+{
+ return x > y;
+}
+template <typename T1, typename T2>
+inline bool is_lessorequal(const T1& x, const T2& y)
+{
+ return x <= y;
+}
+template <typename T1, typename T2>
+inline bool is_greaterorequal(const T1& x, const T2& y)
+{
+ return x >= y;
+}
+CMT_FN(is_equal)
+CMT_FN(is_notequal)
+CMT_FN(is_less)
+CMT_FN(is_greater)
+CMT_FN(is_lessorequal)
+CMT_FN(is_greaterorequal)
+
+namespace details
+{
+template <typename, typename = void>
+struct has_begin_end_impl : std::false_type
+{
+};
+
+template <typename T>
+struct has_begin_end_impl<T, void_t<decltype(std::declval<T>().begin()), decltype(std::declval<T>().end())>>
+ : std::true_type
+{
+};
+
+template <typename, typename = void>
+struct has_value_type_impl : std::false_type
+{
+};
+
+template <typename T>
+struct has_value_type_impl<T, void_t<typename T::value_type>> : std::true_type
+{
+};
+
+template <typename, typename = void>
+struct has_data_size_impl : std::false_type
+{
+};
+
+template <typename T>
+struct has_data_size_impl<T, void_t<decltype(std::declval<T>().size()), decltype(std::declval<T>().data())>>
+ : std::true_type
+{
+};
+
+template <typename, typename Fallback, typename = void>
+struct value_type_impl
+{
+ using type = Fallback;
+};
+
+template <typename T, typename Fallback>
+struct value_type_impl<T, Fallback, void_t<typename T::value_type>>
+{
+ using type = typename T::value_type;
+};
+}
+
+template <typename T>
+using has_begin_end = details::has_begin_end_impl<decay<T>>;
+
+template <typename T>
+using has_data_size = details::has_data_size_impl<decay<T>>;
+
+template <typename T>
+using value_type_of = typename decay<T>::value_type;
+
+template <typename T, typename Fn>
+CID_INTRIN void cforeach(cvals_t<T>, Fn&&)
+{
+}
+
+template <typename T, T v0, T... values, typename Fn>
+CID_INTRIN void cforeach(cvals_t<T, v0, values...>, Fn&& fn)
+{
+ fn(cval<T, v0>);
+ cforeach(cvals_t<T, values...>(), std::forward<Fn>(fn));
+}
+
+template <typename Fn>
+CID_INTRIN void cforeach(ctypes_t<>, Fn&&)
+{
+}
+
+template <typename T0, typename... types, typename Fn>
+CID_INTRIN void cforeach(ctypes_t<T0, types...>, Fn&& fn)
+{
+ fn(ctype<T0>);
+ cforeach(ctypes_t<types...>(), std::forward<Fn>(fn));
+}
+
+template <typename T, typename Fn, CMT_ENABLE_IF(has_begin_end<T>::value)>
+CID_INTRIN void cforeach(T&& list, Fn&& fn)
+{
+ for (const auto& v : list)
+ {
+ fn(v);
+ }
+}
+
+template <typename T, size_t N, typename Fn>
+CID_INTRIN void cforeach(const T (&array)[N], Fn&& fn)
+{
+ for (size_t i = 0; i < N; i++)
+ {
+ fn(array[i]);
+ }
+}
+
+namespace details
+{
+template <typename... Ts, typename Fn, size_t... indices>
+CID_INTRIN void cforeach_tuple_impl(const std::tuple<Ts...>& tuple, Fn&& fn, csizes_t<indices...>)
+{
+ swallow{ (fn(std::get<indices>(tuple)), void(), 0)... };
+}
+}
+
+template <typename... Ts, typename Fn>
+CID_INTRIN void cforeach(const std::tuple<Ts...>& tuple, Fn&& fn)
+{
+ details::cforeach_tuple_impl(tuple, std::forward<Fn>(fn), csizeseq<sizeof...(Ts)>);
+}
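+// Example: iterate a compile-time list; the functor receives each value
+// as a cval, so it stays usable in constant expressions:
+//   cforeach(csizes<1, 2, 4>, [](auto n) {
+//       constexpr size_t value = val_of(decltype(n)());
+//       // instantiate code for value == 1, 2 and 4
+//   });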
+
+template <typename A0, typename A1, typename Fn>
+CID_INTRIN void cforeach(A0&& a0, A1&& a1, Fn&& fn)
+{
+ cforeach(std::forward<A0>(a0),
+ [&](auto v0) { cforeach(std::forward<A1>(a1), [&](auto v1) { fn(v0, v1); }); });
+}
+
+template <typename A0, typename A1, typename A2, typename Fn>
+CID_INTRIN void cforeach(A0&& a0, A1&& a1, A2&& a2, Fn&& fn)
+{
+ cforeach(std::forward<A0>(a0), [&](auto v0) {
+ cforeach(std::forward<A1>(a1),
+ [&](auto v1) { cforeach(std::forward<A2>(a2), [&](auto v2) { fn(v0, v1, v2); }); });
+ });
+}
+
+template <typename T, typename Fn, typename DefFn = fn_noop, typename CmpFn = fn_is_equal>
+CID_INTRIN decltype(auto) cswitch(cvals_t<T>, identity<T>, Fn&&, DefFn&& deffn = DefFn(), CmpFn&& = CmpFn())
+{
+ return deffn();
+}
+
+template <typename T, T v0, T... values, typename Fn, typename DefFn = fn_noop, typename CmpFn = fn_is_equal>
+CID_INTRIN decltype(auto) cswitch(cvals_t<T, v0, values...>, identity<T> value, Fn&& fn,
+ DefFn&& deffn = DefFn(), CmpFn&& cmpfn = CmpFn())
+{
+ if (cmpfn(value, v0))
+ {
+ return fn(cval<T, v0>);
+ }
+ else
+ {
+ return cswitch(cvals_t<T, values...>(), value, std::forward<Fn>(fn), std::forward<DefFn>(deffn),
+ std::forward<CmpFn>(cmpfn));
+ }
+}
+
+template <typename TrueFn, typename FalseFn = fn_noop>
+CID_INTRIN decltype(auto) cif(cbool_t<true>, TrueFn&& truefn, FalseFn&& = FalseFn())
+{
+ return truefn(cbool<true>);
+}
+
+template <typename TrueFn, typename FalseFn = fn_noop>
+CID_INTRIN decltype(auto) cif(cbool_t<false>, TrueFn&&, FalseFn&& falsefn = FalseFn())
+{
+ return falsefn(cbool<false>);
+}
+
+template <typename T, T start, T stop, typename BodyFn>
+CID_INTRIN decltype(auto) cfor(cval_t<T, start>, cval_t<T, stop>, BodyFn&& bodyfn)
+{
+ return cforeach(cvalrange<T, start, stop>, std::forward<BodyFn>(bodyfn));
+}
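+// Example: cswitch dispatches a runtime value to a compile-time constant;
+// runtime_n stands for any runtime size_t here:
+//   cswitch(csizes<4, 8, 16>, runtime_n,
+//           [](auto n) { /* n is csize_t<4>, csize_t<8> or csize_t<16> */ },
+//           []() { /* fallback for all other values */ });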
+
+namespace details
+{
+
+template <typename T, typename Fn1, typename Fn2, typename... Fns>
+inline decltype(auto) cmatch_impl(T&& value, Fn1&& first, Fn2&& second, Fns&&... rest);
+template <typename T, typename Fn, typename... Ts>
+inline decltype(auto) cmatch_impl(T&& value, Fn&& last);
+
+template <typename T, typename Fn, typename... Fns>
+inline decltype(auto) cmatch_impl2(cbool_t<true>, T&& value, Fn&& fn, Fns&&...)
+{
+ return fn(std::forward<T>(value));
+}
+
+template <typename T, typename Fn, typename... Fns>
+inline decltype(auto) cmatch_impl2(cbool_t<false>, T&& value, Fn&&, Fns&&... rest)
+{
+ return cmatch_impl(std::forward<T>(value), std::forward<Fns>(rest)...);
+}
+
+template <typename T, typename Fn1, typename Fn2, typename... Fns>
+inline decltype(auto) cmatch_impl(T&& value, Fn1&& first, Fn2&& second, Fns&&... rest)
+{
+ using first_arg = typename function_arguments<Fn1>::template nth<0>;
+ constexpr bool is_same = std::is_same<decay<T>, decay<first_arg>>::value;
+ return cmatch_impl2(cbool<is_same>, std::forward<T>(value), std::forward<Fn1>(first),
+ std::forward<Fn2>(second), std::forward<Fns>(rest)...);
+}
+
+template <typename T, typename Fn, typename... Ts>
+inline decltype(auto) cmatch_impl(T&& value, Fn&& last)
+{
+ return last(std::forward<T>(value));
+}
+}
+
+template <typename T, typename Fn, typename... Args>
+inline decltype(auto) cmatch(T&& value, Fn&& fn, Args... args)
+{
+ return details::cmatch_impl(std::forward<T>(value), std::forward<Fn>(fn), std::forward<Args>(args)...);
+}
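+
+// Usage sketch: cmatch invokes the first handler whose parameter type matches
+// the decayed type of value; a generic lambda at the end acts as a catch-all.
+// @code
+// cmatch(x,
+//        [](int n) { /* x was an int */ },
+//        [](double d) { /* x was a double */ },
+//        [](auto other) { /* anything else */ });
+// @endcode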
+
+namespace details
+{
+
+template <typename Result, typename... Args>
+struct virtual_function
+{
+ virtual Result operator()(Args... args) = 0;
+ virtual virtual_function* make_copy() const = 0;
+ CID_INTRIN virtual ~virtual_function() = default;
+};
+
+template <typename Fn, typename Result, typename... Args>
+struct virtual_function_impl : virtual_function<Result, Args...>
+{
+public:
+ CID_INTRIN virtual_function_impl(const Fn& fn) : fn(fn) {}
+ CID_INTRIN Result operator()(Args... args) override final { return fn(args...); }
+ CID_INTRIN virtual_function<Result, Args...>* make_copy() const override final
+ {
+ return new virtual_function_impl{ fn };
+ }
+ CID_INTRIN ~virtual_function_impl() {}
+
+private:
+ Fn fn;
+};
+
+template <typename Fn>
+struct func_filter
+{
+ typedef Fn type;
+};
+template <typename Result, typename... Args>
+struct func_filter<Result(Args...)>
+{
+ typedef Result (*type)(Args...);
+};
+
+template <typename T>
+constexpr CID_INTRIN T return_val() noexcept
+{
+ return {};
+}
+
+template <>
+constexpr CID_INTRIN void return_val<void>() noexcept
+{
+}
+}
+
+template <typename>
+struct function;
+
+/**
+ * @brief std::function-like lightweight function wrapper
+ * @code
+ * function<int( float )> f = []( float x ){ return static_cast<int>( x ); };
+ * CHECK( f( 3.4f ) == 3 )
+ * @endcode
+ */
+template <typename Result, typename... Args>
+struct function<Result(Args...)>
+{
+ using this_t = function<Result(Args...)>;
+
+ function(function&& other) : fn(other.fn) { other.fn = nullptr; }
+    function& operator=(function&& other)
+    {
+        if (&other != this)
+        {
+            // delete the current target first so move-assignment does not leak it
+            delete fn;
+            fn = other.fn;
+            other.fn = nullptr;
+        }
+        return *this;
+    }
+
+ CID_INTRIN function() : fn(nullptr) {}
+ CID_INTRIN function(std::nullptr_t) : fn(nullptr) {}
+ template <typename Func>
+ CID_INTRIN function(const Func& x)
+ : fn(new details::virtual_function_impl<typename details::func_filter<Func>::type, Result, Args...>(
+ x))
+ {
+ }
+ function(const this_t& other) : fn(other.fn ? other.fn->make_copy() : nullptr) {}
+    CID_INTRIN function& operator=(const this_t& other)
+    {
+        if (&other != this)
+        {
+            // copying from an empty function must also clear this target
+            auto* temp = other.fn ? other.fn->make_copy() : nullptr;
+            delete fn;
+            fn = temp;
+        }
+        return *this;
+    }
+ CID_INTRIN function& operator=(std::nullptr_t)
+ {
+ delete fn;
+ fn = nullptr;
+ return *this;
+ }
+ template <typename Fn>
+ CID_INTRIN function& operator=(const Fn& x)
+ {
+ using FnImpl =
+ details::virtual_function_impl<typename details::func_filter<Fn>::type, Result, Args...>;
+ FnImpl* temp = new FnImpl(x);
+ delete fn;
+ fn = temp;
+ return *this;
+ }
+ CID_INTRIN Result operator()(Args... args) const
+ {
+ if (fn)
+ return (*fn)(args...);
+ else
+ return details::return_val<Result>();
+ }
+ CID_INTRIN explicit operator bool() const noexcept { return !!fn; }
+
+ CID_INTRIN ~function() { delete fn; }
+private:
+ details::virtual_function<Result, Args...>* fn;
+};
+
+template <typename Ret, typename... Args, typename T, typename Fn, typename DefFn = fn_noop>
+CID_INLINE function<Ret(Args...)> cdispatch(cvals_t<T>, identity<T>, Fn&&, DefFn&& deffn = DefFn())
+{
+ return [=](Args... args) CID_INLINE_MEMBER -> Ret { return deffn(std::forward<Args>(args)...); };
+}
+
+template <typename Ret, typename... Args, typename T, T v0, T... values, typename Fn,
+ typename DefFn = fn_noop>
+inline function<Ret(Args...)> cdispatch(cvals_t<T, v0, values...>, identity<T> value, Fn&& fn,
+ DefFn&& deffn = DefFn())
+{
+ if (value == v0)
+ {
+ return [=](Args... args)
+ CID_INLINE_MEMBER -> Ret { return fn(cval<T, v0>, std::forward<Args>(args)...); };
+ }
+ else
+ {
+ return cdispatch<Ret, Args...>(cvals_t<T, values...>(), value, std::forward<Fn>(fn),
+ std::forward<DefFn>(deffn));
+ }
+}
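+
+// Usage sketch (fn and deffn stand for user-provided callables): cdispatch is
+// the type-erased counterpart of cswitch and returns a function<Ret(Args...)>
+// bound to the matched constant.
+// @code
+// function<void(float*)> f = cdispatch<void, float*>(cvals_t<size_t, 4, 8>(), n, fn, deffn);
+// @endcode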
+
+template <typename T, T... values>
+inline size_t cfind(cvals_t<T, values...>, identity<T> value)
+{
+ static const T temp[] = { values... };
+ return static_cast<size_t>(
+ std::distance(std::begin(temp), std::find(std::begin(temp), std::end(temp), value)));
+}
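+
+// Usage sketch: cfind returns the position of the value within the constant
+// list, or the list's size when absent, e.g. cfind(cvals_t<int, 10, 20, 30>(), 20) == 1.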
+
+template <typename Fn, typename... Args>
+CID_NOINLINE static result_of<Fn(Args...)> noinline(Fn&& fn, Args&&... args)
+{
+ return fn(std::forward<Args>(args)...);
+}
+
+template <typename Fn>
+struct fn_noinline
+{
+ template <typename... Args>
+ CID_INTRIN result_of<Fn(Args...)> operator()(Args&&... args) const
+ {
+ return noinline(Fn{}, std::forward<Args>(args)...);
+ }
+};
+
+template <typename... Args, typename Fn, typename Ret = decltype(std::declval<Fn>()(std::declval<Args>()...)),
+ typename NonMemFn = Ret (*)(Fn*, Args...)>
+CID_INTRIN NonMemFn make_nonmember(const Fn&)
+{
+ return [](Fn* fn, Args... args) -> Ret { return fn->operator()(std::forward<Args>(args)...); };
+}
+
+using type_id_t = const void*;
+
+namespace details
+{
+
+constexpr inline size_t strlen(const char* str) { return *str ? 1 + cometa::details::strlen(str + 1) : 0; }
+
+template <size_t... indices, size_t Nout = 1 + sizeof...(indices)>
+constexpr inline std::array<char, Nout> gettypename_impl(const char* str, csizes_t<indices...>)
+{
+ std::array<char, Nout> arr{ { str[indices]..., 0 } };
+ return arr;
+}
+
+template <typename T>
+constexpr inline const void* typeident_impl() noexcept
+{
+ return type_id_t(&typeident_impl<T>);
+}
+}
+
+/**
+ * @brief Gets the fully qualified name of the type, including namespace and template parameters (if any)
+ * @tparam T type
+ * @return name of the type
+ */
+template <typename T>
+inline const char* type_name() noexcept
+{
+ constexpr size_t prefix = details::strlen("const char *cometa::type_name() [T = ");
+ constexpr size_t postfix = details::strlen("]");
+ constexpr size_t length = sizeof(CID_FUNC_SIGNATURE) - 1 - prefix - postfix;
+ static const std::array<char, length + 1> name =
+ details::gettypename_impl(CID_FUNC_SIGNATURE + prefix, csizeseq<length>);
+ return name.data();
+}
+
+/**
+ * @brief Gets the fully qualified name of the type, including namespace and template parameters (if any)
+ * @param x value of specific type
+ * @return name of the type
+ */
+template <typename T>
+inline const char* type_name(T x) noexcept
+{
+ (void)x;
+ return type_name<T>();
+}
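+
+// Usage sketch: useful for diagnostics; the exact spelling is derived from the
+// compiler's function signature, e.g. type_name<unsigned long>() typically
+// yields "unsigned long".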
+
+/**
+ * @brief Gets a unique value associated with the type
+ * @tparam T type
+ * @return value of type that supports operator== and operator!=
+ */
+template <typename T>
+constexpr inline type_id_t ctypeid()
+{
+ return details::typeident_impl<T>();
+}
+/**
+ * @brief Gets a unique value associated with the type
+ * @param x value of specific type
+ * @return value of type that supports operator== and operator!=
+ */
+template <typename T>
+constexpr inline type_id_t ctypeid(T x)
+{
+ (void)x;
+ return details::typeident_impl<T>();
+}
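+
+// Usage sketch: the returned values are distinct per type and comparable, so
+// ctypeid<int>() == ctypeid<int>() while ctypeid<int>() != ctypeid<float>().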
+
+template <typename T>
+struct array_ref
+{
+public:
+ using value_type = T;
+ using pointer = value_type*;
+ using const_pointer = const value_type*;
+ using reference = value_type&;
+ using const_reference = const value_type&;
+ using iterator = pointer;
+ using const_iterator = const_pointer;
+ using reverse_iterator = std::reverse_iterator<pointer>;
+ using const_reverse_iterator = std::reverse_iterator<const_iterator>;
+ using size_type = std::size_t;
+ using difference_type = std::ptrdiff_t;
+
+ constexpr array_ref() noexcept : m_data(nullptr), m_size(0) {}
+ constexpr array_ref(const array_ref&) noexcept = default;
+ constexpr array_ref(array_ref&&) noexcept = default;
+ constexpr array_ref& operator=(const array_ref&) noexcept = default;
+ constexpr array_ref& operator=(array_ref&&) noexcept = default;
+
+ template <size_t N>
+ constexpr array_ref(value_type (&arr)[N]) noexcept : m_data(arr), m_size(N)
+ {
+ }
+ template <size_t N>
+ constexpr array_ref(const std::array<T, N>& arr) noexcept : m_data(arr.data()), m_size(N)
+ {
+ }
+ template <size_t N>
+ constexpr array_ref(std::array<T, N>& arr) noexcept : m_data(arr.data()), m_size(N)
+ {
+ }
+ template <typename... Ts>
+ constexpr array_ref(const std::vector<T, Ts...>& vec) noexcept : m_data(vec.data()), m_size(vec.size())
+ {
+ }
+ template <typename... Ts, CMT_ENABLE_IF(sizeof...(Ts), is_const<T>::value)>
+ constexpr array_ref(const std::vector<remove_const<T>, Ts...>& vec) noexcept : m_data(vec.data()),
+ m_size(vec.size())
+ {
+ }
+ template <typename... Ts>
+ constexpr array_ref(std::vector<T, Ts...>& vec) noexcept : m_data(vec.data()), m_size(vec.size())
+ {
+ }
+ template <typename InputIter>
+ constexpr array_ref(InputIter first, InputIter last) noexcept : m_data(std::addressof(*first)),
+ m_size(std::distance(first, last))
+ {
+ }
+ constexpr array_ref(T* data, size_type size) noexcept : m_data(data), m_size(size) {}
+
+ constexpr reference front() const noexcept { return m_data[0]; }
+ constexpr reference back() const noexcept { return m_data[m_size - 1]; }
+ constexpr iterator begin() const noexcept { return m_data; }
+ constexpr iterator end() const noexcept { return m_data + m_size; }
+ constexpr const_iterator cbegin() const noexcept { return m_data; }
+ constexpr const_iterator cend() const noexcept { return m_data + m_size; }
+ constexpr pointer data() const noexcept { return m_data; }
+ constexpr std::size_t size() const noexcept { return m_size; }
+ constexpr bool empty() const noexcept { return !m_size; }
+ constexpr reference operator[](std::size_t index) const { return m_data[index]; }
+
+private:
+ pointer m_data;
+ size_type m_size;
+};
+
+template <typename T>
+constexpr inline T choose_const()
+{
+ static_assert(sizeof(T) != 0, "T not found in the list of template arguments");
+ return T();
+}
+
+/**
+ * Returns the constant from the argument list whose type matches T
+ * @code
+ * CHECK( choose_const<f32>( 32.0f, 64.0 ) == 32.0f );
+ * CHECK( choose_const<f64>( 32.0f, 64.0 ) == 64.0 );
+ * @endcode
+ */
+template <typename T, typename C1, typename... Cs>
+constexpr inline T choose_const(C1 c1, Cs... constants)
+{
+ return std::is_same<T, C1>::value ? static_cast<T>(c1) : choose_const<T>(constants...);
+}
+
+template <typename T, std::size_t size>
+inline array_ref<T> make_array_ref(T (&data)[size])
+{
+ return array_ref<T>(data);
+}
+
+template <typename T>
+inline array_ref<T> make_array_ref(T* data, std::size_t size)
+{
+    // construct directly from (pointer, size); this also avoids dereferencing
+    // `data` for an empty range, unlike the iterator-pair constructor
+    return array_ref<T>(data, size);
+}
+
+template <typename Container, CMT_ENABLE_IF(has_data_size<Container>::value),
+ typename T = remove_pointer<decltype(std::declval<Container>().data())>>
+inline array_ref<T> make_array_ref(Container& cont)
+{
+ return array_ref<T>(cont.data(), cont.size());
+}
+
+template <typename Container, CMT_ENABLE_IF(has_data_size<Container>::value),
+ typename T = remove_pointer<decltype(std::declval<Container>().data())>>
+inline array_ref<T> make_array_ref(const Container& cont)
+{
+ return array_ref<T>(cont.data(), cont.size());
+}
+
+template <typename T>
+inline array_ref<T> make_array_ref(std::vector<T>& cont)
+{
+ return array_ref<T>(cont.data(), cont.size());
+}
+template <typename T>
+inline array_ref<const T> make_array_ref(const std::vector<T>& cont)
+{
+ return array_ref<const T>(cont.data(), cont.size());
+}
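+
+// Usage sketch: every overload produces a non-owning view over existing storage.
+// @code
+// std::vector<float> v{ 1, 2, 3 };
+// array_ref<float> ref = make_array_ref(v); // ref.size() == 3, no copy is made
+// @endcode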
+
+template <typename Type, typename ErrEnum, ErrEnum OkValue = static_cast<ErrEnum>(0)>
+struct result
+{
+ using value_type = Type;
+ using reference = value_type&;
+ using const_reference = const value_type&;
+ using pointer = value_type*;
+ using const_pointer = const value_type*;
+
+ using error_type = ErrEnum;
+
+ constexpr static error_type ok_value = OkValue;
+
+ constexpr result(const result&) = default;
+ constexpr result(result&&) noexcept = default;
+
+    // value-initialize m_value so the error constructor never leaves it indeterminate
+    constexpr result(ErrEnum error) noexcept : m_value(), m_error(error) {}
+
+ template <typename ValueInit, CMT_ENABLE_IF(std::is_constructible<value_type, ValueInit>::value)>
+ constexpr result(ValueInit&& value) noexcept : m_value(std::forward<ValueInit>(value)), m_error(OkValue)
+ {
+ }
+
+ constexpr result(const Type& value) noexcept : m_value(value), m_error(OkValue) {}
+ constexpr result(Type&& value) noexcept : m_value(std::move(value)), m_error(OkValue) {}
+
+ constexpr explicit operator bool() const { return m_error == OkValue; }
+ constexpr const_reference operator*() const { return m_value; }
+ constexpr reference operator*() { return m_value; }
+ constexpr const_pointer operator->() const { return &m_value; }
+ constexpr pointer operator->() { return &m_value; }
+
+ constexpr const_reference value() const { return m_value; }
+ constexpr reference value() { return m_value; }
+ constexpr ErrEnum error() const { return m_error; }
+ constexpr bool ok() const { return m_error == OkValue; }
+private:
+ Type m_value;
+ ErrEnum m_error;
+};
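+
+// Usage sketch (the enum and helper functions are illustrative): result carries
+// either a value or an error code, with OkValue marking success.
+// @code
+// enum class error { ok = 0, overflow };
+// result<int, error> r = 42;
+// if (r)
+//     do_something(*r); // success: access the value
+// else
+//     report(r.error());
+// @endcode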
+
+template <typename Tfrom>
+struct autocast_impl
+{
+ const Tfrom value;
+ template <typename T>
+ CID_INTRIN constexpr operator T() const noexcept
+ {
+ return static_cast<T>(value);
+ }
+};
+
+template <typename Tfrom>
+CID_INTRIN constexpr autocast_impl<Tfrom> autocast(const Tfrom& value) noexcept
+{
+ return { value };
+}
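+
+// Usage sketch: autocast defers the static_cast until the destination type is
+// known from context.
+// @code
+// int n   = autocast(3.5); // static_cast<int>(3.5)
+// float f = autocast(3.5); // static_cast<float>(3.5)
+// @endcode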
+
+inline void stop_constexpr() {}
+
+namespace details
+{
+template <typename T, typename = void>
+struct signed_type_impl
+{
+ using type = T;
+};
+template <typename T>
+struct signed_type_impl<T, void_t<enable_if<std::is_unsigned<T>::value>>>
+{
+ using type = findinttype<std::numeric_limits<T>::min(), std::numeric_limits<T>::max()>;
+};
+}
+
+template <typename T>
+using signed_type = typename details::signed_type_impl<T>::type;
+
+template <typename T>
+struct range
+{
+ using value_type = T;
+ using reference = T&;
+ using const_reference = const T&;
+ using pointer = T*;
+ using const_pointer = const T*;
+
+ struct iterator
+ {
+ T value;
+ const_reference operator*() const { return value; }
+ const_pointer operator->() const { return &value; }
+ iterator& operator++()
+ {
+ ++value;
+ return *this;
+ }
+ iterator operator++(int)
+ {
+ iterator copy = *this;
+ ++(*this);
+ return copy;
+ }
+ bool operator!=(const iterator& other) const { return value != other.value; }
+ };
+ T value_begin;
+ T value_end;
+ iterator begin() const { return iterator{ value_begin }; }
+ iterator end() const { return iterator{ value_end }; }
+};
+
+template <typename T1, typename T2>
+range<common_type<T1, T2>> make_range(T1 begin, T2 end)
+{
+ return { begin, end };
+}
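+
+// Usage sketch: the result works with a range-based for loop.
+// @code
+// for (int i : make_range(0, 4)) { /* i = 0, 1, 2, 3 */ }
+// @endcode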
+
+template <typename T>
+struct named_arg
+{
+ T value;
+ const char* name;
+};
+
+struct named
+{
+ constexpr named(const char* name) noexcept : name(name) {}
+
+ template <typename T>
+ constexpr named_arg<T> operator=(T&& value)
+ {
+ return named_arg<T>{ std::forward<T>(value), name };
+ }
+ const char* name;
+};
+
+inline named operator""_arg(const char* name, size_t) { return name; }
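+
+// Usage sketch: together with repr() from cometa/string.hpp, named arguments
+// render as "name = value", e.g. as_string("x"_arg = 42) should yield "x = 42".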
+}
+
+#pragma clang diagnostic pop
diff --git a/include/kfr/cometa/string.hpp b/include/kfr/cometa/string.hpp
@@ -0,0 +1,481 @@
+#pragma once
+
+#include "../cometa.hpp"
+#include <array>
+#include <cstdio>
+#include <string>
+
+#pragma clang diagnostic push
+#if CID_HAS_WARNING("-Wformat-security")
+#pragma clang diagnostic ignored "-Wformat-security"
+#pragma clang diagnostic ignored "-Wused-but-marked-unused"
+#endif
+
+namespace cometa
+{
+
+template <typename... Args>
+CID_INLINE std::string as_string(const Args&... args);
+
+template <typename T>
+constexpr inline const T& repr(const T& value)
+{
+ return value;
+}
+
+template <typename T>
+inline std::string repr(const named_arg<T>& value)
+{
+ return std::string(value.name) + " = " + as_string(value.value);
+}
+
+template <typename T>
+using repr_type = decay<decltype(repr(std::declval<T>()))>;
+
+template <size_t N>
+using cstring = std::array<char, N>;
+
+namespace details
+{
+
+template <size_t N, size_t... indices>
+CID_INLINE constexpr cstring<N> make_cstring_impl(const char (&str)[N], csizes_t<indices...>)
+{
+ return { { str[indices]..., 0 } };
+}
+
+template <size_t N1, size_t N2, size_t... indices>
+CID_INLINE constexpr cstring<N1 - 1 + N2 - 1 + 1> concat_str_impl(const cstring<N1>& str1,
+ const cstring<N2>& str2,
+ csizes_t<indices...>)
+{
+ constexpr size_t L1 = N1 - 1;
+ return { { (indices < L1 ? str1[indices] : str2[indices - L1])..., 0 } };
+}
+template <size_t N1, size_t N2, typename... Args>
+CID_INLINE constexpr cstring<N1 - 1 + N2 - 1 + 1> concat_str_impl(const cstring<N1>& str1,
+ const cstring<N2>& str2)
+{
+ return concat_str_impl(str1, str2, csizeseq<N1 - 1 + N2 - 1>);
+}
+template <size_t N1, size_t Nfrom, size_t Nto, size_t... indices>
+cstring<N1 - Nfrom + Nto> str_replace_impl(size_t pos, const cstring<N1>& str, const cstring<Nfrom>&,
+ const cstring<Nto>& to, csizes_t<indices...>)
+{
+ if (pos == size_t(-1))
+ stop_constexpr();
+ return { { (indices < pos ? str[indices] : (indices < pos + Nto - 1) ? to[indices - pos]
+ : str[indices - Nto + Nfrom])...,
+ 0 } };
+}
+}
+
+CID_INLINE constexpr cstring<1> concat_cstring() { return { { 0 } }; }
+
+template <size_t N1>
+CID_INLINE constexpr cstring<N1> concat_cstring(const cstring<N1>& str1)
+{
+ return str1;
+}
+
+template <size_t N1, size_t N2, typename... Args>
+CID_INLINE constexpr auto concat_cstring(const cstring<N1>& str1, const cstring<N2>& str2,
+ const Args&... args)
+{
+ return details::concat_str_impl(str1, concat_cstring(str2, args...));
+}
+
+template <size_t N>
+CID_INLINE constexpr cstring<N> make_cstring(const char (&str)[N])
+{
+ return details::make_cstring_impl(str, csizeseq<N - 1>);
+}
+
+template <char... chars>
+CID_INLINE constexpr cstring<sizeof...(chars) + 1> make_cstring(cchars_t<chars...>)
+{
+ return { { chars..., 0 } };
+}
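+
+// Usage sketch: cstrings are compile-time character arrays, so concatenation
+// can run entirely at compile time.
+// @code
+// constexpr auto greeting = concat_cstring(make_cstring("hello, "), make_cstring("world"));
+// static_assert(greeting.size() == 13, "12 characters plus the terminating zero");
+// @endcode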
+
+template <size_t N1, size_t Nneedle>
+size_t str_find(const cstring<N1>& str, const cstring<Nneedle>& needle)
+{
+    size_t count = 0;
+    for (size_t i = 0; i < N1; i++)
+    {
+        if (str[i] == needle[count])
+        {
+            count++;
+        }
+        else
+        {
+            i -= count; // rewind after a partial match so overlapping prefixes are not skipped
+            count = 0;
+        }
+        if (count == Nneedle - 1)
+            return i + 1 - (Nneedle - 1);
+    }
+    return size_t(-1);
+}
+
+template <size_t N1, size_t Nfrom, size_t Nto>
+cstring<N1 - Nfrom + Nto> str_replace(const cstring<N1>& str, const cstring<Nfrom>& from,
+ const cstring<Nto>& to)
+{
+ return details::str_replace_impl(str_find(str, from), str, from, to, csizeseq<N1 - Nfrom + Nto - 1>);
+}
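+
+// Usage sketch: replaces the first occurrence of `from`, adjusting the result
+// length at compile time, e.g.
+// str_replace(make_cstring("printf"), make_cstring("print"), make_cstring("scan"))
+// should yield "scanf".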
+
+namespace details
+{
+template <typename T, char t = -1, int width = -1, int prec = -1>
+struct fmt_t
+{
+ const T& value;
+};
+
+template <int number, CMT_ENABLE_IF(number >= 0 && number < 10)>
+constexpr cstring<2> itoa()
+{
+ return cstring<2>{ { static_cast<char>(number + '0'), 0 } };
+}
+template <int number, CMT_ENABLE_IF(number >= 10)>
+constexpr auto itoa()
+{
+ return concat_cstring(itoa<number / 10>(), itoa<number % 10>());
+}
+template <int number, CMT_ENABLE_IF(number < 0)>
+constexpr auto itoa()
+{
+ return concat_cstring(make_cstring("-"), itoa<-number>());
+}
+
+template <typename T, char t, int width, int prec, CMT_ENABLE_IF(width < 0 && prec >= 0)>
+CID_INLINE constexpr auto value_fmt_arg(ctype_t<fmt_t<T, t, width, prec>>)
+{
+ return concat_cstring(make_cstring("."), itoa<prec>());
+}
+template <typename T, char t, int width, int prec, CMT_ENABLE_IF(width >= 0 && prec < 0)>
+CID_INLINE constexpr auto value_fmt_arg(ctype_t<fmt_t<T, t, width, prec>>)
+{
+ return itoa<width>();
+}
+template <typename T, char t, int width, int prec, CMT_ENABLE_IF(width < 0 && prec < 0)>
+CID_INLINE constexpr auto value_fmt_arg(ctype_t<fmt_t<T, t, width, prec>>)
+{
+ return make_cstring("");
+}
+template <typename T, char t, int width, int prec, CMT_ENABLE_IF(width >= 0 && prec >= 0)>
+CID_INLINE constexpr auto value_fmt_arg(ctype_t<fmt_t<T, t, width, prec>>)
+{
+ return concat_cstring(itoa<width>(), make_cstring("."), itoa<prec>());
+}
+
+CID_INLINE constexpr auto value_fmt(ctype_t<bool>) { return make_cstring("s"); }
+CID_INLINE constexpr auto value_fmt(ctype_t<std::string>) { return make_cstring("s"); }
+CID_INLINE constexpr auto value_fmt(ctype_t<char>) { return make_cstring("d"); }
+CID_INLINE constexpr auto value_fmt(ctype_t<signed char>) { return make_cstring("d"); }
+CID_INLINE constexpr auto value_fmt(ctype_t<unsigned char>) { return make_cstring("d"); }
+CID_INLINE constexpr auto value_fmt(ctype_t<short>) { return make_cstring("d"); }
+CID_INLINE constexpr auto value_fmt(ctype_t<unsigned short>) { return make_cstring("d"); }
+CID_INLINE constexpr auto value_fmt(ctype_t<int>) { return make_cstring("d"); }
+CID_INLINE constexpr auto value_fmt(ctype_t<long>) { return make_cstring("ld"); }
+CID_INLINE constexpr auto value_fmt(ctype_t<long long>) { return make_cstring("lld"); }
+CID_INLINE constexpr auto value_fmt(ctype_t<unsigned int>) { return make_cstring("u"); }
+CID_INLINE constexpr auto value_fmt(ctype_t<unsigned long>) { return make_cstring("lu"); }
+CID_INLINE constexpr auto value_fmt(ctype_t<unsigned long long>) { return make_cstring("llu"); }
+CID_INLINE constexpr auto value_fmt(ctype_t<float>) { return make_cstring("g"); }
+CID_INLINE constexpr auto value_fmt(ctype_t<double>) { return make_cstring("g"); }
+CID_INLINE constexpr auto value_fmt(ctype_t<long double>) { return make_cstring("Lg"); }
+CID_INLINE constexpr auto value_fmt(ctype_t<const char*>) { return make_cstring("s"); }
+CID_INLINE constexpr auto value_fmt(ctype_t<char*>) { return make_cstring("s"); }
+CID_INLINE constexpr auto value_fmt(ctype_t<void*>) { return make_cstring("p"); }
+CID_INLINE constexpr auto value_fmt(ctype_t<const void*>) { return make_cstring("p"); }
+
+template <char... chars>
+CID_INLINE constexpr auto value_fmt(ctype_t<cchars_t<chars...>>)
+{
+ return concat_cstring(make_cstring("s"), make_cstring(cchars<chars...>));
+}
+
+template <typename T>
+CID_INLINE constexpr auto value_fmt(ctype_t<ctype_t<T>>)
+{
+ return make_cstring("s");
+}
+
+template <typename T, int width, int prec>
+CID_INLINE constexpr auto value_fmt(ctype_t<fmt_t<T, -1, width, prec>> fmt)
+{
+ return concat_cstring(value_fmt_arg(fmt), value_fmt(ctype<repr_type<T>>));
+}
+template <typename T, char t, int width, int prec>
+CID_INLINE constexpr auto value_fmt(ctype_t<fmt_t<T, t, width, prec>> fmt)
+{
+ return concat_cstring(value_fmt_arg(fmt), cstring<2>{ { t, 0 } });
+}
+
+template <char... chars>
+CID_INLINE const char* pack_value(const cchars_t<chars...>&)
+{
+ return "";
+}
+
+template <typename Arg>
+CID_INLINE const Arg& pack_value(const Arg& value)
+{
+ return value;
+}
+CID_INLINE double pack_value(float value) { return static_cast<double>(value); }
+CID_INLINE auto pack_value(bool value) { return value ? "true" : "false"; }
+CID_INLINE auto pack_value(const std::string& value) { return value.c_str(); }
+
+template <typename T>
+CID_INLINE const char* pack_value(ctype_t<T>)
+{
+ return type_name<T>();
+}
+
+template <typename T, char t, int width, int prec>
+CID_INLINE auto pack_value(const fmt_t<T, t, width, prec>& value)
+{
+ return pack_value(repr(value.value));
+}
+
+template <size_t N1, size_t Nnew, size_t... indices>
+CID_INLINE constexpr cstring<N1 - 3 + Nnew> fmt_replace_impl(const cstring<N1>& str,
+ const cstring<Nnew>& newfmt,
+ csizes_t<indices...>)
+{
+ size_t start = 0;
+ size_t end = 0;
+ cstring<N1 - 3 + Nnew> result;
+ for (size_t i = 0; i < N1; i++)
+ {
+ if (str[i] == '{')
+ start = i;
+ else if (str[i] == '}')
+ end = i;
+ }
+
+ if (end - start == 1) // {}
+ {
+ for (size_t i = 0; i < N1; i++)
+ {
+ if (i < start)
+ result[i] = str[i];
+ else if (i == start)
+ result[i] = '%';
+ else if (i > start && i - start - 1 < Nnew - 1)
+ result[i] = newfmt[i - start - 1];
+ else if (i - Nnew + 3 < N1 - 1)
+ result[i] = str[i - Nnew + 3];
+ else
+ result[i] = 0;
+ }
+ }
+ return result;
+}
+
+template <size_t N1, size_t Nto>
+CID_INLINE constexpr cstring<N1 - 3 + Nto> fmt_replace(const cstring<N1>& str, const cstring<Nto>& newfmt)
+{
+ return fmt_replace_impl(str, newfmt, csizeseq<N1 - 3 + Nto - 1>);
+}
+
+inline std::string replace_one(const std::string& str, const std::string& from, const std::string& to)
+{
+ std::string r = str;
+ size_t start_pos = 0;
+ if ((start_pos = r.find(from, start_pos)) != std::string::npos)
+ {
+ r.replace(start_pos, from.size(), to);
+ }
+ return r;
+}
+
+CID_INLINE const std::string& build_fmt(const std::string& str, ctypes_t<>) { return str; }
+
+template <typename Arg, typename... Args>
+CID_INLINE auto build_fmt(const std::string& str, ctypes_t<Arg, Args...>)
+{
+ constexpr auto fmt = value_fmt(ctype<decay<Arg>>);
+ return build_fmt(replace_one(str, "{}", "%" + std::string(fmt.data())), ctypes<Args...>);
+}
+}
+
+template <char t, int width = -1, int prec = -1, typename T>
+CID_INLINE details::fmt_t<T, t, width, prec> fmt(const T& value)
+{
+ return { value };
+}
+
+template <int width = -1, int prec = -1, typename T>
+CID_INLINE details::fmt_t<T, -1, width, prec> fmtwidth(const T& value)
+{
+ return { value };
+}
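+
+// Usage sketch: fmt attaches a conversion character (and optional width and
+// precision) to a value, e.g. as_string(fmt<'x'>(255)) should yield "ff" and
+// as_string(fmtwidth<8>(42)) should right-align "42" in 8 columns.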
+
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wgnu-string-literal-operator-template"
+
+constexpr auto build_fmt_str(cchars_t<>, ctypes_t<>) { return make_cstring(""); }
+
+template <char... chars, typename Arg, typename... Args>
+constexpr auto build_fmt_str(cchars_t<'@', chars...>, ctypes_t<Arg, Args...>)
+{
+ return concat_cstring(make_cstring("%"), details::value_fmt(ctype<decay<Arg>>),
+ build_fmt_str(cchars<chars...>, ctypes<Args...>));
+}
+
+template <char ch, char... chars, typename... Args>
+constexpr auto build_fmt_str(cchars_t<ch, chars...>, ctypes_t<Args...>)
+{
+ return concat_cstring(make_cstring(cchars<ch>), build_fmt_str(cchars<chars...>, ctypes<Args...>));
+}
+
+template <char... chars>
+struct format_t
+{
+ template <typename... Args>
+ inline std::string operator()(const Args&... args)
+ {
+ constexpr auto format_str = build_fmt_str(cchars<chars...>, ctypes<repr_type<Args>...>);
+
+ std::string result;
+        const int size = std::snprintf(nullptr, 0, format_str.data(), details::pack_value(repr(args))...);
+ if (size <= 0)
+ return result;
+ result.resize(size_t(size + 1));
+ result.resize(size_t(std::snprintf(&result[0], size_t(size + 1), format_str.data(),
+ details::pack_value(repr(args))...)));
+ return result;
+ }
+};
+
+template <char... chars>
+struct print_t
+{
+ template <typename... Args>
+ CID_INLINE void operator()(const Args&... args)
+ {
+ constexpr auto format_str = build_fmt_str(cchars<chars...>, ctypes<repr_type<Args>...>);
+
+        std::printf(format_str.data(), details::pack_value(repr(args))...);
+ }
+};
+
+template <typename Char, Char... chars>
+constexpr format_t<chars...> operator""_format()
+{
+ return {};
+}
+
+template <typename Char, Char... chars>
+constexpr CID_INLINE print_t<chars...> operator""_print()
+{
+ return {};
+}
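+
+// Usage sketch: with these literals '@' is the placeholder and the printf
+// format string is assembled at compile time from the argument types.
+// @code
+// "x = @, y = @\n"_print(1, 2.5);     // prints "x = 1, y = 2.5"
+// std::string s = "@/@"_format(3, 4); // s == "3/4"
+// @endcode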
+
+#pragma clang diagnostic pop
+
+template <typename... Args>
+CID_INLINE void printfmt(const std::string& fmt, const Args&... args)
+{
+ const auto format_str = details::build_fmt(fmt, ctypes<repr_type<Args>...>);
+ std::printf(format_str.data(), details::pack_value(repr(args))...);
+}
+
+template <typename... Args>
+CID_INLINE void fprintfmt(FILE* f, const std::string& fmt, const Args&... args)
+{
+ const auto format_str = details::build_fmt(fmt, ctypes<repr_type<Args>...>);
+ std::fprintf(f, format_str.data(), details::pack_value(repr(args))...);
+}
+
+template <typename... Args>
+CID_INLINE int snprintfmt(char* str, size_t size, const std::string& fmt, const Args&... args)
+{
+ const auto format_str = details::build_fmt(fmt, ctypes<repr_type<Args>...>);
+ return std::snprintf(str, size, format_str.data(), details::pack_value(repr(args))...);
+}
+
+template <typename... Args>
+CID_INLINE std::string format(const std::string& fmt, const Args&... args)
+{
+ std::string result;
+ const auto format_str = details::build_fmt(fmt, ctypes<repr_type<Args>...>);
+ const int size = std::snprintf(nullptr, 0, format_str.data(), details::pack_value(repr(args))...);
+ if (size <= 0)
+ return result;
+ result.resize(size_t(size + 1));
+ result.resize(size_t(
+ std::snprintf(&result[0], size_t(size + 1), format_str.data(), details::pack_value(repr(args))...)));
+ return result;
+}
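+
+// Usage sketch: the runtime variants replace each "{}" with a printf specifier
+// derived from the argument type, e.g. format("{} + {} = {}", 1, 2, 3.5)
+// should yield "1 + 2 = 3.5".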
+
+template <typename... Args>
+CID_INLINE void print(const Args&... args)
+{
+ constexpr auto format_str = concat_cstring(
+ concat_cstring(make_cstring("%"), details::value_fmt(ctype<decay<repr_type<Args>>>))...);
+ std::printf(format_str.data(), details::pack_value(repr(args))...);
+}
+
+template <typename... Args>
+CID_INLINE void println(const Args&... args)
+{
+ constexpr auto format_str = concat_cstring(
+ concat_cstring(make_cstring("%"), details::value_fmt(ctype<decay<repr_type<Args>>>))...,
+ make_cstring("\n"));
+ std::printf(format_str.data(), details::pack_value(repr(args))...);
+}
+
+template <typename... Args>
+CID_INLINE std::string as_string(const Args&... args)
+{
+ std::string result;
+ constexpr auto format_str = concat_cstring(
+ concat_cstring(make_cstring("%"), details::value_fmt(ctype<decay<repr_type<Args>>>))...);
+
+ const int size = std::snprintf(nullptr, 0, format_str.data(), details::pack_value(repr(args))...);
+ if (size <= 0)
+ return result;
+ result.resize(size_t(size + 1));
+ result.resize(size_t(
+ std::snprintf(&result[0], size_t(size + 1), format_str.data(), details::pack_value(repr(args))...)));
+ return result;
+}
+
+// Right-aligns `text` by padding it on the left to `size` characters
+inline std::string padright(size_t size, const std::string& text, char character = ' ')
+{
+    const size_t pad = size >= text.size() ? size - text.size() : 0;
+    return std::string(pad, character) + text;
+}
+
+// Left-aligns `text` by padding it on the right to `size` characters
+inline std::string padleft(size_t size, const std::string& text, char character = ' ')
+{
+    const size_t pad = size >= text.size() ? size - text.size() : 0;
+    return text + std::string(pad, character);
+}
+
+// Centers `text` within a field of `size` characters
+inline std::string padcenter(size_t size, const std::string& text, char character = ' ')
+{
+ const size_t pad = size >= text.size() ? size - text.size() : 0;
+ return std::string(pad / 2, character) + text + std::string(pad - pad / 2, character);
+}
+
+template <typename T>
+inline std::string q(T x)
+{
+ return "\"" + as_string(std::forward<T>(x)) + "\"";
+}
+
+template <typename T>
+inline std::string join(T x)
+{
+ return as_string(std::forward<T>(x));
+}
+
+template <typename T, typename U, typename... Ts>
+inline std::string join(T x, U y, Ts... rest)
+{
+ return format("{}, {}", x, join(std::forward<U>(y), std::forward<Ts>(rest)...));
+}
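+
+// Usage sketch: join("a", 1, 2.5) should yield "a, 1, 2.5".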
+}
+
+#pragma clang diagnostic pop
diff --git a/include/kfr/data/bitrev.hpp b/include/kfr/data/bitrev.hpp
@@ -0,0 +1,1057 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+
+namespace kfr
+{
+
+namespace data
+{
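+// Lookup table for FFT index reordering: bitrev_table[i] appears to hold i
+// with its lowest 14 bits reversed (e.g. bitrev_table[1] == 8192 == 1 << 13).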
+constexpr unsigned short bitrev_table[] = {
+ 0, 8192, 4096, 12288, 2048, 10240, 6144, 14336, 1024, 9216, 5120, 13312, 3072, 11264, 7168, 15360,
+ 512, 8704, 4608, 12800, 2560, 10752, 6656, 14848, 1536, 9728, 5632, 13824, 3584, 11776, 7680, 15872,
+ 256, 8448, 4352, 12544, 2304, 10496, 6400, 14592, 1280, 9472, 5376, 13568, 3328, 11520, 7424, 15616,
+ 768, 8960, 4864, 13056, 2816, 11008, 6912, 15104, 1792, 9984, 5888, 14080, 3840, 12032, 7936, 16128,
+ 128, 8320, 4224, 12416, 2176, 10368, 6272, 14464, 1152, 9344, 5248, 13440, 3200, 11392, 7296, 15488,
+ 640, 8832, 4736, 12928, 2688, 10880, 6784, 14976, 1664, 9856, 5760, 13952, 3712, 11904, 7808, 16000,
+ 384, 8576, 4480, 12672, 2432, 10624, 6528, 14720, 1408, 9600, 5504, 13696, 3456, 11648, 7552, 15744,
+ 896, 9088, 4992, 13184, 2944, 11136, 7040, 15232, 1920, 10112, 6016, 14208, 3968, 12160, 8064, 16256,
+ 64, 8256, 4160, 12352, 2112, 10304, 6208, 14400, 1088, 9280, 5184, 13376, 3136, 11328, 7232, 15424,
+ 576, 8768, 4672, 12864, 2624, 10816, 6720, 14912, 1600, 9792, 5696, 13888, 3648, 11840, 7744, 15936,
+ 320, 8512, 4416, 12608, 2368, 10560, 6464, 14656, 1344, 9536, 5440, 13632, 3392, 11584, 7488, 15680,
+ 832, 9024, 4928, 13120, 2880, 11072, 6976, 15168, 1856, 10048, 5952, 14144, 3904, 12096, 8000, 16192,
+ 192, 8384, 4288, 12480, 2240, 10432, 6336, 14528, 1216, 9408, 5312, 13504, 3264, 11456, 7360, 15552,
+ 704, 8896, 4800, 12992, 2752, 10944, 6848, 15040, 1728, 9920, 5824, 14016, 3776, 11968, 7872, 16064,
+ 448, 8640, 4544, 12736, 2496, 10688, 6592, 14784, 1472, 9664, 5568, 13760, 3520, 11712, 7616, 15808,
+ 960, 9152, 5056, 13248, 3008, 11200, 7104, 15296, 1984, 10176, 6080, 14272, 4032, 12224, 8128, 16320,
+ 32, 8224, 4128, 12320, 2080, 10272, 6176, 14368, 1056, 9248, 5152, 13344, 3104, 11296, 7200, 15392,
+ 544, 8736, 4640, 12832, 2592, 10784, 6688, 14880, 1568, 9760, 5664, 13856, 3616, 11808, 7712, 15904,
+ 288, 8480, 4384, 12576, 2336, 10528, 6432, 14624, 1312, 9504, 5408, 13600, 3360, 11552, 7456, 15648,
+ 800, 8992, 4896, 13088, 2848, 11040, 6944, 15136, 1824, 10016, 5920, 14112, 3872, 12064, 7968, 16160,
+ 160, 8352, 4256, 12448, 2208, 10400, 6304, 14496, 1184, 9376, 5280, 13472, 3232, 11424, 7328, 15520,
+ 672, 8864, 4768, 12960, 2720, 10912, 6816, 15008, 1696, 9888, 5792, 13984, 3744, 11936, 7840, 16032,
+ 416, 8608, 4512, 12704, 2464, 10656, 6560, 14752, 1440, 9632, 5536, 13728, 3488, 11680, 7584, 15776,
+ 928, 9120, 5024, 13216, 2976, 11168, 7072, 15264, 1952, 10144, 6048, 14240, 4000, 12192, 8096, 16288,
+ 96, 8288, 4192, 12384, 2144, 10336, 6240, 14432, 1120, 9312, 5216, 13408, 3168, 11360, 7264, 15456,
+ 608, 8800, 4704, 12896, 2656, 10848, 6752, 14944, 1632, 9824, 5728, 13920, 3680, 11872, 7776, 15968,
+ 352, 8544, 4448, 12640, 2400, 10592, 6496, 14688, 1376, 9568, 5472, 13664, 3424, 11616, 7520, 15712,
+ 864, 9056, 4960, 13152, 2912, 11104, 7008, 15200, 1888, 10080, 5984, 14176, 3936, 12128, 8032, 16224,
+ 224, 8416, 4320, 12512, 2272, 10464, 6368, 14560, 1248, 9440, 5344, 13536, 3296, 11488, 7392, 15584,
+ 736, 8928, 4832, 13024, 2784, 10976, 6880, 15072, 1760, 9952, 5856, 14048, 3808, 12000, 7904, 16096,
+ 480, 8672, 4576, 12768, 2528, 10720, 6624, 14816, 1504, 9696, 5600, 13792, 3552, 11744, 7648, 15840,
+ 992, 9184, 5088, 13280, 3040, 11232, 7136, 15328, 2016, 10208, 6112, 14304, 4064, 12256, 8160, 16352,
+ 16, 8208, 4112, 12304, 2064, 10256, 6160, 14352, 1040, 9232, 5136, 13328, 3088, 11280, 7184, 15376,
+ 528, 8720, 4624, 12816, 2576, 10768, 6672, 14864, 1552, 9744, 5648, 13840, 3600, 11792, 7696, 15888,
+ 272, 8464, 4368, 12560, 2320, 10512, 6416, 14608, 1296, 9488, 5392, 13584, 3344, 11536, 7440, 15632,
+ 784, 8976, 4880, 13072, 2832, 11024, 6928, 15120, 1808, 10000, 5904, 14096, 3856, 12048, 7952, 16144,
+ 144, 8336, 4240, 12432, 2192, 10384, 6288, 14480, 1168, 9360, 5264, 13456, 3216, 11408, 7312, 15504,
+ 656, 8848, 4752, 12944, 2704, 10896, 6800, 14992, 1680, 9872, 5776, 13968, 3728, 11920, 7824, 16016,
+ 400, 8592, 4496, 12688, 2448, 10640, 6544, 14736, 1424, 9616, 5520, 13712, 3472, 11664, 7568, 15760,
+ 912, 9104, 5008, 13200, 2960, 11152, 7056, 15248, 1936, 10128, 6032, 14224, 3984, 12176, 8080, 16272,
+ 80, 8272, 4176, 12368, 2128, 10320, 6224, 14416, 1104, 9296, 5200, 13392, 3152, 11344, 7248, 15440,
+ 592, 8784, 4688, 12880, 2640, 10832, 6736, 14928, 1616, 9808, 5712, 13904, 3664, 11856, 7760, 15952,
+ 336, 8528, 4432, 12624, 2384, 10576, 6480, 14672, 1360, 9552, 5456, 13648, 3408, 11600, 7504, 15696,
+ 848, 9040, 4944, 13136, 2896, 11088, 6992, 15184, 1872, 10064, 5968, 14160, 3920, 12112, 8016, 16208,
+ 208, 8400, 4304, 12496, 2256, 10448, 6352, 14544, 1232, 9424, 5328, 13520, 3280, 11472, 7376, 15568,
+ 720, 8912, 4816, 13008, 2768, 10960, 6864, 15056, 1744, 9936, 5840, 14032, 3792, 11984, 7888, 16080,
+ 464, 8656, 4560, 12752, 2512, 10704, 6608, 14800, 1488, 9680, 5584, 13776, 3536, 11728, 7632, 15824,
+ 976, 9168, 5072, 13264, 3024, 11216, 7120, 15312, 2000, 10192, 6096, 14288, 4048, 12240, 8144, 16336,
+ 48, 8240, 4144, 12336, 2096, 10288, 6192, 14384, 1072, 9264, 5168, 13360, 3120, 11312, 7216, 15408,
+ 560, 8752, 4656, 12848, 2608, 10800, 6704, 14896, 1584, 9776, 5680, 13872, 3632, 11824, 7728, 15920,
+ 304, 8496, 4400, 12592, 2352, 10544, 6448, 14640, 1328, 9520, 5424, 13616, 3376, 11568, 7472, 15664,
+ 816, 9008, 4912, 13104, 2864, 11056, 6960, 15152, 1840, 10032, 5936, 14128, 3888, 12080, 7984, 16176,
+ 176, 8368, 4272, 12464, 2224, 10416, 6320, 14512, 1200, 9392, 5296, 13488, 3248, 11440, 7344, 15536,
+ 688, 8880, 4784, 12976, 2736, 10928, 6832, 15024, 1712, 9904, 5808, 14000, 3760, 11952, 7856, 16048,
+ 432, 8624, 4528, 12720, 2480, 10672, 6576, 14768, 1456, 9648, 5552, 13744, 3504, 11696, 7600, 15792,
+ 944, 9136, 5040, 13232, 2992, 11184, 7088, 15280, 1968, 10160, 6064, 14256, 4016, 12208, 8112, 16304,
+ 112, 8304, 4208, 12400, 2160, 10352, 6256, 14448, 1136, 9328, 5232, 13424, 3184, 11376, 7280, 15472,
+ 624, 8816, 4720, 12912, 2672, 10864, 6768, 14960, 1648, 9840, 5744, 13936, 3696, 11888, 7792, 15984,
+ 368, 8560, 4464, 12656, 2416, 10608, 6512, 14704, 1392, 9584, 5488, 13680, 3440, 11632, 7536, 15728,
+ 880, 9072, 4976, 13168, 2928, 11120, 7024, 15216, 1904, 10096, 6000, 14192, 3952, 12144, 8048, 16240,
+ 240, 8432, 4336, 12528, 2288, 10480, 6384, 14576, 1264, 9456, 5360, 13552, 3312, 11504, 7408, 15600,
+ 752, 8944, 4848, 13040, 2800, 10992, 6896, 15088, 1776, 9968, 5872, 14064, 3824, 12016, 7920, 16112,
+ 496, 8688, 4592, 12784, 2544, 10736, 6640, 14832, 1520, 9712, 5616, 13808, 3568, 11760, 7664, 15856,
+ 1008, 9200, 5104, 13296, 3056, 11248, 7152, 15344, 2032, 10224, 6128, 14320, 4080, 12272, 8176, 16368,
+ 8, 8200, 4104, 12296, 2056, 10248, 6152, 14344, 1032, 9224, 5128, 13320, 3080, 11272, 7176, 15368,
+ 520, 8712, 4616, 12808, 2568, 10760, 6664, 14856, 1544, 9736, 5640, 13832, 3592, 11784, 7688, 15880,
+ 264, 8456, 4360, 12552, 2312, 10504, 6408, 14600, 1288, 9480, 5384, 13576, 3336, 11528, 7432, 15624,
+ 776, 8968, 4872, 13064, 2824, 11016, 6920, 15112, 1800, 9992, 5896, 14088, 3848, 12040, 7944, 16136,
+ 136, 8328, 4232, 12424, 2184, 10376, 6280, 14472, 1160, 9352, 5256, 13448, 3208, 11400, 7304, 15496,
+ 648, 8840, 4744, 12936, 2696, 10888, 6792, 14984, 1672, 9864, 5768, 13960, 3720, 11912, 7816, 16008,
+ 392, 8584, 4488, 12680, 2440, 10632, 6536, 14728, 1416, 9608, 5512, 13704, 3464, 11656, 7560, 15752,
+ 904, 9096, 5000, 13192, 2952, 11144, 7048, 15240, 1928, 10120, 6024, 14216, 3976, 12168, 8072, 16264,
+ 72, 8264, 4168, 12360, 2120, 10312, 6216, 14408, 1096, 9288, 5192, 13384, 3144, 11336, 7240, 15432,
+ 584, 8776, 4680, 12872, 2632, 10824, 6728, 14920, 1608, 9800, 5704, 13896, 3656, 11848, 7752, 15944,
+ 328, 8520, 4424, 12616, 2376, 10568, 6472, 14664, 1352, 9544, 5448, 13640, 3400, 11592, 7496, 15688,
+ 840, 9032, 4936, 13128, 2888, 11080, 6984, 15176, 1864, 10056, 5960, 14152, 3912, 12104, 8008, 16200,
+ 200, 8392, 4296, 12488, 2248, 10440, 6344, 14536, 1224, 9416, 5320, 13512, 3272, 11464, 7368, 15560,
+ 712, 8904, 4808, 13000, 2760, 10952, 6856, 15048, 1736, 9928, 5832, 14024, 3784, 11976, 7880, 16072,
+ 456, 8648, 4552, 12744, 2504, 10696, 6600, 14792, 1480, 9672, 5576, 13768, 3528, 11720, 7624, 15816,
+ 968, 9160, 5064, 13256, 3016, 11208, 7112, 15304, 1992, 10184, 6088, 14280, 4040, 12232, 8136, 16328,
+ 40, 8232, 4136, 12328, 2088, 10280, 6184, 14376, 1064, 9256, 5160, 13352, 3112, 11304, 7208, 15400,
+ 552, 8744, 4648, 12840, 2600, 10792, 6696, 14888, 1576, 9768, 5672, 13864, 3624, 11816, 7720, 15912,
+ 296, 8488, 4392, 12584, 2344, 10536, 6440, 14632, 1320, 9512, 5416, 13608, 3368, 11560, 7464, 15656,
+ 808, 9000, 4904, 13096, 2856, 11048, 6952, 15144, 1832, 10024, 5928, 14120, 3880, 12072, 7976, 16168,
+ 168, 8360, 4264, 12456, 2216, 10408, 6312, 14504, 1192, 9384, 5288, 13480, 3240, 11432, 7336, 15528,
+ 680, 8872, 4776, 12968, 2728, 10920, 6824, 15016, 1704, 9896, 5800, 13992, 3752, 11944, 7848, 16040,
+ 424, 8616, 4520, 12712, 2472, 10664, 6568, 14760, 1448, 9640, 5544, 13736, 3496, 11688, 7592, 15784,
+ 936, 9128, 5032, 13224, 2984, 11176, 7080, 15272, 1960, 10152, 6056, 14248, 4008, 12200, 8104, 16296,
+ 104, 8296, 4200, 12392, 2152, 10344, 6248, 14440, 1128, 9320, 5224, 13416, 3176, 11368, 7272, 15464,
+ 616, 8808, 4712, 12904, 2664, 10856, 6760, 14952, 1640, 9832, 5736, 13928, 3688, 11880, 7784, 15976,
+ 360, 8552, 4456, 12648, 2408, 10600, 6504, 14696, 1384, 9576, 5480, 13672, 3432, 11624, 7528, 15720,
+ 872, 9064, 4968, 13160, 2920, 11112, 7016, 15208, 1896, 10088, 5992, 14184, 3944, 12136, 8040, 16232,
+ 232, 8424, 4328, 12520, 2280, 10472, 6376, 14568, 1256, 9448, 5352, 13544, 3304, 11496, 7400, 15592,
+ 744, 8936, 4840, 13032, 2792, 10984, 6888, 15080, 1768, 9960, 5864, 14056, 3816, 12008, 7912, 16104,
+ 488, 8680, 4584, 12776, 2536, 10728, 6632, 14824, 1512, 9704, 5608, 13800, 3560, 11752, 7656, 15848,
+ 1000, 9192, 5096, 13288, 3048, 11240, 7144, 15336, 2024, 10216, 6120, 14312, 4072, 12264, 8168, 16360,
+ 24, 8216, 4120, 12312, 2072, 10264, 6168, 14360, 1048, 9240, 5144, 13336, 3096, 11288, 7192, 15384,
+ 536, 8728, 4632, 12824, 2584, 10776, 6680, 14872, 1560, 9752, 5656, 13848, 3608, 11800, 7704, 15896,
+ 280, 8472, 4376, 12568, 2328, 10520, 6424, 14616, 1304, 9496, 5400, 13592, 3352, 11544, 7448, 15640,
+ 792, 8984, 4888, 13080, 2840, 11032, 6936, 15128, 1816, 10008, 5912, 14104, 3864, 12056, 7960, 16152,
+ 152, 8344, 4248, 12440, 2200, 10392, 6296, 14488, 1176, 9368, 5272, 13464, 3224, 11416, 7320, 15512,
+ 664, 8856, 4760, 12952, 2712, 10904, 6808, 15000, 1688, 9880, 5784, 13976, 3736, 11928, 7832, 16024,
+ 408, 8600, 4504, 12696, 2456, 10648, 6552, 14744, 1432, 9624, 5528, 13720, 3480, 11672, 7576, 15768,
+ 920, 9112, 5016, 13208, 2968, 11160, 7064, 15256, 1944, 10136, 6040, 14232, 3992, 12184, 8088, 16280,
+ 88, 8280, 4184, 12376, 2136, 10328, 6232, 14424, 1112, 9304, 5208, 13400, 3160, 11352, 7256, 15448,
+ 600, 8792, 4696, 12888, 2648, 10840, 6744, 14936, 1624, 9816, 5720, 13912, 3672, 11864, 7768, 15960,
+ 344, 8536, 4440, 12632, 2392, 10584, 6488, 14680, 1368, 9560, 5464, 13656, 3416, 11608, 7512, 15704,
+ 856, 9048, 4952, 13144, 2904, 11096, 7000, 15192, 1880, 10072, 5976, 14168, 3928, 12120, 8024, 16216,
+ 216, 8408, 4312, 12504, 2264, 10456, 6360, 14552, 1240, 9432, 5336, 13528, 3288, 11480, 7384, 15576,
+ 728, 8920, 4824, 13016, 2776, 10968, 6872, 15064, 1752, 9944, 5848, 14040, 3800, 11992, 7896, 16088,
+ 472, 8664, 4568, 12760, 2520, 10712, 6616, 14808, 1496, 9688, 5592, 13784, 3544, 11736, 7640, 15832,
+ 984, 9176, 5080, 13272, 3032, 11224, 7128, 15320, 2008, 10200, 6104, 14296, 4056, 12248, 8152, 16344,
+ 56, 8248, 4152, 12344, 2104, 10296, 6200, 14392, 1080, 9272, 5176, 13368, 3128, 11320, 7224, 15416,
+ 568, 8760, 4664, 12856, 2616, 10808, 6712, 14904, 1592, 9784, 5688, 13880, 3640, 11832, 7736, 15928,
+ 312, 8504, 4408, 12600, 2360, 10552, 6456, 14648, 1336, 9528, 5432, 13624, 3384, 11576, 7480, 15672,
+ 824, 9016, 4920, 13112, 2872, 11064, 6968, 15160, 1848, 10040, 5944, 14136, 3896, 12088, 7992, 16184,
+ 184, 8376, 4280, 12472, 2232, 10424, 6328, 14520, 1208, 9400, 5304, 13496, 3256, 11448, 7352, 15544,
+ 696, 8888, 4792, 12984, 2744, 10936, 6840, 15032, 1720, 9912, 5816, 14008, 3768, 11960, 7864, 16056,
+ 440, 8632, 4536, 12728, 2488, 10680, 6584, 14776, 1464, 9656, 5560, 13752, 3512, 11704, 7608, 15800,
+ 952, 9144, 5048, 13240, 3000, 11192, 7096, 15288, 1976, 10168, 6072, 14264, 4024, 12216, 8120, 16312,
+ 120, 8312, 4216, 12408, 2168, 10360, 6264, 14456, 1144, 9336, 5240, 13432, 3192, 11384, 7288, 15480,
+ 632, 8824, 4728, 12920, 2680, 10872, 6776, 14968, 1656, 9848, 5752, 13944, 3704, 11896, 7800, 15992,
+ 376, 8568, 4472, 12664, 2424, 10616, 6520, 14712, 1400, 9592, 5496, 13688, 3448, 11640, 7544, 15736,
+ 888, 9080, 4984, 13176, 2936, 11128, 7032, 15224, 1912, 10104, 6008, 14200, 3960, 12152, 8056, 16248,
+ 248, 8440, 4344, 12536, 2296, 10488, 6392, 14584, 1272, 9464, 5368, 13560, 3320, 11512, 7416, 15608,
+ 760, 8952, 4856, 13048, 2808, 11000, 6904, 15096, 1784, 9976, 5880, 14072, 3832, 12024, 7928, 16120,
+ 504, 8696, 4600, 12792, 2552, 10744, 6648, 14840, 1528, 9720, 5624, 13816, 3576, 11768, 7672, 15864,
+ 1016, 9208, 5112, 13304, 3064, 11256, 7160, 15352, 2040, 10232, 6136, 14328, 4088, 12280, 8184, 16376,
+ 4, 8196, 4100, 12292, 2052, 10244, 6148, 14340, 1028, 9220, 5124, 13316, 3076, 11268, 7172, 15364,
+ 516, 8708, 4612, 12804, 2564, 10756, 6660, 14852, 1540, 9732, 5636, 13828, 3588, 11780, 7684, 15876,
+ 260, 8452, 4356, 12548, 2308, 10500, 6404, 14596, 1284, 9476, 5380, 13572, 3332, 11524, 7428, 15620,
+ 772, 8964, 4868, 13060, 2820, 11012, 6916, 15108, 1796, 9988, 5892, 14084, 3844, 12036, 7940, 16132,
+ 132, 8324, 4228, 12420, 2180, 10372, 6276, 14468, 1156, 9348, 5252, 13444, 3204, 11396, 7300, 15492,
+ 644, 8836, 4740, 12932, 2692, 10884, 6788, 14980, 1668, 9860, 5764, 13956, 3716, 11908, 7812, 16004,
+ 388, 8580, 4484, 12676, 2436, 10628, 6532, 14724, 1412, 9604, 5508, 13700, 3460, 11652, 7556, 15748,
+ 900, 9092, 4996, 13188, 2948, 11140, 7044, 15236, 1924, 10116, 6020, 14212, 3972, 12164, 8068, 16260,
+ 68, 8260, 4164, 12356, 2116, 10308, 6212, 14404, 1092, 9284, 5188, 13380, 3140, 11332, 7236, 15428,
+ 580, 8772, 4676, 12868, 2628, 10820, 6724, 14916, 1604, 9796, 5700, 13892, 3652, 11844, 7748, 15940,
+ 324, 8516, 4420, 12612, 2372, 10564, 6468, 14660, 1348, 9540, 5444, 13636, 3396, 11588, 7492, 15684,
+ 836, 9028, 4932, 13124, 2884, 11076, 6980, 15172, 1860, 10052, 5956, 14148, 3908, 12100, 8004, 16196,
+ 196, 8388, 4292, 12484, 2244, 10436, 6340, 14532, 1220, 9412, 5316, 13508, 3268, 11460, 7364, 15556,
+ 708, 8900, 4804, 12996, 2756, 10948, 6852, 15044, 1732, 9924, 5828, 14020, 3780, 11972, 7876, 16068,
+ 452, 8644, 4548, 12740, 2500, 10692, 6596, 14788, 1476, 9668, 5572, 13764, 3524, 11716, 7620, 15812,
+ 964, 9156, 5060, 13252, 3012, 11204, 7108, 15300, 1988, 10180, 6084, 14276, 4036, 12228, 8132, 16324,
+ 36, 8228, 4132, 12324, 2084, 10276, 6180, 14372, 1060, 9252, 5156, 13348, 3108, 11300, 7204, 15396,
+ 548, 8740, 4644, 12836, 2596, 10788, 6692, 14884, 1572, 9764, 5668, 13860, 3620, 11812, 7716, 15908,
+ 292, 8484, 4388, 12580, 2340, 10532, 6436, 14628, 1316, 9508, 5412, 13604, 3364, 11556, 7460, 15652,
+ 804, 8996, 4900, 13092, 2852, 11044, 6948, 15140, 1828, 10020, 5924, 14116, 3876, 12068, 7972, 16164,
+ 164, 8356, 4260, 12452, 2212, 10404, 6308, 14500, 1188, 9380, 5284, 13476, 3236, 11428, 7332, 15524,
+ 676, 8868, 4772, 12964, 2724, 10916, 6820, 15012, 1700, 9892, 5796, 13988, 3748, 11940, 7844, 16036,
+ 420, 8612, 4516, 12708, 2468, 10660, 6564, 14756, 1444, 9636, 5540, 13732, 3492, 11684, 7588, 15780,
+ 932, 9124, 5028, 13220, 2980, 11172, 7076, 15268, 1956, 10148, 6052, 14244, 4004, 12196, 8100, 16292,
+ 100, 8292, 4196, 12388, 2148, 10340, 6244, 14436, 1124, 9316, 5220, 13412, 3172, 11364, 7268, 15460,
+ 612, 8804, 4708, 12900, 2660, 10852, 6756, 14948, 1636, 9828, 5732, 13924, 3684, 11876, 7780, 15972,
+ 356, 8548, 4452, 12644, 2404, 10596, 6500, 14692, 1380, 9572, 5476, 13668, 3428, 11620, 7524, 15716,
+ 868, 9060, 4964, 13156, 2916, 11108, 7012, 15204, 1892, 10084, 5988, 14180, 3940, 12132, 8036, 16228,
+ 228, 8420, 4324, 12516, 2276, 10468, 6372, 14564, 1252, 9444, 5348, 13540, 3300, 11492, 7396, 15588,
+ 740, 8932, 4836, 13028, 2788, 10980, 6884, 15076, 1764, 9956, 5860, 14052, 3812, 12004, 7908, 16100,
+ 484, 8676, 4580, 12772, 2532, 10724, 6628, 14820, 1508, 9700, 5604, 13796, 3556, 11748, 7652, 15844,
+ 996, 9188, 5092, 13284, 3044, 11236, 7140, 15332, 2020, 10212, 6116, 14308, 4068, 12260, 8164, 16356,
+ 20, 8212, 4116, 12308, 2068, 10260, 6164, 14356, 1044, 9236, 5140, 13332, 3092, 11284, 7188, 15380,
+ 532, 8724, 4628, 12820, 2580, 10772, 6676, 14868, 1556, 9748, 5652, 13844, 3604, 11796, 7700, 15892,
+ 276, 8468, 4372, 12564, 2324, 10516, 6420, 14612, 1300, 9492, 5396, 13588, 3348, 11540, 7444, 15636,
+ 788, 8980, 4884, 13076, 2836, 11028, 6932, 15124, 1812, 10004, 5908, 14100, 3860, 12052, 7956, 16148,
+ 148, 8340, 4244, 12436, 2196, 10388, 6292, 14484, 1172, 9364, 5268, 13460, 3220, 11412, 7316, 15508,
+ 660, 8852, 4756, 12948, 2708, 10900, 6804, 14996, 1684, 9876, 5780, 13972, 3732, 11924, 7828, 16020,
+ 404, 8596, 4500, 12692, 2452, 10644, 6548, 14740, 1428, 9620, 5524, 13716, 3476, 11668, 7572, 15764,
+ 916, 9108, 5012, 13204, 2964, 11156, 7060, 15252, 1940, 10132, 6036, 14228, 3988, 12180, 8084, 16276,
+ 84, 8276, 4180, 12372, 2132, 10324, 6228, 14420, 1108, 9300, 5204, 13396, 3156, 11348, 7252, 15444,
+ 596, 8788, 4692, 12884, 2644, 10836, 6740, 14932, 1620, 9812, 5716, 13908, 3668, 11860, 7764, 15956,
+ 340, 8532, 4436, 12628, 2388, 10580, 6484, 14676, 1364, 9556, 5460, 13652, 3412, 11604, 7508, 15700,
+ 852, 9044, 4948, 13140, 2900, 11092, 6996, 15188, 1876, 10068, 5972, 14164, 3924, 12116, 8020, 16212,
+ 212, 8404, 4308, 12500, 2260, 10452, 6356, 14548, 1236, 9428, 5332, 13524, 3284, 11476, 7380, 15572,
+ 724, 8916, 4820, 13012, 2772, 10964, 6868, 15060, 1748, 9940, 5844, 14036, 3796, 11988, 7892, 16084,
+ 468, 8660, 4564, 12756, 2516, 10708, 6612, 14804, 1492, 9684, 5588, 13780, 3540, 11732, 7636, 15828,
+ 980, 9172, 5076, 13268, 3028, 11220, 7124, 15316, 2004, 10196, 6100, 14292, 4052, 12244, 8148, 16340,
+ 52, 8244, 4148, 12340, 2100, 10292, 6196, 14388, 1076, 9268, 5172, 13364, 3124, 11316, 7220, 15412,
+ 564, 8756, 4660, 12852, 2612, 10804, 6708, 14900, 1588, 9780, 5684, 13876, 3636, 11828, 7732, 15924,
+ 308, 8500, 4404, 12596, 2356, 10548, 6452, 14644, 1332, 9524, 5428, 13620, 3380, 11572, 7476, 15668,
+ 820, 9012, 4916, 13108, 2868, 11060, 6964, 15156, 1844, 10036, 5940, 14132, 3892, 12084, 7988, 16180,
+ 180, 8372, 4276, 12468, 2228, 10420, 6324, 14516, 1204, 9396, 5300, 13492, 3252, 11444, 7348, 15540,
+ 692, 8884, 4788, 12980, 2740, 10932, 6836, 15028, 1716, 9908, 5812, 14004, 3764, 11956, 7860, 16052,
+ 436, 8628, 4532, 12724, 2484, 10676, 6580, 14772, 1460, 9652, 5556, 13748, 3508, 11700, 7604, 15796,
+ 948, 9140, 5044, 13236, 2996, 11188, 7092, 15284, 1972, 10164, 6068, 14260, 4020, 12212, 8116, 16308,
+ 116, 8308, 4212, 12404, 2164, 10356, 6260, 14452, 1140, 9332, 5236, 13428, 3188, 11380, 7284, 15476,
+ 628, 8820, 4724, 12916, 2676, 10868, 6772, 14964, 1652, 9844, 5748, 13940, 3700, 11892, 7796, 15988,
+ 372, 8564, 4468, 12660, 2420, 10612, 6516, 14708, 1396, 9588, 5492, 13684, 3444, 11636, 7540, 15732,
+ 884, 9076, 4980, 13172, 2932, 11124, 7028, 15220, 1908, 10100, 6004, 14196, 3956, 12148, 8052, 16244,
+ 244, 8436, 4340, 12532, 2292, 10484, 6388, 14580, 1268, 9460, 5364, 13556, 3316, 11508, 7412, 15604,
+ 756, 8948, 4852, 13044, 2804, 10996, 6900, 15092, 1780, 9972, 5876, 14068, 3828, 12020, 7924, 16116,
+ 500, 8692, 4596, 12788, 2548, 10740, 6644, 14836, 1524, 9716, 5620, 13812, 3572, 11764, 7668, 15860,
+ 1012, 9204, 5108, 13300, 3060, 11252, 7156, 15348, 2036, 10228, 6132, 14324, 4084, 12276, 8180, 16372,
+ 12, 8204, 4108, 12300, 2060, 10252, 6156, 14348, 1036, 9228, 5132, 13324, 3084, 11276, 7180, 15372,
+ 524, 8716, 4620, 12812, 2572, 10764, 6668, 14860, 1548, 9740, 5644, 13836, 3596, 11788, 7692, 15884,
+ 268, 8460, 4364, 12556, 2316, 10508, 6412, 14604, 1292, 9484, 5388, 13580, 3340, 11532, 7436, 15628,
+ 780, 8972, 4876, 13068, 2828, 11020, 6924, 15116, 1804, 9996, 5900, 14092, 3852, 12044, 7948, 16140,
+ 140, 8332, 4236, 12428, 2188, 10380, 6284, 14476, 1164, 9356, 5260, 13452, 3212, 11404, 7308, 15500,
+ 652, 8844, 4748, 12940, 2700, 10892, 6796, 14988, 1676, 9868, 5772, 13964, 3724, 11916, 7820, 16012,
+ 396, 8588, 4492, 12684, 2444, 10636, 6540, 14732, 1420, 9612, 5516, 13708, 3468, 11660, 7564, 15756,
+ 908, 9100, 5004, 13196, 2956, 11148, 7052, 15244, 1932, 10124, 6028, 14220, 3980, 12172, 8076, 16268,
+ 76, 8268, 4172, 12364, 2124, 10316, 6220, 14412, 1100, 9292, 5196, 13388, 3148, 11340, 7244, 15436,
+ 588, 8780, 4684, 12876, 2636, 10828, 6732, 14924, 1612, 9804, 5708, 13900, 3660, 11852, 7756, 15948,
+ 332, 8524, 4428, 12620, 2380, 10572, 6476, 14668, 1356, 9548, 5452, 13644, 3404, 11596, 7500, 15692,
+ 844, 9036, 4940, 13132, 2892, 11084, 6988, 15180, 1868, 10060, 5964, 14156, 3916, 12108, 8012, 16204,
+ 204, 8396, 4300, 12492, 2252, 10444, 6348, 14540, 1228, 9420, 5324, 13516, 3276, 11468, 7372, 15564,
+ 716, 8908, 4812, 13004, 2764, 10956, 6860, 15052, 1740, 9932, 5836, 14028, 3788, 11980, 7884, 16076,
+ 460, 8652, 4556, 12748, 2508, 10700, 6604, 14796, 1484, 9676, 5580, 13772, 3532, 11724, 7628, 15820,
+ 972, 9164, 5068, 13260, 3020, 11212, 7116, 15308, 1996, 10188, 6092, 14284, 4044, 12236, 8140, 16332,
+ 44, 8236, 4140, 12332, 2092, 10284, 6188, 14380, 1068, 9260, 5164, 13356, 3116, 11308, 7212, 15404,
+ 556, 8748, 4652, 12844, 2604, 10796, 6700, 14892, 1580, 9772, 5676, 13868, 3628, 11820, 7724, 15916,
+ 300, 8492, 4396, 12588, 2348, 10540, 6444, 14636, 1324, 9516, 5420, 13612, 3372, 11564, 7468, 15660,
+ 812, 9004, 4908, 13100, 2860, 11052, 6956, 15148, 1836, 10028, 5932, 14124, 3884, 12076, 7980, 16172,
+ 172, 8364, 4268, 12460, 2220, 10412, 6316, 14508, 1196, 9388, 5292, 13484, 3244, 11436, 7340, 15532,
+ 684, 8876, 4780, 12972, 2732, 10924, 6828, 15020, 1708, 9900, 5804, 13996, 3756, 11948, 7852, 16044,
+ 428, 8620, 4524, 12716, 2476, 10668, 6572, 14764, 1452, 9644, 5548, 13740, 3500, 11692, 7596, 15788,
+ 940, 9132, 5036, 13228, 2988, 11180, 7084, 15276, 1964, 10156, 6060, 14252, 4012, 12204, 8108, 16300,
+ 108, 8300, 4204, 12396, 2156, 10348, 6252, 14444, 1132, 9324, 5228, 13420, 3180, 11372, 7276, 15468,
+ 620, 8812, 4716, 12908, 2668, 10860, 6764, 14956, 1644, 9836, 5740, 13932, 3692, 11884, 7788, 15980,
+ 364, 8556, 4460, 12652, 2412, 10604, 6508, 14700, 1388, 9580, 5484, 13676, 3436, 11628, 7532, 15724,
+ 876, 9068, 4972, 13164, 2924, 11116, 7020, 15212, 1900, 10092, 5996, 14188, 3948, 12140, 8044, 16236,
+ 236, 8428, 4332, 12524, 2284, 10476, 6380, 14572, 1260, 9452, 5356, 13548, 3308, 11500, 7404, 15596,
+ 748, 8940, 4844, 13036, 2796, 10988, 6892, 15084, 1772, 9964, 5868, 14060, 3820, 12012, 7916, 16108,
+ 492, 8684, 4588, 12780, 2540, 10732, 6636, 14828, 1516, 9708, 5612, 13804, 3564, 11756, 7660, 15852,
+ 1004, 9196, 5100, 13292, 3052, 11244, 7148, 15340, 2028, 10220, 6124, 14316, 4076, 12268, 8172, 16364,
+ 28, 8220, 4124, 12316, 2076, 10268, 6172, 14364, 1052, 9244, 5148, 13340, 3100, 11292, 7196, 15388,
+ 540, 8732, 4636, 12828, 2588, 10780, 6684, 14876, 1564, 9756, 5660, 13852, 3612, 11804, 7708, 15900,
+ 284, 8476, 4380, 12572, 2332, 10524, 6428, 14620, 1308, 9500, 5404, 13596, 3356, 11548, 7452, 15644,
+ 796, 8988, 4892, 13084, 2844, 11036, 6940, 15132, 1820, 10012, 5916, 14108, 3868, 12060, 7964, 16156,
+ 156, 8348, 4252, 12444, 2204, 10396, 6300, 14492, 1180, 9372, 5276, 13468, 3228, 11420, 7324, 15516,
+ 668, 8860, 4764, 12956, 2716, 10908, 6812, 15004, 1692, 9884, 5788, 13980, 3740, 11932, 7836, 16028,
+ 412, 8604, 4508, 12700, 2460, 10652, 6556, 14748, 1436, 9628, 5532, 13724, 3484, 11676, 7580, 15772,
+ 924, 9116, 5020, 13212, 2972, 11164, 7068, 15260, 1948, 10140, 6044, 14236, 3996, 12188, 8092, 16284,
+ 92, 8284, 4188, 12380, 2140, 10332, 6236, 14428, 1116, 9308, 5212, 13404, 3164, 11356, 7260, 15452,
+ 604, 8796, 4700, 12892, 2652, 10844, 6748, 14940, 1628, 9820, 5724, 13916, 3676, 11868, 7772, 15964,
+ 348, 8540, 4444, 12636, 2396, 10588, 6492, 14684, 1372, 9564, 5468, 13660, 3420, 11612, 7516, 15708,
+ 860, 9052, 4956, 13148, 2908, 11100, 7004, 15196, 1884, 10076, 5980, 14172, 3932, 12124, 8028, 16220,
+ 220, 8412, 4316, 12508, 2268, 10460, 6364, 14556, 1244, 9436, 5340, 13532, 3292, 11484, 7388, 15580,
+ 732, 8924, 4828, 13020, 2780, 10972, 6876, 15068, 1756, 9948, 5852, 14044, 3804, 11996, 7900, 16092,
+ 476, 8668, 4572, 12764, 2524, 10716, 6620, 14812, 1500, 9692, 5596, 13788, 3548, 11740, 7644, 15836,
+ 988, 9180, 5084, 13276, 3036, 11228, 7132, 15324, 2012, 10204, 6108, 14300, 4060, 12252, 8156, 16348,
+ 60, 8252, 4156, 12348, 2108, 10300, 6204, 14396, 1084, 9276, 5180, 13372, 3132, 11324, 7228, 15420,
+ 572, 8764, 4668, 12860, 2620, 10812, 6716, 14908, 1596, 9788, 5692, 13884, 3644, 11836, 7740, 15932,
+ 316, 8508, 4412, 12604, 2364, 10556, 6460, 14652, 1340, 9532, 5436, 13628, 3388, 11580, 7484, 15676,
+ 828, 9020, 4924, 13116, 2876, 11068, 6972, 15164, 1852, 10044, 5948, 14140, 3900, 12092, 7996, 16188,
+ 188, 8380, 4284, 12476, 2236, 10428, 6332, 14524, 1212, 9404, 5308, 13500, 3260, 11452, 7356, 15548,
+ 700, 8892, 4796, 12988, 2748, 10940, 6844, 15036, 1724, 9916, 5820, 14012, 3772, 11964, 7868, 16060,
+ 444, 8636, 4540, 12732, 2492, 10684, 6588, 14780, 1468, 9660, 5564, 13756, 3516, 11708, 7612, 15804,
+ 956, 9148, 5052, 13244, 3004, 11196, 7100, 15292, 1980, 10172, 6076, 14268, 4028, 12220, 8124, 16316,
+ 124, 8316, 4220, 12412, 2172, 10364, 6268, 14460, 1148, 9340, 5244, 13436, 3196, 11388, 7292, 15484,
+ 636, 8828, 4732, 12924, 2684, 10876, 6780, 14972, 1660, 9852, 5756, 13948, 3708, 11900, 7804, 15996,
+ 380, 8572, 4476, 12668, 2428, 10620, 6524, 14716, 1404, 9596, 5500, 13692, 3452, 11644, 7548, 15740,
+ 892, 9084, 4988, 13180, 2940, 11132, 7036, 15228, 1916, 10108, 6012, 14204, 3964, 12156, 8060, 16252,
+ 252, 8444, 4348, 12540, 2300, 10492, 6396, 14588, 1276, 9468, 5372, 13564, 3324, 11516, 7420, 15612,
+ 764, 8956, 4860, 13052, 2812, 11004, 6908, 15100, 1788, 9980, 5884, 14076, 3836, 12028, 7932, 16124,
+ 508, 8700, 4604, 12796, 2556, 10748, 6652, 14844, 1532, 9724, 5628, 13820, 3580, 11772, 7676, 15868,
+ 1020, 9212, 5116, 13308, 3068, 11260, 7164, 15356, 2044, 10236, 6140, 14332, 4092, 12284, 8188, 16380,
+ 2, 8194, 4098, 12290, 2050, 10242, 6146, 14338, 1026, 9218, 5122, 13314, 3074, 11266, 7170, 15362,
+ 514, 8706, 4610, 12802, 2562, 10754, 6658, 14850, 1538, 9730, 5634, 13826, 3586, 11778, 7682, 15874,
+ 258, 8450, 4354, 12546, 2306, 10498, 6402, 14594, 1282, 9474, 5378, 13570, 3330, 11522, 7426, 15618,
+ 770, 8962, 4866, 13058, 2818, 11010, 6914, 15106, 1794, 9986, 5890, 14082, 3842, 12034, 7938, 16130,
+ 130, 8322, 4226, 12418, 2178, 10370, 6274, 14466, 1154, 9346, 5250, 13442, 3202, 11394, 7298, 15490,
+ 642, 8834, 4738, 12930, 2690, 10882, 6786, 14978, 1666, 9858, 5762, 13954, 3714, 11906, 7810, 16002,
+ 386, 8578, 4482, 12674, 2434, 10626, 6530, 14722, 1410, 9602, 5506, 13698, 3458, 11650, 7554, 15746,
+ 898, 9090, 4994, 13186, 2946, 11138, 7042, 15234, 1922, 10114, 6018, 14210, 3970, 12162, 8066, 16258,
+ 66, 8258, 4162, 12354, 2114, 10306, 6210, 14402, 1090, 9282, 5186, 13378, 3138, 11330, 7234, 15426,
+ 578, 8770, 4674, 12866, 2626, 10818, 6722, 14914, 1602, 9794, 5698, 13890, 3650, 11842, 7746, 15938,
+ 322, 8514, 4418, 12610, 2370, 10562, 6466, 14658, 1346, 9538, 5442, 13634, 3394, 11586, 7490, 15682,
+ 834, 9026, 4930, 13122, 2882, 11074, 6978, 15170, 1858, 10050, 5954, 14146, 3906, 12098, 8002, 16194,
+ 194, 8386, 4290, 12482, 2242, 10434, 6338, 14530, 1218, 9410, 5314, 13506, 3266, 11458, 7362, 15554,
+ 706, 8898, 4802, 12994, 2754, 10946, 6850, 15042, 1730, 9922, 5826, 14018, 3778, 11970, 7874, 16066,
+ 450, 8642, 4546, 12738, 2498, 10690, 6594, 14786, 1474, 9666, 5570, 13762, 3522, 11714, 7618, 15810,
+ 962, 9154, 5058, 13250, 3010, 11202, 7106, 15298, 1986, 10178, 6082, 14274, 4034, 12226, 8130, 16322,
+ 34, 8226, 4130, 12322, 2082, 10274, 6178, 14370, 1058, 9250, 5154, 13346, 3106, 11298, 7202, 15394,
+ 546, 8738, 4642, 12834, 2594, 10786, 6690, 14882, 1570, 9762, 5666, 13858, 3618, 11810, 7714, 15906,
+ 290, 8482, 4386, 12578, 2338, 10530, 6434, 14626, 1314, 9506, 5410, 13602, 3362, 11554, 7458, 15650,
+ 802, 8994, 4898, 13090, 2850, 11042, 6946, 15138, 1826, 10018, 5922, 14114, 3874, 12066, 7970, 16162,
+ 162, 8354, 4258, 12450, 2210, 10402, 6306, 14498, 1186, 9378, 5282, 13474, 3234, 11426, 7330, 15522,
+ 674, 8866, 4770, 12962, 2722, 10914, 6818, 15010, 1698, 9890, 5794, 13986, 3746, 11938, 7842, 16034,
+ 418, 8610, 4514, 12706, 2466, 10658, 6562, 14754, 1442, 9634, 5538, 13730, 3490, 11682, 7586, 15778,
+ 930, 9122, 5026, 13218, 2978, 11170, 7074, 15266, 1954, 10146, 6050, 14242, 4002, 12194, 8098, 16290,
+ 98, 8290, 4194, 12386, 2146, 10338, 6242, 14434, 1122, 9314, 5218, 13410, 3170, 11362, 7266, 15458,
+ 610, 8802, 4706, 12898, 2658, 10850, 6754, 14946, 1634, 9826, 5730, 13922, 3682, 11874, 7778, 15970,
+ 354, 8546, 4450, 12642, 2402, 10594, 6498, 14690, 1378, 9570, 5474, 13666, 3426, 11618, 7522, 15714,
+ 866, 9058, 4962, 13154, 2914, 11106, 7010, 15202, 1890, 10082, 5986, 14178, 3938, 12130, 8034, 16226,
+ 226, 8418, 4322, 12514, 2274, 10466, 6370, 14562, 1250, 9442, 5346, 13538, 3298, 11490, 7394, 15586,
+ 738, 8930, 4834, 13026, 2786, 10978, 6882, 15074, 1762, 9954, 5858, 14050, 3810, 12002, 7906, 16098,
+ 482, 8674, 4578, 12770, 2530, 10722, 6626, 14818, 1506, 9698, 5602, 13794, 3554, 11746, 7650, 15842,
+ 994, 9186, 5090, 13282, 3042, 11234, 7138, 15330, 2018, 10210, 6114, 14306, 4066, 12258, 8162, 16354,
+ 18, 8210, 4114, 12306, 2066, 10258, 6162, 14354, 1042, 9234, 5138, 13330, 3090, 11282, 7186, 15378,
+ 530, 8722, 4626, 12818, 2578, 10770, 6674, 14866, 1554, 9746, 5650, 13842, 3602, 11794, 7698, 15890,
+ 274, 8466, 4370, 12562, 2322, 10514, 6418, 14610, 1298, 9490, 5394, 13586, 3346, 11538, 7442, 15634,
+ 786, 8978, 4882, 13074, 2834, 11026, 6930, 15122, 1810, 10002, 5906, 14098, 3858, 12050, 7954, 16146,
+ 146, 8338, 4242, 12434, 2194, 10386, 6290, 14482, 1170, 9362, 5266, 13458, 3218, 11410, 7314, 15506,
+ 658, 8850, 4754, 12946, 2706, 10898, 6802, 14994, 1682, 9874, 5778, 13970, 3730, 11922, 7826, 16018,
+ 402, 8594, 4498, 12690, 2450, 10642, 6546, 14738, 1426, 9618, 5522, 13714, 3474, 11666, 7570, 15762,
+ 914, 9106, 5010, 13202, 2962, 11154, 7058, 15250, 1938, 10130, 6034, 14226, 3986, 12178, 8082, 16274,
+ 82, 8274, 4178, 12370, 2130, 10322, 6226, 14418, 1106, 9298, 5202, 13394, 3154, 11346, 7250, 15442,
+ 594, 8786, 4690, 12882, 2642, 10834, 6738, 14930, 1618, 9810, 5714, 13906, 3666, 11858, 7762, 15954,
+ 338, 8530, 4434, 12626, 2386, 10578, 6482, 14674, 1362, 9554, 5458, 13650, 3410, 11602, 7506, 15698,
+ 850, 9042, 4946, 13138, 2898, 11090, 6994, 15186, 1874, 10066, 5970, 14162, 3922, 12114, 8018, 16210,
+ 210, 8402, 4306, 12498, 2258, 10450, 6354, 14546, 1234, 9426, 5330, 13522, 3282, 11474, 7378, 15570,
+ 722, 8914, 4818, 13010, 2770, 10962, 6866, 15058, 1746, 9938, 5842, 14034, 3794, 11986, 7890, 16082,
+ 466, 8658, 4562, 12754, 2514, 10706, 6610, 14802, 1490, 9682, 5586, 13778, 3538, 11730, 7634, 15826,
+ 978, 9170, 5074, 13266, 3026, 11218, 7122, 15314, 2002, 10194, 6098, 14290, 4050, 12242, 8146, 16338,
+ 50, 8242, 4146, 12338, 2098, 10290, 6194, 14386, 1074, 9266, 5170, 13362, 3122, 11314, 7218, 15410,
+ 562, 8754, 4658, 12850, 2610, 10802, 6706, 14898, 1586, 9778, 5682, 13874, 3634, 11826, 7730, 15922,
+ 306, 8498, 4402, 12594, 2354, 10546, 6450, 14642, 1330, 9522, 5426, 13618, 3378, 11570, 7474, 15666,
+ 818, 9010, 4914, 13106, 2866, 11058, 6962, 15154, 1842, 10034, 5938, 14130, 3890, 12082, 7986, 16178,
+ 178, 8370, 4274, 12466, 2226, 10418, 6322, 14514, 1202, 9394, 5298, 13490, 3250, 11442, 7346, 15538,
+ 690, 8882, 4786, 12978, 2738, 10930, 6834, 15026, 1714, 9906, 5810, 14002, 3762, 11954, 7858, 16050,
+ 434, 8626, 4530, 12722, 2482, 10674, 6578, 14770, 1458, 9650, 5554, 13746, 3506, 11698, 7602, 15794,
+ 946, 9138, 5042, 13234, 2994, 11186, 7090, 15282, 1970, 10162, 6066, 14258, 4018, 12210, 8114, 16306,
+ 114, 8306, 4210, 12402, 2162, 10354, 6258, 14450, 1138, 9330, 5234, 13426, 3186, 11378, 7282, 15474,
+ 626, 8818, 4722, 12914, 2674, 10866, 6770, 14962, 1650, 9842, 5746, 13938, 3698, 11890, 7794, 15986,
+ 370, 8562, 4466, 12658, 2418, 10610, 6514, 14706, 1394, 9586, 5490, 13682, 3442, 11634, 7538, 15730,
+ 882, 9074, 4978, 13170, 2930, 11122, 7026, 15218, 1906, 10098, 6002, 14194, 3954, 12146, 8050, 16242,
+ 242, 8434, 4338, 12530, 2290, 10482, 6386, 14578, 1266, 9458, 5362, 13554, 3314, 11506, 7410, 15602,
+ 754, 8946, 4850, 13042, 2802, 10994, 6898, 15090, 1778, 9970, 5874, 14066, 3826, 12018, 7922, 16114,
+ 498, 8690, 4594, 12786, 2546, 10738, 6642, 14834, 1522, 9714, 5618, 13810, 3570, 11762, 7666, 15858,
+ 1010, 9202, 5106, 13298, 3058, 11250, 7154, 15346, 2034, 10226, 6130, 14322, 4082, 12274, 8178, 16370,
+ 10, 8202, 4106, 12298, 2058, 10250, 6154, 14346, 1034, 9226, 5130, 13322, 3082, 11274, 7178, 15370,
+ 522, 8714, 4618, 12810, 2570, 10762, 6666, 14858, 1546, 9738, 5642, 13834, 3594, 11786, 7690, 15882,
+ 266, 8458, 4362, 12554, 2314, 10506, 6410, 14602, 1290, 9482, 5386, 13578, 3338, 11530, 7434, 15626,
+ 778, 8970, 4874, 13066, 2826, 11018, 6922, 15114, 1802, 9994, 5898, 14090, 3850, 12042, 7946, 16138,
+ 138, 8330, 4234, 12426, 2186, 10378, 6282, 14474, 1162, 9354, 5258, 13450, 3210, 11402, 7306, 15498,
+ 650, 8842, 4746, 12938, 2698, 10890, 6794, 14986, 1674, 9866, 5770, 13962, 3722, 11914, 7818, 16010,
+ 394, 8586, 4490, 12682, 2442, 10634, 6538, 14730, 1418, 9610, 5514, 13706, 3466, 11658, 7562, 15754,
+ 906, 9098, 5002, 13194, 2954, 11146, 7050, 15242, 1930, 10122, 6026, 14218, 3978, 12170, 8074, 16266,
+ 74, 8266, 4170, 12362, 2122, 10314, 6218, 14410, 1098, 9290, 5194, 13386, 3146, 11338, 7242, 15434,
+ 586, 8778, 4682, 12874, 2634, 10826, 6730, 14922, 1610, 9802, 5706, 13898, 3658, 11850, 7754, 15946,
+ 330, 8522, 4426, 12618, 2378, 10570, 6474, 14666, 1354, 9546, 5450, 13642, 3402, 11594, 7498, 15690,
+ 842, 9034, 4938, 13130, 2890, 11082, 6986, 15178, 1866, 10058, 5962, 14154, 3914, 12106, 8010, 16202,
+ 202, 8394, 4298, 12490, 2250, 10442, 6346, 14538, 1226, 9418, 5322, 13514, 3274, 11466, 7370, 15562,
+ 714, 8906, 4810, 13002, 2762, 10954, 6858, 15050, 1738, 9930, 5834, 14026, 3786, 11978, 7882, 16074,
+ 458, 8650, 4554, 12746, 2506, 10698, 6602, 14794, 1482, 9674, 5578, 13770, 3530, 11722, 7626, 15818,
+ 970, 9162, 5066, 13258, 3018, 11210, 7114, 15306, 1994, 10186, 6090, 14282, 4042, 12234, 8138, 16330,
+ 42, 8234, 4138, 12330, 2090, 10282, 6186, 14378, 1066, 9258, 5162, 13354, 3114, 11306, 7210, 15402,
+ 554, 8746, 4650, 12842, 2602, 10794, 6698, 14890, 1578, 9770, 5674, 13866, 3626, 11818, 7722, 15914,
+ 298, 8490, 4394, 12586, 2346, 10538, 6442, 14634, 1322, 9514, 5418, 13610, 3370, 11562, 7466, 15658,
+ 810, 9002, 4906, 13098, 2858, 11050, 6954, 15146, 1834, 10026, 5930, 14122, 3882, 12074, 7978, 16170,
+ 170, 8362, 4266, 12458, 2218, 10410, 6314, 14506, 1194, 9386, 5290, 13482, 3242, 11434, 7338, 15530,
+ 682, 8874, 4778, 12970, 2730, 10922, 6826, 15018, 1706, 9898, 5802, 13994, 3754, 11946, 7850, 16042,
+ 426, 8618, 4522, 12714, 2474, 10666, 6570, 14762, 1450, 9642, 5546, 13738, 3498, 11690, 7594, 15786,
+ 938, 9130, 5034, 13226, 2986, 11178, 7082, 15274, 1962, 10154, 6058, 14250, 4010, 12202, 8106, 16298,
+ 106, 8298, 4202, 12394, 2154, 10346, 6250, 14442, 1130, 9322, 5226, 13418, 3178, 11370, 7274, 15466,
+ 618, 8810, 4714, 12906, 2666, 10858, 6762, 14954, 1642, 9834, 5738, 13930, 3690, 11882, 7786, 15978,
+ 362, 8554, 4458, 12650, 2410, 10602, 6506, 14698, 1386, 9578, 5482, 13674, 3434, 11626, 7530, 15722,
+ 874, 9066, 4970, 13162, 2922, 11114, 7018, 15210, 1898, 10090, 5994, 14186, 3946, 12138, 8042, 16234,
+ 234, 8426, 4330, 12522, 2282, 10474, 6378, 14570, 1258, 9450, 5354, 13546, 3306, 11498, 7402, 15594,
+ 746, 8938, 4842, 13034, 2794, 10986, 6890, 15082, 1770, 9962, 5866, 14058, 3818, 12010, 7914, 16106,
+ 490, 8682, 4586, 12778, 2538, 10730, 6634, 14826, 1514, 9706, 5610, 13802, 3562, 11754, 7658, 15850,
+ 1002, 9194, 5098, 13290, 3050, 11242, 7146, 15338, 2026, 10218, 6122, 14314, 4074, 12266, 8170, 16362,
+ 26, 8218, 4122, 12314, 2074, 10266, 6170, 14362, 1050, 9242, 5146, 13338, 3098, 11290, 7194, 15386,
+ 538, 8730, 4634, 12826, 2586, 10778, 6682, 14874, 1562, 9754, 5658, 13850, 3610, 11802, 7706, 15898,
+ 282, 8474, 4378, 12570, 2330, 10522, 6426, 14618, 1306, 9498, 5402, 13594, 3354, 11546, 7450, 15642,
+ 794, 8986, 4890, 13082, 2842, 11034, 6938, 15130, 1818, 10010, 5914, 14106, 3866, 12058, 7962, 16154,
+ 154, 8346, 4250, 12442, 2202, 10394, 6298, 14490, 1178, 9370, 5274, 13466, 3226, 11418, 7322, 15514,
+ 666, 8858, 4762, 12954, 2714, 10906, 6810, 15002, 1690, 9882, 5786, 13978, 3738, 11930, 7834, 16026,
+ 410, 8602, 4506, 12698, 2458, 10650, 6554, 14746, 1434, 9626, 5530, 13722, 3482, 11674, 7578, 15770,
+ 922, 9114, 5018, 13210, 2970, 11162, 7066, 15258, 1946, 10138, 6042, 14234, 3994, 12186, 8090, 16282,
+ 90, 8282, 4186, 12378, 2138, 10330, 6234, 14426, 1114, 9306, 5210, 13402, 3162, 11354, 7258, 15450,
+ 602, 8794, 4698, 12890, 2650, 10842, 6746, 14938, 1626, 9818, 5722, 13914, 3674, 11866, 7770, 15962,
+ 346, 8538, 4442, 12634, 2394, 10586, 6490, 14682, 1370, 9562, 5466, 13658, 3418, 11610, 7514, 15706,
+ 858, 9050, 4954, 13146, 2906, 11098, 7002, 15194, 1882, 10074, 5978, 14170, 3930, 12122, 8026, 16218,
+ 218, 8410, 4314, 12506, 2266, 10458, 6362, 14554, 1242, 9434, 5338, 13530, 3290, 11482, 7386, 15578,
+ 730, 8922, 4826, 13018, 2778, 10970, 6874, 15066, 1754, 9946, 5850, 14042, 3802, 11994, 7898, 16090,
+ 474, 8666, 4570, 12762, 2522, 10714, 6618, 14810, 1498, 9690, 5594, 13786, 3546, 11738, 7642, 15834,
+ 986, 9178, 5082, 13274, 3034, 11226, 7130, 15322, 2010, 10202, 6106, 14298, 4058, 12250, 8154, 16346,
+ 58, 8250, 4154, 12346, 2106, 10298, 6202, 14394, 1082, 9274, 5178, 13370, 3130, 11322, 7226, 15418,
+ 570, 8762, 4666, 12858, 2618, 10810, 6714, 14906, 1594, 9786, 5690, 13882, 3642, 11834, 7738, 15930,
+ 314, 8506, 4410, 12602, 2362, 10554, 6458, 14650, 1338, 9530, 5434, 13626, 3386, 11578, 7482, 15674,
+ 826, 9018, 4922, 13114, 2874, 11066, 6970, 15162, 1850, 10042, 5946, 14138, 3898, 12090, 7994, 16186,
+ 186, 8378, 4282, 12474, 2234, 10426, 6330, 14522, 1210, 9402, 5306, 13498, 3258, 11450, 7354, 15546,
+ 698, 8890, 4794, 12986, 2746, 10938, 6842, 15034, 1722, 9914, 5818, 14010, 3770, 11962, 7866, 16058,
+ 442, 8634, 4538, 12730, 2490, 10682, 6586, 14778, 1466, 9658, 5562, 13754, 3514, 11706, 7610, 15802,
+ 954, 9146, 5050, 13242, 3002, 11194, 7098, 15290, 1978, 10170, 6074, 14266, 4026, 12218, 8122, 16314,
+ 122, 8314, 4218, 12410, 2170, 10362, 6266, 14458, 1146, 9338, 5242, 13434, 3194, 11386, 7290, 15482,
+ 634, 8826, 4730, 12922, 2682, 10874, 6778, 14970, 1658, 9850, 5754, 13946, 3706, 11898, 7802, 15994,
+ 378, 8570, 4474, 12666, 2426, 10618, 6522, 14714, 1402, 9594, 5498, 13690, 3450, 11642, 7546, 15738,
+ 890, 9082, 4986, 13178, 2938, 11130, 7034, 15226, 1914, 10106, 6010, 14202, 3962, 12154, 8058, 16250,
+ 250, 8442, 4346, 12538, 2298, 10490, 6394, 14586, 1274, 9466, 5370, 13562, 3322, 11514, 7418, 15610,
+ 762, 8954, 4858, 13050, 2810, 11002, 6906, 15098, 1786, 9978, 5882, 14074, 3834, 12026, 7930, 16122,
+ 506, 8698, 4602, 12794, 2554, 10746, 6650, 14842, 1530, 9722, 5626, 13818, 3578, 11770, 7674, 15866,
+ 1018, 9210, 5114, 13306, 3066, 11258, 7162, 15354, 2042, 10234, 6138, 14330, 4090, 12282, 8186, 16378,
+ 6, 8198, 4102, 12294, 2054, 10246, 6150, 14342, 1030, 9222, 5126, 13318, 3078, 11270, 7174, 15366,
+ 518, 8710, 4614, 12806, 2566, 10758, 6662, 14854, 1542, 9734, 5638, 13830, 3590, 11782, 7686, 15878,
+ 262, 8454, 4358, 12550, 2310, 10502, 6406, 14598, 1286, 9478, 5382, 13574, 3334, 11526, 7430, 15622,
+ 774, 8966, 4870, 13062, 2822, 11014, 6918, 15110, 1798, 9990, 5894, 14086, 3846, 12038, 7942, 16134,
+ 134, 8326, 4230, 12422, 2182, 10374, 6278, 14470, 1158, 9350, 5254, 13446, 3206, 11398, 7302, 15494,
+ 646, 8838, 4742, 12934, 2694, 10886, 6790, 14982, 1670, 9862, 5766, 13958, 3718, 11910, 7814, 16006,
+ 390, 8582, 4486, 12678, 2438, 10630, 6534, 14726, 1414, 9606, 5510, 13702, 3462, 11654, 7558, 15750,
+ 902, 9094, 4998, 13190, 2950, 11142, 7046, 15238, 1926, 10118, 6022, 14214, 3974, 12166, 8070, 16262,
+ 70, 8262, 4166, 12358, 2118, 10310, 6214, 14406, 1094, 9286, 5190, 13382, 3142, 11334, 7238, 15430,
+ 582, 8774, 4678, 12870, 2630, 10822, 6726, 14918, 1606, 9798, 5702, 13894, 3654, 11846, 7750, 15942,
+ 326, 8518, 4422, 12614, 2374, 10566, 6470, 14662, 1350, 9542, 5446, 13638, 3398, 11590, 7494, 15686,
+ 838, 9030, 4934, 13126, 2886, 11078, 6982, 15174, 1862, 10054, 5958, 14150, 3910, 12102, 8006, 16198,
+ 198, 8390, 4294, 12486, 2246, 10438, 6342, 14534, 1222, 9414, 5318, 13510, 3270, 11462, 7366, 15558,
+ 710, 8902, 4806, 12998, 2758, 10950, 6854, 15046, 1734, 9926, 5830, 14022, 3782, 11974, 7878, 16070,
+ 454, 8646, 4550, 12742, 2502, 10694, 6598, 14790, 1478, 9670, 5574, 13766, 3526, 11718, 7622, 15814,
+ 966, 9158, 5062, 13254, 3014, 11206, 7110, 15302, 1990, 10182, 6086, 14278, 4038, 12230, 8134, 16326,
+ 38, 8230, 4134, 12326, 2086, 10278, 6182, 14374, 1062, 9254, 5158, 13350, 3110, 11302, 7206, 15398,
+ 550, 8742, 4646, 12838, 2598, 10790, 6694, 14886, 1574, 9766, 5670, 13862, 3622, 11814, 7718, 15910,
+ 294, 8486, 4390, 12582, 2342, 10534, 6438, 14630, 1318, 9510, 5414, 13606, 3366, 11558, 7462, 15654,
+ 806, 8998, 4902, 13094, 2854, 11046, 6950, 15142, 1830, 10022, 5926, 14118, 3878, 12070, 7974, 16166,
+ 166, 8358, 4262, 12454, 2214, 10406, 6310, 14502, 1190, 9382, 5286, 13478, 3238, 11430, 7334, 15526,
+ 678, 8870, 4774, 12966, 2726, 10918, 6822, 15014, 1702, 9894, 5798, 13990, 3750, 11942, 7846, 16038,
+ 422, 8614, 4518, 12710, 2470, 10662, 6566, 14758, 1446, 9638, 5542, 13734, 3494, 11686, 7590, 15782,
+ 934, 9126, 5030, 13222, 2982, 11174, 7078, 15270, 1958, 10150, 6054, 14246, 4006, 12198, 8102, 16294,
+ 102, 8294, 4198, 12390, 2150, 10342, 6246, 14438, 1126, 9318, 5222, 13414, 3174, 11366, 7270, 15462,
+ 614, 8806, 4710, 12902, 2662, 10854, 6758, 14950, 1638, 9830, 5734, 13926, 3686, 11878, 7782, 15974,
+ 358, 8550, 4454, 12646, 2406, 10598, 6502, 14694, 1382, 9574, 5478, 13670, 3430, 11622, 7526, 15718,
+ 870, 9062, 4966, 13158, 2918, 11110, 7014, 15206, 1894, 10086, 5990, 14182, 3942, 12134, 8038, 16230,
+ 230, 8422, 4326, 12518, 2278, 10470, 6374, 14566, 1254, 9446, 5350, 13542, 3302, 11494, 7398, 15590,
+ 742, 8934, 4838, 13030, 2790, 10982, 6886, 15078, 1766, 9958, 5862, 14054, 3814, 12006, 7910, 16102,
+ 486, 8678, 4582, 12774, 2534, 10726, 6630, 14822, 1510, 9702, 5606, 13798, 3558, 11750, 7654, 15846,
+ 998, 9190, 5094, 13286, 3046, 11238, 7142, 15334, 2022, 10214, 6118, 14310, 4070, 12262, 8166, 16358,
+ 22, 8214, 4118, 12310, 2070, 10262, 6166, 14358, 1046, 9238, 5142, 13334, 3094, 11286, 7190, 15382,
+ 534, 8726, 4630, 12822, 2582, 10774, 6678, 14870, 1558, 9750, 5654, 13846, 3606, 11798, 7702, 15894,
+ 278, 8470, 4374, 12566, 2326, 10518, 6422, 14614, 1302, 9494, 5398, 13590, 3350, 11542, 7446, 15638,
+ 790, 8982, 4886, 13078, 2838, 11030, 6934, 15126, 1814, 10006, 5910, 14102, 3862, 12054, 7958, 16150,
+ 150, 8342, 4246, 12438, 2198, 10390, 6294, 14486, 1174, 9366, 5270, 13462, 3222, 11414, 7318, 15510,
+ 662, 8854, 4758, 12950, 2710, 10902, 6806, 14998, 1686, 9878, 5782, 13974, 3734, 11926, 7830, 16022,
+ 406, 8598, 4502, 12694, 2454, 10646, 6550, 14742, 1430, 9622, 5526, 13718, 3478, 11670, 7574, 15766,
+ 918, 9110, 5014, 13206, 2966, 11158, 7062, 15254, 1942, 10134, 6038, 14230, 3990, 12182, 8086, 16278,
+ 86, 8278, 4182, 12374, 2134, 10326, 6230, 14422, 1110, 9302, 5206, 13398, 3158, 11350, 7254, 15446,
+ 598, 8790, 4694, 12886, 2646, 10838, 6742, 14934, 1622, 9814, 5718, 13910, 3670, 11862, 7766, 15958,
+ 342, 8534, 4438, 12630, 2390, 10582, 6486, 14678, 1366, 9558, 5462, 13654, 3414, 11606, 7510, 15702,
+ 854, 9046, 4950, 13142, 2902, 11094, 6998, 15190, 1878, 10070, 5974, 14166, 3926, 12118, 8022, 16214,
+ 214, 8406, 4310, 12502, 2262, 10454, 6358, 14550, 1238, 9430, 5334, 13526, 3286, 11478, 7382, 15574,
+ 726, 8918, 4822, 13014, 2774, 10966, 6870, 15062, 1750, 9942, 5846, 14038, 3798, 11990, 7894, 16086,
+ 470, 8662, 4566, 12758, 2518, 10710, 6614, 14806, 1494, 9686, 5590, 13782, 3542, 11734, 7638, 15830,
+ 982, 9174, 5078, 13270, 3030, 11222, 7126, 15318, 2006, 10198, 6102, 14294, 4054, 12246, 8150, 16342,
+ 54, 8246, 4150, 12342, 2102, 10294, 6198, 14390, 1078, 9270, 5174, 13366, 3126, 11318, 7222, 15414,
+ 566, 8758, 4662, 12854, 2614, 10806, 6710, 14902, 1590, 9782, 5686, 13878, 3638, 11830, 7734, 15926,
+ 310, 8502, 4406, 12598, 2358, 10550, 6454, 14646, 1334, 9526, 5430, 13622, 3382, 11574, 7478, 15670,
+ 822, 9014, 4918, 13110, 2870, 11062, 6966, 15158, 1846, 10038, 5942, 14134, 3894, 12086, 7990, 16182,
+ 182, 8374, 4278, 12470, 2230, 10422, 6326, 14518, 1206, 9398, 5302, 13494, 3254, 11446, 7350, 15542,
+ 694, 8886, 4790, 12982, 2742, 10934, 6838, 15030, 1718, 9910, 5814, 14006, 3766, 11958, 7862, 16054,
+ 438, 8630, 4534, 12726, 2486, 10678, 6582, 14774, 1462, 9654, 5558, 13750, 3510, 11702, 7606, 15798,
+ 950, 9142, 5046, 13238, 2998, 11190, 7094, 15286, 1974, 10166, 6070, 14262, 4022, 12214, 8118, 16310,
+ 118, 8310, 4214, 12406, 2166, 10358, 6262, 14454, 1142, 9334, 5238, 13430, 3190, 11382, 7286, 15478,
+ 630, 8822, 4726, 12918, 2678, 10870, 6774, 14966, 1654, 9846, 5750, 13942, 3702, 11894, 7798, 15990,
+ 374, 8566, 4470, 12662, 2422, 10614, 6518, 14710, 1398, 9590, 5494, 13686, 3446, 11638, 7542, 15734,
+ 886, 9078, 4982, 13174, 2934, 11126, 7030, 15222, 1910, 10102, 6006, 14198, 3958, 12150, 8054, 16246,
+ 246, 8438, 4342, 12534, 2294, 10486, 6390, 14582, 1270, 9462, 5366, 13558, 3318, 11510, 7414, 15606,
+ 758, 8950, 4854, 13046, 2806, 10998, 6902, 15094, 1782, 9974, 5878, 14070, 3830, 12022, 7926, 16118,
+ 502, 8694, 4598, 12790, 2550, 10742, 6646, 14838, 1526, 9718, 5622, 13814, 3574, 11766, 7670, 15862,
+ 1014, 9206, 5110, 13302, 3062, 11254, 7158, 15350, 2038, 10230, 6134, 14326, 4086, 12278, 8182, 16374,
+ 14, 8206, 4110, 12302, 2062, 10254, 6158, 14350, 1038, 9230, 5134, 13326, 3086, 11278, 7182, 15374,
+ 526, 8718, 4622, 12814, 2574, 10766, 6670, 14862, 1550, 9742, 5646, 13838, 3598, 11790, 7694, 15886,
+ 270, 8462, 4366, 12558, 2318, 10510, 6414, 14606, 1294, 9486, 5390, 13582, 3342, 11534, 7438, 15630,
+ 782, 8974, 4878, 13070, 2830, 11022, 6926, 15118, 1806, 9998, 5902, 14094, 3854, 12046, 7950, 16142,
+ 142, 8334, 4238, 12430, 2190, 10382, 6286, 14478, 1166, 9358, 5262, 13454, 3214, 11406, 7310, 15502,
+ 654, 8846, 4750, 12942, 2702, 10894, 6798, 14990, 1678, 9870, 5774, 13966, 3726, 11918, 7822, 16014,
+ 398, 8590, 4494, 12686, 2446, 10638, 6542, 14734, 1422, 9614, 5518, 13710, 3470, 11662, 7566, 15758,
+ 910, 9102, 5006, 13198, 2958, 11150, 7054, 15246, 1934, 10126, 6030, 14222, 3982, 12174, 8078, 16270,
+ 78, 8270, 4174, 12366, 2126, 10318, 6222, 14414, 1102, 9294, 5198, 13390, 3150, 11342, 7246, 15438,
+ 590, 8782, 4686, 12878, 2638, 10830, 6734, 14926, 1614, 9806, 5710, 13902, 3662, 11854, 7758, 15950,
+ 334, 8526, 4430, 12622, 2382, 10574, 6478, 14670, 1358, 9550, 5454, 13646, 3406, 11598, 7502, 15694,
+ 846, 9038, 4942, 13134, 2894, 11086, 6990, 15182, 1870, 10062, 5966, 14158, 3918, 12110, 8014, 16206,
+ 206, 8398, 4302, 12494, 2254, 10446, 6350, 14542, 1230, 9422, 5326, 13518, 3278, 11470, 7374, 15566,
+ 718, 8910, 4814, 13006, 2766, 10958, 6862, 15054, 1742, 9934, 5838, 14030, 3790, 11982, 7886, 16078,
+ 462, 8654, 4558, 12750, 2510, 10702, 6606, 14798, 1486, 9678, 5582, 13774, 3534, 11726, 7630, 15822,
+ 974, 9166, 5070, 13262, 3022, 11214, 7118, 15310, 1998, 10190, 6094, 14286, 4046, 12238, 8142, 16334,
+ 46, 8238, 4142, 12334, 2094, 10286, 6190, 14382, 1070, 9262, 5166, 13358, 3118, 11310, 7214, 15406,
+ 558, 8750, 4654, 12846, 2606, 10798, 6702, 14894, 1582, 9774, 5678, 13870, 3630, 11822, 7726, 15918,
+ 302, 8494, 4398, 12590, 2350, 10542, 6446, 14638, 1326, 9518, 5422, 13614, 3374, 11566, 7470, 15662,
+ 814, 9006, 4910, 13102, 2862, 11054, 6958, 15150, 1838, 10030, 5934, 14126, 3886, 12078, 7982, 16174,
+ 174, 8366, 4270, 12462, 2222, 10414, 6318, 14510, 1198, 9390, 5294, 13486, 3246, 11438, 7342, 15534,
+ 686, 8878, 4782, 12974, 2734, 10926, 6830, 15022, 1710, 9902, 5806, 13998, 3758, 11950, 7854, 16046,
+ 430, 8622, 4526, 12718, 2478, 10670, 6574, 14766, 1454, 9646, 5550, 13742, 3502, 11694, 7598, 15790,
+ 942, 9134, 5038, 13230, 2990, 11182, 7086, 15278, 1966, 10158, 6062, 14254, 4014, 12206, 8110, 16302,
+ 110, 8302, 4206, 12398, 2158, 10350, 6254, 14446, 1134, 9326, 5230, 13422, 3182, 11374, 7278, 15470,
+ 622, 8814, 4718, 12910, 2670, 10862, 6766, 14958, 1646, 9838, 5742, 13934, 3694, 11886, 7790, 15982,
+ 366, 8558, 4462, 12654, 2414, 10606, 6510, 14702, 1390, 9582, 5486, 13678, 3438, 11630, 7534, 15726,
+ 878, 9070, 4974, 13166, 2926, 11118, 7022, 15214, 1902, 10094, 5998, 14190, 3950, 12142, 8046, 16238,
+ 238, 8430, 4334, 12526, 2286, 10478, 6382, 14574, 1262, 9454, 5358, 13550, 3310, 11502, 7406, 15598,
+ 750, 8942, 4846, 13038, 2798, 10990, 6894, 15086, 1774, 9966, 5870, 14062, 3822, 12014, 7918, 16110,
+ 494, 8686, 4590, 12782, 2542, 10734, 6638, 14830, 1518, 9710, 5614, 13806, 3566, 11758, 7662, 15854,
+ 1006, 9198, 5102, 13294, 3054, 11246, 7150, 15342, 2030, 10222, 6126, 14318, 4078, 12270, 8174, 16366,
+ 30, 8222, 4126, 12318, 2078, 10270, 6174, 14366, 1054, 9246, 5150, 13342, 3102, 11294, 7198, 15390,
+ 542, 8734, 4638, 12830, 2590, 10782, 6686, 14878, 1566, 9758, 5662, 13854, 3614, 11806, 7710, 15902,
+ 286, 8478, 4382, 12574, 2334, 10526, 6430, 14622, 1310, 9502, 5406, 13598, 3358, 11550, 7454, 15646,
+ 798, 8990, 4894, 13086, 2846, 11038, 6942, 15134, 1822, 10014, 5918, 14110, 3870, 12062, 7966, 16158,
+ 158, 8350, 4254, 12446, 2206, 10398, 6302, 14494, 1182, 9374, 5278, 13470, 3230, 11422, 7326, 15518,
+ 670, 8862, 4766, 12958, 2718, 10910, 6814, 15006, 1694, 9886, 5790, 13982, 3742, 11934, 7838, 16030,
+ 414, 8606, 4510, 12702, 2462, 10654, 6558, 14750, 1438, 9630, 5534, 13726, 3486, 11678, 7582, 15774,
+ 926, 9118, 5022, 13214, 2974, 11166, 7070, 15262, 1950, 10142, 6046, 14238, 3998, 12190, 8094, 16286,
+ 94, 8286, 4190, 12382, 2142, 10334, 6238, 14430, 1118, 9310, 5214, 13406, 3166, 11358, 7262, 15454,
+ 606, 8798, 4702, 12894, 2654, 10846, 6750, 14942, 1630, 9822, 5726, 13918, 3678, 11870, 7774, 15966,
+ 350, 8542, 4446, 12638, 2398, 10590, 6494, 14686, 1374, 9566, 5470, 13662, 3422, 11614, 7518, 15710,
+ 862, 9054, 4958, 13150, 2910, 11102, 7006, 15198, 1886, 10078, 5982, 14174, 3934, 12126, 8030, 16222,
+ 222, 8414, 4318, 12510, 2270, 10462, 6366, 14558, 1246, 9438, 5342, 13534, 3294, 11486, 7390, 15582,
+ 734, 8926, 4830, 13022, 2782, 10974, 6878, 15070, 1758, 9950, 5854, 14046, 3806, 11998, 7902, 16094,
+ 478, 8670, 4574, 12766, 2526, 10718, 6622, 14814, 1502, 9694, 5598, 13790, 3550, 11742, 7646, 15838,
+ 990, 9182, 5086, 13278, 3038, 11230, 7134, 15326, 2014, 10206, 6110, 14302, 4062, 12254, 8158, 16350,
+ 62, 8254, 4158, 12350, 2110, 10302, 6206, 14398, 1086, 9278, 5182, 13374, 3134, 11326, 7230, 15422,
+ 574, 8766, 4670, 12862, 2622, 10814, 6718, 14910, 1598, 9790, 5694, 13886, 3646, 11838, 7742, 15934,
+ 318, 8510, 4414, 12606, 2366, 10558, 6462, 14654, 1342, 9534, 5438, 13630, 3390, 11582, 7486, 15678,
+ 830, 9022, 4926, 13118, 2878, 11070, 6974, 15166, 1854, 10046, 5950, 14142, 3902, 12094, 7998, 16190,
+ 190, 8382, 4286, 12478, 2238, 10430, 6334, 14526, 1214, 9406, 5310, 13502, 3262, 11454, 7358, 15550,
+ 702, 8894, 4798, 12990, 2750, 10942, 6846, 15038, 1726, 9918, 5822, 14014, 3774, 11966, 7870, 16062,
+ 446, 8638, 4542, 12734, 2494, 10686, 6590, 14782, 1470, 9662, 5566, 13758, 3518, 11710, 7614, 15806,
+ 958, 9150, 5054, 13246, 3006, 11198, 7102, 15294, 1982, 10174, 6078, 14270, 4030, 12222, 8126, 16318,
+ 126, 8318, 4222, 12414, 2174, 10366, 6270, 14462, 1150, 9342, 5246, 13438, 3198, 11390, 7294, 15486,
+ 638, 8830, 4734, 12926, 2686, 10878, 6782, 14974, 1662, 9854, 5758, 13950, 3710, 11902, 7806, 15998,
+ 382, 8574, 4478, 12670, 2430, 10622, 6526, 14718, 1406, 9598, 5502, 13694, 3454, 11646, 7550, 15742,
+ 894, 9086, 4990, 13182, 2942, 11134, 7038, 15230, 1918, 10110, 6014, 14206, 3966, 12158, 8062, 16254,
+ 254, 8446, 4350, 12542, 2302, 10494, 6398, 14590, 1278, 9470, 5374, 13566, 3326, 11518, 7422, 15614,
+ 766, 8958, 4862, 13054, 2814, 11006, 6910, 15102, 1790, 9982, 5886, 14078, 3838, 12030, 7934, 16126,
+ 510, 8702, 4606, 12798, 2558, 10750, 6654, 14846, 1534, 9726, 5630, 13822, 3582, 11774, 7678, 15870,
+ 1022, 9214, 5118, 13310, 3070, 11262, 7166, 15358, 2046, 10238, 6142, 14334, 4094, 12286, 8190, 16382,
+ 1, 8193, 4097, 12289, 2049, 10241, 6145, 14337, 1025, 9217, 5121, 13313, 3073, 11265, 7169, 15361,
+ 513, 8705, 4609, 12801, 2561, 10753, 6657, 14849, 1537, 9729, 5633, 13825, 3585, 11777, 7681, 15873,
+ 257, 8449, 4353, 12545, 2305, 10497, 6401, 14593, 1281, 9473, 5377, 13569, 3329, 11521, 7425, 15617,
+ 769, 8961, 4865, 13057, 2817, 11009, 6913, 15105, 1793, 9985, 5889, 14081, 3841, 12033, 7937, 16129,
+ 129, 8321, 4225, 12417, 2177, 10369, 6273, 14465, 1153, 9345, 5249, 13441, 3201, 11393, 7297, 15489,
+ 641, 8833, 4737, 12929, 2689, 10881, 6785, 14977, 1665, 9857, 5761, 13953, 3713, 11905, 7809, 16001,
+ 385, 8577, 4481, 12673, 2433, 10625, 6529, 14721, 1409, 9601, 5505, 13697, 3457, 11649, 7553, 15745,
+ 897, 9089, 4993, 13185, 2945, 11137, 7041, 15233, 1921, 10113, 6017, 14209, 3969, 12161, 8065, 16257,
+ 65, 8257, 4161, 12353, 2113, 10305, 6209, 14401, 1089, 9281, 5185, 13377, 3137, 11329, 7233, 15425,
+ 577, 8769, 4673, 12865, 2625, 10817, 6721, 14913, 1601, 9793, 5697, 13889, 3649, 11841, 7745, 15937,
+ 321, 8513, 4417, 12609, 2369, 10561, 6465, 14657, 1345, 9537, 5441, 13633, 3393, 11585, 7489, 15681,
+ 833, 9025, 4929, 13121, 2881, 11073, 6977, 15169, 1857, 10049, 5953, 14145, 3905, 12097, 8001, 16193,
+ 193, 8385, 4289, 12481, 2241, 10433, 6337, 14529, 1217, 9409, 5313, 13505, 3265, 11457, 7361, 15553,
+ 705, 8897, 4801, 12993, 2753, 10945, 6849, 15041, 1729, 9921, 5825, 14017, 3777, 11969, 7873, 16065,
+ 449, 8641, 4545, 12737, 2497, 10689, 6593, 14785, 1473, 9665, 5569, 13761, 3521, 11713, 7617, 15809,
+ 961, 9153, 5057, 13249, 3009, 11201, 7105, 15297, 1985, 10177, 6081, 14273, 4033, 12225, 8129, 16321,
+ 33, 8225, 4129, 12321, 2081, 10273, 6177, 14369, 1057, 9249, 5153, 13345, 3105, 11297, 7201, 15393,
+ 545, 8737, 4641, 12833, 2593, 10785, 6689, 14881, 1569, 9761, 5665, 13857, 3617, 11809, 7713, 15905,
+ 289, 8481, 4385, 12577, 2337, 10529, 6433, 14625, 1313, 9505, 5409, 13601, 3361, 11553, 7457, 15649,
+ 801, 8993, 4897, 13089, 2849, 11041, 6945, 15137, 1825, 10017, 5921, 14113, 3873, 12065, 7969, 16161,
+ 161, 8353, 4257, 12449, 2209, 10401, 6305, 14497, 1185, 9377, 5281, 13473, 3233, 11425, 7329, 15521,
+ 673, 8865, 4769, 12961, 2721, 10913, 6817, 15009, 1697, 9889, 5793, 13985, 3745, 11937, 7841, 16033,
+ 417, 8609, 4513, 12705, 2465, 10657, 6561, 14753, 1441, 9633, 5537, 13729, 3489, 11681, 7585, 15777,
+ 929, 9121, 5025, 13217, 2977, 11169, 7073, 15265, 1953, 10145, 6049, 14241, 4001, 12193, 8097, 16289,
+ 97, 8289, 4193, 12385, 2145, 10337, 6241, 14433, 1121, 9313, 5217, 13409, 3169, 11361, 7265, 15457,
+ 609, 8801, 4705, 12897, 2657, 10849, 6753, 14945, 1633, 9825, 5729, 13921, 3681, 11873, 7777, 15969,
+ 353, 8545, 4449, 12641, 2401, 10593, 6497, 14689, 1377, 9569, 5473, 13665, 3425, 11617, 7521, 15713,
+ 865, 9057, 4961, 13153, 2913, 11105, 7009, 15201, 1889, 10081, 5985, 14177, 3937, 12129, 8033, 16225,
+ 225, 8417, 4321, 12513, 2273, 10465, 6369, 14561, 1249, 9441, 5345, 13537, 3297, 11489, 7393, 15585,
+ 737, 8929, 4833, 13025, 2785, 10977, 6881, 15073, 1761, 9953, 5857, 14049, 3809, 12001, 7905, 16097,
+ 481, 8673, 4577, 12769, 2529, 10721, 6625, 14817, 1505, 9697, 5601, 13793, 3553, 11745, 7649, 15841,
+ 993, 9185, 5089, 13281, 3041, 11233, 7137, 15329, 2017, 10209, 6113, 14305, 4065, 12257, 8161, 16353,
+ 17, 8209, 4113, 12305, 2065, 10257, 6161, 14353, 1041, 9233, 5137, 13329, 3089, 11281, 7185, 15377,
+ 529, 8721, 4625, 12817, 2577, 10769, 6673, 14865, 1553, 9745, 5649, 13841, 3601, 11793, 7697, 15889,
+ 273, 8465, 4369, 12561, 2321, 10513, 6417, 14609, 1297, 9489, 5393, 13585, 3345, 11537, 7441, 15633,
+ 785, 8977, 4881, 13073, 2833, 11025, 6929, 15121, 1809, 10001, 5905, 14097, 3857, 12049, 7953, 16145,
+ 145, 8337, 4241, 12433, 2193, 10385, 6289, 14481, 1169, 9361, 5265, 13457, 3217, 11409, 7313, 15505,
+ 657, 8849, 4753, 12945, 2705, 10897, 6801, 14993, 1681, 9873, 5777, 13969, 3729, 11921, 7825, 16017,
+ 401, 8593, 4497, 12689, 2449, 10641, 6545, 14737, 1425, 9617, 5521, 13713, 3473, 11665, 7569, 15761,
+ 913, 9105, 5009, 13201, 2961, 11153, 7057, 15249, 1937, 10129, 6033, 14225, 3985, 12177, 8081, 16273,
+ 81, 8273, 4177, 12369, 2129, 10321, 6225, 14417, 1105, 9297, 5201, 13393, 3153, 11345, 7249, 15441,
+ 593, 8785, 4689, 12881, 2641, 10833, 6737, 14929, 1617, 9809, 5713, 13905, 3665, 11857, 7761, 15953,
+ 337, 8529, 4433, 12625, 2385, 10577, 6481, 14673, 1361, 9553, 5457, 13649, 3409, 11601, 7505, 15697,
+ 849, 9041, 4945, 13137, 2897, 11089, 6993, 15185, 1873, 10065, 5969, 14161, 3921, 12113, 8017, 16209,
+ 209, 8401, 4305, 12497, 2257, 10449, 6353, 14545, 1233, 9425, 5329, 13521, 3281, 11473, 7377, 15569,
+ 721, 8913, 4817, 13009, 2769, 10961, 6865, 15057, 1745, 9937, 5841, 14033, 3793, 11985, 7889, 16081,
+ 465, 8657, 4561, 12753, 2513, 10705, 6609, 14801, 1489, 9681, 5585, 13777, 3537, 11729, 7633, 15825,
+ 977, 9169, 5073, 13265, 3025, 11217, 7121, 15313, 2001, 10193, 6097, 14289, 4049, 12241, 8145, 16337,
+ 49, 8241, 4145, 12337, 2097, 10289, 6193, 14385, 1073, 9265, 5169, 13361, 3121, 11313, 7217, 15409,
+ 561, 8753, 4657, 12849, 2609, 10801, 6705, 14897, 1585, 9777, 5681, 13873, 3633, 11825, 7729, 15921,
+ 305, 8497, 4401, 12593, 2353, 10545, 6449, 14641, 1329, 9521, 5425, 13617, 3377, 11569, 7473, 15665,
+ 817, 9009, 4913, 13105, 2865, 11057, 6961, 15153, 1841, 10033, 5937, 14129, 3889, 12081, 7985, 16177,
+ 177, 8369, 4273, 12465, 2225, 10417, 6321, 14513, 1201, 9393, 5297, 13489, 3249, 11441, 7345, 15537,
+ 689, 8881, 4785, 12977, 2737, 10929, 6833, 15025, 1713, 9905, 5809, 14001, 3761, 11953, 7857, 16049,
+ 433, 8625, 4529, 12721, 2481, 10673, 6577, 14769, 1457, 9649, 5553, 13745, 3505, 11697, 7601, 15793,
+ 945, 9137, 5041, 13233, 2993, 11185, 7089, 15281, 1969, 10161, 6065, 14257, 4017, 12209, 8113, 16305,
+ 113, 8305, 4209, 12401, 2161, 10353, 6257, 14449, 1137, 9329, 5233, 13425, 3185, 11377, 7281, 15473,
+ 625, 8817, 4721, 12913, 2673, 10865, 6769, 14961, 1649, 9841, 5745, 13937, 3697, 11889, 7793, 15985,
+ 369, 8561, 4465, 12657, 2417, 10609, 6513, 14705, 1393, 9585, 5489, 13681, 3441, 11633, 7537, 15729,
+ 881, 9073, 4977, 13169, 2929, 11121, 7025, 15217, 1905, 10097, 6001, 14193, 3953, 12145, 8049, 16241,
+ 241, 8433, 4337, 12529, 2289, 10481, 6385, 14577, 1265, 9457, 5361, 13553, 3313, 11505, 7409, 15601,
+ 753, 8945, 4849, 13041, 2801, 10993, 6897, 15089, 1777, 9969, 5873, 14065, 3825, 12017, 7921, 16113,
+ 497, 8689, 4593, 12785, 2545, 10737, 6641, 14833, 1521, 9713, 5617, 13809, 3569, 11761, 7665, 15857,
+ 1009, 9201, 5105, 13297, 3057, 11249, 7153, 15345, 2033, 10225, 6129, 14321, 4081, 12273, 8177, 16369,
+ 9, 8201, 4105, 12297, 2057, 10249, 6153, 14345, 1033, 9225, 5129, 13321, 3081, 11273, 7177, 15369,
+ 521, 8713, 4617, 12809, 2569, 10761, 6665, 14857, 1545, 9737, 5641, 13833, 3593, 11785, 7689, 15881,
+ 265, 8457, 4361, 12553, 2313, 10505, 6409, 14601, 1289, 9481, 5385, 13577, 3337, 11529, 7433, 15625,
+ 777, 8969, 4873, 13065, 2825, 11017, 6921, 15113, 1801, 9993, 5897, 14089, 3849, 12041, 7945, 16137,
+ 137, 8329, 4233, 12425, 2185, 10377, 6281, 14473, 1161, 9353, 5257, 13449, 3209, 11401, 7305, 15497,
+ 649, 8841, 4745, 12937, 2697, 10889, 6793, 14985, 1673, 9865, 5769, 13961, 3721, 11913, 7817, 16009,
+ 393, 8585, 4489, 12681, 2441, 10633, 6537, 14729, 1417, 9609, 5513, 13705, 3465, 11657, 7561, 15753,
+ 905, 9097, 5001, 13193, 2953, 11145, 7049, 15241, 1929, 10121, 6025, 14217, 3977, 12169, 8073, 16265,
+ 73, 8265, 4169, 12361, 2121, 10313, 6217, 14409, 1097, 9289, 5193, 13385, 3145, 11337, 7241, 15433,
+ 585, 8777, 4681, 12873, 2633, 10825, 6729, 14921, 1609, 9801, 5705, 13897, 3657, 11849, 7753, 15945,
+ 329, 8521, 4425, 12617, 2377, 10569, 6473, 14665, 1353, 9545, 5449, 13641, 3401, 11593, 7497, 15689,
+ 841, 9033, 4937, 13129, 2889, 11081, 6985, 15177, 1865, 10057, 5961, 14153, 3913, 12105, 8009, 16201,
+ 201, 8393, 4297, 12489, 2249, 10441, 6345, 14537, 1225, 9417, 5321, 13513, 3273, 11465, 7369, 15561,
+ 713, 8905, 4809, 13001, 2761, 10953, 6857, 15049, 1737, 9929, 5833, 14025, 3785, 11977, 7881, 16073,
+ 457, 8649, 4553, 12745, 2505, 10697, 6601, 14793, 1481, 9673, 5577, 13769, 3529, 11721, 7625, 15817,
+ 969, 9161, 5065, 13257, 3017, 11209, 7113, 15305, 1993, 10185, 6089, 14281, 4041, 12233, 8137, 16329,
+ 41, 8233, 4137, 12329, 2089, 10281, 6185, 14377, 1065, 9257, 5161, 13353, 3113, 11305, 7209, 15401,
+ 553, 8745, 4649, 12841, 2601, 10793, 6697, 14889, 1577, 9769, 5673, 13865, 3625, 11817, 7721, 15913,
+ 297, 8489, 4393, 12585, 2345, 10537, 6441, 14633, 1321, 9513, 5417, 13609, 3369, 11561, 7465, 15657,
+ 809, 9001, 4905, 13097, 2857, 11049, 6953, 15145, 1833, 10025, 5929, 14121, 3881, 12073, 7977, 16169,
+ 169, 8361, 4265, 12457, 2217, 10409, 6313, 14505, 1193, 9385, 5289, 13481, 3241, 11433, 7337, 15529,
+ 681, 8873, 4777, 12969, 2729, 10921, 6825, 15017, 1705, 9897, 5801, 13993, 3753, 11945, 7849, 16041,
+ 425, 8617, 4521, 12713, 2473, 10665, 6569, 14761, 1449, 9641, 5545, 13737, 3497, 11689, 7593, 15785,
+ 937, 9129, 5033, 13225, 2985, 11177, 7081, 15273, 1961, 10153, 6057, 14249, 4009, 12201, 8105, 16297,
+ 105, 8297, 4201, 12393, 2153, 10345, 6249, 14441, 1129, 9321, 5225, 13417, 3177, 11369, 7273, 15465,
+ 617, 8809, 4713, 12905, 2665, 10857, 6761, 14953, 1641, 9833, 5737, 13929, 3689, 11881, 7785, 15977,
+ 361, 8553, 4457, 12649, 2409, 10601, 6505, 14697, 1385, 9577, 5481, 13673, 3433, 11625, 7529, 15721,
+ 873, 9065, 4969, 13161, 2921, 11113, 7017, 15209, 1897, 10089, 5993, 14185, 3945, 12137, 8041, 16233,
+ 233, 8425, 4329, 12521, 2281, 10473, 6377, 14569, 1257, 9449, 5353, 13545, 3305, 11497, 7401, 15593,
+ 745, 8937, 4841, 13033, 2793, 10985, 6889, 15081, 1769, 9961, 5865, 14057, 3817, 12009, 7913, 16105,
+ 489, 8681, 4585, 12777, 2537, 10729, 6633, 14825, 1513, 9705, 5609, 13801, 3561, 11753, 7657, 15849,
+ 1001, 9193, 5097, 13289, 3049, 11241, 7145, 15337, 2025, 10217, 6121, 14313, 4073, 12265, 8169, 16361,
+ 25, 8217, 4121, 12313, 2073, 10265, 6169, 14361, 1049, 9241, 5145, 13337, 3097, 11289, 7193, 15385,
+ 537, 8729, 4633, 12825, 2585, 10777, 6681, 14873, 1561, 9753, 5657, 13849, 3609, 11801, 7705, 15897,
+ 281, 8473, 4377, 12569, 2329, 10521, 6425, 14617, 1305, 9497, 5401, 13593, 3353, 11545, 7449, 15641,
+ 793, 8985, 4889, 13081, 2841, 11033, 6937, 15129, 1817, 10009, 5913, 14105, 3865, 12057, 7961, 16153,
+ 153, 8345, 4249, 12441, 2201, 10393, 6297, 14489, 1177, 9369, 5273, 13465, 3225, 11417, 7321, 15513,
+ 665, 8857, 4761, 12953, 2713, 10905, 6809, 15001, 1689, 9881, 5785, 13977, 3737, 11929, 7833, 16025,
+ 409, 8601, 4505, 12697, 2457, 10649, 6553, 14745, 1433, 9625, 5529, 13721, 3481, 11673, 7577, 15769,
+ 921, 9113, 5017, 13209, 2969, 11161, 7065, 15257, 1945, 10137, 6041, 14233, 3993, 12185, 8089, 16281,
+ 89, 8281, 4185, 12377, 2137, 10329, 6233, 14425, 1113, 9305, 5209, 13401, 3161, 11353, 7257, 15449,
+ 601, 8793, 4697, 12889, 2649, 10841, 6745, 14937, 1625, 9817, 5721, 13913, 3673, 11865, 7769, 15961,
+ 345, 8537, 4441, 12633, 2393, 10585, 6489, 14681, 1369, 9561, 5465, 13657, 3417, 11609, 7513, 15705,
+ 857, 9049, 4953, 13145, 2905, 11097, 7001, 15193, 1881, 10073, 5977, 14169, 3929, 12121, 8025, 16217,
+ 217, 8409, 4313, 12505, 2265, 10457, 6361, 14553, 1241, 9433, 5337, 13529, 3289, 11481, 7385, 15577,
+ 729, 8921, 4825, 13017, 2777, 10969, 6873, 15065, 1753, 9945, 5849, 14041, 3801, 11993, 7897, 16089,
+ 473, 8665, 4569, 12761, 2521, 10713, 6617, 14809, 1497, 9689, 5593, 13785, 3545, 11737, 7641, 15833,
+ 985, 9177, 5081, 13273, 3033, 11225, 7129, 15321, 2009, 10201, 6105, 14297, 4057, 12249, 8153, 16345,
+ 57, 8249, 4153, 12345, 2105, 10297, 6201, 14393, 1081, 9273, 5177, 13369, 3129, 11321, 7225, 15417,
+ 569, 8761, 4665, 12857, 2617, 10809, 6713, 14905, 1593, 9785, 5689, 13881, 3641, 11833, 7737, 15929,
+ 313, 8505, 4409, 12601, 2361, 10553, 6457, 14649, 1337, 9529, 5433, 13625, 3385, 11577, 7481, 15673,
+ 825, 9017, 4921, 13113, 2873, 11065, 6969, 15161, 1849, 10041, 5945, 14137, 3897, 12089, 7993, 16185,
+ 185, 8377, 4281, 12473, 2233, 10425, 6329, 14521, 1209, 9401, 5305, 13497, 3257, 11449, 7353, 15545,
+ 697, 8889, 4793, 12985, 2745, 10937, 6841, 15033, 1721, 9913, 5817, 14009, 3769, 11961, 7865, 16057,
+ 441, 8633, 4537, 12729, 2489, 10681, 6585, 14777, 1465, 9657, 5561, 13753, 3513, 11705, 7609, 15801,
+ 953, 9145, 5049, 13241, 3001, 11193, 7097, 15289, 1977, 10169, 6073, 14265, 4025, 12217, 8121, 16313,
+ 121, 8313, 4217, 12409, 2169, 10361, 6265, 14457, 1145, 9337, 5241, 13433, 3193, 11385, 7289, 15481,
+ 633, 8825, 4729, 12921, 2681, 10873, 6777, 14969, 1657, 9849, 5753, 13945, 3705, 11897, 7801, 15993,
+ 377, 8569, 4473, 12665, 2425, 10617, 6521, 14713, 1401, 9593, 5497, 13689, 3449, 11641, 7545, 15737,
+ 889, 9081, 4985, 13177, 2937, 11129, 7033, 15225, 1913, 10105, 6009, 14201, 3961, 12153, 8057, 16249,
+ 249, 8441, 4345, 12537, 2297, 10489, 6393, 14585, 1273, 9465, 5369, 13561, 3321, 11513, 7417, 15609,
+ 761, 8953, 4857, 13049, 2809, 11001, 6905, 15097, 1785, 9977, 5881, 14073, 3833, 12025, 7929, 16121,
+ 505, 8697, 4601, 12793, 2553, 10745, 6649, 14841, 1529, 9721, 5625, 13817, 3577, 11769, 7673, 15865,
+ 1017, 9209, 5113, 13305, 3065, 11257, 7161, 15353, 2041, 10233, 6137, 14329, 4089, 12281, 8185, 16377,
+ 5, 8197, 4101, 12293, 2053, 10245, 6149, 14341, 1029, 9221, 5125, 13317, 3077, 11269, 7173, 15365,
+ 517, 8709, 4613, 12805, 2565, 10757, 6661, 14853, 1541, 9733, 5637, 13829, 3589, 11781, 7685, 15877,
+ 261, 8453, 4357, 12549, 2309, 10501, 6405, 14597, 1285, 9477, 5381, 13573, 3333, 11525, 7429, 15621,
+ 773, 8965, 4869, 13061, 2821, 11013, 6917, 15109, 1797, 9989, 5893, 14085, 3845, 12037, 7941, 16133,
+ 133, 8325, 4229, 12421, 2181, 10373, 6277, 14469, 1157, 9349, 5253, 13445, 3205, 11397, 7301, 15493,
+ 645, 8837, 4741, 12933, 2693, 10885, 6789, 14981, 1669, 9861, 5765, 13957, 3717, 11909, 7813, 16005,
+ 389, 8581, 4485, 12677, 2437, 10629, 6533, 14725, 1413, 9605, 5509, 13701, 3461, 11653, 7557, 15749,
+ 901, 9093, 4997, 13189, 2949, 11141, 7045, 15237, 1925, 10117, 6021, 14213, 3973, 12165, 8069, 16261,
+ 69, 8261, 4165, 12357, 2117, 10309, 6213, 14405, 1093, 9285, 5189, 13381, 3141, 11333, 7237, 15429,
+ 581, 8773, 4677, 12869, 2629, 10821, 6725, 14917, 1605, 9797, 5701, 13893, 3653, 11845, 7749, 15941,
+ 325, 8517, 4421, 12613, 2373, 10565, 6469, 14661, 1349, 9541, 5445, 13637, 3397, 11589, 7493, 15685,
+ 837, 9029, 4933, 13125, 2885, 11077, 6981, 15173, 1861, 10053, 5957, 14149, 3909, 12101, 8005, 16197,
+ 197, 8389, 4293, 12485, 2245, 10437, 6341, 14533, 1221, 9413, 5317, 13509, 3269, 11461, 7365, 15557,
+ 709, 8901, 4805, 12997, 2757, 10949, 6853, 15045, 1733, 9925, 5829, 14021, 3781, 11973, 7877, 16069,
+ 453, 8645, 4549, 12741, 2501, 10693, 6597, 14789, 1477, 9669, 5573, 13765, 3525, 11717, 7621, 15813,
+ 965, 9157, 5061, 13253, 3013, 11205, 7109, 15301, 1989, 10181, 6085, 14277, 4037, 12229, 8133, 16325,
+ 37, 8229, 4133, 12325, 2085, 10277, 6181, 14373, 1061, 9253, 5157, 13349, 3109, 11301, 7205, 15397,
+ 549, 8741, 4645, 12837, 2597, 10789, 6693, 14885, 1573, 9765, 5669, 13861, 3621, 11813, 7717, 15909,
+ 293, 8485, 4389, 12581, 2341, 10533, 6437, 14629, 1317, 9509, 5413, 13605, 3365, 11557, 7461, 15653,
+ 805, 8997, 4901, 13093, 2853, 11045, 6949, 15141, 1829, 10021, 5925, 14117, 3877, 12069, 7973, 16165,
+ 165, 8357, 4261, 12453, 2213, 10405, 6309, 14501, 1189, 9381, 5285, 13477, 3237, 11429, 7333, 15525,
+ 677, 8869, 4773, 12965, 2725, 10917, 6821, 15013, 1701, 9893, 5797, 13989, 3749, 11941, 7845, 16037,
+ 421, 8613, 4517, 12709, 2469, 10661, 6565, 14757, 1445, 9637, 5541, 13733, 3493, 11685, 7589, 15781,
+ 933, 9125, 5029, 13221, 2981, 11173, 7077, 15269, 1957, 10149, 6053, 14245, 4005, 12197, 8101, 16293,
+ 101, 8293, 4197, 12389, 2149, 10341, 6245, 14437, 1125, 9317, 5221, 13413, 3173, 11365, 7269, 15461,
+ 613, 8805, 4709, 12901, 2661, 10853, 6757, 14949, 1637, 9829, 5733, 13925, 3685, 11877, 7781, 15973,
+ 357, 8549, 4453, 12645, 2405, 10597, 6501, 14693, 1381, 9573, 5477, 13669, 3429, 11621, 7525, 15717,
+ 869, 9061, 4965, 13157, 2917, 11109, 7013, 15205, 1893, 10085, 5989, 14181, 3941, 12133, 8037, 16229,
+ 229, 8421, 4325, 12517, 2277, 10469, 6373, 14565, 1253, 9445, 5349, 13541, 3301, 11493, 7397, 15589,
+ 741, 8933, 4837, 13029, 2789, 10981, 6885, 15077, 1765, 9957, 5861, 14053, 3813, 12005, 7909, 16101,
+ 485, 8677, 4581, 12773, 2533, 10725, 6629, 14821, 1509, 9701, 5605, 13797, 3557, 11749, 7653, 15845,
+ 997, 9189, 5093, 13285, 3045, 11237, 7141, 15333, 2021, 10213, 6117, 14309, 4069, 12261, 8165, 16357,
+ 21, 8213, 4117, 12309, 2069, 10261, 6165, 14357, 1045, 9237, 5141, 13333, 3093, 11285, 7189, 15381,
+ 533, 8725, 4629, 12821, 2581, 10773, 6677, 14869, 1557, 9749, 5653, 13845, 3605, 11797, 7701, 15893,
+ 277, 8469, 4373, 12565, 2325, 10517, 6421, 14613, 1301, 9493, 5397, 13589, 3349, 11541, 7445, 15637,
+ 789, 8981, 4885, 13077, 2837, 11029, 6933, 15125, 1813, 10005, 5909, 14101, 3861, 12053, 7957, 16149,
+ 149, 8341, 4245, 12437, 2197, 10389, 6293, 14485, 1173, 9365, 5269, 13461, 3221, 11413, 7317, 15509,
+ 661, 8853, 4757, 12949, 2709, 10901, 6805, 14997, 1685, 9877, 5781, 13973, 3733, 11925, 7829, 16021,
+ 405, 8597, 4501, 12693, 2453, 10645, 6549, 14741, 1429, 9621, 5525, 13717, 3477, 11669, 7573, 15765,
+ 917, 9109, 5013, 13205, 2965, 11157, 7061, 15253, 1941, 10133, 6037, 14229, 3989, 12181, 8085, 16277,
+ 85, 8277, 4181, 12373, 2133, 10325, 6229, 14421, 1109, 9301, 5205, 13397, 3157, 11349, 7253, 15445,
+ 597, 8789, 4693, 12885, 2645, 10837, 6741, 14933, 1621, 9813, 5717, 13909, 3669, 11861, 7765, 15957,
+ 341, 8533, 4437, 12629, 2389, 10581, 6485, 14677, 1365, 9557, 5461, 13653, 3413, 11605, 7509, 15701,
+ 853, 9045, 4949, 13141, 2901, 11093, 6997, 15189, 1877, 10069, 5973, 14165, 3925, 12117, 8021, 16213,
+ 213, 8405, 4309, 12501, 2261, 10453, 6357, 14549, 1237, 9429, 5333, 13525, 3285, 11477, 7381, 15573,
+ 725, 8917, 4821, 13013, 2773, 10965, 6869, 15061, 1749, 9941, 5845, 14037, 3797, 11989, 7893, 16085,
+ 469, 8661, 4565, 12757, 2517, 10709, 6613, 14805, 1493, 9685, 5589, 13781, 3541, 11733, 7637, 15829,
+ 981, 9173, 5077, 13269, 3029, 11221, 7125, 15317, 2005, 10197, 6101, 14293, 4053, 12245, 8149, 16341,
+ 53, 8245, 4149, 12341, 2101, 10293, 6197, 14389, 1077, 9269, 5173, 13365, 3125, 11317, 7221, 15413,
+ 565, 8757, 4661, 12853, 2613, 10805, 6709, 14901, 1589, 9781, 5685, 13877, 3637, 11829, 7733, 15925,
+ 309, 8501, 4405, 12597, 2357, 10549, 6453, 14645, 1333, 9525, 5429, 13621, 3381, 11573, 7477, 15669,
+ 821, 9013, 4917, 13109, 2869, 11061, 6965, 15157, 1845, 10037, 5941, 14133, 3893, 12085, 7989, 16181,
+ 181, 8373, 4277, 12469, 2229, 10421, 6325, 14517, 1205, 9397, 5301, 13493, 3253, 11445, 7349, 15541,
+ 693, 8885, 4789, 12981, 2741, 10933, 6837, 15029, 1717, 9909, 5813, 14005, 3765, 11957, 7861, 16053,
+ 437, 8629, 4533, 12725, 2485, 10677, 6581, 14773, 1461, 9653, 5557, 13749, 3509, 11701, 7605, 15797,
+ 949, 9141, 5045, 13237, 2997, 11189, 7093, 15285, 1973, 10165, 6069, 14261, 4021, 12213, 8117, 16309,
+ 117, 8309, 4213, 12405, 2165, 10357, 6261, 14453, 1141, 9333, 5237, 13429, 3189, 11381, 7285, 15477,
+ 629, 8821, 4725, 12917, 2677, 10869, 6773, 14965, 1653, 9845, 5749, 13941, 3701, 11893, 7797, 15989,
+ 373, 8565, 4469, 12661, 2421, 10613, 6517, 14709, 1397, 9589, 5493, 13685, 3445, 11637, 7541, 15733,
+ 885, 9077, 4981, 13173, 2933, 11125, 7029, 15221, 1909, 10101, 6005, 14197, 3957, 12149, 8053, 16245,
+ 245, 8437, 4341, 12533, 2293, 10485, 6389, 14581, 1269, 9461, 5365, 13557, 3317, 11509, 7413, 15605,
+ 757, 8949, 4853, 13045, 2805, 10997, 6901, 15093, 1781, 9973, 5877, 14069, 3829, 12021, 7925, 16117,
+ 501, 8693, 4597, 12789, 2549, 10741, 6645, 14837, 1525, 9717, 5621, 13813, 3573, 11765, 7669, 15861,
+ 1013, 9205, 5109, 13301, 3061, 11253, 7157, 15349, 2037, 10229, 6133, 14325, 4085, 12277, 8181, 16373,
+ 13, 8205, 4109, 12301, 2061, 10253, 6157, 14349, 1037, 9229, 5133, 13325, 3085, 11277, 7181, 15373,
+ 525, 8717, 4621, 12813, 2573, 10765, 6669, 14861, 1549, 9741, 5645, 13837, 3597, 11789, 7693, 15885,
+ 269, 8461, 4365, 12557, 2317, 10509, 6413, 14605, 1293, 9485, 5389, 13581, 3341, 11533, 7437, 15629,
+ 781, 8973, 4877, 13069, 2829, 11021, 6925, 15117, 1805, 9997, 5901, 14093, 3853, 12045, 7949, 16141,
+ 141, 8333, 4237, 12429, 2189, 10381, 6285, 14477, 1165, 9357, 5261, 13453, 3213, 11405, 7309, 15501,
+ 653, 8845, 4749, 12941, 2701, 10893, 6797, 14989, 1677, 9869, 5773, 13965, 3725, 11917, 7821, 16013,
+ 397, 8589, 4493, 12685, 2445, 10637, 6541, 14733, 1421, 9613, 5517, 13709, 3469, 11661, 7565, 15757,
+ 909, 9101, 5005, 13197, 2957, 11149, 7053, 15245, 1933, 10125, 6029, 14221, 3981, 12173, 8077, 16269,
+ 77, 8269, 4173, 12365, 2125, 10317, 6221, 14413, 1101, 9293, 5197, 13389, 3149, 11341, 7245, 15437,
+ 589, 8781, 4685, 12877, 2637, 10829, 6733, 14925, 1613, 9805, 5709, 13901, 3661, 11853, 7757, 15949,
+ 333, 8525, 4429, 12621, 2381, 10573, 6477, 14669, 1357, 9549, 5453, 13645, 3405, 11597, 7501, 15693,
+ 845, 9037, 4941, 13133, 2893, 11085, 6989, 15181, 1869, 10061, 5965, 14157, 3917, 12109, 8013, 16205,
+ 205, 8397, 4301, 12493, 2253, 10445, 6349, 14541, 1229, 9421, 5325, 13517, 3277, 11469, 7373, 15565,
+ 717, 8909, 4813, 13005, 2765, 10957, 6861, 15053, 1741, 9933, 5837, 14029, 3789, 11981, 7885, 16077,
+ 461, 8653, 4557, 12749, 2509, 10701, 6605, 14797, 1485, 9677, 5581, 13773, 3533, 11725, 7629, 15821,
+ 973, 9165, 5069, 13261, 3021, 11213, 7117, 15309, 1997, 10189, 6093, 14285, 4045, 12237, 8141, 16333,
+ 45, 8237, 4141, 12333, 2093, 10285, 6189, 14381, 1069, 9261, 5165, 13357, 3117, 11309, 7213, 15405,
+ 557, 8749, 4653, 12845, 2605, 10797, 6701, 14893, 1581, 9773, 5677, 13869, 3629, 11821, 7725, 15917,
+ 301, 8493, 4397, 12589, 2349, 10541, 6445, 14637, 1325, 9517, 5421, 13613, 3373, 11565, 7469, 15661,
+ 813, 9005, 4909, 13101, 2861, 11053, 6957, 15149, 1837, 10029, 5933, 14125, 3885, 12077, 7981, 16173,
+ 173, 8365, 4269, 12461, 2221, 10413, 6317, 14509, 1197, 9389, 5293, 13485, 3245, 11437, 7341, 15533,
+ 685, 8877, 4781, 12973, 2733, 10925, 6829, 15021, 1709, 9901, 5805, 13997, 3757, 11949, 7853, 16045,
+ 429, 8621, 4525, 12717, 2477, 10669, 6573, 14765, 1453, 9645, 5549, 13741, 3501, 11693, 7597, 15789,
+ 941, 9133, 5037, 13229, 2989, 11181, 7085, 15277, 1965, 10157, 6061, 14253, 4013, 12205, 8109, 16301,
+ 109, 8301, 4205, 12397, 2157, 10349, 6253, 14445, 1133, 9325, 5229, 13421, 3181, 11373, 7277, 15469,
+ 621, 8813, 4717, 12909, 2669, 10861, 6765, 14957, 1645, 9837, 5741, 13933, 3693, 11885, 7789, 15981,
+ 365, 8557, 4461, 12653, 2413, 10605, 6509, 14701, 1389, 9581, 5485, 13677, 3437, 11629, 7533, 15725,
+ 877, 9069, 4973, 13165, 2925, 11117, 7021, 15213, 1901, 10093, 5997, 14189, 3949, 12141, 8045, 16237,
+ 237, 8429, 4333, 12525, 2285, 10477, 6381, 14573, 1261, 9453, 5357, 13549, 3309, 11501, 7405, 15597,
+ 749, 8941, 4845, 13037, 2797, 10989, 6893, 15085, 1773, 9965, 5869, 14061, 3821, 12013, 7917, 16109,
+ 493, 8685, 4589, 12781, 2541, 10733, 6637, 14829, 1517, 9709, 5613, 13805, 3565, 11757, 7661, 15853,
+ 1005, 9197, 5101, 13293, 3053, 11245, 7149, 15341, 2029, 10221, 6125, 14317, 4077, 12269, 8173, 16365,
+ 29, 8221, 4125, 12317, 2077, 10269, 6173, 14365, 1053, 9245, 5149, 13341, 3101, 11293, 7197, 15389,
+ 541, 8733, 4637, 12829, 2589, 10781, 6685, 14877, 1565, 9757, 5661, 13853, 3613, 11805, 7709, 15901,
+ 285, 8477, 4381, 12573, 2333, 10525, 6429, 14621, 1309, 9501, 5405, 13597, 3357, 11549, 7453, 15645,
+ 797, 8989, 4893, 13085, 2845, 11037, 6941, 15133, 1821, 10013, 5917, 14109, 3869, 12061, 7965, 16157,
+ 157, 8349, 4253, 12445, 2205, 10397, 6301, 14493, 1181, 9373, 5277, 13469, 3229, 11421, 7325, 15517,
+ 669, 8861, 4765, 12957, 2717, 10909, 6813, 15005, 1693, 9885, 5789, 13981, 3741, 11933, 7837, 16029,
+ 413, 8605, 4509, 12701, 2461, 10653, 6557, 14749, 1437, 9629, 5533, 13725, 3485, 11677, 7581, 15773,
+ 925, 9117, 5021, 13213, 2973, 11165, 7069, 15261, 1949, 10141, 6045, 14237, 3997, 12189, 8093, 16285,
+ 93, 8285, 4189, 12381, 2141, 10333, 6237, 14429, 1117, 9309, 5213, 13405, 3165, 11357, 7261, 15453,
+ 605, 8797, 4701, 12893, 2653, 10845, 6749, 14941, 1629, 9821, 5725, 13917, 3677, 11869, 7773, 15965,
+ 349, 8541, 4445, 12637, 2397, 10589, 6493, 14685, 1373, 9565, 5469, 13661, 3421, 11613, 7517, 15709,
+ 861, 9053, 4957, 13149, 2909, 11101, 7005, 15197, 1885, 10077, 5981, 14173, 3933, 12125, 8029, 16221,
+ 221, 8413, 4317, 12509, 2269, 10461, 6365, 14557, 1245, 9437, 5341, 13533, 3293, 11485, 7389, 15581,
+ 733, 8925, 4829, 13021, 2781, 10973, 6877, 15069, 1757, 9949, 5853, 14045, 3805, 11997, 7901, 16093,
+ 477, 8669, 4573, 12765, 2525, 10717, 6621, 14813, 1501, 9693, 5597, 13789, 3549, 11741, 7645, 15837,
+ 989, 9181, 5085, 13277, 3037, 11229, 7133, 15325, 2013, 10205, 6109, 14301, 4061, 12253, 8157, 16349,
+ 61, 8253, 4157, 12349, 2109, 10301, 6205, 14397, 1085, 9277, 5181, 13373, 3133, 11325, 7229, 15421,
+ 573, 8765, 4669, 12861, 2621, 10813, 6717, 14909, 1597, 9789, 5693, 13885, 3645, 11837, 7741, 15933,
+ 317, 8509, 4413, 12605, 2365, 10557, 6461, 14653, 1341, 9533, 5437, 13629, 3389, 11581, 7485, 15677,
+ 829, 9021, 4925, 13117, 2877, 11069, 6973, 15165, 1853, 10045, 5949, 14141, 3901, 12093, 7997, 16189,
+ 189, 8381, 4285, 12477, 2237, 10429, 6333, 14525, 1213, 9405, 5309, 13501, 3261, 11453, 7357, 15549,
+ 701, 8893, 4797, 12989, 2749, 10941, 6845, 15037, 1725, 9917, 5821, 14013, 3773, 11965, 7869, 16061,
+ 445, 8637, 4541, 12733, 2493, 10685, 6589, 14781, 1469, 9661, 5565, 13757, 3517, 11709, 7613, 15805,
+ 957, 9149, 5053, 13245, 3005, 11197, 7101, 15293, 1981, 10173, 6077, 14269, 4029, 12221, 8125, 16317,
+ 125, 8317, 4221, 12413, 2173, 10365, 6269, 14461, 1149, 9341, 5245, 13437, 3197, 11389, 7293, 15485,
+ 637, 8829, 4733, 12925, 2685, 10877, 6781, 14973, 1661, 9853, 5757, 13949, 3709, 11901, 7805, 15997,
+ 381, 8573, 4477, 12669, 2429, 10621, 6525, 14717, 1405, 9597, 5501, 13693, 3453, 11645, 7549, 15741,
+ 893, 9085, 4989, 13181, 2941, 11133, 7037, 15229, 1917, 10109, 6013, 14205, 3965, 12157, 8061, 16253,
+ 253, 8445, 4349, 12541, 2301, 10493, 6397, 14589, 1277, 9469, 5373, 13565, 3325, 11517, 7421, 15613,
+ 765, 8957, 4861, 13053, 2813, 11005, 6909, 15101, 1789, 9981, 5885, 14077, 3837, 12029, 7933, 16125,
+ 509, 8701, 4605, 12797, 2557, 10749, 6653, 14845, 1533, 9725, 5629, 13821, 3581, 11773, 7677, 15869,
+ 1021, 9213, 5117, 13309, 3069, 11261, 7165, 15357, 2045, 10237, 6141, 14333, 4093, 12285, 8189, 16381,
+ 3, 8195, 4099, 12291, 2051, 10243, 6147, 14339, 1027, 9219, 5123, 13315, 3075, 11267, 7171, 15363,
+ 515, 8707, 4611, 12803, 2563, 10755, 6659, 14851, 1539, 9731, 5635, 13827, 3587, 11779, 7683, 15875,
+ 259, 8451, 4355, 12547, 2307, 10499, 6403, 14595, 1283, 9475, 5379, 13571, 3331, 11523, 7427, 15619,
+ 771, 8963, 4867, 13059, 2819, 11011, 6915, 15107, 1795, 9987, 5891, 14083, 3843, 12035, 7939, 16131,
+ 131, 8323, 4227, 12419, 2179, 10371, 6275, 14467, 1155, 9347, 5251, 13443, 3203, 11395, 7299, 15491,
+ 643, 8835, 4739, 12931, 2691, 10883, 6787, 14979, 1667, 9859, 5763, 13955, 3715, 11907, 7811, 16003,
+ 387, 8579, 4483, 12675, 2435, 10627, 6531, 14723, 1411, 9603, 5507, 13699, 3459, 11651, 7555, 15747,
+ 899, 9091, 4995, 13187, 2947, 11139, 7043, 15235, 1923, 10115, 6019, 14211, 3971, 12163, 8067, 16259,
+ 67, 8259, 4163, 12355, 2115, 10307, 6211, 14403, 1091, 9283, 5187, 13379, 3139, 11331, 7235, 15427,
+ 579, 8771, 4675, 12867, 2627, 10819, 6723, 14915, 1603, 9795, 5699, 13891, 3651, 11843, 7747, 15939,
+ 323, 8515, 4419, 12611, 2371, 10563, 6467, 14659, 1347, 9539, 5443, 13635, 3395, 11587, 7491, 15683,
+ 835, 9027, 4931, 13123, 2883, 11075, 6979, 15171, 1859, 10051, 5955, 14147, 3907, 12099, 8003, 16195,
+ 195, 8387, 4291, 12483, 2243, 10435, 6339, 14531, 1219, 9411, 5315, 13507, 3267, 11459, 7363, 15555,
+ 707, 8899, 4803, 12995, 2755, 10947, 6851, 15043, 1731, 9923, 5827, 14019, 3779, 11971, 7875, 16067,
+ 451, 8643, 4547, 12739, 2499, 10691, 6595, 14787, 1475, 9667, 5571, 13763, 3523, 11715, 7619, 15811,
+ 963, 9155, 5059, 13251, 3011, 11203, 7107, 15299, 1987, 10179, 6083, 14275, 4035, 12227, 8131, 16323,
+ 35, 8227, 4131, 12323, 2083, 10275, 6179, 14371, 1059, 9251, 5155, 13347, 3107, 11299, 7203, 15395,
+ 547, 8739, 4643, 12835, 2595, 10787, 6691, 14883, 1571, 9763, 5667, 13859, 3619, 11811, 7715, 15907,
+ 291, 8483, 4387, 12579, 2339, 10531, 6435, 14627, 1315, 9507, 5411, 13603, 3363, 11555, 7459, 15651,
+ 803, 8995, 4899, 13091, 2851, 11043, 6947, 15139, 1827, 10019, 5923, 14115, 3875, 12067, 7971, 16163,
+ 163, 8355, 4259, 12451, 2211, 10403, 6307, 14499, 1187, 9379, 5283, 13475, 3235, 11427, 7331, 15523,
+ 675, 8867, 4771, 12963, 2723, 10915, 6819, 15011, 1699, 9891, 5795, 13987, 3747, 11939, 7843, 16035,
+ 419, 8611, 4515, 12707, 2467, 10659, 6563, 14755, 1443, 9635, 5539, 13731, 3491, 11683, 7587, 15779,
+ 931, 9123, 5027, 13219, 2979, 11171, 7075, 15267, 1955, 10147, 6051, 14243, 4003, 12195, 8099, 16291,
+ 99, 8291, 4195, 12387, 2147, 10339, 6243, 14435, 1123, 9315, 5219, 13411, 3171, 11363, 7267, 15459,
+ 611, 8803, 4707, 12899, 2659, 10851, 6755, 14947, 1635, 9827, 5731, 13923, 3683, 11875, 7779, 15971,
+ 355, 8547, 4451, 12643, 2403, 10595, 6499, 14691, 1379, 9571, 5475, 13667, 3427, 11619, 7523, 15715,
+ 867, 9059, 4963, 13155, 2915, 11107, 7011, 15203, 1891, 10083, 5987, 14179, 3939, 12131, 8035, 16227,
+ 227, 8419, 4323, 12515, 2275, 10467, 6371, 14563, 1251, 9443, 5347, 13539, 3299, 11491, 7395, 15587,
+ 739, 8931, 4835, 13027, 2787, 10979, 6883, 15075, 1763, 9955, 5859, 14051, 3811, 12003, 7907, 16099,
+ 483, 8675, 4579, 12771, 2531, 10723, 6627, 14819, 1507, 9699, 5603, 13795, 3555, 11747, 7651, 15843,
+ 995, 9187, 5091, 13283, 3043, 11235, 7139, 15331, 2019, 10211, 6115, 14307, 4067, 12259, 8163, 16355,
+ 19, 8211, 4115, 12307, 2067, 10259, 6163, 14355, 1043, 9235, 5139, 13331, 3091, 11283, 7187, 15379,
+ 531, 8723, 4627, 12819, 2579, 10771, 6675, 14867, 1555, 9747, 5651, 13843, 3603, 11795, 7699, 15891,
+ 275, 8467, 4371, 12563, 2323, 10515, 6419, 14611, 1299, 9491, 5395, 13587, 3347, 11539, 7443, 15635,
+ 787, 8979, 4883, 13075, 2835, 11027, 6931, 15123, 1811, 10003, 5907, 14099, 3859, 12051, 7955, 16147,
+ 147, 8339, 4243, 12435, 2195, 10387, 6291, 14483, 1171, 9363, 5267, 13459, 3219, 11411, 7315, 15507,
+ 659, 8851, 4755, 12947, 2707, 10899, 6803, 14995, 1683, 9875, 5779, 13971, 3731, 11923, 7827, 16019,
+ 403, 8595, 4499, 12691, 2451, 10643, 6547, 14739, 1427, 9619, 5523, 13715, 3475, 11667, 7571, 15763,
+ 915, 9107, 5011, 13203, 2963, 11155, 7059, 15251, 1939, 10131, 6035, 14227, 3987, 12179, 8083, 16275,
+ 83, 8275, 4179, 12371, 2131, 10323, 6227, 14419, 1107, 9299, 5203, 13395, 3155, 11347, 7251, 15443,
+ 595, 8787, 4691, 12883, 2643, 10835, 6739, 14931, 1619, 9811, 5715, 13907, 3667, 11859, 7763, 15955,
+ 339, 8531, 4435, 12627, 2387, 10579, 6483, 14675, 1363, 9555, 5459, 13651, 3411, 11603, 7507, 15699,
+ 851, 9043, 4947, 13139, 2899, 11091, 6995, 15187, 1875, 10067, 5971, 14163, 3923, 12115, 8019, 16211,
+ 211, 8403, 4307, 12499, 2259, 10451, 6355, 14547, 1235, 9427, 5331, 13523, 3283, 11475, 7379, 15571,
+ 723, 8915, 4819, 13011, 2771, 10963, 6867, 15059, 1747, 9939, 5843, 14035, 3795, 11987, 7891, 16083,
+ 467, 8659, 4563, 12755, 2515, 10707, 6611, 14803, 1491, 9683, 5587, 13779, 3539, 11731, 7635, 15827,
+ 979, 9171, 5075, 13267, 3027, 11219, 7123, 15315, 2003, 10195, 6099, 14291, 4051, 12243, 8147, 16339,
+ 51, 8243, 4147, 12339, 2099, 10291, 6195, 14387, 1075, 9267, 5171, 13363, 3123, 11315, 7219, 15411,
+ 563, 8755, 4659, 12851, 2611, 10803, 6707, 14899, 1587, 9779, 5683, 13875, 3635, 11827, 7731, 15923,
+ 307, 8499, 4403, 12595, 2355, 10547, 6451, 14643, 1331, 9523, 5427, 13619, 3379, 11571, 7475, 15667,
+ 819, 9011, 4915, 13107, 2867, 11059, 6963, 15155, 1843, 10035, 5939, 14131, 3891, 12083, 7987, 16179,
+ 179, 8371, 4275, 12467, 2227, 10419, 6323, 14515, 1203, 9395, 5299, 13491, 3251, 11443, 7347, 15539,
+ 691, 8883, 4787, 12979, 2739, 10931, 6835, 15027, 1715, 9907, 5811, 14003, 3763, 11955, 7859, 16051,
+ 435, 8627, 4531, 12723, 2483, 10675, 6579, 14771, 1459, 9651, 5555, 13747, 3507, 11699, 7603, 15795,
+ 947, 9139, 5043, 13235, 2995, 11187, 7091, 15283, 1971, 10163, 6067, 14259, 4019, 12211, 8115, 16307,
+ 115, 8307, 4211, 12403, 2163, 10355, 6259, 14451, 1139, 9331, 5235, 13427, 3187, 11379, 7283, 15475,
+ 627, 8819, 4723, 12915, 2675, 10867, 6771, 14963, 1651, 9843, 5747, 13939, 3699, 11891, 7795, 15987,
+ 371, 8563, 4467, 12659, 2419, 10611, 6515, 14707, 1395, 9587, 5491, 13683, 3443, 11635, 7539, 15731,
+ 883, 9075, 4979, 13171, 2931, 11123, 7027, 15219, 1907, 10099, 6003, 14195, 3955, 12147, 8051, 16243,
+ 243, 8435, 4339, 12531, 2291, 10483, 6387, 14579, 1267, 9459, 5363, 13555, 3315, 11507, 7411, 15603,
+ 755, 8947, 4851, 13043, 2803, 10995, 6899, 15091, 1779, 9971, 5875, 14067, 3827, 12019, 7923, 16115,
+ 499, 8691, 4595, 12787, 2547, 10739, 6643, 14835, 1523, 9715, 5619, 13811, 3571, 11763, 7667, 15859,
+ 1011, 9203, 5107, 13299, 3059, 11251, 7155, 15347, 2035, 10227, 6131, 14323, 4083, 12275, 8179, 16371,
+ 11, 8203, 4107, 12299, 2059, 10251, 6155, 14347, 1035, 9227, 5131, 13323, 3083, 11275, 7179, 15371,
+ 523, 8715, 4619, 12811, 2571, 10763, 6667, 14859, 1547, 9739, 5643, 13835, 3595, 11787, 7691, 15883,
+ 267, 8459, 4363, 12555, 2315, 10507, 6411, 14603, 1291, 9483, 5387, 13579, 3339, 11531, 7435, 15627,
+ 779, 8971, 4875, 13067, 2827, 11019, 6923, 15115, 1803, 9995, 5899, 14091, 3851, 12043, 7947, 16139,
+ 139, 8331, 4235, 12427, 2187, 10379, 6283, 14475, 1163, 9355, 5259, 13451, 3211, 11403, 7307, 15499,
+ 651, 8843, 4747, 12939, 2699, 10891, 6795, 14987, 1675, 9867, 5771, 13963, 3723, 11915, 7819, 16011,
+ 395, 8587, 4491, 12683, 2443, 10635, 6539, 14731, 1419, 9611, 5515, 13707, 3467, 11659, 7563, 15755,
+ 907, 9099, 5003, 13195, 2955, 11147, 7051, 15243, 1931, 10123, 6027, 14219, 3979, 12171, 8075, 16267,
+ 75, 8267, 4171, 12363, 2123, 10315, 6219, 14411, 1099, 9291, 5195, 13387, 3147, 11339, 7243, 15435,
+ 587, 8779, 4683, 12875, 2635, 10827, 6731, 14923, 1611, 9803, 5707, 13899, 3659, 11851, 7755, 15947,
+ 331, 8523, 4427, 12619, 2379, 10571, 6475, 14667, 1355, 9547, 5451, 13643, 3403, 11595, 7499, 15691,
+ 843, 9035, 4939, 13131, 2891, 11083, 6987, 15179, 1867, 10059, 5963, 14155, 3915, 12107, 8011, 16203,
+ 203, 8395, 4299, 12491, 2251, 10443, 6347, 14539, 1227, 9419, 5323, 13515, 3275, 11467, 7371, 15563,
+ 715, 8907, 4811, 13003, 2763, 10955, 6859, 15051, 1739, 9931, 5835, 14027, 3787, 11979, 7883, 16075,
+ 459, 8651, 4555, 12747, 2507, 10699, 6603, 14795, 1483, 9675, 5579, 13771, 3531, 11723, 7627, 15819,
+ 971, 9163, 5067, 13259, 3019, 11211, 7115, 15307, 1995, 10187, 6091, 14283, 4043, 12235, 8139, 16331,
+ 43, 8235, 4139, 12331, 2091, 10283, 6187, 14379, 1067, 9259, 5163, 13355, 3115, 11307, 7211, 15403,
+ 555, 8747, 4651, 12843, 2603, 10795, 6699, 14891, 1579, 9771, 5675, 13867, 3627, 11819, 7723, 15915,
+ 299, 8491, 4395, 12587, 2347, 10539, 6443, 14635, 1323, 9515, 5419, 13611, 3371, 11563, 7467, 15659,
+ 811, 9003, 4907, 13099, 2859, 11051, 6955, 15147, 1835, 10027, 5931, 14123, 3883, 12075, 7979, 16171,
+ 171, 8363, 4267, 12459, 2219, 10411, 6315, 14507, 1195, 9387, 5291, 13483, 3243, 11435, 7339, 15531,
+ 683, 8875, 4779, 12971, 2731, 10923, 6827, 15019, 1707, 9899, 5803, 13995, 3755, 11947, 7851, 16043,
+ 427, 8619, 4523, 12715, 2475, 10667, 6571, 14763, 1451, 9643, 5547, 13739, 3499, 11691, 7595, 15787,
+ 939, 9131, 5035, 13227, 2987, 11179, 7083, 15275, 1963, 10155, 6059, 14251, 4011, 12203, 8107, 16299,
+ 107, 8299, 4203, 12395, 2155, 10347, 6251, 14443, 1131, 9323, 5227, 13419, 3179, 11371, 7275, 15467,
+ 619, 8811, 4715, 12907, 2667, 10859, 6763, 14955, 1643, 9835, 5739, 13931, 3691, 11883, 7787, 15979,
+ 363, 8555, 4459, 12651, 2411, 10603, 6507, 14699, 1387, 9579, 5483, 13675, 3435, 11627, 7531, 15723,
+ 875, 9067, 4971, 13163, 2923, 11115, 7019, 15211, 1899, 10091, 5995, 14187, 3947, 12139, 8043, 16235,
+ 235, 8427, 4331, 12523, 2283, 10475, 6379, 14571, 1259, 9451, 5355, 13547, 3307, 11499, 7403, 15595,
+ 747, 8939, 4843, 13035, 2795, 10987, 6891, 15083, 1771, 9963, 5867, 14059, 3819, 12011, 7915, 16107,
+ 491, 8683, 4587, 12779, 2539, 10731, 6635, 14827, 1515, 9707, 5611, 13803, 3563, 11755, 7659, 15851,
+ 1003, 9195, 5099, 13291, 3051, 11243, 7147, 15339, 2027, 10219, 6123, 14315, 4075, 12267, 8171, 16363,
+ 27, 8219, 4123, 12315, 2075, 10267, 6171, 14363, 1051, 9243, 5147, 13339, 3099, 11291, 7195, 15387,
+ 539, 8731, 4635, 12827, 2587, 10779, 6683, 14875, 1563, 9755, 5659, 13851, 3611, 11803, 7707, 15899,
+ 283, 8475, 4379, 12571, 2331, 10523, 6427, 14619, 1307, 9499, 5403, 13595, 3355, 11547, 7451, 15643,
+ 795, 8987, 4891, 13083, 2843, 11035, 6939, 15131, 1819, 10011, 5915, 14107, 3867, 12059, 7963, 16155,
+ 155, 8347, 4251, 12443, 2203, 10395, 6299, 14491, 1179, 9371, 5275, 13467, 3227, 11419, 7323, 15515,
+ 667, 8859, 4763, 12955, 2715, 10907, 6811, 15003, 1691, 9883, 5787, 13979, 3739, 11931, 7835, 16027,
+ 411, 8603, 4507, 12699, 2459, 10651, 6555, 14747, 1435, 9627, 5531, 13723, 3483, 11675, 7579, 15771,
+ 923, 9115, 5019, 13211, 2971, 11163, 7067, 15259, 1947, 10139, 6043, 14235, 3995, 12187, 8091, 16283,
+ 91, 8283, 4187, 12379, 2139, 10331, 6235, 14427, 1115, 9307, 5211, 13403, 3163, 11355, 7259, 15451,
+ 603, 8795, 4699, 12891, 2651, 10843, 6747, 14939, 1627, 9819, 5723, 13915, 3675, 11867, 7771, 15963,
+ 347, 8539, 4443, 12635, 2395, 10587, 6491, 14683, 1371, 9563, 5467, 13659, 3419, 11611, 7515, 15707,
+ 859, 9051, 4955, 13147, 2907, 11099, 7003, 15195, 1883, 10075, 5979, 14171, 3931, 12123, 8027, 16219,
+ 219, 8411, 4315, 12507, 2267, 10459, 6363, 14555, 1243, 9435, 5339, 13531, 3291, 11483, 7387, 15579,
+ 731, 8923, 4827, 13019, 2779, 10971, 6875, 15067, 1755, 9947, 5851, 14043, 3803, 11995, 7899, 16091,
+ 475, 8667, 4571, 12763, 2523, 10715, 6619, 14811, 1499, 9691, 5595, 13787, 3547, 11739, 7643, 15835,
+ 987, 9179, 5083, 13275, 3035, 11227, 7131, 15323, 2011, 10203, 6107, 14299, 4059, 12251, 8155, 16347,
+ 59, 8251, 4155, 12347, 2107, 10299, 6203, 14395, 1083, 9275, 5179, 13371, 3131, 11323, 7227, 15419,
+ 571, 8763, 4667, 12859, 2619, 10811, 6715, 14907, 1595, 9787, 5691, 13883, 3643, 11835, 7739, 15931,
+ 315, 8507, 4411, 12603, 2363, 10555, 6459, 14651, 1339, 9531, 5435, 13627, 3387, 11579, 7483, 15675,
+ 827, 9019, 4923, 13115, 2875, 11067, 6971, 15163, 1851, 10043, 5947, 14139, 3899, 12091, 7995, 16187,
+ 187, 8379, 4283, 12475, 2235, 10427, 6331, 14523, 1211, 9403, 5307, 13499, 3259, 11451, 7355, 15547,
+ 699, 8891, 4795, 12987, 2747, 10939, 6843, 15035, 1723, 9915, 5819, 14011, 3771, 11963, 7867, 16059,
+ 443, 8635, 4539, 12731, 2491, 10683, 6587, 14779, 1467, 9659, 5563, 13755, 3515, 11707, 7611, 15803,
+ 955, 9147, 5051, 13243, 3003, 11195, 7099, 15291, 1979, 10171, 6075, 14267, 4027, 12219, 8123, 16315,
+ 123, 8315, 4219, 12411, 2171, 10363, 6267, 14459, 1147, 9339, 5243, 13435, 3195, 11387, 7291, 15483,
+ 635, 8827, 4731, 12923, 2683, 10875, 6779, 14971, 1659, 9851, 5755, 13947, 3707, 11899, 7803, 15995,
+ 379, 8571, 4475, 12667, 2427, 10619, 6523, 14715, 1403, 9595, 5499, 13691, 3451, 11643, 7547, 15739,
+ 891, 9083, 4987, 13179, 2939, 11131, 7035, 15227, 1915, 10107, 6011, 14203, 3963, 12155, 8059, 16251,
+ 251, 8443, 4347, 12539, 2299, 10491, 6395, 14587, 1275, 9467, 5371, 13563, 3323, 11515, 7419, 15611,
+ 763, 8955, 4859, 13051, 2811, 11003, 6907, 15099, 1787, 9979, 5883, 14075, 3835, 12027, 7931, 16123,
+ 507, 8699, 4603, 12795, 2555, 10747, 6651, 14843, 1531, 9723, 5627, 13819, 3579, 11771, 7675, 15867,
+ 1019, 9211, 5115, 13307, 3067, 11259, 7163, 15355, 2043, 10235, 6139, 14331, 4091, 12283, 8187, 16379,
+ 7, 8199, 4103, 12295, 2055, 10247, 6151, 14343, 1031, 9223, 5127, 13319, 3079, 11271, 7175, 15367,
+ 519, 8711, 4615, 12807, 2567, 10759, 6663, 14855, 1543, 9735, 5639, 13831, 3591, 11783, 7687, 15879,
+ 263, 8455, 4359, 12551, 2311, 10503, 6407, 14599, 1287, 9479, 5383, 13575, 3335, 11527, 7431, 15623,
+ 775, 8967, 4871, 13063, 2823, 11015, 6919, 15111, 1799, 9991, 5895, 14087, 3847, 12039, 7943, 16135,
+ 135, 8327, 4231, 12423, 2183, 10375, 6279, 14471, 1159, 9351, 5255, 13447, 3207, 11399, 7303, 15495,
+ 647, 8839, 4743, 12935, 2695, 10887, 6791, 14983, 1671, 9863, 5767, 13959, 3719, 11911, 7815, 16007,
+ 391, 8583, 4487, 12679, 2439, 10631, 6535, 14727, 1415, 9607, 5511, 13703, 3463, 11655, 7559, 15751,
+ 903, 9095, 4999, 13191, 2951, 11143, 7047, 15239, 1927, 10119, 6023, 14215, 3975, 12167, 8071, 16263,
+ 71, 8263, 4167, 12359, 2119, 10311, 6215, 14407, 1095, 9287, 5191, 13383, 3143, 11335, 7239, 15431,
+ 583, 8775, 4679, 12871, 2631, 10823, 6727, 14919, 1607, 9799, 5703, 13895, 3655, 11847, 7751, 15943,
+ 327, 8519, 4423, 12615, 2375, 10567, 6471, 14663, 1351, 9543, 5447, 13639, 3399, 11591, 7495, 15687,
+ 839, 9031, 4935, 13127, 2887, 11079, 6983, 15175, 1863, 10055, 5959, 14151, 3911, 12103, 8007, 16199,
+ 199, 8391, 4295, 12487, 2247, 10439, 6343, 14535, 1223, 9415, 5319, 13511, 3271, 11463, 7367, 15559,
+ 711, 8903, 4807, 12999, 2759, 10951, 6855, 15047, 1735, 9927, 5831, 14023, 3783, 11975, 7879, 16071,
+ 455, 8647, 4551, 12743, 2503, 10695, 6599, 14791, 1479, 9671, 5575, 13767, 3527, 11719, 7623, 15815,
+ 967, 9159, 5063, 13255, 3015, 11207, 7111, 15303, 1991, 10183, 6087, 14279, 4039, 12231, 8135, 16327,
+ 39, 8231, 4135, 12327, 2087, 10279, 6183, 14375, 1063, 9255, 5159, 13351, 3111, 11303, 7207, 15399,
+ 551, 8743, 4647, 12839, 2599, 10791, 6695, 14887, 1575, 9767, 5671, 13863, 3623, 11815, 7719, 15911,
+ 295, 8487, 4391, 12583, 2343, 10535, 6439, 14631, 1319, 9511, 5415, 13607, 3367, 11559, 7463, 15655,
+ 807, 8999, 4903, 13095, 2855, 11047, 6951, 15143, 1831, 10023, 5927, 14119, 3879, 12071, 7975, 16167,
+ 167, 8359, 4263, 12455, 2215, 10407, 6311, 14503, 1191, 9383, 5287, 13479, 3239, 11431, 7335, 15527,
+ 679, 8871, 4775, 12967, 2727, 10919, 6823, 15015, 1703, 9895, 5799, 13991, 3751, 11943, 7847, 16039,
+ 423, 8615, 4519, 12711, 2471, 10663, 6567, 14759, 1447, 9639, 5543, 13735, 3495, 11687, 7591, 15783,
+ 935, 9127, 5031, 13223, 2983, 11175, 7079, 15271, 1959, 10151, 6055, 14247, 4007, 12199, 8103, 16295,
+ 103, 8295, 4199, 12391, 2151, 10343, 6247, 14439, 1127, 9319, 5223, 13415, 3175, 11367, 7271, 15463,
+ 615, 8807, 4711, 12903, 2663, 10855, 6759, 14951, 1639, 9831, 5735, 13927, 3687, 11879, 7783, 15975,
+ 359, 8551, 4455, 12647, 2407, 10599, 6503, 14695, 1383, 9575, 5479, 13671, 3431, 11623, 7527, 15719,
+ 871, 9063, 4967, 13159, 2919, 11111, 7015, 15207, 1895, 10087, 5991, 14183, 3943, 12135, 8039, 16231,
+ 231, 8423, 4327, 12519, 2279, 10471, 6375, 14567, 1255, 9447, 5351, 13543, 3303, 11495, 7399, 15591,
+ 743, 8935, 4839, 13031, 2791, 10983, 6887, 15079, 1767, 9959, 5863, 14055, 3815, 12007, 7911, 16103,
+ 487, 8679, 4583, 12775, 2535, 10727, 6631, 14823, 1511, 9703, 5607, 13799, 3559, 11751, 7655, 15847,
+ 999, 9191, 5095, 13287, 3047, 11239, 7143, 15335, 2023, 10215, 6119, 14311, 4071, 12263, 8167, 16359,
+ 23, 8215, 4119, 12311, 2071, 10263, 6167, 14359, 1047, 9239, 5143, 13335, 3095, 11287, 7191, 15383,
+ 535, 8727, 4631, 12823, 2583, 10775, 6679, 14871, 1559, 9751, 5655, 13847, 3607, 11799, 7703, 15895,
+ 279, 8471, 4375, 12567, 2327, 10519, 6423, 14615, 1303, 9495, 5399, 13591, 3351, 11543, 7447, 15639,
+ 791, 8983, 4887, 13079, 2839, 11031, 6935, 15127, 1815, 10007, 5911, 14103, 3863, 12055, 7959, 16151,
+ 151, 8343, 4247, 12439, 2199, 10391, 6295, 14487, 1175, 9367, 5271, 13463, 3223, 11415, 7319, 15511,
+ 663, 8855, 4759, 12951, 2711, 10903, 6807, 14999, 1687, 9879, 5783, 13975, 3735, 11927, 7831, 16023,
+ 407, 8599, 4503, 12695, 2455, 10647, 6551, 14743, 1431, 9623, 5527, 13719, 3479, 11671, 7575, 15767,
+ 919, 9111, 5015, 13207, 2967, 11159, 7063, 15255, 1943, 10135, 6039, 14231, 3991, 12183, 8087, 16279,
+ 87, 8279, 4183, 12375, 2135, 10327, 6231, 14423, 1111, 9303, 5207, 13399, 3159, 11351, 7255, 15447,
+ 599, 8791, 4695, 12887, 2647, 10839, 6743, 14935, 1623, 9815, 5719, 13911, 3671, 11863, 7767, 15959,
+ 343, 8535, 4439, 12631, 2391, 10583, 6487, 14679, 1367, 9559, 5463, 13655, 3415, 11607, 7511, 15703,
+ 855, 9047, 4951, 13143, 2903, 11095, 6999, 15191, 1879, 10071, 5975, 14167, 3927, 12119, 8023, 16215,
+ 215, 8407, 4311, 12503, 2263, 10455, 6359, 14551, 1239, 9431, 5335, 13527, 3287, 11479, 7383, 15575,
+ 727, 8919, 4823, 13015, 2775, 10967, 6871, 15063, 1751, 9943, 5847, 14039, 3799, 11991, 7895, 16087,
+ 471, 8663, 4567, 12759, 2519, 10711, 6615, 14807, 1495, 9687, 5591, 13783, 3543, 11735, 7639, 15831,
+ 983, 9175, 5079, 13271, 3031, 11223, 7127, 15319, 2007, 10199, 6103, 14295, 4055, 12247, 8151, 16343,
+ 55, 8247, 4151, 12343, 2103, 10295, 6199, 14391, 1079, 9271, 5175, 13367, 3127, 11319, 7223, 15415,
+ 567, 8759, 4663, 12855, 2615, 10807, 6711, 14903, 1591, 9783, 5687, 13879, 3639, 11831, 7735, 15927,
+ 311, 8503, 4407, 12599, 2359, 10551, 6455, 14647, 1335, 9527, 5431, 13623, 3383, 11575, 7479, 15671,
+ 823, 9015, 4919, 13111, 2871, 11063, 6967, 15159, 1847, 10039, 5943, 14135, 3895, 12087, 7991, 16183,
+ 183, 8375, 4279, 12471, 2231, 10423, 6327, 14519, 1207, 9399, 5303, 13495, 3255, 11447, 7351, 15543,
+ 695, 8887, 4791, 12983, 2743, 10935, 6839, 15031, 1719, 9911, 5815, 14007, 3767, 11959, 7863, 16055,
+ 439, 8631, 4535, 12727, 2487, 10679, 6583, 14775, 1463, 9655, 5559, 13751, 3511, 11703, 7607, 15799,
+ 951, 9143, 5047, 13239, 2999, 11191, 7095, 15287, 1975, 10167, 6071, 14263, 4023, 12215, 8119, 16311,
+ 119, 8311, 4215, 12407, 2167, 10359, 6263, 14455, 1143, 9335, 5239, 13431, 3191, 11383, 7287, 15479,
+ 631, 8823, 4727, 12919, 2679, 10871, 6775, 14967, 1655, 9847, 5751, 13943, 3703, 11895, 7799, 15991,
+ 375, 8567, 4471, 12663, 2423, 10615, 6519, 14711, 1399, 9591, 5495, 13687, 3447, 11639, 7543, 15735,
+ 887, 9079, 4983, 13175, 2935, 11127, 7031, 15223, 1911, 10103, 6007, 14199, 3959, 12151, 8055, 16247,
+ 247, 8439, 4343, 12535, 2295, 10487, 6391, 14583, 1271, 9463, 5367, 13559, 3319, 11511, 7415, 15607,
+ 759, 8951, 4855, 13047, 2807, 10999, 6903, 15095, 1783, 9975, 5879, 14071, 3831, 12023, 7927, 16119,
+ 503, 8695, 4599, 12791, 2551, 10743, 6647, 14839, 1527, 9719, 5623, 13815, 3575, 11767, 7671, 15863,
+ 1015, 9207, 5111, 13303, 3063, 11255, 7159, 15351, 2039, 10231, 6135, 14327, 4087, 12279, 8183, 16375,
+ 15, 8207, 4111, 12303, 2063, 10255, 6159, 14351, 1039, 9231, 5135, 13327, 3087, 11279, 7183, 15375,
+ 527, 8719, 4623, 12815, 2575, 10767, 6671, 14863, 1551, 9743, 5647, 13839, 3599, 11791, 7695, 15887,
+ 271, 8463, 4367, 12559, 2319, 10511, 6415, 14607, 1295, 9487, 5391, 13583, 3343, 11535, 7439, 15631,
+ 783, 8975, 4879, 13071, 2831, 11023, 6927, 15119, 1807, 9999, 5903, 14095, 3855, 12047, 7951, 16143,
+ 143, 8335, 4239, 12431, 2191, 10383, 6287, 14479, 1167, 9359, 5263, 13455, 3215, 11407, 7311, 15503,
+ 655, 8847, 4751, 12943, 2703, 10895, 6799, 14991, 1679, 9871, 5775, 13967, 3727, 11919, 7823, 16015,
+ 399, 8591, 4495, 12687, 2447, 10639, 6543, 14735, 1423, 9615, 5519, 13711, 3471, 11663, 7567, 15759,
+ 911, 9103, 5007, 13199, 2959, 11151, 7055, 15247, 1935, 10127, 6031, 14223, 3983, 12175, 8079, 16271,
+ 79, 8271, 4175, 12367, 2127, 10319, 6223, 14415, 1103, 9295, 5199, 13391, 3151, 11343, 7247, 15439,
+ 591, 8783, 4687, 12879, 2639, 10831, 6735, 14927, 1615, 9807, 5711, 13903, 3663, 11855, 7759, 15951,
+ 335, 8527, 4431, 12623, 2383, 10575, 6479, 14671, 1359, 9551, 5455, 13647, 3407, 11599, 7503, 15695,
+ 847, 9039, 4943, 13135, 2895, 11087, 6991, 15183, 1871, 10063, 5967, 14159, 3919, 12111, 8015, 16207,
+ 207, 8399, 4303, 12495, 2255, 10447, 6351, 14543, 1231, 9423, 5327, 13519, 3279, 11471, 7375, 15567,
+ 719, 8911, 4815, 13007, 2767, 10959, 6863, 15055, 1743, 9935, 5839, 14031, 3791, 11983, 7887, 16079,
+ 463, 8655, 4559, 12751, 2511, 10703, 6607, 14799, 1487, 9679, 5583, 13775, 3535, 11727, 7631, 15823,
+ 975, 9167, 5071, 13263, 3023, 11215, 7119, 15311, 1999, 10191, 6095, 14287, 4047, 12239, 8143, 16335,
+ 47, 8239, 4143, 12335, 2095, 10287, 6191, 14383, 1071, 9263, 5167, 13359, 3119, 11311, 7215, 15407,
+ 559, 8751, 4655, 12847, 2607, 10799, 6703, 14895, 1583, 9775, 5679, 13871, 3631, 11823, 7727, 15919,
+ 303, 8495, 4399, 12591, 2351, 10543, 6447, 14639, 1327, 9519, 5423, 13615, 3375, 11567, 7471, 15663,
+ 815, 9007, 4911, 13103, 2863, 11055, 6959, 15151, 1839, 10031, 5935, 14127, 3887, 12079, 7983, 16175,
+ 175, 8367, 4271, 12463, 2223, 10415, 6319, 14511, 1199, 9391, 5295, 13487, 3247, 11439, 7343, 15535,
+ 687, 8879, 4783, 12975, 2735, 10927, 6831, 15023, 1711, 9903, 5807, 13999, 3759, 11951, 7855, 16047,
+ 431, 8623, 4527, 12719, 2479, 10671, 6575, 14767, 1455, 9647, 5551, 13743, 3503, 11695, 7599, 15791,
+ 943, 9135, 5039, 13231, 2991, 11183, 7087, 15279, 1967, 10159, 6063, 14255, 4015, 12207, 8111, 16303,
+ 111, 8303, 4207, 12399, 2159, 10351, 6255, 14447, 1135, 9327, 5231, 13423, 3183, 11375, 7279, 15471,
+ 623, 8815, 4719, 12911, 2671, 10863, 6767, 14959, 1647, 9839, 5743, 13935, 3695, 11887, 7791, 15983,
+ 367, 8559, 4463, 12655, 2415, 10607, 6511, 14703, 1391, 9583, 5487, 13679, 3439, 11631, 7535, 15727,
+ 879, 9071, 4975, 13167, 2927, 11119, 7023, 15215, 1903, 10095, 5999, 14191, 3951, 12143, 8047, 16239,
+ 239, 8431, 4335, 12527, 2287, 10479, 6383, 14575, 1263, 9455, 5359, 13551, 3311, 11503, 7407, 15599,
+ 751, 8943, 4847, 13039, 2799, 10991, 6895, 15087, 1775, 9967, 5871, 14063, 3823, 12015, 7919, 16111,
+ 495, 8687, 4591, 12783, 2543, 10735, 6639, 14831, 1519, 9711, 5615, 13807, 3567, 11759, 7663, 15855,
+ 1007, 9199, 5103, 13295, 3055, 11247, 7151, 15343, 2031, 10223, 6127, 14319, 4079, 12271, 8175, 16367,
+ 31, 8223, 4127, 12319, 2079, 10271, 6175, 14367, 1055, 9247, 5151, 13343, 3103, 11295, 7199, 15391,
+ 543, 8735, 4639, 12831, 2591, 10783, 6687, 14879, 1567, 9759, 5663, 13855, 3615, 11807, 7711, 15903,
+ 287, 8479, 4383, 12575, 2335, 10527, 6431, 14623, 1311, 9503, 5407, 13599, 3359, 11551, 7455, 15647,
+ 799, 8991, 4895, 13087, 2847, 11039, 6943, 15135, 1823, 10015, 5919, 14111, 3871, 12063, 7967, 16159,
+ 159, 8351, 4255, 12447, 2207, 10399, 6303, 14495, 1183, 9375, 5279, 13471, 3231, 11423, 7327, 15519,
+ 671, 8863, 4767, 12959, 2719, 10911, 6815, 15007, 1695, 9887, 5791, 13983, 3743, 11935, 7839, 16031,
+ 415, 8607, 4511, 12703, 2463, 10655, 6559, 14751, 1439, 9631, 5535, 13727, 3487, 11679, 7583, 15775,
+ 927, 9119, 5023, 13215, 2975, 11167, 7071, 15263, 1951, 10143, 6047, 14239, 3999, 12191, 8095, 16287,
+ 95, 8287, 4191, 12383, 2143, 10335, 6239, 14431, 1119, 9311, 5215, 13407, 3167, 11359, 7263, 15455,
+ 607, 8799, 4703, 12895, 2655, 10847, 6751, 14943, 1631, 9823, 5727, 13919, 3679, 11871, 7775, 15967,
+ 351, 8543, 4447, 12639, 2399, 10591, 6495, 14687, 1375, 9567, 5471, 13663, 3423, 11615, 7519, 15711,
+ 863, 9055, 4959, 13151, 2911, 11103, 7007, 15199, 1887, 10079, 5983, 14175, 3935, 12127, 8031, 16223,
+ 223, 8415, 4319, 12511, 2271, 10463, 6367, 14559, 1247, 9439, 5343, 13535, 3295, 11487, 7391, 15583,
+ 735, 8927, 4831, 13023, 2783, 10975, 6879, 15071, 1759, 9951, 5855, 14047, 3807, 11999, 7903, 16095,
+ 479, 8671, 4575, 12767, 2527, 10719, 6623, 14815, 1503, 9695, 5599, 13791, 3551, 11743, 7647, 15839,
+ 991, 9183, 5087, 13279, 3039, 11231, 7135, 15327, 2015, 10207, 6111, 14303, 4063, 12255, 8159, 16351,
+ 63, 8255, 4159, 12351, 2111, 10303, 6207, 14399, 1087, 9279, 5183, 13375, 3135, 11327, 7231, 15423,
+ 575, 8767, 4671, 12863, 2623, 10815, 6719, 14911, 1599, 9791, 5695, 13887, 3647, 11839, 7743, 15935,
+ 319, 8511, 4415, 12607, 2367, 10559, 6463, 14655, 1343, 9535, 5439, 13631, 3391, 11583, 7487, 15679,
+ 831, 9023, 4927, 13119, 2879, 11071, 6975, 15167, 1855, 10047, 5951, 14143, 3903, 12095, 7999, 16191,
+ 191, 8383, 4287, 12479, 2239, 10431, 6335, 14527, 1215, 9407, 5311, 13503, 3263, 11455, 7359, 15551,
+ 703, 8895, 4799, 12991, 2751, 10943, 6847, 15039, 1727, 9919, 5823, 14015, 3775, 11967, 7871, 16063,
+ 447, 8639, 4543, 12735, 2495, 10687, 6591, 14783, 1471, 9663, 5567, 13759, 3519, 11711, 7615, 15807,
+ 959, 9151, 5055, 13247, 3007, 11199, 7103, 15295, 1983, 10175, 6079, 14271, 4031, 12223, 8127, 16319,
+ 127, 8319, 4223, 12415, 2175, 10367, 6271, 14463, 1151, 9343, 5247, 13439, 3199, 11391, 7295, 15487,
+ 639, 8831, 4735, 12927, 2687, 10879, 6783, 14975, 1663, 9855, 5759, 13951, 3711, 11903, 7807, 15999,
+ 383, 8575, 4479, 12671, 2431, 10623, 6527, 14719, 1407, 9599, 5503, 13695, 3455, 11647, 7551, 15743,
+ 895, 9087, 4991, 13183, 2943, 11135, 7039, 15231, 1919, 10111, 6015, 14207, 3967, 12159, 8063, 16255,
+ 255, 8447, 4351, 12543, 2303, 10495, 6399, 14591, 1279, 9471, 5375, 13567, 3327, 11519, 7423, 15615,
+ 767, 8959, 4863, 13055, 2815, 11007, 6911, 15103, 1791, 9983, 5887, 14079, 3839, 12031, 7935, 16127,
+ 511, 8703, 4607, 12799, 2559, 10751, 6655, 14847, 1535, 9727, 5631, 13823, 3583, 11775, 7679, 15871,
+ 1023, 9215, 5119, 13311, 3071, 11263, 7167, 15359, 2047, 10239, 6143, 14335, 4095, 12287, 8191, 16383,
+};
+}
+}
diff --git a/include/kfr/data/sincos.hpp b/include/kfr/data/sincos.hpp
@@ -0,0 +1,308 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+
+#include "../base/kfr.h"
+#include <cstdint>
+
+namespace kfr
+{
+
+namespace data
+{
+
+// Table of sin(2*pi*k/256) for k = 0..255; values generated with MPFR (arbitrary-precision arithmetic)
+template <typename T>
+constexpr T c_sin_table[256] = {
+ /* sin(2*pi* 0/ 256) */ T(0.0),
+ /* sin(2*pi* 1/ 256) */ T(0.02454122852291228803173452945928292506547),
+ /* sin(2*pi* 2/ 256) */ T(0.04906767432741801425495497694268265831475),
+ /* sin(2*pi* 3/ 256) */ T(0.0735645635996674235294656215752343218133),
+ /* sin(2*pi* 4/ 256) */ T(0.09801714032956060199419556388864184586114),
+ /* sin(2*pi* 5/ 256) */ T(0.1224106751992161984987044741509457875752),
+ /* sin(2*pi* 6/ 256) */ T(0.1467304744553617516588501296467178197062),
+ /* sin(2*pi* 7/ 256) */ T(0.1709618887603012263636423572082635319663),
+ /* sin(2*pi* 8/ 256) */ T(0.1950903220161282678482848684770222409277),
+ /* sin(2*pi* 9/ 256) */ T(0.2191012401568697972277375474973577988484),
+ /* sin(2*pi* 10/ 256) */ T(0.242980179903263889948274162077471118321),
+ /* sin(2*pi* 11/ 256) */ T(0.2667127574748983863252865151164363940421),
+ /* sin(2*pi* 12/ 256) */ T(0.2902846772544623676361923758173952746915),
+ /* sin(2*pi* 13/ 256) */ T(0.3136817403988914766564788459941003099934),
+ /* sin(2*pi* 14/ 256) */ T(0.3368898533922200506892532126191475704778),
+ /* sin(2*pi* 15/ 256) */ T(0.3598950365349881487751045723267564202023),
+ /* sin(2*pi* 16/ 256) */ T(0.3826834323650897717284599840303988667613),
+ /* sin(2*pi* 17/ 256) */ T(0.4052413140049898709084813055050524665119),
+ /* sin(2*pi* 18/ 256) */ T(0.4275550934302820943209668568887985343046),
+ /* sin(2*pi* 19/ 256) */ T(0.4496113296546066000462945794242270758832),
+ /* sin(2*pi* 20/ 256) */ T(0.4713967368259976485563876259052543776575),
+ /* sin(2*pi* 21/ 256) */ T(0.4928981922297840368730266887588092682397),
+ /* sin(2*pi* 22/ 256) */ T(0.514102744193221726593693838968815772608),
+ /* sin(2*pi* 23/ 256) */ T(0.5349976198870972106630769046370179155603),
+ /* sin(2*pi* 24/ 256) */ T(0.5555702330196022247428308139485328743749),
+ /* sin(2*pi* 25/ 256) */ T(0.575808191417845300745972453815730841776),
+ /* sin(2*pi* 26/ 256) */ T(0.5956993044924333434670365288299698895119),
+ /* sin(2*pi* 27/ 256) */ T(0.6152315905806268454849135634139842776594),
+ /* sin(2*pi* 28/ 256) */ T(0.6343932841636454982151716132254933706757),
+ /* sin(2*pi* 29/ 256) */ T(0.6531728429537767640842030136563054150769),
+ /* sin(2*pi* 30/ 256) */ T(0.6715589548470184006253768504274218032288),
+ /* sin(2*pi* 31/ 256) */ T(0.6895405447370669246167306299574847028455),
+ /* sin(2*pi* 32/ 256) */ T(0.7071067811865475244008443621048490392848),
+ /* sin(2*pi* 33/ 256) */ T(0.7242470829514669209410692432905531674831),
+ /* sin(2*pi* 34/ 256) */ T(0.740951125354959091175616897495162729729),
+ /* sin(2*pi* 35/ 256) */ T(0.7572088465064845475754640536057844730404),
+ /* sin(2*pi* 36/ 256) */ T(0.773010453362736960810906609758469800971),
+ /* sin(2*pi* 37/ 256) */ T(0.7883464276266062620091647053596892826565),
+ /* sin(2*pi* 38/ 256) */ T(0.8032075314806449098066765129631419238796),
+ /* sin(2*pi* 39/ 256) */ T(0.817584813151583696504920884130633809471),
+ /* sin(2*pi* 40/ 256) */ T(0.8314696123025452370787883776179057567386),
+ /* sin(2*pi* 41/ 256) */ T(0.8448535652497070732595712051049570977198),
+ /* sin(2*pi* 42/ 256) */ T(0.8577286100002720699022699842847701370425),
+ /* sin(2*pi* 43/ 256) */ T(0.8700869911087114186522924044838488439108),
+ /* sin(2*pi* 44/ 256) */ T(0.8819212643483550297127568636603883495084),
+ /* sin(2*pi* 45/ 256) */ T(0.8932243011955153203424164474933979780006),
+ /* sin(2*pi* 46/ 256) */ T(0.9039892931234433315862002972305370487101),
+ /* sin(2*pi* 47/ 256) */ T(0.9142097557035306546350148293935774010447),
+ /* sin(2*pi* 48/ 256) */ T(0.9238795325112867561281831893967882868224),
+ /* sin(2*pi* 49/ 256) */ T(0.932992798834738887711660255543302498295),
+ /* sin(2*pi* 50/ 256) */ T(0.9415440651830207784125094025995023571856),
+ /* sin(2*pi* 51/ 256) */ T(0.9495281805930366671959360741893450282522),
+ /* sin(2*pi* 52/ 256) */ T(0.9569403357322088649357978869802699694828),
+ /* sin(2*pi* 53/ 256) */ T(0.9637760657954398666864643555078351536631),
+ /* sin(2*pi* 54/ 256) */ T(0.9700312531945439926039842072861002514569),
+ /* sin(2*pi* 55/ 256) */ T(0.975702130038528544460395766419527971644),
+ /* sin(2*pi* 56/ 256) */ T(0.9807852804032304491261822361342390369739),
+ /* sin(2*pi* 57/ 256) */ T(0.9852776423889412447740184331785477871601),
+ /* sin(2*pi* 58/ 256) */ T(0.9891765099647809734516737380162430639837),
+ /* sin(2*pi* 59/ 256) */ T(0.9924795345987099981567672516611178200108),
+ /* sin(2*pi* 60/ 256) */ T(0.9951847266721968862448369531094799215755),
+ /* sin(2*pi* 61/ 256) */ T(0.9972904566786902161355971401825678211717),
+ /* sin(2*pi* 62/ 256) */ T(0.9987954562051723927147716047591006944432),
+ /* sin(2*pi* 63/ 256) */ T(0.9996988186962042201157656496661721968501),
+ /* sin(2*pi* 64/ 256) */ T(1.0),
+ /* sin(2*pi* 65/ 256) */ T(0.9996988186962042201157656496661721968501),
+ /* sin(2*pi* 66/ 256) */ T(0.9987954562051723927147716047591006944432),
+ /* sin(2*pi* 67/ 256) */ T(0.9972904566786902161355971401825678211717),
+ /* sin(2*pi* 68/ 256) */ T(0.9951847266721968862448369531094799215755),
+ /* sin(2*pi* 69/ 256) */ T(0.9924795345987099981567672516611178200108),
+ /* sin(2*pi* 70/ 256) */ T(0.9891765099647809734516737380162430639837),
+ /* sin(2*pi* 71/ 256) */ T(0.9852776423889412447740184331785477871601),
+ /* sin(2*pi* 72/ 256) */ T(0.9807852804032304491261822361342390369739),
+ /* sin(2*pi* 73/ 256) */ T(0.975702130038528544460395766419527971644),
+ /* sin(2*pi* 74/ 256) */ T(0.9700312531945439926039842072861002514569),
+ /* sin(2*pi* 75/ 256) */ T(0.9637760657954398666864643555078351536631),
+ /* sin(2*pi* 76/ 256) */ T(0.9569403357322088649357978869802699694828),
+ /* sin(2*pi* 77/ 256) */ T(0.9495281805930366671959360741893450282522),
+ /* sin(2*pi* 78/ 256) */ T(0.9415440651830207784125094025995023571856),
+ /* sin(2*pi* 79/ 256) */ T(0.932992798834738887711660255543302498295),
+ /* sin(2*pi* 80/ 256) */ T(0.9238795325112867561281831893967882868224),
+ /* sin(2*pi* 81/ 256) */ T(0.9142097557035306546350148293935774010447),
+ /* sin(2*pi* 82/ 256) */ T(0.9039892931234433315862002972305370487101),
+ /* sin(2*pi* 83/ 256) */ T(0.8932243011955153203424164474933979780006),
+ /* sin(2*pi* 84/ 256) */ T(0.8819212643483550297127568636603883495084),
+ /* sin(2*pi* 85/ 256) */ T(0.8700869911087114186522924044838488439108),
+ /* sin(2*pi* 86/ 256) */ T(0.8577286100002720699022699842847701370425),
+ /* sin(2*pi* 87/ 256) */ T(0.8448535652497070732595712051049570977198),
+ /* sin(2*pi* 88/ 256) */ T(0.8314696123025452370787883776179057567386),
+ /* sin(2*pi* 89/ 256) */ T(0.817584813151583696504920884130633809471),
+ /* sin(2*pi* 90/ 256) */ T(0.8032075314806449098066765129631419238796),
+ /* sin(2*pi* 91/ 256) */ T(0.7883464276266062620091647053596892826565),
+ /* sin(2*pi* 92/ 256) */ T(0.773010453362736960810906609758469800971),
+ /* sin(2*pi* 93/ 256) */ T(0.7572088465064845475754640536057844730404),
+ /* sin(2*pi* 94/ 256) */ T(0.740951125354959091175616897495162729729),
+ /* sin(2*pi* 95/ 256) */ T(0.7242470829514669209410692432905531674831),
+ /* sin(2*pi* 96/ 256) */ T(0.7071067811865475244008443621048490392848),
+ /* sin(2*pi* 97/ 256) */ T(0.6895405447370669246167306299574847028455),
+ /* sin(2*pi* 98/ 256) */ T(0.6715589548470184006253768504274218032288),
+ /* sin(2*pi* 99/ 256) */ T(0.6531728429537767640842030136563054150769),
+ /* sin(2*pi* 100/ 256) */ T(0.6343932841636454982151716132254933706757),
+ /* sin(2*pi* 101/ 256) */ T(0.6152315905806268454849135634139842776594),
+ /* sin(2*pi* 102/ 256) */ T(0.5956993044924333434670365288299698895119),
+ /* sin(2*pi* 103/ 256) */ T(0.575808191417845300745972453815730841776),
+ /* sin(2*pi* 104/ 256) */ T(0.5555702330196022247428308139485328743749),
+ /* sin(2*pi* 105/ 256) */ T(0.5349976198870972106630769046370179155603),
+ /* sin(2*pi* 106/ 256) */ T(0.514102744193221726593693838968815772608),
+ /* sin(2*pi* 107/ 256) */ T(0.4928981922297840368730266887588092682397),
+ /* sin(2*pi* 108/ 256) */ T(0.4713967368259976485563876259052543776575),
+ /* sin(2*pi* 109/ 256) */ T(0.4496113296546066000462945794242270758832),
+ /* sin(2*pi* 110/ 256) */ T(0.4275550934302820943209668568887985343046),
+ /* sin(2*pi* 111/ 256) */ T(0.4052413140049898709084813055050524665119),
+ /* sin(2*pi* 112/ 256) */ T(0.3826834323650897717284599840303988667613),
+ /* sin(2*pi* 113/ 256) */ T(0.3598950365349881487751045723267564202023),
+ /* sin(2*pi* 114/ 256) */ T(0.3368898533922200506892532126191475704778),
+ /* sin(2*pi* 115/ 256) */ T(0.3136817403988914766564788459941003099934),
+ /* sin(2*pi* 116/ 256) */ T(0.2902846772544623676361923758173952746915),
+ /* sin(2*pi* 117/ 256) */ T(0.2667127574748983863252865151164363940421),
+ /* sin(2*pi* 118/ 256) */ T(0.242980179903263889948274162077471118321),
+ /* sin(2*pi* 119/ 256) */ T(0.2191012401568697972277375474973577988484),
+ /* sin(2*pi* 120/ 256) */ T(0.1950903220161282678482848684770222409277),
+ /* sin(2*pi* 121/ 256) */ T(0.1709618887603012263636423572082635319663),
+ /* sin(2*pi* 122/ 256) */ T(0.1467304744553617516588501296467178197062),
+ /* sin(2*pi* 123/ 256) */ T(0.1224106751992161984987044741509457875752),
+ /* sin(2*pi* 124/ 256) */ T(0.09801714032956060199419556388864184586114),
+ /* sin(2*pi* 125/ 256) */ T(0.0735645635996674235294656215752343218133),
+ /* sin(2*pi* 126/ 256) */ T(0.04906767432741801425495497694268265831475),
+ /* sin(2*pi* 127/ 256) */ T(0.02454122852291228803173452945928292506547),
+ /* sin(2*pi* 128/ 256) */ T(0.0),
+ /* sin(2*pi* 129/ 256) */ T(-0.02454122852291228803173452945928292506547),
+ /* sin(2*pi* 130/ 256) */ T(-0.04906767432741801425495497694268265831475),
+ /* sin(2*pi* 131/ 256) */ T(-0.0735645635996674235294656215752343218133),
+ /* sin(2*pi* 132/ 256) */ T(-0.09801714032956060199419556388864184586114),
+ /* sin(2*pi* 133/ 256) */ T(-0.1224106751992161984987044741509457875752),
+ /* sin(2*pi* 134/ 256) */ T(-0.1467304744553617516588501296467178197062),
+ /* sin(2*pi* 135/ 256) */ T(-0.1709618887603012263636423572082635319663),
+ /* sin(2*pi* 136/ 256) */ T(-0.1950903220161282678482848684770222409277),
+ /* sin(2*pi* 137/ 256) */ T(-0.2191012401568697972277375474973577988484),
+ /* sin(2*pi* 138/ 256) */ T(-0.242980179903263889948274162077471118321),
+ /* sin(2*pi* 139/ 256) */ T(-0.2667127574748983863252865151164363940421),
+ /* sin(2*pi* 140/ 256) */ T(-0.2902846772544623676361923758173952746915),
+ /* sin(2*pi* 141/ 256) */ T(-0.3136817403988914766564788459941003099934),
+ /* sin(2*pi* 142/ 256) */ T(-0.3368898533922200506892532126191475704778),
+ /* sin(2*pi* 143/ 256) */ T(-0.3598950365349881487751045723267564202023),
+ /* sin(2*pi* 144/ 256) */ T(-0.3826834323650897717284599840303988667613),
+ /* sin(2*pi* 145/ 256) */ T(-0.4052413140049898709084813055050524665119),
+ /* sin(2*pi* 146/ 256) */ T(-0.4275550934302820943209668568887985343046),
+ /* sin(2*pi* 147/ 256) */ T(-0.4496113296546066000462945794242270758832),
+ /* sin(2*pi* 148/ 256) */ T(-0.4713967368259976485563876259052543776575),
+ /* sin(2*pi* 149/ 256) */ T(-0.4928981922297840368730266887588092682397),
+ /* sin(2*pi* 150/ 256) */ T(-0.514102744193221726593693838968815772608),
+ /* sin(2*pi* 151/ 256) */ T(-0.5349976198870972106630769046370179155603),
+ /* sin(2*pi* 152/ 256) */ T(-0.5555702330196022247428308139485328743749),
+ /* sin(2*pi* 153/ 256) */ T(-0.575808191417845300745972453815730841776),
+ /* sin(2*pi* 154/ 256) */ T(-0.5956993044924333434670365288299698895119),
+ /* sin(2*pi* 155/ 256) */ T(-0.6152315905806268454849135634139842776594),
+ /* sin(2*pi* 156/ 256) */ T(-0.6343932841636454982151716132254933706757),
+ /* sin(2*pi* 157/ 256) */ T(-0.6531728429537767640842030136563054150769),
+ /* sin(2*pi* 158/ 256) */ T(-0.6715589548470184006253768504274218032288),
+ /* sin(2*pi* 159/ 256) */ T(-0.6895405447370669246167306299574847028455),
+ /* sin(2*pi* 160/ 256) */ T(-0.7071067811865475244008443621048490392848),
+ /* sin(2*pi* 161/ 256) */ T(-0.7242470829514669209410692432905531674831),
+ /* sin(2*pi* 162/ 256) */ T(-0.740951125354959091175616897495162729729),
+ /* sin(2*pi* 163/ 256) */ T(-0.7572088465064845475754640536057844730404),
+ /* sin(2*pi* 164/ 256) */ T(-0.773010453362736960810906609758469800971),
+ /* sin(2*pi* 165/ 256) */ T(-0.7883464276266062620091647053596892826565),
+ /* sin(2*pi* 166/ 256) */ T(-0.8032075314806449098066765129631419238796),
+ /* sin(2*pi* 167/ 256) */ T(-0.817584813151583696504920884130633809471),
+ /* sin(2*pi* 168/ 256) */ T(-0.8314696123025452370787883776179057567386),
+ /* sin(2*pi* 169/ 256) */ T(-0.8448535652497070732595712051049570977198),
+ /* sin(2*pi* 170/ 256) */ T(-0.8577286100002720699022699842847701370425),
+ /* sin(2*pi* 171/ 256) */ T(-0.8700869911087114186522924044838488439108),
+ /* sin(2*pi* 172/ 256) */ T(-0.8819212643483550297127568636603883495084),
+ /* sin(2*pi* 173/ 256) */ T(-0.8932243011955153203424164474933979780006),
+ /* sin(2*pi* 174/ 256) */ T(-0.9039892931234433315862002972305370487101),
+ /* sin(2*pi* 175/ 256) */ T(-0.9142097557035306546350148293935774010447),
+ /* sin(2*pi* 176/ 256) */ T(-0.9238795325112867561281831893967882868224),
+ /* sin(2*pi* 177/ 256) */ T(-0.932992798834738887711660255543302498295),
+ /* sin(2*pi* 178/ 256) */ T(-0.9415440651830207784125094025995023571856),
+ /* sin(2*pi* 179/ 256) */ T(-0.9495281805930366671959360741893450282522),
+ /* sin(2*pi* 180/ 256) */ T(-0.9569403357322088649357978869802699694828),
+ /* sin(2*pi* 181/ 256) */ T(-0.9637760657954398666864643555078351536631),
+ /* sin(2*pi* 182/ 256) */ T(-0.9700312531945439926039842072861002514569),
+ /* sin(2*pi* 183/ 256) */ T(-0.975702130038528544460395766419527971644),
+ /* sin(2*pi* 184/ 256) */ T(-0.9807852804032304491261822361342390369739),
+ /* sin(2*pi* 185/ 256) */ T(-0.9852776423889412447740184331785477871601),
+ /* sin(2*pi* 186/ 256) */ T(-0.9891765099647809734516737380162430639837),
+ /* sin(2*pi* 187/ 256) */ T(-0.9924795345987099981567672516611178200108),
+ /* sin(2*pi* 188/ 256) */ T(-0.9951847266721968862448369531094799215755),
+ /* sin(2*pi* 189/ 256) */ T(-0.9972904566786902161355971401825678211717),
+ /* sin(2*pi* 190/ 256) */ T(-0.9987954562051723927147716047591006944432),
+ /* sin(2*pi* 191/ 256) */ T(-0.9996988186962042201157656496661721968501),
+ /* sin(2*pi* 192/ 256) */ T(-1.0),
+ /* sin(2*pi* 193/ 256) */ T(-0.9996988186962042201157656496661721968501),
+ /* sin(2*pi* 194/ 256) */ T(-0.9987954562051723927147716047591006944432),
+ /* sin(2*pi* 195/ 256) */ T(-0.9972904566786902161355971401825678211717),
+ /* sin(2*pi* 196/ 256) */ T(-0.9951847266721968862448369531094799215755),
+ /* sin(2*pi* 197/ 256) */ T(-0.9924795345987099981567672516611178200108),
+ /* sin(2*pi* 198/ 256) */ T(-0.9891765099647809734516737380162430639837),
+ /* sin(2*pi* 199/ 256) */ T(-0.9852776423889412447740184331785477871601),
+ /* sin(2*pi* 200/ 256) */ T(-0.9807852804032304491261822361342390369739),
+ /* sin(2*pi* 201/ 256) */ T(-0.975702130038528544460395766419527971644),
+ /* sin(2*pi* 202/ 256) */ T(-0.9700312531945439926039842072861002514569),
+ /* sin(2*pi* 203/ 256) */ T(-0.9637760657954398666864643555078351536631),
+ /* sin(2*pi* 204/ 256) */ T(-0.9569403357322088649357978869802699694828),
+ /* sin(2*pi* 205/ 256) */ T(-0.9495281805930366671959360741893450282522),
+ /* sin(2*pi* 206/ 256) */ T(-0.9415440651830207784125094025995023571856),
+ /* sin(2*pi* 207/ 256) */ T(-0.932992798834738887711660255543302498295),
+ /* sin(2*pi* 208/ 256) */ T(-0.9238795325112867561281831893967882868224),
+ /* sin(2*pi* 209/ 256) */ T(-0.9142097557035306546350148293935774010447),
+ /* sin(2*pi* 210/ 256) */ T(-0.9039892931234433315862002972305370487101),
+ /* sin(2*pi* 211/ 256) */ T(-0.8932243011955153203424164474933979780006),
+ /* sin(2*pi* 212/ 256) */ T(-0.8819212643483550297127568636603883495084),
+ /* sin(2*pi* 213/ 256) */ T(-0.8700869911087114186522924044838488439108),
+ /* sin(2*pi* 214/ 256) */ T(-0.8577286100002720699022699842847701370425),
+ /* sin(2*pi* 215/ 256) */ T(-0.8448535652497070732595712051049570977198),
+ /* sin(2*pi* 216/ 256) */ T(-0.8314696123025452370787883776179057567386),
+ /* sin(2*pi* 217/ 256) */ T(-0.817584813151583696504920884130633809471),
+ /* sin(2*pi* 218/ 256) */ T(-0.8032075314806449098066765129631419238796),
+ /* sin(2*pi* 219/ 256) */ T(-0.7883464276266062620091647053596892826565),
+ /* sin(2*pi* 220/ 256) */ T(-0.773010453362736960810906609758469800971),
+ /* sin(2*pi* 221/ 256) */ T(-0.7572088465064845475754640536057844730404),
+ /* sin(2*pi* 222/ 256) */ T(-0.740951125354959091175616897495162729729),
+ /* sin(2*pi* 223/ 256) */ T(-0.7242470829514669209410692432905531674831),
+ /* sin(2*pi* 224/ 256) */ T(-0.7071067811865475244008443621048490392848),
+ /* sin(2*pi* 225/ 256) */ T(-0.6895405447370669246167306299574847028455),
+ /* sin(2*pi* 226/ 256) */ T(-0.6715589548470184006253768504274218032288),
+ /* sin(2*pi* 227/ 256) */ T(-0.6531728429537767640842030136563054150769),
+ /* sin(2*pi* 228/ 256) */ T(-0.6343932841636454982151716132254933706757),
+ /* sin(2*pi* 229/ 256) */ T(-0.6152315905806268454849135634139842776594),
+ /* sin(2*pi* 230/ 256) */ T(-0.5956993044924333434670365288299698895119),
+ /* sin(2*pi* 231/ 256) */ T(-0.575808191417845300745972453815730841776),
+ /* sin(2*pi* 232/ 256) */ T(-0.5555702330196022247428308139485328743749),
+ /* sin(2*pi* 233/ 256) */ T(-0.5349976198870972106630769046370179155603),
+ /* sin(2*pi* 234/ 256) */ T(-0.514102744193221726593693838968815772608),
+ /* sin(2*pi* 235/ 256) */ T(-0.4928981922297840368730266887588092682397),
+ /* sin(2*pi* 236/ 256) */ T(-0.4713967368259976485563876259052543776575),
+ /* sin(2*pi* 237/ 256) */ T(-0.4496113296546066000462945794242270758832),
+ /* sin(2*pi* 238/ 256) */ T(-0.4275550934302820943209668568887985343046),
+ /* sin(2*pi* 239/ 256) */ T(-0.4052413140049898709084813055050524665119),
+ /* sin(2*pi* 240/ 256) */ T(-0.3826834323650897717284599840303988667613),
+ /* sin(2*pi* 241/ 256) */ T(-0.3598950365349881487751045723267564202023),
+ /* sin(2*pi* 242/ 256) */ T(-0.3368898533922200506892532126191475704778),
+ /* sin(2*pi* 243/ 256) */ T(-0.3136817403988914766564788459941003099934),
+ /* sin(2*pi* 244/ 256) */ T(-0.2902846772544623676361923758173952746915),
+ /* sin(2*pi* 245/ 256) */ T(-0.2667127574748983863252865151164363940421),
+ /* sin(2*pi* 246/ 256) */ T(-0.242980179903263889948274162077471118321),
+ /* sin(2*pi* 247/ 256) */ T(-0.2191012401568697972277375474973577988484),
+ /* sin(2*pi* 248/ 256) */ T(-0.1950903220161282678482848684770222409277),
+ /* sin(2*pi* 249/ 256) */ T(-0.1709618887603012263636423572082635319663),
+ /* sin(2*pi* 250/ 256) */ T(-0.1467304744553617516588501296467178197062),
+ /* sin(2*pi* 251/ 256) */ T(-0.1224106751992161984987044741509457875752),
+ /* sin(2*pi* 252/ 256) */ T(-0.09801714032956060199419556388864184586114),
+ /* sin(2*pi* 253/ 256) */ T(-0.0735645635996674235294656215752343218133),
+ /* sin(2*pi* 254/ 256) */ T(-0.04906767432741801425495497694268265831475),
+ /* sin(2*pi* 255/ 256) */ T(-0.02454122852291228803173452945928292506547),
+};
+}
+
+template <typename T>
+constexpr inline T sin_using_table(size_t size, size_t k)
+{
+ constexpr size_t table_size = arraysize(data::c_sin_table<T>);
+ return data::c_sin_table<T>[(k * table_size / size) % table_size];
+}
+template <typename T>
+constexpr inline T cos_using_table(size_t size, size_t k)
+{
+ return sin_using_table<T>(size, k + size / 4);
+}
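+
+// Usage sketch (illustrative, not part of the original source): lookups are exact
+// whenever `size` divides the 256-entry table, since the index is
+// (k * 256 / size) % 256. For example:
+//   sin_using_table<double>(256, 64) == 1.0  // sin(2*pi*64/256) = sin(pi/2)
+//   cos_using_table<double>(256, 64) == sin_using_table<double>(256, 128) == 0.0
+// For sizes that do not divide 256 the integer division truncates the index, so
+// callers should restrict `size` to powers of two up to 256.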
+}
diff --git a/include/kfr/dft/bitrev.hpp b/include/kfr/dft/bitrev.hpp
@@ -0,0 +1,387 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+
+#include "../base/complex.hpp"
+#include "../base/constants.hpp"
+#include "../base/digitreverse.hpp"
+#include "../base/vec.hpp"
+
+#include "../data/bitrev.hpp"
+
+#include "ft.hpp"
+
+namespace kfr
+{
+
+namespace internal
+{
+
+constexpr bool fft_reorder_aligned = false;
+
+template <size_t Bits>
+constexpr inline u32 bitrev_using_table(u32 x)
+{
+ constexpr size_t bitrev_table_log2N = ilog2(arraysize(data::bitrev_table));
+ if (Bits > bitrev_table_log2N)
+ return bitreverse<Bits>(x);
+
+ return data::bitrev_table[x] >> (bitrev_table_log2N - Bits);
+}
+
+constexpr inline u32 bitrev_using_table(u32 x, size_t bits)
+{
+ constexpr size_t bitrev_table_log2N = ilog2(arraysize(data::bitrev_table));
+ if (bits > bitrev_table_log2N)
+ return bitreverse<32>(x) >> (32 - bits);
+
+ return data::bitrev_table[x] >> (bitrev_table_log2N - bits);
+}
+
+constexpr inline u32 dig4rev_using_table(u32 x, size_t bits)
+{
+ constexpr size_t bitrev_table_log2N = ilog2(arraysize(data::bitrev_table));
+ if (bits > bitrev_table_log2N)
+ return digitreverse4<32>(x) >> (32 - bits);
+
+ x = data::bitrev_table[x];
+ x = (((x & 0xaaaaaaaa) >> 1) | ((x & 0x55555555) << 1));
+ x = x >> (bitrev_table_log2N - bits);
+ return x;
+}
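+
+// Worked example (illustrative, not part of the original source): the table in
+// data/bitrev.hpp appears to hold 16384 entries, so bitrev_table_log2N == 14.
+// Then bitrev_using_table(1, 3) reads data::bitrev_table[1] == 8192 (1
+// bit-reversed in 14 bits) and shifts right by 14 - 3 == 11, giving 4 == 0b100,
+// i.e. 0b001 reversed in 3 bits. The digit-4 variant additionally exchanges the
+// two bits within each adjacent pair, turning the full bit reversal into a
+// base-4 digit reversal.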
+
+template <size_t log2n, size_t bitrev, typename T>
+KFR_INTRIN void fft_reorder_swap(T* inout, size_t i)
+{
+ using cxx = cvec<T, 16>;
+ constexpr size_t N = 1 << log2n;
+ constexpr size_t N4 = 2 * N / 4;
+
+ cxx vi = cread_group<4, 4, N4 / 2, fft_reorder_aligned>(ptr_cast<complex<T>>(inout + i));
+ vi = digitreverse<bitrev, 2>(vi);
+ cwrite_group<4, 4, N4 / 2, fft_reorder_aligned>(ptr_cast<complex<T>>(inout + i), vi);
+}
+
+template <size_t log2n, size_t bitrev, typename T>
+KFR_INTRIN void fft_reorder_swap_two(T* inout, size_t i, size_t j)
+{
+ __builtin_assume(i != j);
+ using cxx = cvec<T, 16>;
+ constexpr size_t N = 1 << log2n;
+ constexpr size_t N4 = 2 * N / 4;
+
+ cxx vi = cread_group<4, 4, N4 / 2, fft_reorder_aligned>(ptr_cast<complex<T>>(inout + i));
+ cxx vj = cread_group<4, 4, N4 / 2, fft_reorder_aligned>(ptr_cast<complex<T>>(inout + j));
+
+ vi = digitreverse<bitrev, 2>(vi);
+ cwrite_group<4, 4, N4 / 2, fft_reorder_aligned>(ptr_cast<complex<T>>(inout + i), vi);
+ vj = digitreverse<bitrev, 2>(vj);
+ cwrite_group<4, 4, N4 / 2, fft_reorder_aligned>(ptr_cast<complex<T>>(inout + j), vj);
+}
+
+template <size_t log2n, size_t bitrev, typename T>
+KFR_INTRIN void fft_reorder_swap(T* inout, size_t i, size_t j)
+{
+ __builtin_assume(i != j);
+ using cxx = cvec<T, 16>;
+ constexpr size_t N = 1 << log2n;
+ constexpr size_t N4 = 2 * N / 4;
+
+ cxx vi = cread_group<4, 4, N4 / 2, fft_reorder_aligned>(ptr_cast<complex<T>>(inout + i));
+ cxx vj = cread_group<4, 4, N4 / 2, fft_reorder_aligned>(ptr_cast<complex<T>>(inout + j));
+
+ vi = digitreverse<bitrev, 2>(vi);
+ cwrite_group<4, 4, N4 / 2, fft_reorder_aligned>(ptr_cast<complex<T>>(inout + j), vi);
+ vj = digitreverse<bitrev, 2>(vj);
+ cwrite_group<4, 4, N4 / 2, fft_reorder_aligned>(ptr_cast<complex<T>>(inout + i), vj);
+}
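+
+// Descriptive note (not in the original source): of the three helpers above, the
+// single-index fft_reorder_swap digit-reverses one 4x4 group in place;
+// fft_reorder_swap_two reverses the groups at i and j, each in place; and the
+// two-index fft_reorder_swap exchanges the groups at i and j while reversing
+// them. The complex<T> overloads below forward to these with doubled indices
+// (two scalars per complex value).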
+
+template <size_t log2n, size_t bitrev, typename T>
+KFR_INTRIN void fft_reorder_swap(complex<T>* inout, size_t i)
+{
+ fft_reorder_swap<log2n, bitrev>(ptr_cast<T>(inout), i * 2);
+}
+
+template <size_t log2n, size_t bitrev, typename T>
+KFR_INTRIN void fft_reorder_swap_two(complex<T>* inout, size_t i0, size_t i1)
+{
+ fft_reorder_swap_two<log2n, bitrev>(ptr_cast<T>(inout), i0 * 2, i1 * 2);
+}
+
+template <size_t log2n, size_t bitrev, typename T>
+KFR_INTRIN void fft_reorder_swap(complex<T>* inout, size_t i, size_t j)
+{
+ fft_reorder_swap<log2n, bitrev>(ptr_cast<T>(inout), i * 2, j * 2);
+}
+
+template <typename T>
+KFR_INTRIN void fft_reorder(complex<T>* inout, csize_t<11>)
+{
+    constexpr size_t bitrev = 2; // explicit bitrev argument, matching the csize_t<7> and csize_t<9> overloads
+    fft_reorder_swap_two<11, bitrev>(inout, 0 * 4, 8 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 1 * 4, 64 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 2 * 4, 32 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 3 * 4, 96 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 4 * 4, 16 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 5 * 4, 80 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 6 * 4, 48 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 7 * 4, 112 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 9 * 4, 72 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 10 * 4, 40 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 11 * 4, 104 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 12 * 4, 24 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 13 * 4, 88 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 14 * 4, 56 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 15 * 4, 120 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 17 * 4, 68 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 18 * 4, 36 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 19 * 4, 100 * 4);
+    fft_reorder_swap_two<11, bitrev>(inout, 20 * 4, 28 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 21 * 4, 84 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 22 * 4, 52 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 23 * 4, 116 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 25 * 4, 76 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 26 * 4, 44 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 27 * 4, 108 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 29 * 4, 92 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 30 * 4, 60 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 31 * 4, 124 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 33 * 4, 66 * 4);
+    fft_reorder_swap_two<11, bitrev>(inout, 34 * 4, 42 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 35 * 4, 98 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 37 * 4, 82 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 38 * 4, 50 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 39 * 4, 114 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 41 * 4, 74 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 43 * 4, 106 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 45 * 4, 90 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 46 * 4, 58 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 47 * 4, 122 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 49 * 4, 70 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 51 * 4, 102 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 53 * 4, 86 * 4);
+    fft_reorder_swap_two<11, bitrev>(inout, 54 * 4, 62 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 55 * 4, 118 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 57 * 4, 78 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 59 * 4, 110 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 61 * 4, 94 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 63 * 4, 126 * 4);
+    fft_reorder_swap_two<11, bitrev>(inout, 65 * 4, 73 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 67 * 4, 97 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 69 * 4, 81 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 71 * 4, 113 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 75 * 4, 105 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 77 * 4, 89 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 79 * 4, 121 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 83 * 4, 101 * 4);
+    fft_reorder_swap_two<11, bitrev>(inout, 85 * 4, 93 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 87 * 4, 117 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 91 * 4, 109 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 95 * 4, 125 * 4);
+    fft_reorder_swap_two<11, bitrev>(inout, 99 * 4, 107 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 103 * 4, 115 * 4);
+    fft_reorder_swap<11, bitrev>(inout, 111 * 4, 123 * 4);
+    fft_reorder_swap_two<11, bitrev>(inout, 119 * 4, 127 * 4);
+}
+
+template <typename T>
+KFR_INTRIN void fft_reorder(complex<T>* inout, csize_t<7>)
+{
+ constexpr size_t bitrev = 2;
+ fft_reorder_swap_two<7, bitrev>(inout, 0 * 4, 2 * 4);
+ fft_reorder_swap<7, bitrev>(inout, 1 * 4, 4 * 4);
+ fft_reorder_swap<7, bitrev>(inout, 3 * 4, 6 * 4);
+ fft_reorder_swap_two<7, bitrev>(inout, 5 * 4, 7 * 4);
+}
+
+template <typename T>
+KFR_INTRIN void fft_reorder(complex<T>* inout, csize_t<8>)
+{
+ constexpr size_t bitrev = 4;
+ fft_reorder_swap_two<8, bitrev>(inout, 0 * 4, 5 * 4);
+ fft_reorder_swap<8, bitrev>(inout, 1 * 4, 4 * 4);
+ fft_reorder_swap<8, bitrev>(inout, 2 * 4, 8 * 4);
+ fft_reorder_swap<8, bitrev>(inout, 3 * 4, 12 * 4);
+ fft_reorder_swap<8, bitrev>(inout, 6 * 4, 9 * 4);
+ fft_reorder_swap<8, bitrev>(inout, 7 * 4, 13 * 4);
+ fft_reorder_swap_two<8, bitrev>(inout, 10 * 4, 15 * 4);
+ fft_reorder_swap<8, bitrev>(inout, 11 * 4, 14 * 4);
+}
+
+template <typename T>
+KFR_INTRIN void fft_reorder(complex<T>* inout, csize_t<9>)
+{
+ constexpr size_t bitrev = 2;
+ fft_reorder_swap_two<9, bitrev>(inout, 0 * 4, 4 * 4);
+ fft_reorder_swap<9, bitrev>(inout, 1 * 4, 16 * 4);
+ fft_reorder_swap<9, bitrev>(inout, 2 * 4, 8 * 4);
+ fft_reorder_swap<9, bitrev>(inout, 3 * 4, 24 * 4);
+ fft_reorder_swap<9, bitrev>(inout, 5 * 4, 20 * 4);
+ fft_reorder_swap<9, bitrev>(inout, 6 * 4, 12 * 4);
+ fft_reorder_swap<9, bitrev>(inout, 7 * 4, 28 * 4);
+ fft_reorder_swap<9, bitrev>(inout, 9 * 4, 18 * 4);
+ fft_reorder_swap_two<9, bitrev>(inout, 10 * 4, 14 * 4);
+ fft_reorder_swap<9, bitrev>(inout, 11 * 4, 26 * 4);
+ fft_reorder_swap<9, bitrev>(inout, 13 * 4, 22 * 4);
+ fft_reorder_swap<9, bitrev>(inout, 15 * 4, 30 * 4);
+ fft_reorder_swap_two<9, bitrev>(inout, 17 * 4, 21 * 4);
+ fft_reorder_swap<9, bitrev>(inout, 19 * 4, 25 * 4);
+ fft_reorder_swap<9, bitrev>(inout, 23 * 4, 29 * 4);
+ fft_reorder_swap_two<9, bitrev>(inout, 27 * 4, 31 * 4);
+}
+
+template <typename T, bool use_br2>
+void cwrite_reordered(T* out, cvec<T, 16> value, size_t N4, cbool_t<use_br2>)
+{
+    value = digitreverse<(use_br2 ? 2 : 4), 2>(value);
+ cwrite_group<4, 4, fft_reorder_aligned>(ptr_cast<complex<T>>(out), N4, value);
+}
+
+template <typename T, bool use_br2>
+KFR_INTRIN void fft_reorder_swap_n4(T* inout, size_t i, size_t j, size_t N4, cbool_t<use_br2>)
+{
+    // Note: i == j occurs for self-paired groups (e.g. i == 0), in which case the
+    // group is digit-reversed in place and the second write below is redundant.
+ const cvec<T, 16> vi = cread_group<4, 4, fft_reorder_aligned>(ptr_cast<complex<T>>(inout + i), N4);
+ const cvec<T, 16> vj = cread_group<4, 4, fft_reorder_aligned>(ptr_cast<complex<T>>(inout + j), N4);
+ cwrite_reordered(inout + j, vi, N4, cbool<use_br2>);
+ cwrite_reordered(inout + i, vj, N4, cbool<use_br2>);
+}
+
+template <typename T>
+KFR_INTRIN void fft_reorder(complex<T>* inout, size_t log2n, ctrue_t use_br2)
+{
+ const size_t N = 1 << log2n;
+ const size_t N4 = N / 4;
+ const size_t iend = N / 16 * 4 * 2;
+ constexpr size_t istep = 2 * 4;
+ const size_t jstep1 = (1 << (log2n - 5)) * 4 * 2;
+ const size_t jstep2 = size_t(1 << (log2n - 5)) * 4 * 2 - size_t(1 << (log2n - 6)) * 4 * 2;
+ T* io = ptr_cast<T>(inout);
+
+ for (size_t i = 0; i < iend;)
+ {
+ size_t j = bitrev_using_table(static_cast<u32>(i >> 3), log2n - 4) << 3;
+ if (i >= j)
+ fft_reorder_swap_n4(io, i, j, N4, use_br2);
+ i += istep;
+ j = j + jstep1;
+
+ if (i >= j)
+ fft_reorder_swap_n4(io, i, j, N4, use_br2);
+ i += istep;
+ j = j - jstep2;
+
+ if (i >= j)
+ fft_reorder_swap_n4(io, i, j, N4, use_br2);
+ i += istep;
+ j = j + jstep1;
+
+ if (i >= j)
+ fft_reorder_swap_n4(io, i, j, N4, use_br2);
+ i += istep;
+ }
+}
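+
+// Descriptive note (not in the original source): in the loop above, j is the
+// bit-reversed partner of i (both in units of 8 scalars = one 4x4 group of
+// interleaved re/im pairs), and the `i >= j` guard ensures each {i, j} pair is
+// swapped only once. The intermediate `j + jstep1` / `j - jstep2` updates track
+// the bit reversal incrementally: adding istep toggles known low bits of i,
+// which toggles the corresponding high bits of its reversal.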
+
+template <typename T>
+KFR_INTRIN void fft_reorder(complex<T>* inout, size_t log2n, cfalse_t use_br2)
+{
+ const size_t N = size_t(1) << log2n;
+ const size_t N4 = N / 4;
+ const size_t N16 = N * 2 / 16;
+ size_t iend = N16;
+ constexpr size_t istep = 2 * 4;
+ const size_t jstep = N / 64 * 4 * 2;
+ T* io = ptr_cast<T>(inout);
+
+ size_t i = 0;
+#pragma clang loop unroll_count(2)
+ for (; i < iend;)
+ {
+ size_t j = dig4rev_using_table(static_cast<u32>(i >> 3), log2n - 4) << 3;
+
+ if (i >= j)
+ fft_reorder_swap_n4(io, i, j, N4, use_br2);
+ i += istep * 4;
+ }
+ iend += N16;
+#pragma clang loop unroll_count(2)
+ for (; i < iend;)
+ {
+ size_t j = dig4rev_using_table(static_cast<u32>(i >> 3), log2n - 4) << 3;
+
+ fft_reorder_swap_n4(io, i, j, N4, use_br2);
+
+ i += istep;
+ j = j + jstep;
+
+ if (i >= j)
+ fft_reorder_swap_n4(io, i, j, N4, use_br2);
+ i += istep * 3;
+ }
+ iend += N16;
+#pragma clang loop unroll_count(2)
+ for (; i < iend;)
+ {
+ size_t j = dig4rev_using_table(static_cast<u32>(i >> 3), log2n - 4) << 3;
+
+ fft_reorder_swap_n4(io, i, j, N4, use_br2);
+
+ i += istep;
+ j = j + jstep;
+
+ fft_reorder_swap_n4(io, i, j, N4, use_br2);
+
+ i += istep;
+ j = j + jstep;
+
+ if (i >= j)
+ fft_reorder_swap_n4(io, i, j, N4, use_br2);
+ i += istep * 2;
+ }
+ iend += N16;
+#pragma clang loop unroll_count(2)
+ for (; i < iend;)
+ {
+ size_t j = dig4rev_using_table(static_cast<u32>(i >> 3), log2n - 4) << 3;
+
+ fft_reorder_swap_n4(io, i, j, N4, use_br2);
+
+ i += istep;
+ j = j + jstep;
+
+ fft_reorder_swap_n4(io, i, j, N4, use_br2);
+
+ i += istep;
+ j = j + jstep;
+
+ fft_reorder_swap_n4(io, i, j, N4, use_br2);
+
+ i += istep;
+ j = j + jstep;
+
+ if (i >= j)
+ fft_reorder_swap_n4(io, i, j, N4, use_br2);
+ i += istep;
+ }
+}
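+
+// Descriptive note (not in the original source): the four loops above cover the
+// four quarters of the index range. Within each group of four sub-iterations, a
+// swap is unconditional when its partner j is an index that no earlier iteration
+// can have visited (its low base-4 digit places it outside the i-sequence walked
+// so far); only the final sub-iteration can encounter an already-swapped partner
+// and so keeps the `i >= j` guard.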
+}
+}
diff --git a/include/kfr/dft/fft.hpp b/include/kfr/dft/fft.hpp
@@ -0,0 +1,998 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+
+#include "../base/complex.hpp"
+#include "../base/constants.hpp"
+#include "../base/memory.hpp"
+#include "../base/read_write.hpp"
+#include "../base/vec.hpp"
+#include "../misc/small_buffer.hpp"
+
+#include "../cometa/string.hpp"
+
+#include "bitrev.hpp"
+#include "ft.hpp"
+
+#pragma clang diagnostic push
+#if CID_HAS_WARNING("-Wshadow")
+#pragma clang diagnostic ignored "-Wshadow"
+#endif
+
+namespace kfr
+{
+
+template <typename T>
+struct dft_stage
+{
+ size_t stage_size = 0;
+ size_t data_size = 0;
+ size_t temp_size = 0;
+ u8* data = nullptr;
+ size_t repeats = 1;
+ size_t out_offset = 0;
+ const char* name;
+ bool recursion = false;
+
+ void initialize(size_t size) { do_initialize(size); }
+
+ KFR_INTRIN void execute(complex<T>* out, const complex<T>* in, u8* temp) { do_execute(out, in, temp); }
+ virtual ~dft_stage() {}
+
+protected:
+ virtual void do_initialize(size_t) {}
+ virtual void do_execute(complex<T>*, const complex<T>*, u8* temp) = 0;
+};
+
+#pragma clang diagnostic push
+#if CID_HAS_WARNING("-Wassume")
+#pragma clang diagnostic ignored "-Wassume"
+#endif
+
+namespace internal
+{
+
+template <size_t width, bool inverse, typename T>
+KFR_INTRIN cvec<T, width> radix4_apply_twiddle(csize_t<width>, cfalse_t /*split_format*/, cbool_t<inverse>,
+ cvec<T, width> w, cvec<T, width> tw)
+{
+ cvec<T, width> b1 = w * dupeven(tw);
+ w = swap<2>(w);
+
+ if (inverse)
+ tw = -(tw);
+ w = subadd(b1, w * dupodd(tw));
+ return w;
+}
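+// The interleaved-format twiddle multiply above computes w * tw as
+// (w.re*tw.re - w.im*tw.im, w.im*tw.re + w.re*tw.im): dupeven(tw) duplicates
+// the real parts, dupodd(tw) the imaginary parts, and subadd() subtracts in
+// even lanes and adds in odd lanes. Negating tw after the first product yields
+// w * conj(tw), the twiddle required by the inverse transform.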
+
+template <size_t width, bool use_br2, bool inverse, bool aligned, typename T>
+KFR_INTRIN void radix4_body(size_t N, csize_t<width>, cfalse_t, cfalse_t, cfalse_t, cbool_t<use_br2>,
+ cbool_t<inverse>, cbool_t<aligned>, complex<T>* out, const complex<T>* in,
+ const complex<T>* twiddle)
+{
+ const size_t N4 = N / 4;
+ cvec<T, width> w1, w2, w3;
+
+ cvec<T, width> sum02, sum13, diff02, diff13;
+
+ cvec<T, width> a0, a1, a2, a3;
+ a0 = cread<width, aligned>(in + 0);
+ a2 = cread<width, aligned>(in + N4 * 2);
+ sum02 = a0 + a2;
+
+ a1 = cread<width, aligned>(in + N4);
+ a3 = cread<width, aligned>(in + N4 * 3);
+ sum13 = a1 + a3;
+
+ cwrite<width, aligned>(out, sum02 + sum13);
+ w2 = sum02 - sum13;
+ cwrite<width, aligned>(
+ out + N4 * (use_br2 ? 1 : 2),
+ radix4_apply_twiddle(csize<width>, cfalse, cbool<inverse>, w2, cread<width, true>(twiddle + width)));
+ diff02 = a0 - a2;
+ diff13 = a1 - a3;
+ if (inverse)
+ {
+ diff13 = (diff13 ^ broadcast<width, T>(T(), -T()));
+ diff13 = swap<2>(diff13);
+ }
+ else
+ {
+ diff13 = swap<2>(diff13);
+ diff13 = (diff13 ^ broadcast<width, T>(T(), -T()));
+ }
+
+ w1 = diff02 + diff13;
+
+ cwrite<width, aligned>(
+ out + N4 * (use_br2 ? 2 : 1),
+ radix4_apply_twiddle(csize<width>, cfalse, cbool<inverse>, w1, cread<width, true>(twiddle + 0)));
+ w3 = diff02 - diff13;
+ cwrite<width, aligned>(out + N4 * 3, radix4_apply_twiddle(csize<width>, cfalse, cbool<inverse>, w3,
+ cread<width, true>(twiddle + width * 2)));
+}
+
+template <size_t width, bool inverse, typename T>
+KFR_INTRIN cvec<T, width> radix4_apply_twiddle(csize_t<width>, ctrue_t /*split_format*/, cbool_t<inverse>,
+ cvec<T, width> w, cvec<T, width> tw)
+{
+ vec<T, width> re1, im1, twre, twim;
+ split(w, re1, im1);
+ split(tw, twre, twim);
+
+ const vec<T, width> b1re = re1 * twre;
+ const vec<T, width> b1im = im1 * twre;
+ if (inverse)
+ w = concat(b1re + im1 * twim, b1im - re1 * twim);
+ else
+ w = concat(b1re - im1 * twim, b1im + re1 * twim);
+ return w;
+}
+
+template <size_t width, bool splitout, bool splitin, bool use_br2, bool inverse, bool aligned, typename T>
+KFR_INTRIN void radix4_body(size_t N, csize_t<width>, ctrue_t, cbool_t<splitout>, cbool_t<splitin>,
+ cbool_t<use_br2>, cbool_t<inverse>, cbool_t<aligned>, complex<T>* out,
+ const complex<T>* in, const complex<T>* twiddle)
+{
+ const size_t N4 = N / 4;
+ cvec<T, width> w1, w2, w3;
+ constexpr bool read_split = !splitin && splitout;
+ constexpr bool write_split = splitin && !splitout;
+
+ vec<T, width> re0, im0, re1, im1, re2, im2, re3, im3;
+
+ split(cread_split<width, aligned, read_split>(in + N4 * 0), re0, im0);
+ split(cread_split<width, aligned, read_split>(in + N4 * 1), re1, im1);
+ split(cread_split<width, aligned, read_split>(in + N4 * 2), re2, im2);
+ split(cread_split<width, aligned, read_split>(in + N4 * 3), re3, im3);
+
+ const vec<T, width> sum02re = re0 + re2;
+ const vec<T, width> sum02im = im0 + im2;
+ const vec<T, width> sum13re = re1 + re3;
+ const vec<T, width> sum13im = im1 + im3;
+
+ cwrite_split<width, aligned, write_split>(out, concat(sum02re + sum13re, sum02im + sum13im));
+ w2 = concat(sum02re - sum13re, sum02im - sum13im);
+ cwrite_split<width, aligned, write_split>(
+ out + N4 * (use_br2 ? 1 : 2),
+ radix4_apply_twiddle(csize<width>, ctrue, cbool<inverse>, w2, cread<width, true>(twiddle + width)));
+
+ const vec<T, width> diff02re = re0 - re2;
+ const vec<T, width> diff02im = im0 - im2;
+ const vec<T, width> diff13re = re1 - re3;
+ const vec<T, width> diff13im = im1 - im3;
+
+ (inverse ? w1 : w3) = concat(diff02re - diff13im, diff02im + diff13re);
+ (inverse ? w3 : w1) = concat(diff02re + diff13im, diff02im - diff13re);
+
+ cwrite_split<width, aligned, write_split>(
+ out + N4 * (use_br2 ? 2 : 1),
+ radix4_apply_twiddle(csize<width>, ctrue, cbool<inverse>, w1, cread<width, true>(twiddle + 0)));
+ cwrite_split<width, aligned, write_split>(out + N4 * 3,
+ radix4_apply_twiddle(csize<width>, ctrue, cbool<inverse>, w3,
+ cread<width, true>(twiddle + width * 2)));
+}
+
+template <typename T>
+KFR_NOINLINE cvec<T, 1> calculate_twiddle(size_t n, size_t size)
+{
+ if (n == 0)
+ {
+ return make_vector(static_cast<T>(1), static_cast<T>(0));
+ }
+ else if (n == size / 4)
+ {
+ return make_vector(static_cast<T>(0), static_cast<T>(-1));
+ }
+ else if (n == size / 2)
+ {
+ return make_vector(static_cast<T>(-1), static_cast<T>(0));
+ }
+ else if (n == size * 3 / 4)
+ {
+ return make_vector(static_cast<T>(0), static_cast<T>(1));
+ }
+ else
+ {
+ double kth = c_pi<double, 2> * (n / static_cast<double>(size));
+ double tcos = +kfr::native::cos(kth);
+ double tsin = -kfr::native::sin(kth);
+ return make_vector(static_cast<T>(tcos), static_cast<T>(tsin));
+ }
+}
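+// calculate_twiddle returns e^(-2*pi*i*n/size) as a (cos, -sin) pair. The four
+// quadrant angles are returned exactly rather than through cos/sin, so the most
+// common twiddle factors carry no rounding error.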
+
+template <typename T, size_t width>
+KFR_INTRIN void initialize_twiddles_impl(complex<T>*& twiddle, size_t nn, size_t nnstep, size_t size,
+ bool split_format)
+{
+ vec<T, 2 * width> result = T();
+ KFR_LOOP_UNROLL
+ for (size_t i = 0; i < width; i++)
+ {
+ const cvec<T, 1> r = calculate_twiddle<T>(nn + nnstep * i, size);
+ result(i * 2) = r[0];
+ result(i * 2 + 1) = r[1];
+ }
+ if (split_format)
+ ref_cast<cvec<T, width>>(twiddle[0]) = splitpairs(result);
+ else
+ ref_cast<cvec<T, width>>(twiddle[0]) = result;
+ twiddle += width;
+}
+
+template <typename T, size_t width>
+KFR_NOINLINE void initialize_twiddles(complex<T>*& twiddle, size_t stage_size, size_t size, bool split_format)
+{
+ size_t nnstep = size / stage_size;
+ KFR_LOOP_NOUNROLL
+ for (size_t n = 0; n < stage_size / 4; n += width)
+ {
+ initialize_twiddles_impl<T, width>(twiddle, n * nnstep * 1, nnstep * 1, size, split_format);
+ initialize_twiddles_impl<T, width>(twiddle, n * nnstep * 2, nnstep * 2, size, split_format);
+ initialize_twiddles_impl<T, width>(twiddle, n * nnstep * 3, nnstep * 3, size, split_format);
+ }
+}
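+// For one radix-4 stage of the given size, this precomputes the three twiddles
+// w^n, w^(2n) and w^(3n) required by every butterfly, `width` values at a time,
+// optionally in split (all real parts, then all imaginary parts) layout.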
+
+template <typename T>
+KFR_INTRIN void prefetch_one(const complex<T>* in)
+{
+ __builtin_prefetch(ptr_cast<void>(in), 0, _MM_HINT_T0);
+}
+
+template <typename T>
+KFR_INTRIN void prefetch_four(size_t stride, const complex<T>* in)
+{
+ __builtin_prefetch(ptr_cast<void>(in), 0, _MM_HINT_T0);
+ __builtin_prefetch(ptr_cast<void>(in + stride), 0, _MM_HINT_T0);
+ __builtin_prefetch(ptr_cast<void>(in + stride * 2), 0, _MM_HINT_T0);
+ __builtin_prefetch(ptr_cast<void>(in + stride * 3), 0, _MM_HINT_T0);
+}
+
+template <typename Ntype, size_t width, bool splitout, bool splitin, bool prefetch, bool use_br2,
+ bool inverse, bool aligned, typename T>
+KFR_INTRIN cfalse_t radix4_pass(Ntype N, size_t blocks, csize_t<width>, cbool_t<splitout>, cbool_t<splitin>,
+ cbool_t<use_br2>, cbool_t<prefetch>, cbool_t<inverse>, cbool_t<aligned>,
+ complex<T>* out, const complex<T>* in, const complex<T>*& twiddle)
+{
+ constexpr static size_t prefetch_offset = width * 8;
+ const auto N4 = N / csize<4>;
+ const auto N43 = N4 * csize<3>;
+ __builtin_assume(blocks > 0);
+ __builtin_assume(N > 0);
+ __builtin_assume(N4 > 0);
+ KFR_LOOP_NOUNROLL for (size_t b = 0; b < blocks; b++)
+ {
+#pragma clang loop unroll_count(default_unroll_count)
+ for (size_t n2 = 0; n2 < N4; n2 += width)
+ {
+ if (prefetch)
+ prefetch_four(N4, in + prefetch_offset);
+ radix4_body(N, csize<width>, cbool<(splitout || splitin)>, cbool<splitout>, cbool<splitin>,
+ cbool<use_br2>, cbool<inverse>, cbool<aligned>, out, in, twiddle + n2 * 3);
+ in += width;
+ out += width;
+ }
+ in += N43;
+ out += N43;
+ }
+ twiddle += N43;
+ return {};
+}
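+// One generic radix-4 pass: for each of `blocks` contiguous blocks of N complex
+// values, width-wide radix-4 butterflies are applied with twiddles; the shared
+// twiddle pointer then advances by 3*N/4, so successive passes walk one table.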
+
+template <size_t width, bool prefetch, bool use_br2, bool inverse, bool aligned, typename T>
+KFR_SINTRIN ctrue_t radix4_pass(csize_t<32>, size_t blocks, csize_t<width>, cfalse_t, cfalse_t,
+ cbool_t<use_br2>, cbool_t<prefetch>, cbool_t<inverse>, cbool_t<aligned>,
+ complex<T>* out, const complex<T>*, const complex<T>*& /*twiddle*/)
+{
+ __builtin_assume(blocks > 0);
+ constexpr static size_t prefetch_offset = 32 * 4;
+ for (size_t b = 0; b < blocks; b++)
+ {
+ if (prefetch)
+ prefetch_four(csize<64>, out + prefetch_offset);
+ cvec<T, 4> w0, w1, w2, w3, w4, w5, w6, w7;
+ split(cread<8, aligned>(out + 0), w0, w1);
+ split(cread<8, aligned>(out + 8), w2, w3);
+ split(cread<8, aligned>(out + 16), w4, w5);
+ split(cread<8, aligned>(out + 24), w6, w7);
+
+ butterfly8<4, inverse>(w0, w1, w2, w3, w4, w5, w6, w7);
+
+ w1 = cmul(w1, fixed_twiddle<T, 4, 32, 0, 1, inverse>);
+ w2 = cmul(w2, fixed_twiddle<T, 4, 32, 0, 2, inverse>);
+ w3 = cmul(w3, fixed_twiddle<T, 4, 32, 0, 3, inverse>);
+ w4 = cmul(w4, fixed_twiddle<T, 4, 32, 0, 4, inverse>);
+ w5 = cmul(w5, fixed_twiddle<T, 4, 32, 0, 5, inverse>);
+ w6 = cmul(w6, fixed_twiddle<T, 4, 32, 0, 6, inverse>);
+ w7 = cmul(w7, fixed_twiddle<T, 4, 32, 0, 7, inverse>);
+
+ cvec<T, 8> z0, z1, z2, z3;
+ transpose4x8(w0, w1, w2, w3, w4, w5, w6, w7, z0, z1, z2, z3);
+
+ butterfly4<8, inverse>(cfalse, z0, z1, z2, z3, z0, z1, z2, z3);
+ cwrite<32, aligned>(out, bitreverse<2>(concat(z0, z1, z2, z3)));
+ out += 32;
+ }
+ return {};
+}
+
+template <size_t width, bool prefetch, bool use_br2, bool inverse, bool aligned, typename T>
+KFR_SINTRIN ctrue_t radix4_pass(csize_t<8>, size_t blocks, csize_t<width>, cfalse_t, cfalse_t,
+ cbool_t<use_br2>, cbool_t<prefetch>, cbool_t<inverse>, cbool_t<aligned>,
+ complex<T>* out, const complex<T>*, const complex<T>*& /*twiddle*/)
+{
+ __builtin_assume(blocks > 0);
+ constexpr static size_t prefetch_offset = width * 16;
+ for (size_t b = 0; b < blocks; b += 2)
+ {
+ if (prefetch)
+ prefetch_one(out + prefetch_offset);
+
+ cvec<T, 8> vlo = cread<8, aligned>(out + 0);
+ cvec<T, 8> vhi = cread<8, aligned>(out + 8);
+ butterfly8<inverse>(vlo);
+ butterfly8<inverse>(vhi);
+ vlo = permutegroups<(2), 0, 4, 2, 6, 1, 5, 3, 7>(vlo);
+ vhi = permutegroups<(2), 0, 4, 2, 6, 1, 5, 3, 7>(vhi);
+ cwrite<8, aligned>(out, vlo);
+ cwrite<8, aligned>(out + 8, vhi);
+ out += 16;
+ }
+ return {};
+}
+
+template <size_t width, bool prefetch, bool use_br2, bool inverse, bool aligned, typename T>
+KFR_SINTRIN ctrue_t radix4_pass(csize_t<16>, size_t blocks, csize_t<width>, cfalse_t, cfalse_t,
+ cbool_t<use_br2>, cbool_t<prefetch>, cbool_t<inverse>, cbool_t<aligned>,
+ complex<T>* out, const complex<T>*, const complex<T>*& /*twiddle*/)
+{
+ __builtin_assume(blocks > 0);
+ constexpr static size_t prefetch_offset = width * 4;
+#pragma clang loop unroll_count(2)
+ for (size_t b = 0; b < blocks; b += 2)
+ {
+ if (prefetch)
+ prefetch_one(out + prefetch_offset);
+
+ cvec<T, 16> vlo = cread<16, aligned>(out);
+ cvec<T, 16> vhi = cread<16, aligned>(out + 16);
+ butterfly4<4, inverse>(vlo);
+ butterfly4<4, inverse>(vhi);
+ apply_twiddles4<0, 4, 4, inverse>(vlo);
+ apply_twiddles4<0, 4, 4, inverse>(vhi);
+ vlo = digitreverse4<2>(vlo);
+ vhi = digitreverse4<2>(vhi);
+ butterfly4<4, inverse>(vlo);
+ butterfly4<4, inverse>(vhi);
+
+ use_br2 ? cbitreverse_write(out, vlo) : cdigitreverse4_write(out, vlo);
+ use_br2 ? cbitreverse_write(out + 16, vhi) : cdigitreverse4_write(out + 16, vhi);
+ out += 32;
+ }
+ return {};
+}
+
+template <size_t width, bool prefetch, bool use_br2, bool inverse, bool aligned, typename T>
+KFR_SINTRIN ctrue_t radix4_pass(csize_t<4>, size_t blocks, csize_t<width>, cfalse_t, cfalse_t,
+ cbool_t<use_br2>, cbool_t<prefetch>, cbool_t<inverse>, cbool_t<aligned>,
+ complex<T>* out, const complex<T>*, const complex<T>*& /*twiddle*/)
+{
+ constexpr static size_t prefetch_offset = width * 4;
+ __builtin_assume(blocks > 0);
+ KFR_LOOP_NOUNROLL
+ for (size_t b = 0; b < blocks; b += 4)
+ {
+ if (prefetch)
+ prefetch_one(out + prefetch_offset);
+
+ cvec<T, 16> v16 = cdigitreverse4_read<16, aligned>(out);
+ butterfly4<4, inverse>(v16);
+ cdigitreverse4_write<aligned>(out, v16);
+
+ out += 4 * 4;
+ }
+ return {};
+}
+
+template <typename T, bool splitin, bool is_even, bool inverse>
+struct fft_stage_impl : dft_stage<T>
+{
+ fft_stage_impl(size_t stage_size)
+ {
+ this->stage_size = stage_size;
+ this->repeats = 4;
+ this->recursion = true;
+ this->data_size = align_up(sizeof(complex<T>) * stage_size / 4 * 3, native_cache_alignment);
+ }
+
+protected:
+ constexpr static bool prefetch = true;
+ constexpr static bool aligned = false;
+ constexpr static size_t width = vector_width<T, cpu_t::native>;
+
+ virtual void do_initialize(size_t size) override final
+ {
+ complex<T>* twiddle = ptr_cast<complex<T>>(this->data);
+ initialize_twiddles<T, width>(twiddle, this->stage_size, size, true);
+ }
+
+ virtual void do_execute(complex<T>* out, const complex<T>* in, u8* /*temp*/) override final
+ {
+ const complex<T>* twiddle = ptr_cast<complex<T>>(this->data);
+ if (splitin)
+ in = out;
+ const size_t stage_size = this->stage_size;
+ __builtin_assume(stage_size >= 2048);
+ __builtin_assume(stage_size % 2048 == 0);
+ radix4_pass(stage_size, 1, csize<width>, ctrue, cbool<splitin>, cbool<!is_even>, cbool<prefetch>,
+ cbool<inverse>, cbool<aligned>, out, in, twiddle);
+ }
+};
+
+template <typename T, bool splitin, size_t size, bool inverse>
+struct fft_final_stage_impl : dft_stage<T>
+{
+ fft_final_stage_impl(size_t)
+ {
+ this->stage_size = size;
+ this->out_offset = size;
+ this->repeats = 4;
+ this->recursion = true;
+ this->data_size = align_up(sizeof(complex<T>) * size * 3 / 2, native_cache_alignment);
+ }
+
+protected:
+ constexpr static size_t width = vector_width<T, cpu_t::native>;
+ constexpr static bool is_even = cometa::is_even(ilog2(size));
+ constexpr static bool use_br2 = !is_even;
+ constexpr static bool aligned = false;
+ constexpr static bool prefetch = splitin;
+
+ virtual void do_initialize(size_t total_size) override final
+ {
+ complex<T>* twiddle = ptr_cast<complex<T>>(this->data);
+ size_t stage_size = this->stage_size;
+ while (stage_size > 4)
+ {
+ initialize_twiddles<T, width>(twiddle, stage_size, total_size, true);
+ stage_size /= 4;
+ }
+ }
+
+ virtual void do_execute(complex<T>* out, const complex<T>* in, u8* /*temp*/) override final
+ {
+ constexpr bool is_double = sizeof(T) == 8;
+ constexpr size_t final_size = is_even ? (is_double ? 4 : 16) : (is_double ? 8 : 32);
+ const complex<T>* twiddle = ptr_cast<complex<T>>(this->data);
+ final_pass(csize<final_size>, out, in, twiddle);
+ }
+
+ KFR_INTRIN void final_pass(csize_t<8>, complex<T>* out, const complex<T>* in, const complex<T>* twiddle)
+ {
+ radix4_pass(csize<512>, 1, csize<width>, ctrue, cbool<splitin>, cbool<use_br2>, cbool<prefetch>,
+ cbool<inverse>, cbool<aligned>, out, in, twiddle);
+ radix4_pass(csize<128>, 4, csize<width>, ctrue, ctrue, cbool<use_br2>, cbool<prefetch>,
+ cbool<inverse>, cbool<aligned>, out, out, twiddle);
+ radix4_pass(csize<32>, 16, csize<width>, cfalse, ctrue, cbool<use_br2>, cbool<prefetch>,
+ cbool<inverse>, cbool<aligned>, out, out, twiddle);
+ radix4_pass(csize<8>, 64, csize<width>, cfalse, cfalse, cbool<use_br2>, cbool<prefetch>,
+ cbool<inverse>, cbool<aligned>, out, out, twiddle);
+ }
+
+ KFR_INTRIN void final_pass(csize_t<32>, complex<T>* out, const complex<T>* in, const complex<T>* twiddle)
+ {
+ radix4_pass(csize<512>, 1, csize<width>, ctrue, cbool<splitin>, cbool<use_br2>, cbool<prefetch>,
+ cbool<inverse>, cbool<aligned>, out, in, twiddle);
+ radix4_pass(csize<128>, 4, csize<width>, cfalse, ctrue, cbool<use_br2>, cbool<prefetch>,
+ cbool<inverse>, cbool<aligned>, out, out, twiddle);
+ radix4_pass(csize<32>, 16, csize<width>, cfalse, cfalse, cbool<use_br2>, cbool<prefetch>,
+ cbool<inverse>, cbool<aligned>, out, out, twiddle);
+ }
+
+ KFR_INTRIN void final_pass(csize_t<4>, complex<T>* out, const complex<T>* in, const complex<T>* twiddle)
+ {
+ radix4_pass(csize<1024>, 1, csize<width>, ctrue, cbool<splitin>, cbool<use_br2>, cbool<prefetch>,
+ cbool<inverse>, cbool<aligned>, out, in, twiddle);
+ radix4_pass(csize<256>, 4, csize<width>, ctrue, ctrue, cbool<use_br2>, cbool<prefetch>,
+ cbool<inverse>, cbool<aligned>, out, out, twiddle);
+ radix4_pass(csize<64>, 16, csize<width>, ctrue, ctrue, cbool<use_br2>, cbool<prefetch>,
+ cbool<inverse>, cbool<aligned>, out, out, twiddle);
+ radix4_pass(csize<16>, 64, csize<width>, cfalse, ctrue, cbool<use_br2>, cbool<prefetch>,
+ cbool<inverse>, cbool<aligned>, out, out, twiddle);
+ radix4_pass(csize<4>, 256, csize<width>, cfalse, cfalse, cbool<use_br2>, cbool<prefetch>,
+ cbool<inverse>, cbool<aligned>, out, out, twiddle);
+ }
+
+ KFR_INTRIN void final_pass(csize_t<16>, complex<T>* out, const complex<T>* in, const complex<T>* twiddle)
+ {
+ radix4_pass(csize<1024>, 1, csize<width>, ctrue, cbool<splitin>, cbool<use_br2>, cbool<prefetch>,
+ cbool<inverse>, cbool<aligned>, out, in, twiddle);
+ radix4_pass(csize<256>, 4, csize<width>, ctrue, ctrue, cbool<use_br2>, cbool<prefetch>,
+ cbool<inverse>, cbool<aligned>, out, out, twiddle);
+ radix4_pass(csize<64>, 16, csize<width>, cfalse, ctrue, cbool<use_br2>, cbool<prefetch>,
+ cbool<inverse>, cbool<aligned>, out, out, twiddle);
+ radix4_pass(csize<16>, 64, csize<width>, cfalse, cfalse, cbool<use_br2>, cbool<prefetch>,
+ cbool<inverse>, cbool<aligned>, out, out, twiddle);
+ }
+};
+
+template <typename T, bool is_even>
+struct fft_reorder_stage_impl : dft_stage<T>
+{
+ fft_reorder_stage_impl(size_t stage_size)
+ {
+ this->stage_size = stage_size;
+ log2n = ilog2(stage_size);
+ this->data_size = 0;
+ }
+
+protected:
+ size_t log2n;
+
+ virtual void do_initialize(size_t) override final {}
+
+ virtual void do_execute(complex<T>* out, const complex<T>*, u8* /*temp*/) override final
+ {
+ fft_reorder(out, log2n, cbool<!is_even>);
+ }
+};
+
+template <typename T, size_t log2n, bool inverse>
+struct fft_specialization;
+
+template <typename T, bool inverse>
+struct fft_specialization<T, 1, inverse> : dft_stage<T>
+{
+ fft_specialization(size_t) {}
+protected:
+ constexpr static bool aligned = false;
+ virtual void do_execute(complex<T>* out, const complex<T>* in, u8*) override final
+ {
+ cvec<T, 1> a0, a1;
+ split(cread<2, aligned>(in), a0, a1);
+ cwrite<2, aligned>(out, concat(a0 + a1, a0 - a1));
+ }
+};
+
+template <typename T, bool inverse>
+struct fft_specialization<T, 2, inverse> : dft_stage<T>
+{
+ fft_specialization(size_t) {}
+protected:
+ constexpr static bool aligned = false;
+ virtual void do_execute(complex<T>* out, const complex<T>* in, u8*) override final
+ {
+ cvec<T, 1> a0, a1, a2, a3;
+ split(cread<4>(in), a0, a1, a2, a3);
+ butterfly(cbool<inverse>, a0, a1, a2, a3, a0, a1, a2, a3);
+ cwrite<4>(out, concat(a0, a1, a2, a3));
+ }
+};
+
+template <typename T, bool inverse>
+struct fft_specialization<T, 3, inverse> : dft_stage<T>
+{
+ fft_specialization(size_t) {}
+protected:
+ constexpr static bool aligned = false;
+ virtual void do_execute(complex<T>* out, const complex<T>* in, u8*) override final
+ {
+ cvec<T, 8> v8 = cread<8, aligned>(in);
+ butterfly8<inverse>(v8);
+ cwrite<8, aligned>(out, v8);
+ }
+};
+
+template <typename T, bool inverse>
+struct fft_specialization<T, 4, inverse> : dft_stage<T>
+{
+ fft_specialization(size_t) {}
+protected:
+ constexpr static bool aligned = false;
+ virtual void do_execute(complex<T>* out, const complex<T>* in, u8*) override final
+ {
+ cvec<T, 16> v16 = cread<16, aligned>(in);
+ butterfly16<inverse>(v16);
+ cwrite<16, aligned>(out, v16);
+ }
+};
+
+template <typename T, bool inverse>
+struct fft_specialization<T, 5, inverse> : dft_stage<T>
+{
+ fft_specialization(size_t) {}
+protected:
+ constexpr static bool aligned = false;
+ virtual void do_execute(complex<T>* out, const complex<T>* in, u8*) override final
+ {
+ cvec<T, 32> v32 = cread<32, aligned>(in);
+ butterfly32<inverse>(v32);
+ cwrite<32, aligned>(out, v32);
+ }
+};
+
+template <typename T, bool inverse>
+struct fft_specialization<T, 6, inverse> : dft_stage<T>
+{
+ fft_specialization(size_t) {}
+protected:
+ constexpr static bool aligned = false;
+ virtual void do_execute(complex<T>* out, const complex<T>* in, u8*) override final
+ {
+ butterfly64(cbool<inverse>, cbool<aligned>, out, in);
+ }
+};
+
+template <typename T, bool inverse>
+struct fft_specialization<T, 7, inverse> : dft_stage<T>
+{
+ fft_specialization(size_t)
+ {
+ this->stage_size = 128;
+ this->data_size = align_up(sizeof(complex<T>) * 128 * 3 / 2, native_cache_alignment);
+ }
+
+protected:
+ constexpr static bool aligned = false;
+ constexpr static size_t width = vector_width<T, cpu_t::native>;
+ constexpr static bool use_br2 = true;
+ constexpr static bool prefetch = false;
+ constexpr static bool is_double = sizeof(T) == 8;
+ constexpr static size_t final_size = is_double ? 8 : 32;
+ constexpr static bool split_format = final_size == 8;
+
+ virtual void do_initialize(size_t total_size) override final
+ {
+ complex<T>* twiddle = ptr_cast<complex<T>>(this->data);
+ initialize_twiddles<T, width>(twiddle, 128, total_size, split_format);
+ initialize_twiddles<T, width>(twiddle, 32, total_size, split_format);
+ initialize_twiddles<T, width>(twiddle, 8, total_size, split_format);
+ }
+
+ virtual void do_execute(complex<T>* out, const complex<T>* in, u8* /*temp*/) override final
+ {
+ const complex<T>* twiddle = ptr_cast<complex<T>>(this->data);
+ final_pass(csize<final_size>, out, in, twiddle);
+ fft_reorder(out, csize<7>);
+ }
+
+ KFR_INTRIN void final_pass(csize_t<8>, complex<T>* out, const complex<T>* in, const complex<T>* twiddle)
+ {
+ radix4_pass(csize<128>, 1, csize<width>, ctrue, cfalse, cbool<use_br2>, cbool<prefetch>,
+ cbool<inverse>, cbool<aligned>, out, in, twiddle);
+ radix4_pass(csize<32>, 4, csize<width>, cfalse, ctrue, cbool<use_br2>, cbool<prefetch>,
+ cbool<inverse>, cbool<aligned>, out, out, twiddle);
+ radix4_pass(csize<8>, 16, csize<width>, cfalse, cfalse, cbool<use_br2>, cbool<prefetch>,
+ cbool<inverse>, cbool<aligned>, out, out, twiddle);
+ }
+
+ KFR_INTRIN void final_pass(csize_t<32>, complex<T>* out, const complex<T>* in, const complex<T>* twiddle)
+ {
+ radix4_pass(csize<128>, 1, csize<width>, cfalse, cfalse, cbool<use_br2>, cbool<prefetch>,
+ cbool<inverse>, cbool<aligned>, out, in, twiddle);
+ radix4_pass(csize<32>, 4, csize<width>, cfalse, cfalse, cbool<use_br2>, cbool<prefetch>,
+ cbool<inverse>, cbool<aligned>, out, out, twiddle);
+ }
+};
+
+template <bool inverse>
+struct fft_specialization<float, 8, inverse> : dft_stage<float>
+{
+ fft_specialization(size_t) { this->temp_size = sizeof(complex<float>) * 256; }
+protected:
+ virtual void do_execute(complex<float>* out, const complex<float>* in, u8* temp) override final
+ {
+ complex<float>* scratch = ptr_cast<complex<float>>(temp);
+ if (out == in)
+ {
+ butterfly16_multi_flip<0, inverse>(scratch, out);
+ butterfly16_multi_flip<1, inverse>(scratch, out);
+ butterfly16_multi_flip<2, inverse>(scratch, out);
+ butterfly16_multi_flip<3, inverse>(scratch, out);
+
+ butterfly16_multi_natural<0, inverse>(out, scratch);
+ butterfly16_multi_natural<1, inverse>(out, scratch);
+ butterfly16_multi_natural<2, inverse>(out, scratch);
+ butterfly16_multi_natural<3, inverse>(out, scratch);
+ }
+ else
+ {
+ butterfly16_multi_flip<0, inverse>(out, in);
+ butterfly16_multi_flip<1, inverse>(out, in);
+ butterfly16_multi_flip<2, inverse>(out, in);
+ butterfly16_multi_flip<3, inverse>(out, in);
+
+ butterfly16_multi_natural<0, inverse>(out, out);
+ butterfly16_multi_natural<1, inverse>(out, out);
+ butterfly16_multi_natural<2, inverse>(out, out);
+ butterfly16_multi_natural<3, inverse>(out, out);
+ }
+ }
+};
+
+template <bool inverse>
+struct fft_specialization<double, 8, inverse> : dft_stage<double>
+{
+ using T = double;
+ fft_specialization(size_t)
+ {
+ this->stage_size = 256;
+ this->data_size = align_up(sizeof(complex<T>) * 256 * 3 / 2, native_cache_alignment);
+ }
+
+protected:
+ constexpr static bool aligned = false;
+ constexpr static size_t width = vector_width<T, cpu_t::native>;
+ constexpr static bool use_br2 = false;
+ constexpr static bool prefetch = false;
+ constexpr static bool split_format = true;
+
+ virtual void do_initialize(size_t total_size) override final
+ {
+ complex<T>* twiddle = ptr_cast<complex<T>>(this->data);
+ initialize_twiddles<T, width>(twiddle, 256, total_size, split_format);
+ initialize_twiddles<T, width>(twiddle, 64, total_size, split_format);
+ initialize_twiddles<T, width>(twiddle, 16, total_size, split_format);
+ }
+
+ virtual void do_execute(complex<T>* out, const complex<T>* in, u8* /*temp*/) override final
+ {
+ const complex<T>* twiddle = ptr_cast<complex<T>>(this->data);
+ final_pass(csize<4>, out, in, twiddle);
+ fft_reorder(out, csize<8>);
+ }
+
+ KFR_INTRIN void final_pass(csize_t<4>, complex<T>* out, const complex<T>* in, const complex<T>* twiddle)
+ {
+ radix4_pass(csize<256>, 1, csize<width>, ctrue, cfalse, cbool<use_br2>, cbool<prefetch>,
+ cbool<inverse>, cbool<aligned>, out, in, twiddle);
+ radix4_pass(csize<64>, 4, csize<width>, ctrue, ctrue, cbool<use_br2>, cbool<prefetch>, cbool<inverse>,
+ cbool<aligned>, out, out, twiddle);
+ radix4_pass(csize<16>, 16, csize<width>, cfalse, ctrue, cbool<use_br2>, cbool<prefetch>,
+ cbool<inverse>, cbool<aligned>, out, out, twiddle);
+ radix4_pass(csize<4>, 64, csize<width>, cfalse, cfalse, cbool<use_br2>, cbool<prefetch>,
+ cbool<inverse>, cbool<aligned>, out, out, twiddle);
+ }
+};
+
+template <typename T, bool splitin, bool is_even>
+struct fft_stage_impl_t
+{
+ template <bool inverse>
+ using type = internal::fft_stage_impl<T, splitin, is_even, inverse>;
+};
+template <typename T, bool splitin, size_t size>
+struct fft_final_stage_impl_t
+{
+ template <bool inverse>
+ using type = internal::fft_final_stage_impl<T, splitin, size, inverse>;
+};
+template <typename T, bool is_even>
+struct fft_reorder_stage_impl_t
+{
+ template <bool>
+ using type = internal::fft_reorder_stage_impl<T, is_even>;
+};
+template <typename T, size_t log2n, bool aligned>
+struct fft_specialization_t
+{
+ template <bool inverse>
+ using type = internal::fft_specialization<T, log2n, inverse>;
+};
+}
+
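+// Tags selecting which direction(s) a dft_plan prepares stages and twiddle
+// tables for: both, direct only, or inverse only.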
+namespace dft_type
+{
+constexpr cbools_t<true, true> both{};
+constexpr cbools_t<true, false> direct{};
+constexpr cbools_t<false, true> inverse{};
+}
+
+template <typename T>
+struct dft_plan
+{
+ using dft_stage_ptr = std::unique_ptr<dft_stage<T>>;
+
+ size_t size;
+ size_t temp_size;
+
+ template <bool direct = true, bool inverse = true>
+ dft_plan(size_t size, cbools_t<direct, inverse> type = dft_type::both)
+ : size(size), temp_size(0), data_size(0)
+ {
+ if (is_poweroftwo(size))
+ {
+ const size_t log2n = ilog2(size);
+ cswitch(csizes<1, 2, 3, 4, 5, 6, 7, 8>, log2n,
+ [&](auto log2n) {
+ add_stage<internal::fft_specialization_t<T, val_of(log2n), false>::template type>(
+ size, type);
+ },
+ [&]() {
+ cswitch(cfalse_true, is_even(log2n), [&](auto is_even) {
+ make_fft(size, type, is_even, ctrue);
+ add_stage<internal::fft_reorder_stage_impl_t<T, val_of(is_even)>::template type>(
+ size, type);
+ });
+ });
+ initialize(type);
+ }
+ }
+ KFR_INTRIN void execute(complex<T>* out, const complex<T>* in, u8* temp, bool inverse = false) const
+ {
+ if (inverse)
+ execute_dft(ctrue, out, in, temp);
+ else
+ execute_dft(cfalse, out, in, temp);
+ }
+ template <bool inverse>
+ KFR_INTRIN void execute(complex<T>* out, const complex<T>* in, u8* temp, cbool_t<inverse> inv) const
+ {
+ execute_dft(inv, out, in, temp);
+ }
+
+ template <size_t Tag1, size_t Tag2, size_t Tag3>
+ KFR_INTRIN void execute(univector<complex<T>, Tag1>& out, const univector<complex<T>, Tag2>& in,
+ univector<u8, Tag3>& temp, bool inverse = false) const
+ {
+ if (inverse)
+ execute_dft(ctrue, out.data(), in.data(), temp.data());
+ else
+ execute_dft(cfalse, out.data(), in.data(), temp.data());
+ }
+ template <bool inverse, size_t Tag1, size_t Tag2, size_t Tag3>
+ KFR_INTRIN void execute(univector<complex<T>, Tag1>& out, const univector<complex<T>, Tag2>& in,
+ univector<u8, Tag3>& temp, cbool_t<inverse> inv) const
+ {
+ execute_dft(inv, out.data(), in.data(), temp.data());
+ }
+
+private:
+ autofree<u8> data;
+ size_t data_size;
+ std::vector<dft_stage_ptr> stages[2];
+ template <template <bool inverse> class Stage>
+ void add_stage(size_t stage_size, cbools_t<true, true>)
+ {
+ dft_stage<T>* direct_stage = new Stage<false>(stage_size);
+ direct_stage->name = type_name<decltype(*direct_stage)>();
+ dft_stage<T>* inverse_stage = new Stage<true>(stage_size);
+ inverse_stage->name = type_name<decltype(*inverse_stage)>();
+ this->data_size += direct_stage->data_size;
+ this->temp_size += direct_stage->temp_size;
+ stages[0].push_back(dft_stage_ptr(direct_stage));
+ stages[1].push_back(dft_stage_ptr(inverse_stage));
+ }
+ template <template <bool inverse> class Stage>
+ void add_stage(size_t stage_size, cbools_t<true, false>)
+ {
+ dft_stage<T>* direct_stage = new Stage<false>(stage_size);
+ direct_stage->name = type_name<decltype(*direct_stage)>();
+ this->data_size += direct_stage->data_size;
+ this->temp_size += direct_stage->temp_size;
+ stages[0].push_back(dft_stage_ptr(direct_stage));
+ }
+ template <template <bool inverse> class Stage>
+ void add_stage(size_t stage_size, cbools_t<false, true>)
+ {
+ dft_stage<T>* inverse_stage = new Stage<true>(stage_size);
+ inverse_stage->name = type_name<decltype(*inverse_stage)>();
+ this->data_size += inverse_stage->data_size;
+ this->temp_size += inverse_stage->temp_size;
+ stages[1].push_back(dft_stage_ptr(inverse_stage));
+ }
+
+ template <bool direct, bool inverse, bool is_even, bool first>
+ void make_fft(size_t stage_size, cbools_t<direct, inverse> type, cbool_t<is_even>, cbool_t<first>)
+ {
+ constexpr size_t final_size = is_even ? 1024 : 512;
+
+ using fft_stage_impl_t = internal::fft_stage_impl_t<T, !first, is_even>;
+ using fft_final_stage_impl_t = internal::fft_final_stage_impl_t<T, !first, final_size>;
+
+ if (stage_size >= 2048)
+ {
+ add_stage<fft_stage_impl_t::template type>(stage_size, type);
+
+ make_fft(stage_size / 4, cbools<direct, inverse>, cbool<is_even>, cfalse);
+ }
+ else
+ {
+ add_stage<fft_final_stage_impl_t::template type>(final_size, type);
+ }
+ }
+
+ template <bool direct, bool inverse>
+ void initialize(cbools_t<direct, inverse>)
+ {
+ data = autofree<u8>(data_size);
+ if (direct)
+ {
+ size_t offset = 0;
+ for (dft_stage_ptr& stage : stages[0])
+ {
+ stage->data = data.data() + offset;
+ stage->initialize(this->size);
+ offset += stage->data_size;
+ }
+ }
+ if (inverse)
+ {
+ size_t offset = 0;
+ for (dft_stage_ptr& stage : stages[1])
+ {
+ stage->data = data.data() + offset;
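+ // when both directions are planned, the direct and inverse stages
+ // share the same twiddle data, so it is filled only once (above)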
+ if (!direct)
+ stage->initialize(this->size);
+ offset += stage->data_size;
+ }
+ }
+ }
+ template <bool inverse>
+ KFR_INTRIN void execute_dft(cbool_t<inverse>, complex<T>* out, const complex<T>* in, u8* temp) const
+ {
+ size_t stack[32] = { 0 };
+
+ const size_t count = stages[inverse].size();
+
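+ // Stages flagged `recursion` form a chain that is walked depth-first
+ // without actual recursion: stack[d] counts how many of stage d's
+ // `repeats` sub-blocks have run, and out_offset advances the output
+ // pointer between repeats.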
+ for (size_t depth = 0; depth < count;)
+ {
+ if (stages[inverse][depth]->recursion)
+ {
+ complex<T>* rout = out;
+ const complex<T>* rin = in;
+ size_t rdepth = depth;
+ size_t maxdepth = depth;
+ do
+ {
+ if (stack[rdepth] == stages[inverse][rdepth]->repeats)
+ {
+ stack[rdepth] = 0;
+ rdepth--;
+ }
+ else
+ {
+ stages[inverse][rdepth]->execute(rout, rin, temp);
+ rout += stages[inverse][rdepth]->out_offset;
+ rin = rout;
+ stack[rdepth]++;
+ if (rdepth < count - 1 && stages[inverse][rdepth + 1]->recursion)
+ rdepth++;
+ else
+ maxdepth = rdepth;
+ }
+ } while (rdepth != depth);
+ depth = maxdepth + 1;
+ }
+ else
+ {
+ stages[inverse][depth]->execute(out, in, temp);
+ depth++;
+ }
+ in = out;
+ }
+ }
+};
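+
+// Illustrative usage sketch (not part of the header; the size and buffer names
+// below are assumptions chosen for the example):
+//     dft_plan<float> plan(1024);          // plans both directions by default
+//     univector<complex<float>, 1024> in, out;
+//     univector<u8> temp(plan.temp_size);
+//     plan.execute(out, in, temp);         // direct DFT
+//     plan.execute(out, out, temp, true);  // inverse DFT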
+}
+
+#pragma clang diagnostic pop // matches the -Wassume push
+#pragma clang diagnostic pop // matches the -Wshadow push
diff --git a/include/kfr/dft/ft.hpp b/include/kfr/dft/ft.hpp
@@ -0,0 +1,1505 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+
+#include "../base/complex.hpp"
+#include "../base/constants.hpp"
+#include "../base/digitreverse.hpp"
+#include "../base/read_write.hpp"
+#include "../base/sin_cos.hpp"
+#include "../base/univector.hpp"
+#include "../base/vec.hpp"
+#include "../misc/small_buffer.hpp"
+
+#include "../base/memory.hpp"
+#include "../data/sincos.hpp"
+
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Winaccessible-base"
+
+namespace kfr
+{
+
+namespace internal
+{
+
+template <typename T, size_t N, KFR_ENABLE_IF(N >= 2)>
+KFR_INLINE vec<T, N> cmul_impl(vec<T, N> x, vec<T, N> y)
+{
+ return subadd(x * dupeven(y), swap<2>(x) * dupodd(y));
+}
+template <typename T, size_t N, KFR_ENABLE_IF(N > 2)>
+KFR_INLINE vec<T, N> cmul_impl(vec<T, N> x, vec<T, 2> y)
+{
+ vec<T, N> yy = resize<N>(y);
+ return cmul_impl(x, yy);
+}
+template <typename T, size_t N, KFR_ENABLE_IF(N > 2)>
+KFR_INLINE vec<T, N> cmul_impl(vec<T, 2> x, vec<T, N> y)
+{
+ vec<T, N> xx = resize<N>(x);
+ return cmul_impl(xx, y);
+}
+
+/// Complex multiplication: returns (x.re*y.re - x.im*y.im, x.im*y.re + x.re*y.im)
+/// for each interleaved (re, im) pair; a narrower operand is broadcast to the
+/// width of the wider one.
+template <typename T, size_t N1, size_t N2>
+KFR_INLINE vec<T, std::max(N1, N2)> cmul(vec<T, N1> x, vec<T, N2> y)
+{
+ return internal::cmul_impl(x, y);
+}
+KFR_FN(cmul)
+
+template <typename T, size_t N, KFR_ENABLE_IF(N >= 2)>
+KFR_INLINE vec<T, N> cmul_conj(vec<T, N> x, vec<T, N> y)
+{
+ return swap<2>(subadd(swap<2>(x) * cdupreal(y), x * cdupimag(y)));
+}
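+// cmul_conj(x, y) computes x * conj(y): for x = a+bi and y = c+di the result is
+// (a*c + b*d) + (b*c - a*d)i, again built from dup/swap/subadd shuffles only.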
+template <typename T, size_t N, KFR_ENABLE_IF(N >= 2)>
+KFR_INLINE vec<T, N> cmul_2conj(vec<T, N> in0, vec<T, N> in1, vec<T, N> tw)
+{
+ return (in0 + in1) * cdupreal(tw) + swap<2>(cnegimag(in0 - in1)) * cdupimag(tw);
+}
+template <typename T, size_t N, KFR_ENABLE_IF(N >= 2)>
+KFR_INLINE void cmul_2conj(vec<T, N>& out0, vec<T, N>& out1, vec<T, 2> in0, vec<T, 2> in1, vec<T, N> tw)
+{
+ const vec<T, N> twr = cdupreal(tw);
+ const vec<T, N> twi = cdupimag(tw);
+ const vec<T, 2> sum = (in0 + in1);
+ const vec<T, 2> dif = swap<2>(cnegimag(in0 - in1));
+ const vec<T, N> sumtw = resize<N>(sum) * twr;
+ const vec<T, N> diftw = resize<N>(dif) * twi;
+ out0 += sumtw + diftw;
+ out1 += sumtw - diftw;
+}
+template <typename T, size_t N, KFR_ENABLE_IF(N > 2)>
+KFR_INLINE vec<T, N> cmul_conj(vec<T, N> x, vec<T, 2> y)
+{
+ vec<T, N> yy = resize<N>(y);
+ return cmul_conj(x, yy);
+}
+template <typename T, size_t N, KFR_ENABLE_IF(N > 2)>
+KFR_INLINE vec<T, N> cmul_conj(vec<T, 2> x, vec<T, N> y)
+{
+ vec<T, N> xx = resize<N>(x);
+ return cmul_conj(xx, y);
+}
+KFR_FN(cmul_conj)
+KFR_FN(cmul_2conj)
+
+template <size_t N, bool A = false, typename T>
+KFR_INLINE cvec<T, N> cread(const complex<T>* src)
+{
+ return internal_read_write::read<N * 2, A>(ptr_cast<T>(src));
+}
+
+template <size_t N, bool A = false, typename T>
+KFR_INLINE void cwrite(complex<T>* dest, cvec<T, N> value)
+{
+ return internal_read_write::write<A>(ptr_cast<T>(dest), value);
+}
+
+template <size_t count, size_t N, size_t stride, bool A, typename T, size_t... indices>
+KFR_INLINE cvec<T, count * N> cread_group_impl(const complex<T>* src, csizes_t<indices...>)
+{
+ return concat(read<N * 2, A>(ptr_cast<T>(src + stride * indices))...);
+}
+template <size_t count, size_t N, size_t stride, bool A, typename T, size_t... indices>
+KFR_INLINE void cwrite_group_impl(complex<T>* dest, cvec<T, count * N> value, csizes_t<indices...>)
+{
+ swallow{ (write<A>(ptr_cast<T>(dest + stride * indices), slice<indices * N * 2, N * 2>(value)), 0)... };
+}
+
+template <size_t count, size_t N, bool A, typename T, size_t... indices>
+KFR_INLINE cvec<T, count * N> cread_group_impl(const complex<T>* src, size_t stride, csizes_t<indices...>)
+{
+ return concat(read<N * 2, A>(ptr_cast<T>(src + stride * indices))...);
+}
+template <size_t count, size_t N, bool A, typename T, size_t... indices>
+KFR_INLINE void cwrite_group_impl(complex<T>* dest, size_t stride, cvec<T, count * N> value,
+ csizes_t<indices...>)
+{
+ swallow{ (write<A>(ptr_cast<T>(dest + stride * indices), slice<indices * N * 2, N * 2>(value)), 0)... };
+}
+
+template <size_t count, size_t N, size_t stride, bool A = false, typename T>
+KFR_INLINE cvec<T, count * N> cread_group(const complex<T>* src)
+{
+ return cread_group_impl<count, N, stride, A>(src, csizeseq<count>);
+}
+
+template <size_t count, size_t N, size_t stride, bool A = false, typename T>
+KFR_INLINE void cwrite_group(complex<T>* dest, cvec<T, count * N> value)
+{
+ return cwrite_group_impl<count, N, stride, A>(dest, value, csizeseq<count>);
+}
+
+template <size_t count, size_t N, bool A = false, typename T>
+KFR_INLINE cvec<T, count * N> cread_group(const complex<T>* src, size_t stride)
+{
+ return cread_group_impl<count, N, A>(src, stride, csizeseq<count>);
+}
+
+template <size_t count, size_t N, bool A = false, typename T>
+KFR_INLINE void cwrite_group(complex<T>* dest, size_t stride, cvec<T, count * N> value)
+{
+ return cwrite_group_impl<count, N, A>(dest, stride, value, csizeseq<count>);
+}
+
+template <size_t N, bool A = false, bool split = false, typename T>
+KFR_INLINE cvec<T, N> cread_split(const complex<T>* src)
+{
+ cvec<T, N> temp = internal_read_write::read<N * 2, A>(ptr_cast<T>(src));
+ if (split)
+ temp = splitpairs(temp);
+ return temp;
+}
+
+template <size_t N, bool A = false, bool split = false, typename T>
+KFR_INLINE void cwrite_split(complex<T>* dest, cvec<T, N> value)
+{
+ if (split)
+ value = interleavehalfs(value);
+ internal_read_write::write<A>(ptr_cast<T>(dest), value);
+}
+
+template <>
+inline cvec<f32, 8> cread_split<8, false, true, f32>(const complex<f32>* src)
+{
+ const cvec<f32, 4> l = concat(cread<2>(src), cread<2>(src + 4));
+ const cvec<f32, 4> h = concat(cread<2>(src + 2), cread<2>(src + 6));
+
+ return concat(shuffle<0, 2, 8 + 0, 8 + 2>(l, h), shuffle<1, 3, 8 + 1, 8 + 3>(l, h));
+}
+template <>
+inline cvec<f32, 8> cread_split<8, true, true, f32>(const complex<f32>* src)
+{
+ const cvec<f32, 4> l = concat(cread<2, true>(src), cread<2, true>(src + 4));
+ const cvec<f32, 4> h = concat(cread<2, true>(src + 2), cread<2, true>(src + 6));
+
+ return concat(shuffle<0, 2, 8 + 0, 8 + 2>(l, h), shuffle<1, 3, 8 + 1, 8 + 3>(l, h));
+}
+
+template <>
+inline cvec<f64, 4> cread_split<4, false, true, f64>(const complex<f64>* src)
+{
+ const cvec<f64, 2> l = concat(cread<1>(src), cread<1>(src + 2));
+ const cvec<f64, 2> h = concat(cread<1>(src + 1), cread<1>(src + 3));
+
+ return concat(shuffle<0, 4, 2, 6>(l, h), shuffle<1, 5, 3, 7>(l, h));
+}
+
+template <>
+inline void cwrite_split<8, false, true, f32>(complex<f32>* dest, cvec<f32, 8> x)
+{
+ x = concat(shuffle<0, 8 + 0, 1, 8 + 1>(low(x), high(x)), shuffle<2, 8 + 2, 3, 8 + 3>(low(x), high(x)));
+
+ cvec<f32, 2> a, b, c, d;
+ split(x, a, b, c, d);
+ cwrite<2>(dest, a);
+ cwrite<2>(dest + 4, b);
+ cwrite<2>(dest + 2, c);
+ cwrite<2>(dest + 6, d);
+}
+template <>
+inline void cwrite_split<8, true, true, f32>(complex<f32>* dest, cvec<f32, 8> x)
+{
+ x = concat(shuffle<0, 8 + 0, 1, 8 + 1>(low(x), high(x)), shuffle<2, 8 + 2, 3, 8 + 3>(low(x), high(x)));
+
+ cvec<f32, 2> a, b, c, d;
+ split(x, a, b, c, d);
+ cwrite<2, true>(dest, a);
+ cwrite<2, true>(dest + 4, b);
+ cwrite<2, true>(dest + 2, c);
+ cwrite<2, true>(dest + 6, d);
+}
+
+template <>
+inline void cwrite_split<4, false, true, f64>(complex<f64>* dest, cvec<f64, 4> x)
+{
+ x = concat(shuffle<0, 4, 2, 6>(low(x), high(x)), shuffle<1, 5, 3, 7>(low(x), high(x)));
+ cwrite<1>(dest, part<4, 0>(x));
+ cwrite<1>(dest + 2, part<4, 1>(x));
+ cwrite<1>(dest + 1, part<4, 2>(x));
+ cwrite<1>(dest + 3, part<4, 3>(x));
+}
+template <>
+inline void cwrite_split<4, true, true, f64>(complex<f64>* dest, cvec<f64, 4> x)
+{
+ x = concat(shuffle<0, 4, 2, 6>(low(x), high(x)), shuffle<1, 5, 3, 7>(low(x), high(x)));
+ cwrite<1, true>(dest, part<4, 0>(x));
+ cwrite<1, true>(dest + 2, part<4, 1>(x));
+ cwrite<1, true>(dest + 1, part<4, 2>(x));
+ cwrite<1, true>(dest + 3, part<4, 3>(x));
+}
+
+template <size_t N, size_t stride, typename T, size_t... Indices>
+KFR_INLINE cvec<T, N> cgather_helper(const complex<T>* base, csizes_t<Indices...>)
+{
+ return concat(ref_cast<cvec<T, 1>>(base[Indices * stride])...);
+}
+
+template <size_t N, size_t stride, typename T>
+KFR_INLINE cvec<T, N> cgather(const complex<T>* base)
+{
+ if (stride == 1)
+ {
+ return ref_cast<cvec<T, N>>(*base);
+ }
+ else
+ return cgather_helper<N, stride, T>(base, csizeseq<N>);
+}
+
+KFR_INLINE size_t cgather_next(size_t& index, size_t stride, size_t size, size_t)
+{
+ size_t temp = index;
+ index += stride;
+ if (index >= size)
+ index -= size;
+ return temp;
+}
+KFR_INLINE size_t cgather_next(size_t& index, size_t stride, size_t)
+{
+ size_t temp = index;
+ index += stride;
+ return temp;
+}
+
+template <size_t N, typename T, size_t... Indices>
+KFR_INLINE cvec<T, N> cgather_helper(const complex<T>* base, size_t& index, size_t stride,
+ csizes_t<Indices...>)
+{
+ return concat(ref_cast<cvec<T, 1>>(base[cgather_next(index, stride, Indices)])...);
+}
+
+template <size_t N, typename T>
+KFR_INLINE cvec<T, N> cgather(const complex<T>* base, size_t& index, size_t stride)
+{
+ return cgather_helper<N, T>(base, index, stride, csizeseq<N>);
+}
+template <size_t N, typename T>
+KFR_INLINE cvec<T, N> cgather(const complex<T>* base, size_t stride)
+{
+ size_t index = 0;
+ return cgather_helper<N, T>(base, index, stride, csizeseq<N>);
+}
+
+template <size_t N, typename T, size_t... Indices>
+KFR_INLINE cvec<T, N> cgather_helper(const complex<T>* base, size_t& index, size_t stride, size_t size,
+ csizes_t<Indices...>)
+{
+ return concat(ref_cast<cvec<T, 1>>(base[cgather_next(index, stride, size, Indices)])...);
+}
+
+template <size_t N, typename T>
+KFR_INLINE cvec<T, N> cgather(const complex<T>* base, size_t& index, size_t stride, size_t size)
+{
+ return cgather_helper<N, T>(base, index, stride, size, csizeseq<N>);
+}
+
+template <size_t N, size_t stride, typename T, size_t... Indices>
+KFR_INLINE void cscatter_helper(complex<T>* base, cvec<T, N> value, csizes_t<Indices...>)
+{
+ swallow{ (cwrite<1>(base + Indices * stride, slice<Indices * 2, 2>(value)), 0)... };
+}
+
+template <size_t N, size_t stride, typename T>
+KFR_INLINE void cscatter(complex<T>* base, cvec<T, N> value)
+{
+ if (stride == 1)
+ {
+ cwrite<N>(base, value);
+ }
+ else
+ {
+ return cscatter_helper<N, stride, T>(base, value, csizeseq<N>);
+ }
+}
+
+template <size_t N, typename T, size_t... Indices>
+KFR_INLINE void cscatter_helper(complex<T>* base, size_t stride, cvec<T, N> value, csizes_t<Indices...>)
+{
+ swallow{ (cwrite<1>(base + Indices * stride, slice<Indices * 2, 2>(value)), 0)... };
+}
+
+template <size_t N, typename T>
+KFR_INLINE void cscatter(complex<T>* base, size_t stride, cvec<T, N> value)
+{
+ return cscatter_helper<N, T>(base, stride, value, csizeseq<N>);
+}
+
+template <size_t groupsize = 1, typename T, size_t N, typename IT>
+KFR_INLINE vec<T, N * 2 * groupsize> cgather(const complex<T>* base, vec<IT, N> offset)
+{
+ return gather_helper<2 * groupsize>(ptr_cast<T>(base), offset, csizeseq<N>);
+}
+
+template <size_t groupsize = 1, typename T, size_t N, typename IT>
+KFR_INLINE void cscatter(complex<T>* base, vec<IT, N> offset, vec<T, N * 2 * groupsize> value)
+{
+ return scatter_helper<2 * groupsize>(ptr_cast<T>(base), offset, value, csizeseq<N>);
+}
+
+constexpr size_t default_unroll_count = 2;
+
+template <typename T>
+KFR_INTRIN void transpose4x8(cvec<T, 8> z0, cvec<T, 8> z1, cvec<T, 8> z2, cvec<T, 8> z3, cvec<T, 4>& w0,
+ cvec<T, 4>& w1, cvec<T, 4>& w2, cvec<T, 4>& w3, cvec<T, 4>& w4, cvec<T, 4>& w5,
+ cvec<T, 4>& w6, cvec<T, 4>& w7)
+{
+ cvec<T, 16> a = concat(low(z0), low(z1), low(z2), low(z3));
+ cvec<T, 16> b = concat(high(z0), high(z1), high(z2), high(z3));
+ a = digitreverse4<2>(a);
+ b = digitreverse4<2>(b);
+ w0 = part<4, 0>(a);
+ w1 = part<4, 1>(a);
+ w2 = part<4, 2>(a);
+ w3 = part<4, 3>(a);
+ w4 = part<4, 0>(b);
+ w5 = part<4, 1>(b);
+ w6 = part<4, 2>(b);
+ w7 = part<4, 3>(b);
+}
+
+template <typename T>
+KFR_INTRIN void transpose4x8(cvec<T, 4> w0, cvec<T, 4> w1, cvec<T, 4> w2, cvec<T, 4> w3, cvec<T, 4> w4,
+ cvec<T, 4> w5, cvec<T, 4> w6, cvec<T, 4> w7, cvec<T, 8>& z0, cvec<T, 8>& z1,
+ cvec<T, 8>& z2, cvec<T, 8>& z3)
+{
+ cvec<T, 16> a = concat(w0, w1, w2, w3);
+ cvec<T, 16> b = concat(w4, w5, w6, w7);
+ a = digitreverse4<2>(a);
+ b = digitreverse4<2>(b);
+ z0 = concat(part<4, 0>(a), part<4, 0>(b));
+ z1 = concat(part<4, 1>(a), part<4, 1>(b));
+ z2 = concat(part<4, 2>(a), part<4, 2>(b));
+ z3 = concat(part<4, 3>(a), part<4, 3>(b));
+}
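+// The two transpose4x8 overloads convert between an 8x4 and a 4x8 tiling of the
+// same 32 complex values; digitreverse4 over pairs of scalars acts as a 4x4
+// complex transpose of each concatenated half.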
+
+template <typename T>
+void transpose4(cvec<T, 16>& a, cvec<T, 16>& b, cvec<T, 16>& c, cvec<T, 16>& d)
+{
+ cvec<T, 4> a0, a1, a2, a3;
+ cvec<T, 4> b0, b1, b2, b3;
+ cvec<T, 4> c0, c1, c2, c3;
+ cvec<T, 4> d0, d1, d2, d3;
+
+ split(a, a0, a1, a2, a3);
+ split(b, b0, b1, b2, b3);
+ split(c, c0, c1, c2, c3);
+ split(d, d0, d1, d2, d3);
+
+ a = concat(a0, b0, c0, d0);
+ b = concat(a1, b1, c1, d1);
+ c = concat(a2, b2, c2, d2);
+ d = concat(a3, b3, c3, d3);
+}
+template <typename T>
+void transpose4(cvec<T, 16>& a, cvec<T, 16>& b, cvec<T, 16>& c, cvec<T, 16>& d, cvec<T, 16>& aa,
+ cvec<T, 16>& bb, cvec<T, 16>& cc, cvec<T, 16>& dd)
+{
+ cvec<T, 4> a0, a1, a2, a3;
+ cvec<T, 4> b0, b1, b2, b3;
+ cvec<T, 4> c0, c1, c2, c3;
+ cvec<T, 4> d0, d1, d2, d3;
+
+ split(a, a0, a1, a2, a3);
+ split(b, b0, b1, b2, b3);
+ split(c, c0, c1, c2, c3);
+ split(d, d0, d1, d2, d3);
+
+ aa = concat(a0, b0, c0, d0);
+ bb = concat(a1, b1, c1, d1);
+ cc = concat(a2, b2, c2, d2);
+ dd = concat(a3, b3, c3, d3);
+}
+
+template <bool b, typename T>
+constexpr KFR_INTRIN T chsign(T x)
+{
+ return b ? -x : x;
+}
+
+template <typename T, size_t N, size_t size, size_t start, size_t step, bool inverse = false,
+ size_t... indices>
+constexpr KFR_INTRIN cvec<T, N> get_fixed_twiddle_helper(std::integer_sequence<size_t, indices...>)
+{
+ return make_vector((indices & 1 ? chsign<inverse>(-sin_using_table<T>(size, (indices / 2 * step + start)))
+ : cos_using_table<T>(size, (indices / 2 * step + start)))...);
+}
+
+template <typename T, size_t width, size_t... indices>
+constexpr KFR_INTRIN cvec<T, width> get_fixed_twiddle_helper(std::integer_sequence<size_t, indices...>,
+ size_t size, size_t start, size_t step)
+{
+ return make_vector((indices & 1 ? -sin_using_table<T>(size, indices / 2 * step + start)
+ : cos_using_table<T>(size, indices / 2 * step + start))...);
+}
+
+template <typename T, size_t width, size_t size, size_t start, size_t step, bool inverse = false>
+constexpr KFR_INTRIN cvec<T, width> get_fixed_twiddle()
+{
+ return get_fixed_twiddle_helper<T, width, size, start, step, inverse>(
+ std::make_index_sequence<width * 2>());
+}
+
+template <typename T, size_t width>
+constexpr KFR_INTRIN cvec<T, width> get_fixed_twiddle(size_t size, size_t start, size_t step = 0)
+{
+ return get_fixed_twiddle_helper<T, width>(std::make_index_sequence<width * 2>(), size, start, step);
+}
+
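+// fixed_twiddle<T, N, size, start, step> holds the N compile-time twiddles
+// e^(-2*pi*i*(start + k*step)/size) for k = 0..N-1, taken from the sin/cos
+// tables; `inverse` selects the conjugate values.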
+template <typename T, size_t N, size_t size, size_t start, size_t step = 0, bool inverse = false>
+constexpr cvec<T, N> fixed_twiddle = get_fixed_twiddle<T, N, size, start, step, inverse>();
+
+template <typename T, size_t N, bool inverse>
+constexpr cvec<T, N> twiddleimagmask()
+{
+ return inverse ? broadcast<N, T>(-1, +1) : broadcast<N, T>(+1, -1);
+}
+
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wconversion"
+
+#pragma clang diagnostic pop
+
+template <typename T, size_t N>
+KFR_NOINLINE static vec<T, N> cossin_conj(vec<T, N> x)
+{
+ return cconj(in_sin_cos<cpu_t::native>::cossin(x));
+}
+
+template <size_t k, size_t size, bool inverse = false, typename T, size_t width>
+KFR_INTRIN vec<T, width> cmul_by_twiddle(vec<T, width> x)
+{
+ constexpr size_t kk = (inverse ? size - k : k) % size;
+ constexpr T isqrt2 = static_cast<T>(0.70710678118654752440084436210485);
+ if (kk == 0)
+ {
+ return x;
+ }
+ else if (kk == size * 1 / 8)
+ {
+ return swap<2>(subadd(swap<2>(x), x)) * isqrt2;
+ }
+ else if (kk == size * 2 / 8)
+ {
+ return negodd(swap<2>(x));
+ }
+ else if (kk == size * 3 / 8)
+ {
+ return subadd(x, swap<2>(x)) * -isqrt2;
+ }
+ else if (kk == size * 4 / 8)
+ {
+ return -x;
+ }
+ else if (kk == size * 5 / 8)
+ {
+ return swap<2>(subadd(swap<2>(x), x)) * -isqrt2;
+ }
+ else if (kk == size * 6 / 8)
+ {
+ return swap<2>(negodd(x));
+ }
+ else if (kk == size * 7 / 8)
+ {
+ return subadd(x, swap<2>(x)) * isqrt2;
+ }
+ else
+ {
+ return cmul(x, resize<width>(fixed_twiddle<T, 1, size, kk>));
+ }
+}
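+// cmul_by_twiddle treats multiplication by the eight 8th roots of unity as
+// special cases: each reduces to lane swaps, sign flips and at most one scale
+// by 1/sqrt(2), avoiding a full complex multiply. Any other k falls back to
+// cmul() with a table-derived fixed_twiddle.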
+
+template <size_t N, typename T>
+KFR_INTRIN void butterfly2(cvec<T, N> a0, cvec<T, N> a1, cvec<T, N>& w0, cvec<T, N>& w1)
+{
+ w0 = a0 + a1;
+ w1 = a0 - a1;
+}
+
+template <size_t N, typename T>
+KFR_INTRIN void butterfly2(cvec<T, N>& a0, cvec<T, N>& a1)
+{
+ butterfly2<N>(a0, a1, a0, a1);
+}
+
+template <size_t N, bool inverse = false, typename T>
+KFR_INTRIN void butterfly4(cfalse_t /*split_format*/, cvec<T, N> a0, cvec<T, N> a1, cvec<T, N> a2,
+ cvec<T, N> a3, cvec<T, N>& w0, cvec<T, N>& w1, cvec<T, N>& w2, cvec<T, N>& w3)
+{
+ cvec<T, N> sum02, sum13, diff02, diff13;
+ cvec<T, N * 2> a01, a23, sum0213, diff0213;
+
+ a01 = concat(a0, a1);
+ a23 = concat(a2, a3);
+ sum0213 = a01 + a23;
+ diff0213 = a01 - a23;
+
+ sum02 = low(sum0213);
+ sum13 = high(sum0213);
+ diff02 = low(diff0213);
+ diff13 = high(diff0213);
+ w0 = sum02 + sum13;
+ w2 = sum02 - sum13;
+ if (inverse)
+ {
+ diff13 = (diff13 ^ broadcast<N, T>(T(), -T()));
+ diff13 = swap<2>(diff13);
+ }
+ else
+ {
+ diff13 = swap<2>(diff13);
+ diff13 = (diff13 ^ broadcast<N, T>(T(), -T()));
+ }
+
+ w1 = diff02 + diff13;
+ w3 = diff02 - diff13;
+}
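+// The radix-4 butterfly above computes, for the direct transform:
+//     w0 = (a0 + a2) + (a1 + a3)
+//     w2 = (a0 + a2) - (a1 + a3)
+//     w1 = (a0 - a2) - i*(a1 - a3)
+//     w3 = (a0 - a2) + i*(a1 - a3)
+// (w1 and w3 swap roles for the inverse). The swap<2>/XOR pair implements the
+// multiplication by +/-i: XOR with (+0.0, -0.0) flips the sign of one lane.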
+
+template <size_t N, bool inverse = false, typename T>
+KFR_INTRIN void butterfly4(ctrue_t /*split_format*/, cvec<T, N> a0, cvec<T, N> a1, cvec<T, N> a2,
+ cvec<T, N> a3, cvec<T, N>& w0, cvec<T, N>& w1, cvec<T, N>& w2, cvec<T, N>& w3)
+{
+ vec<T, N> re0, im0, re1, im1, re2, im2, re3, im3;
+ vec<T, N> wre0, wim0, wre1, wim1, wre2, wim2, wre3, wim3;
+
+ cvec<T, N> sum02, sum13, diff02, diff13;
+ vec<T, N> sum02re, sum13re, diff02re, diff13re;
+ vec<T, N> sum02im, sum13im, diff02im, diff13im;
+
+ sum02 = a0 + a2;
+ sum13 = a1 + a3;
+
+ w0 = sum02 + sum13;
+ w2 = sum02 - sum13;
+
+ diff02 = a0 - a2;
+ diff13 = a1 - a3;
+ split(diff02, diff02re, diff02im);
+ split(diff13, diff13re, diff13im);
+
+ (inverse ? w3 : w1) = concat(diff02re + diff13im, diff02im - diff13re);
+ (inverse ? w1 : w3) = concat(diff02re - diff13im, diff02im + diff13re);
+}
+
+template <size_t N, bool inverse = false, typename T>
+KFR_INTRIN void butterfly8(cvec<T, N> a0, cvec<T, N> a1, cvec<T, N> a2, cvec<T, N> a3, cvec<T, N> a4,
+ cvec<T, N> a5, cvec<T, N> a6, cvec<T, N> a7, cvec<T, N>& w0, cvec<T, N>& w1,
+ cvec<T, N>& w2, cvec<T, N>& w3, cvec<T, N>& w4, cvec<T, N>& w5, cvec<T, N>& w6,
+ cvec<T, N>& w7)
+{
+ cvec<T, N> b0 = a0, b2 = a2, b4 = a4, b6 = a6;
+ butterfly4<N, inverse>(cbool<false>, b0, b2, b4, b6, b0, b2, b4, b6);
+ cvec<T, N> b1 = a1, b3 = a3, b5 = a5, b7 = a7;
+ butterfly4<N, inverse>(cbool<false>, b1, b3, b5, b7, b1, b3, b5, b7);
+ w0 = b0 + b1;
+ w4 = b0 - b1;
+
+ b3 = cmul_by_twiddle<1, 8, inverse>(b3);
+ b5 = cmul_by_twiddle<2, 8, inverse>(b5);
+ b7 = cmul_by_twiddle<3, 8, inverse>(b7);
+
+ w1 = b2 + b3;
+ w5 = b2 - b3;
+ w2 = b4 + b5;
+ w6 = b4 - b5;
+ w3 = b6 + b7;
+ w7 = b6 - b7;
+}
+
+template <size_t N, bool inverse = false, typename T>
+KFR_INTRIN void butterfly8(cvec<T, N>& a0, cvec<T, N>& a1, cvec<T, N>& a2, cvec<T, N>& a3, cvec<T, N>& a4,
+ cvec<T, N>& a5, cvec<T, N>& a6, cvec<T, N>& a7)
+{
+ butterfly8<N, inverse>(a0, a1, a2, a3, a4, a5, a6, a7, a0, a1, a2, a3, a4, a5, a6, a7);
+}
+
+template <bool inverse = false, typename T>
+KFR_INTRIN void butterfly8(cvec<T, 2>& a01, cvec<T, 2>& a23, cvec<T, 2>& a45, cvec<T, 2>& a67)
+{
+ cvec<T, 2> b01 = a01, b23 = a23, b45 = a45, b67 = a67;
+
+ butterfly4<2, inverse>(cbool<false>, b01, b23, b45, b67, b01, b23, b45, b67);
+
+ cvec<T, 2> b02, b13, b46, b57;
+
+ cvec<T, 8> b01234567 = concat(b01, b23, b45, b67);
+ cvec<T, 8> b02461357 = concat(even<2>(b01234567), odd<2>(b01234567));
+ split(b02461357, b02, b46, b13, b57);
+
+ b13 = cmul(b13, fixed_twiddle<T, 2, 8, 0, 1, inverse>);
+ b57 = cmul(b57, fixed_twiddle<T, 2, 8, 2, 1, inverse>);
+ a01 = b02 + b13;
+ a23 = b46 + b57;
+ a45 = b02 - b13;
+ a67 = b46 - b57;
+}
+
+template <bool inverse = false, typename T>
+KFR_INTRIN void butterfly8(cvec<T, 8>& v8)
+{
+ cvec<T, 2> w0, w1, w2, w3;
+ split(v8, w0, w1, w2, w3);
+ butterfly8<inverse>(w0, w1, w2, w3);
+ v8 = concat(w0, w1, w2, w3);
+}
+
+template <bool inverse = false, typename T>
+KFR_INTRIN void butterfly32(cvec<T, 32>& v32)
+{
+ cvec<T, 4> w0, w1, w2, w3, w4, w5, w6, w7;
+ split(v32, w0, w1, w2, w3, w4, w5, w6, w7);
+ butterfly8<4, inverse>(w0, w1, w2, w3, w4, w5, w6, w7);
+
+ w1 = cmul(w1, fixed_twiddle<T, 4, 32, 0, 1, inverse>);
+ w2 = cmul(w2, fixed_twiddle<T, 4, 32, 0, 2, inverse>);
+ w3 = cmul(w3, fixed_twiddle<T, 4, 32, 0, 3, inverse>);
+ w4 = cmul(w4, fixed_twiddle<T, 4, 32, 0, 4, inverse>);
+ w5 = cmul(w5, fixed_twiddle<T, 4, 32, 0, 5, inverse>);
+ w6 = cmul(w6, fixed_twiddle<T, 4, 32, 0, 6, inverse>);
+ w7 = cmul(w7, fixed_twiddle<T, 4, 32, 0, 7, inverse>);
+
+ cvec<T, 8> z0, z1, z2, z3;
+ transpose4x8(w0, w1, w2, w3, w4, w5, w6, w7, z0, z1, z2, z3);
+
+ butterfly4<8, inverse>(cfalse, z0, z1, z2, z3, z0, z1, z2, z3);
+ v32 = concat(z0, z1, z2, z3);
+}
+
+template <size_t N, bool inverse = false, typename T>
+KFR_INTRIN void butterfly4(cvec<T, N * 4>& a0123)
+{
+ cvec<T, N> a0;
+ cvec<T, N> a1;
+ cvec<T, N> a2;
+ cvec<T, N> a3;
+ split(a0123, a0, a1, a2, a3);
+ butterfly4<N, inverse>(cfalse, a0, a1, a2, a3, a0, a1, a2, a3);
+ a0123 = concat(a0, a1, a2, a3);
+}
+
+template <size_t N, typename T>
+KFR_INTRIN void butterfly2(cvec<T, N * 2>& a01)
+{
+ cvec<T, N> a0;
+ cvec<T, N> a1;
+ split(a01, a0, a1);
+ butterfly2<N>(a0, a1);
+ a01 = concat(a0, a1);
+}
+
+template <size_t N, bool inverse = false, bool split_format = false, typename T>
+KFR_INTRIN void apply_twiddle(cvec<T, N> a1, cvec<T, N> tw1, cvec<T, N>& w1)
+{
+ if (split_format)
+ {
+ vec<T, N> re1, im1, tw1re, tw1im;
+ split(a1, re1, im1);
+ split(tw1, tw1re, tw1im);
+ vec<T, N> b1re = re1 * tw1re;
+ vec<T, N> b1im = im1 * tw1re;
+ if (inverse)
+ w1 = concat(b1re + im1 * tw1im, b1im - re1 * tw1im);
+ else
+ w1 = concat(b1re - im1 * tw1im, b1im + re1 * tw1im);
+ }
+ else
+ {
+ cvec<T, N> b1 = a1 * dupeven(tw1);
+ a1 = swap<2>(a1);
+
+ if (inverse)
+ tw1 = -(tw1);
+ w1 = subadd(b1, a1 * dupodd(tw1));
+ }
+}
+
+template <size_t N, bool inverse = false, bool split_format = false, typename T>
+KFR_INTRIN void apply_twiddles4(cvec<T, N> a1, cvec<T, N> a2, cvec<T, N> a3, cvec<T, N> tw1, cvec<T, N> tw2,
+ cvec<T, N> tw3, cvec<T, N>& w1, cvec<T, N>& w2, cvec<T, N>& w3)
+{
+ apply_twiddle<N, inverse, split_format>(a1, tw1, w1);
+ apply_twiddle<N, inverse, split_format>(a2, tw2, w2);
+ apply_twiddle<N, inverse, split_format>(a3, tw3, w3);
+}
+
+template <size_t N, bool inverse = false, typename T>
+KFR_INTRIN void apply_twiddles4(cvec<T, N>& __restrict a1, cvec<T, N>& __restrict a2,
+ cvec<T, N>& __restrict a3, cvec<T, N> tw1, cvec<T, N> tw2, cvec<T, N> tw3)
+{
+ apply_twiddles4<N, inverse>(a1, a2, a3, tw1, tw2, tw3, a1, a2, a3);
+}
+
+template <size_t N, bool inverse = false, typename T, typename = u8[N - 1]>
+KFR_INTRIN void apply_twiddles4(cvec<T, N>& __restrict a1, cvec<T, N>& __restrict a2,
+ cvec<T, N>& __restrict a3, cvec<T, 1> tw1, cvec<T, 1> tw2, cvec<T, 1> tw3)
+{
+ apply_twiddles4<N, inverse>(a1, a2, a3, resize<N * 2>(tw1), resize<N * 2>(tw2), resize<N * 2>(tw3));
+}
+
+template <size_t N, bool inverse = false, typename T, typename = u8[N - 2]>
+KFR_INTRIN void apply_twiddles4(cvec<T, N>& __restrict a1, cvec<T, N>& __restrict a2,
+ cvec<T, N>& __restrict a3, cvec<T, N / 2> tw1, cvec<T, N / 2> tw2,
+ cvec<T, N / 2> tw3)
+{
+ apply_twiddles4<N, inverse>(a1, a2, a3, resize<N * 2>(tw1), resize<N * 2>(tw2), resize<N * 2>(tw3));
+}
+
+template <size_t N, bool inverse = false, typename T>
+KFR_INTRIN void apply_vertical_twiddles4(cvec<T, N * 4>& b, cvec<T, N * 4>& c, cvec<T, N * 4>& d)
+{
+ cvec<T, 4> b0, b1, b2, b3;
+ cvec<T, 4> c0, c1, c2, c3;
+ cvec<T, 4> d0, d1, d2, d3;
+
+ split(b, b0, b1, b2, b3);
+ split(c, c0, c1, c2, c3);
+ split(d, d0, d1, d2, d3);
+
+ b1 = cmul_by_twiddle<4, 64, inverse>(b1);
+ b2 = cmul_by_twiddle<8, 64, inverse>(b2);
+ b3 = cmul_by_twiddle<12, 64, inverse>(b3);
+
+ c1 = cmul_by_twiddle<8, 64, inverse>(c1);
+ c2 = cmul_by_twiddle<16, 64, inverse>(c2);
+ c3 = cmul_by_twiddle<24, 64, inverse>(c3);
+
+ d1 = cmul_by_twiddle<12, 64, inverse>(d1);
+ d2 = cmul_by_twiddle<24, 64, inverse>(d2);
+ d3 = cmul_by_twiddle<36, 64, inverse>(d3);
+
+ b = concat(b0, b1, b2, b3);
+ c = concat(c0, c1, c2, c3);
+ d = concat(d0, d1, d2, d3);
+}
+
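+// Applies one column of twiddles of a size-64 decomposition. fixed_twiddle<T, N, size,
+// start, step> is assumed to produce { exp(-2*pi*i*(start + step*j)/size) } for
+// j = 0..N-1 (conjugated when inverse is true), so tw<k> below covers the factors
+// w64^(nnstep*k*(n2 + j)).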
+template <size_t n2, size_t nnstep, size_t N, bool inverse = false, typename T>
+KFR_INTRIN void apply_twiddles4(cvec<T, N * 4>& __restrict a0123)
+{
+ cvec<T, N> a0;
+ cvec<T, N> a1;
+ cvec<T, N> a2;
+ cvec<T, N> a3;
+ split(a0123, a0, a1, a2, a3);
+
+ cvec<T, N> tw1 = fixed_twiddle<T, N, 64, n2 * nnstep * 1, nnstep * 1, inverse>,
+ tw2 = fixed_twiddle<T, N, 64, n2 * nnstep * 2, nnstep * 2, inverse>,
+ tw3 = fixed_twiddle<T, N, 64, n2 * nnstep * 3, nnstep * 3, inverse>;
+
+ apply_twiddles4<N>(a1, a2, a3, tw1, tw2, tw3);
+
+ a0123 = concat(a0, a1, a2, a3);
+}
+
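+// Size-64 FFT as a 4x4x4 radix-4 decomposition. Pass 1 reads four strided groups,
+// butterflies each and applies the first set of twiddles; pass 2 butterflies the
+// transposed data; base-4 digit reversal plus the vertical twiddles then line the
+// vectors up for pass 3, which writes the result in natural order.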
+template <bool inverse, bool aligned, typename T>
+KFR_INTRIN void butterfly64(cbool_t<inverse>, cbool_t<aligned>, complex<T>* out, const complex<T>* in)
+{
+ cvec<T, 16> w0, w1, w2, w3;
+
+ w0 = cread_group<4, 4, 16, aligned>(
+ in); // concat(cread<4>(in + 0), cread<4>(in + 16), cread<4>(in + 32), cread<4>(in + 48));
+ butterfly4<4, inverse>(w0);
+ apply_twiddles4<0, 1, 4, inverse>(w0);
+
+ w1 = cread_group<4, 4, 16, aligned>(
+ in + 4); // concat(cread<4>(in + 4), cread<4>(in + 20), cread<4>(in + 36), cread<4>(in + 52));
+ butterfly4<4, inverse>(w1);
+ apply_twiddles4<4, 1, 4, inverse>(w1);
+
+ w2 = cread_group<4, 4, 16, aligned>(
+ in + 8); // concat(cread<4>(in + 8), cread<4>(in + 24), cread<4>(in + 40), cread<4>(in + 56));
+ butterfly4<4, inverse>(w2);
+ apply_twiddles4<8, 1, 4, inverse>(w2);
+
+ w3 = cread_group<4, 4, 16, aligned>(
+ in + 12); // concat(cread<4>(in + 12), cread<4>(in + 28), cread<4>(in + 44), cread<4>(in + 60));
+ butterfly4<4, inverse>(w3);
+ apply_twiddles4<12, 1, 4, inverse>(w3);
+
+ transpose4(w0, w1, w2, w3);
+ // pass 2:
+
+ butterfly4<4, inverse>(w0);
+ butterfly4<4, inverse>(w1);
+ butterfly4<4, inverse>(w2);
+ butterfly4<4, inverse>(w3);
+
+ transpose4(w0, w1, w2, w3);
+
+ w0 = digitreverse4<2>(w0);
+ w1 = digitreverse4<2>(w1);
+ w2 = digitreverse4<2>(w2);
+ w3 = digitreverse4<2>(w3);
+
+ apply_vertical_twiddles4<4, inverse>(w1, w2, w3);
+
+ // pass 3:
+ butterfly4<4, inverse>(w3);
+ cwrite_group<4, 4, 16, aligned>(out + 12, w3); // split(w3, out[3], out[7], out[11], out[15]);
+
+ butterfly4<4, inverse>(w2);
+ cwrite_group<4, 4, 16, aligned>(out + 8, w2); // split(w2, out[2], out[6], out[10], out[14]);
+
+ butterfly4<4, inverse>(w1);
+ cwrite_group<4, 4, 16, aligned>(out + 4, w1); // split(w1, out[1], out[5], out[9], out[13]);
+
+ butterfly4<4, inverse>(w0);
+ cwrite_group<4, 4, 16, aligned>(out, w0); // split(w0, out[0], out[4], out[8], out[12]);
+}
+
+template <bool inverse = false, typename T>
+KFR_INTRIN void butterfly16(cvec<T, 16>& v16)
+{
+ butterfly4<4, inverse>(v16);
+ apply_twiddles4<0, 4, 4, inverse>(v16);
+ v16 = digitreverse4<2>(v16);
+ butterfly4<4, inverse>(v16);
+}
+
+template <size_t index, bool inverse = false, typename T>
+KFR_INTRIN void butterfly16_multi_natural(complex<T>* out, const complex<T>* in)
+{
+ constexpr size_t N = 4;
+
+ cvec<T, 4> a1 = cread<4>(in + index * 4 + 16 * 1);
+ cvec<T, 4> a5 = cread<4>(in + index * 4 + 16 * 5);
+ cvec<T, 4> a9 = cread<4>(in + index * 4 + 16 * 9);
+ cvec<T, 4> a13 = cread<4>(in + index * 4 + 16 * 13);
+ butterfly4<N, inverse>(cfalse, a1, a5, a9, a13, a1, a5, a9, a13);
+ a5 = cmul_by_twiddle<1, 16, inverse>(a5);
+ a9 = cmul_by_twiddle<2, 16, inverse>(a9);
+ a13 = cmul_by_twiddle<3, 16, inverse>(a13);
+
+ cvec<T, 4> a2 = cread<4>(in + index * 4 + 16 * 2);
+ cvec<T, 4> a6 = cread<4>(in + index * 4 + 16 * 6);
+ cvec<T, 4> a10 = cread<4>(in + index * 4 + 16 * 10);
+ cvec<T, 4> a14 = cread<4>(in + index * 4 + 16 * 14);
+ butterfly4<N, inverse>(cfalse, a2, a6, a10, a14, a2, a6, a10, a14);
+ a6 = cmul_by_twiddle<2, 16, inverse>(a6);
+ a10 = cmul_by_twiddle<4, 16, inverse>(a10);
+ a14 = cmul_by_twiddle<6, 16, inverse>(a14);
+
+ cvec<T, 4> a3 = cread<4>(in + index * 4 + 16 * 3);
+ cvec<T, 4> a7 = cread<4>(in + index * 4 + 16 * 7);
+ cvec<T, 4> a11 = cread<4>(in + index * 4 + 16 * 11);
+ cvec<T, 4> a15 = cread<4>(in + index * 4 + 16 * 15);
+ butterfly4<N, inverse>(cfalse, a3, a7, a11, a15, a3, a7, a11, a15);
+ a7 = cmul_by_twiddle<3, 16, inverse>(a7);
+ a11 = cmul_by_twiddle<6, 16, inverse>(a11);
+ a15 = cmul_by_twiddle<9, 16, inverse>(a15);
+
+ cvec<T, 4> a0 = cread<4>(in + index * 4 + 16 * 0);
+ cvec<T, 4> a4 = cread<4>(in + index * 4 + 16 * 4);
+ cvec<T, 4> a8 = cread<4>(in + index * 4 + 16 * 8);
+ cvec<T, 4> a12 = cread<4>(in + index * 4 + 16 * 12);
+ butterfly4<N, inverse>(cfalse, a0, a4, a8, a12, a0, a4, a8, a12);
+ butterfly4<N, inverse>(cfalse, a0, a1, a2, a3, a0, a1, a2, a3);
+ cwrite<4>(out + index * 4 + 16 * 0, a0);
+ cwrite<4>(out + index * 4 + 16 * 4, a1);
+ cwrite<4>(out + index * 4 + 16 * 8, a2);
+ cwrite<4>(out + index * 4 + 16 * 12, a3);
+ butterfly4<N, inverse>(cfalse, a4, a5, a6, a7, a4, a5, a6, a7);
+ cwrite<4>(out + index * 4 + 16 * 1, a4);
+ cwrite<4>(out + index * 4 + 16 * 5, a5);
+ cwrite<4>(out + index * 4 + 16 * 9, a6);
+ cwrite<4>(out + index * 4 + 16 * 13, a7);
+ butterfly4<N, inverse>(cfalse, a8, a9, a10, a11, a8, a9, a10, a11);
+ cwrite<4>(out + index * 4 + 16 * 2, a8);
+ cwrite<4>(out + index * 4 + 16 * 6, a9);
+ cwrite<4>(out + index * 4 + 16 * 10, a10);
+ cwrite<4>(out + index * 4 + 16 * 14, a11);
+ butterfly4<N, inverse>(cfalse, a12, a13, a14, a15, a12, a13, a14, a15);
+ cwrite<4>(out + index * 4 + 16 * 3, a12);
+ cwrite<4>(out + index * 4 + 16 * 7, a13);
+ cwrite<4>(out + index * 4 + 16 * 11, a14);
+ cwrite<4>(out + index * 4 + 16 * 15, a15);
+}
+
+template <size_t index, bool inverse = false, typename T>
+KFR_INTRIN void butterfly16_multi_flip(complex<T>* out, const complex<T>* in)
+{
+ constexpr size_t N = 4;
+
+ cvec<T, 4> a1 = cread<4>(in + index * 4 + 16 * 1);
+ cvec<T, 4> a5 = cread<4>(in + index * 4 + 16 * 5);
+ cvec<T, 4> a9 = cread<4>(in + index * 4 + 16 * 9);
+ cvec<T, 4> a13 = cread<4>(in + index * 4 + 16 * 13);
+ butterfly4<N, inverse>(cfalse, a1, a5, a9, a13, a1, a5, a9, a13);
+ a5 = cmul_by_twiddle<1, 16, inverse>(a5);
+ a9 = cmul_by_twiddle<2, 16, inverse>(a9);
+ a13 = cmul_by_twiddle<3, 16, inverse>(a13);
+
+ cvec<T, 4> a2 = cread<4>(in + index * 4 + 16 * 2);
+ cvec<T, 4> a6 = cread<4>(in + index * 4 + 16 * 6);
+ cvec<T, 4> a10 = cread<4>(in + index * 4 + 16 * 10);
+ cvec<T, 4> a14 = cread<4>(in + index * 4 + 16 * 14);
+ butterfly4<N, inverse>(cfalse, a2, a6, a10, a14, a2, a6, a10, a14);
+ a6 = cmul_by_twiddle<2, 16, inverse>(a6);
+ a10 = cmul_by_twiddle<4, 16, inverse>(a10);
+ a14 = cmul_by_twiddle<6, 16, inverse>(a14);
+
+ cvec<T, 4> a3 = cread<4>(in + index * 4 + 16 * 3);
+ cvec<T, 4> a7 = cread<4>(in + index * 4 + 16 * 7);
+ cvec<T, 4> a11 = cread<4>(in + index * 4 + 16 * 11);
+ cvec<T, 4> a15 = cread<4>(in + index * 4 + 16 * 15);
+ butterfly4<N, inverse>(cfalse, a3, a7, a11, a15, a3, a7, a11, a15);
+ a7 = cmul_by_twiddle<3, 16, inverse>(a7);
+ a11 = cmul_by_twiddle<6, 16, inverse>(a11);
+ a15 = cmul_by_twiddle<9, 16, inverse>(a15);
+
+ cvec<T, 16> w1 = concat(a1, a5, a9, a13);
+ cvec<T, 16> w2 = concat(a2, a6, a10, a14);
+ cvec<T, 16> w3 = concat(a3, a7, a11, a15);
+
+ cvec<T, 4> a0 = cread<4>(in + index * 4 + 16 * 0);
+ cvec<T, 4> a4 = cread<4>(in + index * 4 + 16 * 4);
+ cvec<T, 4> a8 = cread<4>(in + index * 4 + 16 * 8);
+ cvec<T, 4> a12 = cread<4>(in + index * 4 + 16 * 12);
+ butterfly4<N, inverse>(cfalse, a0, a4, a8, a12, a0, a4, a8, a12);
+ cvec<T, 16> w0 = concat(a0, a4, a8, a12);
+
+ butterfly4<N * 4, inverse>(cfalse, w0, w1, w2, w3, w0, w1, w2, w3);
+
+ w0 = digitreverse4<2>(w0);
+ w1 = digitreverse4<2>(w1);
+ w2 = digitreverse4<2>(w2);
+ w3 = digitreverse4<2>(w3);
+
+ transpose4(w0, w1, w2, w3);
+ cwrite<16>(out + index * 64 + 16 * 0, cmul(w0, fixed_twiddle<T, 16, 256, 0, index * 4 + 0, inverse>));
+ cwrite<16>(out + index * 64 + 16 * 1, cmul(w1, fixed_twiddle<T, 16, 256, 0, index * 4 + 1, inverse>));
+ cwrite<16>(out + index * 64 + 16 * 2, cmul(w2, fixed_twiddle<T, 16, 256, 0, index * 4 + 2, inverse>));
+ cwrite<16>(out + index * 64 + 16 * 3, cmul(w3, fixed_twiddle<T, 16, 256, 0, index * 4 + 3, inverse>));
+}
+
+template <size_t n2, size_t nnstep, size_t N, typename T>
+KFR_INTRIN void apply_twiddles2(cvec<T, N>& a1)
+{
+ cvec<T, N> tw1 = fixed_twiddle<T, N, 64, n2 * nnstep * 1, nnstep * 1>;
+
+ a1 = cmul(a1, tw1);
+}
+
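+// Radix-3 butterfly. For inputs (a00, a01, a02) the DFT is
+//   X0 = a00 + (a01 + a02)
+//   X1 = a00 + c*(a01 + a02) - i*s*(a01 - a02)
+//   X2 = a00 + c*(a01 + a02) + i*s*(a01 - a02)
+// with c = cos(2*pi/3) = -1/2 and s = sin(2*pi/3) = sqrt(3)/2; twiddleimagmask flips
+// the sign of the imaginary term for the inverse transform.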
+template <size_t N, bool inverse = false, typename T>
+KFR_INTRIN void butterfly3(cvec<T, N> a00, cvec<T, N> a01, cvec<T, N> a02, cvec<T, N>& w00, cvec<T, N>& w01,
+ cvec<T, N>& w02)
+{
+    // tw3r1 = cos(2*pi/3) - 1: the extra -1 compensates for sum1 already being
+    // contained in w00, from which s1 is formed below
+    constexpr cvec<T, N> tw3r1 = static_cast<T>(-0.5 - 1.0);
+    constexpr cvec<T, N> tw3i1 =
+        static_cast<T>(0.86602540378443864676372317075) * twiddleimagmask<T, N, inverse>();
+
+ const cvec<T, N> sum1 = a01 + a02;
+ const cvec<T, N> dif1 = swap<2>(a01 - a02);
+ w00 = a00 + sum1;
+
+ const cvec<T, N> s1 = w00 + sum1 * tw3r1;
+
+ const cvec<T, N> d1 = dif1 * tw3i1;
+
+ w01 = s1 + d1;
+ w02 = s1 - d1;
+}
+
+template <size_t N, bool inverse = false, typename T>
+KFR_INTRIN void butterfly3(cvec<T, N>& a0, cvec<T, N>& a1, cvec<T, N>& a2)
+{
+ butterfly3<N, inverse>(a0, a1, a2, a0, a1, a2);
+}
+
+template <size_t N, bool inverse = false, typename T>
+KFR_INTRIN void butterfly6(cvec<T, N> a0, cvec<T, N> a1, cvec<T, N> a2, cvec<T, N> a3, cvec<T, N> a4,
+ cvec<T, N> a5, cvec<T, N>& w0, cvec<T, N>& w1, cvec<T, N>& w2, cvec<T, N>& w3,
+ cvec<T, N>& w4, cvec<T, N>& w5)
+{
+    cvec<T, N * 2> a03 = concat(a0, a3);
+    cvec<T, N * 2> a25 = concat(a2, a5);
+    cvec<T, N * 2> a41 = concat(a4, a1);
+ butterfly3<N * 2, inverse>(a03, a25, a41, a03, a25, a41);
+ cvec<T, N> t0, t1, t2, t3, t4, t5;
+ split(a03, t0, t1);
+ split(a25, t2, t3);
+ split(a41, t4, t5);
+ t3 = -t3;
+    cvec<T, N * 2> a04 = concat(t0, t4);
+    cvec<T, N * 2> a15 = concat(t1, t5);
+ cvec<T, N * 2> w02, w35;
+ butterfly2<N * 2>(a04, a15, w02, w35);
+ split(w02, w0, w2);
+ split(w35, w3, w5);
+
+ butterfly2<N>(t2, t3, w1, w4);
+}
+
+template <size_t N, bool inverse = false, typename T>
+KFR_INTRIN void butterfly6(cvec<T, N>& a0, cvec<T, N>& a1, cvec<T, N>& a2, cvec<T, N>& a3, cvec<T, N>& a4,
+ cvec<T, N>& a5)
+{
+ butterfly6<N, inverse>(a0, a1, a2, a3, a4, a5, a0, a1, a2, a3, a4, a5);
+}
+
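+// Radix-7 butterfly built from the conjugate-symmetric pairs (a01,a06), (a02,a05),
+// (a03,a04): the pair sums feed the cosine terms and the swapped differences feed the
+// sine terms, with tw7r<k>/tw7i<k> derived from cos/sin(2*pi*k/7).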
+template <size_t N, bool inverse = false, typename T>
+KFR_INTRIN void butterfly7(cvec<T, N> a00, cvec<T, N> a01, cvec<T, N> a02, cvec<T, N> a03, cvec<T, N> a04,
+ cvec<T, N> a05, cvec<T, N> a06, cvec<T, N>& w00, cvec<T, N>& w01, cvec<T, N>& w02,
+ cvec<T, N>& w03, cvec<T, N>& w04, cvec<T, N>& w05, cvec<T, N>& w06)
+{
+    // tw7r<k> = cos(2*pi*k/7) - 1: the -1 compensates for the sums already being
+    // contained in w00, from which s1..s3 are formed below
+    constexpr cvec<T, N> tw7r1 = static_cast<T>(0.623489801858733530525004884 - 1.0);
+    constexpr cvec<T, N> tw7i1 =
+        static_cast<T>(0.78183148246802980870844452667) * twiddleimagmask<T, N, inverse>();
+    constexpr cvec<T, N> tw7r2 = static_cast<T>(-0.2225209339563144042889025645 - 1.0);
+    constexpr cvec<T, N> tw7i2 =
+        static_cast<T>(0.97492791218182360701813168299) * twiddleimagmask<T, N, inverse>();
+    constexpr cvec<T, N> tw7r3 = static_cast<T>(-0.90096886790241912623610231951 - 1.0);
+    constexpr cvec<T, N> tw7i3 =
+        static_cast<T>(0.43388373911755812047576833285) * twiddleimagmask<T, N, inverse>();
+
+ const cvec<T, N> sum1 = a01 + a06;
+ const cvec<T, N> dif1 = swap<2>(a01 - a06);
+ const cvec<T, N> sum2 = a02 + a05;
+ const cvec<T, N> dif2 = swap<2>(a02 - a05);
+ const cvec<T, N> sum3 = a03 + a04;
+ const cvec<T, N> dif3 = swap<2>(a03 - a04);
+ w00 = a00 + sum1 + sum2 + sum3;
+
+ const cvec<T, N> s1 = w00 + sum1 * tw7r1 + sum2 * tw7r2 + sum3 * tw7r3;
+ const cvec<T, N> s2 = w00 + sum1 * tw7r2 + sum2 * tw7r3 + sum3 * tw7r1;
+ const cvec<T, N> s3 = w00 + sum1 * tw7r3 + sum2 * tw7r1 + sum3 * tw7r2;
+
+ const cvec<T, N> d1 = dif1 * tw7i1 + dif2 * tw7i2 + dif3 * tw7i3;
+ const cvec<T, N> d2 = dif1 * tw7i2 - dif2 * tw7i3 - dif3 * tw7i1;
+ const cvec<T, N> d3 = dif1 * tw7i3 - dif2 * tw7i1 + dif3 * tw7i2;
+
+ w01 = s1 + d1;
+ w06 = s1 - d1;
+ w02 = s2 + d2;
+ w05 = s2 - d2;
+ w03 = s3 + d3;
+ w04 = s3 - d3;
+}
+
+template <size_t N, bool inverse = false, typename T>
+KFR_INTRIN void butterfly7(cvec<T, N>& a0, cvec<T, N>& a1, cvec<T, N>& a2, cvec<T, N>& a3, cvec<T, N>& a4,
+ cvec<T, N>& a5, cvec<T, N>& a6)
+{
+ butterfly7<N, inverse>(a0, a1, a2, a3, a4, a5, a6, a0, a1, a2, a3, a4, a5, a6);
+}
+
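+// Radix-5 butterfly, same scheme as butterfly7 but with the pairs (a01,a04) and
+// (a02,a03) and constants derived from cos/sin(2*pi*k/5).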
+template <size_t N, bool inverse = false, typename T>
+KFR_INTRIN void butterfly5(cvec<T, N> a00, cvec<T, N> a01, cvec<T, N> a02, cvec<T, N> a03, cvec<T, N> a04,
+ cvec<T, N>& w00, cvec<T, N>& w01, cvec<T, N>& w02, cvec<T, N>& w03,
+ cvec<T, N>& w04)
+{
+    // tw5r<k> = cos(2*pi*k/5) - 1: the -1 compensates for the sums already being
+    // contained in w00, from which s1 and s2 are formed below
+    constexpr cvec<T, N> tw5r1 = static_cast<T>(0.30901699437494742410229341718 - 1.0);
+    constexpr cvec<T, N> tw5i1 =
+        static_cast<T>(0.95105651629515357211643933338) * twiddleimagmask<T, N, inverse>();
+    constexpr cvec<T, N> tw5r2 = static_cast<T>(-0.80901699437494742410229341718 - 1.0);
+    constexpr cvec<T, N> tw5i2 =
+        static_cast<T>(0.58778525229247312916870595464) * twiddleimagmask<T, N, inverse>();
+
+ const cvec<T, N> sum1 = a01 + a04;
+ const cvec<T, N> dif1 = swap<2>(a01 - a04);
+ const cvec<T, N> sum2 = a02 + a03;
+ const cvec<T, N> dif2 = swap<2>(a02 - a03);
+ w00 = a00 + sum1 + sum2;
+
+ const cvec<T, N> s1 = w00 + sum1 * tw5r1 + sum2 * tw5r2;
+ const cvec<T, N> s2 = w00 + sum1 * tw5r2 + sum2 * tw5r1;
+
+ const cvec<T, N> d1 = dif1 * tw5i1 + dif2 * tw5i2;
+ const cvec<T, N> d2 = dif1 * tw5i2 - dif2 * tw5i1;
+
+ w01 = s1 + d1;
+ w04 = s1 - d1;
+ w02 = s2 + d2;
+ w03 = s2 - d2;
+}
+
+template <size_t N, bool inverse = false, typename T>
+KFR_INTRIN void butterfly10(cvec<T, N> a0, cvec<T, N> a1, cvec<T, N> a2, cvec<T, N> a3, cvec<T, N> a4,
+ cvec<T, N> a5, cvec<T, N> a6, cvec<T, N> a7, cvec<T, N> a8, cvec<T, N> a9,
+ cvec<T, N>& w0, cvec<T, N>& w1, cvec<T, N>& w2, cvec<T, N>& w3, cvec<T, N>& w4,
+ cvec<T, N>& w5, cvec<T, N>& w6, cvec<T, N>& w7, cvec<T, N>& w8, cvec<T, N>& w9)
+{
+    cvec<T, N * 2> a05 = concat(a0, a5);
+    cvec<T, N * 2> a27 = concat(a2, a7);
+    cvec<T, N * 2> a49 = concat(a4, a9);
+    cvec<T, N * 2> a61 = concat(a6, a1);
+    cvec<T, N * 2> a83 = concat(a8, a3);
+ butterfly5<N * 2, inverse>(a05, a27, a49, a61, a83, a05, a27, a49, a61, a83);
+ cvec<T, N> t0, t1, t2, t3, t4, t5, t6, t7, t8, t9;
+ split(a05, t0, t1);
+ split(a27, t2, t3);
+ split(a49, t4, t5);
+ split(a61, t6, t7);
+ split(a83, t8, t9);
+ t5 = -t5;
+
+ cvec<T, N * 2> t02, t13;
+ cvec<T, N * 2> w06, w51;
+ t02 = concat(t0, t2);
+ t13 = concat(t1, t3);
+ butterfly2<N * 2>(t02, t13, w06, w51);
+ split(w06, w0, w6);
+ split(w51, w5, w1);
+
+ cvec<T, N * 2> t68, t79;
+ cvec<T, N * 2> w84, w39;
+ t68 = concat(t6, t8);
+ t79 = concat(t7, t9);
+ butterfly2<N * 2>(t68, t79, w84, w39);
+ split(w84, w8, w4);
+ split(w39, w3, w9);
+ butterfly2<N>(t4, t5, w7, w2);
+}
+
+template <bool inverse, typename T, size_t N>
+KFR_INTRIN void butterfly(cbool_t<inverse>, vec<T, N> in0, vec<T, N> in1, vec<T, N>& out0, vec<T, N>& out1)
+{
+ butterfly2<N / 2>(in0, in1, out0, out1);
+}
+template <bool inverse, typename T, size_t N>
+KFR_INTRIN void butterfly(cbool_t<inverse>, vec<T, N> in0, vec<T, N> in1, vec<T, N> in2, vec<T, N>& out0,
+ vec<T, N>& out1, vec<T, N>& out2)
+{
+ butterfly3<N / 2, inverse>(in0, in1, in2, out0, out1, out2);
+}
+
+template <bool inverse, typename T, size_t N>
+KFR_INTRIN void butterfly(cbool_t<inverse>, vec<T, N> in0, vec<T, N> in1, vec<T, N> in2, vec<T, N> in3,
+ vec<T, N>& out0, vec<T, N>& out1, vec<T, N>& out2, vec<T, N>& out3)
+{
+ butterfly4<N / 2, inverse>(cfalse, in0, in1, in2, in3, out0, out1, out2, out3);
+}
+template <bool inverse, typename T, size_t N>
+KFR_INTRIN void butterfly(cbool_t<inverse>, vec<T, N> in0, vec<T, N> in1, vec<T, N> in2, vec<T, N> in3,
+ vec<T, N> in4, vec<T, N>& out0, vec<T, N>& out1, vec<T, N>& out2, vec<T, N>& out3,
+ vec<T, N>& out4)
+{
+ butterfly5<N / 2, inverse>(in0, in1, in2, in3, in4, out0, out1, out2, out3, out4);
+}
+template <bool inverse, typename T, size_t N>
+KFR_INTRIN void butterfly(cbool_t<inverse>, vec<T, N> in0, vec<T, N> in1, vec<T, N> in2, vec<T, N> in3,
+ vec<T, N> in4, vec<T, N> in5, vec<T, N>& out0, vec<T, N>& out1, vec<T, N>& out2,
+ vec<T, N>& out3, vec<T, N>& out4, vec<T, N>& out5)
+{
+ butterfly6<N / 2, inverse>(in0, in1, in2, in3, in4, in5, out0, out1, out2, out3, out4, out5);
+}
+template <bool inverse, typename T, size_t N>
+KFR_INTRIN void butterfly(cbool_t<inverse>, vec<T, N> in0, vec<T, N> in1, vec<T, N> in2, vec<T, N> in3,
+ vec<T, N> in4, vec<T, N> in5, vec<T, N> in6, vec<T, N>& out0, vec<T, N>& out1,
+ vec<T, N>& out2, vec<T, N>& out3, vec<T, N>& out4, vec<T, N>& out5, vec<T, N>& out6)
+{
+ butterfly7<N / 2, inverse>(in0, in1, in2, in3, in4, in5, in6, out0, out1, out2, out3, out4, out5, out6);
+}
+template <bool inverse, typename T, size_t N>
+KFR_INTRIN void butterfly(cbool_t<inverse>, vec<T, N> in0, vec<T, N> in1, vec<T, N> in2, vec<T, N> in3,
+ vec<T, N> in4, vec<T, N> in5, vec<T, N> in6, vec<T, N> in7, vec<T, N>& out0,
+ vec<T, N>& out1, vec<T, N>& out2, vec<T, N>& out3, vec<T, N>& out4, vec<T, N>& out5,
+ vec<T, N>& out6, vec<T, N>& out7)
+{
+ butterfly8<N / 2, inverse>(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3, out4, out5,
+ out6, out7);
+}
+template <bool inverse, typename T, size_t N>
+KFR_INTRIN void butterfly(cbool_t<inverse>, vec<T, N> in0, vec<T, N> in1, vec<T, N> in2, vec<T, N> in3,
+ vec<T, N> in4, vec<T, N> in5, vec<T, N> in6, vec<T, N> in7, vec<T, N> in8,
+ vec<T, N> in9, vec<T, N>& out0, vec<T, N>& out1, vec<T, N>& out2, vec<T, N>& out3,
+ vec<T, N>& out4, vec<T, N>& out5, vec<T, N>& out6, vec<T, N>& out7, vec<T, N>& out8,
+ vec<T, N>& out9)
+{
+ butterfly10<N / 2, inverse>(in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, out0, out1, out2, out3,
+ out4, out5, out6, out7, out8, out9);
+}
+template <bool transposed, typename T, size_t... N, size_t Nout = csum(csizes<N...>)>
+KFR_INTRIN void cread_transposed(cbool_t<transposed>, const complex<T>* ptr, vec<T, N>&... w)
+{
+ vec<T, Nout> temp = read<Nout>(ptr_cast<T>(ptr));
+ if (transposed)
+ temp = ctranspose<sizeof...(N)>(temp);
+ split(temp, w...);
+}
+
+// Warning: Reads past the end. Use with care
+KFR_INTRIN void cread_transposed(cbool_t<true>, const complex<f32>* ptr, cvec<f32, 4>& w0, cvec<f32, 4>& w1,
+ cvec<f32, 4>& w2)
+{
+ cvec<f32, 4> w3;
+ cvec<f32, 16> v16 = concat(cread<4>(ptr), cread<4>(ptr + 3), cread<4>(ptr + 6), cread<4>(ptr + 9));
+ v16 = digitreverse4<2>(v16);
+ split(v16, w0, w1, w2, w3);
+}
+
+KFR_INTRIN void cread_transposed(cbool_t<true>, const complex<f32>* ptr, cvec<f32, 4>& w0, cvec<f32, 4>& w1,
+ cvec<f32, 4>& w2, cvec<f32, 4>& w3, cvec<f32, 4>& w4)
+{
+ cvec<f32, 16> v16 = concat(cread<4>(ptr), cread<4>(ptr + 5), cread<4>(ptr + 10), cread<4>(ptr + 15));
+ v16 = digitreverse4<2>(v16);
+ split(v16, w0, w1, w2, w3);
+ w4 = cgather<4, 5>(ptr + 4);
+}
+
+template <bool transposed, typename T, size_t... N, size_t Nout = csum(csizes<N...>)>
+KFR_INTRIN void cwrite_transposed(cbool_t<transposed>, complex<T>* ptr, vec<T, N>... args)
+{
+ auto temp = concat(args...);
+ if (transposed)
+ temp = ctransposeinverse<sizeof...(N)>(temp);
+ write(ptr_cast<T>(ptr), temp);
+}
+
+template <size_t I, size_t radix, typename T, size_t N, size_t width = N / 2>
+KFR_INTRIN vec<T, N> mul_tw(cbool_t<false>, vec<T, N> x, const complex<T>* twiddle)
+{
+ return I == 0 ? x : cmul(x, cread<width>(twiddle + width * (I - 1)));
+}
+template <size_t I, size_t radix, typename T, size_t N, size_t width = N / 2>
+KFR_INTRIN vec<T, N> mul_tw(cbool_t<true>, vec<T, N> x, const complex<T>* twiddle)
+{
+ return I == 0 ? x : cmul_conj(x, cread<width>(twiddle + width * (I - 1)));
+}
+
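+// swallow{ ... } below is the usual pre-C++17 idiom for expanding a parameter pack for
+// its side effects: a braced initializer list guarantees left-to-right evaluation. In
+// C++17 the loads could be written as a fold expression instead, e.g.
+//   ((inout.get(csize<I>) = cread<width>(in + i + stride * I)), ...);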
+// Non-final pass: butterfly the strided inputs, then multiply by the per-stage twiddles
+template <typename T, size_t width, size_t radix, bool inverse, size_t... I>
+KFR_INTRIN void butterfly_helper(std::index_sequence<I...>, size_t i, csize_t<width>, csize_t<radix>,
+ cbool_t<inverse>, complex<T>* out, const complex<T>* in,
+ const complex<T>* tw, size_t stride)
+{
+ carray<cvec<T, width>, radix> inout;
+
+ swallow{ (inout.get(csize<I>) = cread<width>(in + i + stride * I))... };
+
+ butterfly(cbool<inverse>, inout.get(csize<I>)..., inout.get(csize<I>)...);
+
+ swallow{ (cwrite<width>(out + i + stride * I,
+ mul_tw<I, radix>(cbool<inverse>, inout.get(csize<I>), tw + i * (radix - 1))),
+ 0)... };
+}
+
+// Final pass: digit-reversed (transposed) read, butterfly, plain strided writes
+template <typename T, size_t width, size_t radix, bool inverse, size_t... I>
+KFR_INTRIN void butterfly_helper(std::index_sequence<I...>, size_t i, csize_t<width>, csize_t<radix>,
+ cbool_t<inverse>, complex<T>* out, const complex<T>* in, size_t stride)
+{
+ carray<cvec<T, width>, radix> inout;
+
+ // swallow{ ( inout.get( csize<I> ) = infn( i, I, cvec<T, width>( ) ) )... };
+ cread_transposed(cbool<true>, in + i * radix, inout.get(csize<I>)...);
+
+ butterfly(cbool<inverse>, inout.get(csize<I>)..., inout.get(csize<I>)...);
+
+ swallow{ (cwrite<width>(out + i + stride * I, inout.get(csize<I>)), 0)... };
+}
+
+template <size_t width, size_t radix, typename... Args>
+KFR_INTRIN void butterfly(size_t i, csize_t<width>, csize_t<radix>, Args&&... args)
+{
+ butterfly_helper(std::make_index_sequence<radix>(), i, csize<width>, csize<radix>,
+ std::forward<Args>(args)...);
+}
+
+template <typename... Args>
+KFR_INTRIN void butterfly_cycle(size_t&, size_t, csize_t<0>, Args&&...)
+{
+}
+template <size_t width, typename... Args>
+KFR_INTRIN void butterfly_cycle(size_t& i, size_t count, csize_t<width>, Args&&... args)
+{
+ KFR_LOOP_NOUNROLL
+ for (; i < count / width * width; i += width)
+ butterfly(i, csize<width>, std::forward<Args>(args)...);
+ butterfly_cycle(i, count, csize<width / 2>, std::forward<Args>(args)...);
+}
+
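+// Runs `count` butterflies in SIMD chunks: whole chunks of `width` first, then the
+// remainder is handled by recursing with width/2 (down to the csize<0> terminator),
+// so no scalar tail loop is required.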
+template <size_t width, typename... Args>
+KFR_INTRIN void butterflies(size_t count, csize_t<width>, Args&&... args)
+{
+ __builtin_assume(count > 0);
+ size_t i = 0;
+ butterfly_cycle(i, count, csize<width>, std::forward<Args>(args)...);
+}
+
+template <typename T, bool inverse, typename Tstride>
+KFR_INTRIN void generic_butterfly_cycle(csize_t<0>, size_t, cbool_t<inverse>, complex<T>*, const complex<T>*,
+ Tstride, size_t, size_t, const complex<T>*, size_t)
+{
+}
+template <size_t width, bool inverse, typename T, typename Tstride>
+KFR_INTRIN void generic_butterfly_cycle(csize_t<width>, size_t radix, cbool_t<inverse>, complex<T>* out,
+ const complex<T>* in, Tstride ostride, size_t halfradix,
+ size_t halfradix_sqr, const complex<T>* twiddle, size_t i)
+{
+ KFR_LOOP_NOUNROLL
+ for (; i < halfradix / width * width; i += width)
+ {
+ const cvec<T, 1> in0 = cread<1>(in);
+ cvec<T, width> sum0 = resize<2 * width>(in0);
+ cvec<T, width> sum1 = sum0;
+
+ KFR_LOOP_NOUNROLL
+ for (size_t j = 0; j < halfradix; j++)
+ {
+ const cvec<T, 1> ina = cread<1>(in + (1 + j));
+ const cvec<T, 1> inb = cread<1>(in + radix - (j + 1));
+ cvec<T, width> tw = cread<width>(twiddle);
+ if (inverse)
+ tw = cconj(tw);
+
+ cmul_2conj(sum0, sum1, ina, inb, tw);
+ twiddle += halfradix;
+ }
+ twiddle = twiddle - halfradix_sqr + width;
+
+ if (is_constant_val(ostride))
+ {
+ cwrite<width>(out + (1 + i), sum0);
+ cwrite<width>(out + (radix - (i + 1)) - (width - 1), reverse<2>(sum1));
+ }
+ else
+ {
+ cscatter<width>(out + (i + 1) * ostride, ostride, sum0);
+ cscatter<width>(out + (radix - (i + 1)) * ostride - (width - 1) * ostride, ostride,
+ reverse<2>(sum1));
+ }
+ }
+ generic_butterfly_cycle(csize<width / 2>, radix, cbool<inverse>, out, in, ostride, halfradix,
+ halfradix_sqr, twiddle, i);
+}
+
+template <size_t width, typename T, bool inverse, typename Tstride = csize_t<1>>
+KFR_INTRIN void generic_butterfly_w(size_t radix, cbool_t<inverse>, complex<T>* out, const complex<T>* in,
+ const complex<T>* twiddle, Tstride ostride = Tstride{})
+{
+ __builtin_assume(radix > 0);
+ {
+ cvec<T, width> sum = T();
+ size_t j = 0;
+ KFR_LOOP_NOUNROLL
+ for (; j < radix / width * width; j += width)
+ {
+ sum += cread<width>(in + j);
+ }
+ cvec<T, 1> sums = T();
+ KFR_LOOP_NOUNROLL
+ for (; j < radix; j++)
+ {
+ sums += cread<1>(in + j);
+ }
+ cwrite<1>(out, hcadd(sum) + sums);
+ }
+ const size_t halfradix = radix / 2;
+ const size_t halfradix_sqr = halfradix * halfradix;
+ __builtin_assume(halfradix > 0);
+ size_t i = 0;
+
+ generic_butterfly_cycle(csize<width>, radix, cbool<inverse>, out, in, ostride, halfradix, halfradix_sqr,
+ twiddle, i);
+}
+
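+// Generic butterfly for arbitrary (typically prime) radix. When operating in-place the
+// input is first copied to `temp`; cswitch turns radix == 11 into a compile-time
+// constant so that case gets a fully specialized instantiation, while other radixes
+// take the runtime-radix fallback.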
+template <typename T, bool inverse, typename Tstride = csize_t<1>>
+KFR_INTRIN void generic_butterfly(size_t radix, cbool_t<inverse>, complex<T>* out, const complex<T>* in,
+ complex<T>* temp, const complex<T>* twiddle, Tstride ostride = Tstride{})
+{
+ if (out == in)
+ {
+ builtin_memcpy(temp, in, sizeof(complex<T>) * radix);
+ in = temp;
+ }
+ constexpr size_t width = vector_width<T, cpu_t::native>;
+
+ cswitch(csizes<11>, radix,
+ [&](auto radix_) KFR_INLINE_LAMBDA {
+ generic_butterfly_w<width>(val_of(radix_), cbool<inverse>, out, in, twiddle, ostride);
+ },
+ [&]() KFR_INLINE_LAMBDA {
+ generic_butterfly_w<width>(radix, cbool<inverse>, out, in, twiddle, ostride);
+ });
+}
+
+template <typename T, size_t N>
+constexpr cvec<T, N> cmask08 = broadcast<N, T>(T(), -T());
+
+template <typename T, size_t N>
+constexpr cvec<T, N> cmask0088 = broadcast<N, T>(T(), T(), -T(), -T());
+
+template <bool A = false, typename T, size_t N>
+KFR_INTRIN void cbitreverse_write(complex<T>* dest, vec<T, N> x)
+{
+ cwrite<N / 2, A>(dest, bitreverse<2>(x));
+}
+
+template <bool A = false, typename T, size_t N>
+KFR_INTRIN void cdigitreverse4_write(complex<T>* dest, vec<T, N> x)
+{
+ cwrite<N / 2, A>(dest, digitreverse4<2>(x));
+}
+
+template <size_t N, bool A = false, typename T>
+KFR_INTRIN cvec<T, N> cbitreverse_read(const complex<T>* src)
+{
+ return bitreverse<2>(cread<N, A>(src));
+}
+
+template <size_t N, bool A = false, typename T>
+KFR_INTRIN cvec<T, N> cdigitreverse4_read(const complex<T>* src)
+{
+ return digitreverse4<2>(cread<N, A>(src));
+}
+
+#if 1
+
+template <>
+KFR_INTRIN cvec<f64, 16> cdigitreverse4_read<16, false, f64>(const complex<f64>* src)
+{
+ return concat(cread<1>(src + 0), cread<1>(src + 4), cread<1>(src + 8), cread<1>(src + 12),
+ cread<1>(src + 1), cread<1>(src + 5), cread<1>(src + 9), cread<1>(src + 13),
+ cread<1>(src + 2), cread<1>(src + 6), cread<1>(src + 10), cread<1>(src + 14),
+ cread<1>(src + 3), cread<1>(src + 7), cread<1>(src + 11), cread<1>(src + 15));
+}
+template <>
+KFR_INTRIN void cdigitreverse4_write<false, f64, 32>(complex<f64>* dest, vec<f64, 32> x)
+{
+ cwrite<1>(dest, part<16, 0>(x));
+ cwrite<1>(dest + 4, part<16, 1>(x));
+ cwrite<1>(dest + 8, part<16, 2>(x));
+ cwrite<1>(dest + 12, part<16, 3>(x));
+
+ cwrite<1>(dest + 1, part<16, 4>(x));
+ cwrite<1>(dest + 5, part<16, 5>(x));
+ cwrite<1>(dest + 9, part<16, 6>(x));
+ cwrite<1>(dest + 13, part<16, 7>(x));
+
+ cwrite<1>(dest + 2, part<16, 8>(x));
+ cwrite<1>(dest + 6, part<16, 9>(x));
+ cwrite<1>(dest + 10, part<16, 10>(x));
+ cwrite<1>(dest + 14, part<16, 11>(x));
+
+ cwrite<1>(dest + 3, part<16, 12>(x));
+ cwrite<1>(dest + 7, part<16, 13>(x));
+ cwrite<1>(dest + 11, part<16, 14>(x));
+ cwrite<1>(dest + 15, part<16, 15>(x));
+}
+#endif
+}
+}
+
+#pragma clang diagnostic pop
diff --git a/include/kfr/dft/reference_dft.hpp b/include/kfr/dft/reference_dft.hpp
@@ -0,0 +1,141 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+
+#include "../base/complex.hpp"
+#include "../base/constants.hpp"
+#include "../base/memory.hpp"
+#include "../base/read_write.hpp"
+#include "../base/vec.hpp"
+#include "../misc/small_buffer.hpp"
+#include <cmath>
+
+namespace kfr
+{
+
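+// Reference radix-2 FFT pass, kept deliberately simple for accuracy testing: a
+// recursive decimation-in-time split that ping-pongs between the X and XX buffers,
+// with `flag` selecting the direction (+1 forward, -1 inverse).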
+template <typename Tnumber = long double>
+void reference_fft_pass(Tnumber pi2, size_t N, size_t offset, size_t delta, int flag, Tnumber (*x)[2],
+ Tnumber (*X)[2], Tnumber (*XX)[2])
+{
+ const size_t N2 = N / 2;
+ using std::sin;
+ using std::cos;
+
+ if (N != 2)
+ {
+ reference_fft_pass(pi2, N2, offset, 2 * delta, flag, x, XX, X);
+ reference_fft_pass(pi2, N2, offset + delta, 2 * delta, flag, x, XX, X);
+
+ for (size_t k = 0; k < N2; k++)
+ {
+ const size_t k00 = offset + k * delta;
+ const size_t k01 = k00 + N2 * delta;
+ const size_t k10 = offset + 2 * k * delta;
+ const size_t k11 = k10 + delta;
+ const Tnumber m = static_cast<Tnumber>(k) / N;
+ const Tnumber cs = cos(pi2 * m);
+ const Tnumber sn = flag * sin(pi2 * m);
+ const Tnumber tmp0 = cs * XX[k11][0] + sn * XX[k11][1];
+ const Tnumber tmp1 = cs * XX[k11][1] - sn * XX[k11][0];
+ X[k01][0] = XX[k10][0] - tmp0;
+ X[k01][1] = XX[k10][1] - tmp1;
+ X[k00][0] = XX[k10][0] + tmp0;
+ X[k00][1] = XX[k10][1] + tmp1;
+ }
+ }
+ else
+ {
+ const size_t k00 = offset;
+ const size_t k01 = k00 + delta;
+ X[k01][0] = x[k00][0] - x[k01][0];
+ X[k01][1] = x[k00][1] - x[k01][1];
+ X[k00][0] = x[k00][0] + x[k01][0];
+ X[k00][1] = x[k00][1] + x[k01][1];
+ }
+}
+
+template <typename Tnumber = long double, typename T>
+void reference_fft(complex<T>* out, const complex<T>* in, size_t size, bool inversion = false)
+{
+ using Tcmplx = Tnumber(*)[2];
+ if (size < 2)
+ return;
+ std::vector<complex<Tnumber>> datain(size);
+ std::vector<complex<Tnumber>> dataout(size);
+ std::vector<complex<Tnumber>> temp(size);
+ std::copy(in, in + size, datain.begin());
+ const Tnumber pi2 = c_pi<Tnumber, 2, 1>;
+ reference_fft_pass<Tnumber>(pi2, size, 0, 1, inversion ? -1 : +1, Tcmplx(datain.data()),
+ Tcmplx(dataout.data()), Tcmplx(temp.data()));
+ std::copy(dataout.begin(), dataout.end(), out);
+}
+
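+// Reference DFT: delegates to the FFT above for power-of-two sizes, otherwise
+// evaluates the definition X[k] = sum_j x[j] * exp(-2*pi*i*j*k/n) (conjugated for the
+// inverse) directly in O(n^2) at extended precision (long double by default).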
+template <typename Tnumber = long double, typename T>
+void reference_dft(complex<T>* out, const complex<T>* in, size_t size, bool inversion = false)
+{
+ using std::sin;
+ using std::cos;
+ if (is_poweroftwo(size))
+ {
+ return reference_fft<Tnumber>(out, in, size, inversion);
+ }
+ constexpr Tnumber pi2 = c_pi<Tnumber, 2>;
+ if (size < 2)
+ return;
+ std::vector<complex<T>> datain;
+ if (out == in)
+ {
+ datain.resize(size);
+ std::copy_n(in, size, datain.begin());
+ in = datain.data();
+ }
+ {
+ Tnumber sumr = 0;
+ Tnumber sumi = 0;
+ for (size_t j = 0; j < size; j++)
+ {
+ sumr += static_cast<Tnumber>(in[j].real());
+ sumi += static_cast<Tnumber>(in[j].imag());
+ }
+ out[0] = { static_cast<T>(sumr), static_cast<T>(sumi) };
+ }
+ for (size_t i = 1; i < size; i++)
+ {
+ Tnumber sumr = static_cast<Tnumber>(in[0].real());
+ Tnumber sumi = static_cast<Tnumber>(in[0].imag());
+
+ for (size_t j = 1; j < size; j++)
+ {
+ const Tnumber x = pi2 * ((i * j) % size) / size;
+ Tnumber twr = cos(x);
+ Tnumber twi = sin(x);
+ if (inversion)
+ twi = -twi;
+
+ sumr += twr * static_cast<Tnumber>(in[j].real()) + twi * static_cast<Tnumber>(in[j].imag());
+ sumi += twr * static_cast<Tnumber>(in[j].imag()) - twi * static_cast<Tnumber>(in[j].real());
+        }
+        // store once per output bin, after the inner sum over j is complete
+        out[i] = { static_cast<T>(sumr), static_cast<T>(sumi) };
+ }
+}
+}
diff --git a/include/kfr/dispatch/cpuid.hpp b/include/kfr/dispatch/cpuid.hpp
@@ -0,0 +1,305 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+
+#include "../base/types.hpp"
+#include <cstring>
+
+namespace kfr
+{
+
+struct cpu_features
+{
+ u32 max;
+ u32 exmax;
+ u32 isIntel : 1;
+ u32 isAMD : 1;
+ u32 has3DNOW : 1;
+ u32 has3DNOWEXT : 1;
+ u32 hasABM : 1;
+ u32 hasADX : 1;
+ u32 hasAES : 1;
+ u32 hasAVX : 1;
+ u32 hasAVX2 : 1;
+ u32 hasAVXOSSUPPORT : 1;
+ u32 hasAVX512OSSUPPORT : 1;
+ u32 hasAVX512CD : 1;
+ u32 hasAVX512ER : 1;
+ u32 hasAVX512F : 1;
+ u32 hasAVX512DQ : 1;
+ u32 hasAVX512PF : 1;
+ u32 hasAVX512BW : 1;
+ u32 hasBMI1 : 1;
+ u32 hasBMI2 : 1;
+ u32 hasCLFSH : 1;
+ u32 hasCMOV : 1;
+ u32 hasCMPXCHG16B : 1;
+ u32 hasCX8 : 1;
+ u32 hasERMS : 1;
+ u32 hasF16C : 1;
+ u32 hasFMA : 1;
+ u32 hasFSGSBASE : 1;
+ u32 hasFXSR : 1;
+ u32 hasHLE : 1;
+ u32 hasINVPCID : 1;
+ u32 hasLAHF : 1;
+ u32 hasLZCNT : 1;
+ u32 hasMMX : 1;
+ u32 hasMMXEXT : 1;
+ u32 hasMONITOR : 1;
+ u32 hasMOVBE : 1;
+ u32 hasMSR : 1;
+ u32 hasOSXSAVE : 1;
+ u32 hasPCLMULQDQ : 1;
+ u32 hasPOPCNT : 1;
+ u32 hasPREFETCHWT1 : 1;
+ u32 hasRDRAND : 1;
+ u32 hasRDSEED : 1;
+ u32 hasRDTSCP : 1;
+ u32 hasRTM : 1;
+ u32 hasSEP : 1;
+ u32 hasSHA : 1;
+ u32 hasSSE : 1;
+ u32 hasSSE2 : 1;
+ u32 hasSSE3 : 1;
+ u32 hasSSE41 : 1;
+ u32 hasSSE42 : 1;
+ u32 hasSSE4a : 1;
+ u32 hasSSSE3 : 1;
+ u32 hasSYSCALL : 1;
+ u32 hasTBM : 1;
+ u32 hasXOP : 1;
+ u32 hasXSAVE : 1;
+ u32 padding1 : 6;
+ char vendor[17];
+ char model[49];
+ char padding2[2];
+};
+
+namespace internal
+{
+
+struct cpu_data
+{
+ u32 data[4];
+};
+
+#if defined KFR_COMPILER_GNU || defined KFR_COMPILER_CLANG
+KFR_INLINE u32 get_cpuid(u32 func, u32 subfunc, u32* eax, u32* ebx, u32* ecx, u32* edx)
+{
+ __asm__("cpuid" : "=a"(*eax), "=b"(*ebx), "=c"(*ecx), "=d"(*edx) : "0"(func), "2"(subfunc));
+ return 1;
+}
+KFR_INLINE void cpuid(u32* ptr, u32 func, u32 subfunc = 0)
+{
+ get_cpuid(func, subfunc, &ptr[0], &ptr[1], &ptr[2], &ptr[3]);
+}
+KFR_INLINE u32 get_xcr0()
+{
+ u32 xcr0;
+ __asm__("xgetbv" : "=a"(xcr0) : "c"(0) : "%edx");
+ return xcr0;
+}
+#endif
+
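+// XCR0 layout (Intel SDM): bit 1 = SSE (XMM) state, bit 2 = AVX (YMM) state,
+// bits 5..7 = AVX-512 opmask/ZMM state. AVX needs bits 1-2 enabled by the OS
+// (mask 0x06); AVX-512 needs bits 1-2 and 5-7 (mask 0xE6).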
+template <size_t>
+cpu_t detect_cpu()
+{
+ cpu_features c;
+ memset(&c, 0, sizeof(c));
+ cpu_data data0;
+ cpu_data exdata0;
+
+ u32 f_1_ECX(0);
+ u32 f_1_EDX(0);
+ u32 f_7_EBX(0);
+ u32 f_7_ECX(0);
+ u32 f_81_ECX(0);
+ u32 f_81_EDX(0);
+
+ cpuid(data0.data, 0);
+ c.max = static_cast<u32>(data0.data[0]);
+ cpuid(exdata0.data, 0x80000000);
+ c.exmax = static_cast<u32>(exdata0.data[0]);
+
+ *ptr_cast<u32>(c.vendor) = static_cast<u32>(data0.data[1]);
+ *ptr_cast<u32>(c.vendor + 4) = static_cast<u32>(data0.data[3]);
+ *ptr_cast<u32>(c.vendor + 8) = static_cast<u32>(data0.data[2]);
+
+ c.isIntel = strncmp(c.vendor, "GenuineIntel", sizeof(c.vendor)) == 0 ? 1 : 0;
+ c.isAMD = strncmp(c.vendor, "AuthenticAMD", sizeof(c.vendor)) == 0 ? 1 : 0;
+
+ if (c.max >= 1)
+ {
+ cpu_data data1;
+ cpuid(data1.data, 1);
+ f_1_ECX = static_cast<u32>(data1.data[2]);
+ f_1_EDX = static_cast<u32>(data1.data[3]);
+ }
+
+ if (c.max >= 7)
+ {
+ cpu_data data7;
+ cpuid(data7.data, 7);
+ f_7_EBX = static_cast<u32>(data7.data[1]);
+ f_7_ECX = static_cast<u32>(data7.data[2]);
+ }
+
+ if (c.exmax >= 0x80000001)
+ {
+ cpu_data data81;
+ cpuid(data81.data, 0x80000001);
+ f_81_ECX = static_cast<u32>(data81.data[2]);
+ f_81_EDX = static_cast<u32>(data81.data[3]);
+ }
+
+ if (c.exmax >= 0x80000004)
+ {
+ cpu_data data82;
+ cpu_data data83;
+ cpu_data data84;
+ cpuid(data82.data, 0x80000002);
+ cpuid(data83.data, 0x80000003);
+ cpuid(data84.data, 0x80000004);
+ memcpy(c.model, data82.data, sizeof(cpu_data));
+ memcpy(c.model + 16, data83.data, sizeof(cpu_data));
+ memcpy(c.model + 32, data84.data, sizeof(cpu_data));
+ }
+
+ c.hasSSE3 = f_1_ECX >> 0 & 1;
+ c.hasPCLMULQDQ = f_1_ECX >> 1 & 1;
+ c.hasMONITOR = f_1_ECX >> 3 & 1;
+ c.hasSSSE3 = f_1_ECX >> 9 & 1;
+ c.hasFMA = f_1_ECX >> 12 & 1;
+ c.hasCMPXCHG16B = f_1_ECX >> 13 & 1;
+ c.hasSSE41 = f_1_ECX >> 19 & 1;
+ c.hasSSE42 = f_1_ECX >> 20 & 1;
+ c.hasMOVBE = f_1_ECX >> 22 & 1;
+ c.hasPOPCNT = f_1_ECX >> 23 & 1;
+ c.hasAES = f_1_ECX >> 25 & 1;
+ c.hasXSAVE = f_1_ECX >> 26 & 1;
+ c.hasOSXSAVE = f_1_ECX >> 27 & 1;
+ c.hasAVX = f_1_ECX >> 28 & 1;
+ c.hasF16C = f_1_ECX >> 29 & 1;
+ c.hasRDRAND = f_1_ECX >> 30 & 1;
+ c.hasMSR = f_1_EDX >> 5 & 1;
+ c.hasCX8 = f_1_EDX >> 8 & 1;
+ c.hasSEP = f_1_EDX >> 11 & 1;
+ c.hasCMOV = f_1_EDX >> 15 & 1;
+ c.hasCLFSH = f_1_EDX >> 19 & 1;
+ c.hasMMX = f_1_EDX >> 23 & 1;
+ c.hasFXSR = f_1_EDX >> 24 & 1;
+ c.hasSSE = f_1_EDX >> 25 & 1;
+ c.hasSSE2 = f_1_EDX >> 26 & 1;
+ c.hasFSGSBASE = f_7_EBX >> 0 & 1;
+ c.hasBMI1 = f_7_EBX >> 3 & 1;
+ c.hasHLE = c.isIntel && f_7_EBX >> 4 & 1;
+ c.hasAVX2 = f_7_EBX >> 5 & 1;
+ c.hasBMI2 = f_7_EBX >> 8 & 1;
+ c.hasERMS = f_7_EBX >> 9 & 1;
+ c.hasINVPCID = f_7_EBX >> 10 & 1;
+ c.hasRTM = c.isIntel && f_7_EBX >> 11 & 1;
+ c.hasAVX512F = f_7_EBX >> 16 & 1;
+ c.hasAVX512DQ = f_7_EBX >> 17 & 1;
+ c.hasRDSEED = f_7_EBX >> 18 & 1;
+ c.hasADX = f_7_EBX >> 19 & 1;
+ c.hasAVX512PF = f_7_EBX >> 26 & 1;
+ c.hasAVX512ER = f_7_EBX >> 27 & 1;
+ c.hasAVX512CD = f_7_EBX >> 28 & 1;
+ c.hasSHA = f_7_EBX >> 29 & 1;
+ c.hasAVX512BW = f_7_EBX >> 30 & 1;
+ c.hasPREFETCHWT1 = f_7_ECX >> 0 & 1;
+ c.hasLAHF = f_81_ECX >> 0 & 1;
+ c.hasLZCNT = c.isIntel && f_81_ECX >> 5 & 1;
+ c.hasABM = c.isAMD && f_81_ECX >> 5 & 1;
+ c.hasSSE4a = c.isAMD && f_81_ECX >> 6 & 1;
+ c.hasXOP = c.isAMD && f_81_ECX >> 11 & 1;
+ c.hasTBM = c.isAMD && f_81_ECX >> 21 & 1;
+ c.hasSYSCALL = c.isIntel && f_81_EDX >> 11 & 1;
+ c.hasMMXEXT = c.isAMD && f_81_EDX >> 22 & 1;
+ c.hasRDTSCP = c.isIntel && f_81_EDX >> 27 & 1;
+ c.has3DNOWEXT = c.isAMD && f_81_EDX >> 30 & 1;
+ c.has3DNOW = c.isAMD && f_81_EDX >> 31 & 1;
+
+ const u32 xcr0 = get_xcr0();
+
+ c.hasAVXOSSUPPORT = c.hasAVX && c.hasOSXSAVE && (xcr0 & 0x06) == 0x06;
+    c.hasAVX512OSSUPPORT = c.hasAVX512F && c.hasOSXSAVE && (xcr0 & 0xE6) == 0xE6;
+
+#ifdef KFR_AVAIL_AVX512
+ if (c.hasAVX512F && c.hasAVX512BW && c.hasAVX512DQ && c.hasAVX512OSSUPPORT)
+ return cpu_t::avx3;
+#endif
+#ifdef KFR_AVAIL_AVX2
+ if (c.hasAVX2 && c.hasAVXOSSUPPORT)
+ return cpu_t::avx2;
+#endif
+#ifdef KFR_AVAIL_AVX
+ if (c.hasAVX && c.hasAVXOSSUPPORT)
+ return cpu_t::avx1;
+#endif
+#ifdef KFR_AVAIL_SSE41
+ if (c.hasSSE41)
+ return cpu_t::sse41;
+#endif
+#ifdef KFR_AVAIL_SSSE3
+ if (c.hasSSSE3)
+ return cpu_t::ssse3;
+#endif
+#ifdef KFR_AVAIL_SSE3
+ if (c.hasSSE3)
+ return cpu_t::sse3;
+#endif
+#ifdef KFR_AVAIL_SSE2
+ if (c.hasSSE2)
+ return cpu_t::sse2;
+#endif
+ return cpu_t::lowest;
+}
+}
+
+namespace internal
+{
+
+KFR_INLINE cpu_t& cpu_v()
+{
+ static cpu_t v1 = cpu_t::native;
+ return v1;
+}
+
+KFR_INLINE char init_cpu_v()
+{
+ cpu_v() = detect_cpu<0>();
+ return 0;
+}
+
+KFR_INLINE char init_dummyvar()
+{
+ static char dummy = init_cpu_v();
+ return dummy;
+}
+
+static char dummyvar = init_dummyvar();
+}
+KFR_INLINE cpu_t get_cpu() { return internal::cpu_v(); }
+}
diff --git a/include/kfr/dispatch/runtimedispatch.hpp b/include/kfr/dispatch/runtimedispatch.hpp
@@ -0,0 +1,173 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+
+#include "../base/dispatch.hpp"
+#include "../base/types.hpp"
+#include "cpuid.hpp"
+
+namespace kfr
+{
+
+namespace internal
+{
+
+template <typename Fn, typename... Args>
+KFR_CPU_INTRIN(sse2)
+auto with_cpu_impl(ccpu_t<cpu_t::sse2>, Fn&& fn, Args&&... args)
+{
+ return fn(std::forward<Args>(args)...);
+}
+
+template <typename Fn, typename... Args>
+KFR_CPU_INTRIN(sse3)
+auto with_cpu_impl(ccpu_t<cpu_t::sse3>, Fn&& fn, Args&&... args)
+{
+ return fn(std::forward<Args>(args)...);
+}
+
+template <typename Fn, typename... Args>
+KFR_CPU_INTRIN(ssse3)
+auto with_cpu_impl(ccpu_t<cpu_t::ssse3>, Fn&& fn, Args&&... args)
+{
+ return fn(std::forward<Args>(args)...);
+}
+
+template <typename Fn, typename... Args>
+KFR_CPU_INTRIN(sse41)
+auto with_cpu_impl(ccpu_t<cpu_t::sse41>, Fn&& fn, Args&&... args)
+{
+ return fn(std::forward<Args>(args)...);
+}
+
+template <typename Fn, typename... Args>
+KFR_CPU_INTRIN(sse42)
+auto with_cpu_impl(ccpu_t<cpu_t::sse42>, Fn&& fn, Args&&... args)
+{
+ return fn(std::forward<Args>(args)...);
+}
+
+template <typename Fn, typename... Args>
+KFR_CPU_INTRIN(avx)
+auto with_cpu_impl(ccpu_t<cpu_t::avx>, Fn&& fn, Args&&... args)
+{
+ return fn(std::forward<Args>(args)...);
+}
+
+template <typename Fn, typename... Args>
+KFR_CPU_INTRIN(avx2)
+auto with_cpu_impl(ccpu_t<cpu_t::avx2>, Fn&& fn, Args&&... args)
+{
+ return fn(std::forward<Args>(args)...);
+}
+}
+
+template <cpu_t cpu, typename Fn, typename... Args>
+KFR_INTRIN auto with_cpu(ccpu_t<cpu>, Fn&& fn, Args&&... args)
+{
+ return internal::with_cpu_impl(ccpu<cpu>, std::forward<Fn>(fn), std::forward<Args>(args)...);
+}
+
+template <cpu_t cpu, typename Fn>
+struct fn_with_cpu
+{
+ template <typename... Args>
+ KFR_INTRIN auto operator()(Args&&... args) -> decltype(std::declval<Fn>()(std::forward<Args>(args)...))
+ {
+ return internal::with_cpu_impl(ccpu<cpu>, std::forward<Fn>(fn), std::forward<Args>(args)...);
+ }
+ Fn fn;
+};
+
+template <cpu_t cpu, typename Fn>
+KFR_INTRIN fn_with_cpu<cpu, Fn> make_with_cpu(ccpu_t<cpu>, Fn&& fn)
+{
+ return { std::forward<Fn>(fn) };
+}
+
+namespace internal
+{
+
+template <typename Fn, cpu_t, cpu_t...>
+struct runtime_dispatcher;
+
+template <typename Fn, cpu_t oldest>
+struct runtime_dispatcher<Fn, oldest>
+{
+ using targetFn = retarget<Fn, oldest>;
+
+ template <typename... Args>
+ KFR_INLINE static result_of<targetFn(Args&&...)> call(Fn&& fn, cpu_t, Args&&... args)
+ {
+ return cpu_caller<oldest>::retarget_call(std::forward<Fn>(fn), std::forward<Args>(args)...);
+ }
+};
+
+template <typename Fn, cpu_t newest, cpu_t next, cpu_t... cpus>
+struct runtime_dispatcher<Fn, newest, next, cpus...>
+{
+ using nextdispatcher = runtime_dispatcher<Fn, next, cpus...>;
+
+ using targetFn = retarget<Fn, newest>;
+
+ template <typename... Args,
+              KFR_ENABLE_IF(is_callable<targetFn, Args&&...>::value && is_enabled<targetFn>::value)>
+ KFR_SINTRIN auto call(Fn&& fn, cpu_t set, Args&&... args)
+ -> decltype(nextdispatcher::call(std::forward<Fn>(fn), set, std::forward<Args>(args)...))
+ {
+ return set >= newest
+ ? cpu_caller<newest>::retarget_call(std::forward<Fn>(fn), std::forward<Args>(args)...)
+ : nextdispatcher::call(std::forward<Fn>(fn), set, std::forward<Args>(args)...);
+ }
+ template <typename... Args,
+ KFR_ENABLE_IF(!(is_callable<targetFn, Args&&...>::value && is_enabled<targetFn>::value))>
+ KFR_SINTRIN auto call(Fn&& fn, cpu_t set, Args&&... args)
+ -> decltype(nextdispatcher::call(std::forward<Fn>(fn), set, std::forward<Args>(args)...))
+ {
+ return nextdispatcher::call(std::forward<Fn>(fn), set, std::forward<Args>(args)...);
+ }
+};
+
+template <typename Fn, cpu_t newest, cpu_t... cpus, typename... Args>
+KFR_INLINE auto runtimedispatch(cvals_t<cpu_t, newest, cpus...>, Fn&& fn, Args&&... args)
+ -> decltype(internal::runtime_dispatcher<Fn, newest, cpus...>::call(std::forward<Fn>(fn), get_cpu(),
+ std::forward<Args>(args)...))
+{
+ return internal::runtime_dispatcher<Fn, newest, cpus...>::call(std::forward<Fn>(fn), get_cpu(),
+ std::forward<Args>(args)...);
+}
+
+template <cpu_t c, typename Fn, typename... Args, KFR_ENABLE_IF(c == cpu_t::runtime)>
+KFR_INLINE auto dispatch(Fn&& fn, Args&&... args) -> decltype(fn(std::forward<Args>(args)...))
+{
+ return runtimedispatch(std::forward<Fn>(fn), std::forward<Args>(args)...);
+}
+}
+
+template <typename Fn, typename cpulist = decltype(cpu_all), typename... Args>
+KFR_INLINE auto runtimedispatch(Fn&& fn, Args&&... args)
+ -> decltype(internal::runtimedispatch<Fn>(cpulist(), std::forward<Fn>(fn), std::forward<Args>(args)...))
+{
+ return internal::runtimedispatch(cpulist(), std::forward<Fn>(fn), std::forward<Args>(args)...);
+}
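+// Usage sketch (illustrative only; `some_functor` is hypothetical and assumes the
+// retarget machinery above provides per-cpu overloads):
+//   auto best = runtimedispatch(some_functor{}, args...); // picks the ISA via get_cpu()
+//   auto sse2 = with_cpu(ccpu<cpu_t::sse2>, some_functor{}, args...); // forces a target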
+}
diff --git a/include/kfr/dsp/biquad.hpp b/include/kfr/dsp/biquad.hpp
@@ -0,0 +1,401 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+
+#include "../base/function.hpp"
+#include "../base/operators.hpp"
+#include "../base/vec.hpp"
+#include <cmath>
+
+#pragma clang diagnostic push
+#if CID_HAS_WARNING("-Winaccessible-base")
+#pragma clang diagnostic ignored "-Winaccessible-base"
+#endif
+
+namespace kfr
+{
+
+enum class biquad_type
+{
+ lowpass,
+ highpass,
+ bandpass,
+ bandstop,
+ peak,
+ notch,
+ lowshelf,
+ highshelf
+};
+
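+// Coefficients describe the standard biquad transfer function
+//   H(z) = (b0 + b1*z^-1 + b2*z^-2) / (a0 + a1*z^-1 + a2*z^-2),
+// with a0 == 1 after normalization.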
+template <typename T>
+struct biquad_params
+{
+ constexpr static bool is_pod = true;
+
+ static_assert(std::is_floating_point<T>::value, "T must be a floating point type");
+ constexpr biquad_params() noexcept : a0(1), a1(0), a2(0), b0(1), b1(0), b2(0) {}
+ constexpr biquad_params(T a0, T a1, T a2, T b0, T b1, T b2) noexcept : a0(a0),
+ a1(a1),
+ a2(a2),
+ b0(b0),
+ b1(b1),
+ b2(b2)
+ {
+ }
+ T a0;
+ T a1;
+ T a2;
+ T b0;
+ T b1;
+ T b2;
+ biquad_params<T> normalized_a0() const
+ {
+ vec<T, 5> v{ a1, a2, b0, b1, b2 };
+ v = v / a0;
+ return { T(1.0), v[0], v[1], v[2], v[3], v[4] };
+ }
+ biquad_params<T> normalized_b0() const { return { a0, a1, a2, T(1.0), b1 / b0, b2 / b0 }; }
+ biquad_params<T> normalized_all() const { return normalized_a0().normalized_b0(); }
+};
+
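+// Allpass design from the RBJ Audio EQ Cookbook: with w0 = frequency (radians per
+// sample) and alpha = sin(w0) / (2*Q), the coefficients normalized by (1 + alpha) are
+//   b0 = (1-alpha)/(1+alpha), b1 = -2*cos(w0)/(1+alpha), b2 = 1, a1 = b1, a2 = b0.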
+template <typename T>
+KFR_INLINE biquad_params<T> biquad_allpass(T frequency, T Q)
+{
+    // alpha = sin(w0) / (2 * Q); the division must bind to the full 2 * Q
+    const T alpha = std::sin(frequency) / (2.0 * Q);
+    const T cs    = std::cos(frequency);
+
+    const T norm = 1.0 / (1.0 + alpha);
+    const T a1   = -2.0 * cs * norm;
+    const T a2   = (1.0 - alpha) * norm;
+    const T b0   = a2;
+    const T b1   = a1;
+    const T b2   = 1.0;
+    return { T(1.0), a1, a2, b0, b1, b2 };
+}
+
+template <typename T>
+KFR_INLINE biquad_params<T> biquad_lowpass(T frequency, T Q)
+{
+ const T K = std::tan(c_pi<T, 1> * frequency);
+ const T K2 = K * K;
+ const T norm = 1 / (1 + K / Q + K2);
+ const T a0 = K2 * norm;
+ const T a1 = 2 * a0;
+ const T a2 = a0;
+ const T b1 = 2 * (K2 - 1) * norm;
+ const T b2 = (1 - K / Q + K2) * norm;
+ return { 1.0, b1, b2, a0, a1, a2 };
+}
+
+template <typename T>
+KFR_INLINE biquad_params<T> biquad_highpass(T frequency, T Q)
+{
+ const T K = std::tan(c_pi<T, 1> * frequency);
+ const T K2 = K * K;
+ const T norm = 1 / (1 + K / Q + K2);
+ const T a0 = 1 * norm;
+ const T a1 = -2 * a0;
+ const T a2 = a0;
+ const T b1 = 2 * (K2 - 1) * norm;
+ const T b2 = (1 - K / Q + K2) * norm;
+ return { 1.0, b1, b2, a0, a1, a2 };
+}
+
+template <typename T>
+KFR_INLINE biquad_params<T> biquad_bandpass(T frequency, T Q)
+{
+ const T K = std::tan(c_pi<T, 1> * frequency);
+ const T K2 = K * K;
+ const T norm = 1 / (1 + K / Q + K2);
+ const T a0 = K / Q * norm;
+ const T a1 = 0;
+ const T a2 = -a0;
+ const T b1 = 2 * (K2 - 1) * norm;
+ const T b2 = (1 - K / Q + K2) * norm;
+ return { 1.0, b1, b2, a0, a1, a2 };
+}
+
+template <typename T>
+KFR_INLINE biquad_params<T> biquad_notch(T frequency, T Q)
+{
+ const T K = std::tan(c_pi<T, 1> * frequency);
+ const T K2 = K * K;
+ const T norm = 1 / (1 + K / Q + K2);
+ const T a0 = (1 + K2) * norm;
+ const T a1 = 2 * (K2 - 1) * norm;
+ const T a2 = a0;
+ const T b1 = a1;
+ const T b2 = (1 - K / Q + K2) * norm;
+ return { 1.0, b1, b2, a0, a1, a2 };
+}
+
+template <typename T>
+KFR_INLINE biquad_params<T> biquad_peak(T frequency, T Q, T gain)
+{
+ biquad_params<T> result;
+ const T K = std::tan(c_pi<T, 1> * frequency);
+ const T K2 = K * K;
+ const T V = std::exp(std::abs(gain) * (1.0 / 20.0) * c_log_10<T>);
+
+ if (gain >= 0)
+ { // boost
+ const T norm = 1 / (1 + 1 / Q * K + K2);
+ const T a0 = (1 + V / Q * K + K2) * norm;
+ const T a1 = 2 * (K2 - 1) * norm;
+ const T a2 = (1 - V / Q * K + K2) * norm;
+ const T b1 = a1;
+ const T b2 = (1 - 1 / Q * K + K2) * norm;
+ result = { 1.0, b1, b2, a0, a1, a2 };
+ }
+ else
+ { // cut
+ const T norm = 1 / (1 + V / Q * K + K2);
+ const T a0 = (1 + 1 / Q * K + K2) * norm;
+ const T a1 = 2 * (K2 - 1) * norm;
+ const T a2 = (1 - 1 / Q * K + K2) * norm;
+ const T b1 = a1;
+ const T b2 = (1 - V / Q * K + K2) * norm;
+ result = { 1.0, b1, b2, a0, a1, a2 };
+ }
+ return result;
+}
+
+template <typename T>
+KFR_INLINE biquad_params<T> biquad_lowshelf(T frequency, T gain)
+{
+ biquad_params<T> result;
+ const T K = std::tan(c_pi<T, 1> * frequency);
+ const T K2 = K * K;
+ const T V = std::exp(std::fabs(gain) * (1.0 / 20.0) * c_log_10<T>);
+
+ if (gain >= 0)
+ { // boost
+ const T norm = 1 / (1 + c_sqrt_2<T> * K + K2);
+ const T a0 = (1 + std::sqrt(2 * V) * K + V * K2) * norm;
+ const T a1 = 2 * (V * K2 - 1) * norm;
+ const T a2 = (1 - std::sqrt(2 * V) * K + V * K2) * norm;
+ const T b1 = 2 * (K2 - 1) * norm;
+ const T b2 = (1 - c_sqrt_2<T> * K + K2) * norm;
+ result = { 1.0, b1, b2, a0, a1, a2 };
+ }
+ else
+ { // cut
+ const T norm = 1 / (1 + std::sqrt(2 * V) * K + V * K2);
+ const T a0 = (1 + c_sqrt_2<T> * K + K2) * norm;
+ const T a1 = 2 * (K2 - 1) * norm;
+ const T a2 = (1 - c_sqrt_2<T> * K + K2) * norm;
+ const T b1 = 2 * (V * K2 - 1) * norm;
+ const T b2 = (1 - std::sqrt(2 * V) * K + V * K2) * norm;
+ result = { 1.0, b1, b2, a0, a1, a2 };
+ }
+ return result;
+}
+
+template <typename T>
+KFR_INLINE biquad_params<T> biquad_highshelf(T frequency, T gain)
+{
+ biquad_params<T> result;
+ const T K = std::tan(c_pi<T, 1> * frequency);
+ const T K2 = K * K;
+ const T V = std::exp(std::fabs(gain) * (1.0 / 20.0) * c_log_10<T>);
+
+ if (gain >= 0)
+ { // boost
+ const T norm = 1 / (1 + c_sqrt_2<T> * K + K2);
+ const T a0 = (V + std::sqrt(2 * V) * K + K2) * norm;
+ const T a1 = 2 * (K2 - V) * norm;
+ const T a2 = (V - std::sqrt(2 * V) * K + K2) * norm;
+ const T b1 = 2 * (K2 - 1) * norm;
+ const T b2 = (1 - c_sqrt_2<T> * K + K2) * norm;
+ result = { 1.0, b1, b2, a0, a1, a2 };
+ }
+ else
+ { // cut
+ const T norm = 1 / (V + std::sqrt(2 * V) * K + K2);
+ const T a0 = (1 + c_sqrt_2<T> * K + K2) * norm;
+ const T a1 = 2 * (K2 - 1) * norm;
+ const T a2 = (1 - c_sqrt_2<T> * K + K2) * norm;
+ const T b1 = 2 * (K2 - V) * norm;
+ const T b2 = (V - std::sqrt(2 * V) * K + K2) * norm;
+ result = { 1.0, b1, b2, a0, a1, a2 };
+ }
+ return result;
+}
+
+namespace internal
+{
+template <cpu_t cpu = cpu_t::native>
+struct in_biquad
+{
+private:
+public:
+ template <typename T, size_t filters>
+ struct biquad_block
+ {
+ vec<T, filters> s1;
+ vec<T, filters> s2;
+ vec<T, filters> a1;
+ vec<T, filters> a2;
+ vec<T, filters> b0;
+ vec<T, filters> b1;
+ vec<T, filters> b2;
+
+ vec<T, filters> out;
+ biquad_block() : s1(), s2(), a1(), a2(), b0(), b1(), b2(), out() {}
+ biquad_block(const biquad_params<T>* bq, size_t count) : s1(), s2(), out()
+ {
+ count = count > filters ? filters : count;
+ for (size_t i = 0; i < count; i++)
+ {
+ a1(i) = bq[i].a1;
+ a2(i) = bq[i].a2;
+ b0(i) = bq[i].b0;
+ b1(i) = bq[i].b1;
+ b2(i) = bq[i].b2;
+ }
+ for (size_t i = count; i < filters; i++)
+ {
+ a1(i) = T(0);
+ a2(i) = T(0);
+ b0(i) = T(1);
+ b1(i) = T(0);
+ b2(i) = T(0);
+ }
+ }
+
+ template <size_t count>
+ biquad_block(const biquad_params<T> (&bq)[count]) : biquad_block(bq, count)
+ {
+ static_assert(count <= filters, "count > filters");
+ }
+ };
+
+ template <typename T, typename E1>
+ struct expression_biquad : public expression<E1>
+ {
+ using value_type = T;
+
+ template <cpu_t newcpu>
+ using retarget_this = typename in_biquad<newcpu>::template expression_biquad<T, retarget<E1, newcpu>>;
+
+ expression_biquad(const biquad_params<T>& bq, E1&& e1) noexcept
+ : expression<E1>(std::forward<E1>(e1)),
+ bq(bq)
+ {
+ }
+ template <typename U, size_t width>
+ inline vec<U, width> operator()(cinput_t, size_t index, vec_t<U, width> t)
+ {
+ const vec<T, width> in = cast<T>(this->argument_first(index, t));
+ const vec<T, width> in1 = insertleft(x[0], in);
+ const vec<T, width> in2 = insertleft(x[1], in1);
+ vec<T, width> out = bq.b0 * in + bq.b1 * in1 + bq.b2 * in2;
+
+ out(0) = out[0] - bq.a1 * y[0] - bq.a2 * y[1];
+ out(1) = out[1] - bq.a1 * out[0] - bq.a2 * y[0];
+
+ KFR_LOOP_UNROLL
+ for (size_t i = 2; i < width; i++)
+ {
+ out(i) = out[i] - bq.a1 * out[i - 1] - bq.a2 * out[i - 2];
+ }
+
+ x(1) = in[width - 2];
+ x(0) = in[width - 1];
+
+ y(1) = out[width - 2];
+ y(0) = out[width - 1];
+ return cast<U>(out);
+ }
+ template <typename U>
+ inline vec<U, 1> operator()(cinput_t, size_t index, vec_t<U, 1> t)
+ {
+ T in = cast<T>(this->argument_first(index, t))[0];
+
+ T out = bq.b0 * in + bq.b1 * x[0] + bq.b2 * x[1] - bq.a1 * y[0] - bq.a2 * y[1];
+ x(1) = x[0];
+ x(0) = in;
+ y(1) = y[0];
+ y(0) = out;
+ return cast<U>(out);
+ }
+ biquad_params<T> bq;
+ mutable vec<T, 2> x = T(0);
+ mutable vec<T, 2> y = T(0);
+ };
+
+ template <size_t filters, typename T, typename E1>
+ struct expression_biquads : public expression<E1>
+ {
+ using value_type = T;
+
+ template <cpu_t newcpu>
+ using retarget_this =
+ typename in_biquad<newcpu>::template expression_biquads<filters, T, retarget<E1, newcpu>>;
+
+ expression_biquads(const biquad_block<T, filters>& bq, E1&& e1)
+ : expression<E1>(std::forward<E1>(e1)), bq(bq)
+ {
+ }
+ template <size_t width>
+ inline vec<T, width> operator()(cinput_t, size_t index, vec_t<T, width> t) const
+ {
+ const vec<T, width> in = this->argument_first(index, t);
+ vec<T, width> out;
+
+ KFR_LOOP_UNROLL
+ for (size_t i = 0; i < width; i++)
+ {
+ bq.out = process(insertleft(in[i], bq.out));
+ out(i) = bq.out[filters - 1];
+ }
+
+ return out;
+ }
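+
+        // One step of all `filters` biquads in transposed direct form II:
+        //   y[n] = b0 * x[n] + s1
+        //   s1   = s2 + b1 * x[n] - a1 * y[n]
+        //   s2   = b2 * x[n] - a2 * y[n]
+        // operator() shifts each input sample through the cascade with insertleft,
+        // so filter i consumes the previous output of filter i - 1.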
+ KFR_INLINE vec<T, filters> process(vec<T, filters> in) const
+ {
+ const vec<T, filters> out = bq.b0 * in + bq.s1;
+ bq.s1 = bq.s2 + bq.b1 * in - bq.a1 * out;
+ bq.s2 = bq.b2 * in - bq.a2 * out;
+ return out;
+ }
+ mutable biquad_block<T, filters> bq;
+ };
+};
+}
+
+template <typename T, typename E1>
+KFR_INLINE internal::in_biquad<>::expression_biquad<T, internal::arg<E1>> biquad(const biquad_params<T>& bq,
+ E1&& e1)
+{
+ return internal::in_biquad<>::expression_biquad<T, internal::arg<E1>>(bq, std::forward<E1>(e1));
+}
+template <size_t filters, typename T, typename E1>
+KFR_INLINE internal::in_biquad<>::expression_biquads<filters, T, internal::arg<E1>> biquad(
+ const biquad_params<T> (&bq)[filters], E1&& e1)
+{
+ return internal::in_biquad<>::expression_biquads<filters, T, internal::arg<E1>>(bq, std::forward<E1>(e1));
+}
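+// Usage sketch (assuming `x` is any input expression):
+//   auto y = biquad(biquad_highshelf(0.3, +6.0), x);
+// applies a single lazily-evaluated biquad stage; the array overload above cascades
+// up to `filters` stages in one pass.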
+}
+
+#pragma clang diagnostic pop
diff --git a/include/kfr/dsp/fir.hpp b/include/kfr/dsp/fir.hpp
@@ -0,0 +1,280 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+
+#include "../base/memory.hpp"
+#include "../base/sin_cos.hpp"
+#include "../base/vec.hpp"
+#include "../expressions/basic.hpp"
+#include "../expressions/operators.hpp"
+#include "../expressions/reduce.hpp"
+#include "window.hpp"
+
+#pragma clang diagnostic push
+#if CID_HAS_WARNING("-Winaccessible-base")
+#pragma clang diagnostic ignored "-Winaccessible-base"
+#endif
+
+namespace kfr
+{
+
+template <typename T, size_t Size>
+using fir_taps = univector<T, Size>;
+
+namespace internal
+{
+template <cpu_t cpu = cpu_t::native>
+struct in_fir : in_sqrt<cpu>, in_abs<cpu>, in_log_exp<cpu>, in_sin_cos<cpu>, in_window<cpu>, in_reduce<cpu>
+{
+private:
+ using in_sqrt<cpu>::sqrt;
+ using in_abs<cpu>::abs;
+ using in_log_exp<cpu>::log;
+ using in_log_exp<cpu>::exp;
+ using in_log_exp<cpu>::log_fmadd;
+ using in_log_exp<cpu>::exp_fmadd;
+ using in_log_exp<cpu>::exp10;
+ using typename in_sin_cos<cpu>::fn_sinc;
+ using in_reduce<cpu>::reduce;
+ using in_reduce<cpu>::dotproduct;
+ using in_reduce<cpu>::sum;
+
+public:
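+    // All four designers below use the windowed-sinc method: sample the ideal
+    // brick-wall impulse response (a scaled sinc) symmetrically about the center
+    // tap, multiply by the supplied window expression, and optionally normalize
+    // the taps so the nominal passband gain is exactly 1.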
+ template <typename T>
+ KFR_SINTRIN void fir_lowpass(univector_ref<T> taps, T cutoff, const expression_pointer<T>& window,
+ bool normalize = true)
+ {
+ const T scale = 2.0 * cutoff;
+ taps = bind_expression(fn_sinc(), symmlinspace<T, true>((taps.size() - 1) * cutoff * c_pi<T>,
+ taps.size(), true)) *
+ scale * window;
+
+ if (is_odd(taps.size()))
+ taps[taps.size() / 2] = scale;
+
+ if (normalize)
+ {
+ const T invsum = reciprocal(sum(taps));
+ taps = taps * invsum;
+ }
+ }
+ template <typename T>
+ KFR_SINTRIN void fir_highpass(univector_ref<T> taps, T cutoff, const expression_pointer<T>& window,
+ bool normalize = true)
+ {
+ const T scale = 2.0 * -cutoff;
+ taps = bind_expression(fn_sinc(), symmlinspace<T, true>((taps.size() - 1) * cutoff * c_pi<T>,
+ taps.size(), true)) *
+ scale * window;
+
+ if (is_odd(taps.size()))
+ taps[taps.size() / 2] = 1 - 2.0 * cutoff;
+
+ if (normalize)
+ {
+ const T invsum = reciprocal(sum(taps) + 1);
+ taps = taps * invsum;
+ }
+ }
+
+ template <typename T>
+ KFR_SINTRIN void fir_bandpass(univector_ref<T> taps, T frequency1, T frequency2,
+ const expression_pointer<T>& window, bool normalize = true)
+ {
+ const T scale1 = 2.0 * frequency1;
+ const T scale2 = 2.0 * frequency2;
+ const T sc = c_pi<T> * T(taps.size() - 1);
+ const T start1 = sc * frequency1;
+ const T start2 = sc * frequency2;
+
+ taps = (bind_expression(fn_sinc(), symmlinspace<T, true>(start2, taps.size(), true)) * scale2 -
+ bind_expression(fn_sinc(), symmlinspace<T, true>(start1, taps.size(), true)) * scale1) *
+ window;
+
+ if (is_odd(taps.size()))
+ taps[taps.size() / 2] = 2 * (frequency2 - frequency1);
+
+ if (normalize)
+ {
+ const T invsum = reciprocal(sum(taps) + 1);
+ taps = taps * invsum;
+ }
+ }
+
+ template <typename T>
+ KFR_SINTRIN void fir_bandstop(univector_ref<T> taps, T frequency1, T frequency2,
+ const expression_pointer<T>& window, bool normalize = true)
+ {
+ const T scale1 = 2.0 * frequency1;
+ const T scale2 = 2.0 * frequency2;
+ const T sc = c_pi<T> * T(taps.size() - 1);
+ const T start1 = sc * frequency1;
+ const T start2 = sc * frequency2;
+
+ taps = (bind_expression(fn_sinc(), symmlinspace<T, true>(start1, taps.size(), true)) * scale1 -
+ bind_expression(fn_sinc(), symmlinspace<T, true>(start2, taps.size(), true)) * scale2) *
+ window;
+
+ if (is_odd(taps.size()))
+ taps[taps.size() / 2] = 1 - 2 * (frequency2 - frequency1);
+
+ if (normalize)
+ {
+ const T invsum = reciprocal(sum(taps));
+ taps = taps * invsum;
+ }
+ }
+
+ template <size_t index, size_t order, typename T, size_t N>
+    KFR_SINTRIN void convolve_round(vec<T, N>& output, vec<T, order> input, vec<T, order> taps,
+ vec<T, order> delay)
+ {
+ output(index) = dot(taps, rotatetwo<index + 1>(delay, input));
+ }
+
+ template <size_t index, size_t order, typename T, size_t N, KFR_ENABLE_IF(index >= N)>
+    KFR_SINTRIN void convolve_rounds(vec<T, N>& /*output*/, vec<T, order> /*input*/, vec<T, order> /*taps*/,
+ vec<T, order> /*delay*/)
+ {
+ }
+
+ template <size_t index, size_t order, typename T, size_t N, KFR_ENABLE_IF(index < N)>
+    KFR_SINTRIN void convolve_rounds(vec<T, N>& output, vec<T, order> input, vec<T, order> taps,
+ vec<T, order> delay)
+ {
+        convolve_round<index, order, T, N>(output, input, taps, delay);
+        convolve_rounds<index + 1, order, T, N>(output, input, taps, delay);
+ }
+
+ template <size_t tapcount, typename T, typename E1>
+ struct expression_short_fir : expression<E1>
+ {
+ static_assert(is_poweroftwo(tapcount), "tapcount must be a power of two");
+ template <cpu_t newcpu>
+ using retarget_this =
+ typename in_fir<newcpu>::template expression_short_fir<tapcount, T, retarget<E1, newcpu>>;
+
+ expression_short_fir(E1&& e1, const array_ref<T>& taps)
+ : expression<E1>(std::forward<E1>(e1)), taps(taps)
+ {
+ }
+ template <typename U, size_t N>
+ KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N> x)
+ {
+ const vec<T, N> in = cast<T>(this->argument_first(index, x));
+
+ vec<T, N> out;
+ vec<T, tapcount> winput = widen<tapcount>(in);
+ winput = reverse(winput);
+            convolve_rounds<0, tapcount, T, N>(out, winput, taps, delayline);
+ delayline = rotatetwo<N>(delayline, winput);
+
+ return cast<U>(out);
+ }
+ const vec<T, tapcount> taps;
+ vec<T, tapcount> delayline;
+ };
+
+ template <typename T, typename E1>
+ struct expression_fir : expression<E1>
+ {
+ template <cpu_t newcpu>
+ using retarget_this = typename in_fir<newcpu>::template expression_fir<T, retarget<E1, newcpu>>;
+
+ expression_fir(E1&& e1, const array_ref<const T>& taps)
+ : expression<E1>(std::forward<E1>(e1)), taps(taps), delayline(taps.size(), T()),
+ delayline_cursor(0)
+ {
+ }
+ template <typename U, size_t N>
+ KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N> x)
+ {
+ const size_t tapcount = taps.size();
+ const vec<T, N> input = cast<T>(this->argument_first(index, x));
+
+ vec<T, N> output;
+ size_t cursor = delayline_cursor;
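+            // `delayline` is used as a ring buffer; each output sample is the full
+            // convolution of the taps with the delay line, computed as two contiguous
+            // dot products split at the ring-buffer cursor.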
+ KFR_LOOP_NOUNROLL
+ for (size_t i = 0; i < N; i++)
+ {
+ delayline.ringbuf_write(cursor, input[i]);
+ output(i) = dotproduct(taps, delayline.slice(cursor) /*, tapcount - cursor*/) +
+ dotproduct(taps.slice(tapcount - cursor), delayline /*, cursor*/);
+ }
+ delayline_cursor = cursor;
+ return cast<U>(output);
+ }
+ const univector_dyn<T> taps;
+ univector_dyn<T> delayline;
+ size_t delayline_cursor;
+ };
+ KFR_SPEC_FN(in_fir, fir_lowpass)
+ KFR_SPEC_FN(in_fir, fir_highpass)
+ KFR_SPEC_FN(in_fir, fir_bandpass)
+ KFR_SPEC_FN(in_fir, fir_bandstop)
+};
+}
+
+namespace native
+{
+template <typename T, size_t Tag>
+KFR_INLINE void fir_lowpass(univector<T, Tag>& taps, identity<T> cutoff, const expression_pointer<T>& window,
+ bool normalize = true)
+{
+ return internal::in_fir<>::fir_lowpass(taps.slice(), cutoff, window, normalize);
+}
+template <typename T, size_t Tag>
+KFR_INLINE void fir_highpass(univector<T, Tag>& taps, identity<T> cutoff, const expression_pointer<T>& window,
+ bool normalize = true)
+{
+ return internal::in_fir<>::fir_highpass(taps.slice(), cutoff, window, normalize);
+}
+template <typename T, size_t Tag>
+KFR_INLINE void fir_bandpass(univector<T, Tag>& taps, identity<T> frequency1, identity<T> frequency2,
+ const expression_pointer<T>& window, bool normalize = true)
+{
+ return internal::in_fir<>::fir_bandpass(taps.slice(), frequency1, frequency2, window, normalize);
+}
+template <typename T, size_t Tag>
+KFR_INLINE void fir_bandstop(univector<T, Tag>& taps, identity<T> frequency1, identity<T> frequency2,
+ const expression_pointer<T>& window, bool normalize = true)
+{
+ return internal::in_fir<>::fir_bandstop(taps.slice(), frequency1, frequency2, window, normalize);
+}
+
+template <typename T, typename E1, size_t Tag>
+KFR_INLINE internal::in_fir<>::expression_fir<T, E1> fir(E1&& e1, const univector<T, Tag>& taps)
+{
+ return internal::in_fir<>::expression_fir<T, E1>(std::forward<E1>(e1), taps.ref());
+}
+template <typename T, size_t TapCount, typename E1>
+KFR_INLINE internal::in_fir<>::expression_short_fir<TapCount, T, E1> short_fir(
+ E1&& e1, const univector<T, TapCount>& taps)
+{
+ static_assert(TapCount >= 1 && TapCount < 16, "Use short_fir only for small FIR filters");
+ return internal::in_fir<>::expression_short_fir<TapCount, T, E1>(std::forward<E1>(e1), taps.ref());
+}
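+// Usage sketch (assuming `x` is an input expression and `win` is an
+// expression_pointer<float> obtained from window.hpp):
+//   univector<float> taps(63);
+//   fir_lowpass(taps, 0.2f, win);
+//   auto y = fir(x, taps);
+// short_fir is the fixed-size variant for very short kernels (the expression
+// additionally requires a power-of-two tap count).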
+}
+}
+
+#pragma clang diagnostic pop
diff --git a/include/kfr/dsp/goertzel.hpp b/include/kfr/dsp/goertzel.hpp
@@ -0,0 +1,126 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+
+#include "../base/complex.hpp"
+#include "../base/sin_cos.hpp"
+#include "../base/vec.hpp"
+#include "../expressions/basic.hpp"
+
+namespace kfr
+{
+namespace internal
+{
+
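+// Goertzel algorithm: evaluates a single DFT bin via the recurrence
+//   q0 = 2 * cos(omega) * q1 - q2 + x[n]
+// after which X(omega) = q1 - q2 * e^(-j * omega); the destructors below unpack
+// that value into the real and imaginary parts of `result`.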
+template <cpu_t c = cpu_t::native, cpu_t cc = c>
+struct in_goertzel : in_sin_cos<cc>
+{
+private:
+ using in_sin_cos<cc>::sin;
+ using in_sin_cos<cc>::cos;
+
+public:
+ template <typename T>
+ struct expression_goertzel : output_expression
+ {
+ expression_goertzel(complex<T>& result, identity<T> omega)
+ : result(result), omega(omega), coeff(2 * cos(omega)), q0(), q1(), q2()
+ {
+ }
+ ~expression_goertzel()
+ {
+ result.real(q1 - q2 * cos(omega));
+ result.imag(q2 * sin(omega));
+ }
+ template <typename U, size_t N>
+ KFR_INLINE void operator()(coutput_t, size_t index, vec<U, N> x)
+ {
+ vec<T, N> in = cast<T>(x);
+ KFR_LOOP_UNROLL
+ for (size_t i = 0; i < N; i++)
+ {
+ q0 = coeff * q1 - q2 + in[i];
+ q2 = q1;
+ q1 = q0;
+ }
+ }
+ complex<T>& result;
+ const T omega;
+ const T coeff;
+ T q0;
+ T q1;
+ T q2;
+ };
+
+ template <typename T, size_t width>
+ struct expression_parallel_goertzel : output_expression
+ {
+ expression_parallel_goertzel(complex<T> result[], vec<T, width> omega)
+            : result(result), omega(omega), coeff(2 * cos(omega)), q0(), q1(), q2()
+ {
+ }
+ ~expression_parallel_goertzel()
+ {
+ const vec<T, width> re = q1 - q2 * cos(omega);
+ const vec<T, width> im = q2 * sin(omega);
+ for (size_t i = 0; i < width; i++)
+ {
+ result[i].real(re[i]);
+ result[i].imag(im[i]);
+ }
+ }
+ template <typename U, size_t N>
+ KFR_INLINE void operator()(coutput_t, size_t index, vec<U, N> x)
+ {
+ const vec<T, N> in = cast<T>(x);
+ KFR_LOOP_UNROLL
+ for (size_t i = 0; i < N; i++)
+ {
+ q0 = coeff * q1 - q2 + in[i];
+ q2 = q1;
+ q1 = q0;
+ }
+ }
+        complex<T>* result;
+ const vec<T, width> omega;
+ const vec<T, width> coeff;
+ vec<T, width> q0;
+ vec<T, width> q1;
+ vec<T, width> q2;
+ };
+
+ template <typename T>
+ KFR_SINTRIN expression_goertzel<T> goertzel(complex<T>& result, identity<T> omega)
+ {
+ return expression_goertzel<T>(result, omega);
+ }
+
+ template <typename T, size_t width>
+ KFR_SINTRIN expression_parallel_goertzel<T, width> goertzel(complex<T> (&result)[width],
+ const T (&omega)[width])
+ {
+ return expression_parallel_goertzel<T, width>(result, read<width>(omega));
+ }
+};
+}
+}
diff --git a/include/kfr/dsp/interpolation.hpp b/include/kfr/dsp/interpolation.hpp
@@ -0,0 +1,86 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+
+#include "../base/select.hpp"
+#include "../base/sin_cos.hpp"
+#include "../base/vec.hpp"
+
+namespace kfr
+{
+namespace internal
+{
+template <cpu_t c = cpu_t::native, cpu_t cc = c>
+struct in_interpolation : in_sin_cos<cc>, in_select<cc>
+{
+private:
+ using in_sin_cos<cc>::fastcos;
+ using in_select<cc>::select;
+
+public:
+ template <typename T, typename M>
+ KFR_SINTRIN T nearest(M mu, T x1, T x2)
+ {
+ return select(mu < M(0.5), x1, x2);
+ }
+
+ template <typename T, typename M>
+ KFR_SINTRIN T linear(M mu, T x1, T x2)
+ {
+ return mix(mu, x1, x2);
+ }
+
+ template <typename T, typename M>
+ KFR_SINTRIN T cosine(M mu, T x1, T x2)
+ {
+ return mix((M(1) - fastcos(mu * c_pi<T>)) * M(0.5), x1, x2);
+ }
+
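+    // Four-point cubic: evaluate a cubic at fractional position mu in [0, 1]
+    // between x1 and x2, with x0 and x3 as outer support points, via Horner's
+    // scheme. catmullrom below is the same evaluation with the Catmull-Rom
+    // (tension 0.5) basis, which is C1-continuous across segments.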
+ template <typename T, typename M>
+ KFR_SINTRIN T cubic(M mu, T x0, T x1, T x2, T x3)
+ {
+ const T a0 = x3 - x2 - x0 + x1;
+ const T a1 = x0 - x1 - a0;
+ const T a2 = x2 - x0;
+ const T a3 = x1;
+ return horner(mu, a0, a1, a2, a3);
+ }
+
+ template <typename T, typename M>
+ KFR_SINTRIN T catmullrom(M mu, T x0, T x1, T x2, T x3)
+ {
+ const T a0 = T(0.5) * (x3 - x0) - T(1.5) * (x2 - x1);
+ const T a1 = x0 - T(2.5) * x1 + T(2) * x2 - T(0.5) * x3;
+ const T a2 = T(0.5) * (x2 - x0);
+ const T a3 = x1;
+ return horner(mu, a0, a1, a2, a3);
+ }
+
+ KFR_SPEC_FN(in_interpolation, nearest)
+ KFR_SPEC_FN(in_interpolation, linear)
+ KFR_SPEC_FN(in_interpolation, cosine)
+ KFR_SPEC_FN(in_interpolation, cubic)
+ KFR_SPEC_FN(in_interpolation, catmullrom)
+};
+}
+}
diff --git a/include/kfr/dsp/oscillators.hpp b/include/kfr/dsp/oscillators.hpp
@@ -0,0 +1,338 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+
+#include "../base/sin_cos.hpp"
+#include "../base/vec.hpp"
+#include "../expressions/basic.hpp"
+
+#pragma clang diagnostic push
+#if CID_HAS_WARNING("-Winaccessible-base")
+#pragma clang diagnostic ignored "-Winaccessible-base"
+#endif
+
+namespace kfr
+{
+
+inline auto simpleimpulse()
+{
+ return lambda([](cinput_t, size_t index, auto x) {
+ if (index == 0)
+ return onoff(x);
+ else
+ return zerovector(x);
+ });
+}
+
+template <typename T>
+auto jaehne(T magn, size_t size)
+{
+ using namespace native;
+ return typed<T>(magn * sin(c_pi<T, 1, 2> * sqr(linspace(T(0), T(size), size, false)) / size), size);
+}
+
+template <typename T>
+auto swept(T magn, size_t size)
+{
+ using namespace native;
+ return typed<T>(
+ magn * sin(c_pi<T, 1, 4> * sqr(sqr(linspace(T(0), T(size), size, false)) / sqr(T(size))) * T(size)),
+ size);
+}
+
+namespace internal
+{
+template <cpu_t c = cpu_t::native, cpu_t cc = c>
+struct in_oscillators : in_sin_cos<cc>, in_select<cc>, in_round<cc>, in_abs<cc>
+{
+private:
+ using in_sin_cos<cc>::fastsin;
+ using in_sin_cos<cc>::sin;
+ using in_select<cc>::select;
+ using in_round<cc>::fract;
+ using in_abs<cc>::abs;
+
+public:
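+    // Phase conventions: the raw* functions take a normalized phase in [0, 1),
+    // the *norm variants wrap an arbitrary normalized phase with fract(), and the
+    // plain-named versions take radians (scaled by 1 / (2 * pi) internally).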
+ template <typename T>
+ KFR_SINTRIN T rawsine(T x)
+ {
+ return fastsin(x * c_pi<T, 2>);
+ }
+ template <typename T>
+ KFR_SINTRIN T sinenorm(T x)
+ {
+ return rawsine(fract(x));
+ }
+ template <typename T>
+ KFR_SINTRIN T sine(T x)
+ {
+ return sinenorm(c_recip_pi<T, 1, 2> * x);
+ }
+
+ template <typename T>
+ KFR_SINTRIN T rawsquare(T x)
+ {
+ return select(x < T(0.5), T(1), -T(1));
+ }
+ template <typename T>
+ KFR_SINTRIN T squarenorm(T x)
+ {
+ return rawsquare(fract(x));
+ }
+ template <typename T>
+ KFR_SINTRIN T square(T x)
+ {
+ return squarenorm(c_recip_pi<T, 1, 2> * x);
+ }
+
+ template <typename T>
+ KFR_SINTRIN T rawsawtooth(T x)
+ {
+ return T(1) - 2 * x;
+ }
+ template <typename T>
+ KFR_SINTRIN T sawtoothnorm(T x)
+ {
+ return rawsawtooth(fract(x));
+ }
+ template <typename T>
+ KFR_SINTRIN T sawtooth(T x)
+ {
+ return sawtoothnorm(c_recip_pi<T, 1, 2> * x);
+ }
+
+ template <typename T>
+ KFR_SINTRIN T isawtoothnorm(T x)
+ {
+ return T(-1) + 2 * fract(x + 0.5);
+ }
+ template <typename T>
+ KFR_SINTRIN T isawtooth(T x)
+ {
+ return isawtoothnorm(c_recip_pi<T, 1, 2> * x);
+ }
+
+ template <typename T>
+ KFR_SINTRIN T rawtriangle(T x)
+ {
+ return 1 - abs(4 * x - 2);
+ }
+ template <typename T>
+ KFR_SINTRIN T trianglenorm(T x)
+ {
+ return rawtriangle(fract(x + 0.25));
+ }
+ template <typename T>
+ KFR_SINTRIN T triangle(T x)
+ {
+ return trianglenorm(c_recip_pi<T, 1, 2> * x);
+ }
+
+ KFR_SPEC_FN(in_oscillators, rawsine)
+ KFR_SPEC_FN(in_oscillators, sine)
+ KFR_SPEC_FN(in_oscillators, sinenorm)
+ KFR_SPEC_FN(in_oscillators, rawsquare)
+ KFR_SPEC_FN(in_oscillators, square)
+ KFR_SPEC_FN(in_oscillators, squarenorm)
+ KFR_SPEC_FN(in_oscillators, rawtriangle)
+ KFR_SPEC_FN(in_oscillators, triangle)
+ KFR_SPEC_FN(in_oscillators, trianglenorm)
+ KFR_SPEC_FN(in_oscillators, rawsawtooth)
+ KFR_SPEC_FN(in_oscillators, sawtooth)
+ KFR_SPEC_FN(in_oscillators, sawtoothnorm)
+ KFR_SPEC_FN(in_oscillators, isawtooth)
+ KFR_SPEC_FN(in_oscillators, isawtoothnorm)
+};
+}
+
+using fn_rawsine = internal::in_oscillators<>::fn_rawsine;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> rawsine(const T1& x)
+{
+ return internal::in_oscillators<>::rawsine(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_rawsine, E1> rawsine(E1&& x)
+{
+ return { {}, std::forward<E1>(x) };
+}
+using fn_sine = internal::in_oscillators<>::fn_sine;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> sine(const T1& x)
+{
+ return internal::in_oscillators<>::sine(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_sine, E1> sine(E1&& x)
+{
+ return { {}, std::forward<E1>(x) };
+}
+using fn_sinenorm = internal::in_oscillators<>::fn_sinenorm;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> sinenorm(const T1& x)
+{
+ return internal::in_oscillators<>::sinenorm(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_sinenorm, E1> sinenorm(E1&& x)
+{
+ return { {}, std::forward<E1>(x) };
+}
+using fn_rawsquare = internal::in_oscillators<>::fn_rawsquare;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> rawsquare(const T1& x)
+{
+ return internal::in_oscillators<>::rawsquare(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_rawsquare, E1> rawsquare(E1&& x)
+{
+ return { {}, std::forward<E1>(x) };
+}
+using fn_square = internal::in_oscillators<>::fn_square;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> square(const T1& x)
+{
+ return internal::in_oscillators<>::square(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_square, E1> square(E1&& x)
+{
+ return { {}, std::forward<E1>(x) };
+}
+using fn_squarenorm = internal::in_oscillators<>::fn_squarenorm;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> squarenorm(const T1& x)
+{
+ return internal::in_oscillators<>::squarenorm(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_squarenorm, E1> squarenorm(E1&& x)
+{
+ return { {}, std::forward<E1>(x) };
+}
+using fn_rawtriangle = internal::in_oscillators<>::fn_rawtriangle;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> rawtriangle(const T1& x)
+{
+ return internal::in_oscillators<>::rawtriangle(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_rawtriangle, E1> rawtriangle(E1&& x)
+{
+ return { {}, std::forward<E1>(x) };
+}
+using fn_triangle = internal::in_oscillators<>::fn_triangle;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> triangle(const T1& x)
+{
+ return internal::in_oscillators<>::triangle(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_triangle, E1> triangle(E1&& x)
+{
+ return { {}, std::forward<E1>(x) };
+}
+using fn_trianglenorm = internal::in_oscillators<>::fn_trianglenorm;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> trianglenorm(const T1& x)
+{
+ return internal::in_oscillators<>::trianglenorm(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_trianglenorm, E1> trianglenorm(E1&& x)
+{
+ return { {}, std::forward<E1>(x) };
+}
+using fn_rawsawtooth = internal::in_oscillators<>::fn_rawsawtooth;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> rawsawtooth(const T1& x)
+{
+ return internal::in_oscillators<>::rawsawtooth(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_rawsawtooth, E1> rawsawtooth(E1&& x)
+{
+ return { {}, std::forward<E1>(x) };
+}
+using fn_sawtooth = internal::in_oscillators<>::fn_sawtooth;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> sawtooth(const T1& x)
+{
+ return internal::in_oscillators<>::sawtooth(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_sawtooth, E1> sawtooth(E1&& x)
+{
+ return { {}, std::forward<E1>(x) };
+}
+using fn_sawtoothnorm = internal::in_oscillators<>::fn_sawtoothnorm;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> sawtoothnorm(const T1& x)
+{
+ return internal::in_oscillators<>::sawtoothnorm(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_sawtoothnorm, E1> sawtoothnorm(E1&& x)
+{
+ return { {}, std::forward<E1>(x) };
+}
+using fn_isawtooth = internal::in_oscillators<>::fn_isawtooth;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> isawtooth(const T1& x)
+{
+ return internal::in_oscillators<>::isawtooth(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_isawtooth, E1> isawtooth(E1&& x)
+{
+ return { {}, std::forward<E1>(x) };
+}
+using fn_isawtoothnorm = internal::in_oscillators<>::fn_isawtoothnorm;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> isawtoothnorm(const T1& x)
+{
+ return internal::in_oscillators<>::isawtoothnorm(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_isawtoothnorm, E1> isawtoothnorm(E1&& x)
+{
+ return { {}, std::forward<E1>(x) };
+}
+}
+
+#pragma clang diagnostic pop
diff --git a/include/kfr/dsp/resample.hpp b/include/kfr/dsp/resample.hpp
@@ -0,0 +1,244 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+
+#include "../base/function.hpp"
+#include "../base/memory.hpp"
+#include "../base/vec.hpp"
+#include "../expressions/reduce.hpp"
+#include "window.hpp"
+
+#pragma clang diagnostic push
+#if CID_HAS_WARNING("-Winaccessible-base")
+#pragma clang diagnostic ignored "-Winaccessible-base"
+#endif
+
+namespace kfr
+{
+namespace resample_quality
+{
+constexpr csize_t<4> draft{};
+constexpr csize_t<6> low{};
+constexpr csize_t<8> normal{};
+constexpr csize_t<10> high{};
+}
+
+namespace internal
+{
+template <cpu_t cc = cpu_t::native>
+struct in_resampling : in_sqrt<cc>, in_abs<cc>, in_log_exp<cc>, in_sin_cos<cc>, in_window<cc>, in_reduce<cc>
+{
+private:
+ using in_sqrt<cc>::sqrt;
+ using in_abs<cc>::abs;
+ using in_log_exp<cc>::log;
+ using in_log_exp<cc>::exp;
+ using in_log_exp<cc>::log_fmadd;
+ using in_log_exp<cc>::exp_fmadd;
+ using in_log_exp<cc>::exp10;
+ using in_sin_cos<cc>::cos;
+ using in_sin_cos<cc>::sinc;
+ using in_reduce<cc>::dotproduct;
+ using in_reduce<cc>::sum;
+
+public:
+ template <typename T1, typename T2>
+ static inline T1 blackman(T1 n, T2 a)
+ {
+ const T1 a0 = (1 - a) * 0.5;
+ const T1 a1 = 0.5;
+ const T1 a2 = a * 0.5;
+ n = n * c_pi<T1, 2>;
+ return a0 - a1 * cos(n) + a2 * cos(2 * n);
+ }
+
+ template <typename T, size_t quality>
+ struct resampler
+ {
+ template <cpu_t newcpu>
+ using retarget_this = typename in_resampling<newcpu>::template resampler<T, quality>;
+
+ using itype = i64;
+
+ constexpr static itype depth = static_cast<itype>(1 << (quality + 1));
+
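+        // Polyphase rational resampler: one windowed-sinc prototype of
+        // depth * interpolation_factor coefficients is designed up front, stored
+        // phase-major with stride `depth`, and each output sample applies only the
+        // `depth`-tap sub-filter selected by workindex % interpolation_factor.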
+ resampler(itype interpolation_factor, itype decimation_factor, T scale = T(1), T cutoff = 0.49)
+ : input_position(0), output_position(0)
+ {
+ const i64 gcf = gcd(interpolation_factor, decimation_factor);
+ interpolation_factor /= gcf;
+ decimation_factor /= gcf;
+
+ taps = depth * interpolation_factor;
+ order = size_t(depth * interpolation_factor - 1);
+
+ this->interpolation_factor = interpolation_factor;
+ this->decimation_factor = decimation_factor;
+
+ const itype halftaps = taps / 2;
+ filter = univector<T>(size_t(taps), T());
+ delay = univector<T>(size_t(depth), T());
+
+ cutoff = cutoff / std::max(decimation_factor, interpolation_factor);
+
+ for (itype j = 0, jj = 0; j < taps; j++)
+ {
+ filter[size_t(j)] = scale * 2 * interpolation_factor * cutoff *
+ sinc((jj - halftaps) * cutoff * c_pi<T, 2>) *
+ blackman(T(jj) / T(taps - 1), T(0.16));
+                jj += interpolation_factor;
+ if (jj >= taps)
+ jj = jj - taps + 1;
+ }
+
+ const T s = reciprocal(sum(filter)) * interpolation_factor;
+ filter = filter * s;
+ }
+ KFR_INLINE size_t operator()(T* dest, size_t zerosize)
+ {
+ size_t outputsize = 0;
+ const itype srcsize = itype(zerosize);
+
+ for (size_t i = 0;; i++)
+ {
+ const itype ii = itype(i) + output_position;
+                const itype workindex = ii * decimation_factor;
+                const itype workindex_rem = workindex % interpolation_factor;
+                const itype start = workindex_rem ? interpolation_factor - workindex_rem : 0;
+                itype srcindex = workindex / interpolation_factor;
+ srcindex = workindex_rem ? srcindex + 1 : srcindex;
+ const univector_ref<T> tap_ptr = filter.slice(static_cast<size_t>(start * depth));
+ srcindex = srcindex - (depth - 1);
+
+ if (srcindex + depth >= input_position + srcsize)
+ break;
+ outputsize++;
+
+ if (dest)
+ {
+ if (srcindex >= input_position)
+ {
+ dest[i] = T(0);
+ }
+ else
+ {
+ const itype prev_count = input_position - srcindex;
+ dest[i] = dotproduct(delay.slice(size_t(depth - prev_count)), tap_ptr);
+ }
+ }
+ }
+ if (srcsize >= depth)
+ {
+ delay = zeros();
+ }
+ else
+ {
+ delay.slice(0, size_t(depth - srcsize)) = delay.slice(size_t(srcsize));
+ delay.slice(size_t(depth - srcsize)) = zeros();
+ }
+
+ input_position += srcsize;
+ output_position += outputsize;
+ return outputsize;
+ }
+ KFR_INLINE size_t operator()(T* dest, univector_ref<const T> src)
+ {
+ size_t outputsize = 0;
+ const itype srcsize = itype(src.size());
+
+ for (size_t i = 0;; i++)
+ {
+ const itype ii = itype(i) + output_position;
+                const itype workindex = ii * decimation_factor;
+                const itype workindex_rem = workindex % interpolation_factor;
+                const itype start = workindex_rem ? interpolation_factor - workindex_rem : 0;
+                itype srcindex = workindex / interpolation_factor;
+ srcindex = workindex_rem ? srcindex + 1 : srcindex;
+ const univector_ref<T> tap_ptr = filter.slice(static_cast<size_t>(start * depth));
+ srcindex = srcindex - (depth - 1);
+
+ if (srcindex + depth >= input_position + srcsize)
+ break;
+ outputsize++;
+
+ if (dest)
+ {
+ if (srcindex >= input_position)
+ {
+ dest[i] = dotproduct(src.slice(size_t(srcindex - input_position), size_t(depth)),
+ tap_ptr /*, depth*/);
+ }
+ else
+ {
+ const itype prev_count = input_position - srcindex;
+ dest[i] =
+ dotproduct(delay.slice(size_t(depth - prev_count)),
+ tap_ptr /*, size_t(prev_count)*/) +
+ dotproduct(src, tap_ptr.slice(
+ size_t(prev_count),
+ size_t(depth - prev_count)) /*, size_t(depth - prev_count)*/);
+ }
+ }
+ }
+ if (srcsize >= depth)
+ {
+ delay = src.slice(size_t(srcsize - depth));
+ }
+ else
+ {
+ delay.slice(0, size_t(depth - srcsize)) = delay.slice(size_t(srcsize));
+ delay.slice(size_t(depth - srcsize)) = src;
+ }
+
+ input_position += srcsize;
+ output_position += outputsize;
+ return outputsize;
+ }
+ itype taps;
+ size_t order;
+ itype interpolation_factor;
+ itype decimation_factor;
+ univector<T> filter;
+ univector<T> delay;
+ itype input_position;
+ itype output_position;
+ };
+};
+}
+
+namespace native
+{
+template <typename T, size_t quality>
+inline internal::in_resampling<>::resampler<T, quality> resampler(csize_t<quality>,
+ size_t interpolation_factor,
+ size_t decimation_factor, T scale = T(1),
+ T cutoff = 0.49)
+{
+ using itype = typename internal::in_resampling<>::resampler<T, quality>::itype;
+ return internal::in_resampling<>::resampler<T, quality>(itype(interpolation_factor),
+ itype(decimation_factor), scale, cutoff);
+}
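+// Usage sketch (the template type must be given explicitly, the quality is deduced):
+//   auto r = resampler<float>(resample_quality::high, 48000, 44100);
+// then feed blocks through r(dest, src); it returns the number of samples written
+// and keeps inter-block history in its internal delay line.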
+}
+}
+
+#pragma clang diagnostic pop
diff --git a/include/kfr/dsp/speaker.hpp b/include/kfr/dsp/speaker.hpp
@@ -0,0 +1,91 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+
+namespace kfr
+{
+
+enum class Speaker : int
+{
+ Mono = 0,
+ M = static_cast<int>(Mono),
+ Left = 1,
+ L = static_cast<int>(Left),
+ Right = 2,
+ R = static_cast<int>(Right),
+ Center = 3,
+ C = static_cast<int>(Center),
+ Lfe = 4,
+ Ls = 5,
+ LeftSurround = static_cast<int>(Ls),
+ Rs = 6,
+ RightSurround = static_cast<int>(Rs),
+ Lc = 7,
+ Rc = 8,
+ S = 9,
+ Cs = static_cast<int>(S),
+ Sl = 10,
+ Sr = 11,
+ Tm = 12,
+ Tfl = 13,
+ Tfc = 14,
+ Tfr = 15,
+ Trl = 16,
+ Trc = 17,
+ Trr = 18,
+ Lfe2 = 19
+};
+
+enum class SpeakerArrangement : int
+{
+ Mono = 0,
+ Stereo = 1,
+ StereoSurround = 2,
+ StereoCenter = 3,
+ StereoSide = 4,
+ StereoCLfe = 5,
+ Cine30 = 6,
+ Music30 = 7,
+ Cine31 = 8,
+ Music31 = 9,
+ Cine40 = 10,
+ Music40 = 11,
+ Cine41 = 12,
+ Music41 = 13,
+ Arr50 = 14,
+ Arr51 = 15,
+ Cine60 = 16,
+ Music60 = 17,
+ Cine61 = 18,
+ Music61 = 19,
+ Cine70 = 20,
+ Music70 = 21,
+ Cine71 = 22,
+ Music71 = 23,
+ Cine80 = 24,
+ Music80 = 25,
+ Cine81 = 26,
+ Music81 = 27,
+ Arr102 = 28
+};
+}
diff --git a/include/kfr/dsp/units.hpp b/include/kfr/dsp/units.hpp
@@ -0,0 +1,219 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+
+#include "../base/log_exp.hpp"
+#include "../base/vec.hpp"
+#include "../expressions/basic.hpp"
+
+#pragma clang diagnostic push
+#if CID_HAS_WARNING("-Winaccessible-base")
+#pragma clang diagnostic ignored "-Winaccessible-base"
+#endif
+
+namespace kfr
+{
+
+using sample_rate_t = double;
+
+namespace internal
+{
+template <cpu_t c = cpu_t::native, cpu_t cc = c>
+struct in_dsp_units : in_log_exp<cc>, in_select<cc>, in_round<cc>, in_abs<cc>
+{
+private:
+ using in_log_exp<cc>::log;
+ using in_log_exp<cc>::exp;
+ using in_log_exp<cc>::log10;
+ using in_log_exp<cc>::exp10;
+ using in_log_exp<cc>::exp_fmadd;
+ using in_log_exp<cc>::log_fmadd;
+ using in_select<cc>::select;
+ using in_round<cc>::fract;
+ using in_abs<cc>::abs;
+
+public:
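+    // The constants below are 20 / ln(10) = 8.685889638... (amp -> dB) and
+    // ln(10) / 20 = 0.115129254... (dB -> amp): 20 * log10(x) and 10^(x / 20)
+    // rewritten in terms of the natural log/exp provided by in_log_exp.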
+ template <typename T, typename TF = ftype<T>>
+ KFR_SINTRIN TF amp_to_dB(T amp)
+ {
+ return log(cast<subtype<TF>>(amp)) * subtype<TF>(8.6858896380650365530225783783322);
+        // equivalent to T(20.0) * log10(amp)
+ }
+
+ template <typename T, typename TF = ftype<T>>
+ KFR_SINTRIN TF dB_to_amp(T dB)
+ {
+ return exp(dB * subtype<TF>(0.11512925464970228420089957273422));
+ // return exp10( dB / 20 );
+ }
+
+ template <typename T, typename TF = ftype<T>>
+ KFR_SINTRIN TF amp_to_dB(T amp, T offset)
+ {
+ return log_fmadd(amp, subtype<TF>(8.6858896380650365530225783783322), offset);
+        // equivalent to T(20.0) * log10(amp)
+ }
+
+ template <typename T, typename TF = ftype<T>>
+ KFR_SINTRIN TF dB_to_amp(T dB, T offset)
+ {
+ auto offs = -subtype<TF>(0.11512925464970228420089957273422) * offset;
+ return exp_fmadd(dB, subtype<TF>(0.11512925464970228420089957273422), offs);
+ // return exp10( dB / 20 );
+ }
+
+ template <typename T>
+ KFR_SINTRIN T power_to_dB(T x)
+ {
+ return log(x) * (10 * c_recip_log_10<T>);
+ }
+
+ template <typename T>
+ KFR_SINTRIN T dB_to_power(T x)
+ {
+ if (x == -c_infinity<T>)
+ return 0.0;
+ else
+ return exp(x * (c_log_10<T> / 10.0));
+ }
+
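+    // MIDI note <-> frequency conversions for A4 = note 69 = 440 Hz:
+    // 0.05776226... = ln(2) / 12 (one semitone in natural-log units),
+    // 2.10117843... = ln(440) - 69 * ln(2) / 12, 17.31234049... = 12 / ln(2) and
+    // -36.37631656... = 69 - ln(440) * 12 / ln(2).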
+ template <typename T, typename TF = ftype<T>>
+ KFR_SINTRIN TF note_to_hertz(T note)
+ {
+ const subtype<TF> offset = 2.1011784386926213177653145771814;
+
+ return exp_fmadd(note, subtype<TF>(0.05776226504666210911810267678818), offset);
+ }
+
+ template <typename T, typename TF = ftype<T>>
+ KFR_SINTRIN TF hertz_to_note(T hertz)
+ {
+ const subtype<TF> offset = -36.376316562295915248836189714583;
+
+ return log_fmadd(hertz, subtype<TF>(17.312340490667560888319096172023), offset);
+ }
+
+ template <typename T1, typename T2, typename T3, typename Tc = common_type<T1, T2, T3, f32>>
+ KFR_SINTRIN Tc note_to_hertz(T1 note, T2 tunenote, T3 tunehertz)
+ {
+ const Tc offset = log(tunehertz) - tunenote * subtype<Tc>(0.05776226504666210911810267678818);
+
+ return exp_fmadd(note, subtype<Tc>(0.05776226504666210911810267678818), offset);
+ }
+
+ template <typename T1, typename T2, typename T3, typename Tc = common_type<T1, T2, T3, f32>>
+ KFR_SINTRIN Tc hertz_to_note(T1 hertz, T2 tunenote, T3 tunehertz)
+ {
+ const Tc offset = tunenote - log(tunehertz) * subtype<Tc>(17.312340490667560888319096172023);
+
+ return log_fmadd(hertz, subtype<Tc>(17.312340490667560888319096172023), offset);
+ }
+
+ KFR_SPEC_FN(in_dsp_units, note_to_hertz)
+ KFR_SPEC_FN(in_dsp_units, hertz_to_note)
+ KFR_SPEC_FN(in_dsp_units, amp_to_dB)
+ KFR_SPEC_FN(in_dsp_units, dB_to_amp)
+ KFR_SPEC_FN(in_dsp_units, power_to_dB)
+ KFR_SPEC_FN(in_dsp_units, dB_to_power)
+};
+}
+
+using fn_note_to_hertz = internal::in_dsp_units<>::fn_note_to_hertz;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> note_to_hertz(const T1& x)
+{
+ return internal::in_dsp_units<>::note_to_hertz(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_note_to_hertz, E1> note_to_hertz(E1&& x)
+{
+ return { {}, std::forward<E1>(x) };
+}
+using fn_hertz_to_note = internal::in_dsp_units<>::fn_hertz_to_note;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> hertz_to_note(const T1& x)
+{
+ return internal::in_dsp_units<>::hertz_to_note(x);
+}
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_hertz_to_note, E1> hertz_to_note(E1&& x)
+{
+ return { {}, std::forward<E1>(x) };
+}
+using fn_amp_to_dB = internal::in_dsp_units<>::fn_amp_to_dB;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> amp_to_dB(const T1& x)
+{
+ return internal::in_dsp_units<>::amp_to_dB(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_amp_to_dB, E1> amp_to_dB(E1&& x)
+{
+ return { {}, std::forward<E1>(x) };
+}
+using fn_dB_to_amp = internal::in_dsp_units<>::fn_dB_to_amp;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> dB_to_amp(const T1& x)
+{
+ return internal::in_dsp_units<>::dB_to_amp(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_dB_to_amp, E1> dB_to_amp(E1&& x)
+{
+ return { {}, std::forward<E1>(x) };
+}
+using fn_power_to_dB = internal::in_dsp_units<>::fn_power_to_dB;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> power_to_dB(const T1& x)
+{
+ return internal::in_dsp_units<>::power_to_dB(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_power_to_dB, E1> power_to_dB(E1&& x)
+{
+ return { {}, std::forward<E1>(x) };
+}
+using fn_dB_to_power = internal::in_dsp_units<>::fn_dB_to_power;
+template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
+KFR_INTRIN ftype<T1> dB_to_power(const T1& x)
+{
+ return internal::in_dsp_units<>::dB_to_power(x);
+}
+
+template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_INTRIN expr_func<fn_dB_to_power, E1> dB_to_power(E1&& x)
+{
+ return { {}, std::forward<E1>(x) };
+}
+}
+
+#pragma clang diagnostic pop
diff --git a/include/kfr/dsp/weighting.hpp b/include/kfr/dsp/weighting.hpp
@@ -0,0 +1,122 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+
+#include "../base/sqrt.hpp"
+#include "../base/vec.hpp"
+#include "units.hpp"
+
+namespace kfr
+{
+namespace internal
+{
+
+template <cpu_t c = cpu_t::native, cpu_t cc = c>
+struct in_weight : in_sqrt<cc>, in_dsp_units<cc>
+{
+private:
+ using in_dsp_units<cc>::amp_to_dB;
+
+public:
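+    // Analog magnitude approximations of the standard A-, B- and C-weighting
+    // curves, built from the usual corner frequencies (20.6, 107.7, 737.9, 158.5
+    // and 12200 Hz) and normalized so the gain at 1 kHz is exactly 1 (0 dB).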
+ template <typename T>
+ KFR_SINTRIN T weight_a_unnorm(T f)
+ {
+ const T f2 = pow2(f);
+ const T nom = pow2(12200) * pow4(f);
+ const T den =
+ (f2 + pow2(20.6)) * (sqrt((f2 + pow2(107.7)) * (f2 + pow2(737.9)))) * (f2 + pow2(12200));
+ return nom / den;
+ }
+
+ template <typename T>
+ constexpr static T weight_a_gain = reciprocal(weight_a_unnorm(T(1000.0)));
+
+ template <typename T>
+ KFR_SINTRIN T aweighting(T f)
+ {
+ return weight_a_unnorm(f) * weight_a_gain<subtype<T>>;
+ }
+
+ template <typename T>
+ KFR_SINTRIN T weight_b_unnorm(T f)
+ {
+ const T f2 = pow2(f);
+ const T nom = pow2(12200) * pow3(f);
+ const T den = (f2 + pow2(20.6)) * (sqrt((f2 + pow2(158.5)))) * (f2 + pow2(12200));
+
+ return nom / den;
+ }
+
+ template <typename T>
+ constexpr static T weight_b_gain = reciprocal(weight_b_unnorm(T(1000.0)));
+
+ template <typename T>
+ KFR_SINTRIN T bweighting(T f)
+ {
+ return weight_b_unnorm(f) * weight_b_gain<subtype<T>>;
+ }
+
+ template <typename T>
+ KFR_SINTRIN T weight_c_unnorm(T f)
+ {
+ const T f2 = pow2(f);
+ const T nom = pow2(12200) * f2;
+ const T den = (f2 + pow2(20.6)) * (f2 + pow2(12200));
+
+ return nom / den;
+ }
+
+ template <typename T>
+ constexpr static T weight_c_gain = reciprocal(weight_c_unnorm(T(1000.0)));
+
+ template <typename T>
+ KFR_SINTRIN T cweighting(T f)
+ {
+ return weight_c_unnorm(f) * weight_c_gain<subtype<T>>;
+ }
+
+ template <typename T>
+ KFR_SINTRIN T aweightingdB(T f)
+ {
+ return amp_to_dB(aweighting(f));
+ }
+ template <typename T>
+ KFR_SINTRIN T bweightingdB(T f)
+ {
+ return amp_to_dB(bweighting(f));
+ }
+ template <typename T>
+ KFR_SINTRIN T cweightingdB(T f)
+ {
+ return amp_to_dB(cweighting(f));
+ }
+
+ KFR_SPEC_FN(in_weight, aweighting)
+ KFR_SPEC_FN(in_weight, bweighting)
+ KFR_SPEC_FN(in_weight, cweighting)
+ KFR_SPEC_FN(in_weight, aweightingdB)
+ KFR_SPEC_FN(in_weight, bweightingdB)
+ KFR_SPEC_FN(in_weight, cweightingdB)
+};
+}
+}
diff --git a/include/kfr/dsp/window.hpp b/include/kfr/dsp/window.hpp
@@ -0,0 +1,685 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+
+#include "../base/log_exp.hpp"
+#include "../base/sin_cos.hpp"
+#include "../base/sqrt.hpp"
+#include "../base/vec.hpp"
+#include "../expressions/pointer.hpp"
+
+#pragma clang diagnostic push
+#if CID_HAS_WARNING("-Winaccessible-base")
+#pragma clang diagnostic ignored "-Winaccessible-base"
+#endif
+
+namespace kfr
+{
+
+enum class window_type
+{
+ rectangular = 1,
+ triangular = 2,
+ bartlett = 3,
+ cosine = 4,
+ hann = 5,
+ bartlett_hann = 6,
+ hamming = 7,
+ bohman = 8,
+ blackman = 9,
+ blackman_harris = 10,
+ kaiser = 11,
+ flattop = 12,
+ gaussian = 13,
+ lanczos = 14,
+};
+
+template <window_type type>
+using cwindow_type_t = cval_t<window_type, type>;
+
+template <window_type type>
+constexpr cwindow_type_t<type> cwindow_type{};
+
+enum class window_symmetry
+{
+ periodic,
+ symmetric
+};
+
+namespace internal
+{
+
+template <typename T>
+constexpr T bessel_coef[] = { T(0.25),
+ T(0.027777777777777776236),
+ T(0.0017361111111111110147),
+ T(6.9444444444444444384e-005),
+ T(1.9290123456790123911e-006),
+ T(3.9367598891408417495e-008),
+ T(6.1511873267825652335e-010),
+ T(7.5940584281266239246e-012),
+ T(7.5940584281266233693e-014),
+ T(6.2760813455591932909e-016),
+ T(4.3583898233049949985e-018),
+ T(2.5789288895295827557e-020),
+ T(1.3157800456783586208e-022),
+ T(5.8479113141260384983e-025),
+ T(2.2843403570804837884e-027),
+ T(7.904291893012054025e-030),
+ T(2.4395962632753252792e-032),
+ T(6.75788438580422547e-035),
+ T(1.689471096451056426e-037),
+ T(3.8310002187098784929e-040),
+ T(7.9152897080782616517e-043),
+ T(1.4962740468957016443e-045),
+ T(2.5976979980828152196e-048),
+ T(4.1563167969325041577e-051),
+ T(6.1483976285983795968e-054),
+ T(8.434015951438105991e-057),
+ T(1.0757673407446563809e-059),
+ T(1.2791526049282476926e-062),
+ T(1.4212806721424974034e-065),
+ T(1.4789601166935457918e-068),
+ T(1.4442969889585408123e-071),
+ T(1.3262598613026086927e-074),
+ T(1.1472836170437790782e-077),
+ T(9.3655805472961564331e-081),
+ T(7.2265282000741942594e-084),
+ T(5.2786911614858977913e-087),
+ T(3.6556032974279072401e-090),
+ T(2.4034209713529963119e-093),
+ T(1.5021381070956226783e-096) };
+
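+// Modified Bessel function of the first kind, order zero:
+//   I0(x) = sum_{k >= 0} ((x / 2)^2)^k / (k!)^2
+// bessel_coef[k] = 1 / ((k + 2)!)^2, so after seeding the k = 0 and k = 1 terms the
+// loop below adds one series term per fmadd (used by the Kaiser window).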
+template <typename T, size_t N>
+KFR_INLINE vec<T, N> modzerobessel(vec<T, N> x)
+{
+ const vec<T, N> x_2 = x * 0.5;
+ const vec<T, N> x_2_sqr = x_2 * x_2;
+ vec<T, N> num = x_2_sqr;
+ vec<T, N> result;
+ result = 1 + x_2_sqr;
+
+ KFR_LOOP_UNROLL
+ for (size_t i = 0; i < (sizeof(T) == 4 ? 20 : 39); i++)
+ {
+ result = fmadd((num *= x_2_sqr), bessel_coef<T>[i], result);
+ }
+ return result;
+}
+
+template <cpu_t cpu = cpu_t::native>
+struct in_window : in_sin_cos<cpu>, in_log_exp<cpu>, in_select<cpu>, in_sqrt<cpu>, in_abs<cpu>
+{
+private:
+ using in_sin_cos<cpu>::sin;
+ using in_sin_cos<cpu>::cos;
+ using in_sin_cos<cpu>::sinc;
+ using in_log_exp<cpu>::exp;
+ using in_select<cpu>::select;
+ using in_sqrt<cpu>::sqrt;
+ using in_abs<cpu>::abs;
+
+public:
+ template <typename T>
+ struct window_linspace_0_1 : expression_linspace<T>
+ {
+ window_linspace_0_1(size_t size, window_symmetry symmetry)
+ : expression_linspace<T>(0, 1, size, symmetry == window_symmetry::symmetric)
+ {
+ }
+ };
+
+ template <typename T>
+ struct window_linspace_m1_1 : expression_linspace<T>
+ {
+ window_linspace_m1_1(size_t size, window_symmetry symmetry)
+ : expression_linspace<T>(-1, 1, size, symmetry == window_symmetry::symmetric)
+ {
+ }
+ };
+
+ template <typename T>
+ struct window_linspace_mpi_pi : expression_linspace<T>
+ {
+ window_linspace_mpi_pi(size_t size, window_symmetry symmetry)
+ : expression_linspace<T>(-c_pi<T>, +c_pi<T>, size, symmetry == window_symmetry::symmetric)
+ {
+ }
+ };
+
+ template <typename T>
+ struct window_linspace_m1_1_trunc : expression_linspace<T>
+ {
+ window_linspace_m1_1_trunc(size_t size, window_symmetry symmetry)
+ : expression_linspace<T>(-T(size - 1) / size, T(size - 1) / size, size,
+ symmetry == window_symmetry::symmetric)
+ {
+ }
+ };
+
+ template <typename T>
+ struct window_linspace_m1_1_trunc2 : expression_linspace<T>
+ {
+ window_linspace_m1_1_trunc2(size_t size, window_symmetry symmetry)
+ : expression_linspace<T>(symmetric_linspace,
+ (size & 1) ? T(size - 1) / T(size + 1) : T(size - 1) / (size), size,
+ symmetry == window_symmetry::symmetric)
+ {
+ }
+ };
+
+ template <typename T>
+ struct expression_rectangular : input_expression
+ {
+ using value_type = T;
+
+ template <cpu_t newcpu>
+ using retarget_this = typename in_window<newcpu>::template expression_rectangular<T>;
+ expression_rectangular(size_t size, T = T(), window_symmetry = window_symmetry::symmetric)
+ : m_size(size)
+ {
+ }
+ template <typename U, size_t N>
+ KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
+ {
+ using UI = utype<U>;
+ const vec<UI, N> i = enumerate(vec<UI, N>()) + cast<UI>(index);
+ return select(i < cast<UI>(m_size), U(1), U(0));
+ }
+ size_t size() const { return m_size; }
+
+ private:
+ size_t m_size;
+ };
+
+ template <typename T>
+ struct expression_triangular : input_expression
+ {
+ using value_type = T;
+
+ template <cpu_t newcpu>
+ using retarget_this = typename in_window<newcpu>::template expression_triangular<T>;
+ expression_triangular(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric)
+ : linspace(size, symmetry), m_size(size)
+ {
+ }
+ template <typename U, size_t N>
+ KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
+ {
+ constexpr vec_t<T, N> y{};
+ return cast<U>(1 - abs(linspace(cinput, index, y)));
+ }
+ size_t size() const { return m_size; }
+
+ private:
+ window_linspace_m1_1_trunc2<T> linspace;
+ size_t m_size;
+ };
+
+ template <typename T>
+ struct expression_bartlett : input_expression
+ {
+ using value_type = T;
+
+ template <cpu_t newcpu>
+ using retarget_this = typename in_window<newcpu>::template expression_bartlett<T>;
+ expression_bartlett(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric)
+ : linspace(size, symmetry), m_size(size)
+ {
+ }
+ template <typename U, size_t N>
+ KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
+ {
+ constexpr vec_t<T, N> y{};
+ return cast<U>(1 - abs(linspace(cinput, index, y)));
+ }
+ size_t size() const { return m_size; }
+
+ private:
+ window_linspace_m1_1<T> linspace;
+ size_t m_size;
+ };
+
+ template <typename T>
+ struct expression_cosine : input_expression
+ {
+ using value_type = T;
+
+ template <cpu_t newcpu>
+ using retarget_this = typename in_window<newcpu>::template expression_cosine<T>;
+ expression_cosine(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric)
+ : linspace(size, symmetry), m_size(size)
+ {
+ }
+ template <typename U, size_t N>
+ KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
+ {
+ constexpr vec_t<T, N> y{};
+ return cast<U>(sin(c_pi<T> * linspace(cinput, index, y)));
+ }
+ size_t size() const { return m_size; }
+
+ private:
+ window_linspace_0_1<T> linspace;
+ size_t m_size;
+ };
+
+ template <typename T>
+ struct expression_hann : input_expression
+ {
+ using value_type = T;
+
+ template <cpu_t newcpu>
+ using retarget_this = typename in_window<newcpu>::template expression_hann<T>;
+ expression_hann(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric)
+ : linspace(size, symmetry), m_size(size)
+ {
+ }
+ template <typename U, size_t N>
+ KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
+ {
+ constexpr vec_t<T, N> y{};
+ return cast<U>(T(0.5) * (T(1) - cos(c_pi<T, 2> * linspace(cinput, index, y))));
+ }
+ size_t size() const { return m_size; }
+
+ private:
+ window_linspace_0_1<T> linspace;
+ size_t m_size;
+ };
+
+ template <typename T>
+ struct expression_bartlett_hann : input_expression
+ {
+ using value_type = T;
+
+ template <cpu_t newcpu>
+ using retarget_this = typename in_window<newcpu>::template expression_bartlett_hann<T>;
+
+ expression_bartlett_hann(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric)
+ : linspace(size, symmetry), m_size(size)
+ {
+ }
+ template <typename U, size_t N>
+ KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
+ {
+ constexpr vec_t<T, N> y{};
+ const vec<T, N> xx = linspace(cinput, index, y);
+ return cast<U>(T(0.62) - T(0.48) * abs(xx - T(0.5)) + T(0.38) * cos(c_pi<T, 2> * (xx - T(0.5))));
+ }
+ size_t size() const { return m_size; }
+
+ private:
+ window_linspace_0_1<T> linspace;
+ size_t m_size;
+ };
+
+ template <typename T>
+ struct expression_hamming : input_expression
+ {
+ using value_type = T;
+
+ template <cpu_t newcpu>
+ using retarget_this = typename in_window<newcpu>::template expression_hamming<T>;
+ expression_hamming(size_t size, T alpha = 0.54, window_symmetry symmetry = window_symmetry::symmetric)
+ : linspace(size, symmetry), alpha(alpha), m_size(size)
+ {
+ }
+ template <typename U, size_t N>
+ KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
+ {
+ constexpr vec_t<T, N> y{};
+            return cast<U>(alpha - (T(1) - alpha) * cos(c_pi<T, 2> * linspace(cinput, index, y)));
+ }
+ size_t size() const { return m_size; }
+
+ private:
+ window_linspace_0_1<T> linspace;
+ T alpha;
+ size_t m_size;
+ };
+
+ template <typename T>
+ struct expression_bohman : input_expression
+ {
+ using value_type = T;
+
+ template <cpu_t newcpu>
+ using retarget_this = typename in_window<newcpu>::template expression_bohman<T>;
+ expression_bohman(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric)
+ : linspace(size, symmetry), m_size(size)
+ {
+ }
+ template <typename U, size_t N>
+ KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
+ {
+ constexpr vec_t<T, N> y{};
+            const vec<T, N> n = abs(linspace(cinput, index, y));
+            return cast<U>((T(1) - n) * cos(c_pi<T> * n) + (T(1) / c_pi<T>) * sin(c_pi<T> * n));
+ }
+ size_t size() const { return m_size; }
+
+ private:
+ window_linspace_m1_1<T> linspace;
+ size_t m_size;
+ };
+
+ template <typename T>
+ struct expression_blackman : input_expression
+ {
+ using value_type = T;
+
+ template <cpu_t newcpu>
+ using retarget_this = typename in_window<newcpu>::template expression_blackman<T>;
+ expression_blackman(size_t size, T alpha = 0.16,
+ window_symmetry symmetry = window_symmetry::symmetric)
+ : linspace(size, symmetry), a0((1 - alpha) * 0.5), a1(0.5), a2(alpha * 0.5), m_size(size)
+ {
+ }
+ template <typename U, size_t N>
+ KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
+ {
+ constexpr vec_t<T, N> y{};
+ const vec<T, N> n = linspace(cinput, index, y);
+ return cast<U>(a0 - a1 * cos(c_pi<T, 2> * n) + a2 * cos(c_pi<T, 4> * n));
+ }
+ size_t size() const { return m_size; }
+
+ private:
+ window_linspace_0_1<T> linspace;
+ T a0, a1, a2;
+ size_t m_size;
+ };
+
+ template <typename T>
+ struct expression_blackman_harris : input_expression
+ {
+ using value_type = T;
+
+ template <cpu_t newcpu>
+ using retarget_this = typename in_window<newcpu>::template expression_blackman_harris<T>;
+ expression_blackman_harris(size_t size, T = T(),
+ window_symmetry symmetry = window_symmetry::symmetric)
+ : linspace(size, symmetry), m_size(size)
+ {
+ }
+ template <typename U, size_t N>
+ KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
+ {
+ constexpr vec_t<T, N> y{};
+ const vec<T, N> n = linspace(cinput, index, y) * c_pi<T, 2>;
+
+ return cast<U>(T(0.35875) - T(0.48829) * cos(n) + T(0.14128) * cos(2 * n) -
+ T(0.01168) * cos(3 * n));
+ }
+ size_t size() const { return m_size; }
+
+ private:
+ window_linspace_0_1<T> linspace;
+ size_t m_size;
+ };
+
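+    // Kaiser window: w(x) = I0(beta * sqrt(1 - x^2)) / I0(beta) for x in [-1, 1],
+    // where I0 is the modified Bessel function of the first kind (modzerobessel).
+    // The constructor precomputes 1 / I0(beta), so evaluation needs only one
+    // Bessel call per vector.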
+ template <typename T>
+ struct expression_kaiser : input_expression
+ {
+ using value_type = T;
+
+ template <cpu_t newcpu>
+ using retarget_this = typename in_window<newcpu>::template expression_kaiser<T>;
+ expression_kaiser(size_t size, T beta = 0.5, window_symmetry symmetry = window_symmetry::symmetric)
+ : linspace(size, symmetry), beta(beta), m(reciprocal(modzerobessel(make_vector(beta))[0])),
+ m_size(size)
+ {
+ }
+ template <typename U, size_t N>
+ KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
+ {
+ constexpr vec_t<T, N> y{};
+ return cast<U>(modzerobessel(beta * sqrt(1 - sqr(linspace(cinput, index, y)))) * m);
+ }
+ size_t size() const { return m_size; }
+
+ private:
+ window_linspace_m1_1<T> linspace;
+ T beta;
+ T m;
+ size_t m_size;
+ };
+
+ template <typename T>
+ struct expression_flattop : input_expression
+ {
+ using value_type = T;
+
+ template <cpu_t newcpu>
+ using retarget_this = typename in_window<newcpu>::template expression_flattop<T>;
+ expression_flattop(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric)
+ : linspace(size, symmetry), m_size(size)
+ {
+ }
+ template <typename U, size_t N>
+ KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
+ {
+ constexpr vec_t<T, N> y{};
+ const vec<T, N> n = linspace(cinput, index, y) * c_pi<T, 2>;
+ constexpr T a0 = 1;
+ constexpr T a1 = 1.93;
+ constexpr T a2 = 1.29;
+ constexpr T a3 = 0.388;
+ constexpr T a4 = 0.028;
+ return cast<U>(a0 - a1 * cos(n) + a2 * cos(2 * n) - a3 * cos(3 * n) + a4 * cos(4 * n));
+ }
+ size_t size() const { return m_size; }
+
+ private:
+ window_linspace_0_1<T> linspace;
+ size_t m_size;
+ };
+
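+    // Gaussian window: w(x) = exp(-0.5 * (alpha * x)^2) over the truncated [-1, 1]
+    // span; alpha acts as 1/sigma, so a larger alpha gives a narrower window.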
+ template <typename T>
+ struct expression_gaussian : input_expression
+ {
+ using value_type = T;
+
+ template <cpu_t newcpu>
+ using retarget_this = typename in_window<newcpu>::template expression_gaussian<T>;
+
+ expression_gaussian(size_t size, T alpha = 2.5, window_symmetry symmetry = window_symmetry::symmetric)
+ : linspace(size, symmetry), alpha(alpha), m_size(size)
+ {
+ }
+ template <typename U, size_t N>
+ KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
+ {
+ constexpr vec_t<T, N> y{};
+ return cast<U>(exp(-0.5 * sqr(alpha * linspace(cinput, index, y))));
+ }
+
+ size_t size() const { return m_size; }
+ private:
+ window_linspace_m1_1_trunc<T> linspace;
+ T alpha;
+ size_t m_size;
+ };
+
+ template <typename T>
+ struct expression_lanczos : input_expression
+ {
+ using value_type = T;
+
+ template <cpu_t newcpu>
+ using retarget_this = typename in_window<newcpu>::template expression_lanczos<T>;
+ expression_lanczos(size_t size, T alpha = 2.5, window_symmetry symmetry = window_symmetry::symmetric)
+ : linspace(size, symmetry), alpha(alpha), m_size(size)
+ {
+ }
+ template <typename U, size_t N>
+ KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
+ {
+ constexpr vec_t<T, N> y{};
+ return cast<U>(sinc(linspace(cinput, index, y)));
+ }
+ size_t size() const { return m_size; }
+
+ private:
+ window_linspace_mpi_pi<T> linspace;
+ T alpha;
+ size_t m_size;
+ };
+};
+
+template <window_type>
+struct window_by_type;
+
+#define KFR_WINDOW_BY_TYPE(win) \
+ template <> \
+ struct window_by_type<window_type::win> \
+ { \
+ template <typename T> \
+ using type = in_window<>::expression_##win<T>; \
+ };
+KFR_WINDOW_BY_TYPE(rectangular)
+KFR_WINDOW_BY_TYPE(triangular)
+KFR_WINDOW_BY_TYPE(bartlett)
+KFR_WINDOW_BY_TYPE(cosine)
+KFR_WINDOW_BY_TYPE(hann)
+KFR_WINDOW_BY_TYPE(bartlett_hann)
+KFR_WINDOW_BY_TYPE(hamming)
+KFR_WINDOW_BY_TYPE(bohman)
+KFR_WINDOW_BY_TYPE(blackman)
+KFR_WINDOW_BY_TYPE(blackman_harris)
+KFR_WINDOW_BY_TYPE(kaiser)
+KFR_WINDOW_BY_TYPE(flattop)
+KFR_WINDOW_BY_TYPE(gaussian)
+KFR_WINDOW_BY_TYPE(lanczos)
+}
+
+KFR_INLINE internal::in_window<>::expression_rectangular<fbase> window_rectangular(size_t size)
+{
+ return internal::in_window<>::expression_rectangular<fbase>(size, fbase());
+}
+template <typename T = fbase>
+KFR_INLINE internal::in_window<>::expression_triangular<T> window_triangular(size_t size,
+ ctype_t<T> = ctype_t<T>())
+{
+ return internal::in_window<>::expression_triangular<T>(size);
+}
+template <typename T = fbase>
+KFR_INLINE internal::in_window<>::expression_bartlett<T> window_bartlett(size_t size,
+ ctype_t<T> = ctype_t<T>())
+{
+ return internal::in_window<>::expression_bartlett<T>(size);
+}
+template <typename T = fbase>
+KFR_INLINE internal::in_window<>::expression_cosine<T> window_cosine(size_t size, ctype_t<T> = ctype_t<T>())
+{
+ return internal::in_window<>::expression_cosine<T>(size);
+}
+template <typename T = fbase>
+KFR_INLINE internal::in_window<>::expression_hann<T> window_hann(size_t size, ctype_t<T> = ctype_t<T>())
+{
+ return internal::in_window<>::expression_hann<T>(size);
+}
+template <typename T = fbase>
+KFR_INLINE internal::in_window<>::expression_bartlett_hann<T> window_bartlett_hann(size_t size,
+ ctype_t<T> = ctype_t<T>())
+{
+ return internal::in_window<>::expression_bartlett_hann<T>(size);
+}
+template <typename T = fbase>
+KFR_INLINE internal::in_window<>::expression_hamming<T> window_hamming(size_t size, T alpha = 0.54,
+ ctype_t<T> = ctype_t<T>())
+{
+ return internal::in_window<>::expression_hamming<T>(size, alpha);
+}
+template <typename T = fbase>
+KFR_INLINE internal::in_window<>::expression_bohman<T> window_bohman(size_t size, ctype_t<T> = ctype_t<T>())
+{
+ return internal::in_window<>::expression_bohman<T>(size);
+}
+template <typename T = fbase>
+KFR_INLINE internal::in_window<>::expression_blackman<T> window_blackman(
+ size_t size, T alpha = 0.16, window_symmetry symmetry = window_symmetry::symmetric,
+ ctype_t<T> = ctype_t<T>())
+{
+ return internal::in_window<>::expression_blackman<T>(size, alpha, symmetry);
+}
+template <typename T = fbase>
+KFR_INLINE internal::in_window<>::expression_blackman_harris<T> window_blackman_harris(
+ size_t size, window_symmetry symmetry = window_symmetry::symmetric, ctype_t<T> = ctype_t<T>())
+{
+ return internal::in_window<>::expression_blackman_harris<T>(size, T(), symmetry);
+}
+template <typename T = fbase>
+KFR_INLINE internal::in_window<>::expression_kaiser<T> window_kaiser(size_t size, T beta = T(0.5),
+ ctype_t<T> = ctype_t<T>())
+{
+ return internal::in_window<>::expression_kaiser<T>(size, beta);
+}
+template <typename T = fbase>
+KFR_INLINE internal::in_window<>::expression_flattop<T> window_flattop(size_t size, ctype_t<T> = ctype_t<T>())
+{
+ return internal::in_window<>::expression_flattop<T>(size);
+}
+template <typename T = fbase>
+KFR_INLINE internal::in_window<>::expression_gaussian<T> window_gaussian(size_t size, T alpha = 2.5,
+ ctype_t<T> = ctype_t<T>())
+{
+ return internal::in_window<>::expression_gaussian<T>(size, alpha);
+}
+template <typename T = fbase>
+KFR_INLINE internal::in_window<>::expression_lanczos<T> window_lanczos(size_t size, ctype_t<T> = ctype_t<T>())
+{
+ return internal::in_window<>::expression_lanczos<T>(size);
+}
+
+template <typename T = fbase, window_type type,
+ typename window_expr = typename internal::window_by_type<type>::template type<T>>
+KFR_NOINLINE window_expr window(size_t size, cval_t<window_type, type>, T win_param = T(),
+ window_symmetry symmetry = window_symmetry::symmetric,
+ ctype_t<T> = ctype_t<T>())
+{
+ return window_expr(size, win_param, symmetry);
+}
+
+template <typename T = fbase>
+KFR_NOINLINE expression_pointer<T> window(size_t size, window_type type, T win_param,
+ window_symmetry symmetry = window_symmetry::symmetric,
+ ctype_t<T> = ctype_t<T>())
+{
+ return cswitch(
+ cvals<window_type, window_type::rectangular, window_type::triangular, window_type::bartlett,
+ window_type::cosine, window_type::hann, window_type::bartlett_hann, window_type::hamming,
+ window_type::bohman, window_type::blackman, window_type::blackman_harris, window_type::kaiser,
+ window_type::flattop, window_type::gaussian, window_type::lanczos>,
+ type,
+        [=](auto win) {
+            constexpr window_type wtype = val_of(win);
+            return to_pointer(
+                typename internal::window_by_type<wtype>::template type<T>(size, win_param, symmetry));
+        },
+ fn_returns<expression_pointer<T>>());
+}
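+// Usage sketch (illustrative, not from the original sources): materialize a
+// window into a buffer via expression assignment.
+//   univector<fbase> taps(255);
+//   taps = window(taps.size(), window_type::kaiser, fbase(3.0)); // runtime-selected type
+//   taps = window_hann(taps.size());                             // statically typed variant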
+}
+
+#pragma clang diagnostic pop
diff --git a/include/kfr/expressions/basic.hpp b/include/kfr/expressions/basic.hpp
@@ -0,0 +1,360 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+
+#include "../base/univector.hpp"
+#include "../base/vec.hpp"
+
+namespace kfr
+{
+
+namespace internal
+{
+template <typename T, typename E1>
+struct expression_iterator
+{
+ constexpr expression_iterator(E1&& e1) : e1(std::forward<E1>(e1)) {}
+ struct iterator
+ {
+ T operator*() { return get(); }
+ T get() { return expr.e1(cinput, position, vec_t<T, 1>())[0]; }
+ iterator& operator++()
+ {
+ ++position;
+ return *this;
+ }
+ iterator operator++(int)
+ {
+ iterator copy = *this;
+ ++(*this);
+ return copy;
+ }
+ bool operator!=(const iterator& other) const { return position != other.position; }
+ expression_iterator& expr;
+ size_t position;
+ };
+ iterator begin() { return { *this, 0 }; }
+ iterator end() { return { *this, e1.size() }; }
+ E1 e1;
+};
+}
+
+template <typename E1, typename T = value_type_of<E1>>
+KFR_INLINE internal::expression_iterator<T, E1> to_iterator(E1&& e1)
+{
+ return internal::expression_iterator<T, E1>(std::forward<E1>(e1));
+}
+
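+// Cyclically repeats the given list of values: sequence(1, 0, -1, 0) yields
+// 1, 0, -1, 0, 1, ... (the index is taken modulo the list length).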
+template <typename T, typename... Ts>
+KFR_INLINE auto sequence(T x, Ts... rest)
+{
+ const T seq[] = { x, static_cast<T>(rest)... };
+ constexpr size_t N = arraysize(seq);
+ return lambda([=](size_t index) { return seq[index % N]; });
+}
+KFR_INLINE auto zeros()
+{
+ return lambda([](cinput_t, size_t, auto x) { return zerovector(x); });
+}
+KFR_INLINE auto ones()
+{
+ return lambda([](cinput_t, size_t, auto x) {
+ using U = subtype<decltype(x)>;
+ return U(1);
+ });
+}
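+// counter() enumerates element indices; the overloads below add a start value
+// and a step, so counter(10, 2) yields 10, 12, 14, ... at positions 0, 1, 2, ...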
+KFR_INLINE auto counter()
+{
+ return lambda([](cinput_t, size_t index, auto x) {
+ using T = subtype<decltype(x)>;
+ using Tsub = subtype<T>;
+ using TI = subtype<itype<T>>;
+ return cast<T>(enumerate<Tsub, x.size()>() + cast<Tsub>(cast<TI>(index)));
+ });
+}
+template <typename T1>
+KFR_INLINE auto counter(T1 start)
+{
+ return lambda([start](cinput_t, size_t index, auto x) {
+ using T = subtype<decltype(x)>;
+ using Tsub = subtype<T>;
+ using TI = subtype<itype<T>>;
+ return cast<T>(enumerate<Tsub, x.size()>() + cast<Tsub>(start) + cast<Tsub>(cast<TI>(index)));
+ });
+}
+template <typename T1, typename T2>
+KFR_INLINE auto counter(T1 start, T2 step)
+{
+ return lambda([start, step](cinput_t, size_t index, auto x) {
+ using T = subtype<decltype(x)>;
+ using Tsub = subtype<T>;
+ using TI = subtype<itype<T>>;
+        return cast<T>((enumerate<Tsub, x.size()>() + cast<Tsub>(cast<TI>(index))) * step +
+                       cast<Tsub>(start));
+ });
+}
+
+template <typename Gen>
+struct segment
+{
+ template <typename Gen_>
+ constexpr segment(size_t start, Gen_&& gen) : start(start), gen(std::forward<Gen_>(gen))
+ {
+ }
+ size_t start;
+ Gen gen;
+};
+
+enum symmetric_linspace_t
+{
+ symmetric_linspace
+};
+
+namespace internal
+{
+template <typename T, typename E1>
+struct expression_reader
+{
+ constexpr expression_reader(E1&& e1) noexcept : e1(std::forward<E1>(e1)) {}
+ T read()
+ {
+        const T result = e1(cinput, m_position, vec_t<T, 1>())[0];
+ m_position++;
+ return result;
+ }
+ size_t m_position = 0;
+ E1 e1;
+};
+template <typename T, typename E1>
+struct expression_writer
+{
+ constexpr expression_writer(E1&& e1) noexcept : e1(std::forward<E1>(e1)) {}
+ template <typename U>
+ void write(U value)
+ {
+ e1(coutput, m_position, vec<U, 1>(value));
+ m_position++;
+ }
+ size_t m_position = 0;
+ E1 e1;
+};
+}
+
+template <typename T, typename E1>
+internal::expression_reader<T, E1> reader(E1&& e1)
+{
+    static_assert(is_input_expression<E1>::value, "E1 must be an input expression");
+ return internal::expression_reader<T, E1>(std::forward<E1>(e1));
+}
+
+template <typename T, typename E1>
+internal::expression_writer<T, E1> writer(E1&& e1)
+{
+ static_assert(is_output_expression<E1>::value, "E1 must be an output expression");
+ return internal::expression_writer<T, E1>(std::forward<E1>(e1));
+}
+
+namespace internal
+{
+
+template <typename E1, typename = void>
+struct inherit_value_type
+{
+};
+
+template <typename E1>
+struct inherit_value_type<E1, void_t<typename decay<E1>::value_type>>
+{
+ using value_type = typename decay<E1>::value_type;
+};
+
+template <typename E1>
+struct expression_skip : expression<E1>, inherit_value_type<E1>
+{
+ expression_skip(E1&& e1, size_t count) : expression<E1>(std::forward<E1>(e1)), count(count) {}
+ template <typename T, size_t N>
+ KFR_INLINE vec<T, N> operator()(cinput_t, size_t index, vec_t<T, N> y)
+ {
+ return this->argument_first(index + count, y);
+ }
+ size_t count;
+};
+
+template <typename T, bool precise = false>
+struct expression_linspace;
+
+template <typename T>
+struct expression_linspace<T, false> : input_expression
+{
+ using value_type = T;
+
+ expression_linspace(T start, T stop, size_t size, bool endpoint = false)
+ : start(start), offset((stop - start) / T(endpoint ? size - 1 : size))
+ {
+ }
+
+ expression_linspace(symmetric_linspace_t, T symsize, size_t size, bool endpoint = false)
+ : expression_linspace(-symsize, +symsize, size, endpoint)
+ {
+ }
+
+ template <typename U, size_t N>
+ KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N> x) const
+ {
+ using UI = itype<U>;
+ return U(start) + (enumerate(x) + cast<U>(cast<UI>(index))) * U(offset);
+ }
+
+ T start;
+ T offset;
+};
+
+template <typename T>
+struct expression_linspace<T, true> : input_expression
+{
+    using value_type = T;
+
+    expression_linspace(T start, T stop, size_t size, bool endpoint = false)
+ : start(start), stop(stop), invsize(1.0 / T(endpoint ? size - 1 : size))
+ {
+ }
+
+ expression_linspace(symmetric_linspace_t, T symsize, size_t size, bool endpoint = false)
+ : expression_linspace(-symsize, +symsize, size, endpoint)
+ {
+ }
+
+ template <typename U, size_t N>
+ KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N> x) const
+ {
+ using UI = itype<U>;
+ return mix((enumerate(x) + cast<U>(cast<UI>(index))) * invsize, cast<U>(start), cast<U>(stop));
+ }
+ template <typename U, size_t N>
+ KFR_INLINE static vec<U, N> mix(vec<U, N> t, U x, U y)
+ {
+ return (U(1.0) - t) * x + t * y;
+ }
+
+ T start;
+ T stop;
+ T invsize;
+};
+
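+// Joins expressions along the index axis. Each entry of the segment list is the
+// start index of the corresponding expression; indices below the first start
+// evaluate to zero (the cswitch default), and the last expression extends
+// indefinitely.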
+template <typename... E>
+struct expression_sequence : expression<E...>
+{
+public:
+ using base = expression<E...>;
+ template <cpu_t newcpu>
+ using retarget_this = expression_sequence<retarget<E, newcpu>...>;
+
+ template <typename... Expr_>
+ KFR_INLINE expression_sequence(const size_t (&segments)[base::size], Expr_&&... expr) noexcept
+ : base(std::forward<Expr_>(expr)...)
+ {
+ std::copy(std::begin(segments), std::end(segments), this->segments.begin() + 1);
+ this->segments[0] = 0;
+ this->segments[base::size + 1] = size_t(-1);
+ }
+
+ template <typename T, size_t N>
+ KFR_NOINLINE vec<T, N> operator()(cinput_t, size_t index, vec_t<T, N> y)
+ {
+ std::size_t sindex = size_t(std::upper_bound(std::begin(segments), std::end(segments), index) - 1 -
+ std::begin(segments));
+ if (segments[sindex + 1] - index >= N)
+ return get(index, sindex - 1, y);
+ else
+ {
+ vec<T, N> result;
+#pragma clang loop unroll_count(4)
+ for (size_t i = 0; i < N; i++)
+ {
+ sindex = segments[sindex + 1] == index ? sindex + 1 : sindex;
+ result.data()[i] = get(index, sindex - 1, vec_t<T, 1>())[0];
+ index++;
+ }
+ return result;
+ }
+ }
+
+protected:
+ template <typename T, size_t N>
+ KFR_NOINLINE vec<T, N> get(size_t index, size_t expr_index, vec_t<T, N> y)
+ {
+ return cswitch(indicesfor<E...>, expr_index, [&](auto val) { return this->argument(val, index, y); },
+ [&]() { return zerovector(y); });
+ }
+
+ std::array<size_t, base::size + 2> segments;
+};
+}
+
+template <typename E1>
+KFR_INLINE internal::expression_skip<E1> skip(E1&& e1, size_t count = 1)
+{
+ return internal::expression_skip<E1>(std::forward<E1>(e1), count);
+}
+
+template <typename T1, typename T2, bool precise = false, typename TF = ftype<common_type<T1, T2>>>
+KFR_INLINE internal::expression_linspace<TF, precise> linspace(T1 start, T2 stop, size_t size,
+ bool endpoint = false)
+{
+ return internal::expression_linspace<TF, precise>(start, stop, size, endpoint);
+}
+KFR_FN(linspace)
+
+template <typename T, bool precise = false, typename TF = ftype<T>>
+KFR_INLINE internal::expression_linspace<TF, precise> symmlinspace(T symsize, size_t size,
+ bool endpoint = false)
+{
+ return internal::expression_linspace<TF, precise>(symmetric_linspace, symsize, size, endpoint);
+}
+KFR_FN(symmlinspace)
+
+template <size_t size, typename... E>
+KFR_INLINE internal::expression_sequence<decay<E>...> gen_sequence(const size_t (&list)[size], E&&... gens)
+{
+ static_assert(size == sizeof...(E), "Lists must be of equal length");
+ return internal::expression_sequence<decay<E>...>(list, std::forward<E>(gens)...);
+}
+KFR_FN(gen_sequence)
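+// e.g. gen_sequence({ 0, 100 }, counter(), zeros()) yields 0..99 from counter(),
+// then zeros from index 100 onward.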
+
+namespace internal
+{
+template <typename... E>
+struct multioutput : output_expression
+{
+ template <typename... E_>
+ multioutput(E_&&... e) : outputs(std::forward<E_>(e)...)
+ {
+ }
+ template <typename T, size_t N>
+ void operator()(coutput_t, size_t index, vec<T, N> x)
+ {
+ cfor(csize<0>, csize<sizeof...(E)>, [&](auto n) { std::get<val_of(n)>(outputs)(coutput, index, x); });
+ }
+ std::tuple<E...> outputs;
+
+private:
+};
+}
+}
diff --git a/include/kfr/expressions/conversion.hpp b/include/kfr/expressions/conversion.hpp
@@ -0,0 +1,57 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+
+#pragma once
+
+#include "../base/function.hpp"
+#include "../base/operators.hpp"
+#include "../base/vec.hpp"
+#include "../expressions/basic.hpp"
+
+namespace kfr
+{
+namespace internal
+{
+template <typename From, typename E>
+struct expression_convert : expression<E>
+{
+ template <cpu_t newcpu>
+ using retarget_this = expression_convert<From, retarget<E, newcpu>>;
+
+ KFR_INLINE expression_convert(E&& expr) noexcept : expression<E>(std::forward<E>(expr)) {}
+
+ template <typename T, size_t N>
+ KFR_INLINE vec<T, N> operator()(cinput_t, size_t index, vec_t<T, N>)
+ {
+ return this->argument_first(index, vec_t<From, N>());
+ }
+};
+}
+
+template <typename From, typename E>
+KFR_INLINE internal::expression_convert<From, decay<E>> convert(E&& expr)
+{
+ return internal::expression_convert<From, decay<E>>(std::forward<E>(expr));
+}
+KFR_FN(convert)
+}
diff --git a/include/kfr/expressions/generators.hpp b/include/kfr/expressions/generators.hpp
@@ -0,0 +1,279 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+
+#include "../base/function.hpp"
+#include "../base/log_exp.hpp"
+#include "../base/select.hpp"
+#include "../base/sin_cos.hpp"
+#include "../base/vec.hpp"
+
+#pragma clang diagnostic push
+#if CID_HAS_WARNING("-Winaccessible-base")
+#pragma clang diagnostic ignored "-Winaccessible-base"
+#endif
+
+namespace kfr
+{
+
+namespace internal
+{
+
+template <cpu_t cpu = cpu_t::native>
+struct in_generators : in_log_exp<cpu>, in_select<cpu>, in_sin_cos<cpu>
+{
+private:
+ using in_log_exp<cpu>::exp;
+ using in_log_exp<cpu>::exp2;
+ using in_select<cpu>::select;
+ using in_sin_cos<cpu>::cossin;
+
+public:
+ template <typename T, size_t width_, typename Class>
+ struct generator
+ {
+ constexpr static size_t width = width_;
+ using type = T;
+
+ template <typename U, size_t N>
+ KFR_INLINE vec<U, N> operator()(cinput_t, size_t, vec_t<U, N> t) const
+ {
+ return cast<U>(generate(t));
+ }
+
+ void resync(T start) const { ptr_cast<Class>(this)->sync(start); }
+
+ protected:
+ void call_next() const { ptr_cast<Class>(this)->next(); }
+ template <size_t N>
+ void call_shift(csize_t<N>) const
+ {
+ ptr_cast<Class>(this)->shift(csize<N>);
+ }
+
+ template <size_t N>
+ void shift(csize_t<N>) const
+ {
+ const vec<T, width> oldvalue = value;
+ call_next();
+ value = slice<N, width>(oldvalue, value);
+ }
+
+ template <size_t N, KFR_ENABLE_IF(N == width)>
+ KFR_INLINE vec<T, N> generate(vec_t<T, N>) const
+ {
+ const vec<T, N> result = value;
+ call_next();
+ return result;
+ }
+
+ template <size_t N, KFR_ENABLE_IF(N < width)>
+ KFR_INLINE vec<T, N> generate(vec_t<T, N>) const
+ {
+ const vec<T, N> result = narrow<N>(value);
+ shift(csize<N>);
+ return result;
+ }
+
+ template <size_t N, KFR_ENABLE_IF(N > width)>
+ KFR_INLINE vec<T, N> generate(vec_t<T, N> x) const
+ {
+ const auto lo = generate(low(x));
+ const auto hi = generate(high(x));
+ return concat(lo, hi);
+ }
+
+ mutable vec<T, width> value;
+ };
+
+ template <typename T, size_t width = get_vector_width<T, cpu>(1, 2)>
+ struct generator_linear : generator<T, width, generator_linear<T, width>>
+ {
+ template <cpu_t newcpu>
+ using retarget_this = typename in_generators<newcpu>::template generator_linear<T>;
+
+        constexpr generator_linear(T start, T step) noexcept : step(step), vstep(step * width)
+ {
+ this->resync(start);
+ }
+
+ KFR_INLINE void sync(T start) const noexcept { this->value = start + enumerate<T, width>() * step; }
+
+ KFR_INLINE void next() const noexcept { this->value += vstep; }
+
+ protected:
+ T step;
+ T vstep;
+ };
+
+ template <typename T, size_t width = get_vector_width<T, cpu>(1, 2)>
+ struct generator_exp : generator<T, width, generator_exp<T, width>>
+ {
+ template <cpu_t newcpu>
+ using retarget_this = typename in_generators<newcpu>::template generator_exp<T>;
+
+        generator_exp(T start, T step) noexcept : step(step), vstep(exp(make_vector(step * width))[0] - 1)
+ {
+ this->resync(start);
+ }
+
+ KFR_INLINE void sync(T start) const noexcept
+ {
+ this->value = exp(start + enumerate<T, width>() * step);
+ }
+
+ KFR_INLINE void next() const noexcept { this->value += this->value * vstep; }
+
+ protected:
+ T step;
+ T vstep;
+ };
+
+ template <typename T, size_t width = get_vector_width<T, cpu>(1, 2)>
+ struct generator_exp2 : generator<T, width, generator_exp2<T, width>>
+ {
+ template <cpu_t newcpu>
+ using retarget_this = typename in_generators<newcpu>::template generator_exp2<T>;
+
+        generator_exp2(T start, T step) noexcept : step(step), vstep(exp2(make_vector(step * width))[0] - 1)
+ {
+ this->resync(start);
+ }
+
+ KFR_INLINE void sync(T start) const noexcept
+ {
+ this->value = exp2(start + enumerate<T, width>() * step);
+ }
+
+ KFR_INLINE void next() const noexcept { this->value += this->value * vstep; }
+
+ protected:
+ T step;
+ T vstep;
+ };
+
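+    // The oscillators below advance by recurrence instead of calling sin/cos per
+    // sample: with phase step theta, cos(x + theta) = cos(x) - (alpha * cos(x) +
+    // beta * sin(x)) and sin(x + theta) = sin(x) - (alpha * sin(x) - beta * cos(x)),
+    // where alpha = 2 * sin(theta / 2)^2 = 1 - cos(theta) and beta = sin(theta).
+    // generator_cossin stores interleaved cos/sin pairs and steps width / 2
+    // samples at a time, hence the width / 2 factors in its alpha and beta.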
+ template <typename T, size_t width = get_vector_width<T, cpu>(1, 2)>
+ struct generator_cossin : generator<T, width, generator_cossin<T, width>>
+ {
+ template <cpu_t newcpu>
+ using retarget_this = typename in_generators<newcpu>::template generator_cossin<T>;
+
+ generator_cossin(T start, T step)
+ : step(step), alpha(2 * sqr(sin(width / 2 * step / 2))), beta(-sin(width / 2 * step))
+ {
+ this->resync(start);
+ }
+ KFR_INLINE void sync(T start) const noexcept { this->value = init_cossin(step, start); }
+
+ KFR_INLINE void next() const noexcept
+ {
+ this->value = this->value - subadd(alpha * this->value, beta * swap<2>(this->value));
+ }
+
+ protected:
+ T step;
+ T alpha;
+ T beta;
+ KFR_NOINLINE static vec<T, width> init_cossin(T w, T phase)
+ {
+ return cossin(dup(phase + enumerate<T, width / 2>() * w));
+ }
+ };
+
+ template <typename T, size_t width = get_vector_width<T, cpu>(2, 4)>
+ struct generator_sin : generator<T, width, generator_sin<T, width>>
+ {
+ template <cpu_t newcpu>
+ using retarget_this = typename in_generators<newcpu>::template generator_sin<T>;
+
+ generator_sin(T start, T step)
+ : step(step), alpha(2 * sqr(sin(width * step / 2))), beta(sin(width * step))
+ {
+ this->resync(start);
+ }
+ KFR_INLINE void sync(T start) const noexcept
+ {
+            const vec<T, width * 2> cs = splitpairs(cossin(dup(start + enumerate<T, width>() * step)));
+ this->cos_value = low(cs);
+ this->value = high(cs);
+ }
+
+ KFR_INLINE void next() const noexcept
+ {
+ const vec<T, width> c = this->cos_value;
+ const vec<T, width> s = this->value;
+
+ const vec<T, width> cc = alpha * c + beta * s;
+ const vec<T, width> ss = alpha * s - beta * c;
+
+ this->cos_value = c - cc;
+ this->value = s - ss;
+ }
+
+ template <size_t N>
+ void shift(csize_t<N>) const noexcept
+ {
+ const vec<T, width> oldvalue = this->value;
+ const vec<T, width> oldcosvalue = this->cos_value;
+ next();
+ this->value = slice<N, width>(oldvalue, this->value);
+ this->cos_value = slice<N, width>(oldcosvalue, this->cos_value);
+ }
+
+ protected:
+ T step;
+ T alpha;
+ T beta;
+ mutable vec<T, width> cos_value;
+ };
+};
+}
+
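+// Usage sketch (illustrative parameters): gen_sin produces a sine oscillator,
+// e.g. a 1 kHz tone at a 44100 Hz sample rate:
+//   auto osc = gen_sin(0.0, 2.0 * c_pi<fbase> * 1000.0 / 44100.0);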
+template <typename T1, typename T2, typename TF = ftype<common_type<T1, T2>>>
+KFR_SINTRIN internal::in_generators<>::generator_linear<TF> gen_linear(T1 start, T2 step)
+{
+ return internal::in_generators<>::generator_linear<TF>(start, step);
+}
+template <typename T1, typename T2, typename TF = ftype<common_type<T1, T2>>>
+KFR_SINTRIN internal::in_generators<>::generator_exp<TF> gen_exp(T1 start, T2 step)
+{
+ return internal::in_generators<>::generator_exp<TF>(start, step);
+}
+template <typename T1, typename T2, typename TF = ftype<common_type<T1, T2>>>
+KFR_SINTRIN internal::in_generators<>::generator_exp2<TF> gen_exp2(T1 start, T2 step)
+{
+ return internal::in_generators<>::generator_exp2<TF>(start, step);
+}
+template <typename T1, typename T2, typename TF = ftype<common_type<T1, T2>>>
+KFR_SINTRIN internal::in_generators<>::generator_cossin<TF> gen_cossin(T1 start, T2 step)
+{
+ return internal::in_generators<>::generator_cossin<TF>(start, step);
+}
+template <typename T1, typename T2, typename TF = ftype<common_type<T1, T2>>>
+KFR_SINTRIN internal::in_generators<>::generator_sin<TF> gen_sin(T1 start, T2 step)
+{
+ return internal::in_generators<>::generator_sin<TF>(start, step);
+}
+}
+
+#pragma clang diagnostic pop
diff --git a/include/kfr/expressions/operators.hpp b/include/kfr/expressions/operators.hpp
@@ -0,0 +1,66 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+
+#include "../base/function.hpp"
+#include "../base/operators.hpp"
+#include "../base/vec.hpp"
+
+namespace kfr
+{
+
+#define KFR_EXPR_UNARY(fn, op) \
+ template <typename A1, KFR_ENABLE_IF(is_input_expression<A1>::value)> \
+ KFR_INLINE auto operator op(A1&& a1)->decltype(bind_expression(fn(), std::forward<A1>(a1))) \
+ { \
+ return bind_expression(fn(), std::forward<A1>(a1)); \
+ }
+
+#define KFR_EXPR_BINARY(fn, op) \
+ template <typename A1, typename A2, KFR_ENABLE_IF(is_input_expressions<A1, A2>::value)> \
+ KFR_INLINE auto operator op(A1&& a1, A2&& a2) \
+ ->decltype(bind_expression(fn(), std::forward<A1>(a1), std::forward<A2>(a2))) \
+ { \
+ return bind_expression(fn(), std::forward<A1>(a1), std::forward<A2>(a2)); \
+ }
+
+KFR_EXPR_UNARY(fn_neg, -)
+KFR_EXPR_UNARY(fn_bitwisenot, ~)
+
+KFR_EXPR_BINARY(fn_add, +)
+KFR_EXPR_BINARY(fn_sub, -)
+KFR_EXPR_BINARY(fn_mul, *)
+KFR_EXPR_BINARY(fn_div, /)
+KFR_EXPR_BINARY(fn_bitwiseand, &)
+KFR_EXPR_BINARY(fn_bitwiseor, |)
+KFR_EXPR_BINARY(fn_bitwisexor, ^)
+KFR_EXPR_BINARY(fn_shl, <<)
+KFR_EXPR_BINARY(fn_shr, >>)
+
+KFR_EXPR_BINARY(fn_equal, ==)
+KFR_EXPR_BINARY(fn_notequal, !=)
+KFR_EXPR_BINARY(fn_less, <)
+KFR_EXPR_BINARY(fn_greater, >)
+KFR_EXPR_BINARY(fn_lessorequal, <=)
+KFR_EXPR_BINARY(fn_greaterorequal, >=)
+}
diff --git a/include/kfr/expressions/pointer.hpp b/include/kfr/expressions/pointer.hpp
@@ -0,0 +1,168 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+
+#include "../base/vec.hpp"
+#include "basic.hpp"
+#include <memory>
+
+namespace kfr
+{
+
+constexpr size_t maximum_expression_width() { return bitness_const(16, 32); }
+
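+// Vtable layout for a type-erased expression: slot 0 = begin_block, slot 1 =
+// end_block, then one evaluation function per power-of-two vector width from 1
+// up to maxwidth (ilog2(maxwidth) + 1 slots).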
+template <typename T, size_t maxwidth = maximum_expression_width()>
+using expression_vtable = carray<void*, 2 + ilog2(maxwidth) + 1>;
+
+struct dummy_content
+{
+};
+
+struct expression_resource
+{
+ virtual ~expression_resource() {}
+ virtual void* instance() { return nullptr; }
+};
+template <typename E>
+struct expression_resource_impl : expression_resource
+{
+ expression_resource_impl(E&& e) noexcept : e(std::move(e)) {}
+ virtual ~expression_resource_impl() {}
+ virtual void* instance() override final { return &e; }
+private:
+ E e;
+};
+
+template <typename E>
+std::shared_ptr<expression_resource> make_resource(E&& e)
+{
+ return std::static_pointer_cast<expression_resource>(
+ std::make_shared<expression_resource_impl<decay<E>>>(std::move(e)));
+}
+
+template <typename T, size_t maxwidth = maximum_expression_width()>
+struct expression_pointer : input_expression
+{
+ using value_type = T;
+
+    static_assert(is_poweroftwo(maxwidth), "maxwidth must be a power of two");
+ expression_pointer() noexcept : instance(nullptr), vtable(nullptr) {}
+ expression_pointer(void* instance, const expression_vtable<T, maxwidth>* vtable,
+ std::shared_ptr<expression_resource> resource = nullptr)
+ : instance(instance), vtable(vtable), resource(std::move(resource))
+ {
+ }
+ template <typename U, size_t N>
+ KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
+ {
+        using func_t = simd<T, N> (*)(void*, size_t, vec_t<T, N>);
+
+ static_assert(is_poweroftwo(N), "N must be a power of two");
+ constexpr size_t findex = ilog2(N);
+ static_assert(N <= maxwidth, "N is greater than maxwidth");
+ func_t func = reinterpret_cast<func_t>(vtable->get(csize<2 + findex>));
+        vec<U, N> result = cast<U>(func(instance, index, vec_t<T, N>()));
+ return result;
+ }
+ KFR_INLINE void begin_block(size_t size) const
+ {
+ using func_t = void (*)(void*, size_t);
+ func_t func = reinterpret_cast<func_t>(vtable->get(csize<0>));
+ func(instance, size);
+ }
+ KFR_INLINE void end_block(size_t size) const
+ {
+ using func_t = void (*)(void*, size_t);
+ func_t func = reinterpret_cast<func_t>(vtable->get(csize<1>));
+ func(instance, size);
+ }
+
+private:
+ void* instance;
+ const expression_vtable<T, maxwidth>* vtable;
+ std::shared_ptr<expression_resource> resource;
+};
+
+namespace internal
+{
+template <typename T, size_t N, typename Fn, typename Ret = simd<T, N>,
+ typename NonMemFn = Ret (*)(Fn*, size_t, vec_t<T, N>)>
+KFR_INLINE NonMemFn make_expression_func()
+{
+ return [](Fn* fn, size_t index, vec_t<T, N> x) { return *(fn->operator()(cinput, index, x)); };
+}
+
+template <typename Fn, typename NonMemFn = void (*)(Fn*, size_t)>
+KFR_INLINE NonMemFn make_expression_begin_block()
+{
+ return [](Fn* fn, size_t size) { return fn->begin_block(size); };
+}
+template <typename Fn, typename NonMemFn = void (*)(Fn*, size_t)>
+KFR_INLINE NonMemFn make_expression_end_block()
+{
+ return [](Fn* fn, size_t size) { return fn->end_block(size); };
+}
+
+template <typename T, size_t maxwidth, typename E>
+expression_vtable<T, maxwidth> make_expression_vtable_impl()
+{
+ expression_vtable<T, maxwidth> result;
+ constexpr size_t size = result.size() - 2;
+
+ result.get(csize<0>) = reinterpret_cast<void*>(&internal::make_expression_begin_block<decay<E>>);
+ result.get(csize<1>) = reinterpret_cast<void*>(&internal::make_expression_end_block<decay<E>>);
+
+ cforeach(csizeseq<size>, [&](auto u) {
+ constexpr size_t N = 1 << val_of(u);
+ result.get(csize<2 + val_of(u)>) =
+ reinterpret_cast<void*>(internal::make_expression_func<T, N, decay<E>>());
+ });
+ return result;
+}
+
+template <typename T, size_t maxwidth, typename E>
+KFR_INLINE expression_vtable<T, maxwidth>* make_expression_vtable()
+{
+ static_assert(is_input_expression<E>::value, "E must be an expression");
+ static expression_vtable<T, maxwidth> vtable = internal::make_expression_vtable_impl<T, maxwidth, E>();
+ return &vtable;
+}
+}
+
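+// Usage sketch: erase an expression's static type for uniform storage; the
+// rvalue overload keeps the expression alive through a shared
+// expression_resource.
+//   auto e = to_pointer(linspace(0.0, 1.0, 100)); // expression_pointer<f64>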
+template <typename E, typename T = value_type_of<E>, size_t maxwidth = maximum_expression_width()>
+KFR_INLINE expression_pointer<T, maxwidth> to_pointer(E& expr)
+{
+ static_assert(is_input_expression<E>::value, "E must be an expression");
+ return expression_pointer<T, maxwidth>(std::addressof(expr),
+ internal::make_expression_vtable<T, maxwidth, E>());
+}
+
+template <typename E, typename T = value_type_of<E>, size_t maxwidth = maximum_expression_width()>
+KFR_INLINE expression_pointer<T, maxwidth> to_pointer(E&& expr)
+{
+ static_assert(is_input_expression<E>::value, "E must be an expression");
+ std::shared_ptr<expression_resource> ptr = make_resource(std::move(expr));
+ return expression_pointer<T, maxwidth>(
+ ptr->instance(), internal::make_expression_vtable<T, maxwidth, E>(), std::move(ptr));
+}
+}
diff --git a/include/kfr/expressions/reduce.hpp b/include/kfr/expressions/reduce.hpp
@@ -0,0 +1,265 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+
+#include "../base/function.hpp"
+#include "../base/min_max.hpp"
+#include "../base/operators.hpp"
+#include "../base/vec.hpp"
+#include "basic.hpp"
+
+namespace kfr
+{
+
+template <typename T>
+KFR_INLINE T final_mean(T value, size_t size)
+{
+ return value / size;
+}
+KFR_FN(final_mean)
+
+template <typename T>
+KFR_INLINE T final_rootmean(T value, size_t size)
+{
+ return internal::builtin_sqrt(value / size);
+}
+KFR_FN(final_rootmean)
+
+namespace internal
+{
+template <typename FinalFn, typename T, KFR_ENABLE_IF(is_callable<FinalFn, size_t, T>::value)>
+KFR_INLINE auto reduce_call_final(FinalFn&& finalfn, size_t size, T value)
+{
+ return finalfn(value, size);
+}
+template <typename FinalFn, typename T, KFR_ENABLE_IF(!is_callable<FinalFn, size_t, T>::value)>
+KFR_INLINE auto reduce_call_final(FinalFn&& finalfn, size_t, T value)
+{
+ return finalfn(value);
+}
+
+template <cpu_t cpu = cpu_t::native>
+struct in_reduce
+{
+
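+    // Streams data through the output-expression interface, accumulating
+    // transformfn(x) into a vector accumulator with reducefn; get() then reduces
+    // the accumulator horizontally and applies finalfn, which may optionally
+    // take the element count (e.g. final_mean).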
+ template <typename T, typename ReduceFn, typename TransformFn, typename FinalFn>
+ struct expression_reduce : output_expression
+ {
+ using Tsubtype = subtype<T>;
+ constexpr static size_t width = vector_width<Tsubtype, cpu> * bitness_const(1, 2);
+
+ expression_reduce(ReduceFn&& reducefn, TransformFn&& transformfn, FinalFn&& finalfn)
+ : counter(0), reducefn(std::move(reducefn)), transformfn(std::move(transformfn)),
+ finalfn(std::move(finalfn)), value(resize<width>(make_vector(reducefn(initialvalue<T>{}))))
+ {
+ }
+
+ template <typename U, size_t N>
+ KFR_INLINE void operator()(coutput_t, size_t, vec<U, N> x) const
+ {
+ counter += N;
+ process(x);
+ }
+
+ KFR_INLINE T get()
+ {
+ return internal::reduce_call_final(finalfn, counter, horizontal(value, reducefn));
+ }
+
+ protected:
+ void reset() { counter = 0; }
+ template <size_t N, KFR_ENABLE_IF(N == width)>
+ KFR_INLINE void process(vec<Tsubtype, N> x) const
+ {
+ value = reducefn(transformfn(x), value);
+ }
+
+ template <size_t N, KFR_ENABLE_IF(N < width)>
+ KFR_INLINE void process(vec<Tsubtype, N> x) const
+ {
+ value = combine(value, reducefn(transformfn(x), narrow<N>(value)));
+ }
+
+ template <size_t N, KFR_ENABLE_IF(N > width)>
+ KFR_INLINE void process(vec<Tsubtype, N> x) const
+ {
+ process(low(x));
+ process(high(x));
+ }
+
+ mutable size_t counter;
+ retarget<ReduceFn, cpu> reducefn;
+ retarget<TransformFn, cpu> transformfn;
+ retarget<FinalFn, cpu> finalfn;
+ mutable vec<Tsubtype, width> value;
+ };
+
+ template <typename ReduceFn, typename TransformFn = fn_pass_through, typename FinalFn = fn_pass_through,
+ typename E1, typename T = value_type_of<E1>>
+ KFR_SINTRIN T reduce(E1&& e1, ReduceFn&& reducefn, TransformFn&& transformfn = fn_pass_through(),
+ FinalFn&& finalfn = fn_pass_through())
+ {
+ static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())");
+ static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())");
+ const size_t size = e1.size();
+ using reducer_t = expression_reduce<T, decay<ReduceFn>, decay<TransformFn>, decay<FinalFn>>;
+ reducer_t red(std::forward<ReduceFn>(reducefn), std::forward<TransformFn>(transformfn),
+ std::forward<FinalFn>(finalfn));
+ process<T, cpu>(red, std::forward<E1>(e1), size);
+
+ return red.get();
+ }
+
+ template <typename E1, typename T = value_type_of<E1>>
+ KFR_SINTRIN T sum(E1&& x)
+ {
+ static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())");
+ static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())");
+ return reduce(std::forward<E1>(x), fn_add());
+ }
+
+ template <typename E1, typename T = value_type_of<E1>>
+ KFR_SINTRIN T mean(E1&& x)
+ {
+ static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())");
+ static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())");
+ return reduce(std::forward<E1>(x), fn_add(), fn_pass_through(), fn_final_mean());
+ }
+
+ template <typename E1, typename T = value_type_of<E1>>
+ KFR_SINTRIN T min(E1&& x)
+ {
+ using fn_min = typename in_min_max<cpu>::fn_min;
+ static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())");
+ static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())");
+ return reduce(std::forward<E1>(x), fn_min());
+ }
+
+ template <typename E1, typename T = value_type_of<E1>>
+ KFR_SINTRIN T max(E1&& x)
+ {
+ using fn_max = typename in_min_max<cpu>::fn_max;
+ static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())");
+ static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())");
+ return reduce(std::forward<E1>(x), fn_max());
+ }
+
+ template <typename E1, typename E2,
+ typename T = value_type_of<decltype(std::declval<E1>() * std::declval<E2>())>>
+ KFR_SINTRIN T dotproduct(E1&& x, E2&& y)
+ {
+ auto m = std::forward<E1>(x) * std::forward<E2>(y);
+ using E12 = decltype(m);
+ static_assert(!is_generic<E12>::value, "e1 * e2 must be a typed expression (use typed<T>())");
+ static_assert(!is_infinite<E12>::value, "e1 * e2 must be a sized expression (use typed<T>())");
+ return reduce(std::move(m), fn_add());
+ }
+
+ template <typename E1, typename T = value_type_of<E1>>
+ KFR_SINTRIN T rms(E1&& x)
+ {
+ static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())");
+ static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())");
+ return reduce(std::forward<E1>(x), fn_add(), fn_sqr(), fn_final_rootmean());
+ }
+
+ template <typename E1, typename T = value_type_of<E1>>
+ KFR_SINTRIN T sumsqr(E1&& x)
+ {
+ static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())");
+ static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())");
+ return reduce(std::forward<E1>(x), fn_add(), fn_sqr());
+ }
+
+ KFR_SPEC_FN(in_reduce, reduce)
+ KFR_SPEC_FN(in_reduce, sum)
+ KFR_SPEC_FN(in_reduce, dotproduct)
+ KFR_SPEC_FN(in_reduce, rms)
+ KFR_SPEC_FN(in_reduce, sumsqr)
+ KFR_SPEC_FN(in_reduce, mean)
+ KFR_SPEC_FN(in_reduce, min)
+ KFR_SPEC_FN(in_reduce, max)
+};
+}
+
+namespace native
+{
+
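+// Usage sketch (illustrative): the reductions below accept any sized input
+// expression.
+//   univector<fbase, 4> v{ 1, 2, 3, 4 };
+//   const fbase s = sum(v);  // 10
+//   const fbase m = mean(v); // 2.5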
+template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_SINTRIN T sum(E1&& x)
+{
+ static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())");
+ static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())");
+ return internal::in_reduce<>::sum(std::forward<E1>(x));
+}
+
+template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_SINTRIN T mean(E1&& x)
+{
+ static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())");
+ static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())");
+ return internal::in_reduce<>::mean(std::forward<E1>(x));
+}
+
+template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_SINTRIN T max(E1&& x)
+{
+ static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())");
+ static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())");
+ return internal::in_reduce<>::max(std::forward<E1>(x));
+}
+
+template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_SINTRIN T min(E1&& x)
+{
+ static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())");
+ static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())");
+ return internal::in_reduce<>::min(std::forward<E1>(x));
+}
+
+template <typename E1, typename E2, typename T = value_type_of<E1>,
+ KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)>
+KFR_SINTRIN T dotproduct(E1&& x, E2&& y)
+{
+ static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())");
+ static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())");
+ return internal::in_reduce<>::dotproduct(std::forward<E1>(x), std::forward<E2>(y));
+}
+
+template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_SINTRIN T rms(E1&& x)
+{
+ static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())");
+ static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())");
+ return internal::in_reduce<>::rms(std::forward<E1>(x));
+}
+
+template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>::value)>
+KFR_SINTRIN T sumsqr(E1&& x)
+{
+ static_assert(!is_generic<E1>::value, "e1 must be a typed expression (use typed<T>())");
+ static_assert(!is_infinite<E1>::value, "e1 must be a sized expression (use typed<T>())");
+ return internal::in_reduce<>::sumsqr(std::forward<E1>(x));
+}
+}
+}
diff --git a/include/kfr/io/audiofile.hpp b/include/kfr/io/audiofile.hpp
@@ -0,0 +1,370 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+
+#include "../base/univector.hpp"
+#include "../base/vec.hpp"
+#include "../expressions/basic.hpp"
+#include "file.hpp"
+
+namespace kfr
+{
+
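+// Writes planar channel data to dest in interleaved order: mono and stereo use
+// vectorized fast paths; other channel counts fall back to a scalar writer loop.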
+template <typename Tout, typename Tin, size_t Tag1, size_t Tag2, typename E1>
+void write_interleaved(E1&& dest, const univector2d<Tin, Tag1, Tag2>& src)
+{
+ const size_t channels = src.size();
+ const size_t size = src[0].size();
+ if (channels == 1)
+ {
+ process<Tout>(std::forward<E1>(dest), src[0], size);
+ }
+ else if (channels == 2)
+ {
+ process<Tout>(std::forward<E1>(dest), bind_expression(fn_interleave(), src[0], src[1]), size);
+ }
+ else
+ {
+ internal::expression_writer<Tout, E1> wr = writer<Tout>(std::forward<E1>(dest));
+ for (size_t i = 0; i < size; i++)
+ for (size_t ch = 0; ch < channels; ch++)
+ wr.write(src[ch][i]);
+ }
+}
+
+enum class audiodatatype
+{
+ unknown,
+ i16,
+ i24,
+ i24a32,
+ i32,
+ f32,
+ f64
+};
+
+namespace internal
+{
+template <typename T>
+constexpr range<fmax> audio_range()
+{
+ return { -std::numeric_limits<T>::max(), std::numeric_limits<T>::max() };
+}
+
+template <>
+constexpr range<fmax> audio_range<f32>()
+{
+ return { -1.0, +1.0 };
+}
+
+template <>
+constexpr range<fmax> audio_range<f64>()
+{
+ return { -1.0, +1.0 };
+}
+
+inline size_t get_audiobitdepth(audiodatatype type)
+{
+    static constexpr size_t bitdepths[] = { 0, 16, 24, 24, 32, 32, 64 };
+    return bitdepths[static_cast<int>(type)];
+}
+
+template <typename T>
+inline audiodatatype get_audiodatatype()
+{
+ if (ctypeid<T>() == ctypeid<i16>())
+ return audiodatatype::i16;
+ else if (ctypeid<T>() == ctypeid<i32>())
+ return audiodatatype::i32;
+ else if (ctypeid<T>() == ctypeid<f32>())
+ return audiodatatype::f32;
+ else if (ctypeid<T>() == ctypeid<f64>())
+ return audiodatatype::f64;
+ else
+ return audiodatatype::unknown;
+}
+}
+
+struct audioformat
+{
+ size_t channels;
+ size_t samples;
+ audiodatatype type;
+ fmax samplerate;
+
+ template <typename T, size_t Tag1, size_t Tag2>
+ constexpr audioformat(const univector2d<T, Tag1, Tag2>& data, fmax sample_rate)
+ : channels(data.size()), samples(data[0].size()), type(internal::get_audiodatatype<T>()),
+ samplerate(sample_rate)
+ {
+ }
+};
+
+namespace internal
+{
+static constexpr u32 FourCC(const char (&ch)[5])
+{
+ return u32(u8(ch[0])) | u32(u8(ch[1])) << 8 | u32(u8(ch[2])) << 16 | u32(u8(ch[3])) << 24;
+}
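+// e.g. FourCC("RIFF") packs the four characters into a u32 in little-endian
+// order, matching the chunk tags as they appear on disk.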
+
+struct WAV_FMT
+{
+ i32 fId; // 'fmt '
+ i32 pcmHeaderLength;
+ i16 wFormatTag;
+ i16 numChannels;
+ i32 nSamplesPerSec;
+ i32 nAvgBytesPerSec;
+    i16 numBlockAlign;
+ i16 numBitsPerSample;
+} __attribute__((packed));
+
+struct WAV_DATA
+{
+ i32 dId; // 'data' or 'fact'
+ i32 dLen;
+ u8 data[1];
+} __attribute__((packed));
+
+struct WAV_DATA_HDR
+{
+ i32 dId; // 'data' or 'fact'
+ i32 dLen;
+} __attribute__((packed));
+
+struct AIFF_FMT
+{
+ i32 chunkID;
+ i32 chunkLen;
+ i16 channels;
+ u32 frames;
+ i16 bitsPerSample;
+ f80 sampleRate;
+ i32 compression;
+} __attribute__((packed));
+
+struct AIFF_DATA
+{
+ i32 chunkID;
+ i32 chunkLen;
+ u32 offset;
+} __attribute__((packed));
+
+constexpr u32 cWAVE_FORMAT_PCM = 1;
+constexpr u32 cWAVE_FORMAT_IEEE = 3;
+
+constexpr u32 ccRIFF = FourCC("RIFF");
+constexpr u32 ccWAVE = FourCC("WAVE");
+constexpr u32 ccfmt = FourCC("fmt ");
+constexpr u32 ccdata = FourCC("data");
+
+constexpr u32 ccFORM = FourCC("FORM");
+constexpr u32 ccAIFF = FourCC("AIFF");
+constexpr u32 ccAIFC = FourCC("AIFC");
+constexpr u32 ccCOMM = FourCC("COMM");
+constexpr u32 ccSSND = FourCC("SSND");
+constexpr u32 ccNONE = FourCC("NONE");
+constexpr u32 ccsowt = FourCC("sowt");
+
+struct RIFF_HDR
+{
+    i32 riffID; // 'RIFF' or 'FORM'
+ i32 fileLen;
+ i32 formatID; // 'WAVE' or 'AIFF'
+} __attribute__((packed));
+
+struct WAV_HEADER
+{
+ RIFF_HDR riff;
+ WAV_FMT fmt;
+ WAV_DATA_HDR data;
+
+} __attribute__((packed));
+
+struct CHUNK_HDR
+{
+ i32 chunkID;
+ i32 chunkLen;
+} __attribute__((packed));
+
+static bool audio_test_wav(const array_ref<u8>& rawbytes)
+{
+ if (rawbytes.size() < sizeof(RIFF_HDR))
+ {
+ return false;
+ }
+ const RIFF_HDR* hdr = reinterpret_cast<const RIFF_HDR*>(rawbytes.data());
+ if (hdr->riffID != ccRIFF)
+ {
+ return false;
+ }
+ if (hdr->formatID != ccWAVE)
+ {
+ return false;
+ }
+ return true;
+}
+
+static bool audio_test_aiff(const array_ref<u8>& rawbytes)
+{
+ if (rawbytes.size() < sizeof(RIFF_HDR))
+ {
+ return false;
+ }
+ const RIFF_HDR* hdr = reinterpret_cast<const RIFF_HDR*>(rawbytes.data());
+ if (hdr->riffID != ccFORM)
+ {
+ return false;
+ }
+ if (hdr->formatID != ccAIFF && hdr->formatID != ccAIFC)
+ {
+ return false;
+ }
+ return true;
+}
+
+enum class file_status
+{
+ ok,
+ unknown_format,
+ bad_format,
+ unsupported_compression,
+ unsupported_bit_format
+};
+
+static file_status audio_info_wav(audioformat& info, const array_ref<u8>& rawbytes)
+{
+ const CHUNK_HDR* chunk = ptr_cast<CHUNK_HDR>(rawbytes.data() + 12);
+ const void* end = ptr_cast<char>(rawbytes.end());
+ const WAV_FMT* fmt = nullptr;
+ const WAV_DATA* rawdata = nullptr;
+ while (chunk < end)
+ {
+ switch (chunk->chunkID)
+ {
+ case ccfmt:
+ fmt = ptr_cast<WAV_FMT>(chunk);
+ break;
+ case ccdata:
+ rawdata = ptr_cast<WAV_DATA>(chunk);
+ break;
+ }
+ chunk = ptr_cast<CHUNK_HDR>(ptr_cast<u8>(chunk) + chunk->chunkLen + 8);
+ }
+ if (!fmt || !rawdata)
+ {
+ return file_status::bad_format;
+ }
+
+ if (fmt->wFormatTag != cWAVE_FORMAT_PCM && fmt->wFormatTag != cWAVE_FORMAT_IEEE)
+ {
+ return file_status::unsupported_compression;
+ }
+
+    int storedbits = fmt->numBlockAlign * 8 / fmt->numChannels;
+ if (fmt->wFormatTag == cWAVE_FORMAT_PCM && fmt->numBitsPerSample == 16 && storedbits == 16)
+ {
+ info.type = audiodatatype::i16;
+ }
+ else if (fmt->wFormatTag == cWAVE_FORMAT_PCM && fmt->numBitsPerSample == 24 && storedbits == 24)
+ {
+ info.type = audiodatatype::i24;
+ }
+ else if (fmt->wFormatTag == cWAVE_FORMAT_PCM && fmt->numBitsPerSample == 24 && storedbits == 32)
+ {
+ info.type = audiodatatype::i24a32;
+ }
+ else if (fmt->wFormatTag == cWAVE_FORMAT_PCM && fmt->numBitsPerSample == 32 && storedbits == 32)
+ {
+ info.type = audiodatatype::i32;
+ }
+ else if (fmt->wFormatTag == cWAVE_FORMAT_IEEE && fmt->numBitsPerSample == 32 && storedbits == 32)
+ {
+ info.type = audiodatatype::f32;
+ }
+ else if (fmt->wFormatTag == cWAVE_FORMAT_IEEE && fmt->numBitsPerSample == 64 && storedbits == 64)
+ {
+ info.type = audiodatatype::f64;
+ }
+ else
+ {
+ return file_status::unsupported_bit_format;
+ }
+
+ if (fmt->numChannels < 1 || fmt->numChannels > 16)
+ return file_status::unsupported_bit_format;
+
+ info.channels = size_t(fmt->numChannels);
+ info.samplerate = size_t(fmt->nSamplesPerSec);
+ info.samples = size_t(rawdata->dLen) / info.channels / (get_audiobitdepth(info.type) / 8);
+
+ return file_status::ok;
+}
+
+static file_status audio_info(audioformat& info, const array_ref<u8>& file_bytes)
+{
+ if (audio_test_wav(file_bytes))
+ return audio_info_wav(info, file_bytes);
+ else
+ return file_status::unknown_format;
+}
+}
+
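+// The unused template parameter turns this function into a template, so its
+// definition can stay in the header without violating the one-definition rule.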
+template <size_t = 0>
+void audio_encode_header(internal::expression_sequential_file_writer& dest, const audioformat& info)
+{
+ using namespace internal;
+ WAV_HEADER hdr;
+ zeroize(hdr);
+ const size_t framesize = info.channels * get_audiobitdepth(info.type) / 8;
+ hdr.riff.riffID = ccRIFF;
+ hdr.riff.formatID = ccWAVE;
+ hdr.riff.fileLen = autocast(info.samples * framesize + sizeof(hdr) - 8);
+ hdr.fmt.fId = ccfmt;
+ hdr.fmt.pcmHeaderLength = autocast(sizeof(hdr.fmt) - sizeof(CHUNK_HDR));
+ hdr.fmt.numBlockAlingn = autocast(framesize);
+ hdr.fmt.nAvgBytesPerSec = autocast(info.samplerate * framesize);
+ hdr.fmt.nSamplesPerSec = autocast(info.samplerate);
+ hdr.fmt.numChannels = autocast(info.channels);
+ hdr.fmt.wFormatTag = info.type >= audiodatatype::f32 ? cWAVE_FORMAT_IEEE : cWAVE_FORMAT_PCM;
+ hdr.fmt.numBitsPerSample = autocast(get_audiobitdepth(info.type));
+ hdr.data.dId = ccdata;
+ hdr.data.dLen = autocast(info.samples * framesize);
+
+ dest.write(hdr);
+}
+
+template <typename T, size_t Tag1, size_t Tag2>
+void audio_encode_audio(internal::expression_sequential_file_writer& dest,
+ const univector2d<T, Tag1, Tag2>& audio)
+{
+ write_interleaved<T>(dest, audio);
+}
+
+template <typename T, size_t Tag1, size_t Tag2>
+void audio_encode(internal::expression_sequential_file_writer& dest, const univector2d<T, Tag1, Tag2>& audio,
+ const audioformat& info)
+{
+ audio_encode_header(dest, info);
+ audio_encode_audio(dest, audio);
+}
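+
+// Usage sketch (the file name and sample data are illustrative):
+//
+//   audioformat info;
+//   info.type       = audiodatatype::f32;
+//   info.channels   = 2;
+//   info.samplerate = 44100;
+//   info.samples    = samples[0].size(); // frames per channel
+//   auto writer     = sequential_file_writer("out.wav");
+//   audio_encode(writer, samples, info); // samples: univector2d of f32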
+}
diff --git a/include/kfr/io/file.hpp b/include/kfr/io/file.hpp
@@ -0,0 +1,132 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+
+#include "../base/function.hpp"
+#include "../base/univector.hpp"
+#include "../base/vec.hpp"
+#include <cstdio>
+#include <string>
+
+namespace kfr
+{
+
+namespace internal
+{
+struct expression_file_base
+{
+ expression_file_base() = delete;
+ expression_file_base(const expression_file_base&) = delete;
+    expression_file_base(expression_file_base&& other) noexcept : file(other.file) { other.file = nullptr; }
+    expression_file_base(FILE* file) : file(file) {}
+    ~expression_file_base()
+    {
+        if (file) // fopen may have failed; do not pass nullptr to fclose
+            fclose(file);
+    }
+ bool ok() const { return file != nullptr; }
+ FILE* file;
+};
+
+struct expression_sequential_file_writer : expression_file_base, output_expression
+{
+ using expression_file_base::expression_file_base;
+ template <typename U, size_t N>
+ void operator()(coutput_t, size_t, vec<U, N> value)
+ {
+ write(value);
+ }
+ template <typename U>
+ void write(const U& value)
+ {
+ fwrite(std::addressof(value), 1, sizeof(U), file);
+ }
+};
+
+struct expression_sequential_file_reader : expression_file_base, input_expression
+{
+ using expression_file_base::expression_file_base;
+ template <typename U, size_t N>
+ vec<U, N> operator()(cinput_t, size_t, vec_t<U, N>) const
+ {
+ vec<U, N> input = qnan;
+ read(input);
+ return input;
+ }
+ template <typename U>
+ void read(U& value) const
+ {
+ fread(std::addressof(value), 1, sizeof(U), file);
+ }
+};
+
+template <typename T>
+struct expression_file_writer : expression_file_base, output_expression
+{
+ using expression_file_base::expression_file_base;
+ template <typename U, size_t N>
+ void operator()(coutput_t, size_t index, vec<U, N> value)
+ {
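+        // Seek only when the requested index differs from the current
+        // position; purely sequential writes skip the fseeko call.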
+ if (position != index)
+ fseeko(file, static_cast<off_t>(index * sizeof(T)), SEEK_SET);
+ const vec<T, N> output = cast<T>(value);
+ fwrite(output.data(), sizeof(T), output.size(), file);
+ position = index + N;
+ }
+ size_t position = 0;
+};
+
+template <typename T>
+struct expression_file_reader : expression_file_base, input_expression
+{
+ using expression_file_base::expression_file_base;
+ template <typename U, size_t N>
+ vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
+ {
+ if (position != index)
+ fseeko(file, static_cast<off_t>(index * sizeof(T)), SEEK_SET);
+ vec<T, N> input = qnan;
+ fread(input.data(), sizeof(T), input.size(), file);
+ position = index + N;
+ return cast<U>(input);
+ }
+    mutable size_t position = 0; // mutable: operator() is const but advances the read position
+};
+}
+
+inline internal::expression_sequential_file_reader sequential_file_reader(const std::string& file_name)
+{
+ return internal::expression_sequential_file_reader(fopen(file_name.c_str(), "rb"));
+}
+inline internal::expression_sequential_file_writer sequential_file_writer(const std::string& file_name)
+{
+ return internal::expression_sequential_file_writer(fopen(file_name.c_str(), "wb"));
+}
+
+template <typename T = u8>
+internal::expression_file_reader<T> file_reader(const std::string& file_name)
+{
+ return internal::expression_file_reader<T>(fopen(file_name.c_str(), "rb"));
+}
+template <typename T = u8>
+internal::expression_file_writer<T> file_writer(const std::string& file_name)
+{
+ return internal::expression_file_writer<T>(fopen(file_name.c_str(), "wb"));
+}
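+
+// Usage sketch (file names are illustrative; univector can be assigned
+// from input expressions):
+//
+//   auto reader = file_reader<f32>("input.raw");
+//   univector<f32, 1024> samples = reader;   // reads 1024 f32 values
+//   auto writer = sequential_file_writer("dump.raw");
+//   writer.write(samples[0]);                // raw, unformatted write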
+}
diff --git a/include/kfr/io/python_plot.hpp b/include/kfr/io/python_plot.hpp
@@ -0,0 +1,155 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+#include "../base/vec.hpp"
+#include "../cometa/string.hpp"
+#include <cstdlib>
+
+#ifdef KFR_OS_WIN
+#include <direct.h>
+#define cross_getcwd _getcwd
+#else
+#include <unistd.h>
+#define cross_getcwd getcwd
+#endif
+
+namespace kfr
+{
+namespace internal
+{
+
+// Defined in a header, so it is inline to avoid duplicate definitions across translation units.
+inline void python(const std::string& name, const std::string& code)
+{
+ std::string filename;
+ {
+ char curdir[1024];
+ cross_getcwd(curdir, arraysize(curdir));
+ filename = curdir;
+ }
+#ifdef KFR_OS_WIN
+ const char* slash = "\\";
+#else
+ const char* slash = "/";
+#endif
+ filename = filename + slash + name + ".py";
+
+    FILE* f = fopen(filename.c_str(), "w");
+    if (!f) // the current directory may be read-only
+        return;
+    fwrite(code.c_str(), 1, code.size(), f);
+    fclose(f);
+ std::system(("python \"" + filename + "\"").c_str());
+}
+}
+
+static std::string concat_args() { return {}; }
+
+template <typename... Ts>
+static std::string concat_args(const std::string& left, const Ts&... rest)
+{
+ const std::string right = concat_args(rest...);
+ return left.empty() ? right : right.empty() ? left : left + ", " + right;
+}
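+// e.g. concat_args("data", "", "title='x'") yields "data, title='x'";
+// empty strings are skipped so optional argument lists can be passed through.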
+
+static void plot_show(const std::string& name, const std::string& wavfile, const std::string& options = "")
+{
+ print(name, "...");
+ std::string ss;
+ ss += "#!/usr/bin/env python\n"
+ "import dspplot\n\n"
+ "dspplot.plot(" +
+ concat_args("r'" + wavfile + "'", options) + ")\n";
+
+ internal::python(name, ss);
+ print("done\n");
+}
+
+static void plot_show(const std::string& name, const char* x, const std::string& options = "")
+{
+ plot_show(name, std::string(x), options);
+}
+
+template <typename T>
+void plot_show(const std::string& name, T&& x, const std::string& options = "")
+{
+ print(name, "...");
+ auto array = make_array_ref(std::forward<T>(x));
+ std::string ss;
+ ss += "#!/usr/bin/env python\n"
+ "import dspplot\n\n"
+ "data = [\n";
+ for (size_t i = 0; i < array.size(); i++)
+ ss += as_string(fmt<'g', 20, 17>(array[i]), ",\n");
+ ss += "]\n";
+
+ ss += "dspplot.plot(" + concat_args("data", options) + ")\n";
+
+ internal::python(name, ss);
+ print("done\n");
+}
+
+template <typename T>
+void plot_save(const std::string& name, T&& x, const std::string& options = "")
+{
+ plot_show(name, std::forward<T>(x), concat_args(options, "file='../svg/" + name + ".svg'"));
+}
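+
+// Usage sketch (the option string is illustrative; the dspplot Python
+// module from this repository must be installed):
+//
+//   univector<double, 256> signal = ...;
+//   plot_save("signal", signal, "title='Test signal'");
+//   // writes ../svg/signal.svg via a generated Python script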
+
+template <typename T1, typename T2>
+void perfplot_show(const std::string& name, T1&& data, T2&& labels, const std::string& options = "")
+{
+ print(name, "...");
+ auto array = make_array_ref(std::forward<T1>(data));
+ auto labels_array = make_array_ref(std::forward<T2>(labels));
+ std::string ss;
+ ss += "#!/usr/bin/env python\n";
+ ss += "import dspplot\n\n";
+ ss += "data = [\n";
+ for (size_t i = 0; i < array.size(); i++)
+ {
+ auto subarray = make_array_ref(array[i]);
+ ss += "[\n";
+        for (size_t j = 0; j < subarray.size(); j++) // j avoids shadowing the outer index
+            ss += as_string("    ", fmt<'g', 20, 17>(subarray[j]), ",\n");
+ ss += "],";
+ }
+ ss += "]\n";
+
+ ss += "labels = [\n";
+ for (size_t i = 0; i < labels_array.size(); i++)
+ {
+ const std::string label = labels_array[i];
+ ss += " '" + label + "',";
+ }
+ ss += "]\n";
+
+ ss += "dspplot.perfplot(" + concat_args("data, labels", options) + ")\n";
+
+ internal::python(name, ss);
+ print("done\n");
+}
+
+template <typename T1, typename T2>
+void perfplot_save(const std::string& name, T1&& data, T2&& labels, const std::string& options = "")
+{
+ perfplot_show(name, std::forward<T1>(data), std::forward<T2>(labels),
+ concat_args(options, "file='../perf/" + name + ".svg'"));
+}
+}
diff --git a/include/kfr/io/tostring.hpp b/include/kfr/io/tostring.hpp
@@ -0,0 +1,131 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+
+#include "../base/complex.hpp"
+#include "../base/univector.hpp"
+#include "../base/vec.hpp"
+#include "../cometa/string.hpp"
+#include <cmath>
+
+namespace kfr
+{
+
+namespace internal
+{
+
+constexpr size_t number_width = 9;
+constexpr size_t number_precision = 6;
+constexpr size_t number_precision_short = 2;
+constexpr size_t number_columns = 8;
+
+template <typename T>
+std::string fmtvalue(std::true_type, const T& x)
+{
+ std::string str = as_string(fmt<'g', number_width, number_precision>(x));
+ if (str.size() > number_width)
+ str = as_string(fmt<'g', number_width, number_precision_short>(x));
+ return str;
+}
+
+template <typename T>
+std::string fmtvalue(std::true_type, const complex<T>& x)
+{
+ std::string restr = as_string(fmt<'g', number_width, number_precision>(x.real()));
+ if (restr.size() > number_width)
+ restr = as_string(fmt<'g', number_width, number_precision_short>(x.real()));
+
+ std::string imstr = as_string(fmt<'g', -1, number_precision>(std::abs(x.imag())));
+ if (imstr.size() > number_width)
+ imstr = as_string(fmt<'g', -1, number_precision_short>(std::abs(x.imag())));
+
+ return restr + (x.imag() < T(0) ? "-" : "+") + padleft(number_width, imstr + "j");
+}
+
+template <typename T>
+std::string fmtvalue(std::false_type, const T& x)
+{
+ return as_string(fmtwidth<number_width>(repr(x)));
+}
+}
+
+template <typename T>
+inline std::string repr(const kfr::complex<T>& v)
+{
+ return as_string(v.real()) + " + " + as_string(v.imag()) + "j";
+}
+
+template <typename T>
+inline std::string repr(const T* source, size_t N)
+{
+ std::string str;
+ for (size_t i = 0; i < N; i++)
+ {
+ if (i > 0)
+ {
+ if (i % internal::number_columns == 0)
+ str += "\n";
+ else
+ str += " ";
+ }
+ str += as_string(internal::fmtvalue(std::is_floating_point<T>(), source[i]));
+ }
+ return str;
+}
+
+template <typename T>
+inline std::string repr(const complex<T>* source, size_t N)
+{
+ std::string str;
+ for (size_t i = 0; i < N; i++)
+ {
+ if (i > 0)
+ {
+ if (i % (internal::number_columns / 2) == 0)
+ str += "\n";
+ else
+ str += " ";
+ }
+ str += as_string(internal::fmtvalue(std::true_type{}, source[i]));
+ }
+ return str;
+}
+
+template <typename T, int N>
+inline std::string repr(kfr::simd<T, N> v)
+{
+ return repr(tovec(v));
+}
+
+template <typename T, size_t N>
+inline std::string repr(vec<T, N> v)
+{
+ return repr(v.data(), v.size());
+}
+
+template <typename T, size_t Tag>
+inline std::string repr(const univector<T, Tag>& v)
+{
+ return repr(v.data(), v.size());
+}
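+
+// Together with cometa/string.hpp, these overloads let vectors be printed
+// directly, e.g.:
+//   println(make_vector(1.f, 2.f, 3.f)); // formatted via repr() above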
+}
diff --git a/include/kfr/math.hpp b/include/kfr/math.hpp
@@ -0,0 +1,51 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+
+#include "base/vec.hpp"
+
+#include "base/abs.hpp"
+#include "base/asin_acos.hpp"
+#include "base/atan.hpp"
+#include "base/complex.hpp"
+#include "base/constants.hpp"
+#include "base/digitreverse.hpp"
+#include "base/gamma.hpp"
+#include "base/log_exp.hpp"
+#include "base/logical.hpp"
+#include "base/min_max.hpp"
+#include "base/operators.hpp"
+#include "base/read_write.hpp"
+#include "base/round.hpp"
+#include "base/saturation.hpp"
+#include "base/select.hpp"
+#include "base/shuffle.hpp"
+#include "base/sin_cos.hpp"
+#include "base/sinh_cosh.hpp"
+#include "base/sqrt.hpp"
+#include "base/tan.hpp"
+
+namespace kfr
+{
+using namespace native;
+}
diff --git a/include/kfr/misc/compiletime.hpp b/include/kfr/misc/compiletime.hpp
@@ -0,0 +1,81 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+#include "../base/constants.hpp"
+#include "../base/operators.hpp"
+#include "../base/types.hpp"
+
+namespace kfr
+{
+
+namespace compiletime
+{
+
+template <typename T>
+constexpr inline T select(bool c, T x, T y)
+{
+ return c ? x : y;
+}
+template <typename T>
+constexpr inline T trunc(T x)
+{
+ return static_cast<T>(static_cast<long long>(x));
+}
+template <typename T>
+constexpr inline T abs(T x)
+{
+ return x < T() ? -x : x;
+}
+template <typename T>
+constexpr inline T mulsign(T x, T y)
+{
+ return y < T() ? -x : x;
+}
+template <typename T>
+constexpr inline T sin(T x)
+{
+ x = x - trunc(x / c_pi<T, 2>) * c_pi<T, 2>;
+ constexpr T c2 = -0.16665853559970855712890625;
+ constexpr T c4 = +8.31427983939647674560546875e-3;
+ constexpr T c6 = -1.85423981747590005397796630859375e-4;
+
+ x -= c_pi<T>;
+ T y = abs(x);
+ y = select(y > c_pi<T, 1, 2>, c_pi<T> - y, y);
+ y = mulsign(y, -x);
+
+ const T y2 = y * y;
+ T formula = c6;
+ const T y3 = y2 * y;
+ formula = fmadd(formula, y2, c4);
+ formula = fmadd(formula, y2, c2);
+ formula = formula * y3 + y;
+ return formula;
+}
+template <typename T>
+constexpr inline T cos(T x)
+{
+ return sin(x + c_pi<T, 1, 2>);
+}
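+
+// Usage sketch: the functions above are constexpr, so they can seed
+// compile-time data, e.g.:
+//   constexpr double s = compiletime::sin(1.0); // evaluated at compile time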
+}
+}
diff --git a/include/kfr/misc/random.hpp b/include/kfr/misc/random.hpp
@@ -0,0 +1,180 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+#include "../base/function.hpp"
+#include "../base/operators.hpp"
+#include "../base/shuffle.hpp"
+#include "../base/vec.hpp"
+
+namespace kfr
+{
+
+using random_state = u32x4;
+
+struct seed_from_rdtsc_t
+{
+};
+
+constexpr seed_from_rdtsc_t seed_from_rdtsc{};
+
+struct random_bit_generator
+{
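+    // Seeds the 128-bit state from the CPU cycle counter; every constructor
+    // runs one generation step immediately to mix the raw seed bits.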
+ random_bit_generator(seed_from_rdtsc_t) noexcept
+ : state(bitcast<u32>(make_vector(__builtin_readcyclecounter(),
+ (__builtin_readcyclecounter() << 11) ^ 0x710686d615e2257bull)))
+ {
+ (void)operator()();
+ }
+ constexpr random_bit_generator(u32 x0, u32 x1, u32 x2, u32 x3) noexcept : state(x0, x1, x2, x3)
+ {
+ (void)operator()();
+ }
+ constexpr random_bit_generator(u64 x0, u64 x1) noexcept : state(bitcast<u32>(make_vector(x0, x1)))
+ {
+ (void)operator()();
+ }
+
+ inline random_state operator()()
+ {
+ constexpr static random_state mul{ 214013u, 17405u, 214013u, 69069u };
+ constexpr static random_state add{ 2531011u, 10395331u, 13737667u, 1u };
+ state = bitcast<u32>(rotateright<3>(bitcast<u8>(fmadd(state, mul, add))));
+ return state;
+ }
+
+protected:
+ random_state state;
+};
+
+template <size_t N, KFR_ENABLE_IF(N <= sizeof(random_state))>
+inline vec<u8, N> random_bits(random_bit_generator& gen)
+{
+ return narrow<N>(bitcast<u8>(gen()));
+}
+template <size_t N, KFR_ENABLE_IF(N > sizeof(random_state))>
+inline vec<u8, N> random_bits(random_bit_generator& gen)
+{
+ constexpr size_t N2 = prev_poweroftwo(N - 1);
+ return concat(random_bits<N2>(gen), random_bits<N - N2>(gen));
+}
+
+template <typename T, size_t N, KFR_ENABLE_IF(std::is_integral<T>::value)>
+inline vec<T, N> random_uniform(random_bit_generator& gen)
+{
+ return bitcast<T>(random_bits<N * sizeof(T)>(gen));
+}
+
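+// Keep the exponent bits of 1.0 and randomize the mantissa: the result is
+// uniformly distributed in [1.0, 2.0), which random_uniform then shifts
+// down to [0.0, 1.0) by subtracting 1.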
+template <typename T, size_t N, KFR_ENABLE_IF(std::is_same<T, f32>::value)>
+inline vec<f32, N> randommantissa(random_bit_generator& gen)
+{
+ return bitcast<f32>((random_uniform<u32, N>(gen) & 0x7FFFFFu) | 0x3f800000u) + 0.0f;
+}
+
+template <typename T, size_t N, KFR_ENABLE_IF(std::is_same<T, f64>::value)>
+inline vec<f64, N> randommantissa(random_bit_generator& gen)
+{
+ return bitcast<f64>((random_uniform<u64, N>(gen) & 0x000FFFFFFFFFFFFFull) | 0x3FF0000000000000ull) + 0.0;
+}
+
+template <typename T, size_t N>
+inline enable_if_f<vec<T, N>> random_uniform(random_bit_generator& gen)
+{
+ return randommantissa<T, N>(gen) - 1.f;
+}
+
+template <size_t N, typename T>
+inline enable_if_f<vec<T, N>> random_range(random_bit_generator& gen, T min, T max)
+{
+ return mix(random_uniform<T, N>(gen), min, max);
+}
+
+template <size_t N, typename T>
+inline enable_if_not_f<vec<T, N>> random_range(random_bit_generator& gen, T min, T max)
+{
+ using big_type = findinttype<sqr(std::numeric_limits<T>::min()), sqr(std::numeric_limits<T>::max())>;
+
+ vec<T, N> u = random_uniform<T, N>(gen);
+ const vec<big_type, N> tmp = cast<big_type>(u);
+ return cast<T>((tmp * (max - min) + min) >> typebits<T>::bits);
+}
+
+namespace internal
+{
+template <typename T>
+struct expression_random_uniform : input_expression
+{
+ using value_type = T;
+ constexpr expression_random_uniform(const random_bit_generator& gen) noexcept : gen(gen) {}
+ template <typename U, size_t N>
+ vec<U, N> operator()(cinput_t, size_t, vec_t<U, N>) const
+ {
+ return cast<U>(random_uniform<T, N>(gen));
+ }
+ mutable random_bit_generator gen;
+};
+
+template <typename T>
+struct expression_random_range : input_expression
+{
+ using value_type = T;
+ constexpr expression_random_range(const random_bit_generator& gen, T min, T max) noexcept : gen(gen),
+ min(min),
+ max(max)
+ {
+ }
+
+ template <typename U, size_t N>
+ vec<U, N> operator()(cinput_t, size_t, vec_t<U, N>) const
+ {
+ return cast<U>(random_range<N, T>(gen, min, max));
+ }
+ mutable random_bit_generator gen;
+ const T min;
+ const T max;
+};
+}
+
+template <typename T>
+inline internal::expression_random_uniform<T> gen_random_uniform(const random_bit_generator& gen)
+{
+ return internal::expression_random_uniform<T>(gen);
+}
+
+template <typename T>
+inline internal::expression_random_range<T> gen_random_range(const random_bit_generator& gen, T min, T max)
+{
+ return internal::expression_random_range<T>(gen, min, max);
+}
+
+template <typename T>
+inline internal::expression_random_uniform<T> gen_random_uniform()
+{
+ return internal::expression_random_uniform<T>(random_bit_generator(seed_from_rdtsc));
+}
+
+template <typename T>
+inline internal::expression_random_range<T> gen_random_range(T min, T max)
+{
+ return internal::expression_random_range<T>(random_bit_generator(seed_from_rdtsc), min, max);
+}
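+
+// Usage sketch (sizes and bounds are illustrative):
+//
+//   random_bit_generator gen(seed_from_rdtsc);
+//   univector<float, 256> noise = gen_random_range(gen, -1.0f, 1.0f);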
+}
diff --git a/include/kfr/misc/small_buffer.hpp b/include/kfr/misc/small_buffer.hpp
@@ -0,0 +1,113 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+
+#include "../base/memory.hpp"
+#include <algorithm>
+#include <cstdint>
+
+namespace kfr
+{
+
+template <typename T, std::size_t Capacity = 16>
+struct small_buffer
+{
+public:
+ small_buffer() noexcept : m_size(0), m_data(m_preallocated) {}
+
+ small_buffer(std::size_t size) : small_buffer() { resize(size); }
+
+ friend void swap(small_buffer<T, Capacity>& first, small_buffer<T, Capacity>& second) noexcept
+ {
+ using std::swap;
+
+ swap(first.m_size, second.m_size);
+ swap(first.m_data, second.m_data);
+ swap(first.m_preallocated, second.m_preallocated);
+ first.m_data = first.m_size <= Capacity ? first.m_preallocated : first.m_data;
+ second.m_data = second.m_size <= Capacity ? second.m_preallocated : second.m_data;
+ }
+ small_buffer(small_buffer<T, Capacity>&& other) : small_buffer() { swap(other, *this); }
+
+ small_buffer(const small_buffer<T, Capacity>& other) : small_buffer() { assign(other); }
+ small_buffer<T, Capacity>& operator=(small_buffer<T, Capacity> other)
+ {
+ swap(other, *this);
+ return *this;
+ }
+
+ ~small_buffer() { clear(); }
+
+ void assign(const small_buffer<T, Capacity>& other)
+ {
+ resize(other.m_size);
+ std::copy_n(other.m_data, m_size, m_data);
+ }
+
+ void resize(std::size_t newsize)
+ {
+ T* m_newdata;
+ if (newsize <= Capacity)
+ {
+ m_newdata = m_preallocated;
+ }
+ else
+ {
+ m_newdata = aligned_allocate<T>(newsize);
+ }
+ std::copy_n(std::make_move_iterator(m_data), std::min(newsize, m_size), m_newdata);
+ if (m_data != m_preallocated)
+ aligned_deallocate(m_data);
+ m_data = m_newdata;
+ m_size = newsize;
+ }
+ bool empty() const { return !size(); }
+ std::size_t size() const { return m_size; }
+ const T* begin() const { return m_data; }
+ const T* end() const { return m_data + m_size; }
+ const T* cbegin() const { return m_data; }
+ const T* cend() const { return m_data + m_size; }
+ T* begin() { return m_data; }
+ T* end() { return m_data + m_size; }
+ void clear() { resize(0); }
+ const T& front() const { return m_data[0]; }
+ const T& back() const { return m_data[m_size - 1]; }
+ T& front() { return m_data[0]; }
+ T& back() { return m_data[m_size - 1]; }
+ void pop_back() { resize(m_size - 1); }
+ T* data() { return m_data; }
+ const T* data() const { return m_data; }
+ T& operator[](std::size_t i) { return m_data[i]; }
+ const T& operator[](std::size_t i) const { return m_data[i]; }
+ void push_back(const T& value)
+ {
+ resize(m_size + 1);
+ m_data[m_size - 1] = value;
+ }
+
+protected:
+ T m_preallocated[Capacity];
+ std::size_t m_size;
+ T* m_data;
+};
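+
+// Usage sketch: elements live in the inline array until Capacity is
+// exceeded, after which resize() transparently moves them to the heap:
+//
+//   small_buffer<int, 4> buf;
+//   for (int i = 0; i < 8; i++)
+//       buf.push_back(i); // the first four stay inline, the rest allocate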
+}
diff --git a/include/kfr/misc/sort.hpp b/include/kfr/misc/sort.hpp
@@ -0,0 +1,98 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+
+#include "../base/min_max.hpp"
+#include "../base/shuffle.hpp"
+#include "../base/vec.hpp"
+
+namespace kfr
+{
+/**
+ * Sort the elements in the vector in ascending order
+ * @param x input vector
+ * @return sorted vector
+ * @code
+ * CHECK(sort(make_vector(1000, 1, 2, -10)) == make_vector(-10, 1, 2, 1000));
+ * @endcode
+ */
+template <typename T, size_t N>
+KFR_INLINE vec<T, N> sort(vec<T, N> x)
+{
+ using namespace kfr::native;
+ constexpr size_t Nhalf = N / 2;
+ vec<T, Nhalf> e = low(x);
+ vec<T, Nhalf> o = high(x);
+ constexpr auto blend0 = cconcat(csizes<1>, csizeseq<Nhalf - 1, 0, 0>);
+ for (size_t i = 0; i < Nhalf; i++)
+ {
+ vec<T, Nhalf> t;
+ t = min(e, o);
+ o = max(e, o);
+ o = rotateright<1>(o);
+ e = t;
+ t = max(e, o);
+ o = min(e, o);
+ e = t;
+ t = blend(e, o, blend0);
+ o = blend(o, e, blend0);
+ o = rotateleft<1>(o);
+ e = t;
+ }
+ return interleavehalfs(concat(e, o));
+}
+
+/**
+ * Sort the elements in the vector in descending order
+ * @param x input vector
+ * @return sorted vector
+ * @code
+ * CHECK(sort(make_vector(1000, 1, 2, -10)) == make_vector(1000, 2, 1, -10));
+ * @endcode
+ */
+template <typename T, size_t N>
+KFR_INLINE vec<T, N> sortdesc(vec<T, N> x)
+{
+ using namespace kfr::native;
+ constexpr size_t Nhalf = N / 2;
+ vec<T, Nhalf> e = low(x);
+ vec<T, Nhalf> o = high(x);
+ constexpr auto blend0 = cconcat(csizes<1>, csizeseq<Nhalf - 1, 0, 0>);
+ for (size_t i = 0; i < Nhalf; i++)
+ {
+ vec<T, Nhalf> t;
+ t = max(e, o);
+ o = min(e, o);
+ o = rotateright<1>(o);
+ e = t;
+ t = min(e, o);
+ o = max(e, o);
+ e = t;
+ t = blend(e, o, blend0);
+ o = blend(o, e, blend0);
+ o = rotateleft<1>(o);
+ e = t;
+ }
+ return interleavehalfs(concat(e, o));
+}
+}
diff --git a/include/kfr/vec.hpp b/include/kfr/vec.hpp
@@ -0,0 +1,25 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+
+#include "base/vec.hpp"
diff --git a/include/kfr/version.hpp b/include/kfr/version.hpp
@@ -0,0 +1,35 @@
+/**
+ * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+ * This file is part of KFR
+ *
+ * KFR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * KFR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with KFR.
+ *
+ * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+ * Buying a commercial license is mandatory as soon as you develop commercial activities without
+ * disclosing the source code of your own applications.
+ * See http://www.kfrlib.com for details.
+ */
+#pragma once
+
+#include "base/types.hpp"
+#include <string>
+
+namespace kfr
+{
+static std::string library_version()
+{
+ return "KFR " + std::string(version_string) + bitness_const(" x86 ", " x86-64 ") +
+ CID_STRINGIFY(KFR_ARCH_NAME);
+}
+}
diff --git a/readme.md b/readme.md
@@ -0,0 +1,101 @@
+# KFR
+
+KFR is an open source C++ math framework with focus on DSP.
+
+KFR is header-only and has no external dependencies.
+
+## Features
+
+* All code in the library is optimized for SSE2, SSE3, SSE4.x, AVX and AVX2 processors
+* Mathematical and statistical functions
+* Template expressions (See examples)
+* All data types are supported including complex numbers
+* All vector lengths are also supported. `vec<float,1>`, `vec<unsigned,3>`, `vec<complex<float>, 11>` all are valid vector types in KFR
+* Most of the standard library functions are re-implemented to support vectors of any length and data type
+* Runtime CPU dispatching
+* Multi-versioning. Code for various architectures (SSE2, AVX2, etc.) can coexist in one translation unit, so there is no need to build separately for each CPU
+
+Included DSP/audio algorithms:
+
+* FFT
+* FIR filtering
+* FIR filter design using the window method
+* Resampling with configurable quality (See resampling.cpp from Examples directory)
+* Goertzel algorithm
+* Biquad filtering
+* Biquad design functions
+* Oscillators: Sine, Square, Sawtooth, Triangle
+* Window functions: Triangular, Bartlett, Cosine, Hann, Bartlett-Hann, Hamming, Bohman, Blackman, Blackman-Harris, Kaiser, Flattop, Gaussian, Lanczos, Rectangular
+* Audio file reading/writing
+* Pseudorandom number generator
+* Sorting
+* Ring (Circular) buffer
+* Fast incremental sine/cosine generation
+
+## Performance
+
+FFT (double precision, sizes from 1024 to 16777216)
+
+
+
+## Prerequisites
+
+* Xcode 6.3, 6.4, 7.x or 8.x, or another C++14-compliant compiler (currently only Clang 3.7 or newer is supported)
+* CoMeta metaprogramming library (already included)
+
+KFR is header-only, so just `#include <kfr/math.hpp>` to start using it.
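+
+A minimal sketch mirroring `tests/basic_vector_test.cpp` (`println` comes from `kfr/io/tostring.hpp`):
+
+```
+#include <kfr/io/tostring.hpp>
+#include <kfr/math.hpp>
+
+using namespace kfr;
+
+int main()
+{
+    const vec<float, 4> x{ 1, 2, 3, 4 };
+    const vec<float, 4> y = sqrt(x * 2.0f + 1.0f); // element-wise SIMD math
+    println(y); // 1.73205 2.23607 2.64575 3
+    return 0;
+}
+```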
+
+The following tools are required to build the examples:
+
+* CMake 3.x
+
+To build the tests:
+
+* Testo - C++14 testing micro framework (included)
+* Python 2.7 with the following modules:
+
+ * dspplot (included, see Installation)
+ * matplotlib
+ * numpy
+ * scipy
+
+## Installation
+
+To obtain the full code, including examples and tests, you can clone the git repository:
+
+```
+git clone https://github.com/kfrlib/kfr.git
+```
+
+To be able to run the tests and examples, install the following Python modules:
+
+```
+pip install matplotlib
+pip install numpy # or download prebuilt package for windows
+pip install scipy # or download prebuilt package for windows
+```
+Install dspplot by running `python setup.py install` inside the dspplot directory.
+
+## Tests
+
+Execute `build.py` to run the tests, or run them manually from the `tests` directory.
+
+Tested on the following systems:
+
+* OS X 10.11.4 / AppleClang 7.3.0.7030031
+* Windows 8.1 / clang version 3.8.0 (branches/release_38)
+
+
+## Planned for future versions
+
+* DFT of any length (not only powers of two)
+* Parallel execution of algorithms
+* Serialization/Deserialization of any expression
+* More formats for audio file reading/writing
+* Reduce STL dependency
+
+## License
+
+KFR is dual-licensed: it is available under a commercial license and the open-source GPL.
+
+If you want to use KFR in a commercial product or a closed-source project, you need to [purchase a commercial license](http://kfrlib.com/purchase-license)
diff --git a/sources.cmake b/sources.cmake
@@ -0,0 +1,89 @@
+# Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+# This file is part of KFR
+#
+# KFR is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# KFR is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with KFR.
+
+
+set(
+ KFR_SRC
+ ${PROJECT_SOURCE_DIR}/include/kfr/base/abs.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/base/asin_acos.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/base/atan.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/base/complex.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/base/constants.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/base/digitreverse.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/base/dispatch.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/base/expression.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/base/function.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/base/gamma.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/base/log_exp.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/base/logical.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/base/memory.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/base/min_max.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/base/operators.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/base/read_write.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/base/round.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/base/saturation.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/base/select.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/base/shuffle.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/base/sin_cos.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/base/sinh_cosh.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/base/sqrt.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/base/tan.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/base/types.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/base/univector.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/base/vec.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/data/bitrev.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/data/sincos.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/dft/bitrev.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/dft/fft.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/dft/ft.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/dft/reference_dft.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/dispatch/cpuid.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/dispatch/runtimedispatch.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/dsp/biquad.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/dsp/oscillators.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/dsp/units.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/dsp/fir.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/dsp/goertzel.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/dsp/interpolation.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/dsp/resample.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/dsp/speaker.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/dsp/weighting.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/dsp/window.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/expressions/basic.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/expressions/conversion.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/expressions/generators.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/expressions/operators.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/expressions/pointer.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/expressions/reduce.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/io/audiofile.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/io/file.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/io/python_plot.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/io/tostring.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/math.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/misc/compiletime.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/misc/random.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/misc/small_buffer.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/misc/sort.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/vec.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/version.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/base/kfr.h
+ ${PROJECT_SOURCE_DIR}/include/kfr/base/intrinsics.h
+ ${PROJECT_SOURCE_DIR}/include/kfr/cometa.hpp
+ ${PROJECT_SOURCE_DIR}/include/kfr/cometa/string.hpp
+
+ ${PROJECT_SOURCE_DIR}/tests/testo/testo.hpp
+ ${PROJECT_SOURCE_DIR}/tests/testo/print_colored.hpp
+)
diff --git a/syntax-check.py b/syntax-check.py
@@ -0,0 +1,28 @@
+#!/usr/bin/env python
+from __future__ import print_function
+
+import fnmatch
+import subprocess
+import os
+import sys
+
+path = os.path.dirname(os.path.realpath(__file__))
+
+filenames = []
+for root, dirnames, files in os.walk(os.path.join(path, 'include')):
+ for filename in fnmatch.filter(files, '*.hpp'):
+ filenames.append(os.path.join(root, filename))
+
+
+target = ""
+if sys.platform.startswith('win32'):
+ target = "--target=x86_64-w64-windows-gnu"
+
+fails = 0
+for filename in filenames:
+ print(filename, '...')
+    c = subprocess.call(["clang", "-fsyntax-only", filename, "-std=c++14", "-I"+os.path.join(path, "include"), "-Wno-pragma-once-outside-header"] + target)
+ if c != 0:
+ fails+=1
+
+exit(fails)
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
@@ -0,0 +1,47 @@
+# Copyright (C) 2016 D Levin (http://www.kfrlib.com)
+# This file is part of KFR
+#
+# KFR is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# KFR is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with KFR.
+
+
+cmake_minimum_required(VERSION 3.0)
+
+add_compile_options(-fno-exceptions -fno-rtti -ftemplate-backtrace-limit=0)
+
+link_libraries(stdc++ pthread)
+
+include_directories(../include)
+
+add_executable(basic_vector_test basic_vector_test.cpp ${KFR_SRC})
+add_executable(dft_test dft_test.cpp ${KFR_SRC})
+add_executable(empty_test empty_test.cpp ${KFR_SRC})
+add_executable(complex_test complex_test.cpp ${KFR_SRC})
+
+find_package(PythonInterp 2.7)
+
+if (PYTHONINTERP_FOUND)
+ enable_testing()
+
+ add_test(NAME test_basic_vector
+ COMMAND ${PYTHON_EXECUTABLE} ${PROJECT_SOURCE_DIR}/tests/test_output.py
+ ${PROJECT_BINARY_DIR}/tests/basic_vector_test
+ ${PROJECT_SOURCE_DIR}/tests/basic_vector_test.cpp)
+
+ add_test(NAME test_dft
+ COMMAND ${PROJECT_BINARY_DIR}/tests/dft_test)
+ add_test(NAME complex_test
+ COMMAND ${PROJECT_BINARY_DIR}/tests/complex_test)
+else ()
+ message(WARNING "Install Python to run tests")
+endif ()
diff --git a/tests/basic_vector_test.cpp b/tests/basic_vector_test.cpp
@@ -0,0 +1,152 @@
+/**
+ * KFR (http://kfrlib.com)
+ * Copyright (C) 2016 D Levin
+ * See LICENSE.txt for details
+ */
+
+#include <kfr/io/tostring.hpp>
+
+#include <kfr/cometa/string.hpp>
+#include <kfr/math.hpp>
+#include <kfr/vec.hpp>
+#include <kfr/version.hpp>
+
+using namespace kfr;
+using namespace kfr::native;
+
+template <typename T>
+void print_type(const T& value)
+{
+ println(type_name(value), ":");
+ println(value);
+}
+
+int main(int /*argc*/, char** /*argv*/)
+{
+ println(library_version());
+ // >>> KFR ...
+
+ // How to make a vector:
+
+ // * Use constructor
+ const vec<double, 4> first{ 1, 2.5, -infinity, 3.1415926 };
+ print_type(first);
+ // >>> kfr::vec<double, 4>:
+ // >>> 1 2.5 -inf 3.14159
+
+ // * Use make_vector function
+ const auto second = make_vector(-1, +1);
+ print_type(second);
+ // >>> kfr::vec<int, 2>:
+ // >>> -1 1
+
+ // * Convert from vector of other type:
+ const vec<int, 4> int_vector{ 10, 20, 30, 40 };
+ const vec<double, 4> double_vector = cast<double>(int_vector);
+ print_type(double_vector);
+ // >>> kfr::vec<double, 4>:
+ // >>> 10 20 30 40
+
+ // * Concat two vectors:
+ const vec<int, 1> left_part{ 1 };
+ const vec<int, 1> right_part{ 2 };
+ const vec<int, 2> pair{ left_part, right_part };
+ print_type(pair);
+ // >>> kfr::vec<int, 2>:
+ // >>> 1 2
+
+ // * Same, but using make_vector and concat:
+ const vec<int, 2> pair2 = concat(make_vector(10), make_vector(20));
+ print_type(pair2);
+ // >>> kfr::vec<int, 2>:
+ // >>> 10 20
+
+ // * Repeat vector multiple times:
+ const vec<short, 8> repeated = repeat<4>(make_vector<short>(0, -1));
+ print_type(repeated);
+ // >>> kfr::vec<short, 8>:
+ // >>> 0 -1 0 -1 0 -1 0 -1
+
+ // * Use enumerate to generate sequence of numbers:
+ const vec<int, 8> eight = enumerate<int, 8>();
+ print_type(eight);
+ // >>> kfr::vec<int, 8>:
+ // >>> 0 1 2 3 4 5 6 7
+
+ // * Vectors can be of any length...
+ const vec<int, 1> one{ 42 };
+ const vec<int, 2> two = concat(one, make_vector(42));
+ print_type(two);
+ // >>> kfr::vec<int, 2>:
+ // >>> 42 42
+
+ const vec<u8, 256> very_long_vector = repeat<64>(make_vector<u8>(1, 2, 4, 8));
+ print_type(slice<0, 17>(very_long_vector));
+ // >>> kfr::vec<unsigned char, 17>:
+ // >>> 1 2 4 8 1 2 4 8
+ // >>> 1 2 4 8 1 2 4 8
+ // >>> 1
+
+ // * ...really any:
+ using big_vector = vec<i16, 107>;
+ big_vector v107 = enumerate<i16, 107>();
+ print_type(hadd(v107));
+ // >>> short:
+ // >>> 5671
+
+ using color = vec<u8, 3>;
+ const color green = cast<u8>(make_vector(0.0, 1.0, 0.0) * 255);
+ print_type(green);
+ // >>> kfr::vec<unsigned char, 3>:
+ // >>> 0 255 0
+
+ // Vectors support all standard operators:
+ const auto op1 = make_vector(0, 1, 10, 100);
+ const auto op2 = make_vector(20, 2, -2, 200);
+ const auto result = op1 * op2 - 4;
+ print_type(result);
+ // >>> kfr::vec<int, 4>:
+ // >>> -4 -2 -24 19996
+
+ // * Transform vector:
+ const vec<int, 8> numbers1 = enumerate<int, 8>();
+ const vec<int, 8> numbers2 = enumerate<int, 8>() + 100;
+ print_type(odd(numbers1));
+ print_type(even(numbers2));
+ // >>> kfr::vec<int, 4>:
+ // >>> 1 3 5 7
+ // >>> kfr::vec<int, 4>:
+ // >>> 100 102 104 106
+
+ // * The following command pairs are equivalent:
+ print_type(permute<0, 2, 1, 3, 4, 6, 5, 7>(numbers1));
+ print_type(permute<0, 2, 1, 3>(numbers1));
+ // >>> kfr::vec<int, 8>:
+ // >>> 0 2 1 3 4 6 5 7
+ // >>> kfr::vec<int, 8>:
+ // >>> 0 2 1 3 4 6 5 7
+
+ print_type(shuffle<0, 8, 2, 10, 4, 12, 6, 14>(numbers1, numbers2));
+ print_type(shuffle<0, 8>(numbers1, numbers2));
+ // >>> kfr::vec<int, 8>:
+ // >>> 0 100 2 102 4 104 6 106
+ // >>> kfr::vec<int, 8>:
+ // >>> 0 100 2 102 4 104 6 106
+
+ print_type(blend<0, 1, 1, 0, 1, 1, 0, 1>(numbers1, numbers2));
+ print_type(blend<0, 1, 1>(numbers1, numbers2));
+ // >>> kfr::vec<int, 8>:
+ // >>> 0 101 102 3 104 105 6 107
+ // >>> kfr::vec<int, 8>:
+ // >>> 0 101 102 3 104 105 6 107
+
+ // * Transpose matrix:
+ const auto sixteen = enumerate<float, 16>();
+ print_type(transpose<4>(sixteen));
+ // >>> kfr::vec<float, 16>:
+ // >>> 0 4 8 12 1 5 9 13
+ // >>> 2 6 10 14 3 7 11 15
+ // >>>
+
+ return 0;
+}
diff --git a/tests/complex_test.cpp b/tests/complex_test.cpp
@@ -0,0 +1,200 @@
+/**
+ * KFR (http://kfrlib.com)
+ * Copyright (C) 2016 D Levin
+ * See LICENSE.txt for details
+ */
+
+#include <kfr/io/tostring.hpp>
+
+#include "testo/testo.hpp"
+#include <kfr/base/complex.hpp>
+#include <kfr/cometa/string.hpp>
+#include <kfr/expressions/basic.hpp>
+#include <kfr/expressions/operators.hpp>
+#include <kfr/expressions/reduce.hpp>
+#include <kfr/math.hpp>
+#include <kfr/version.hpp>
+
+using namespace kfr;
+
+template <typename T1, typename T2>
+void assert_is_same()
+{
+ static_assert(std::is_same<T1, T2>::value, "");
+}
+
+TEST(complex_vector)
+{
+ const vec<c32, 1> c32x1{ c32{ 0, 1 } };
+ CHECK(c32x1(0) == 0.0f);
+ CHECK(c32x1(1) == 1.0f);
+
+ const vec<c32, 2> c32x2{ c32{ 0, 1 }, c32{ 2, 3 } };
+ CHECK(c32x2(0) == 0.0f);
+ CHECK(c32x2(1) == 1.0f);
+ CHECK(c32x2(2) == 2.0f);
+ CHECK(c32x2(3) == 3.0f);
+
+ const vec<c32, 3> c32x3{ c32{ 0, 1 }, c32{ 2, 3 }, c32{ 4, 5 } };
+ CHECK(c32x3(0) == 0.0f);
+ CHECK(c32x3(1) == 1.0f);
+ CHECK(c32x3(2) == 2.0f);
+ CHECK(c32x3(3) == 3.0f);
+ CHECK(c32x3(4) == 4.0f);
+ CHECK(c32x3(5) == 5.0f);
+
+ const vec<c32, 1> c32s = 2;
+ CHECK(c32s(0) == 2.f);
+ CHECK(c32s(1) == 0.f);
+}
+
+TEST(complex_cast)
+{
+ const vec<f32, 4> v1 = subcast<f32>(make_vector(c32{ 0, 1 }, c32{ 2, 3 }));
+ CHECK(v1(0) == 0.f);
+ CHECK(v1(1) == 1.f);
+ CHECK(v1(2) == 2.f);
+ CHECK(v1(3) == 3.f);
+
+ const vec<c32, 1> v2 = subcast<c32>(make_vector(1.f, 2.f));
+ CHECK(v2(0) == 1.f);
+ CHECK(v2(1) == 2.f);
+
+ const vec<c32, 2> v3 = cast<c32>(make_vector(1.f, 2.f));
+ CHECK(v3(0) == 1.f);
+ CHECK(v3(1) == 0.f);
+ CHECK(v3(2) == 2.f);
+ CHECK(v3(3) == 0.f);
+
+ CHECK(zerovector<c32, 4>() == make_vector(c32{ 0, 0 }, c32{ 0, 0 }, c32{ 0, 0 }, c32{ 0, 0 }));
+ CHECK(enumerate<c32, 4>() == make_vector(c32{ 0, 0 }, c32{ 1, 0 }, c32{ 2, 0 }, c32{ 3, 0 }));
+}
+
+TEST(complex_math)
+{
+ const vec<c32, 1> a{ c32{ 1, 2 } };
+ const vec<c32, 1> b{ c32{ 3, 4 } };
+ const vec<c32, 1> c = a + b;
+ CHECK(a + b == make_vector(c32{ 4, 6 }));
+ CHECK(a - b == make_vector(c32{ -2, -2 }));
+ CHECK(a * b == make_vector(c32{ -5, 10 }));
+ CHECK(a * 2 == make_vector(c32{ 2, 4 }));
+ CHECK(a / b == make_vector(c32{ 0.44, 0.08 }));
+ CHECK(-a == make_vector(c32{ -1, -2 }));
+
+ CHECK(real(a) == make_vector(1.f));
+ CHECK(imag(a) == make_vector(2.f));
+
+ CHECK(make_complex(5.f, 7) == c32{ 5.f, 7.f });
+ CHECK(make_complex(make_vector(5.f, 8.f), make_vector(7.f, 9.f)) ==
+ make_vector(c32{ 5.f, 7.f }, c32{ 8.f, 9.f }));
+
+ CHECK(cabs(c32{ 3.f, 4.f }) == 5.f);
+ CHECK(cabs(make_vector(c32{ 3.f, 4.f })) == make_vector(5.f));
+
+ testo::epsilon<f32>() *= 5;
+ testo::epsilon<f64>() *= 5;
+
+ CHECK(csin(c32{ 1.f, 1.f }) == c32{ 1.2984575814159773, 0.634963914784736 });
+ CHECK(ccos(c32{ 1.f, 1.f }) == c32{ 0.8337300251311489, -0.9888977057628651 });
+ CHECK(csinh(c32{ 1.f, 1.f }) == c32{ 0.634963914784736, 1.2984575814159773 });
+ CHECK(ccosh(c32{ 1.f, 1.f }) == c32{ 0.8337300251311489, 0.9888977057628651 });
+
+ CHECK(clog(c32{ 1.f, 1.f }) == c32{ 0.34657359027997264, 0.7853981633974483 });
+ CHECK(clog2(c32{ 1.f, 1.f }) == c32{ 0.5, 1.1330900354567983 });
+ CHECK(clog10(c32{ 1.f, 1.f }) == c32{ 0.15051499783199057, 0.3410940884604603 });
+
+ CHECK(cexp(c32{ 1.f, 1.f }) == c32{ 1.4686939399158849, 2.2873552871788423 });
+ CHECK(cexp2(c32{ 1.f, 1.f }) == c32{ 1.5384778027279442, 1.2779225526272695 });
+ CHECK(cexp10(c32{ 1.f, 1.f }) == c32{ -6.682015101903131, 7.439803369574931 });
+}
+
+TEST(complex_read_write)
+{
+ c32 buffer[8] = { c32{ 1, 2 }, c32{ 3, 4 }, c32{ 5, 6 }, c32{ 7, 8 },
+ c32{ 9, 10 }, c32{ 11, 12 }, c32{ 13, 14 }, c32{ 15, 16 } };
+
+ CHECK(read<4>(buffer) == make_vector(c32{ 1, 2 }, c32{ 3, 4 }, c32{ 5, 6 }, c32{ 7, 8 }));
+ CHECK(read<3>(buffer + 1) == make_vector(c32{ 3, 4 }, c32{ 5, 6 }, c32{ 7, 8 }));
+ write(buffer + 2, make_vector(c32{ 10, 11 }, c32{ 12, 13 }));
+ CHECK(read<4>(buffer) == make_vector(c32{ 1, 2 }, c32{ 3, 4 }, c32{ 10, 11 }, c32{ 12, 13 }));
+}
+
+TEST(complex_shuffle)
+{
+ const vec<c32, 2> a{ c32{ 0, 1 }, c32{ 2, 3 } };
+ CHECK(reverse(a) == make_vector(c32{ 2, 3 }, c32{ 0, 1 }));
+}
+
+TEST(complex_basic_expressions)
+{
+ const univector<c32, 3> uv1 = zeros();
+ CHECK(uv1[0] == c32{ 0, 0 });
+ CHECK(uv1[1] == c32{ 0, 0 });
+ CHECK(uv1[2] == c32{ 0, 0 });
+ const univector<c32, 3> uv2 = ones();
+ CHECK(uv2[0] == c32{ 1, 0 });
+ CHECK(uv2[1] == c32{ 1, 0 });
+ CHECK(uv2[2] == c32{ 1, 0 });
+ const univector<c32, 3> uv3 = counter();
+ CHECK(uv3[0] == c32{ 0, 0 });
+ CHECK(uv3[1] == c32{ 1, 0 });
+ CHECK(uv3[2] == c32{ 2, 0 });
+}
+
+TEST(complex_function_expressions)
+{
+ static_assert(is_generic<decltype(counter())>::value, "");
+ static_assert(is_generic<decltype(sqr(counter()))>::value, "");
+
+ const univector<c32, 4> uv1 = sqr(counter());
+ CHECK(uv1[0] == c32{ 0, 0 });
+ CHECK(uv1[1] == c32{ 1, 0 });
+ CHECK(uv1[2] == c32{ 4, 0 });
+ CHECK(uv1[3] == c32{ 9, 0 });
+
+ const univector<c32, 4> uv2 = uv1 * 2.f;
+ CHECK(uv2[0] == c32{ 0, 0 });
+ CHECK(uv2[1] == c32{ 2, 0 });
+ CHECK(uv2[2] == c32{ 8, 0 });
+ CHECK(uv2[3] == c32{ 18, 0 });
+
+ const univector<f32, 4> uv3 = real(uv2);
+ CHECK(uv3[0] == 0.f);
+ CHECK(uv3[1] == 2.f);
+ CHECK(uv3[2] == 8.f);
+ CHECK(uv3[3] == 18.f);
+
+ assert_is_same<c32, value_type_of<decltype(uv2)>>();
+ assert_is_same<f32, value_type_of<decltype(uv3)>>();
+ assert_is_same<f32, value_type_of<decltype(real(uv2))>>();
+}
+
+int main(int argc, char** argv)
+{
+ println(library_version());
+
+ static_assert(vector_width<f32, cpu_t::sse2> == 4, "");
+ static_assert(vector_width<c32, cpu_t::sse2> == 2, "");
+ static_assert(vector_width<i32, cpu_t::sse2> == 4, "");
+ static_assert(vector_width<complex<i32>, cpu_t::sse2> == 2, "");
+
+ static_assert(sizeof(vec<c32, 4>) == sizeof(vec<f32, 8>), "");
+ static_assert(vec<f32, 4>::size() == 4, "");
+ static_assert(vec<c32, 4>::size() == 4, "");
+ static_assert(vec<f32, 4>::scalar_size() == 4, "");
+ static_assert(vec<c32, 4>::scalar_size() == 8, "");
+ assert_is_same<subtype<complex<i32>>, i32>();
+ assert_is_same<vec<c32, 4>::value_type, c32>();
+ assert_is_same<vec<c32, 4>::scalar_type, f32>();
+ assert_is_same<vec<f32, 4>::value_type, f32>();
+ assert_is_same<vec<f32, 4>::scalar_type, f32>();
+ assert_is_same<vec<c32, 1>, decltype(make_vector(c32{ 0, 0 }))>();
+ assert_is_same<vec<c32, 2>, decltype(make_vector(c32{ 0, 0 }, 4))>();
+ assert_is_same<ftype<complex<i32>>, complex<f32>>();
+ assert_is_same<ftype<complex<i64>>, complex<f64>>();
+ assert_is_same<ftype<vec<complex<i32>, 4>>, vec<complex<f32>, 4>>();
+ assert_is_same<ftype<vec<complex<i64>, 8>>, vec<complex<f64>, 8>>();
+
+ return testo::run_all("", true);
+}
diff --git a/tests/dft_test.cpp b/tests/dft_test.cpp
@@ -0,0 +1,56 @@
+/**
+ * KFR (http://kfrlib.com)
+ * Copyright (C) 2016 D Levin
+ * See LICENSE.txt for details
+ */
+#include <tuple>
+
+#include "testo/testo.hpp"
+#include <kfr/cometa/string.hpp>
+#include <kfr/dft/fft.hpp>
+#include <kfr/dft/reference_dft.hpp>
+#include <kfr/expressions/basic.hpp>
+#include <kfr/expressions/operators.hpp>
+#include <kfr/expressions/reduce.hpp>
+#include <kfr/io/tostring.hpp>
+#include <kfr/math.hpp>
+#include <kfr/misc/random.hpp>
+#include <kfr/version.hpp>
+
+using namespace kfr;
+
+TEST(fft_accuracy)
+{
+ testo::active_test()->show_progress = true;
+ random_bit_generator gen(2247448713, 915890490, 864203735, 2982561);
+
+ testo::matrix(named("type") = ctypes<float, double>, //
+ named("inverse") = std::make_tuple(false, true), //
+ named("log2(size)") = make_range(1, 21), //
+ [&gen](auto type, bool inverse, size_t log2size) {
+ using float_type = type_of<decltype(type)>;
+ const size_t size = 1 << log2size;
+
+ univector<complex<float_type>> in =
+ typed<float_type>(gen_random_range(gen, -1.0, +1.0), size * 2);
+ univector<complex<float_type>> out = in;
+ univector<complex<float_type>> refout = out;
+ const dft_plan<float_type> dft(size);
+ univector<u8> temp(dft.temp_size);
+
+ reference_dft(refout.data(), in.data(), size, inverse);
+ dft.execute(out, out, temp, inverse);
+
+ const float_type rms_diff = rms(cabs(refout - out));
+ const double ops = log2size * 50;
+ const double epsilon = std::numeric_limits<float_type>::epsilon();
+ CHECK(rms_diff < epsilon * ops);
+ });
+}
+
+int main(int argc, char** argv)
+{
+ println(library_version());
+
+ return testo::run_all("", true);
+}
diff --git a/tests/empty_test.cpp b/tests/empty_test.cpp
@@ -0,0 +1,7 @@
+#include <kfr/math.hpp>
+#include <kfr/vec.hpp>
+
+using namespace kfr;
+using namespace kfr::native;
+
+int main(int argc, char** argv) { return 0; }
diff --git a/tests/test_output.py b/tests/test_output.py
@@ -0,0 +1,34 @@
+#!/usr/bin/env python
+from __future__ import print_function
+
+import os
+import subprocess
+import sys
+import re
+
+binary_filename = sys.argv[1]
+source_filename = sys.argv[2]
+
+with open(source_filename) as src:
+ test_source = enumerate(src.readlines())
+
+parsed_output = [(re.sub(r'^\s*// >>>', '', line).strip(), linenum) for linenum, line in test_source if '// >>>' in line]
+
+output = subprocess.check_output([binary_filename], stderr=subprocess.STDOUT).decode("utf-8").splitlines()
+
+output = [o.strip() for o in output]
+
+fails = 0
+for expected, actual in zip(parsed_output, output):
+ reg = re.escape(expected[0]).replace(r'\.\.\.', '.*')
+ match = re.match(reg, actual)
+ if not match:
+ fails+=1
+        print('Expected output string ({file}, line {line}): \n"{expected}"\n got: \n"{actual}"'.format(expected=expected[0], file=source_filename, actual=actual, line=expected[1]))
+
+if fails == 0:
+ print('All tests passed successfully ({} lines)'.format(len(parsed_output)))
+else:
+    print('Number of failed tests: {fails}'.format(fails=fails))
+
+sys.exit(fails)
diff --git a/tests/testo/print_colored.hpp b/tests/testo/print_colored.hpp
@@ -0,0 +1,150 @@
+#pragma once
+#include <cstdint>
+
+#if defined(_WIN32)
+#include <windows.h>
+#endif
+
+namespace print_colored
+{
+
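+// Color constants follow the Windows console attribute layout:
+// low nibble = foreground, high nibble = background.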
+enum text_color : uint32_t
+{
+ Black = 0x00,
+ DarkBlue = 0x01,
+ DarkGreen = 0x02,
+ DarkCyan = 0x03,
+ DarkRed = 0x04,
+ DarkMagenta = 0x05,
+ DarkYellow = 0x06,
+ LightGrey = 0x07,
+ Gray = 0x08,
+ Blue = 0x09,
+ Green = 0x0A,
+ Cyan = 0x0B,
+ Red = 0x0C,
+ Magenta = 0x0D,
+ Yellow = 0x0E,
+ White = 0x0F,
+ BgBlack = 0x00,
+ BgDarkBlue = 0x10,
+ BgDarkGreen = 0x20,
+ BgDarkCyan = 0x30,
+ BgDarkRed = 0x40,
+ BgDarkMagenta = 0x50,
+ BgDarkYellow = 0x60,
+ BgLightGrey = 0x70,
+ BgGray = 0x80,
+ BgBlue = 0x90,
+ BgGreen = 0xA0,
+ BgCyan = 0xB0,
+ BgRed = 0xC0,
+ BgMagenta = 0xD0,
+ BgYellow = 0xE0,
+ BgWhite = 0xF0,
+
+ Normal = BgBlack | LightGrey
+};
+
+enum console_buffer
+{
+ ConsoleStdOutput,
+ ConsoleStdError
+};
+
+#if defined(_WIN32)
+typedef HANDLE console_handle_t;
+
+inline console_handle_t console_handle(console_buffer console = ConsoleStdOutput)
+{
+ static HANDLE con_out = ::GetStdHandle(STD_OUTPUT_HANDLE);
+ static HANDLE con_err = ::GetStdHandle(STD_ERROR_HANDLE);
+ return console == ConsoleStdOutput ? con_out : con_err;
+}
+
+#endif
+
+struct console_color
+{
+public:
+ console_color(text_color c, console_buffer console = ConsoleStdOutput)
+ : m_old(get(console)), m_console(console)
+ {
+ set(c, m_console);
+ }
+
+ ~console_color() { set(m_old, m_console); }
+
+private:
+ text_color get(console_buffer console = ConsoleStdOutput)
+ {
+#ifdef _WIN32
+ CONSOLE_SCREEN_BUFFER_INFO info;
+ ::GetConsoleScreenBufferInfo(console_handle(console), &info);
+ return static_cast<text_color>(info.wAttributes & 0xFF);
+#else
+ return static_color();
+#endif
+ }
+
+ void set(text_color new_color, console_buffer console = ConsoleStdOutput)
+ {
+#ifdef _WIN32
+ ::SetConsoleTextAttribute(console_handle(console), static_cast<WORD>(new_color));
+#else
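+        // Translate the Windows-style attribute into an ANSI SGR sequence:
+        // the red and blue bits swap places and bit 3 selects the bright range.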
+ if (new_color != Normal)
+ {
+ uint8_t t = new_color & 0xF;
+ uint8_t b = (new_color & 0xF0) >> 4;
+ uint8_t tnum = 30 + ((t & 1) << 2 | (t & 2) | (t & 4) >> 2);
+ uint8_t bnum = 40 + ((b & 1) << 2 | (b & 2) | (b & 4) >> 2);
+ if (t & 8)
+ tnum += 60;
+ if (b & 8)
+ bnum += 60;
+ printf("\x1B[%d;%dm", tnum, bnum);
+ }
+ else
+ {
+ printf("\x1B[0m");
+ }
+ static_color() = new_color;
+#endif
+ }
+
+ text_color m_old;
+ console_buffer m_console;
+#ifndef _WIN32
+ static text_color& static_color()
+ {
+ static text_color color = Normal;
+ return color;
+ }
+#endif
+};
+
+template <text_color color, console_buffer console = ConsoleStdOutput>
+struct colored_text_tpl : public console_color
+{
+    colored_text_tpl() : console_color(color, console) {}
+};
+
+typedef colored_text_tpl<DarkBlue> darkblue_text;
+typedef colored_text_tpl<DarkGreen> darkgreen_text;
+typedef colored_text_tpl<DarkCyan> darkcyan_text;
+typedef colored_text_tpl<DarkRed> darkred_text;
+typedef colored_text_tpl<DarkMagenta> darkmagenta_text;
+typedef colored_text_tpl<DarkYellow> darkyellow_text;
+typedef colored_text_tpl<LightGrey> lightgrey_text;
+typedef colored_text_tpl<Gray> gray_text;
+typedef colored_text_tpl<Blue> blue_text;
+typedef colored_text_tpl<Green> green_text;
+typedef colored_text_tpl<Cyan> cyan_text;
+typedef colored_text_tpl<Red> red_text;
+typedef colored_text_tpl<Magenta> magenta_text;
+typedef colored_text_tpl<Yellow> yellow_text;
+typedef colored_text_tpl<White> white_text;
+} // namespace print_colored
diff --git a/tests/testo/testo.hpp b/tests/testo/testo.hpp
@@ -0,0 +1,549 @@
+#pragma once
+
+#include <kfr/cometa.hpp>
+#include <kfr/cometa/string.hpp>
+
+#include <algorithm>
+#include <ctime>
+#include <functional>
+#include <sstream>
+#include <utility>
+#include <vector>
+#ifdef TESTO_MPFR
+#include <mpfr/mpfr.hpp>
+#include <mpfr/mpfr_tostring.hpp>
+#endif
+#include "print_colored.hpp"
+#include <chrono>
+#include <cmath>
+
+#if !defined CLANG_DIAGNOSTIC_PRAGMA
+#if defined __clang__
+#define TESTO_STRING(str) #str
+#define CLANG_DIAGNOSTIC_PRAGMA(pragma) _Pragma(TESTO_STRING(clang diagnostic pragma))
+#else
+#define CLANG_DIAGNOSTIC_PRAGMA(pragma)
+#endif
+#endif
+
+CLANG_DIAGNOSTIC_PRAGMA(push)
+CLANG_DIAGNOSTIC_PRAGMA(ignored "-Wexit-time-destructors")
+CLANG_DIAGNOSTIC_PRAGMA(ignored "-Wpadded")
+CLANG_DIAGNOSTIC_PRAGMA(ignored "-Wshadow")
+
+namespace testo
+{
+
+using namespace cometa;
+
+#ifdef TESTO_MPFR
+using reference_number = mpfr::number;
+#else
+using reference_number = long double;
+#endif
+
+#ifdef TESTO_MPFR
+template <typename T>
+inline double ulp_distance(const mpfr::number& reference, T test)
+{
+ if (std::isnan(test) && reference.isnan())
+ return 0.0;
+ if (std::isinf(test) && (reference.isinfinity() || mpfr::abs(reference) > std::numeric_limits<T>::max()))
+ {
+ if ((reference < 0 && test < 0) || (reference > 0 && test > 0))
+ return 0.0;
+ else
+ return std::numeric_limits<double>::infinity();
+ }
+ mpfr::number testreal = test;
+ T next = std::nexttoward(test, std::numeric_limits<long double>::infinity());
+ mpfr::number ulp = testreal - mpfr::number(next);
+ return std::abs(static_cast<double>((reference - testreal) / ulp));
+}
+inline std::string number_to_string(const mpfr::number& reference, int precision)
+{
+ return mpfr::to_string(reference, precision, 'g');
+}
+#else
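+// Measures the distance between a long double reference and a test value in
+// units in the last place (ULP) of the test type; NaN matching NaN and
+// same-signed infinities count as zero distance.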
+template <typename T>
+inline double ulp_distance(long double reference, T test)
+{
+ if (__builtin_isnan(test) && __builtin_isnan(reference))
+ return 0.0;
+ if (__builtin_isinf(test) &&
+ (__builtin_isinf(reference) || std::fabs(reference) > std::numeric_limits<T>::max()))
+ {
+ if ((reference < 0 && test < 0) || (reference > 0 && test > 0))
+ return 0.0;
+ else
+ return std::numeric_limits<double>::infinity();
+ }
+ long double test80 = test;
+ T next = std::nexttoward(test, std::numeric_limits<long double>::infinity());
+ long double ulp = test80 - static_cast<long double>(next);
+ return std::abs(static_cast<double>((reference - test80) / ulp));
+}
+#endif
+
+using namespace print_colored;
+
+template <typename Fn, typename L, typename R>
+struct comparison
+{
+ L left;
+ R right;
+ Fn cmp;
+
+ comparison(L&& left, R&& right) : left(std::forward<L>(left)), right(std::forward<R>(right)) {}
+
+ bool operator()() { return cmp(left, right); }
+};
+
+template <typename Left, typename Right>
+struct static_assert_type_eq
+{
+ static_assert(std::is_same<Left, Right>::value, "std::is_same<Left, Right>::value");
+};
+
+template <typename T, T left, T right>
+struct static_assert_eq
+{
+ static_assert(left == right, "left == right");
+};
+
+template <typename L, typename R, typename = void>
+struct equality_comparer
+{
+ bool operator()(const L& l, const R& r) const { return l == r; }
+};
+
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wfloat-equal"
+
+template <typename T>
+inline T& epsilon()
+{
+ static T value = std::numeric_limits<T>::epsilon();
+ return value;
+}
+
+template <>
+struct equality_comparer<float, float>
+{
+ bool operator()(const float& l, const float& r) const { return !(std::abs(l - r) > epsilon<float>()); }
+};
+template <>
+struct equality_comparer<double, double>
+{
+ bool operator()(const double& l, const double& r) const { return !(std::abs(l - r) > epsilon<double>()); }
+};
+template <>
+struct equality_comparer<long double, long double>
+{
+ bool operator()(const long double& l, const long double& r) const
+ {
+ return !(std::abs(l - r) > epsilon<long double>());
+ }
+};
+
+#pragma clang diagnostic pop
+
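+// Element-wise equality for vectors and other compound types; subtypes must
+// match exactly and widths must be equal.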
+template <typename L, typename R>
+struct equality_comparer<L, R, void_t<enable_if<!compound_type_traits<L>::is_scalar>>>
+{
+ using Tsubtype = subtype<L>;
+ constexpr static static_assert_type_eq<subtype<L>, subtype<R>> assert{};
+
+ bool operator()(const L& l, const R& r) const
+ {
+ if (compound_type_traits<L>::width != compound_type_traits<R>::width)
+ return false;
+
+ compound_type_traits<L> itl;
+ compound_type_traits<R> itr;
+ for (size_t i = 0; i < compound_type_traits<L>::width; i++)
+ {
+ equality_comparer<Tsubtype, Tsubtype> cmp;
+ if (!cmp(itl.at(l, i), itr.at(r, i)))
+ return false;
+ }
+ return true;
+ }
+};
+
+struct cmp_eq
+{
+ static const char* op() { return "=="; }
+
+ template <typename L, typename R>
+ bool operator()(L&& left, R&& right)
+ {
+ equality_comparer<std::decay_t<L>, std::decay_t<R>> eq;
+ return eq(left, right);
+ }
+};
+
+struct cmp_ne
+{
+ static const char* op() { return "!="; }
+
+ template <typename L, typename R>
+ bool operator()(L&& left, R&& right)
+ {
+ return !cmp_eq()(left, right);
+ }
+};
+
+struct cmp_lt
+{
+ static const char* op() { return "<"; }
+
+ template <typename L, typename R>
+ bool operator()(L&& left, R&& right)
+ {
+ return left < right;
+ }
+};
+
+struct cmp_gt
+{
+ static const char* op() { return ">"; }
+
+ template <typename L, typename R>
+ bool operator()(L&& left, R&& right)
+ {
+ return left > right;
+ }
+};
+
+struct cmp_le
+{
+ static const char* op() { return "<="; }
+
+ template <typename L, typename R>
+ bool operator()(L&& left, R&& right)
+ {
+ return left <= right;
+ }
+};
+
+struct cmp_ge
+{
+ static const char* op() { return ">="; }
+
+ template <typename L, typename R>
+ bool operator()(L&& left, R&& right)
+ {
+ return left >= right;
+ }
+};
+
+template <typename L>
+struct half_comparison
+{
+ half_comparison(L&& left) : left(std::forward<L>(left)) {}
+
+ template <typename R>
+ comparison<cmp_eq, L, R> operator==(R&& right)
+ {
+ return comparison<cmp_eq, L, R>(std::forward<L>(left), std::forward<R>(right));
+ }
+
+ template <typename R>
+ comparison<cmp_ne, L, R> operator!=(R&& right)
+ {
+ return comparison<cmp_ne, L, R>(std::forward<L>(left), std::forward<R>(right));
+ }
+
+ template <typename R>
+ comparison<cmp_lt, L, R> operator<(R&& right)
+ {
+ return comparison<cmp_lt, L, R>(std::forward<L>(left), std::forward<R>(right));
+ }
+
+ template <typename R>
+ comparison<cmp_gt, L, R> operator>(R&& right)
+ {
+ return comparison<cmp_gt, L, R>(std::forward<L>(left), std::forward<R>(right));
+ }
+
+ template <typename R>
+ comparison<cmp_le, L, R> operator<=(R&& right)
+ {
+ return comparison<cmp_le, L, R>(std::forward<L>(left), std::forward<R>(right));
+ }
+
+ template <typename R>
+ comparison<cmp_ge, L, R> operator>=(R&& right)
+ {
+ return comparison<cmp_ge, L, R>(std::forward<L>(left), std::forward<R>(right));
+ }
+
+ L left;
+};
+
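+// make_comparison() <= expr captures the left-hand operand of expr first
+// (relational operators associate left-to-right and outrank ==/!=), letting
+// CHECK decompose the expression and print both operands on failure.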
+struct make_comparison
+{
+ template <typename L>
+ half_comparison<L> operator<=(L&& left)
+ {
+ return half_comparison<L>(std::forward<L>(left));
+ }
+};
+
+inline std::vector<std::string> split(const std::string& text, char delimiter)
+{
+    size_t prev_pos = 0;
+    size_t start_pos = 0;
+    std::vector<std::string> list;
+    while ((start_pos = text.find(delimiter, prev_pos)) != std::string::npos)
+    {
+        list.push_back(text.substr(prev_pos, start_pos - prev_pos));
+        prev_pos = start_pos + 1;
+    }
+    list.push_back(text.substr(prev_pos));
+    return list;
+}
+
+struct test_case;
+
+inline test_case*& active_test()
+{
+ static test_case* instance = nullptr;
+ return instance;
+}
+
+struct test_case
+{
+ using test_func = void (*)();
+
+ static std::vector<test_case*>& tests()
+ {
+ static std::vector<test_case*> list;
+ return list;
+ }
+
+ test_case(test_func func, const char* name)
+ : func(func), name(name), success(0), failed(0), time(0), show_progress(false)
+ {
+ tests().push_back(this);
+ }
+
+ bool run(bool show_successful)
+ {
+ using namespace std::chrono;
+ using time_point = high_resolution_clock::time_point;
+ {
+ console_color cc(Cyan);
+ printfmt("[{}]", padcenter(11, std::string("RUN"), '-'));
+ }
+ printfmt(" {}...\n", name);
+ time_point start = high_resolution_clock::now();
+ active_test() = this;
+ func();
+ active_test() = nullptr;
+ time_point stop = high_resolution_clock::now();
+ time = duration_cast<duration<double>>(stop - start).count();
+
+ {
+ console_color cc(failed ? Red : Green);
+ printfmt("[{}] {} subtests of {}\n", padcenter(11, failed ? "ERROR" : "SUCCESS", '-'),
+ failed ? failed : success, success + failed);
+ }
+ if (failed)
+ {
+ for (const subtest& s : subtests)
+ {
+ if ((s.success && show_successful) || !s.success)
+ {
+ if (!s.comment.empty())
+ printfmt(" {}:\n", s.comment);
+ {
+ console_color cc(s.success ? Green : Red);
+ printfmt(" {} ", s.success ? "[success]" : "[fail] ");
+ }
+ printfmt("{}\n", s.text);
+ }
+ }
+ console_color cc(White);
+ }
+ return !failed;
+ }
+
+ void check(bool result, const std::string& value, const char* expr)
+ {
+ subtests.push_back(subtest{ result, format("{} | {}", padleft(22, expr), value), comment });
+ result ? success++ : failed++;
+ if (show_progress)
+ {
+ if (result)
+ {
+ console_color cc(Green);
+ print(".");
+ }
+ else
+ {
+ console_color cc(Red);
+ print("E");
+ }
+ }
+ }
+
+ template <typename Op, typename L, typename R>
+ void check(comparison<Op, L, R> comparison, const char* expr)
+ {
+ bool result = comparison();
+ check(result, format("{} {} {}", as_string(comparison.left), Op::op(), as_string(comparison.right)),
+ expr);
+ }
+
+ template <typename L>
+ void check(half_comparison<L> comparison, const char* expr)
+ {
+        bool result = static_cast<bool>(comparison.left);
+ check(result, as_string(comparison.left), expr);
+ }
+
+ void set_comment(const std::string& text)
+ {
+ comment = text;
+ if (show_progress)
+ {
+ printfmt("\n{}:\n", comment);
+ }
+ }
+
+ struct subtest
+ {
+ bool success;
+ std::string text;
+ std::string comment;
+ };
+
+ test_func func;
+ const char* name;
+ std::vector<subtest> subtests;
+ std::string comment;
+ int success;
+ int failed;
+ double time;
+ bool show_progress;
+};
+
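+// Accumulates minimum, maximum, sum and every sample so that the average and
+// median can be reported.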
+template <typename Number>
+struct statistics
+{
+ Number minimum;
+ Number maximum;
+ double sum;
+ unsigned long long count;
+ std::vector<Number> values;
+ void reset() { *this = statistics<Number>(); }
+ std::string str()
+ {
+ return format("{} ... {} (avg={}, median={})\n", minimum, maximum, cometa::fmt<'f', 2>(average()),
+ median());
+ }
+ double average() const { return sum / count; }
+ Number median()
+ {
+ std::sort(values.begin(), values.end());
+ return values.empty() ? Number() : values[values.size() / 2];
+ }
+    statistics()
+        : minimum(std::numeric_limits<Number>::max()),
+          maximum(std::numeric_limits<Number>::lowest()), sum(), count()
+    {
+    }
+ void operator()(Number x)
+ {
+ minimum = std::min(minimum, x);
+ maximum = std::max(maximum, x);
+ sum += x;
+ count++;
+ values.push_back(x);
+ }
+};
+
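+// Runs fn for every combination of the given named-argument values and tags
+// each run with a comment identifying the combination.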
+template <typename Arg0, typename Fn>
+void matrix(named_arg<Arg0>&& arg0, Fn&& fn)
+{
+ cforeach(std::forward<Arg0>(arg0.value), [&](auto v0) {
+ active_test()->set_comment(format("{} = {}", arg0.name, v0));
+ fn(v0);
+ });
+ if (active_test()->show_progress)
+ printfmt("\n");
+}
+
+template <typename Arg0, typename Arg1, typename Fn>
+void matrix(named_arg<Arg0>&& arg0, named_arg<Arg1>&& arg1, Fn&& fn)
+{
+ cforeach(std::forward<Arg0>(arg0.value), std::forward<Arg1>(arg1.value), [&](auto v0, auto v1) {
+ active_test()->set_comment(format("{} = {}, {} = {}", arg0.name, v0, arg1.name, v1));
+ fn(v0, v1);
+ });
+ if (active_test()->show_progress)
+ printfmt("\n");
+}
+
+template <typename Arg0, typename Arg1, typename Arg2, typename Fn>
+void matrix(named_arg<Arg0>&& arg0, named_arg<Arg1>&& arg1, named_arg<Arg2>&& arg2, Fn&& fn)
+{
+ cforeach(std::forward<Arg0>(arg0.value), std::forward<Arg1>(arg1.value), std::forward<Arg2>(arg2.value),
+ [&](auto v0, auto v1, auto v2) {
+ active_test()->set_comment(
+ format("{} = {}, {} = {}, {} = {}", arg0.name, v0, arg1.name, v1, arg2.name, v2));
+ fn(v0, v1, v2);
+ });
+ if (active_test()->show_progress)
+ printfmt("\n");
+}
+
+static int run_all(const std::string& name = std::string(), bool show_successful = false)
+{
+ std::vector<test_case*> success;
+ std::vector<test_case*> failed;
+ for (test_case* t : test_case::tests())
+ {
+ if (name.empty() || t->name == name)
+ t->run(show_successful) ? success.push_back(t) : failed.push_back(t);
+ }
+ printfmt("{}\n", std::string(79, '='));
+ if (!success.empty())
+ {
+ console_color cc(Green);
+ printfmt("[{}]", padcenter(11, "SUCCESS", '-'));
+ printfmt(" {} tests\n", success.size());
+ }
+ if (!failed.empty())
+ {
+ console_color cc(Red);
+ printfmt("[{}]", padcenter(11, "ERROR", '-'));
+ printfmt(" {} tests\n", failed.size());
+ }
+ return static_cast<int>(failed.size());
+}
+
+#define TESTO_CHECK(...) \
+ { \
+ ::testo::active_test()->check(::testo::make_comparison() <= __VA_ARGS__, #__VA_ARGS__); \
+ }
+
+#define TESTO_TEST(name) \
+ void test_function_##name(); \
+ ::testo::test_case test_case_##name(&test_function_##name, #name); \
+ void CID_NOINLINE test_function_##name()
+
+#define TESTO_DTEST(name) \
+ template <typename> \
+ void disabled_test_function_##name()
+
+#ifndef TESTO_NO_SHORT_MACROS
+#define CHECK TESTO_CHECK
+#define TEST TESTO_TEST
+#define DTEST TESTO_DTEST
+#endif
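+
+// Typical usage (a sketch):
+//
+//   TEST(arithmetic) { CHECK(1 + 1 == 2); }
+//   int main() { return testo::run_all("", true); }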
+} // namespace testo
+
+CLANG_DIAGNOSTIC_PRAGMA(pop)